svn commit: r343579 - head/sys/dev/netmap

Vincenzo Maffione vmaffione at FreeBSD.org
Wed Jan 30 15:51:57 UTC 2019


Author: vmaffione
Date: Wed Jan 30 15:51:55 2019
New Revision: 343579
URL: https://svnweb.freebsd.org/changeset/base/343579

Log:
  netmap: fix lock order reversal related to kqueue usage
  
  When using poll(), select() or kevent() on netmap file descriptors,
  netmap executes the equivalent of NIOCTXSYNC and NIOCRXSYNC commands,
  before collecting the events that are ready. In other words, the
  poll/kevent callback has side effects. This is done to avoid the
  overhead of two system calls per iteration (e.g., poll() + ioctl(NIOC*XSYNC)).
  
  When the kqueue subsystem invokes the kqueue(9) f_event callback
  (netmap_knrw), it holds the lock of the struct knlist object associated
  with the netmap port (the lock is provided at initialization, by calling
  knlist_init_mtx).
  However, netmap_knrw() may need to wake up another netmap port (or even
  the same one), which means that it may need to call knote().
  Since knote() needs the lock of the struct knlist object associated with
  the netmap port to be woken up, it is possible to have a lock order
  reversal problem (AB/BA deadlock).
  
  This change prevents the deadlock by executing the knote() call in a
  per-selinfo taskqueue, where it is possible to hold a mutex.
  
  Reviewed by:	aleksandr.fedorov_itglobal.com
  MFC after:	2 weeks
  Differential Revision:	https://reviews.freebsd.org/D18956

Modified:
  head/sys/dev/netmap/netmap.c
  head/sys/dev/netmap/netmap_freebsd.c
  head/sys/dev/netmap/netmap_kern.h

Modified: head/sys/dev/netmap/netmap.c
==============================================================================
--- head/sys/dev/netmap/netmap.c	Wed Jan 30 13:21:26 2019	(r343578)
+++ head/sys/dev/netmap/netmap.c	Wed Jan 30 15:51:55 2019	(r343579)
@@ -830,6 +830,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int 
 	struct netmap_kring *kring;
 	u_int n[NR_TXRX];
 	enum txrx t;
+	int err = 0;
 
 	if (na->tx_rings != NULL) {
 		if (netmap_debug & NM_DEBUG_ON)
@@ -869,7 +870,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int 
 		for (i = 0; i < n[t]; i++) {
 			kring = NMR(na, t)[i];
 			bzero(kring, sizeof(*kring));
-			kring->na = na;
 			kring->notify_na = na;
 			kring->ring_id = i;
 			kring->tx = t;
@@ -895,13 +895,21 @@ netmap_krings_create(struct netmap_adapter *na, u_int 
 					nm_txrx2str(t), i);
 			ND("ktx %s h %d c %d t %d",
 				kring->name, kring->rhead, kring->rcur, kring->rtail);
+			err = nm_os_selinfo_init(&kring->si, kring->name);
+			if (err) {
+				netmap_krings_delete(na);
+				return err;
+			}
 			mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
-			nm_os_selinfo_init(&kring->si);
+			kring->na = na;	/* setting this field marks the mutex as initialized */
 		}
-		nm_os_selinfo_init(&na->si[t]);
+		err = nm_os_selinfo_init(&na->si[t], na->name);
+		if (err) {
+			netmap_krings_delete(na);
+			return err;
+		}
 	}
 
-
 	return 0;
 }
 
@@ -925,7 +933,8 @@ netmap_krings_delete(struct netmap_adapter *na)
 
 	/* we rely on the krings layout described above */
 	for ( ; kring != na->tailroom; kring++) {
-		mtx_destroy(&(*kring)->q_lock);
+		if ((*kring)->na != NULL)
+			mtx_destroy(&(*kring)->q_lock);
 		nm_os_selinfo_uninit(&(*kring)->si);
 	}
 	nm_os_free(na->tx_rings);

Modified: head/sys/dev/netmap/netmap_freebsd.c
==============================================================================
--- head/sys/dev/netmap/netmap_freebsd.c	Wed Jan 30 13:21:26 2019	(r343578)
+++ head/sys/dev/netmap/netmap_freebsd.c	Wed Jan 30 15:51:55 2019	(r343579)
@@ -58,6 +58,7 @@
 #include <sys/unistd.h> /* RFNOWAIT */
 #include <sys/sched.h> /* sched_bind() */
 #include <sys/smp.h> /* mp_maxid */
+#include <sys/taskqueue.h> /* taskqueue_enqueue(), taskqueue_create(), ... */
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h> /* IFT_ETHER */
@@ -75,16 +76,48 @@
 
 /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
 
-void nm_os_selinfo_init(NM_SELINFO_T *si) {
-	struct mtx *m = &si->m;
-	mtx_init(m, "nm_kn_lock", NULL, MTX_DEF);
-	knlist_init_mtx(&si->si.si_note, m);
+static void
+nm_kqueue_notify(void *opaque, int pending)
+{
+	struct nm_selinfo *si = opaque;
+
+	/* We use a non-zero hint to distinguish this notification call
+	 * from the call done in kqueue_scan(), which uses hint=0.
+	 */
+	KNOTE_UNLOCKED(&si->si.si_note, /*hint=*/0x100);
 }
 
+int nm_os_selinfo_init(NM_SELINFO_T *si, const char *name) {
+	int err;
+
+	TASK_INIT(&si->ntfytask, 0, nm_kqueue_notify, si);
+	si->ntfytq = taskqueue_create(name, M_NOWAIT,
+	    taskqueue_thread_enqueue, &si->ntfytq);
+	if (si->ntfytq == NULL)
+		return -ENOMEM;
+	err = taskqueue_start_threads(&si->ntfytq, 1, PI_NET, "tq %s", name);
+	if (err) {
+		taskqueue_free(si->ntfytq);
+		si->ntfytq = NULL;
+		return err;
+	}
+
+	snprintf(si->mtxname, sizeof(si->mtxname), "nmkl%s", name);
+	mtx_init(&si->m, si->mtxname, NULL, MTX_DEF);
+	knlist_init_mtx(&si->si.si_note, &si->m);
+
+	return (0);
+}
+
 void
 nm_os_selinfo_uninit(NM_SELINFO_T *si)
 {
-	/* XXX kqueue(9) needed; these will mirror knlist_init. */
+	if (si->ntfytq == NULL) {
+		return;	/* si was not initialized */
+	}
+	taskqueue_drain(si->ntfytq, &si->ntfytask);
+	taskqueue_free(si->ntfytq);
+	si->ntfytq = NULL;
 	knlist_delete(&si->si.si_note, curthread, /*islocked=*/0);
 	knlist_destroy(&si->si.si_note);
 	/* now we don't need the mutex anymore */
@@ -1292,13 +1325,18 @@ nm_os_kctx_destroy(struct nm_kctx *nmk)
 
 /*
  * In addition to calling selwakeuppri(), nm_os_selwakeup() also
- * needs to call KNOTE to wake up kqueue listeners.
- * We use a non-zero 'hint' argument to inform the netmap_knrw()
- * function that it is being called from 'nm_os_selwakeup'; this
- * is necessary because when netmap_knrw() is called by the kevent
- * subsystem (i.e. kevent_scan()) we also need to call netmap_poll().
- * The knote uses a private mutex associated to the 'si' (see struct
- * selinfo, struct nm_selinfo, and nm_os_selinfo_init).
+ * needs to call knote() to wake up kqueue listeners.
+ * This operation is deferred to a taskqueue in order to avoid possible
+ * lock order reversals; these may happen because knote() grabs a
+ * private lock associated to the 'si' (see struct selinfo,
+ * struct nm_selinfo, and nm_os_selinfo_init), and nm_os_selwakeup()
+ * can be called while holding the lock associated to a different
+ * 'si'.
+ * When calling knote() we use a non-zero 'hint' argument to inform
+ * the netmap_knrw() function that it is being called from
+ * 'nm_os_selwakeup'; this is necessary because when netmap_knrw() is
+ * called by the kevent subsystem (i.e. kevent_scan()) we also need to
+ * call netmap_poll().
  *
  * The netmap_kqfilter() function registers one or another f_event
  * depending on read or write mode. A pointer to the struct
@@ -1315,11 +1353,7 @@ nm_os_selwakeup(struct nm_selinfo *si)
 	if (netmap_verbose)
 		nm_prinf("on knote %p", &si->si.si_note);
 	selwakeuppri(&si->si, PI_NET);
-	/* We use a non-zero hint to distinguish this notification call
-	 * from the call done in kqueue_scan(), which uses hint=0.
-	 */
-	KNOTE(&si->si.si_note, /*hint=*/0x100,
-	    mtx_owned(&si->m) ? KNF_LISTLOCKED : 0);
+	taskqueue_enqueue(si->ntfytq, &si->ntfytask);
 }
 
 void

Modified: head/sys/dev/netmap/netmap_kern.h
==============================================================================
--- head/sys/dev/netmap/netmap_kern.h	Wed Jan 30 13:21:26 2019	(r343578)
+++ head/sys/dev/netmap/netmap_kern.h	Wed Jan 30 15:51:55 2019	(r343579)
@@ -133,7 +133,10 @@ struct netmap_adapter *netmap_getna(if_t ifp);
 
 struct nm_selinfo {
 	struct selinfo si;
+	struct taskqueue *ntfytq;
+	struct task ntfytask;
 	struct mtx m;
+	char mtxname[32];
 };
 
 
@@ -295,7 +298,7 @@ struct netmap_priv_d;
 struct nm_bdg_args;
 
 /* os-specific NM_SELINFO_T initialzation/destruction functions */
-void nm_os_selinfo_init(NM_SELINFO_T *);
+int nm_os_selinfo_init(NM_SELINFO_T *, const char *name);
 void nm_os_selinfo_uninit(NM_SELINFO_T *);
 
 const char *nm_dump_buf(char *p, int len, int lim, char *dst);


More information about the svn-src-all mailing list