Deferring inp_freemoptions() to an asychronous task
John Baldwin
jhb at freebsd.org
Thu Dec 22 16:15:14 UTC 2011
I have a workload at work where a particular device driver can take a while to
update its MAC filter table when adding or removing multicast link-layer
addresses. One of the ways I've tackled fixing this is to change
inp_freemoptions() so that it does all of its actual work asychronously in a
separate task. Currently it does its work synchronously; however, it can be
invoked while the associated protocol holds a write lock on its pcbinfo lock
(e.g. from in_pcbdetach() called from udp_detach()). This stalls all packet
reception for that protocol since received packets need a read lock on the
pcbinfo to lookup the socket associated with a given (ip, port) tuple. Moving
the meat of inp_freemoptions() out to a task moves out it from under the
pcbinfo lock meaning that a driver that takes a while to update its MAC filter
table no longer stalls receive processing for the entire machine. The patch
below is against 8, but I expect it to apply to HEAD as well. If folks don't
object I'll port this forward to HEAD.
Index: in_mcast.c
===================================================================
--- in_mcast.c (revision 225431)
+++ in_mcast.c (working copy)
@@ -46,6 +46,7 @@
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
+#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <net/if.h>
@@ -144,6 +145,8 @@
static void inm_reap(struct in_multi *);
static struct ip_moptions *
inp_findmoptions(struct inpcb *);
+static void inp_freemoptions_internal(struct ip_moptions *);
+static void inp_gcmoptions(void *, int);
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
@@ -178,6 +181,10 @@
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
"Per-interface stack-wide source filters");
+static STAILQ_HEAD(, ip_moptions) imo_gc_list =
+ STAILQ_HEAD_INITIALIZER(imo_gc_list);
+static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
+
/*
* Inline function which wraps assertions for a valid ifp.
* The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
@@ -1517,18 +1524,30 @@
}
/*
- * Discard the IP multicast options (and source filters).
+ * Discard the IP multicast options (and source filters). To minimize
+ * the amount of work done while holding locks such as the INP's
+ * pcbinfo lock (which is used in the receive path), the free
+ * operation is performed asynchronously in a separate task.
*
* SMPng: NOTE: assumes INP write lock is held.
*/
void
inp_freemoptions(struct ip_moptions *imo)
{
+
+ KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+ IN_MULTI_LOCK();
+ STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
+ taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
+ IN_MULTI_UNLOCK();
+}
+
+static void
+inp_freemoptions_internal(struct ip_moptions *imo)
+{
struct in_mfilter *imf;
size_t idx, nmships;
- KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
-
nmships = imo->imo_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
@@ -1545,6 +1564,22 @@
free(imo, M_IPMOPTS);
}
+static void
+inp_gcmoptions(void *context, int pending)
+{
+ struct ip_moptions *imo;
+
+ IN_MULTI_LOCK();
+ while (!STAILQ_EMPTY(&imo_gc_list)) {
+ imo = STAILQ_FIRST(&imo_gc_list);
+ STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
+ IN_MULTI_UNLOCK();
+ inp_freemoptions_internal(imo);
+ IN_MULTI_LOCK();
+ }
+ IN_MULTI_UNLOCK();
+}
+
/*
* Atomically get source filters on a socket for an IPv4 multicast group.
* Called with INP lock held; returns with lock released.
Index: ip_var.h
===================================================================
--- ip_var.h (revision 225431)
+++ ip_var.h (working copy)
@@ -93,6 +93,7 @@
u_short imo_max_memberships; /* max memberships this socket */
struct in_multi **imo_membership; /* group memberships */
struct in_mfilter *imo_mfilters; /* source filters */
+ STAILQ_ENTRY(ip_moptions) imo_link;
};
struct ipstat {
--
John Baldwin
More information about the freebsd-net
mailing list