taskqueue for pf periodic events

Gleb Smirnoff glebius at FreeBSD.org
Tue Mar 7 10:59:41 UTC 2006


  Max, Daniel, Scott,

  running a pf load balancer I have noticed that the "swi4: clock"
process consumes a noticeable amount of CPU time when a lot of
states are purged from the pf cache. The load balancer is also running
CARP, and a hot spare is working here too. Reading daily run outputs
from the second router, I have noticed that a few times per day
the redundant router preempts the main one, since it doesn't
receive an announcement from the master in time. So, I had a theory
that a heavy pf purge runs for so long that a CARP announcement
is delayed. You know, all callout(9) events are serialized in one
thread - "swi4: clock".

  So I made a patch that moves all periodic pf(4) jobs into a separate
context. The patch uses the new taskqueue API made by Scott. I have
ported the API to RELENG_6 and made my patch for RELENG_6. I've
been running the patch for 27 days and the spurious preemptions
by the CARP backup have gone away. No problems were noticed. The box
is running an SMP kernel on a single-CPU box with HTT (2 logical
CPUs), HTT enabled.

  The patch is attached.

-- 
Totus tuus, Glebius.
GLEBIUS-RIPN GLEB-RIPE
-------------- next part --------------
Index: if_pfsync.c
===================================================================
RCS file: /home/ncvs/src/sys/contrib/pf/net/if_pfsync.c,v
retrieving revision 1.19.2.3
diff -u -r1.19.2.3 if_pfsync.c
--- if_pfsync.c	17 Sep 2005 15:19:38 -0000	1.19.2.3
+++ if_pfsync.c	7 Feb 2006 13:20:01 -0000
@@ -57,6 +57,7 @@
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
+#include <sys/taskqueue.h>
 #else
 #include <sys/ioctl.h>
 #include <sys/timeout.h>
@@ -136,7 +137,7 @@
 
 static void	pfsync_clone_destroy(struct ifnet *);
 static int	pfsync_clone_create(struct if_clone *, int);
-static void	pfsync_senddef(void *);
+static void	pfsync_senddef(void *, int);
 #else
 void	pfsyncattach(int);
 #endif
@@ -154,6 +155,11 @@
 void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
 void	pfsync_bulk_update(void *);
 void	pfsync_bulkfail(void *);
+#ifdef __FreeBSD__
+void	pfsync_task(void *, int);
+void	pfsync_bulk_task(void *, int);
+void	pfsync_bulkfail_task(void *, int);
+#endif
 
 int	pfsync_sync_ok;
 #ifndef __FreeBSD__
@@ -175,12 +181,14 @@
         struct pfsync_softc *sc;
 
 	sc = ifp->if_softc;
+	taskqueue_drain(pf_tq, &sc->sc_send_task);
+	taskqueue_drain(pf_tq, &sc->sc_task);
 	callout_stop(&sc->sc_tmo);
+	taskqueue_drain(pf_tq, &sc->sc_bulk_task);
 	callout_stop(&sc->sc_bulk_tmo);
+	taskqueue_drain(pf_tq, &sc->sc_bulkfail_task);
 	callout_stop(&sc->sc_bulkfail_tmo);
 
-	callout_stop(&sc->sc_send_tmo);
-
 #if NBPFILTER > 0
         bpfdetach(ifp);
 #endif
@@ -227,7 +235,10 @@
 	callout_init(&sc->sc_tmo, NET_CALLOUT_MPSAFE);
 	callout_init(&sc->sc_bulk_tmo, NET_CALLOUT_MPSAFE);
 	callout_init(&sc->sc_bulkfail_tmo, NET_CALLOUT_MPSAFE);
-	callout_init(&sc->sc_send_tmo, NET_CALLOUT_MPSAFE);
+	TASK_INIT(&sc->sc_task, 0, pfsync_task, sc);
+	TASK_INIT(&sc->sc_bulk_task, 0, pfsync_bulk_task, sc);
+	TASK_INIT(&sc->sc_bulkfail_task, 0, pfsync_bulkfail_task, sc);
+	TASK_INIT(&sc->sc_send_task, 0, pfsync_senddef, sc);
 	sc->sc_ifq.ifq_maxlen = ifqmaxlen;
 	mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
 	    MTX_DEF);
@@ -878,8 +889,7 @@
 				pfsync_send_bus(sc, PFSYNC_BUS_START);
 #ifdef __FreeBSD__
 				callout_reset(&sc->sc_bulk_tmo, 1 * hz,
-				    pfsync_bulk_update,
-				    LIST_FIRST(&pfsync_list));
+				    pfsync_bulk_update, sc);
 #else
 				timeout_add(&sc->sc_bulk_tmo, 1 * hz);
 #endif
@@ -918,7 +928,7 @@
 			callout_reset(&sc->sc_bulkfail_tmo,
 			    pf_pool_limits[PF_LIMIT_STATES].limit /
 			    (PFSYNC_BULKPACKETS * sc->sc_maxcount), 
-			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
+			    pfsync_bulkfail, sc);
 #else
 			timeout_add(&sc->sc_bulkfail_tmo,
 			    pf_pool_limits[PF_LIMIT_STATES].limit /
@@ -1044,7 +1054,7 @@
 		if (pfsyncr.pfsyncr_maxupdates > 255)
 			return (EINVAL);
 #ifdef __FreeBSD__
-		callout_drain(&sc->sc_send_tmo);
+		taskqueue_drain(pf_tq, &sc->sc_send_task);
 		PF_LOCK();
 #endif
 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
@@ -1146,7 +1156,7 @@
 				printf("pfsync: requesting bulk update\n");
 #ifdef __FreeBSD__
 			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
-			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
+			    pfsync_bulkfail, sc);
 #else
 			timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
 #endif
@@ -1269,8 +1279,7 @@
 
 	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
 #ifdef __FreeBSD__
-	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
-	    LIST_FIRST(&pfsync_list));
+	callout_reset(&sc->sc_tmo, hz, pfsync_timeout, sc);
 #else
 	timeout_add(&sc->sc_tmo, hz);
 #endif
@@ -1569,15 +1578,27 @@
 pfsync_timeout(void *v)
 {
 	struct pfsync_softc *sc = v;
+
+#ifdef __FreeBSD__
+	taskqueue_enqueue(pf_tq, &sc->sc_task);
+}
+
+void
+pfsync_task(void *v, int pending)
+{
+	struct pfsync_softc *sc = v;
+#endif
 	int s;
 
 	s = splnet();
 #ifdef __FreeBSD__
+	NET_LOCK_GIANT();
 	PF_LOCK();
 #endif
 	pfsync_sendout(sc);
 #ifdef __FreeBSD__
 	PF_UNLOCK();
+	NET_UNLOCK_GIANT();
 #endif
 	splx(s);
 }
@@ -1610,10 +1631,21 @@
 pfsync_bulk_update(void *v)
 {
 	struct pfsync_softc *sc = v;
+
+#ifdef __FreeBSD__
+	taskqueue_enqueue(pf_tq, &sc->sc_bulk_task);
+}
+
+void
+pfsync_bulk_task(void *v, int pending)
+{
+	struct pfsync_softc *sc = v;
+#endif
 	int s, i = 0;
 	struct pf_state *state;
 
 #ifdef __FreeBSD__
+	NET_LOCK_GIANT();
 	PF_LOCK();
 #endif
 	s = splnet();
@@ -1649,8 +1681,7 @@
 
 			/* look again for more in a bit */
 #ifdef __FreeBSD__
-			callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
-			    LIST_FIRST(&pfsync_list));
+			taskqueue_enqueue(pf_tq, &sc->sc_task);
 #else
 			timeout_add(&sc->sc_bulk_tmo, 1);
 #endif
@@ -1661,6 +1692,7 @@
 	splx(s);
 #ifdef __FreeBSD__
 	PF_UNLOCK();
+	NET_UNLOCK_GIANT();
 #endif
 }
 
@@ -1668,16 +1700,26 @@
 pfsync_bulkfail(void *v)
 {
 	struct pfsync_softc *sc = v;
+
+#ifdef __FreeBSD__
+	taskqueue_enqueue(pf_tq, &sc->sc_bulkfail_task);
+}
+
+void
+pfsync_bulkfail_task(void *v, int pending)
+{
+	struct pfsync_softc *sc = v;
+#endif
 	int s, error;
 
 #ifdef __FreeBSD__
+	NET_LOCK_GIANT();
 	PF_LOCK();
 #endif
 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
 		/* Try again in a bit */
 #ifdef __FreeBSD__
-		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
-		    LIST_FIRST(&pfsync_list));
+		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, sc);
 #else
 		timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
 #endif
@@ -1710,6 +1752,7 @@
 	}
 #ifdef __FreeBSD__
 	PF_UNLOCK();
+	NET_UNLOCK_GIANT();
 #endif
 }
 
@@ -1804,7 +1847,7 @@
 #ifdef __FreeBSD__
 		if (!IF_HANDOFF(&sc->sc_ifq, m, NULL))
 			pfsyncstats.pfsyncs_oerrors++;
-		callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
+		taskqueue_enqueue(pf_tq, &sc->sc_send_task);
 #else
 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
 			pfsyncstats.pfsyncs_oerrors++;
@@ -1817,7 +1860,7 @@
 
 #ifdef __FreeBSD__
 static void
-pfsync_senddef(void *arg)
+pfsync_senddef(void *arg, int pending)
 {
 	struct pfsync_softc *sc = (struct pfsync_softc *)arg;
 	struct mbuf *m;
Index: if_pfsync.h
===================================================================
RCS file: /home/ncvs/src/sys/contrib/pf/net/if_pfsync.h,v
retrieving revision 1.7
diff -u -r1.7 if_pfsync.h
--- if_pfsync.h	10 Jun 2005 17:23:49 -0000	1.7
+++ if_pfsync.h	6 Feb 2006 12:38:37 -0000
@@ -159,6 +159,9 @@
 	struct callout		 sc_tmo;
 	struct callout		 sc_bulk_tmo;
 	struct callout		 sc_bulkfail_tmo;
+	struct task		 sc_task;
+	struct task		 sc_bulk_task;
+	struct task		 sc_bulkfail_task;
 #else
 	struct timeout		 sc_tmo;
 	struct timeout		 sc_bulk_tmo;
@@ -170,7 +173,7 @@
 	struct mbuf		*sc_mbuf_net;	/* current cumulative mbuf */
 #ifdef __FreeBSD__
 	struct ifqueue		 sc_ifq;
-	struct callout		 sc_send_tmo;
+	struct task		 sc_send_task;
 #endif
 	union sc_statep		 sc_statep;
 	union sc_statep		 sc_statep_net;
Index: pf.c
===================================================================
RCS file: /home/ncvs/src/sys/contrib/pf/net/pf.c,v
retrieving revision 1.34.2.3
diff -u -r1.34.2.3 pf.c
--- pf.c	30 Dec 2005 00:50:18 -0000	1.34.2.3
+++ pf.c	7 Feb 2006 13:18:33 -0000
@@ -64,6 +64,7 @@
 #ifdef __FreeBSD__
 #include <sys/sysctl.h>
 #include <sys/endian.h>
+#include <sys/taskqueue.h>
 #else
 #include <sys/pool.h>
 #endif
@@ -139,6 +140,8 @@
 
 #ifdef __FreeBSD__
 struct callout	 	 pf_expire_to;			/* expire timeout */
+struct task		 pf_expire_task;
+struct taskqueue	 *pf_tq = NULL;
 #else
 struct timeout		 pf_expire_to;			/* expire timeout */
 #endif
@@ -964,6 +967,12 @@
 pf_purge_timeout(void *arg)
 {
 #ifdef __FreeBSD__
+	taskqueue_enqueue(pf_tq, &pf_expire_task);
+}
+
+void
+pf_purge(void *arg, int pending)
+{
 	struct callout  *to = arg;
 #else
 	struct timeout	*to = arg;
@@ -971,6 +980,7 @@
 	int		 s;
 
 #ifdef __FreeBSD__
+	NET_LOCK_GIANT();
 	PF_LOCK();
 #endif
 	s = splsoftnet();
@@ -985,6 +995,7 @@
 #ifdef __FreeBSD__
 	callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
 	    pf_purge_timeout, to);
+	NET_UNLOCK_GIANT();
 #else
 	timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz);
 #endif
Index: pf_ioctl.c
===================================================================
RCS file: /home/ncvs/src/sys/contrib/pf/net/pf_ioctl.c,v
retrieving revision 1.20.2.1
diff -u -r1.20.2.1 pf_ioctl.c
--- pf_ioctl.c	12 Sep 2005 11:25:17 -0000	1.20.2.1
+++ pf_ioctl.c	6 Feb 2006 13:17:57 -0000
@@ -67,6 +67,7 @@
 #include <sys/module.h>
 #include <sys/conf.h>
 #include <sys/proc.h>
+#include <sys/taskqueue.h>
 #else
 #include <sys/timeout.h>
 #include <sys/pool.h>
@@ -147,6 +148,7 @@
 
 #ifdef __FreeBSD__
 extern struct callout	 pf_expire_to;
+extern struct task       pf_expire_task;
 #else
 extern struct timeout	 pf_expire_to;
 #endif
@@ -335,6 +337,11 @@
 	my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
 	my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
 
+	TASK_INIT(&pf_expire_task, 0, pf_purge, &pf_expire_to);
+	pf_tq = taskqueue_create_fast("pf taskq", M_NOWAIT,
+	    taskqueue_thread_enqueue, &pf_tq);
+	taskqueue_start_threads(&pf_tq, 1, PI_NET, "pf taskq");
+
 	callout_init(&pf_expire_to, NET_CALLOUT_MPSAFE);
 	callout_reset(&pf_expire_to, my_timeout[PFTM_INTERVAL] * hz,
 	    pf_purge_timeout, &pf_expire_to);
@@ -3270,6 +3277,8 @@
 	u_int32_t t[5];
 	char nn = '\0';
 
+	taskqueue_drain(pf_tq, &pf_expire_task);
+	taskqueue_free(pf_tq);
 	callout_stop(&pf_expire_to);
 
 	pf_status.running = 0;
Index: pfvar.h
===================================================================
RCS file: /home/ncvs/src/sys/contrib/pf/net/pfvar.h,v
retrieving revision 1.11.2.2
diff -u -r1.11.2.2 pfvar.h
--- pfvar.h	30 Dec 2005 00:50:18 -0000	1.11.2.2
+++ pfvar.h	6 Feb 2006 13:19:47 -0000
@@ -1525,12 +1525,16 @@
 extern uma_zone_t		 pf_cache_pl, pf_cent_pl;
 extern uma_zone_t		 pf_state_scrub_pl;
 extern uma_zone_t		 pfi_addr_pl;
+extern struct taskqueue		*pf_tq;
 #else
 extern struct pool		 pf_src_tree_pl, pf_rule_pl;
 extern struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
 extern struct pool		 pf_state_scrub_pl;
 #endif
 extern void			 pf_purge_timeout(void *);
+#ifdef __FreeBSD__
+extern void			 pf_purge(void *, int);
+#endif
 extern void			 pf_purge_expired_src_nodes(void);
 extern void			 pf_purge_expired_states(void);
 extern void			 pf_purge_expired_state(struct pf_state *);


More information about the freebsd-net mailing list