dynamic rule deadlock

Brian Fundakowski Feldman green at FreeBSD.org
Wed Jun 1 17:01:26 PDT 2005


This is a pretty easy one to diagnose.  In FreeBSD 5.x+, there are
network interface locks that the ifnet::if_start() routines grab and
the IPFW dynamic rule lock that IPFW grabs.  When IPFW periodically
runs its dynamic rule keepalive event, it tries to grab the locks in
the order: IPFW dynamic rule lock, ifnet lock.  This is the wrong
order, and in my 5.4-STABLE IPFW+if_bridge(4)+ALTQ configuration,
leads to a full system deadlock.

The solution I have is pretty simple, but I have not actually
tested what was there before against WITNESS to see exactly what
it had to say since I was more interested in fixing this in my
production environment.  If interested, I can easily help set up
a test environment for this.  Here are the changes, which are, I
believe, a complete fix.  I don't particularly love the style
and feel that could probably stand improvement.

Index: ip_fw2.c
===================================================================
RCS file: /export/ncvs/src/sys/netinet/ip_fw2.c,v
retrieving revision 1.70.2.11
diff -u -r1.70.2.11 ip_fw2.c
--- ip_fw2.c	12 May 2005 15:11:30 -0000	1.70.2.11
+++ ip_fw2.c	1 Jun 2005 20:54:56 -0000
@@ -1237,12 +1237,12 @@
 }
 
 /*
- * Transmit a TCP packet, containing either a RST or a keepalive.
+ * Generate a TCP packet, containing either a RST or a keepalive.
  * When flags & TH_RST, we are sending a RST packet, because of a
  * "reset" action matched the packet.
  * Otherwise we are sending a keepalive, and flags & TH_
  */
-static void
+static struct mbuf *
 send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags)
 {
 	struct mbuf *m;
@@ -1251,7 +1251,7 @@
 
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 	if (m == 0)
-		return;
+		return (NULL);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 	m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
 	m->m_data += max_linkhdr;
@@ -1314,7 +1314,7 @@
 	ip->ip_ttl = ip_defttl;
 	ip->ip_len = m->m_pkthdr.len;
 	m->m_flags |= M_SKIP_FIREWALL;
-	ip_output(m, NULL, NULL, 0, NULL, NULL);
+	return (m);
 }
 
 /*
@@ -1335,10 +1335,14 @@
 	} else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *const tcp =
 		    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
-		if ( (tcp->th_flags & TH_RST) == 0)
-			send_pkt(&(args->f_id), ntohl(tcp->th_seq),
+		if ( (tcp->th_flags & TH_RST) == 0) {
+			struct mbuf *m;
+			m = send_pkt(&(args->f_id), ntohl(tcp->th_seq),
 				ntohl(tcp->th_ack),
 				tcp->th_flags | TH_RST);
+			if (m != NULL)
+				ip_output(m, NULL, NULL, 0, NULL, NULL);
+		}
 		m_freem(args->m);
 	} else
 		m_freem(args->m);
@@ -3476,12 +3480,20 @@
 static void
 ipfw_tick(void * __unused unused)
 {
+	struct mbuf *m0, *m, *mn;
 	int i;
 	ipfw_dyn_rule *q;
 
 	if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0)
 		goto done;
 
+	/*
+	 * We make a chain of packets to go out here -- not deferring
+	 * until after we drop the IPFW dynamic rule lock would result
+	 * in a lock order reversal with the normal packet input -> ipfw
+	 * call stack.
+	 */
+	m0 = m = NULL;
 	IPFW_DYN_LOCK();
 	for (i = 0 ; i < curr_dyn_buckets ; i++) {
 		for (q = ipfw_dyn_v[i] ; q ; q = q->next ) {
@@ -3497,11 +3509,33 @@
 			if (TIME_LEQ(q->expire, time_second))
 				continue;	/* too late, rule expired */
 
-			send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN);
-			send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0);
+			mn = send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd,
+				TH_SYN);
+			if (mn != NULL) {
+				if (m0 == NULL) {
+					m0 = m = mn;
+				} else {
+					m->m_nextpkt = mn;
+					m = mn;
+				}
+			}
+			mn = send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0);
+			if (mn != NULL) {
+				if (m0 == NULL) {
+					m0 = m = mn;
+				} else {
+					m->m_nextpkt = mn;
+					m = mn;
+				}
+			}
 		}
 	}
 	IPFW_DYN_UNLOCK();
+	for (m = mn = m0; m != NULL; m = mn) {
+		mn = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		ip_output(m, NULL, NULL, 0, NULL, NULL);
+	}
 done:
 	callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL);
 }

-- 
Brian Fundakowski Feldman                           \'[ FreeBSD ]''''''''''\
  <> green at FreeBSD.org                               \  The Power to Serve! \
 Opinions expressed are my own.                       \,,,,,,,,,,,,,,,,,,,,,,\


More information about the freebsd-ipfw mailing list