kern/176763: Removing pf Source entries locks kernel.

Kajetan Staszkiewicz vegeta at tuxpowered.net
Fri Mar 8 19:40:02 UTC 2013


>Number:         176763
>Category:       kern
>Synopsis:       Removing pf Source entries locks kernel.
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Fri Mar 08 19:40:01 UTC 2013
>Closed-Date:
>Last-Modified:
>Originator:     Kajetan Staszkiewicz
>Release:        9.1-RELEASE
>Organization:
InnoGames GmbH
>Environment:
FreeBSD xxxxxxx 9.1-RELEASE FreeBSD 9.1-RELEASE #10 r247265M: Mon Feb 25 14:58:39 CET 2013     root at xxxxxxx:/usr/obj/usr/src/sys/IGLB3  amd64
>Description:
The case will happen on a FreeBSD router with large amount of pf State and Source entries. Use pfctl to remove some Sources. For each matching source whole State table is searched element by element.

With hundreds of matching Sources (and hundreds of thousands of States to search through) this can freeze the kernel for a few seconds. Under some conditions (e.g. a DDoS attack hitting the IP address for which the Source entries will be removed), kernel will freeze permanently.
>How-To-Repeat:
`pfctl -K`
>Fix:
Create a list of State entries and attach it to Source. This way only this short list must be searched when Source is removed. List is maintained during State creation and removal.

Patch attached with submission follows:

--- sys/contrib/pf/net/pf.c.orig	2013-03-05 11:27:01.000000000 +0100
+++ sys/contrib/pf/net/pf.c	2013-03-08 14:14:39.000000000 +0100
@@ -1517,6 +1517,19 @@
 	u_int32_t timeout;
 
 	if (s->src_node != NULL) {
+
+		/* Remove this pf_state from the list of states linked to pf_src_node */
+		if (s->prev_state) /* not the first pf_state in list */
+			s->prev_state->next_state = s->next_state;
+		else /* the fist pf_state in the list, modify list head in pf_src_node */
+			s->src_node->linked_states = s->next_state;
+	
+		if (s->next_state) /* not the last pf_state in list */
+			s->next_state->prev_state = s->prev_state;
+
+		s->prev_state = NULL;
+		s->next_state = NULL;
+
 		if (s->src.tcp_est)
 			--s->src_node->conn;
 		if (--s->src_node->states <= 0) {
@@ -1532,6 +1545,19 @@
 		}
 	}
 	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
+
+		/* Remove this pf_state from the list of states linked to pf_src_node */
+		if (s->prev_state) /* not the first pf_state in list */
+			s->prev_state->next_state = s->next_state;
+		else /* the fist pf_state in the list, modify list head in pf_src_node */
+			s->nat_src_node->linked_states = s->next_state;
+	
+		if (s->next_state) /* not the last pf_state in list */
+			s->next_state->prev_state = s->prev_state;
+
+		s->prev_state = NULL;
+		s->next_state = NULL;
+
 		if (--s->nat_src_node->states <= 0) {
 			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
 			if (!timeout)
@@ -3895,12 +3921,24 @@
 	if (sn != NULL) {
 		s->src_node = sn;
 		s->src_node->states++;
+
+		/* attach this state to head of list */
+		s->next_state = sn->linked_states;
+		if (s->next_state)
+			s->next_state->prev_state = s;
+		sn->linked_states = s;
 	}
 	if (nsn != NULL) {
 		/* XXX We only modify one side for now. */
 		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
 		s->nat_src_node = nsn;
 		s->nat_src_node->states++;
+
+		/* attach this state to head of list */
+		s->next_state = nsn->linked_states;
+		if (s->next_state)
+			s->next_state->prev_state = s;
+		nsn->linked_states = s;
 	}
 	if (pd->proto == IPPROTO_TCP) {
 		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
--- sys/contrib/pf/net/pf_ioctl.c.orig	2013-03-05 11:26:44.000000000 +0100
+++ sys/contrib/pf/net/pf_ioctl.c	2013-03-08 14:10:08.000000000 +0100
@@ -3790,6 +3790,7 @@
 	case DIOCKILLSRCNODES: {
 		struct pf_src_node	*sn;
 		struct pf_state		*s;
+		struct pf_state		**pns; /* pointer to next_state of previous state */
 		struct pfioc_src_node_kill *psnk =
 		    (struct pfioc_src_node_kill *)addr;
 		u_int			killed = 0;
@@ -3808,20 +3809,17 @@
 				&psnk->psnk_dst.addr.v.a.mask,
 				&sn->raddr, sn->af)) {
 				/* Handle state to src_node linkage */
-				if (sn->states != 0) {
-					RB_FOREACH(s, pf_state_tree_id,
-#ifdef __FreeBSD__
-					    &V_tree_id) {
-#else
-					    &tree_id) {
-#endif
-						if (s->src_node == sn)
-							s->src_node = NULL;
-						if (s->nat_src_node == sn)
-							s->nat_src_node = NULL;
-					}
-					sn->states = 0;
+				s = NULL; /* make gcc happy */
+				pns = &sn->linked_states;
+				for (s = sn->linked_states; s != NULL; s = s->next_state) {
+					s->src_node = NULL;
+					s->nat_src_node = NULL;
+					*pns = NULL;
+					s->prev_state = NULL;
+					pns = &s->next_state;
 				}
+				*pns = NULL;
+				sn->states = 0;
 				sn->expire = 1;
 				killed++;
 			}
--- sys/contrib/pf/net/pfvar.h.orig	2013-03-05 11:27:14.000000000 +0100
+++ sys/contrib/pf/net/pfvar.h	2013-03-08 11:02:29.000000000 +0100
@@ -748,6 +748,7 @@
 	u_int32_t	 expire;
 	sa_family_t	 af;
 	u_int8_t	 ruletype;
+	struct pf_state  *linked_states;
 };
 
 #define PFSNODE_HIWAT		10000	/* default source node table size */
@@ -852,6 +853,8 @@
 	struct pfi_kif		*rt_kif;
 	struct pf_src_node	*src_node;
 	struct pf_src_node	*nat_src_node;
+	struct pf_state		*prev_state;
+	struct pf_state		*next_state;
 	u_int64_t		 packets[2];
 	u_int64_t		 bytes[2];
 	u_int32_t		 creation;


>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list