kern/176763: [pf] [patch] Removing pf Source entries locks kernel.

Kajetan Staszkiewicz vegeta at tuxpowered.net
Mon Nov 18 17:20:02 UTC 2013


The following reply was made to PR kern/176763; it has been noted by GNATS.

From: Kajetan Staszkiewicz <vegeta at tuxpowered.net>
To: bug-followup at freebsd.org
Cc:  
Subject: Re: kern/176763: [pf] [patch] Removing pf Source entries locks kernel.
Date: Mon, 18 Nov 2013 18:12:36 +0100

 --Boundary-00=_EqkiSNwqoUJOzB0
 Content-Type: Text/Plain;
   charset="us-ascii"
 Content-Transfer-Encoding: 7bit
 
 The attached patch is for FreeBSD 10. It adds a new parameter "-c" to pfctl 
 which when killing src_nodes, also kills states linked to the found nodes.
 
 -- 
 | pozdrawiam / greetings | powered by Debian, FreeBSD and CentOS |
 |  Kajetan Staszkiewicz  | jabber,email: vegeta()tuxpowered net  |
 |        Vegeta          | www: http://vegeta.tuxpowered.net     |
 `------------------------^---------------------------------------'
 
 --Boundary-00=_EqkiSNwqoUJOzB0
 Content-Type: text/x-patch;
   charset="UTF-8";
   name="link-states-to-src-nodes.patch"
 Content-Transfer-Encoding: 7bit
 Content-Disposition: attachment;
 	filename="link-states-to-src-nodes.patch"
 
 # Removing src_nodes causes the list of states to be fully searched through
 # to find ones linked to the given src_node. With large amount of src_nodes
 # and states (for example when under a DDoS attack) this operation can take
 # many seconds to complete.
 #
 # Provide a list of states linked to each src_node and use the list to make
 # the operation faster. Add new parameter "-c" to pfctl which, when
 # killing src_nodes, also kills states linked to found nodes.
 #
 # kajetan.staszkiewicz at innogames.de
 # Work sponsored by InnoGames GmbH
 #
 diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8
 index 5c0e7b3..61c5711 100644
 --- a/sbin/pfctl/pfctl.8
 +++ b/sbin/pfctl/pfctl.8
 @@ -42,7 +42,8 @@
  .Op Fl F Ar modifier
  .Op Fl f Ar file
  .Op Fl i Ar interface
 -.Op Fl K Ar host | network
 +.Oo Fl K Ar host | network
 +.Op Fl c Oc
  .Xo
  .Oo Fl k
  .Ar host | network | label | id
 @@ -189,6 +190,10 @@ as the anchor name:
  .Bd -literal -offset indent
  # pfctl -a '*' -sr
  .Ed
 +.It Fl c
 +When removing source tracking entries, remove state entries linked to
 +them. This option can be only used in conjunction with
 +.Fl K .
  .It Fl D Ar macro Ns = Ns Ar value
  Define
  .Ar macro
 diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c
 index 90a2bb5..6a2dd90 100644
 --- a/sbin/pfctl/pfctl.c
 +++ b/sbin/pfctl/pfctl.c
 @@ -236,7 +236,7 @@ usage(void)
  
  	fprintf(stderr, "usage: %s [-AdeghmNnOPqRrvz] ", __progname);
  	fprintf(stderr, "[-a anchor] [-D macro=value] [-F modifier]\n");
 -	fprintf(stderr, "\t[-f file] [-i interface] [-K host | network]\n");
 +	fprintf(stderr, "\t[-f file] [-i interface] [-K host | network [-c]]\n");
  	fprintf(stderr, "\t[-k host | network | label | id] ");
  	fprintf(stderr, "[-o level] [-p device]\n");
  	fprintf(stderr, "\t[-s modifier] ");
 @@ -449,10 +449,10 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
  	struct pfioc_src_node_kill psnk;
  	struct addrinfo *res[2], *resp[2];
  	struct sockaddr last_src, last_dst;
 -	int killed, sources, dests;
 +	int killed, killed_states, sources, dests;
  	int ret_ga;
  
 -	killed = sources = dests = 0;
 +	killed = killed_states = sources = dests = 0;
  
  	memset(&psnk, 0, sizeof(psnk));
  	memset(&psnk.psnk_src.addr.v.a.mask, 0xff,
 @@ -462,6 +462,8 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
  
  	pfctl_addrprefix(src_node_kill[0], &psnk.psnk_src.addr.v.a.mask);
  
 +	psnk.psnk_kill_linked_states = opts & PF_OPT_KILLLINKEDSTATES;
 +
  	if ((ret_ga = getaddrinfo(src_node_kill[0], NULL, NULL, &res[0]))) {
  		errx(1, "getaddrinfo: %s", gai_strerror(ret_ga));
  		/* NOTREACHED */
 @@ -529,20 +531,23 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
  				if (ioctl(dev, DIOCKILLSRCNODES, &psnk))
  					err(1, "DIOCKILLSRCNODES");
  				killed += psnk.psnk_killed;
 +				killed_states += psnk.psnk_killed_states;
  			}
  			freeaddrinfo(res[1]);
  		} else {
  			if (ioctl(dev, DIOCKILLSRCNODES, &psnk))
  				err(1, "DIOCKILLSRCNODES");
  			killed += psnk.psnk_killed;
 +			killed_states += psnk.psnk_killed_states;
  		}
  	}
  
  	freeaddrinfo(res[0]);
  
  	if ((opts & PF_OPT_QUIET) == 0)
 -		fprintf(stderr, "killed %d src nodes from %d sources and %d "
 -		    "destinations\n", killed, sources, dests);
 +		fprintf(stderr, "killed %d src nodes and %d linked states "
 +		    "from %d sources and %d destinations\n",
 +		    killed, killed_states, sources, dests);
  	return (0);
  }
  
 @@ -2002,11 +2007,14 @@ main(int argc, char *argv[])
  		usage();
  
  	while ((ch = getopt(argc, argv,
 -	    "a:AdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) {
 +	    "a:AcdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) {
  		switch (ch) {
  		case 'a':
  			anchoropt = optarg;
  			break;
 +		case 'c':
 +			opts |= PF_OPT_KILLLINKEDSTATES;
 +			break;
  		case 'd':
  			opts |= PF_OPT_DISABLE;
  			mode = O_RDWR;
 diff --git a/sbin/pfctl/pfctl_parser.h b/sbin/pfctl/pfctl_parser.h
 index 4560d66..b272b0b 100644
 --- a/sbin/pfctl/pfctl_parser.h
 +++ b/sbin/pfctl/pfctl_parser.h
 @@ -51,6 +51,7 @@
  #define PF_OPT_NUMERIC		0x1000
  #define PF_OPT_MERGE		0x2000
  #define PF_OPT_RECURSE		0x4000
 +#define PF_OPT_KILLLINKEDSTATES	0x8000
  
  #define PF_TH_ALL		0xFF
  
 diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
 index c16591b..e5395e3 100644
 --- a/sys/net/pfvar.h
 +++ b/sys/net/pfvar.h
 @@ -697,6 +697,7 @@ struct pf_threshold {
  
  struct pf_src_node {
  	LIST_ENTRY(pf_src_node) entry;
 +	TAILQ_HEAD(, pf_state)	state_list;
  	struct pf_addr	 addr;
  	struct pf_addr	 raddr;
  	union pf_rule_ptr rule;
 @@ -787,6 +788,7 @@ struct pf_state {
  	TAILQ_ENTRY(pf_state)	 sync_list;
  	TAILQ_ENTRY(pf_state)	 key_list[2];
  	LIST_ENTRY(pf_state)	 entry;
 +	TAILQ_ENTRY(pf_state)	 srcnode_link;
  	struct pf_state_peer	 src;
  	struct pf_state_peer	 dst;
  	union pf_rule_ptr	 rule;
 @@ -1445,6 +1447,8 @@ struct pfioc_src_node_kill {
  	struct pf_rule_addr psnk_src;
  	struct pf_rule_addr psnk_dst;
  	u_int		    psnk_killed;
 +	u_int		    psnk_killed_states;
 +	u_int		    psnk_kill_linked_states;
  };
  
  struct pfioc_state_kill {
 diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
 index 2de8c40..9da73c5 100644
 --- a/sys/netpfil/pf/pf.c
 +++ b/sys/netpfil/pf/pf.c
 @@ -652,6 +652,8 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
  		    rule->max_src_conn_rate.limit,
  		    rule->max_src_conn_rate.seconds);
  
 +		TAILQ_INIT(&(*sn)->state_list);
 +
  		(*sn)->af = af;
  		(*sn)->rule.ptr = rule;
  		PF_ACPY(&(*sn)->addr, src, af);
 @@ -1482,6 +1484,7 @@ static void
  pf_src_tree_remove_state(struct pf_state *s)
  {
  	u_int32_t timeout;
 +	struct pf_srchash *sh = NULL;
  
  	if (s->src_node != NULL) {
  		if (s->src.tcp_est)
 @@ -1493,6 +1496,12 @@ pf_src_tree_remove_state(struct pf_state *s)
  				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
  			s->src_node->expire = time_uptime + timeout;
  		}
 +		sh = &V_pf_srchash[pf_hashsrc(&s->src_node->addr, s->src_node->af)];
 +		PF_HASHROW_LOCK(sh);
 +		if (!TAILQ_EMPTY(&s->src_node->state_list))
 +			TAILQ_REMOVE(&s->src_node->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
 +
  	}
  	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
  		if (--s->nat_src_node->states <= 0) {
 @@ -1502,6 +1511,11 @@ pf_src_tree_remove_state(struct pf_state *s)
  				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
  			s->nat_src_node->expire = time_uptime + timeout;
  		}
 +		sh = &V_pf_srchash[pf_hashsrc(&s->nat_src_node->addr, s->nat_src_node->af)];
 +		PF_HASHROW_LOCK(sh);
 +		if (!TAILQ_EMPTY(&s->nat_src_node->state_list))
 +			TAILQ_REMOVE(&s->nat_src_node->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
  	}
  	s->src_node = s->nat_src_node = NULL;
  }
 @@ -3407,6 +3421,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
      int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
  {
  	struct pf_state		*s = NULL;
 +	struct pf_srchash	*sh = NULL;
  	struct pf_src_node	*sn = NULL;
  	struct tcphdr		*th = pd->hdr.tcp;
  	u_int16_t		 mss = V_tcp_mssdflt;
 @@ -3505,14 +3520,22 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
  	s->expire = time_uptime;
  
  	if (sn != NULL) {
 +		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
 +		PF_HASHROW_LOCK(sh);
  		s->src_node = sn;
  		s->src_node->states++;
 +		TAILQ_INSERT_HEAD(&sn->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
  	}
  	if (nsn != NULL) {
  		/* XXX We only modify one side for now. */
 +		sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
 +		PF_HASHROW_LOCK(sh);
  		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
  		s->nat_src_node = nsn;
  		s->nat_src_node->states++;
 +		TAILQ_INSERT_HEAD(&nsn->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
  	}
  	if (pd->proto == IPPROTO_TCP) {
  		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
 diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
 index 2b0f2cd..0267ef0 100644
 --- a/sys/netpfil/pf/pf_ioctl.c
 +++ b/sys/netpfil/pf/pf_ioctl.c
 @@ -150,7 +150,8 @@ struct cdev *pf_dev;
   */
  static void		 pf_clear_states(void);
  static int		 pf_clear_tables(void);
 -static void		 pf_clear_srcnodes(struct pf_src_node *);
 +static u_int32_t	 pf_clear_srcnodes(struct pf_src_node *,
 +    int kill_states);
  static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
  
  /*
 @@ -3134,7 +3135,7 @@ DIOCCHANGEADDR_error:
  
  	case DIOCCLRSRCNODES: {
  
 -		pf_clear_srcnodes(NULL);
 +		pf_clear_srcnodes(NULL, 0);
  		pf_purge_expired_src_nodes();
  		V_pf_status.src_nodes = 0;
  		break;
 @@ -3145,7 +3146,7 @@ DIOCCHANGEADDR_error:
  		    (struct pfioc_src_node_kill *)addr;
  		struct pf_srchash	*sh;
  		struct pf_src_node	*sn;
 -		u_int			i, killed = 0;
 +		u_int			i, killed = 0, killed_states = 0;
  
  		for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask;
  		    i++, sh++) {
 @@ -3166,7 +3167,7 @@ DIOCCHANGEADDR_error:
  				&sn->raddr, sn->af)) {
  				/* Handle state to src_node linkage */
  				if (sn->states != 0)
 -					pf_clear_srcnodes(sn);
 +					killed_states += pf_clear_srcnodes(sn,  psnk->psnk_kill_linked_states);
  				sn->expire = 1;
  				killed++;
  			}
 @@ -3177,6 +3178,7 @@ DIOCCHANGEADDR_error:
  			pf_purge_expired_src_nodes();
  
  		psnk->psnk_killed = killed;
 +		psnk->psnk_killed_states = killed_states;
  		break;
  	}
  
 @@ -3360,24 +3362,12 @@ pf_clear_tables(void)
  	return (error);
  }
  
 -static void
 -pf_clear_srcnodes(struct pf_src_node *n)
 +static u_int32_t
 +pf_clear_srcnodes(struct pf_src_node *n, int kill_states)
  {
  	struct pf_state *s;
  	int i;
 -
 -	for (i = 0; i <= V_pf_hashmask; i++) {
 -		struct pf_idhash *ih = &V_pf_idhash[i];
 -
 -		PF_HASHROW_LOCK(ih);
 -		LIST_FOREACH(s, &ih->states, entry) {
 -			if (n == NULL || n == s->src_node)
 -				s->src_node = NULL;
 -			if (n == NULL || n == s->nat_src_node)
 -				s->nat_src_node = NULL;
 -		}
 -		PF_HASHROW_UNLOCK(ih);
 -	}
 +	int killed_states = 0;
  
  	if (n == NULL) {
  		struct pf_srchash *sh;
 @@ -3386,6 +3376,19 @@ pf_clear_srcnodes(struct pf_src_node *n)
  		    i++, sh++) {
  			PF_HASHROW_LOCK(sh);
  			LIST_FOREACH(n, &sh->nodes, entry) {
 +				while (!TAILQ_EMPTY(&n->state_list)) {
 +					s = TAILQ_FIRST(&n->state_list);
 +					if (kill_states) {
 +						pf_unlink_state(s, 0);
 +						killed_states++;
 +					} else {
 +						PF_STATE_LOCK(s);
 +						TAILQ_REMOVE(&n->state_list, s, srcnode_link);
 +						s->src_node = NULL;
 +						s->nat_src_node = NULL;
 +						PF_STATE_UNLOCK(s);
 +					}
 +				}
  				n->expire = 1;
  				n->states = 0;
  			}
 @@ -3393,9 +3396,24 @@ pf_clear_srcnodes(struct pf_src_node *n)
  		}
  	} else {
  		/* XXX: hash slot should already be locked here. */
 +		while (!TAILQ_EMPTY(&n->state_list)) {
 +			s = TAILQ_FIRST(&n->state_list);
 +			if (kill_states) {
 +				pf_unlink_state(s, 0);
 +				killed_states++;
 +			} else {
 +				PF_STATE_LOCK(s);
 +				TAILQ_REMOVE(&n->state_list, s, srcnode_link);
 +				s->src_node = NULL;
 +				s->nat_src_node = NULL;
 +				PF_STATE_UNLOCK(s);
 +			}
 +		}
  		n->expire = 1;
  		n->states = 0;
  	}
 +
 +	return killed_states;
  }
  /*
   * XXX - Check for version missmatch!!!
 @@ -3459,7 +3477,7 @@ shutdown_pf(void)
  
  		pf_clear_states();
  
 -		pf_clear_srcnodes(NULL);
 +		pf_clear_srcnodes(NULL, 0);
  
  		/* status does not use malloced mem so no need to cleanup */
  		/* fingerprints and interfaces have thier own cleanup code */
 
 --Boundary-00=_EqkiSNwqoUJOzB0--


More information about the freebsd-pf mailing list