svn commit: r200855 - in head/sys: net netgraph netinet netinet/ipfw

Luigi Rizzo luigi at FreeBSD.org
Tue Dec 22 19:01:48 UTC 2009


Author: luigi
Date: Tue Dec 22 19:01:47 2009
New Revision: 200855
URL: http://svn.freebsd.org/changeset/base/200855

Log:
  merge code from ipfw3-head to reduce contention on the ipfw lock
  and remove all O(N) sequences from kernel critical sections in ipfw.
  
  In detail:
  
   1. introduce a IPFW_UH_LOCK to arbitrate requests from
       the upper half of the kernel. Some things, such as 'ipfw show',
       can be done holding this lock in read mode, whereas insert and
       delete require IPFW_UH_WLOCK.
  
    2. introduce a mapping structure to keep rules together. This replaces
       the 'next' chain currently used in ipfw rules. At the moment
       the map is a simple array (sorted by rule number and then rule_id),
       so we can find a rule quickly instead of having to scan the list.
       This reduces many expensive lookups from O(N) to O(log N).
  
    3. when an expensive operation (such as insert or delete) is done
       by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
       without blocking the bottom half of the kernel, then acquire
       IPFW_WLOCK and quickly update pointers to the map and related info.
       After dropping IPFW_LOCK we can then continue the cleanup protected
       by IPFW_UH_LOCK. So userland still costs O(N) but the kernel side
       is only blocked for O(1).
  
    4. do not pass pointers to rules through dummynet, netgraph, divert etc,
       but rather pass a <slot, chain_id, rulenum, rule_id> tuple.
       We validate the slot index (in the array of #2) with chain_id,
       and if successful do a O(1) dereference; otherwise, we can find
       the rule in O(log N) through <rulenum, rule_id>
  
  All the above does not change the userland/kernel ABI, though there
  are some disgusting casts between pointers and uint32_t
  
  Operation costs now are as follows:
  
    Function				Old	Now	  Planned
  -------------------------------------------------------------------
    + skipto X, non cached		O(N)	O(log N)
    + skipto X, cached			O(1)	O(1)
  XXX dynamic rule lookup			O(1)	O(log N)  O(1)
    + skipto tablearg			O(N)	O(1)
    + reinject, non cached		O(N)	O(log N)
    + reinject, cached			O(1)	O(1)
    + kernel blocked during setsockopt()	O(N)	O(1)
  -------------------------------------------------------------------
  
  The only (very small) regression is on dynamic rule lookup and this will
  be fixed in a day or two, without changing the userland/kernel ABI
  
  Supported by: Valeria Paoli
  MFC after:	1 month

Modified:
  head/sys/net/if_bridge.c
  head/sys/net/if_ethersubr.c
  head/sys/netgraph/ng_ipfw.c
  head/sys/netgraph/ng_ipfw.h
  head/sys/netinet/ip_dummynet.h
  head/sys/netinet/ip_fw.h
  head/sys/netinet/ipfw/ip_dummynet.c
  head/sys/netinet/ipfw/ip_fw2.c
  head/sys/netinet/ipfw/ip_fw_log.c
  head/sys/netinet/ipfw/ip_fw_pfil.c
  head/sys/netinet/ipfw/ip_fw_private.h
  head/sys/netinet/ipfw/ip_fw_sockopt.c

Modified: head/sys/net/if_bridge.c
==============================================================================
--- head/sys/net/if_bridge.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/net/if_bridge.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -3039,20 +3039,23 @@ bridge_pfil(struct mbuf **mp, struct ifn
 			goto bad;
 	}
 
+	/* XXX this section is also in if_ethersubr.c */
 	if (V_ip_fw_chk_ptr && pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) {
 		struct dn_pkt_tag *dn_tag;
 
 		error = -1;
 		dn_tag = ip_dn_claim_tag(*mp);
-		if (dn_tag != NULL) {
-			if (dn_tag->rule != NULL && V_fw_one_pass)
+		if (dn_tag == NULL) {
+			args.slot = 0;
+		} else {
+			if (dn_tag->slot != 0 && V_fw_one_pass)
 				/* packet already partially processed */
 				goto ipfwpass;
-			args.rule = dn_tag->rule; /* matching rule to restart */
-			args.rule_id = dn_tag->rule_id;
+			args.slot = dn_tag->slot; /* next rule to use */
 			args.chain_id = dn_tag->chain_id;
-		} else
-			args.rule = NULL;
+			args.rulenum = dn_tag->rulenum;
+			args.rule_id = dn_tag->rule_id;
+		}
 
 		args.m = *mp;
 		args.oif = ifp;

Modified: head/sys/net/if_ethersubr.c
==============================================================================
--- head/sys/net/if_ethersubr.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/net/if_ethersubr.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -471,15 +471,17 @@ ether_ipfw_chk(struct mbuf **m0, struct 
 
 	dn_tag = ip_dn_claim_tag(*m0);
 
-	if (dn_tag != NULL) {
-		if (dn_tag->rule != NULL && V_fw_one_pass)
+	if (dn_tag == NULL) {
+		args.slot = 0;
+	} else {
+		if (dn_tag->slot != 0 && V_fw_one_pass)
 			/* dummynet packet, already partially processed */
 			return (1);
-		args.rule = dn_tag->rule;	/* matching rule to restart */
+		args.slot = dn_tag->slot;	/* matching rule to restart */
+		args.rulenum = dn_tag->rulenum;
 		args.rule_id = dn_tag->rule_id;
 		args.chain_id = dn_tag->chain_id;
-	} else
-		args.rule = NULL;
+	}
 
 	/*
 	 * I need some amt of data to be contiguous, and in case others need

Modified: head/sys/netgraph/ng_ipfw.c
==============================================================================
--- head/sys/netgraph/ng_ipfw.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netgraph/ng_ipfw.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -293,7 +293,8 @@ ng_ipfw_input(struct mbuf **m0, int dir,
 			m_freem(m);
 			return (ENOMEM);
 		}
-		ngit->rule = fwa->rule;
+		ngit->slot = fwa->slot;
+		ngit->rulenum = fwa->rulenum;
 		ngit->rule_id = fwa->rule_id;
 		ngit->chain_id = fwa->chain_id;
 		ngit->dir = dir;

Modified: head/sys/netgraph/ng_ipfw.h
==============================================================================
--- head/sys/netgraph/ng_ipfw.h	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netgraph/ng_ipfw.h	Tue Dec 22 19:01:47 2009	(r200855)
@@ -37,7 +37,8 @@ extern	ng_ipfw_input_t	*ng_ipfw_input_p;
 
 struct ng_ipfw_tag {
 	struct m_tag	mt;		/* tag header */
-	struct ip_fw	*rule;		/* matching rule */
+	uint32_t	slot;		/* slot for next rule */
+	uint32_t	rulenum;	/* matching rule number */
 	uint32_t	rule_id;	/* matching rule id */
 	uint32_t	chain_id;	/* ruleset id */
 	struct ifnet	*ifp;		/* interface, for ip_output */

Modified: head/sys/netinet/ip_dummynet.h
==============================================================================
--- head/sys/netinet/ip_dummynet.h	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ip_dummynet.h	Tue Dec 22 19:01:47 2009	(r200855)
@@ -112,7 +112,8 @@ struct dn_heap {
  * processing requirements.
  */
 struct dn_pkt_tag {
-    struct ip_fw *rule;		/* matching rule */
+    uint32_t slot;		/* slot of next rule to use */
+    uint32_t rulenum;		/* matching rule number */
     uint32_t rule_id;		/* matching rule id */
     uint32_t chain_id;		/* ruleset id */
     int dn_dir;			/* action when packet comes out. */

Modified: head/sys/netinet/ip_fw.h
==============================================================================
--- head/sys/netinet/ip_fw.h	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ip_fw.h	Tue Dec 22 19:01:47 2009	(r200855)
@@ -461,7 +461,7 @@ typedef struct _ipfw_insn_icmp6 {
  */
 
 struct ip_fw {
-	struct ip_fw	*next;		/* linked list of rules		*/
+	struct ip_fw	*x_next;	/* linked list of rules		*/
 	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
 	/* 'next_rule' is used to pass up 'set_disable' status		*/
 

Modified: head/sys/netinet/ipfw/ip_dummynet.c
==============================================================================
--- head/sys/netinet/ipfw/ip_dummynet.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ipfw/ip_dummynet.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -1367,20 +1367,11 @@ dummynet_io(struct mbuf **m0, int dir, s
 	struct dn_pipe *pipe;
 	uint64_t len = m->m_pkthdr.len;
 	struct dn_flow_queue *q = NULL;
-	int is_pipe;
-	ipfw_insn *cmd = ACTION_PTR(fwa->rule);
+	int is_pipe = fwa->cookie & 0x8000000 ? 0 : 1;
 
 	KASSERT(m->m_nextpkt == NULL,
 	    ("dummynet_io: mbuf queue passed to dummynet"));
 
-	if (cmd->opcode == O_LOG)
-		cmd += F_LEN(cmd);
-	if (cmd->opcode == O_ALTQ)
-		cmd += F_LEN(cmd);
-	if (cmd->opcode == O_TAG)
-		cmd += F_LEN(cmd);
-	is_pipe = (cmd->opcode == O_PIPE);
-
 	DUMMYNET_LOCK();
 	io_pkt++;
 	/*
@@ -1390,11 +1381,11 @@ dummynet_io(struct mbuf **m0, int dir, s
 	 * below can be simplified.
 	 */
 	if (is_pipe) {
-		pipe = locate_pipe(fwa->cookie);
+		pipe = locate_pipe(fwa->cookie & 0xffff);
 		if (pipe != NULL)
 			fs = &(pipe->fs);
 	} else
-		fs = locate_flowset(fwa->cookie);
+		fs = locate_flowset(fwa->cookie & 0xffff);
 
 	if (fs == NULL)
 		goto dropit;	/* This queue/pipe does not exist! */
@@ -1440,7 +1431,8 @@ dummynet_io(struct mbuf **m0, int dir, s
 	 * Ok, i can handle the pkt now...
 	 * Build and enqueue packet + parameters.
 	 */
-	pkt->rule = fwa->rule;
+	pkt->slot = fwa->slot;
+	pkt->rulenum = fwa->rulenum;
 	pkt->rule_id = fwa->rule_id;
 	pkt->chain_id = fwa->chain_id;
 	pkt->dn_dir = dir;

Modified: head/sys/netinet/ipfw/ip_fw2.c
==============================================================================
--- head/sys/netinet/ipfw/ip_fw2.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ipfw/ip_fw2.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -628,31 +628,6 @@ send_reject(struct ip_fw_args *args, int
 	args->m = NULL;
 }
 
-/**
- * Return the pointer to the skipto target.
- *
- * IMPORTANT: this should only be called on SKIPTO rules, and the
- * jump target is taken from the 'rulenum' argument, which may come
- * from the rule itself (direct skipto) or not (tablearg)
- *
- * The function never returns NULL: if the requested rule is not
- * present, it returns the next rule in the chain.
- * This also happens in case of a bogus argument > 65535
- */
-static struct ip_fw *
-lookup_next_rule(struct ip_fw *me, uint32_t rulenum)
-{
-	struct ip_fw *rule;
-
-	for (rule = me->next; rule ; rule = rule->next) {
-		if (rule->rulenum >= rulenum)
-			break;
-	}
-	if (rule == NULL)	/* failure or not a skipto */
-		rule = me->next ? me->next : me;
-	return rule;
-}
-
 /*
  * Support for uid/gid/jail lookup. These tests are expensive
  * (because we may need to look into the list of active sockets)
@@ -741,11 +716,13 @@ check_uidgid(ipfw_insn_u32 *insn, int pr
  * Helper function to write the matching rule into args
  */
 static inline void
-set_match(struct ip_fw_args *args, struct ip_fw *f, struct ip_fw_chain *chain)
+set_match(struct ip_fw_args *args, int slot,
+	struct ip_fw_chain *chain)
 {
-	args->rule = f;
-	args->rule_id = f->id;
 	args->chain_id = chain->id;
+	args->slot = slot + 1; /* we use 0 as a marker */
+	args->rule_id = chain->map[slot]->id;
+	args->rulenum = chain->map[slot]->rulenum;
 }
 
 /*
@@ -839,7 +816,7 @@ ipfw_chk(struct ip_fw_args *args)
 	 */
 	struct ifnet *oif = args->oif;
 
-	struct ip_fw *f = NULL;		/* matching rule */
+	int f_pos = 0;		/* index of current rule in the array */
 	int retval = 0;
 
 	/*
@@ -1168,31 +1145,21 @@ do {								\
 		return (IP_FW_PASS);	/* accept */
 	}
 	mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
-	if (args->rule) {
+	if (args->slot) {
 		/*
 		 * Packet has already been tagged as a result of a previous
 		 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
 		 * REASS, NETGRAPH and similar, never a skipto).
-		 * Validate the pointer and continue from args->rule->next
-		 * if still present, otherwise use the default rule.
-		 * XXX If fw_one_pass != 0 then just accept it, though
-		 * the caller should never pass us such packets.
+		 * Validate the slot and continue from the next one
+		 * if still present, otherwise do a lookup.
 		 */
 		if (V_fw_one_pass) {
 			IPFW_RUNLOCK(chain);
 			return (IP_FW_PASS);
 		}
-		if (chain->id == args->chain_id) { /* pointer still valid */
-			f = args->rule->next;
-		} else { /* must revalidate the pointer */
-			for (f = chain->rules; f != NULL; f = f->next)
-				if (f == args->rule && f->id == args->rule_id) {
-					f = args->rule->next;
-					break;
-				}
-		}
-		if (f == NULL) /* in case of errors, use default; */
-			f = chain->default_rule;
+		f_pos = (args->chain_id == chain->id) ?
+		    args->slot /* already incremented */ :
+		    ipfw_find_rule(chain, args->rulenum, args->rule_id+1);
 	} else {
 		/*
 		 * Find the starting rule. It can be either the first
@@ -1200,18 +1167,13 @@ do {								\
 		 */
 		int skipto = mtag ? divert_cookie(mtag) : 0;
 
-		f = chain->rules;
+		f_pos = 0;
 		if (args->eh == NULL && skipto != 0) {
 			if (skipto >= IPFW_DEFAULT_RULE) {
 				IPFW_RUNLOCK(chain);
 				return (IP_FW_DENY); /* invalid */
 			}
-			while (f && f->rulenum <= skipto)
-				f = f->next;
-			if (f == NULL) {	/* drop packet */
-				IPFW_RUNLOCK(chain);
-				return (IP_FW_DENY);
-			}
+			f_pos = ipfw_find_rule(chain, skipto, 0);
 		}
 	}
 	/* reset divert rule to avoid confusion later */
@@ -1227,7 +1189,7 @@ do {								\
 	 * need to break out of one or both loops, or re-enter one of
 	 * the loops with updated variables. Loop variables are:
 	 *
-	 *	f (outer loop) points to the current rule.
+	 *	f_pos (outer loop) points to the current rule.
 	 *		On output it points to the matching rule.
 	 *	done (outer loop) is used as a flag to break the loop.
 	 *	l (inner loop)	residual length of current rule.
@@ -1236,15 +1198,16 @@ do {								\
 	 * We break the inner loop by setting l=0 and possibly
 	 * cmdlen=0 if we don't want to advance cmd.
 	 * We break the outer loop by setting done=1
-	 * We can restart the inner loop by setting l>0 and f, cmd
+	 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
 	 * as needed.
 	 */
-	for (; f; f = f->next) {
+	for (; f_pos < chain->n_rules; f_pos++) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
+		struct ip_fw *f;
 
-/* again: */
+		f = chain->map[f_pos];
 		if (V_set_disable & (1 << f->set) )
 			continue;
 
@@ -1864,8 +1827,8 @@ do {								\
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
-			 *   (setting l=0), or to the SKIPTO target (by
-			 *   setting f, cmd and l as needed), respectively.
+			 *   (setting l=0), or to the SKIPTO target (setting
+			 *   f/f_len, cmd and l as needed), respectively.
 			 *
 			 * O_TAG, O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
@@ -1926,7 +1889,14 @@ do {								\
 					 */
 					q->pcnt++;
 					q->bcnt += pktlen;
+					/* XXX we would like to have f_pos
+					 * readily accessible in the dynamic
+				         * rule, instead of having to
+					 * lookup q->rule.
+					 */
 					f = q->rule;
+					f_pos = ipfw_find_rule(chain,
+						f->rulenum, f->id);
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					ipfw_dyn_unlock();
@@ -1952,9 +1922,11 @@ do {								\
 
 			case O_PIPE:
 			case O_QUEUE:
-				set_match(args, f, chain);
+				set_match(args, f_pos, chain);
 				args->cookie = (cmd->arg1 == IP_FW_TABLEARG) ?
 					tablearg : cmd->arg1;
+				if (cmd->opcode == O_QUEUE)
+					args->cookie |= 0x80000000;
 				retval = IP_FW_DUMMYNET;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
@@ -1987,38 +1959,53 @@ do {								\
 				break;
 
 			case O_COUNT:
-			case O_SKIPTO:
 				f->pcnt++;	/* update stats */
 				f->bcnt += pktlen;
 				f->timestamp = time_uptime;
-				if (cmd->opcode == O_COUNT) {
 					l = 0;		/* exit inner loop */
 					break;
-				}
-				/* skipto: */
-				if (cmd->arg1 == IP_FW_TABLEARG) {
-				    f = lookup_next_rule(f, tablearg);
-				} else { /* direct skipto */
-				    /* update f->next_rule if not set */
-				    if (f->next_rule == NULL)
+
+			case O_SKIPTO:
+			    f->pcnt++;	/* update stats */
+			    f->bcnt += pktlen;
+			    f->timestamp = time_uptime;
+			    /* If possible use cached f_pos (in f->next_rule),
+			     * whose version is written in f->next_rule
+			     * (horrible hacks to avoid changing the ABI).
+			     */
+			    if (cmd->arg1 != IP_FW_TABLEARG &&
+				    (uint32_t)f->x_next == chain->id) {
+				f_pos = (uint32_t)f->next_rule;
+			    } else {
+				int i = (cmd->arg1 == IP_FW_TABLEARG) ?
+					tablearg : cmd->arg1;
+				/* make sure we do not jump backward */
+				if (i <= f->rulenum)
+				    i = f->rulenum + 1;
+				f_pos = ipfw_find_rule(chain, i, 0);
+				/* update the cache */
+				if (cmd->arg1 != IP_FW_TABLEARG) {
 					f->next_rule =
-					    lookup_next_rule(f, cmd->arg1);
-				    f = f->next_rule;
+					(void *)(uintptr_t)f_pos;
+				    f->x_next =
+					(void *)(uintptr_t)chain->id;
+				}
 				}
 				/*
-				 * Skip disabled rules, and
-				 * re-enter the inner loop
-				 * with the correct f, l and cmd.
+			     * Skip disabled rules, and re-enter
+			     * the inner loop with the correct
+			     * f_pos, f, l and cmd.
 				 * Also clear cmdlen and skip_or
 				 */
-				while (f && (V_set_disable & (1 << f->set)))
-					f = f->next;
-				if (f) { /* found a valid rule */
+			    for (; f_pos < chain->n_rules - 1 &&
+				    (V_set_disable &
+				     (1 << chain->map[f_pos]->set));
+				    f_pos++)
+				;
+			    /* prepare to enter the inner loop */
+			    f = chain->map[f_pos];
 					l = f->cmd_len;
 					cmd = f->cmd;
-				} else { /* should not happen */
-					l = 0;  /* exit inner loop */
-				}
 				match = 1;
 				cmdlen = 0;
 				skip_or = 0;
@@ -2083,7 +2070,7 @@ do {								\
 
 			case O_NETGRAPH:
 			case O_NGTEE:
-				set_match(args, f, chain);
+				set_match(args, f_pos, chain);
 				args->cookie = (cmd->arg1 == IP_FW_TABLEARG) ?
 					tablearg : cmd->arg1;
 				retval = (cmd->opcode == O_NETGRAPH) ?
@@ -2108,7 +2095,7 @@ do {								\
 				    struct cfg_nat *t;
 				    int nat_id;
 
-				    set_match(args, f, chain);
+				    set_match(args, f_pos, chain);
 				    t = ((ipfw_insn_nat *)cmd)->nat;
 				    if (t == NULL) {
 					nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
@@ -2175,7 +2162,7 @@ do {								\
 				    else
 					ip->ip_sum = in_cksum(m, hlen);
 				    retval = IP_FW_REASS;
-				    set_match(args, f, chain);
+				    set_match(args, f_pos, chain);
 				}
 				done = 1;	/* exit outer loop */
 				break;
@@ -2209,10 +2196,11 @@ do {								\
 	}		/* end of outer for, scan rules */
 
 	if (done) {
+		struct ip_fw *rule = chain->map[f_pos];
 		/* Update statistics */
-		f->pcnt++;
-		f->bcnt += pktlen;
-		f->timestamp = time_uptime;
+		rule->pcnt++;
+		rule->bcnt += pktlen;
+		rule->timestamp = time_uptime;
 	} else {
 		retval = IP_FW_DENY;
 		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
@@ -2308,7 +2296,7 @@ static int
 vnet_ipfw_init(const void *unused)
 {
 	int error;
-	struct ip_fw default_rule;
+	struct ip_fw *rule = NULL;
 	struct ip_fw_chain *chain;
 
 	chain = &V_layer3_chain;
@@ -2322,38 +2310,39 @@ vnet_ipfw_init(const void *unused)
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
+#ifdef IPFIREWALL_NAT
+	LIST_INIT(&chain->nat);
+#endif
 
+	/* insert the default rule and create the initial map */
+	chain->n_rules = 1;
+	chain->static_len = sizeof(struct ip_fw);
+	chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO);
+	if (chain->map)
+		rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO);
+	if (rule == NULL) {
+		if (chain->map)
+			free(chain->map, M_IPFW);
+		printf("ipfw2: ENOSPC initializing default rule "
+			"(support disabled)\n");
+		return (ENOSPC);
+	}
 	error = ipfw_init_tables(chain);
 	if (error) {
 		panic("init_tables"); /* XXX Marko fix this ! */
 	}
-#ifdef IPFIREWALL_NAT
-	LIST_INIT(&chain->nat);
-#endif
 
+	/* fill and insert the default rule */
+	rule->act_ofs = 0;
+	rule->rulenum = IPFW_DEFAULT_RULE;
+	rule->cmd_len = 1;
+	rule->set = RESVD_SET;
+	rule->cmd[0].len = 1;
+	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
+	chain->rules = chain->default_rule = chain->map[0] = rule;
+	chain->id = rule->id = 1;
 
-	chain->rules = NULL;
 	IPFW_LOCK_INIT(chain);
-
-	bzero(&default_rule, sizeof default_rule);
-	default_rule.act_ofs = 0;
-	default_rule.rulenum = IPFW_DEFAULT_RULE;
-	default_rule.cmd_len = 1;
-	default_rule.set = RESVD_SET;
-	default_rule.cmd[0].len = 1;
-	default_rule.cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
-	error = ipfw_add_rule(chain, &default_rule);
-
-	if (error != 0) {
-		printf("ipfw2: error %u initializing default rule "
-			"(support disabled)\n", error);
-		IPFW_LOCK_DESTROY(chain);
-		printf("leaving ipfw_iattach (1) with error %d\n", error);
-		return (error);
-	}
-
-	chain->default_rule = chain->rules;
-
 	ipfw_dyn_init();
 
 	/* First set up some values that are compile time options */
@@ -2385,8 +2374,9 @@ vnet_ipfw_init(const void *unused)
 static int
 vnet_ipfw_uninit(const void *unused)
 {
-	struct ip_fw *reap;
+	struct ip_fw *reap, *rule;
 	struct ip_fw_chain *chain = &V_layer3_chain;
+	int i;
 
 	V_ipfw_vnet_ready = 0; /* tell new callers to go away */
 	/*
@@ -2400,19 +2390,26 @@ vnet_ipfw_uninit(const void *unused)
 #endif
 	V_ip_fw_chk_ptr = NULL;
 	V_ip_fw_ctl_ptr = NULL;
+	IPFW_UH_WLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+	IPFW_UH_WLOCK(chain);
 
 	IPFW_WLOCK(chain);
-	/* We wait on the wlock here until the last user leaves */
 	IPFW_WUNLOCK(chain);
 	IPFW_WLOCK(chain);
 
 	ipfw_dyn_uninit(0);	/* run the callout_drain */
 	ipfw_flush_tables(chain);
-	chain->reap = NULL;
-	ipfw_free_chain(chain, 1 /* kill default rule */);
-	reap = chain->reap;
-	chain->reap = NULL;
+	reap = NULL;
+	for (i = 0; i < chain->n_rules; i++) {
+		rule = chain->map[i];
+		rule->x_next = reap;
+		reap = rule;
+	}
+	if (chain->map)
+		free(chain->map, M_IPFW);
 	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
 	if (reap != NULL)
 		ipfw_reap_rules(reap);
 	IPFW_LOCK_DESTROY(chain);

Modified: head/sys/netinet/ipfw/ip_fw_log.c
==============================================================================
--- head/sys/netinet/ipfw/ip_fw_log.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ipfw/ip_fw_log.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -26,9 +26,6 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#define        DEB(x)
-#define        DDB(x) x
-
 /*
  * Logging support for ipfw
  */

Modified: head/sys/netinet/ipfw/ip_fw_pfil.c
==============================================================================
--- head/sys/netinet/ipfw/ip_fw_pfil.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ipfw/ip_fw_pfil.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -125,7 +125,8 @@ ipfw_check_in(void *arg, struct mbuf **m
 	if (ng_tag != NULL) {
 		KASSERT(ng_tag->dir == NG_IPFW_IN,
 		    ("ng_ipfw tag with wrong direction"));
-		args.rule = ng_tag->rule;
+		args.slot = ng_tag->slot;
+		args.rulenum = ng_tag->rulenum;
 		args.rule_id = ng_tag->rule_id;
 		args.chain_id = ng_tag->chain_id;
 		m_tag_delete(*m0, (struct m_tag *)ng_tag);
@@ -137,10 +138,10 @@ again:
 		struct dn_pkt_tag *dt;
 
 		dt = (struct dn_pkt_tag *)(dn_tag+1);
-		args.rule = dt->rule;
+		args.slot = dt->slot;
+		args.rulenum = dt->rulenum;
 		args.rule_id = dt->rule_id;
 		args.chain_id = dt->chain_id;
-
 		m_tag_delete(*m0, dn_tag);
 	}
 
@@ -148,7 +149,7 @@ again:
 	args.inp = inp;
 	tee = 0;
 
-	if (V_fw_one_pass == 0 || args.rule == NULL) {
+	if (V_fw_one_pass == 0 || args.slot == 0) {
 		ipfw = ipfw_chk(&args);
 		*m0 = args.m;
 	} else
@@ -201,7 +202,7 @@ again:
 			*m0 = NULL;
 			return 0;	/* packet consumed */
 		} else {
-			args.rule = NULL;
+			args.slot = 0;
 			goto again;	/* continue with packet */
 		}
 
@@ -258,7 +259,8 @@ ipfw_check_out(void *arg, struct mbuf **
 	if (ng_tag != NULL) {
 		KASSERT(ng_tag->dir == NG_IPFW_OUT,
 		    ("ng_ipfw tag with wrong direction"));
-		args.rule = ng_tag->rule;
+		args.slot = ng_tag->slot;
+		args.rulenum = ng_tag->rulenum;
 		args.rule_id = ng_tag->rule_id;
 		args.chain_id = ng_tag->chain_id;
 		m_tag_delete(*m0, (struct m_tag *)ng_tag);
@@ -270,10 +272,10 @@ again:
 		struct dn_pkt_tag *dt;
 
 		dt = (struct dn_pkt_tag *)(dn_tag+1);
-		args.rule = dt->rule;
+		args.slot = dt->slot;
+		args.rulenum = dt->rulenum;
 		args.rule_id = dt->rule_id;
 		args.chain_id = dt->chain_id;
-
 		m_tag_delete(*m0, dn_tag);
 	}
 
@@ -282,7 +284,7 @@ again:
 	args.inp = inp;
 	tee = 0;
 
-	if (V_fw_one_pass == 0 || args.rule == NULL) {
+	if (V_fw_one_pass == 0 || args.slot == 0) {
 		ipfw = ipfw_chk(&args);
 		*m0 = args.m;
 	} else
@@ -341,7 +343,7 @@ again:
 			*m0 = NULL;
 			return 0;	/* packet consumed */
 		} else {
-			args.rule = NULL;
+			args.slot = 0;
 			goto again;	/* continue with packet */
 		}
 

Modified: head/sys/netinet/ipfw/ip_fw_private.h
==============================================================================
--- head/sys/netinet/ipfw/ip_fw_private.h	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ipfw/ip_fw_private.h	Tue Dec 22 19:01:47 2009	(r200855)
@@ -79,7 +79,12 @@ struct ip_fw_args {
 	struct ifnet	*oif;		/* output interface		*/
 	struct sockaddr_in *next_hop;	/* forward address		*/
 
-	struct ip_fw	*rule;		/* matching rule		*/
+	/* chain_id validates 'slot', the location of the pointer to
+	 * a matching rule.
+	 * If invalid, we can lookup the rule using rule_id and rulenum
+	 */
+	uint32_t	slot;		/* slot for matching rule	*/
+	uint32_t	rulenum;	/* matching rule number		*/
 	uint32_t	rule_id;	/* matching rule id		*/
 	uint32_t	chain_id;	/* ruleset id			*/
 
@@ -178,9 +183,11 @@ struct ip_fw_chain {
 	struct ip_fw	*default_rule;
 	int		n_rules;	/* number of static rules */
 	int		static_len;	/* total len of static rules */
+	struct ip_fw    **map;	/* array of rule ptrs to ease lookup */
 	LIST_HEAD(nat_list, cfg_nat) nat;       /* list of nat entries */
 	struct radix_node_head *tables[IPFW_TABLES_MAX];
 	struct rwlock	rwmtx;
+	struct rwlock	uh_lock;	/* lock for upper half */
 	uint32_t	id;		/* ruleset id */
 };
 
@@ -191,9 +198,16 @@ struct sockopt;	/* used by tcp_var.h */
  * so the variable and the macros must be here.
  */
 
-#define	IPFW_LOCK_INIT(_chain) \
-	rw_init(&(_chain)->rwmtx, "IPFW static rules")
-#define	IPFW_LOCK_DESTROY(_chain)	rw_destroy(&(_chain)->rwmtx)
+#define	IPFW_LOCK_INIT(_chain) do {			\
+	rw_init(&(_chain)->rwmtx, "IPFW static rules");	\
+	rw_init(&(_chain)->uh_lock, "IPFW UH lock");	\
+	} while (0)
+
+#define	IPFW_LOCK_DESTROY(_chain) do {			\
+	rw_destroy(&(_chain)->rwmtx);			\
+	rw_destroy(&(_chain)->uh_lock);			\
+	} while (0)
+
 #define	IPFW_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
@@ -201,12 +215,17 @@ struct sockopt;	/* used by tcp_var.h */
 #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
 #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
 
+#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
+#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
+#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
+#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
+
 /* In ip_fw_sockopt.c */
+int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
 int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
 int ipfw_ctl(struct sockopt *sopt);
 int ipfw_chk(struct ip_fw_args *args);
 void ipfw_reap_rules(struct ip_fw *head);
-void ipfw_free_chain(struct ip_fw_chain *chain, int kill_default);
 
 /* In ip_fw_table.c */
 struct radix_node;

Modified: head/sys/netinet/ipfw/ip_fw_sockopt.c
==============================================================================
--- head/sys/netinet/ipfw/ip_fw_sockopt.c	Tue Dec 22 19:00:18 2009	(r200854)
+++ head/sys/netinet/ipfw/ip_fw_sockopt.c	Tue Dec 22 19:01:47 2009	(r200855)
@@ -1,5 +1,7 @@
 /*-
- * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Supported by: Valeria Paoli
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,9 +28,6 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#define        DEB(x)
-#define        DDB(x) x
-
 /*
  * Sockopt support for ipfw. The routines here implement
  * the upper half of the ipfw code.
@@ -81,136 +80,143 @@ MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "Ip
  */
 
 /*
- * When a rule is added/deleted, clear the next_rule pointers in all rules.
- * These will be reconstructed on the fly as packets are matched.
+ * Find the smallest rule >= key, id.
+ * We could use bsearch but it is so simple that we code it directly
  */
-static void
-flush_rule_ptrs(struct ip_fw_chain *chain)
+int
+ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
 {
-	struct ip_fw *rule;
+	int i, lo, hi;
+	struct ip_fw *r;
 
-	IPFW_WLOCK_ASSERT(chain);
+  	for (lo = 0, hi = chain->n_rules - 1; lo < hi;) {
+		i = (lo + hi) / 2;
+		r = chain->map[i];
+		if (r->rulenum < key)
+			lo = i + 1;	/* continue from the next one */
+		else if (r->rulenum > key)
+			hi = i;		/* this might be good */
+		else if (r->id < id)
+			lo = i + 1;	/* continue from the next one */
+		else /* r->id >= id */
+			hi = i;		/* this might be good */
+	};
+	return hi;
+}
 
-	chain->id++;
+/*
+ * allocate a new map, returns the chain locked. extra is the number
+ * of entries to add or delete.
+ */
+static struct ip_fw **
+get_map(struct ip_fw_chain *chain, int extra, int locked)
+{
+
+	for (;;) {
+		struct ip_fw **map;
+		int i;
+
+		i = chain->n_rules + extra;
+		map = malloc(i * sizeof(struct ip_fw *), M_IPFW, M_WAITOK);
+		if (map == NULL) {
+			printf("%s: cannot allocate map\n", __FUNCTION__);
+			return NULL;
+		}
+		if (!locked)
+			IPFW_UH_WLOCK(chain);
+		if (i >= chain->n_rules + extra) /* good */
+			return map;
+		/* otherwise we lost the race, free and retry */
+		if (!locked)
+			IPFW_UH_WUNLOCK(chain);
+		free(map, M_IPFW);
+	}
+}
+
+/*
+ * swap the maps. It is supposed to be called with IPFW_UH_WLOCK
+ */
+static struct ip_fw **
+swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
+{
+	struct ip_fw **old_map;
 
-	for (rule = chain->rules; rule; rule = rule->next)
-		rule->next_rule = NULL;
+	IPFW_WLOCK(chain);
+	chain->id++;
+	chain->n_rules = new_len;
+	old_map = chain->map;
+	chain->map = new_map;
+	IPFW_WUNLOCK(chain);
+	return old_map;
 }
 
 /*
  * Add a new rule to the list. Copy the rule into a malloc'ed area, then
  * possibly create a rule number and add the rule to the list.
  * Update the rule_number in the input struct so the caller knows it as well.
+ * XXX DO NOT USE FOR THE DEFAULT RULE.
+ * Must be called without IPFW_UH held
  */
 int
 ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
 {
-	struct ip_fw *rule, *f, *prev;
-	int l = RULESIZE(input_rule);
+	struct ip_fw *rule;
+	int i, l, insert_before;
+	struct ip_fw **map;	/* the new array of pointers */
 
-	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
+	if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
 		return (EINVAL);
 
-	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
+	l = RULESIZE(input_rule);
+	rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
 	if (rule == NULL)
 		return (ENOSPC);
+	/* get_map returns with IPFW_UH_WLOCK if successful */
+	map = get_map(chain, 1, 0 /* not locked */);
+	if (map == NULL) {
+		free(rule, M_IPFW);
+		return ENOSPC;
+	}
 
 	bcopy(input_rule, rule, l);
-
-	rule->next = NULL;
+	/* clear fields not settable from userland */
+	rule->x_next = NULL;
 	rule->next_rule = NULL;
-
 	rule->pcnt = 0;
 	rule->bcnt = 0;
 	rule->timestamp = 0;
 
-	IPFW_WLOCK(chain);
-
-	if (chain->rules == NULL) {	/* default rule */
-		chain->rules = rule;
-		rule->id = ++chain->id;
-		goto done;
-        }
-
-	/*
-	 * If rulenum is 0, find highest numbered rule before the
-	 * default rule, and add autoinc_step
-	 */
 	if (V_autoinc_step < 1)
 		V_autoinc_step = 1;
 	else if (V_autoinc_step > 1000)
 		V_autoinc_step = 1000;
+	/* find the insertion point, we will insert before */
+	insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
+	i = ipfw_find_rule(chain, insert_before, 0);
+	/* duplicate first part */
+	if (i > 0)
+		bcopy(chain->map, map, i * sizeof(struct ip_fw *));
+	map[i] = rule;
+	/* duplicate remaining part, we always have the default rule */
+	bcopy(chain->map + i, map + i + 1,
+		sizeof(struct ip_fw *) *(chain->n_rules - i));
 	if (rule->rulenum == 0) {
-		/*
-		 * locate the highest numbered rule before default
-		 */
-		for (f = chain->rules; f; f = f->next) {
-			if (f->rulenum == IPFW_DEFAULT_RULE)
-				break;
-			rule->rulenum = f->rulenum;
-		}
+		/* write back the number */
+		rule->rulenum = i > 0 ? map[i-1]->rulenum : 0;
 		if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
 			rule->rulenum += V_autoinc_step;
 		input_rule->rulenum = rule->rulenum;
 	}
 
-	/*
-	 * Now insert the new rule in the right place in the sorted list.
-	 */
-	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
-		if (f->rulenum > rule->rulenum) { /* found the location */
-			if (prev) {
-				rule->next = f;
-				prev->next = rule;
-			} else { /* head insert */
-				rule->next = chain->rules;
-				chain->rules = rule;
-			}
-			break;
-		}
-	}
-	flush_rule_ptrs(chain);
-	/* chain->id incremented inside flush_rule_ptrs() */
-	rule->id = chain->id;
-done:
-	chain->n_rules++;
+	rule->id = chain->id + 1;
+	map = swap_map(chain, map, chain->n_rules + 1);
 	chain->static_len += l;
-	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+	if (map)
+		free(map, M_IPFW);
 	return (0);
 }
 
-/**
- * Remove a static rule (including derived * dynamic rules)
- * and place it on the ``reap list'' for later reclamation.
- * The caller is in charge of clearing rule pointers to avoid
- * dangling pointers.
- * @return a pointer to the next entry.
- * Arguments are not checked, so they better be correct.
- */
-static struct ip_fw *
-remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
-    struct ip_fw *prev)
-{
-	struct ip_fw *n;
-	int l = RULESIZE(rule);
-
-	IPFW_WLOCK_ASSERT(chain);
-
-	n = rule->next;
-	ipfw_remove_dyn_children(rule);
-	if (prev == NULL)
-		chain->rules = n;
-	else
-		prev->next = n;
-	chain->n_rules--;
-	chain->static_len -= l;
-
-	rule->next = chain->reap;
-	chain->reap = rule;
-
-	return n;
-}
-
 /*
  * Reclaim storage associated with a list of rules.  This is
  * typically the list created using remove_rule.
@@ -222,34 +228,11 @@ ipfw_reap_rules(struct ip_fw *head)
 	struct ip_fw *rule;
 
 	while ((rule = head) != NULL) {
-		head = head->next;
+		head = head->x_next;
 		free(rule, M_IPFW);
 	}
 }
 
-/*
- * Remove all rules from a chain (except rules in set RESVD_SET

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list