PERFORCE change 153374 for review

Gleb Kurtsou gk at FreeBSD.org
Sat Nov 22 16:12:29 PST 2008


http://perforce.freebsd.org/chv.cgi?CH=153374

Change 153374 by gk at gk_h1 on 2008/11/23 00:11:54

	Major improvement to ipfw tables. Extend tables to contain layer2-only addresses (= any IP address) and support table lookup during layer2 filtering.
	Sponsored by: Korcett Holdings, Inc (www.korcett.com)

Affected files ...

.. //depot/projects/soc2008/gk_l2filter/sbin-ipfw/ipfw2.c#13 edit
.. //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw.h#13 edit
.. //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw2.c#19 edit

Differences ...

==== //depot/projects/soc2008/gk_l2filter/sbin-ipfw/ipfw2.c#13 (text+ko) ====

@@ -6022,7 +6022,7 @@
 		do_add = **av == 'a';
 		ac--; av++;
 		if (!ac)
-			errx(EX_USAGE, "IP address required");
+			errx(EX_USAGE, "Address required");
 		p = strchr(*av, '/');
 		if (p) {
 			*p++ = '\0';
@@ -6031,11 +6031,19 @@
 				errx(EX_DATAERR, "bad width ``%s''", p);
 		} else
 			ent.masklen = 32;
-		if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0)
-			errx(EX_NOHOST, "hostname ``%s'' unknown", *av);
-		ac--; av++;
+		if (strcmp(*av, "ether") == 0 || strcmp(*av, "any") == 0) {
+			ent.addr = INADDR_ANY;
+			ent.masklen = 0;
+			if ((*av)[0] == 'a') {	/* any */
+				ac--; av++;
+			}
+		} else {
+			if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0)
+				errx(EX_NOHOST, "hostname ``%s'' unknown", *av);
+			ac--; av++;
+		}
 		bzero(&ent.ether_addr, sizeof(ent.ether_addr));
-		if (do_add && ac >= 2 && strcmp(*av, "ether") == 0) {
+		if (ac >= 2 && strcmp(*av, "ether") == 0) {
 			get_ether_addr(av[1], &ent.ether_addr);
 			ac-=2; av+=2;
 		}
@@ -6136,9 +6144,13 @@
 		    tether_buf[0] = 0;
 		}
 
-		printf("%s/%u %s%s\n",
-		    inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr),
-		    tbl->ent[a].masklen, tether_buf, tval_buf);
+		if (tbl->ent[a].addr == INADDR_ANY && tbl->ent[a].masklen == 0)
+			printf("any");
+		else
+			printf("%s/%u",
+			    inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr),
+			    tbl->ent[a].masklen);
+		printf(" %s%s\n", tether_buf, tval_buf);
 	}
 	free(tbl);
 }

==== //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw.h#13 (text+ko) ====

@@ -682,11 +682,15 @@
 
 #ifdef IPFW_INTERNAL
 
+struct ipfw_table_head {
+	struct radix_node_head *in_rnh, *ether_rnh;
+};
+
 struct ip_fw_chain {
 	struct ip_fw	*rules;		/* list of rules */
 	struct ip_fw	*reap;		/* list of rules to reap */
 	LIST_HEAD(, cfg_nat) nat;       /* list of nat entries */
-	struct radix_node_head *tables[IPFW_TABLES_MAX];
+ 	struct ipfw_table_head tables[IPFW_TABLES_MAX];
 	struct rwlock	rwmtx;
 };
 #define	IPFW_LOCK_INIT(_chain) \

==== //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw2.c#19 (text+ko) ====

@@ -58,6 +58,7 @@
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -181,10 +182,17 @@
 	return (ether_addr_allow(want, a));
 }
 
+struct table_entry_addr {
+	u_char			len;
+	u_char			__reserved;
+	struct ether_addr	ether_addr;
+	in_addr_t		in_addr;
+};
+
 struct table_entry {
-	struct radix_node	rn[2];
-	struct sockaddr_in	addr, mask;
-	ipfw_ether_addr 	ether_addr;
+	struct radix_node	in_rn[2], ether_rn[2];
+	struct table_entry_addr	addr, mask;
+	int			refcnt;
 	u_int32_t		value;
 };
 
@@ -1802,86 +1810,149 @@
 	return rule;
 }
 
+static void
+init_table_entry_addr(struct table_entry_addr *addr, struct table_entry_addr *mask,
+    in_addr_t in_addr, uint8_t mlen, ipfw_ether_addr *ether_addr)
+{
+	addr->len = mask->len = sizeof(struct table_entry_addr);
+	mask->in_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
+	addr->in_addr = in_addr & mask->in_addr;
+	if (ether_addr && (ether_addr->flags & IPFW_EA_CHECK)) {
+		if (ether_addr->flags & IPFW_EA_MULTICAST) {
+			bzero(addr->ether_addr.octet, ETHER_ADDR_LEN);
+			addr->ether_addr.octet[0] = 0x01;
+			bzero(mask->ether_addr.octet, ETHER_ADDR_LEN);
+			mask->ether_addr.octet[0] = 0x01;
+		} else {
+			memcpy(addr->ether_addr.octet, ether_addr->octet, ETHER_ADDR_LEN);
+			memset(mask->ether_addr.octet, 0xff, ETHER_ADDR_LEN);
+		}
+	} else {
+		/* set any ether addr */
+		bzero(addr->ether_addr.octet, ETHER_ADDR_LEN);
+		memset(mask->ether_addr.octet, 0xff, ETHER_ADDR_LEN);
+	}
+}
+
+static __inline struct table_entry *
+__rn_to_table_entry(struct radix_node *_rn, int off)
+{
+	char *rn = (char*) _rn;
+
+	if (rn == NULL)
+		return NULL;
+	return (struct table_entry*)(rn - off);
+
+}
+
+#define RN_TO_ENT(e, r) (__rn_to_table_entry(e, __offsetof(struct table_entry, r)))
+
+static __inline void
+release_table_entry(struct ipfw_table_head *th, struct table_entry *ent)
+{
+	IPFW_WLOCK_ASSERT(&V_layer3_chain);	/* FIXME */
+
+	if (refcount_release(&ent->refcnt)) {
+		if (ent->in_rn[0].rn_flags)
+			th->in_rnh->rnh_deladdr(&ent->addr, &ent->mask, th->in_rnh);
+		free(ent, M_IPFW_TBL);
+	}
+}
+
 static int
 add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
     uint8_t mlen, ipfw_ether_addr *ether_addr, uint32_t value)
 {
 	INIT_VNET_IPFW(curvnet);
-	struct radix_node_head *rnh;
-	struct table_entry *ent;
+	struct ipfw_table_head *th;
+	struct table_entry *ent, *in_ent;
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
-	rnh = ch->tables[tbl];
+	th = &ch->tables[tbl];
 	ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO);
 	if (ent == NULL)
 		return (ENOMEM);
+	refcount_init(&ent->refcnt, 1);
 	ent->value = value;
-	ent->addr.sin_len = ent->mask.sin_len = 8;
-	ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
-	ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
-	ent->ether_addr = *ether_addr;
+	init_table_entry_addr(&ent->addr, &ent->mask, addr, mlen, ether_addr);
 	IPFW_WLOCK(&V_layer3_chain);
-	if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) ==
-	    NULL) {
+	if (th->ether_rnh->rnh_addaddr(&ent->addr, &ent->mask, th->ether_rnh, 
+	    ent->ether_rn) == NULL) {
 		IPFW_WUNLOCK(&V_layer3_chain);
 		free(ent, M_IPFW_TBL);
 		return (EEXIST);
 	}
+	in_ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&ent->addr, &ent->mask, th->in_rnh),
+	    in_rn);
+	if (in_ent == NULL) {
+		in_ent = RN_TO_ENT(th->in_rnh->rnh_addaddr(&ent->addr, &ent->mask,
+		    th->in_rnh, ent->in_rn), in_rn);
+		if (in_ent == NULL) {
+			th->ether_rnh->rnh_deladdr(&ent->addr, &ent->mask, th->ether_rnh);
+			IPFW_WUNLOCK(&V_layer3_chain);
+			free(ent, M_IPFW_TBL);
+			return (EEXIST);
+		}
+	}
+	refcount_acquire(&in_ent->refcnt);
 	IPFW_WUNLOCK(&V_layer3_chain);
 	return (0);
 }
 
+static __inline int
+delete_table_entry_rn(struct ipfw_table_head *th, void *addr, void *mask)
+{
+	struct table_entry *ent, *in_ent;
+
+	ent = RN_TO_ENT(th->ether_rnh->rnh_deladdr(addr, mask, th->ether_rnh),
+	    ether_rn);
+	if (ent == NULL)
+		return (ESRCH);
+	in_ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&ent->addr, &ent->mask, th->in_rnh),
+	    in_rn);
+	release_table_entry(th, in_ent);
+	release_table_entry(th, ent);
+	return (0);
+}
+
 static int
 del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
-    uint8_t mlen)
+    uint8_t mlen, ipfw_ether_addr *ether_addr)
 {
-	struct radix_node_head *rnh;
-	struct table_entry *ent;
-	struct sockaddr_in sa, mask;
+	struct ipfw_table_head *th;
+	struct table_entry_addr sa, mask;
+	int err;
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
-	rnh = ch->tables[tbl];
-	sa.sin_len = mask.sin_len = 8;
-	mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
-	sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
+	th = &ch->tables[tbl];
+	init_table_entry_addr(&sa, &mask, addr, mlen, ether_addr);
 	IPFW_WLOCK(ch);
-	ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh);
-	if (ent == NULL) {
-		IPFW_WUNLOCK(ch);
-		return (ESRCH);
-	}
+	err = delete_table_entry_rn(th, &sa, &mask);
 	IPFW_WUNLOCK(ch);
-	free(ent, M_IPFW_TBL);
-	return (0);
+	return (err);
 }
 
 static int
 flush_table_entry(struct radix_node *rn, void *arg)
 {
-	struct radix_node_head * const rnh = arg;
-	struct table_entry *ent;
-
-	ent = (struct table_entry *)
-	    rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
-	if (ent != NULL)
-		free(ent, M_IPFW_TBL);
+	delete_table_entry_rn((struct ipfw_table_head *)arg, rn->rn_key, rn->rn_mask);
 	return (0);
 }
 
 static int
 flush_table(struct ip_fw_chain *ch, uint16_t tbl)
 {
-	struct radix_node_head *rnh;
+	struct ipfw_table_head *th;
 
 	IPFW_WLOCK_ASSERT(ch);
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
-	rnh = ch->tables[tbl];
-	KASSERT(rnh != NULL, ("NULL IPFW table"));
-	rnh->rnh_walktree(rnh, flush_table_entry, rnh);
+	th = &ch->tables[tbl];
+	KASSERT(th->ether_rnh != NULL, ("NULL IPFW table"));
+	th->ether_rnh->rnh_walktree(th->ether_rnh, flush_table_entry, th);
 	return (0);
 }
 
@@ -1903,7 +1974,12 @@
 	uint16_t j;
 
 	for (i = 0; i < IPFW_TABLES_MAX; i++) {
-		if (!rn_inithead((void **)&ch->tables[i], 32)) {
+		struct ipfw_table_head *th = &ch->tables[i];
+
+		if (!rn_inithead((void**)&(th->in_rnh), 
+		    __offsetof(struct table_entry_addr, in_addr) * 8) ||
+		    !rn_inithead((void**)&(th->ether_rnh),
+		    __offsetof(struct table_entry_addr, ether_addr) * 8)) {
 			for (j = 0; j < i; j++) {
 				(void) flush_table(ch, j);
 			}
@@ -1915,22 +1991,35 @@
 
 static int
 lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
-    ipfw_ether_addr *ea, uint32_t *val)
+    ipfw_ether_addr *ether_addr, uint32_t *val)
 {
-	struct radix_node_head *rnh;
-	struct table_entry *ent;
-	struct sockaddr_in sa;
+	struct ipfw_table_head *th;
+	struct table_entry_addr sa, mask;
+	struct table_entry *ent = NULL;
+	const int has_ether_addr = (ether_addr && (ether_addr->flags & IPFW_EA_CHECK));
+	const int has_in_addr = (addr != INADDR_ANY);
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (0);
-	rnh = ch->tables[tbl];
-	sa.sin_len = 8;
-	sa.sin_addr.s_addr = addr;
-	ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
+	th = &ch->tables[tbl];
+	init_table_entry_addr(&sa, &mask, addr, (addr == INADDR_ANY ? 0 : 32), ether_addr);
+	if (has_ether_addr) {
+		ent = RN_TO_ENT(th->ether_rnh->rnh_lookup(&sa, NULL, th->ether_rnh),
+		    ether_rn);
+		if (ent == NULL && has_in_addr) {
+			/* 
+			 * Try to look up an entry with any (zero) ether_addr.
+			 * It's handled this way to avoid dealing with
+			 * non-contiguous masks in radix trees.
+			 */
+			bzero(sa.ether_addr.octet, ETHER_ADDR_LEN);
+			ent = RN_TO_ENT(th->ether_rnh->rnh_lookup(&sa, NULL, th->ether_rnh),
+			    ether_rn);
+		}
+	} else if (has_in_addr) {
+		ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&sa, NULL, th->in_rnh), in_rn);
+	}
 	if (ent != NULL) {
-		if (ea && !ether_addr_allow(&ent->ether_addr, ea))
-			return (0);
-		/* use address to create dynamic rule */
 		*val = ent->value;
 		return (1);
 	}
@@ -1949,20 +2038,20 @@
 static int
 count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
 {
-	struct radix_node_head *rnh;
+	struct ipfw_table_head *th;
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
-	rnh = ch->tables[tbl];
+	th = &ch->tables[tbl];
 	*cnt = 0;
-	rnh->rnh_walktree(rnh, count_table_entry, cnt);
+	th->ether_rnh->rnh_walktree(th->ether_rnh, count_table_entry, cnt);
 	return (0);
 }
 
 static int
 dump_table_entry(struct radix_node *rn, void *arg)
 {
-	struct table_entry * const n = (struct table_entry *)rn;
+	struct table_entry * const n = RN_TO_ENT(rn, ether_rn);
 	ipfw_table * const tbl = arg;
 	ipfw_table_entry *ent;
 
@@ -1970,12 +2059,23 @@
 		return (1);
 	ent = &tbl->ent[tbl->cnt];
 	ent->tbl = tbl->tbl;
-	if (in_nullhost(n->mask.sin_addr))
+	if (n->mask.in_addr == INADDR_ANY)
 		ent->masklen = 0;
 	else
-		ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
-	ent->addr = n->addr.sin_addr.s_addr;
-	ent->ether_addr = n->ether_addr;
+		ent->masklen = 33 - ffs(ntohl(n->mask.in_addr));
+	ent->addr = n->addr.in_addr;
+	memcpy(ent->ether_addr.octet, n->addr.ether_addr.octet, ETHER_ADDR_LEN);
+	ent->ether_addr.flags = 0;
+
+#define __ETHER_IS_ZERO(a) (((a)[0] | (a)[1] | (a)[2] | (a)[3] | (a)[4] | (a)[5]) == 0)
+	if (!__ETHER_IS_ZERO(n->mask.ether_addr.octet) &&
+	    !__ETHER_IS_ZERO(n->addr.ether_addr.octet)) {
+		ent->ether_addr.flags = IPFW_EA_CHECK;
+		/* Should be fixed after adding new flags */
+		if (n->mask.ether_addr.octet[0] == 0x01)
+			ent->ether_addr.flags |= IPFW_EA_MULTICAST;
+	}
+#undef __ETHER_IS_ZERO
 	ent->value = n->value;
 	tbl->cnt++;
 	return (0);
@@ -1984,13 +2084,13 @@
 static int
 dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
 {
-	struct radix_node_head *rnh;
+	struct ipfw_table_head *th;
 
 	if (tbl->tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
-	rnh = ch->tables[tbl->tbl];
+	th = &ch->tables[tbl->tbl];
 	tbl->cnt = 0;
-	rnh->rnh_walktree(rnh, dump_table_entry, tbl);
+	th->ether_rnh->rnh_walktree(th->ether_rnh, dump_table_entry, tbl);
 	return (0);
 }
 
@@ -2707,16 +2807,21 @@
 
 			case O_IP_SRC_LOOKUP:
 			case O_IP_DST_LOOKUP:
-				if (is_ipv4) {
-				    ipfw_ether_addr *ea =
-					(cmd->opcode == O_IP_DST_LOOKUP ?
-					    &args->f_id.dst_ether :
-					    &args->f_id.src_ether);
-				    uint32_t a =
-					(cmd->opcode == O_IP_DST_LOOKUP) ?
-					    dst_ip.s_addr : src_ip.s_addr;
+				if (is_ipv4 || (args->flags & IP_FW_ARGS_LAYER2)) {
+				    ipfw_ether_addr *ea;
+				    uint32_t a;
 				    uint32_t v;
 
+				    if (cmd->opcode == O_IP_DST_LOOKUP) {
+					    a = dst_ip.s_addr;
+					    ea = &args->f_id.dst_ether;
+				    } else {
+					    a = src_ip.s_addr;
+					    ea = &args->f_id.src_ether;
+				    }
+				    if (args->flags & IP_FW_ARGS_LAYER2)
+					    a = INADDR_ANY;
+
 				    match = lookup_table(chain, cmd->arg1, a,
 					ea, &v);
 				    if (!match)
@@ -4441,7 +4546,7 @@
 			if (error)
 				break;
 			error = del_table_entry(&V_layer3_chain, ent.tbl,
-			    ent.addr, ent.masklen);
+			    ent.addr, ent.masklen, &ent.ether_addr);
 		}
 		break;
 


More information about the p4-projects mailing list