PERFORCE change 153374 for review
Gleb Kurtsou
gk at FreeBSD.org
Sat Nov 22 16:12:29 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=153374
Change 153374 by gk at gk_h1 on 2008/11/23 00:11:54
Major improvement to ipfw tables. Extend tables to contain layer2-only addresses (= any IP address) and support table lookup during layer2 filtering.
Sponsored by: Korcett Holdings, Inc (www.korcett.com)
Affected files ...
.. //depot/projects/soc2008/gk_l2filter/sbin-ipfw/ipfw2.c#13 edit
.. //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw.h#13 edit
.. //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw2.c#19 edit
Differences ...
==== //depot/projects/soc2008/gk_l2filter/sbin-ipfw/ipfw2.c#13 (text+ko) ====
@@ -6022,7 +6022,7 @@
do_add = **av == 'a';
ac--; av++;
if (!ac)
- errx(EX_USAGE, "IP address required");
+ errx(EX_USAGE, "Address required");
p = strchr(*av, '/');
if (p) {
*p++ = '\0';
@@ -6031,11 +6031,19 @@
errx(EX_DATAERR, "bad width ``%s''", p);
} else
ent.masklen = 32;
- if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0)
- errx(EX_NOHOST, "hostname ``%s'' unknown", *av);
- ac--; av++;
+ if (strcmp(*av, "ether") == 0 || strcmp(*av, "any") == 0) {
+ ent.addr = INADDR_ANY;
+ ent.masklen = 0;
+ if ((*av)[0] == 'a') { /* any */
+ ac--; av++;
+ }
+ } else {
+ if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0)
+ errx(EX_NOHOST, "hostname ``%s'' unknown", *av);
+ ac--; av++;
+ }
bzero(&ent.ether_addr, sizeof(ent.ether_addr));
- if (do_add && ac >= 2 && strcmp(*av, "ether") == 0) {
+ if (ac >= 2 && strcmp(*av, "ether") == 0) {
get_ether_addr(av[1], &ent.ether_addr);
ac-=2; av+=2;
}
@@ -6136,9 +6144,13 @@
tether_buf[0] = 0;
}
- printf("%s/%u %s%s\n",
- inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr),
- tbl->ent[a].masklen, tether_buf, tval_buf);
+ if (tbl->ent[a].addr == INADDR_ANY && tbl->ent[a].masklen == 0)
+ printf("any");
+ else
+ printf("%s/%u",
+ inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr),
+ tbl->ent[a].masklen);
+ printf(" %s%s\n", tether_buf, tval_buf);
}
free(tbl);
}
==== //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw.h#13 (text+ko) ====
@@ -682,11 +682,15 @@
#ifdef IPFW_INTERNAL
+struct ipfw_table_head {
+ struct radix_node_head *in_rnh, *ether_rnh;
+};
+
struct ip_fw_chain {
struct ip_fw *rules; /* list of rules */
struct ip_fw *reap; /* list of rules to reap */
LIST_HEAD(, cfg_nat) nat; /* list of nat entries */
- struct radix_node_head *tables[IPFW_TABLES_MAX];
+ struct ipfw_table_head tables[IPFW_TABLES_MAX];
struct rwlock rwmtx;
};
#define IPFW_LOCK_INIT(_chain) \
==== //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw2.c#19 (text+ko) ====
@@ -58,6 +58,7 @@
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/refcount.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -181,10 +182,17 @@
return (ether_addr_allow(want, a));
}
+struct table_entry_addr {
+ u_char len;
+ u_char __reserved;
+ struct ether_addr ether_addr;
+ in_addr_t in_addr;
+};
+
struct table_entry {
- struct radix_node rn[2];
- struct sockaddr_in addr, mask;
- ipfw_ether_addr ether_addr;
+ struct radix_node in_rn[2], ether_rn[2];
+ struct table_entry_addr addr, mask;
+ int refcnt;
u_int32_t value;
};
@@ -1802,86 +1810,149 @@
return rule;
}
+static void
+init_table_entry_addr(struct table_entry_addr *addr, struct table_entry_addr *mask,
+ in_addr_t in_addr, uint8_t mlen, ipfw_ether_addr *ether_addr)
+{
+ addr->len = mask->len = sizeof(struct table_entry_addr);
+ mask->in_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
+ addr->in_addr = in_addr & mask->in_addr;
+ if (ether_addr && (ether_addr->flags & IPFW_EA_CHECK)) {
+ if (ether_addr->flags & IPFW_EA_MULTICAST) {
+ bzero(addr->ether_addr.octet, ETHER_ADDR_LEN);
+ addr->ether_addr.octet[0] = 0x01;
+ bzero(mask->ether_addr.octet, ETHER_ADDR_LEN);
+ mask->ether_addr.octet[0] = 0x01;
+ } else {
+ memcpy(addr->ether_addr.octet, ether_addr->octet, ETHER_ADDR_LEN);
+ memset(mask->ether_addr.octet, 0xff, ETHER_ADDR_LEN);
+ }
+ } else {
+ /* set any ether addr */
+ bzero(addr->ether_addr.octet, ETHER_ADDR_LEN);
+ memset(mask->ether_addr.octet, 0xff, ETHER_ADDR_LEN);
+ }
+}
+
+static __inline struct table_entry *
+__rn_to_table_entry(struct radix_node *_rn, int off)
+{
+ char *rn = (char*) _rn;
+
+ if (rn == NULL)
+ return NULL;
+ return (struct table_entry*)(rn - off);
+
+}
+
+#define RN_TO_ENT(e, r) (__rn_to_table_entry(e, __offsetof(struct table_entry, r)))
+
+static __inline void
+release_table_entry(struct ipfw_table_head *th, struct table_entry *ent)
+{
+ IPFW_WLOCK_ASSERT(&V_layer3_chain); /* FIXME */
+
+ if (refcount_release(&ent->refcnt)) {
+ if (ent->in_rn[0].rn_flags)
+ th->in_rnh->rnh_deladdr(&ent->addr, &ent->mask, th->in_rnh);
+ free(ent, M_IPFW_TBL);
+ }
+}
+
static int
add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
uint8_t mlen, ipfw_ether_addr *ether_addr, uint32_t value)
{
INIT_VNET_IPFW(curvnet);
- struct radix_node_head *rnh;
- struct table_entry *ent;
+ struct ipfw_table_head *th;
+ struct table_entry *ent, *in_ent;
if (tbl >= IPFW_TABLES_MAX)
return (EINVAL);
- rnh = ch->tables[tbl];
+ th = &ch->tables[tbl];
ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO);
if (ent == NULL)
return (ENOMEM);
+ refcount_init(&ent->refcnt, 1);
ent->value = value;
- ent->addr.sin_len = ent->mask.sin_len = 8;
- ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
- ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
- ent->ether_addr = *ether_addr;
+ init_table_entry_addr(&ent->addr, &ent->mask, addr, mlen, ether_addr);
IPFW_WLOCK(&V_layer3_chain);
- if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) ==
- NULL) {
+ if (th->ether_rnh->rnh_addaddr(&ent->addr, &ent->mask, th->ether_rnh,
+ ent->ether_rn) == NULL) {
IPFW_WUNLOCK(&V_layer3_chain);
free(ent, M_IPFW_TBL);
return (EEXIST);
}
+ in_ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&ent->addr, &ent->mask, th->in_rnh),
+ in_rn);
+ if (in_ent == NULL) {
+ in_ent = RN_TO_ENT(th->in_rnh->rnh_addaddr(&ent->addr, &ent->mask,
+ th->in_rnh, ent->in_rn), in_rn);
+ if (in_ent == NULL) {
+ th->ether_rnh->rnh_deladdr(&ent->addr, &ent->mask, th->ether_rnh);
+ IPFW_WUNLOCK(&V_layer3_chain);
+ free(ent, M_IPFW_TBL);
+ return (EEXIST);
+ }
+ }
+ refcount_acquire(&in_ent->refcnt);
IPFW_WUNLOCK(&V_layer3_chain);
return (0);
}
+static __inline int
+delete_table_entry_rn(struct ipfw_table_head *th, void *addr, void *mask)
+{
+ struct table_entry *ent, *in_ent;
+
+ ent = RN_TO_ENT(th->ether_rnh->rnh_deladdr(addr, mask, th->ether_rnh),
+ ether_rn);
+ if (ent == NULL)
+ return (ESRCH);
+ in_ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&ent->addr, &ent->mask, th->in_rnh),
+ in_rn);
+ release_table_entry(th, in_ent);
+ release_table_entry(th, ent);
+ return (0);
+}
+
static int
del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint8_t mlen)
+ uint8_t mlen, ipfw_ether_addr *ether_addr)
{
- struct radix_node_head *rnh;
- struct table_entry *ent;
- struct sockaddr_in sa, mask;
+ struct ipfw_table_head *th;
+ struct table_entry_addr sa, mask;
+ int err;
if (tbl >= IPFW_TABLES_MAX)
return (EINVAL);
- rnh = ch->tables[tbl];
- sa.sin_len = mask.sin_len = 8;
- mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
- sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
+ th = &ch->tables[tbl];
+ init_table_entry_addr(&sa, &mask, addr, mlen, ether_addr);
IPFW_WLOCK(ch);
- ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh);
- if (ent == NULL) {
- IPFW_WUNLOCK(ch);
- return (ESRCH);
- }
+ err = delete_table_entry_rn(th, &sa, &mask);
IPFW_WUNLOCK(ch);
- free(ent, M_IPFW_TBL);
- return (0);
+ return (err);
}
static int
flush_table_entry(struct radix_node *rn, void *arg)
{
- struct radix_node_head * const rnh = arg;
- struct table_entry *ent;
-
- ent = (struct table_entry *)
- rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
- if (ent != NULL)
- free(ent, M_IPFW_TBL);
+ delete_table_entry_rn((struct ipfw_table_head *)arg, rn->rn_key, rn->rn_mask);
return (0);
}
static int
flush_table(struct ip_fw_chain *ch, uint16_t tbl)
{
- struct radix_node_head *rnh;
+ struct ipfw_table_head *th;
IPFW_WLOCK_ASSERT(ch);
if (tbl >= IPFW_TABLES_MAX)
return (EINVAL);
- rnh = ch->tables[tbl];
- KASSERT(rnh != NULL, ("NULL IPFW table"));
- rnh->rnh_walktree(rnh, flush_table_entry, rnh);
+ th = &ch->tables[tbl];
+ KASSERT(th->ether_rnh != NULL, ("NULL IPFW table"));
+ th->ether_rnh->rnh_walktree(th->ether_rnh, flush_table_entry, th);
return (0);
}
@@ -1903,7 +1974,12 @@
uint16_t j;
for (i = 0; i < IPFW_TABLES_MAX; i++) {
- if (!rn_inithead((void **)&ch->tables[i], 32)) {
+ struct ipfw_table_head *th = &ch->tables[i];
+
+ if (!rn_inithead((void**)&(th->in_rnh),
+ __offsetof(struct table_entry_addr, in_addr) * 8) ||
+ !rn_inithead((void**)&(th->ether_rnh),
+ __offsetof(struct table_entry_addr, ether_addr) * 8)) {
for (j = 0; j < i; j++) {
(void) flush_table(ch, j);
}
@@ -1915,22 +1991,35 @@
static int
lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- ipfw_ether_addr *ea, uint32_t *val)
+ ipfw_ether_addr *ether_addr, uint32_t *val)
{
- struct radix_node_head *rnh;
- struct table_entry *ent;
- struct sockaddr_in sa;
+ struct ipfw_table_head *th;
+ struct table_entry_addr sa, mask;
+ struct table_entry *ent = NULL;
+ const int has_ether_addr = (ether_addr && (ether_addr->flags & IPFW_EA_CHECK));
+ const int has_in_addr = (addr != INADDR_ANY);
if (tbl >= IPFW_TABLES_MAX)
return (0);
- rnh = ch->tables[tbl];
- sa.sin_len = 8;
- sa.sin_addr.s_addr = addr;
- ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
+ th = &ch->tables[tbl];
+ init_table_entry_addr(&sa, &mask, addr, (addr == INADDR_ANY ? 0 : 32), ether_addr);
+ if (has_ether_addr) {
+ ent = RN_TO_ENT(th->ether_rnh->rnh_lookup(&sa, NULL, th->ether_rnh),
+ ether_rn);
+ if (ent == NULL && has_in_addr) {
+ /*
+ * Try to lookup entry with any (zero) ether_addr. It's
+ * handled this way not to deal with non-continuous
+ * masks in radix trees.
+ */
+ bzero(sa.ether_addr.octet, ETHER_ADDR_LEN);
+ ent = RN_TO_ENT(th->ether_rnh->rnh_lookup(&sa, NULL, th->ether_rnh),
+ ether_rn);
+ }
+ } else if (has_in_addr) {
+ ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&sa, NULL, th->in_rnh), in_rn);
+ }
if (ent != NULL) {
- if (ea && !ether_addr_allow(&ent->ether_addr, ea))
- return (0);
- /* use address to create dynamic rule */
*val = ent->value;
return (1);
}
@@ -1949,20 +2038,20 @@
static int
count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
{
- struct radix_node_head *rnh;
+ struct ipfw_table_head *th;
if (tbl >= IPFW_TABLES_MAX)
return (EINVAL);
- rnh = ch->tables[tbl];
+ th = &ch->tables[tbl];
*cnt = 0;
- rnh->rnh_walktree(rnh, count_table_entry, cnt);
+ th->ether_rnh->rnh_walktree(th->ether_rnh, count_table_entry, cnt);
return (0);
}
static int
dump_table_entry(struct radix_node *rn, void *arg)
{
- struct table_entry * const n = (struct table_entry *)rn;
+ struct table_entry * const n = RN_TO_ENT(rn, ether_rn);
ipfw_table * const tbl = arg;
ipfw_table_entry *ent;
@@ -1970,12 +2059,23 @@
return (1);
ent = &tbl->ent[tbl->cnt];
ent->tbl = tbl->tbl;
- if (in_nullhost(n->mask.sin_addr))
+ if (n->mask.in_addr == INADDR_ANY)
ent->masklen = 0;
else
- ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
- ent->addr = n->addr.sin_addr.s_addr;
- ent->ether_addr = n->ether_addr;
+ ent->masklen = 33 - ffs(ntohl(n->mask.in_addr));
+ ent->addr = n->addr.in_addr;
+ memcpy(ent->ether_addr.octet, n->addr.ether_addr.octet, ETHER_ADDR_LEN);
+ ent->ether_addr.flags = 0;
+
+#define __ETHER_IS_ZERO(a) (((a)[0] | (a)[1] | (a)[2] | (a)[3] | (a)[4] | (a)[5]) == 0)
+ if (!__ETHER_IS_ZERO(n->mask.ether_addr.octet) &&
+ !__ETHER_IS_ZERO(n->addr.ether_addr.octet)) {
+ ent->ether_addr.flags = IPFW_EA_CHECK;
+ /* Should be fixed after adding new flags */
+ if (n->mask.ether_addr.octet[0] == 0x01)
+ ent->ether_addr.flags |= IPFW_EA_MULTICAST;
+ }
+#undef __ETHER_IS_ZERO
ent->value = n->value;
tbl->cnt++;
return (0);
@@ -1984,13 +2084,13 @@
static int
dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
{
- struct radix_node_head *rnh;
+ struct ipfw_table_head *th;
if (tbl->tbl >= IPFW_TABLES_MAX)
return (EINVAL);
- rnh = ch->tables[tbl->tbl];
+ th = &ch->tables[tbl->tbl];
tbl->cnt = 0;
- rnh->rnh_walktree(rnh, dump_table_entry, tbl);
+ th->ether_rnh->rnh_walktree(th->ether_rnh, dump_table_entry, tbl);
return (0);
}
@@ -2707,16 +2807,21 @@
case O_IP_SRC_LOOKUP:
case O_IP_DST_LOOKUP:
- if (is_ipv4) {
- ipfw_ether_addr *ea =
- (cmd->opcode == O_IP_DST_LOOKUP ?
- &args->f_id.dst_ether :
- &args->f_id.src_ether);
- uint32_t a =
- (cmd->opcode == O_IP_DST_LOOKUP) ?
- dst_ip.s_addr : src_ip.s_addr;
+ if (is_ipv4 || (args->flags & IP_FW_ARGS_LAYER2)) {
+ ipfw_ether_addr *ea;
+ uint32_t a;
uint32_t v;
+ if (cmd->opcode == O_IP_DST_LOOKUP) {
+ a = dst_ip.s_addr;
+ ea = &args->f_id.dst_ether;
+ } else {
+ a = src_ip.s_addr;
+ ea = &args->f_id.src_ether;
+ }
+ if (args->flags & IP_FW_ARGS_LAYER2)
+ a = INADDR_ANY;
+
match = lookup_table(chain, cmd->arg1, a,
ea, &v);
if (!match)
@@ -4441,7 +4546,7 @@
if (error)
break;
error = del_table_entry(&V_layer3_chain, ent.tbl,
- ent.addr, ent.masklen);
+ ent.addr, ent.masklen, &ent.ether_addr);
}
break;
More information about the p4-projects
mailing list