svn commit: r271932 - in projects/ipfw/sys: modules/ipfw netpfil/ipfw
Alexander V. Chernikov
melifaro at FreeBSD.org
Sun Sep 21 18:15:10 UTC 2014
Author: melifaro
Date: Sun Sep 21 18:15:09 2014
New Revision: 271932
URL: http://svnweb.freebsd.org/changeset/base/271932
Log:
Add pre-alpha version of the DXR lookup module.
It does build but (currently) does not work.
This change is not intended to be merged along with other ipfw changes.
Added:
projects/ipfw/sys/netpfil/ipfw/dxr_algo.c
projects/ipfw/sys/netpfil/ipfw/dxr_fwd.c
projects/ipfw/sys/netpfil/ipfw/dxr_fwd.h
Modified:
projects/ipfw/sys/modules/ipfw/Makefile
projects/ipfw/sys/netpfil/ipfw/ip_fw_table.h
projects/ipfw/sys/netpfil/ipfw/ip_fw_table_algo.c
Modified: projects/ipfw/sys/modules/ipfw/Makefile
==============================================================================
--- projects/ipfw/sys/modules/ipfw/Makefile Sun Sep 21 15:37:39 2014 (r271931)
+++ projects/ipfw/sys/modules/ipfw/Makefile Sun Sep 21 18:15:09 2014 (r271932)
@@ -9,6 +9,7 @@ SRCS= ip_fw2.c ip_fw_pfil.c
SRCS+= ip_fw_dynamic.c ip_fw_log.c
SRCS+= ip_fw_sockopt.c ip_fw_table.c ip_fw_table_algo.c ip_fw_iface.c
SRCS+= ip_fw_table_value.c
+SRCS+= dxr_fwd.c dxr_algo.c
SRCS+= opt_inet.h opt_inet6.h opt_ipdivert.h opt_ipfw.h opt_ipsec.h
CFLAGS+= -DIPFIREWALL
Added: projects/ipfw/sys/netpfil/ipfw/dxr_algo.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/ipfw/sys/netpfil/ipfw/dxr_algo.c Sun Sep 21 18:15:09 2014 (r271932)
@@ -0,0 +1,847 @@
+/*-
+ * Copyright (c) 2014 Yandex LLC
+ * Copyright (c) 2014 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c 267384 2014-06-12 09:59:11Z melifaro $");
+
+/*
+ * DXR algorithm bindings.
+ *
+ */
+
+#include "opt_ipfw.h"
+#include "opt_inet.h"
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
+#include <net/radix.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
+#include <netinet/ip_fw.h>
+
+#include <vm/uma.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_table.h>
+#include <netpfil/ipfw/dxr_fwd.h>
+
+/* Enables the printf() tracing blocks guarded by DXR_BUILD_DEBUG in this file. */
+#define DXR_BUILD_DEBUG
+
+/*
+ * UMA zone for struct chunk_desc objects. Created on first use in
+ * ta_init_dxr() and passed to the DXR code as its slab pointer.
+ */
+static uma_zone_t chunk_zone;
+
+/*
+ * ADDR implementation using dxr
+ *
+ */
+
+/*
+ * The radix code expects addr and mask to be array of bytes,
+ * with the first byte being the length of the array. rn_inithead
+ * is called with the offset in bits of the lookup key within the
+ * array. If we use a sockaddr_in as the underlying type,
+ * sin_len is conveniently located at offset 0, sin_addr is at
+ * offset 4 and normally aligned.
+ * But for portability, let's avoid assumption and make the code explicit
+ */
+/* First byte of @v (the key length byte), usable as an lvalue. */
+#define KEY_LEN(v) *((uint8_t *)&(v))
+/*
+ * Do not require radix to compare more than the actual IPv4/IPv6 address:
+ * the key length covers everything up to and including the address field.
+ */
+#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
+#define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr))
+
+/* Offset of the address field within the key, in bits; passed to rn_inithead(). */
+#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
+#define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr))
+
+/*
+ * IPv4 table entry as stored in the radix tree.
+ * The radix nodes come first because the code in this file casts
+ * struct radix_node pointers directly to this type.
+ */
+struct radix_addr_entry {
+ struct radix_node rn[2];
+ struct sockaddr_in addr; /* radix key: masked IPv4 address */
+ uint32_t value; /* table value associated with the prefix */
+ uint8_t masklen; /* prefix length in bits */
+};
+
+/*
+ * Compact IPv6 radix key: a length byte, a family byte, two bytes of
+ * padding and the address itself.
+ */
+struct sa_in6 {
+ uint8_t sin6_len; /* key length, set through KEY_LEN() */
+ uint8_t sin6_family;
+ uint8_t pad[2];
+ struct in6_addr sin6_addr;
+};
+
+/*
+ * IPv6 table entry as stored in the radix tree; same layout idea as
+ * struct radix_addr_entry but keyed by struct sa_in6.
+ */
+struct radix_addr_xentry {
+ struct radix_node rn[2];
+ struct sa_in6 addr6; /* radix key: masked IPv6 address */
+ uint32_t value; /* table value associated with the prefix */
+ uint8_t masklen; /* prefix length in bits */
+};
+
+/* Per-table algorithm state, allocated in ta_init_dxr(). */
+struct radix_cfg {
+ struct radix_node_head *head4; /* IPv4 radix tree */
+ struct radix_node_head *head6; /* IPv6 radix tree */
+ size_t count4; /* number of IPv4 entries added */
+ size_t count6; /* number of IPv6 entries added */
+ struct dxr_instance *di; /* DXR instance returned by dxr_init() */
+};
+
+/*
+ * Scratch buffer shared by the prepare/add/del/flush callbacks of a
+ * single add or delete request.
+ */
+struct ta_buf_radix
+{
+ /* Entry allocated by prepare_add or unlinked by del; freed by flush_entry. */
+ void *ent_ptr;
+ /* Key and (optional) mask handed to the radix add/lookup/delete calls. */
+ struct sockaddr *addr_ptr;
+ struct sockaddr *mask_ptr;
+ /* Backing storage for the key/mask when they do not live in the entry. */
+ union {
+ struct {
+ struct sockaddr_in sa;
+ struct sockaddr_in ma;
+ } a4;
+ struct {
+ struct sa_in6 sa;
+ struct sa_in6 ma;
+ } a6;
+ } addr;
+};
+
+/*
+ * IPv4 lookup in the backing radix tree; installed as the tree_lookup
+ * callback in ta_init_dxr().
+ *
+ * tree_ptr: IPv4 radix head (struct radix_node_head *).
+ * pdst:     in: address to look up (passed through htonl() to build the
+ *           key); out: sin_addr.s_addr of the matching entry.
+ * pmask:    out: netmask built from the entry's prefix length, htonl()-encoded.
+ * pnh:      out: value stored in the matching entry.
+ *
+ * Returns 0 on a match, ENOENT otherwise.
+ *
+ * NOTE(review): the key is built from htonl(*pdst) while the debug trace
+ * hands the raw *pdst to inet_ntop(); confirm which byte order callers use.
+ */
+static int
+radix_lookup(void *tree_ptr, in_addr_t *pdst, in_addr_t *pmask, int *pnh)
+{
+	struct radix_node_head *rnh;
+	struct radix_addr_entry *ent;
+	struct sockaddr_in sin, *s_dst;
+	struct sockaddr *psa;
+	in_addr_t dst, mask;
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_len = sizeof(sin);
+	sin.sin_addr.s_addr = htonl(*pdst);
+	psa = (struct sockaddr *)&sin;
+
+	//TREE_LOCK_ASSERT(di);
+	rnh = (struct radix_node_head *)tree_ptr;
+	ent = (struct radix_addr_entry *)rnh->rnh_matchaddr(psa, rnh);
+	if (ent == NULL)
+		return (ENOENT);
+
+	s_dst = (struct sockaddr_in *)&ent->addr;
+
+	dst = s_dst->sin_addr.s_addr;
+	/*
+	 * Use an unsigned 1: with masklen == 1 the shift count is 31 and
+	 * shifting a signed 1 into the sign bit is undefined behaviour.
+	 */
+	mask = htonl(ent->masklen ? ~((1U << (32 - ent->masklen)) - 1) : 0);
+
+#ifdef DXR_BUILD_DEBUG
+	char kbuf[16], kbuf2[16];
+	inet_ntop(AF_INET, pdst, kbuf, sizeof(kbuf));
+	inet_ntop(AF_INET, &dst, kbuf2, sizeof(kbuf2));
+	printf("RLookup for %s returned %s/%d value %d\n", kbuf, kbuf2,
+	    ent->masklen, ent->value);
+#endif
+
+	*pnh = ent->value;
+	*pdst = dst;
+	*pmask = mask;
+
+	return (0);
+}
+
+/* Context passed through the radix walk to radix_walkf_f(). */
+struct radix_wa {
+ tree_walkf_cb_t *f; /* callback invoked for every visited entry */
+ void *arg; /* opaque argument forwarded to @f */
+ struct dxr_instance *di; /* DXR instance forwarded to @f */
+};
+
+/*
+ * Per-node callback used by radix_walkf(): unpacks an IPv4 radix entry
+ * into (address, mask, value) and forwards it to the callback stored in
+ * the struct radix_wa passed as @arg.
+ *
+ * Returns whatever the forwarded callback returns.
+ */
+static int
+radix_walkf_f(struct radix_node *rn, void *arg)
+{
+	struct radix_wa *wa;
+	struct radix_addr_entry *ent;
+	struct sockaddr_in *s_dst;
+	in_addr_t dst, mask;
+	int nh;
+
+	wa = (struct radix_wa *)arg;
+	ent = (struct radix_addr_entry *)rn;
+
+	s_dst = (struct sockaddr_in *)&ent->addr;
+
+	nh = ent->value;
+	dst = s_dst->sin_addr.s_addr;
+	/*
+	 * Use an unsigned 1: with masklen == 1 the shift count is 31 and
+	 * shifting a signed 1 into the sign bit is undefined behaviour.
+	 */
+	mask = htonl(ent->masklen ? ~((1U << (32 - ent->masklen)) - 1) : 0);
+
+#ifdef DXR_BUILD_DEBUG
+	char kbuf[16];
+	inet_ntop(AF_INET, &dst, kbuf, sizeof(kbuf));
+	printf(" WALK returned %s/%d value %d\n", kbuf,
+	    ent->masklen, ent->value);
+#endif
+
+	return (wa->f(wa->di, dst, mask, nh, wa->arg));
+}
+
+
+/*
+ * Tree-walk callback installed as tree_walk in ta_init_dxr().
+ *
+ * Builds sockaddr_in key/mask pairs from @dst and @mask and starts a
+ * radix walk with rnh_walktree_from(); every visited node is reported to
+ * @f (together with @di and @arg) through radix_walkf_f().
+ *
+ * Returns the result of rnh_walktree_from().
+ */
+static int
+radix_walkf(void *tree_ptr, struct dxr_instance *di, in_addr_t dst,
+    in_addr_t mask, tree_walkf_cb_t *f, void *arg)
+{
+	struct radix_node_head *head = (struct radix_node_head *)tree_ptr;
+	struct sockaddr_in key, keymask;
+	struct radix_wa ctx;
+	int rc;
+
+	/* Context consumed by radix_walkf_f(). */
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.di = di;
+	ctx.f = f;
+	ctx.arg = arg;
+
+	memset(&key, 0, sizeof(key));
+	key.sin_len = sizeof(key);
+	key.sin_family = AF_INET;
+	key.sin_addr.s_addr = dst;
+
+	memset(&keymask, 0, sizeof(keymask));
+	keymask.sin_len = sizeof(keymask);
+	keymask.sin_family = AF_INET;
+	keymask.sin_addr.s_addr = mask;
+
+#ifdef DXR_BUILD_DEBUG
+	{
+		char dstbuf[16], maskbuf[16];
+
+		inet_ntop(AF_INET, &dst, dstbuf, sizeof(dstbuf));
+		inet_ntop(AF_INET, &mask, maskbuf, sizeof(maskbuf));
+		printf("START walk for %s/%s\n", dstbuf, maskbuf);
+	}
+#endif
+
+	rc = head->rnh_walktree_from(head, &key, &keymask, radix_walkf_f, &ctx);
+
+#ifdef DXR_BUILD_DEBUG
+	printf("END walk\n");
+#endif
+
+	return (rc);
+}
+
+
+/*
+ * Allocation hook given to the DXR code: @slab_ptr is the UMA zone to
+ * allocate from. Uses M_NOWAIT, so the result may be NULL.
+ */
+static void *
+slab_alloc(void *slab_ptr)
+{
+
+	return (uma_zalloc((uma_zone_t)slab_ptr, M_NOWAIT));
+}
+
+/*
+ * Release hook given to the DXR code: returns @obj_ptr to the UMA zone
+ * passed as @slab_ptr.
+ */
+static void
+slab_free(void *slab_ptr, void *obj_ptr)
+{
+
+	uma_zfree((uma_zone_t)slab_ptr, obj_ptr);
+}
+
+/*
+ * Table lookup entry point.
+ *
+ * A key of sizeof(in_addr_t) bytes is looked up through DXR (ti->state);
+ * if DXR reports index 0, the default-route index kept in the low 16 bits
+ * of ti->data is used instead. Any other key length is treated as an IPv6
+ * address and looked up in the radix tree stored in ti->xstate.
+ *
+ * Returns 1 and fills *val on a match, 0 otherwise.
+ */
+static int
+ta_lookup_dxr(struct table_info *ti, void *key, uint32_t keylen,
+    uint32_t *val)
+{
+
+	if (keylen != sizeof(in_addr_t)) {
+		struct radix_node_head *head6;
+		struct radix_addr_xentry *match;
+		struct sa_in6 key6;
+
+		KEY_LEN(key6) = KEY_LEN_INET6;
+		memcpy(&key6.sin6_addr, key, sizeof(struct in6_addr));
+		head6 = (struct radix_node_head *)ti->xstate;
+		match = (struct radix_addr_xentry *)
+		    (head6->rnh_matchaddr(&key6, head6));
+		if (match == NULL)
+			return (0);
+
+		*val = match->value;
+		return (1);
+	}
+
+	struct dxr_instance *inst = (struct dxr_instance *)ti->state;
+	int nh = dxr_lookup(inst, *((uint32_t *)key));
+#ifdef DXR_BUILD_DEBUG
+	char kbuf[16];
+	inet_ntop(AF_INET, key, kbuf, sizeof(kbuf));
+	printf("Lookup for %s returned %d\n", kbuf, nh);
+#endif
+	if (nh == 0) {
+		/* No DXR match: fall back to the default route index, if any. */
+		nh = ti->data & 0xFFFF;
+		if (nh == 0)
+			return (0);
+	}
+
+	*val = nh;
+	return (1);
+}
+
+/*
+ * Create a new table instance.
+ *
+ * Allocates the per-table radix_cfg, both radix heads and a DXR instance,
+ * wires the radix/slab callbacks into DXR and only then publishes the
+ * result through @ta_state and @ti. On failure everything allocated so
+ * far is released and neither @ta_state nor @ti is modified.
+ *
+ * @ch, @data and @tflags are not used.
+ *
+ * Returns 0 on success, ENOMEM if a radix head or the DXR instance
+ * cannot be created.
+ */
+static int
+ta_init_dxr(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
+    char *data, uint8_t tflags)
+{
+	struct radix_cfg *cfg;
+	struct dxr_funcs f;
+
+	cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO);
+
+	if (!rn_inithead((void **)&cfg->head4, OFF_LEN_INET))
+		goto fail_cfg;
+	if (!rn_inithead((void **)&cfg->head6, OFF_LEN_INET6))
+		goto fail_head4;
+
+	/* XXX: do this from per-algo hook */
+	if (chunk_zone == NULL) {
+		/* Allocate the zone for chunk descriptors (XXX - get size) */
+		chunk_zone = uma_zcreate("dxr_chunk", sizeof(struct chunk_desc),
+		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+#if 0
+		/* Create updater thread */
+		if (kproc_kthread_add(dxr_updater, NULL, &p, &td, RFHIGHPID,
+		    0, "dxr_update", "dxr_update"))
+			panic("Can't create the DXR updater thread");
+#endif
+	}
+
+	memset(&f, 0, sizeof(f));
+	f.slab_alloc = slab_alloc;
+	f.slab_free = slab_free;
+	f.slab_ptr = chunk_zone;
+	f.tree_walk = radix_walkf;
+	f.tree_lookup = radix_lookup;
+	f.tree_ptr = cfg->head4;
+
+	cfg->di = dxr_init(M_IPFW, M_WAITOK);
+	/* Check the DXR instance itself; cfg can never be NULL here. */
+	if (cfg->di == NULL)
+		goto fail_head6;
+
+	dxr_setfuncs(cfg->di, &f);
+
+	/* Everything is set up: publish the state to the caller. */
+	ti->xstate = cfg->head6;
+	ti->state = cfg->di;
+	ti->lookup = ta_lookup_dxr;
+	*ta_state = cfg;
+
+	return (0);
+
+fail_head6:
+	rn_detachhead((void **)&cfg->head6);
+fail_head4:
+	rn_detachhead((void **)&cfg->head4);
+fail_cfg:
+	free(cfg, M_IPFW);
+	return (ENOMEM);
+}
+
+/*
+ * Walk callback used while destroying a table: unlinks @rn from the radix
+ * head passed as @arg and frees the entry that contained it.
+ * Always returns 0 so the walk continues.
+ */
+static int
+flush_radix_entry(struct radix_node *rn, void *arg)
+{
+	struct radix_node_head * const head = arg;
+	void *removed;
+
+	removed = head->rnh_deladdr(rn->rn_key, rn->rn_mask, head);
+	if (removed == NULL)
+		return (0);
+
+	free(removed, M_IPFW_TBL);
+	return (0);
+}
+
+/*
+ * Tear down a table instance: destroy the DXR instance, empty and detach
+ * both radix trees, then free the radix_cfg itself.
+ */
+static void
+ta_destroy_dxr(void *ta_state, struct table_info *ti)
+{
+	struct radix_cfg *conf = (struct radix_cfg *)ta_state;
+
+	dxr_destroy(conf->di, M_IPFW);
+
+	/* IPv4 tree */
+	conf->head4->rnh_walktree(conf->head4, flush_radix_entry, conf->head4);
+	rn_detachhead((void **)&conf->head4);
+
+	/* IPv6 tree */
+	conf->head6->rnh_walktree(conf->head6, flush_radix_entry, conf->head6);
+	rn_detachhead((void **)&conf->head6);
+
+	free(conf, M_IPFW);
+}
+
+/*
+ * Report algorithm-specific table information: per-family class, entry
+ * count and in-memory entry size.
+ */
+static void
+ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
+{
+	const struct radix_cfg *conf = (struct radix_cfg *)ta_state;
+
+	tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
+
+	/* IPv4 */
+	tinfo->taclass4 = IPFW_TACLASS_RADIX;
+	tinfo->itemsize4 = sizeof(struct radix_addr_entry);
+	tinfo->count4 = conf->count4;
+
+	/* IPv6 */
+	tinfo->taclass6 = IPFW_TACLASS_RADIX;
+	tinfo->itemsize6 = sizeof(struct radix_addr_xentry);
+	tinfo->count6 = conf->count6;
+}
+
+/*
+ * Export the radix entry @e into the userland-visible @tent: key,
+ * prefix length, address family (subtype) and value.
+ *
+ * @e may be either a struct radix_addr_entry or a struct
+ * radix_addr_xentry; the family byte of the embedded key tells them apart.
+ * NOTE(review): this relies on the family field sitting at the same offset
+ * in struct sockaddr_in and struct sa_in6 - confirm for the target ABI.
+ *
+ * Always returns 0. Without INET6, non-IPv4 entries leave @tent untouched.
+ */
+static int
+ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e,
+ ipfw_obj_tentry *tent)
+{
+ struct radix_addr_entry *n;
+ struct radix_addr_xentry *xn;
+
+ n = (struct radix_addr_entry *)e;
+
+ /* Guess IPv4/IPv6 radix by sockaddr family */
+ if (n->addr.sin_family == AF_INET) {
+ tent->k.addr.s_addr = n->addr.sin_addr.s_addr;
+ tent->masklen = n->masklen;
+ tent->subtype = AF_INET;
+ tent->v.kidx = n->value;
+#ifdef INET6
+ } else {
+ /* Anything that is not AF_INET is treated as an IPv6 entry. */
+ xn = (struct radix_addr_xentry *)e;
+ memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr));
+ tent->masklen = xn->masklen;
+ tent->subtype = AF_INET6;
+ tent->v.kidx = xn->value;
+#endif
+ }
+
+ return (0);
+}
+
+/*
+ * Find the entry matching the address in @tent (family taken from
+ * tent->subtype; anything other than AF_INET is handled as IPv6) and, on
+ * success, overwrite @tent with the matched entry's data.
+ *
+ * Returns 0 on a match, ENOENT otherwise.
+ */
+static int
+ta_find_radix_tentry(void *ta_state, struct table_info *ti,
+    ipfw_obj_tentry *tent)
+{
+	struct radix_cfg *conf = (struct radix_cfg *)ta_state;
+	struct radix_node_head *head;
+	void *found;
+
+	if (tent->subtype == AF_INET) {
+		struct sockaddr_in key4;
+
+		KEY_LEN(key4) = KEY_LEN_INET;
+		key4.sin_addr.s_addr = tent->k.addr.s_addr;
+		head = conf->head4;
+		found = head->rnh_matchaddr(&key4, head);
+	} else {
+		struct sa_in6 key6;
+
+		KEY_LEN(key6) = KEY_LEN_INET6;
+		memcpy(&key6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr));
+		head = conf->head6;
+		found = head->rnh_matchaddr(&key6, head);
+	}
+
+	if (found == NULL)
+		return (ENOENT);
+
+	ta_dump_radix_tentry(ta_state, ti, found, tent);
+	return (0);
+}
+
+/*
+ * Invoke @f with @arg on every entry of the table: first the IPv4 tree,
+ * then the IPv6 tree.
+ */
+static void
+ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+    void *arg)
+{
+	struct radix_cfg *conf = (struct radix_cfg *)ta_state;
+	struct radix_node_head *heads[2];
+	int i;
+
+	heads[0] = conf->head4;
+	heads[1] = conf->head6;
+
+	for (i = 0; i < 2; i++)
+		heads[i]->rnh_walktree(heads[i], (walktree_f_t *)f, arg);
+}
+
+
+#ifdef INET6
+/*
+ * Write the leading words of an IPv6 netmask of @mask bits into @addr6.
+ *
+ * Full 32-bit words are set to all-ones, followed by one partial (or
+ * zero) word. Words after that are not touched, so the caller must pass
+ * a zeroed address. Nothing is ever written past the end of @addr6: for
+ * a /128 mask there is no trailing word to store.
+ */
+static inline void
+ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+{
+	uint32_t *cp, *end;
+
+	cp = (uint32_t *)addr6;
+	end = cp + sizeof(*addr6) / sizeof(*cp);
+
+	for (; mask >= 32 && cp < end; mask -= 32)
+		*cp++ = 0xFFFFFFFF;
+	/*
+	 * Skip the trailing store once all four words are filled; 1U avoids
+	 * shifting a signed 1 into the sign bit when mask == 1.
+	 */
+	if (cp < end)
+		*cp = htonl(mask ? ~((1U << (32 - mask)) - 1) : 0);
+}
+#endif
+
+/*
+ * Convert the address/prefix length in @tei into a radix key and mask.
+ *
+ * @sa receives the key (length byte, family and the address ANDed with
+ * the mask); @ma receives the netmask. *@set_mask is set to 1 when the
+ * prefix is shorter than a host route (so the mask must be passed to the
+ * radix code) and to 0 for host routes. Both buffers are expected to be
+ * large enough for the family in tei->subtype.
+ *
+ * For any other subtype nothing is written.
+ */
+static void
+tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
+    struct sockaddr *ma, int *set_mask)
+{
+	int mlen;
+#ifdef INET
+	struct sockaddr_in *addr, *mask;
+	in_addr_t a4;
+#endif
+#ifdef INET6
+	struct sa_in6 *addr6, *mask6;
+#endif
+
+	mlen = tei->masklen;
+
+	if (tei->subtype == AF_INET) {
+#ifdef INET
+		addr = (struct sockaddr_in *)sa;
+		mask = (struct sockaddr_in *)ma;
+		/* Set 'total' structure length */
+		KEY_LEN(*addr) = KEY_LEN_INET;
+		KEY_LEN(*mask) = KEY_LEN_INET;
+		addr->sin_family = AF_INET;
+		/* 1U: shifting a signed 1 by 31 bits (mlen == 1) is undefined. */
+		mask->sin_addr.s_addr =
+		    htonl(mlen ? ~((1U << (32 - mlen)) - 1) : 0);
+		a4 = *((in_addr_t *)tei->paddr);
+		addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr;
+		if (mlen != 32)
+			*set_mask = 1;
+		else
+			*set_mask = 0;
+#endif
+#ifdef INET6
+	} else if (tei->subtype == AF_INET6) {
+		/* IPv6 case */
+		addr6 = (struct sa_in6 *)sa;
+		mask6 = (struct sa_in6 *)ma;
+		/* Set 'total' structure length */
+		KEY_LEN(*addr6) = KEY_LEN_INET6;
+		KEY_LEN(*mask6) = KEY_LEN_INET6;
+		addr6->sin6_family = AF_INET6;
+		ipv6_writemask(&mask6->sin6_addr, mlen);
+		memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr));
+		APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr);
+		if (mlen != 128)
+			*set_mask = 1;
+		else
+			*set_mask = 0;
+#endif
+	/*
+	 * The closing brace stays outside the INET6 conditional so the
+	 * braces balance whether or not INET6 is defined.
+	 */
+	}
+}
+
+/*
+ * First stage of an add request: validate the prefix length, allocate the
+ * new radix entry and fill in its key.
+ *
+ * The entry is stored in tb->ent_ptr; the key lives inside the entry
+ * while the mask lives in the scratch buffer @ta_buf. tb->mask_ptr is
+ * only set for non-host prefixes and is otherwise left as it was
+ * (presumably NULL from a zeroed @ta_buf - verify against the caller).
+ *
+ * Returns 0 on success, EINVAL for an over-long prefix or an unsupported
+ * address family.
+ */
+static int
+ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_radix *tb;
+ struct radix_addr_entry *ent;
+ struct radix_addr_xentry *xent;
+ struct sockaddr *addr, *mask;
+ int mlen, set_mask;
+
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ mlen = tei->masklen;
+ set_mask = 0;
+
+ if (tei->subtype == AF_INET) {
+#ifdef INET
+ if (mlen > 32)
+ return (EINVAL);
+ ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+ ent->masklen = mlen;
+
+ /* Key is built in place inside the new entry. */
+ addr = (struct sockaddr *)&ent->addr;
+ mask = (struct sockaddr *)&tb->addr.a4.ma;
+ tb->ent_ptr = ent;
+#endif
+#ifdef INET6
+ } else if (tei->subtype == AF_INET6) {
+ /* IPv6 case */
+ if (mlen > 128)
+ return (EINVAL);
+ xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+ xent->masklen = mlen;
+
+ /* Key is built in place inside the new entry. */
+ addr = (struct sockaddr *)&xent->addr6;
+ mask = (struct sockaddr *)&tb->addr.a6.ma;
+ tb->ent_ptr = xent;
+#endif
+ } else {
+ /* Unknown CIDR type */
+ return (EINVAL);
+ }
+
+ tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
+ /* Set pointers */
+ tb->addr_ptr = addr;
+ if (set_mask != 0)
+ tb->mask_ptr = mask;
+
+ return (0);
+}
+
+/*
+ * Mirror an IPv4 table change into the DXR instance stored in ti->state.
+ *
+ * @req: 0 deletes the prefix, non-zero adds it. An add for an entry
+ *       flagged TEI_FLAGS_UPDATED is issued as delete followed by add.
+ * @tei: entry being changed; tei->paddr is read as a struct in_addr.
+ *
+ * A /0 prefix is never sent to DXR: its value index is kept in the low
+ * 16 bits of ti->data, where ta_lookup_dxr() reads it when DXR reports
+ * no match.
+ *
+ * Returns the result of the last dxr_request() issued, or 0 for /0.
+ */
+static int
+dxr_req(struct table_info *ti, int req, struct tentry_info *tei)
+{
+	struct dxr_instance *di;
+	struct in_addr *a;
+	int error;
+
+	if (tei->masklen == 0) {
+
+		/*
+		 * Handle 'default route' case - store
+		 * value index in lower 16 bits of ti->data
+		 */
+		ti->data &= ~((u_long)0xFFFF);
+		if (req != 0)
+			ti->data |= tei->value & 0xFFFF;
+		return (0);
+	}
+
+	di = (struct dxr_instance *)ti->state;
+	a = (struct in_addr *)tei->paddr;
+	error = 0;
+
+#ifdef DXR_BUILD_DEBUG
+	char kbuf[16];
+	inet_ntop(AF_INET, tei->paddr, kbuf, sizeof(kbuf));
+	printf("%s for %s/%d value [%d]\n", (req == 0) ? "DEL":"ADD", kbuf,
+	    tei->masklen, tei->value);
+#endif
+
+	/* Delete old record */
+	if (req == 0 || (tei->flags & TEI_FLAGS_UPDATED) != 0) {
+		error = dxr_request(di, RTM_DELETE, *a, tei->masklen, 1);
+		if (error != 0)
+			printf("error doing del dxr_req\n");
+	}
+	/* Add new record */
+	if (req != 0) {
+		error = dxr_request(di, RTM_ADD, *a, tei->masklen, 1);
+		if (error != 0)
+			printf("error doing add dxr_req\n");
+	}
+
+	return (error);
+}
+
+/*
+ * Second stage of an add request: insert the entry prepared by
+ * ta_prepare_add_radix() into the radix tree, or update the value of an
+ * existing entry, and mirror IPv4 changes into DXR.
+ *
+ * On update the previous value is returned to the caller in tei->value,
+ * TEI_FLAGS_UPDATED is set and *pnum is 0; the prepared entry stays in
+ * tb->ent_ptr. On insertion tb->ent_ptr is cleared (the tree now
+ * references the entry) and *pnum is 1.
+ *
+ * Returns 0 on success, EEXIST if the prefix exists and no update was
+ * requested, EFBIG if TEI_FLAGS_DONTADD is set, EINVAL if the radix
+ * insert fails.
+ *
+ * NOTE(review): the return value of dxr_req() is ignored on both paths.
+ */
+static int
+ta_add_dxr(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+ void *ta_buf, uint32_t *pnum)
+{
+ struct radix_cfg *cfg;
+ struct radix_node_head *rnh;
+ struct radix_node *rn;
+ struct ta_buf_radix *tb;
+ uint32_t *old_value, value;
+
+ cfg = (struct radix_cfg *)ta_state;
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ /* Save current entry value from @tei */
+ if (tei->subtype == AF_INET) {
+ rnh = cfg->head4;
+ ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value;
+ } else {
+ rnh = ti->xstate;
+ ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value;
+ }
+
+ /* Search for an entry first */
+ rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, rnh);
+ if (rn != NULL) {
+ if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
+ return (EEXIST);
+ /* Record already exists. Update value if we're asked to */
+ if (tei->subtype == AF_INET)
+ old_value = &((struct radix_addr_entry *)rn)->value;
+ else
+ old_value = &((struct radix_addr_xentry *)rn)->value;
+
+ /* Indicate that update has happened instead of addition */
+ tei->flags |= TEI_FLAGS_UPDATED;
+
+ /*
+ * Update DXR data. Must run before the swap below so that
+ * dxr_req() still sees the new value in tei->value.
+ */
+ if (tei->subtype == AF_INET)
+ dxr_req(ti, 1, tei);
+
+ /* Swap: store the new value, hand the old one back in @tei. */
+ value = *old_value;
+ *old_value = tei->value;
+ tei->value = value;
+
+ *pnum = 0;
+
+ return (0);
+ }
+
+ if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
+ return (EFBIG);
+
+ rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, rnh, tb->ent_ptr);
+ if (rn == NULL) {
+ /* Unknown error */
+ return (EINVAL);
+ }
+
+ if (tei->subtype == AF_INET) {
+ dxr_req(ti, 1, tei);
+ cfg->count4++;
+ } else
+ cfg->count6++;
+ /* The tree references the entry now; keep flush_entry from freeing it. */
+ tb->ent_ptr = NULL;
+ *pnum = 1;
+
+ return (0);
+}
+
+/*
+ * First stage of a delete request: validate the prefix length and build
+ * the radix key/mask for the prefix inside the scratch buffer @ta_buf.
+ *
+ * tb->mask_ptr is only set for non-host prefixes and is otherwise left
+ * as it was (presumably NULL from a zeroed @ta_buf - verify against the
+ * caller).
+ *
+ * Returns 0 on success, EINVAL for an over-long prefix or an unsupported
+ * address family.
+ */
+static int
+ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_radix *tb;
+ struct sockaddr *addr, *mask;
+ int mlen, set_mask;
+
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ mlen = tei->masklen;
+ set_mask = 0;
+
+ if (tei->subtype == AF_INET) {
+ if (mlen > 32)
+ return (EINVAL);
+
+ addr = (struct sockaddr *)&tb->addr.a4.sa;
+ mask = (struct sockaddr *)&tb->addr.a4.ma;
+#ifdef INET6
+ } else if (tei->subtype == AF_INET6) {
+ if (mlen > 128)
+ return (EINVAL);
+
+ addr = (struct sockaddr *)&tb->addr.a6.sa;
+ mask = (struct sockaddr *)&tb->addr.a6.ma;
+#endif
+ } else
+ return (EINVAL);
+
+ tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
+ tb->addr_ptr = addr;
+ if (set_mask != 0)
+ tb->mask_ptr = mask;
+
+ return (0);
+}
+
+/*
+ * Second stage of a delete request: unlink the prefix prepared in
+ * @ta_buf from the radix tree and, for IPv4, from DXR as well.
+ *
+ * The removed entry's value is returned in tei->value and the entry
+ * itself is parked in tb->ent_ptr so that ta_flush_radix_entry() frees it.
+ *
+ * Returns 0 with *pnum set to 1 on success, ENOENT if the prefix is not
+ * in the tree.
+ */
+static int
+ta_del_dxr(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+    void *ta_buf, uint32_t *pnum)
+{
+	struct radix_cfg *conf = (struct radix_cfg *)ta_state;
+	struct ta_buf_radix *scratch = (struct ta_buf_radix *)ta_buf;
+	struct radix_node_head *head;
+	struct radix_node *gone;
+	int is_v4;
+
+	is_v4 = (tei->subtype == AF_INET);
+	head = is_v4 ? conf->head4 : conf->head6;
+
+	gone = head->rnh_deladdr(scratch->addr_ptr, scratch->mask_ptr, head);
+	if (gone == NULL)
+		return (ENOENT);
+
+	/* Keep the unlinked node around for the flush stage. */
+	scratch->ent_ptr = gone;
+
+	if (is_v4) {
+		/* Save entry value to @tei, then drop the prefix from DXR. */
+		tei->value = ((struct radix_addr_entry *)gone)->value;
+		dxr_req(ti, 0, tei);
+		conf->count4--;
+	} else {
+		tei->value = ((struct radix_addr_xentry *)gone)->value;
+		conf->count6--;
+	}
+
+	*pnum = 1;
+	return (0);
+}
+
+/*
+ * Final stage of an add/delete request: free whatever entry is still
+ * parked in the scratch buffer (an unused prepared entry or a node that
+ * was removed from the tree).
+ */
+static void
+ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_radix *scratch = (struct ta_buf_radix *)ta_buf;
+
+	if (scratch->ent_ptr == NULL)
+		return;
+
+	free(scratch->ent_ptr, M_IPFW_TBL);
+}
+
+/*
+ * Tells the table framework whether the algorithm needs to be resized or
+ * rebuilt before more entries are added. Always returns 0; @pflags is
+ * not touched.
+ */
+static int
+ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count,
+ uint64_t *pflags)
+{
+
+ /*
+ * radix does not require additional memory allocations
+ * other than the nodes themselves. Adding new masks to the tree
+ * does, but we don't have any API to call (and we don't know
+ * which sizes we would need).
+ */
+ return (0);
+}
+
+/*
+ * Algorithm descriptor for "addr:dxr": IPv4 lookups go through DXR, and
+ * the radix-based callbacks handle everything else (entry storage, IPv6,
+ * dumping and searching).
+ */
+struct table_algo addr_dxr = {
+ .name = "addr:dxr",
+ .type = IPFW_TABLE_ADDR,
+ .flags = TA_FLAG_DEFAULT,
+ .ta_buf_size = sizeof(struct ta_buf_radix),
+ .init = ta_init_dxr,
+ .destroy = ta_destroy_dxr,
+ .prepare_add = ta_prepare_add_radix,
+ .prepare_del = ta_prepare_del_radix,
+ .add = ta_add_dxr,
+ .del = ta_del_dxr,
+ .flush_entry = ta_flush_radix_entry,
+ .foreach = ta_foreach_radix,
+ .dump_tentry = ta_dump_radix_tentry,
+ .find_tentry = ta_find_radix_tentry,
+ .dump_tinfo = ta_dump_radix_tinfo,
+ .need_modify = ta_need_modify_radix,
+};
+
Added: projects/ipfw/sys/netpfil/ipfw/dxr_fwd.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/ipfw/sys/netpfil/ipfw/dxr_fwd.c Sun Sep 21 18:15:09 2014 (r271932)
@@ -0,0 +1,2424 @@
+#define DXR_DIRECT_BITS 18
+#define ALLOW_OOO_EXEC
+#define DXR_LOOKUP_TIMING
+//#define DIR_24_8
+//#define RADIX_TIMING
+//#define DXR_ITER_TIMING
+//#define REPEAT_SAME_KEY
+#define DXR_LOOKUP_CONSISTENCY_CHECK
+
+/*
+ * Copyright (c) 2005-2012 University of Zagreb
+ * Copyright (c) 2005 International Computer Science Institute
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/* Compile-time tunables, overriding defaults from ip_fib.h */
+#define DXR_VPORTS_MAX 1024
+
+/* Debugging options */
+#define DXR_BUILD_TIMING
+#define DXR_BUILD_PARANOIC
+//#define DXR_BUILD_DEBUG
+
+#if defined(DXR_ITER_TIMING) && defined(DXR_LOOKUP_TIMING)
+#error DXR_ITER_TIMING and DXR_LOOKUP_TIMING are mutualy exclusive
+#endif
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/unistd.h>
+
+#include <net/vnet.h>
+#include <net/if.h>
+#include <net/netisr.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+
+#include <machine/clock.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include "dxr_fwd.h"
+
+#if 0
+static uint16_t nexthop_ref(struct in_addr, struct ifnet *);
+static int nexthop_unref(uint16_t);
+#endif
+static void schedule_update(struct dxr_instance *di, struct in_addr dst,
+ int mlen);
+static void update_chunk(struct dxr_instance *, int);
+static void update_chunk_long(struct dxr_instance *, int);
+static int dxr_walk(struct dxr_instance *di, in_addr_t dst, in_addr_t mask,
+ int nh, void *arg);
+static int dxr_walk_long(struct dxr_instance *di, in_addr_t dst, in_addr_t mask,
+ int nh, void *arg);
+static void dxr_initheap(struct dxr_instance *, uint32_t, uint32_t);
+static void dxr_heap_inject(struct dxr_instance*, uint32_t, uint32_t, int, int);
+static int dxr_parse(struct dxr_instance *, int, uint32_t, uint32_t, int, int);
+static int dxr_parse_long(struct dxr_instance *, int, uint32_t, uint32_t,
+ int, int);
+static void prune_empty_chunks(struct dxr_instance *);
+static void chunk_ref(struct dxr_instance *, int);
+static void chunk_unref(struct dxr_instance *, int);
+static void apply_pending(struct dxr_instance *);
+static void dxr_check_tables(struct dxr_instance *di);
+
+static int radix_lookup(struct dxr_instance *di, uint32_t dst);
+
+#ifdef DIR_24_8
+#if (DXR_DIRECT_BITS != 24)
+#error DXR_DIRECT_BITS must be set to 24 when DIR_24_8 is configured
+#endif
+static void dir_24_8_rebuild(void);
+static int dir_24_8_lookup(uint32_t);
+#endif
+
+#if defined(DXR_LOOKUP_TIMING) || defined(DXR_ITER_TIMING) || defined(RADIX_TIMING)
+static void dxr_lookup_exercise(void *arg);
+#endif
+
+#ifdef DXR_BUILD_DEBUG
+static void dxr_heap_dump(void);
+static void dxr_chunk_dump(int);
+static void print_in_route(struct rtentry *, const char *);
+#endif
+
+#if defined(DXR_LOOKUP_TIMING) || defined(DXR_ITER_TIMING) || defined(RADIX_TIMING)
+static DPCPU_DEFINE(int, valid_timing);
+static int ex_preload;
+static int ex_threads;
+static int ex_iters = 100000;
+
+struct iter_stat {
+ uint64_t cnt;
+ uint64_t cycles;
+} static iter_stats[MAXCPU][32];
+
+static int reduce;
+static int rdtsc_latency;
+
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list