svn commit: r328988 - in head/sys: conf modules/ipfw netinet netpfil/ipfw

Adrian Chadd adrian.chadd at gmail.com
Fri Mar 23 07:37:34 UTC 2018


Hi!

It looks like this broke on mips32. I posted in -net on this.

In addition, I can't even build the Concurrency Kit regression suite
from GitHub. It just plainly fails to build, so I can't even attempt
to cross-compile it for mips to test things.

Did you test this on any platform besides 64-bit Intel?

Thanks!



-adrian


On 7 February 2018 at 10:59, Andrey V. Elsukov <ae at freebsd.org> wrote:
> Author: ae
> Date: Wed Feb  7 18:59:54 2018
> New Revision: 328988
> URL: https://svnweb.freebsd.org/changeset/base/328988
>
> Log:
>   Rework ipfw dynamic states implementation to be lockless on fast path.
>
>   o added struct ipfw_dyn_info that keeps all information needed by
>     ipfw_chk and by the dynamic states implementation;
>   o added DYN_LOOKUP_NEEDED() macro that can be used to determine the
>     need of new lookup of dynamic states;
>   o ipfw_dyn_rule now becomes obsolete. Currently it is used only to
>     pass information from kernel to userland.
>   o IPv4 and IPv6 states now described by different structures
>     dyn_ipv4_state and dyn_ipv6_state;
>   o IPv6 scope zones support is added;
>   o ipfw(4) now depends on Concurrency Kit;
>   o states are linked with "entry" field using CK_SLIST. This allows
>     lockless lookups and mutex-protected modifications.
>   o the "expired" SLIST field is used for states expiring.
>   o struct dyn_data is used to keep generic information for both IPv4
>     and IPv6;
>   o struct dyn_parent is used to keep O_LIMIT_PARENT information;
>   o IPv4 and IPv6 states are stored in different hash tables;
>   o O_LIMIT_PARENT states now are kept separately from O_LIMIT and
>     O_KEEP_STATE states;
>   o per-cpu dyn_hp pointers are used to implement hazard pointers and they
>     prevent freeing states that are locklessly used by lookup threads;
>   o mutexes to protect modification of lists in hash tables now kept in
>     separate arrays. 65535 limit to maximum number of hash buckets now
>     removed.
>   o Separate lookup and install functions added for IPv4 and IPv6 states
>     and for parent states.
>   o The Jenkins hash function is now used by default.
>
>   Obtained from:        Yandex LLC
>   MFC after:    42 days
>   Sponsored by: Yandex LLC
>   Differential Revision:        https://reviews.freebsd.org/D12685
>
> Modified:
>   head/sys/conf/files
>   head/sys/modules/ipfw/Makefile
>   head/sys/netinet/ip_fw.h
>   head/sys/netpfil/ipfw/ip_fw2.c
>   head/sys/netpfil/ipfw/ip_fw_dynamic.c
>   head/sys/netpfil/ipfw/ip_fw_private.h
>   head/sys/netpfil/ipfw/ip_fw_sockopt.c
>
> Modified: head/sys/conf/files
> ==============================================================================
> --- head/sys/conf/files Wed Feb  7 18:50:36 2018        (r328987)
> +++ head/sys/conf/files Wed Feb  7 18:59:54 2018        (r328988)
> @@ -4374,7 +4374,8 @@ netpfil/ipfw/ip_dn_io.c           optional inet dummynet
>  netpfil/ipfw/ip_dn_glue.c      optional inet dummynet
>  netpfil/ipfw/ip_fw2.c          optional inet ipfirewall
>  netpfil/ipfw/ip_fw_bpf.c       optional inet ipfirewall
> -netpfil/ipfw/ip_fw_dynamic.c   optional inet ipfirewall
> +netpfil/ipfw/ip_fw_dynamic.c   optional inet ipfirewall \
> +       compile-with "${NORMAL_C} -I$S/contrib/ck/include"
>  netpfil/ipfw/ip_fw_eaction.c   optional inet ipfirewall
>  netpfil/ipfw/ip_fw_log.c       optional inet ipfirewall
>  netpfil/ipfw/ip_fw_pfil.c      optional inet ipfirewall
>
> Modified: head/sys/modules/ipfw/Makefile
> ==============================================================================
> --- head/sys/modules/ipfw/Makefile      Wed Feb  7 18:50:36 2018        (r328987)
> +++ head/sys/modules/ipfw/Makefile      Wed Feb  7 18:59:54 2018        (r328988)
> @@ -9,7 +9,7 @@ SRCS+=  ip_fw_sockopt.c ip_fw_table.c ip_fw_table_algo.
>  SRCS+= ip_fw_table_value.c
>  SRCS+= opt_inet.h opt_inet6.h opt_ipdivert.h opt_ipfw.h
>
> -CFLAGS+= -DIPFIREWALL
> +CFLAGS+= -DIPFIREWALL -I${SRCTOP}/sys/contrib/ck/include
>  #
>  #If you want it verbose
>  #CFLAGS+= -DIPFIREWALL_VERBOSE
>
> Modified: head/sys/netinet/ip_fw.h
> ==============================================================================
> --- head/sys/netinet/ip_fw.h    Wed Feb  7 18:50:36 2018        (r328987)
> +++ head/sys/netinet/ip_fw.h    Wed Feb  7 18:59:54 2018        (r328988)
> @@ -671,7 +671,7 @@ struct ipfw_flow_id {
>         uint32_t        src_ip;
>         uint16_t        dst_port;
>         uint16_t        src_port;
> -       uint8_t         fib;
> +       uint8_t         fib;    /* XXX: must be uint16_t */
>         uint8_t         proto;
>         uint8_t         _flags; /* protocol-specific flags */
>         uint8_t         addr_type; /* 4=ip4, 6=ip6, 1=ether ? */
> @@ -682,6 +682,7 @@ struct ipfw_flow_id {
>  };
>  #endif
>
> +#define        IS_IP4_FLOW_ID(id)      ((id)->addr_type == 4)
>  #define IS_IP6_FLOW_ID(id)     ((id)->addr_type == 6)
>
>  /*
>
> Modified: head/sys/netpfil/ipfw/ip_fw2.c
> ==============================================================================
> --- head/sys/netpfil/ipfw/ip_fw2.c      Wed Feb  7 18:50:36 2018        (r328987)
> +++ head/sys/netpfil/ipfw/ip_fw2.c      Wed Feb  7 18:59:54 2018        (r328988)
> @@ -1387,8 +1387,7 @@ ipfw_chk(struct ip_fw_args *args)
>          *      MATCH_NONE when checked and not matched (q = NULL),
>          *      MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
>          */
> -       int dyn_dir = MATCH_UNKNOWN;
> -       uint16_t dyn_name = 0;
> +       struct ipfw_dyn_info dyn_info;
>         struct ip_fw *q = NULL;
>         struct ip_fw_chain *chain = &V_layer3_chain;
>
> @@ -1420,6 +1419,7 @@ ipfw_chk(struct ip_fw_args *args)
>         proto = args->f_id.proto = 0;   /* mark f_id invalid */
>                 /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
>
> +       DYN_INFO_INIT(&dyn_info);
>  /*
>   * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
>   * then it sets p to point at the offset "len" in the mbuf. WARNING: the
> @@ -2605,7 +2605,8 @@ do {                                                              \
>                         case O_LIMIT:
>                         case O_KEEP_STATE:
>                                 if (ipfw_dyn_install_state(chain, f,
> -                                   (ipfw_insn_limit *)cmd, args, tablearg)) {
> +                                   (ipfw_insn_limit *)cmd, args, ulp,
> +                                   pktlen, &dyn_info, tablearg)) {
>                                         /* error or limit violation */
>                                         retval = IP_FW_DENY;
>                                         l = 0;  /* exit inner loop */
> @@ -2619,34 +2620,15 @@ do {                                                            \
>                                 /*
>                                  * dynamic rules are checked at the first
>                                  * keep-state or check-state occurrence,
> -                                * with the result being stored in dyn_dir
> -                                * and dyn_name.
> +                                * with the result being stored in dyn_info.
>                                  * The compiler introduces a PROBE_STATE
>                                  * instruction for us when we have a
>                                  * KEEP_STATE (because PROBE_STATE needs
>                                  * to be run first).
> -                                *
> -                                * (dyn_dir == MATCH_UNKNOWN) means this is
> -                                * first lookup for such f_id. Do lookup.
> -                                *
> -                                * (dyn_dir != MATCH_UNKNOWN &&
> -                                *  dyn_name != 0 && dyn_name != cmd->arg1)
> -                                * means previous lookup didn't find dynamic
> -                                * rule for specific state name and current
> -                                * lookup will search rule with another state
> -                                * name. Redo lookup.
> -                                *
> -                                * (dyn_dir != MATCH_UNKNOWN && dyn_name == 0)
> -                                * means previous lookup was for `any' name
> -                                * and it didn't find rule. No need to do
> -                                * lookup again.
>                                  */
> -                               if ((dyn_dir == MATCH_UNKNOWN ||
> -                                   (dyn_name != 0 &&
> -                                   dyn_name != cmd->arg1)) &&
> -                                   (q = ipfw_dyn_lookup_state(&args->f_id,
> -                                    ulp, pktlen, &dyn_dir,
> -                                    (dyn_name = cmd->arg1))) != NULL) {
> +                               if (DYN_LOOKUP_NEEDED(&dyn_info, cmd) &&
> +                                   (q = ipfw_dyn_lookup_state(args, ulp,
> +                                   pktlen, cmd, &dyn_info)) != NULL) {
>                                         /*
>                                          * Found dynamic entry, jump to the
>                                          * 'action' part of the parent rule
> @@ -2654,13 +2636,7 @@ do {                                                             \
>                                          * cmdlen.
>                                          */
>                                         f = q;
> -                                       /* XXX we would like to have f_pos
> -                                        * readily accessible in the dynamic
> -                                        * rule, instead of having to
> -                                        * lookup q->rule.
> -                                        */
> -                                       f_pos = ipfw_find_rule(chain,
> -                                           f->rulenum, f->id);
> +                                       f_pos = dyn_info.f_pos;
>                                         cmd = ACTION_PTR(f);
>                                         l = f->cmd_len - f->act_ofs;
>                                         cmdlen = 0;
> @@ -2877,7 +2853,8 @@ do {                                                              \
>                         case O_FORWARD_IP:
>                                 if (args->eh)   /* not valid on layer2 pkts */
>                                         break;
> -                               if (q != f || dyn_dir == MATCH_FORWARD) {
> +                               if (q != f ||
> +                                   dyn_info.direction == MATCH_FORWARD) {
>                                     struct sockaddr_in *sa;
>
>                                     sa = &(((ipfw_insn_sa *)cmd)->sa);
> @@ -2937,7 +2914,8 @@ do {                                                              \
>                         case O_FORWARD_IP6:
>                                 if (args->eh)   /* not valid on layer2 pkts */
>                                         break;
> -                               if (q != f || dyn_dir == MATCH_FORWARD) {
> +                               if (q != f ||
> +                                   dyn_info.direction == MATCH_FORWARD) {
>                                         struct sockaddr_in6 *sin6;
>
>                                         sin6 = &(((ipfw_insn_sa6 *)cmd)->sa);
> @@ -3089,7 +3067,7 @@ do {                                                              \
>                                          * @args content, and it may be
>                                          * used for new state lookup later.
>                                          */
> -                                       dyn_dir = MATCH_UNKNOWN;
> +                                       DYN_INFO_INIT(&dyn_info);
>                                 }
>                                 break;
>
>
> Modified: head/sys/netpfil/ipfw/ip_fw_dynamic.c
> ==============================================================================
> --- head/sys/netpfil/ipfw/ip_fw_dynamic.c       Wed Feb  7 18:50:36 2018        (r328987)
> +++ head/sys/netpfil/ipfw/ip_fw_dynamic.c       Wed Feb  7 18:59:54 2018        (r328988)
> @@ -1,6 +1,8 @@
>  /*-
>   * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
>   *
> + * Copyright (c) 2017-2018 Yandex LLC
> + * Copyright (c) 2017-2018 Andrey V. Elsukov <ae at FreeBSD.org>
>   * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
>   *
>   * Redistribution and use in source and binary forms, with or without
> @@ -28,32 +30,27 @@
>  #include <sys/cdefs.h>
>  __FBSDID("$FreeBSD$");
>
> -#define        DEB(x)
> -#define        DDB(x) x
> -
> -/*
> - * Dynamic rule support for ipfw
> - */
> -
> -#include "opt_ipfw.h"
>  #include "opt_inet.h"
> +#include "opt_inet6.h"
> +#include "opt_ipfw.h"
>  #ifndef INET
>  #error IPFIREWALL requires INET.
>  #endif /* INET */
> -#include "opt_inet6.h"
>
>  #include <sys/param.h>
>  #include <sys/systm.h>
> -#include <sys/malloc.h>
> +#include <sys/hash.h>
>  #include <sys/mbuf.h>
>  #include <sys/kernel.h>
> -#include <sys/ktr.h>
>  #include <sys/lock.h>
> +#include <sys/pcpu.h>
> +#include <sys/queue.h>
>  #include <sys/rmlock.h>
> +#include <sys/smp.h>
>  #include <sys/socket.h>
>  #include <sys/sysctl.h>
>  #include <sys/syslog.h>
> -#include <net/ethernet.h> /* for ETHERTYPE_IP */
> +#include <net/ethernet.h>
>  #include <net/if.h>
>  #include <net/if_var.h>
>  #include <net/pfil.h>
> @@ -61,8 +58,9 @@ __FBSDID("$FreeBSD$");
>
>  #include <netinet/in.h>
>  #include <netinet/ip.h>
> -#include <netinet/ip_var.h>    /* ip_defttl */
> +#include <netinet/ip_var.h>
>  #include <netinet/ip_fw.h>
> +#include <netinet/ip_var.h>
>  #include <netinet/tcp_var.h>
>  #include <netinet/udp.h>
>
> @@ -70,6 +68,7 @@ __FBSDID("$FreeBSD$");
>  #ifdef INET6
>  #include <netinet6/in6_var.h>
>  #include <netinet6/ip6_var.h>
> +#include <netinet6/scope6_var.h>
>  #endif
>
>  #include <netpfil/ipfw/ip_fw_private.h>
> @@ -79,86 +78,261 @@ __FBSDID("$FreeBSD$");
>  #ifdef MAC
>  #include <security/mac/mac_framework.h>
>  #endif
> +#include <ck_queue.h>
>
>  /*
> - * Description of dynamic rules.
> + * Description of dynamic states.
>   *
> - * Dynamic rules are stored in lists accessed through a hash table
> - * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
> - * be modified through the sysctl variable dyn_buckets which is
> - * updated when the table becomes empty.
> + * Dynamic states are stored in lists accessed through hash tables
> + * whose size is curr_dyn_buckets. This value can be modified through
> + * the sysctl variable dyn_buckets.
>   *
> - * XXX currently there is only one list, ipfw_dyn.
> + * Currently there are four tables: dyn_ipv4, dyn_ipv6, dyn_ipv4_parent,
> + * and dyn_ipv6_parent.
>   *
> - * When a packet is received, its address fields are first masked
> - * with the mask defined for the rule, then hashed, then matched
> - * against the entries in the corresponding list.
> - * Dynamic rules can be used for different purposes:
> + * When a packet is received, its address fields are hashed, then matched
> + * against the entries in the corresponding list by addr_type.
> + * Dynamic states can be used for different purposes:
>   *  + stateful rules;
>   *  + enforcing limits on the number of sessions;
>   *  + in-kernel NAT (not implemented yet)
>   *
> - * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
> + * The lifetime of dynamic states is regulated by dyn_*_lifetime,
>   * measured in seconds and depending on the flags.
>   *
> - * The total number of dynamic rules is equal to UMA zone items count.
> - * The max number of dynamic rules is dyn_max. When we reach
> + * The total number of dynamic states is equal to UMA zone items count.
> + * The max number of dynamic states is dyn_max. When we reach
>   * the maximum number of rules we do not create anymore. This is
>   * done to avoid consuming too much memory, but also too much
>   * time when searching on each packet (ideally, we should try instead
>   * to put a limit on the length of the list on each bucket...).
>   *
> - * Each dynamic rule holds a pointer to the parent ipfw rule so
> - * we know what action to perform. Dynamic rules are removed when
> - * the parent rule is deleted. This can be changed by dyn_keep_states
> - * sysctl.
> + * Each state holds a pointer to the parent ipfw rule so we know what
> + * action to perform. Dynamic rules are removed when the parent rule is
> + * deleted.
>   *
>   * There are some limitations with dynamic rules -- we do not
>   * obey the 'randomized match', and we do not do multiple
>   * passes through the firewall. XXX check the latter!!!
>   */
>
> -struct ipfw_dyn_bucket {
> -       struct mtx      mtx;            /* Bucket protecting lock */
> -       ipfw_dyn_rule   *head;          /* Pointer to first rule */
> +/* By default use jenkins hash function */
> +#define        IPFIREWALL_JENKINSHASH
> +
> +#define        DYN_COUNTER_INC(d, dir, pktlen) do {    \
> +       (d)->pcnt_ ## dir++;                    \
> +       (d)->bcnt_ ## dir += pktlen;            \
> +       } while (0)
> +
> +struct dyn_data {
> +       void            *parent;        /* pointer to parent rule */
> +       uint32_t        chain_id;       /* cached ruleset id */
> +       uint32_t        f_pos;          /* cached rule index */
> +
> +       uint32_t        hashval;        /* hash value used for hash resize */
> +       uint16_t        fibnum;         /* fib used to send keepalives */
> +       uint8_t         _pad[3];
> +       uint8_t         set;            /* parent rule set number */
> +       uint16_t        rulenum;        /* parent rule number */
> +       uint32_t        ruleid;         /* parent rule id */
> +
> +       uint32_t        state;          /* TCP session state and flags */
> +       uint32_t        ack_fwd;        /* most recent ACKs in forward */
> +       uint32_t        ack_rev;        /* and reverse direction (used */
> +                                       /* to generate keepalives) */
> +       uint32_t        sync;           /* synchronization time */
> +       uint32_t        expire;         /* expire time */
> +
> +       uint64_t        pcnt_fwd;       /* packets counter in forward */
> +       uint64_t        bcnt_fwd;       /* bytes counter in forward */
> +       uint64_t        pcnt_rev;       /* packets counter in reverse */
> +       uint64_t        bcnt_rev;       /* bytes counter in reverse */
>  };
>
> +#define        DPARENT_COUNT_DEC(p)    do {                    \
> +       MPASS(p->count > 0);                            \
> +       ck_pr_dec_32(&(p)->count);                      \
> +} while (0)
> +#define        DPARENT_COUNT_INC(p)    ck_pr_inc_32(&(p)->count)
> +#define        DPARENT_COUNT(p)        ck_pr_load_32(&(p)->count)
> +struct dyn_parent {
> +       void            *parent;        /* pointer to parent rule */
> +       uint32_t        count;          /* number of linked states */
> +       uint8_t         _pad;
> +       uint8_t         set;            /* parent rule set number */
> +       uint16_t        rulenum;        /* parent rule number */
> +       uint32_t        ruleid;         /* parent rule id */
> +       uint32_t        hashval;        /* hash value used for hash resize */
> +       uint32_t        expire;         /* expire time */
> +};
> +
> +struct dyn_ipv4_state {
> +       uint8_t         type;           /* State type */
> +       uint8_t         proto;          /* UL Protocol */
> +       uint16_t        kidx;           /* named object index */
> +       uint16_t        sport, dport;   /* ULP source and destination ports */
> +       in_addr_t       src, dst;       /* IPv4 source and destination */
> +
> +       union {
> +               struct dyn_data *data;
> +               struct dyn_parent *limit;
> +       };
> +       CK_SLIST_ENTRY(dyn_ipv4_state)  entry;
> +       SLIST_ENTRY(dyn_ipv4_state)     expired;
> +};
> +CK_SLIST_HEAD(dyn_ipv4ck_slist, dyn_ipv4_state);
> +static VNET_DEFINE(struct dyn_ipv4ck_slist *, dyn_ipv4);
> +static VNET_DEFINE(struct dyn_ipv4ck_slist *, dyn_ipv4_parent);
> +
> +SLIST_HEAD(dyn_ipv4_slist, dyn_ipv4_state);
> +static VNET_DEFINE(struct dyn_ipv4_slist, dyn_expired_ipv4);
> +#define        V_dyn_ipv4                      VNET(dyn_ipv4)
> +#define        V_dyn_ipv4_parent               VNET(dyn_ipv4_parent)
> +#define        V_dyn_expired_ipv4              VNET(dyn_expired_ipv4)
> +
> +#ifdef INET6
> +struct dyn_ipv6_state {
> +       uint8_t         type;           /* State type */
> +       uint8_t         proto;          /* UL Protocol */
> +       uint16_t        kidx;           /* named object index */
> +       uint16_t        sport, dport;   /* ULP source and destination ports */
> +       struct in6_addr src, dst;       /* IPv6 source and destination */
> +       uint32_t        zoneid;         /* IPv6 scope zone id */
> +       union {
> +               struct dyn_data *data;
> +               struct dyn_parent *limit;
> +       };
> +       CK_SLIST_ENTRY(dyn_ipv6_state)  entry;
> +       SLIST_ENTRY(dyn_ipv6_state)     expired;
> +};
> +CK_SLIST_HEAD(dyn_ipv6ck_slist, dyn_ipv6_state);
> +static VNET_DEFINE(struct dyn_ipv6ck_slist *, dyn_ipv6);
> +static VNET_DEFINE(struct dyn_ipv6ck_slist *, dyn_ipv6_parent);
> +
> +SLIST_HEAD(dyn_ipv6_slist, dyn_ipv6_state);
> +static VNET_DEFINE(struct dyn_ipv6_slist, dyn_expired_ipv6);
> +#define        V_dyn_ipv6                      VNET(dyn_ipv6)
> +#define        V_dyn_ipv6_parent               VNET(dyn_ipv6_parent)
> +#define        V_dyn_expired_ipv6              VNET(dyn_expired_ipv6)
> +#endif /* INET6 */
> +
>  /*
> - * Static variables followed by global ones
> + * Per-CPU pointer indicates that specified state is currently in use
> + * and must not be reclaimed by expiration callout.
>   */
> -static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v);
> -static VNET_DEFINE(u_int32_t, dyn_buckets_max);
> -static VNET_DEFINE(u_int32_t, curr_dyn_buckets);
> -static VNET_DEFINE(struct callout, ipfw_timeout);
> -#define        V_ipfw_dyn_v                    VNET(ipfw_dyn_v)
> -#define        V_dyn_buckets_max               VNET(dyn_buckets_max)
> -#define        V_curr_dyn_buckets              VNET(curr_dyn_buckets)
> -#define V_ipfw_timeout                  VNET(ipfw_timeout)
> +static void **dyn_hp_cache;
> +static DPCPU_DEFINE(void *, dyn_hp);
> +#define        DYNSTATE_GET(cpu)       ck_pr_load_ptr(DPCPU_ID_PTR((cpu), dyn_hp))
> +#define        DYNSTATE_PROTECT(v)     ck_pr_store_ptr(DPCPU_PTR(dyn_hp), (v))
> +#define        DYNSTATE_RELEASE()      DYNSTATE_PROTECT(NULL)
> +#define        DYNSTATE_CRITICAL_ENTER()       critical_enter()
> +#define        DYNSTATE_CRITICAL_EXIT()        do {    \
> +       DYNSTATE_RELEASE();                     \
> +       critical_exit();                        \
> +} while (0);
>
> -static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone);
> -#define        V_ipfw_dyn_rule_zone            VNET(ipfw_dyn_rule_zone)
> +/*
> + * We keep two version numbers, one is updated when new entry added to
> + * the list. Second is updated when an entry deleted from the list.
> + * Versions are updated under bucket lock.
> + *
> + * Bucket "add" version number is used to detect whether, in the time
> + * between a state lookup (i.e. ipfw_dyn_lookup_state()) and the subsequent
> + * state creation (i.e. ipfw_dyn_install_state()), another concurrent thread
> + * installed some state in this bucket. If it did not, we can avoid an
> + * additional state lookup, because we are sure that we will not install
> + * the state twice.
> + *
> + * Also doing the tracking of bucket "del" version during lookup we can
> + * be sure, that state entry was not unlinked and freed in time between
> + * we read the state pointer and protect it with hazard pointer.
> + *
> + * An entry unlinked from CK list keeps unchanged until it is freed.
> + * Unlinked entries are linked into expired lists using "expired" field.
> + */
>
> -#define        IPFW_BUCK_LOCK_INIT(b)  \
> -       mtx_init(&(b)->mtx, "IPFW dynamic bucket", NULL, MTX_DEF)
> -#define        IPFW_BUCK_LOCK_DESTROY(b)       \
> -       mtx_destroy(&(b)->mtx)
> -#define        IPFW_BUCK_LOCK(i)       mtx_lock(&V_ipfw_dyn_v[(i)].mtx)
> -#define        IPFW_BUCK_UNLOCK(i)     mtx_unlock(&V_ipfw_dyn_v[(i)].mtx)
> -#define        IPFW_BUCK_ASSERT(i)     mtx_assert(&V_ipfw_dyn_v[(i)].mtx, MA_OWNED)
> +/*
> + * dyn_expire_lock is used to protect access to dyn_expired_xxx lists.
> + * dyn_bucket_lock is used to get write access to lists in specific bucket.
> + * Currently one dyn_bucket_lock is used for all ipv4, ipv4_parent, ipv6,
> + * and ipv6_parent lists.
> + */
> +static VNET_DEFINE(struct mtx, dyn_expire_lock);
> +static VNET_DEFINE(struct mtx *, dyn_bucket_lock);
> +#define        V_dyn_expire_lock               VNET(dyn_expire_lock)
> +#define        V_dyn_bucket_lock               VNET(dyn_bucket_lock)
>
> +/*
> + * Bucket's add/delete generation versions.
> + */
> +static VNET_DEFINE(uint32_t *, dyn_ipv4_add);
> +static VNET_DEFINE(uint32_t *, dyn_ipv4_del);
> +static VNET_DEFINE(uint32_t *, dyn_ipv4_parent_add);
> +static VNET_DEFINE(uint32_t *, dyn_ipv4_parent_del);
> +#define        V_dyn_ipv4_add                  VNET(dyn_ipv4_add)
> +#define        V_dyn_ipv4_del                  VNET(dyn_ipv4_del)
> +#define        V_dyn_ipv4_parent_add           VNET(dyn_ipv4_parent_add)
> +#define        V_dyn_ipv4_parent_del           VNET(dyn_ipv4_parent_del)
>
> -static VNET_DEFINE(int, dyn_keep_states);
> -#define        V_dyn_keep_states               VNET(dyn_keep_states)
> +#ifdef INET6
> +static VNET_DEFINE(uint32_t *, dyn_ipv6_add);
> +static VNET_DEFINE(uint32_t *, dyn_ipv6_del);
> +static VNET_DEFINE(uint32_t *, dyn_ipv6_parent_add);
> +static VNET_DEFINE(uint32_t *, dyn_ipv6_parent_del);
> +#define        V_dyn_ipv6_add                  VNET(dyn_ipv6_add)
> +#define        V_dyn_ipv6_del                  VNET(dyn_ipv6_del)
> +#define        V_dyn_ipv6_parent_add           VNET(dyn_ipv6_parent_add)
> +#define        V_dyn_ipv6_parent_del           VNET(dyn_ipv6_parent_del)
> +#endif /* INET6 */
>
> +#define        DYN_BUCKET(h, b)                ((h) & (b - 1))
> +#define        DYN_BUCKET_VERSION(b, v)        ck_pr_load_32(&V_dyn_ ## v[(b)])
> +#define        DYN_BUCKET_VERSION_BUMP(b, v)   ck_pr_inc_32(&V_dyn_ ## v[(b)])
> +
> +#define        DYN_BUCKET_LOCK_INIT(lock, b)           \
> +    mtx_init(&lock[(b)], "IPFW dynamic bucket", NULL, MTX_DEF)
> +#define        DYN_BUCKET_LOCK_DESTROY(lock, b)        mtx_destroy(&lock[(b)])
> +#define        DYN_BUCKET_LOCK(b)      mtx_lock(&V_dyn_bucket_lock[(b)])
> +#define        DYN_BUCKET_UNLOCK(b)    mtx_unlock(&V_dyn_bucket_lock[(b)])
> +#define        DYN_BUCKET_ASSERT(b)    mtx_assert(&V_dyn_bucket_lock[(b)], MA_OWNED)
> +
> +#define        DYN_EXPIRED_LOCK_INIT()         \
> +    mtx_init(&V_dyn_expire_lock, "IPFW expired states list", NULL, MTX_DEF)
> +#define        DYN_EXPIRED_LOCK_DESTROY()      mtx_destroy(&V_dyn_expire_lock)
> +#define        DYN_EXPIRED_LOCK()              mtx_lock(&V_dyn_expire_lock)
> +#define        DYN_EXPIRED_UNLOCK()            mtx_unlock(&V_dyn_expire_lock)
> +
> +static VNET_DEFINE(uint32_t, dyn_buckets_max);
> +static VNET_DEFINE(uint32_t, curr_dyn_buckets);
> +static VNET_DEFINE(struct callout, dyn_timeout);
> +#define        V_dyn_buckets_max               VNET(dyn_buckets_max)
> +#define        V_curr_dyn_buckets              VNET(curr_dyn_buckets)
> +#define        V_dyn_timeout                   VNET(dyn_timeout)
> +
> +/* Maximum length of states chain in a bucket */
> +static VNET_DEFINE(uint32_t, curr_max_length);
> +#define        V_curr_max_length               VNET(curr_max_length)
> +
> +static VNET_DEFINE(uma_zone_t, dyn_data_zone);
> +static VNET_DEFINE(uma_zone_t, dyn_parent_zone);
> +static VNET_DEFINE(uma_zone_t, dyn_ipv4_zone);
> +#ifdef INET6
> +static VNET_DEFINE(uma_zone_t, dyn_ipv6_zone);
> +#define        V_dyn_ipv6_zone                 VNET(dyn_ipv6_zone)
> +#endif /* INET6 */
> +#define        V_dyn_data_zone                 VNET(dyn_data_zone)
> +#define        V_dyn_parent_zone               VNET(dyn_parent_zone)
> +#define        V_dyn_ipv4_zone                 VNET(dyn_ipv4_zone)
> +
>  /*
>   * Timeouts for various events in handing dynamic rules.
>   */
> -static VNET_DEFINE(u_int32_t, dyn_ack_lifetime);
> -static VNET_DEFINE(u_int32_t, dyn_syn_lifetime);
> -static VNET_DEFINE(u_int32_t, dyn_fin_lifetime);
> -static VNET_DEFINE(u_int32_t, dyn_rst_lifetime);
> -static VNET_DEFINE(u_int32_t, dyn_udp_lifetime);
> -static VNET_DEFINE(u_int32_t, dyn_short_lifetime);
> +static VNET_DEFINE(uint32_t, dyn_ack_lifetime);
> +static VNET_DEFINE(uint32_t, dyn_syn_lifetime);
> +static VNET_DEFINE(uint32_t, dyn_fin_lifetime);
> +static VNET_DEFINE(uint32_t, dyn_rst_lifetime);
> +static VNET_DEFINE(uint32_t, dyn_udp_lifetime);
> +static VNET_DEFINE(uint32_t, dyn_short_lifetime);
>
>  #define        V_dyn_ack_lifetime              VNET(dyn_ack_lifetime)
>  #define        V_dyn_syn_lifetime              VNET(dyn_syn_lifetime)
> @@ -174,10 +348,10 @@ static VNET_DEFINE(u_int32_t, dyn_short_lifetime);
>   * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
>   * than dyn_keepalive_period.
>   */
> -
> -static VNET_DEFINE(u_int32_t, dyn_keepalive_interval);
> -static VNET_DEFINE(u_int32_t, dyn_keepalive_period);
> -static VNET_DEFINE(u_int32_t, dyn_keepalive);
> +#define        DYN_KEEPALIVE_MAXQ              512
> +static VNET_DEFINE(uint32_t, dyn_keepalive_interval);
> +static VNET_DEFINE(uint32_t, dyn_keepalive_period);
> +static VNET_DEFINE(uint32_t, dyn_keepalive);
>  static VNET_DEFINE(time_t, dyn_keepalive_last);
>
>  #define        V_dyn_keepalive_interval        VNET(dyn_keepalive_interval)
> @@ -185,113 +359,208 @@ static VNET_DEFINE(time_t, dyn_keepalive_last);
>  #define        V_dyn_keepalive                 VNET(dyn_keepalive)
>  #define        V_dyn_keepalive_last            VNET(dyn_keepalive_last)
>
> -static VNET_DEFINE(u_int32_t, dyn_max);                /* max # of dynamic rules */
> -
> -#define        DYN_COUNT                       uma_zone_get_cur(V_ipfw_dyn_rule_zone)
> +static VNET_DEFINE(uint32_t, dyn_max);         /* max # of dynamic states */
> +static VNET_DEFINE(uint32_t, dyn_count);       /* number of states */
> +static VNET_DEFINE(uint32_t, dyn_parent_max);  /* max # of parent states */
> +static VNET_DEFINE(uint32_t, dyn_parent_count);        /* number of parent states */
>  #define        V_dyn_max                       VNET(dyn_max)
> +#define        V_dyn_count                     VNET(dyn_count)
> +#define        V_dyn_parent_max                VNET(dyn_parent_max)
> +#define        V_dyn_parent_count              VNET(dyn_parent_count)
>
> -/* for userspace, we emulate the uma_zone_counter with ipfw_dyn_count */
> -static int ipfw_dyn_count;     /* number of objects */
> +#define        DYN_COUNT_DEC(name)     do {                    \
> +       MPASS((V_ ## name) > 0);                        \
> +       ck_pr_dec_32(&(V_ ## name));                    \
> +} while (0)
> +#define        DYN_COUNT_INC(name)     ck_pr_inc_32(&(V_ ## name))
> +#define        DYN_COUNT(name)         ck_pr_load_32(&(V_ ## name))
>
> -#ifdef USERSPACE /* emulation of UMA object counters for userspace */
> -#define uma_zone_get_cur(x)    ipfw_dyn_count
> -#endif /* USERSPACE */
> +static time_t last_log;        /* Log ratelimiting */
>
> -static int last_log;   /* Log ratelimiting */
> +/*
> + * Get/set maximum number of dynamic states in given VNET instance.
> + */
> +static int
> +sysctl_dyn_max(SYSCTL_HANDLER_ARGS)
> +{
> +       uint32_t nstates;
> +       int error;
>
> -static void ipfw_dyn_tick(void *vnetx);
> -static void check_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *, int, int);
> -#ifdef SYSCTL_NODE
> +       nstates = V_dyn_max;
> +       error = sysctl_handle_32(oidp, &nstates, 0, req);
> +       /* Read operation or some error */
> +       if ((error != 0) || (req->newptr == NULL))
> +               return (error);
>
> -static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS);
> -static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS);
> +       V_dyn_max = nstates;
> +       uma_zone_set_max(V_dyn_data_zone, V_dyn_max);
> +       return (0);
> +}
>
> -SYSBEGIN(f2)
> +static int
> +sysctl_dyn_parent_max(SYSCTL_HANDLER_ARGS)
> +{
> +       uint32_t nstates;
> +       int error;
>
> +       nstates = V_dyn_parent_max;
> +       error = sysctl_handle_32(oidp, &nstates, 0, req);
> +       /* Read operation or some error */
> +       if ((error != 0) || (req->newptr == NULL))
> +               return (error);
> +
> +       V_dyn_parent_max = nstates;
> +       uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max);
> +       return (0);
> +}
> +
> +static int
> +sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS)
> +{
> +       uint32_t nbuckets;
> +       int error;
> +
> +       nbuckets = V_dyn_buckets_max;
> +       error = sysctl_handle_32(oidp, &nbuckets, 0, req);
> +       /* Read operation or some error */
> +       if ((error != 0) || (req->newptr == NULL))
> +               return (error);
> +
> +       if (nbuckets > 256)
> +               V_dyn_buckets_max = 1 << fls(nbuckets - 1);
> +       else
> +               return (EINVAL);
> +       return (0);
> +}
> +
>  SYSCTL_DECL(_net_inet_ip_fw);
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
> -    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_buckets_max), 0,
> -    "Max number of dyn. buckets");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
> +
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_count,
> +    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_count), 0,
> +    "Current number of dynamic states.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_parent_count,
> +    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_parent_count), 0,
> +    "Current number of parent states. ");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
>      CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0,
> -    "Current Number of dyn. buckets");
> -SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count,
> -    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RD, 0, 0, sysctl_ipfw_dyn_count, "IU",
> -    "Number of dyn. rules");
> +    "Current number of buckets for states hash table.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_max_length,
> +    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_max_length), 0,
> +    "Current maximum length of states chains in hash buckets.");
> +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
> +    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_buckets,
> +    "IU", "Max number of buckets for dynamic states hash table.");
>  SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
> -    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_dyn_max, "IU",
> -    "Max number of dyn. rules");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
> +    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_max,
> +    "IU", "Max number of dynamic states.");
> +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_parent_max,
> +    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_parent_max,
> +    "IU", "Max number of parent dynamic states.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
>      CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0,
> -    "Lifetime of dyn. rules for acks");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
> +    "Lifetime of dynamic states for TCP ACK.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
>      CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0,
> -    "Lifetime of dyn. rules for syn");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
> +    "Lifetime of dynamic states for TCP SYN.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
>      CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0,
> -    "Lifetime of dyn. rules for fin");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
> +    "Lifetime of dynamic states for TCP FIN.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
>      CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0,
> -    "Lifetime of dyn. rules for rst");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
> +    "Lifetime of dynamic states for TCP RST.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
>      CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0,
> -    "Lifetime of dyn. rules for UDP");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
> +    "Lifetime of dynamic states for UDP.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
>      CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0,
> -    "Lifetime of dyn. rules for other situations");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
> +    "Lifetime of dynamic states for other situations.");
> +SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
>      CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0,
> -    "Enable keepalives for dyn. rules");
> -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keep_states,
> -    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0,
> -    "Do not flush dynamic states on rule deletion");
> +    "Enable keepalives for dynamic states.");
>
> -SYSEND
> +#ifdef IPFIREWALL_DYNDEBUG
> +#define        DYN_DEBUG(fmt, ...)     do {                    \
> +       printf("%s: " fmt "\n", __func__, __VA_ARGS__); \
> +} while (0)
> +#else
> +#define        DYN_DEBUG(fmt, ...)
> +#endif /* !IPFIREWALL_DYNDEBUG */
>
> -#endif /* SYSCTL_NODE */
> -
> -
>  #ifdef INET6
> -static __inline int
> -hash_packet6(const struct ipfw_flow_id *id)
> -{
> -       u_int32_t i;
> -       i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
> -           (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
> -           (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
> -           (id->src_ip6.__u6_addr.__u6_addr32[3]);
> -       return ntohl(i);
> -}
> -#endif
> +/* Functions to work with IPv6 states */
> +static struct dyn_ipv6_state *dyn_lookup_ipv6_state(
> +    const struct ipfw_flow_id *, uint32_t, const void *,
> +    struct ipfw_dyn_info *, int);
> +static int dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *,
> +    uint32_t, const void *, int, const void *, uint32_t, uint16_t, uint32_t,
> +    uint16_t);
> +static struct dyn_ipv6_state *dyn_alloc_ipv6_state(
> +    const struct ipfw_flow_id *, uint32_t, uint16_t, uint8_t);
> +static int dyn_add_ipv6_state(void *, uint32_t, uint16_t, uint8_t,
> +    const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t,
> +    struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
> +static void dyn_export_ipv6_state(const struct dyn_ipv6_state *,
> +    ipfw_dyn_rule *);
>
> -/*
> - * IMPORTANT: the hash function for dynamic rules must be commutative
> - * in source and destination (ip,port), because rules are bidirectional
> - * and we want to find both in the same bucket.
> - */
> -static __inline int
> -hash_packet(const struct ipfw_flow_id *id, int buckets)
> -{
> -       u_int32_t i;
> +static uint32_t dyn_getscopeid(const struct ip_fw_args *);
> +static void dyn_make_keepalive_ipv6(struct mbuf *, const struct in6_addr *,
> +    const struct in6_addr *, uint32_t, uint32_t, uint32_t, uint16_t,
> +    uint16_t);
> +static void dyn_enqueue_keepalive_ipv6(struct mbufq *,
> +    const struct dyn_ipv6_state *);
> +static void dyn_send_keepalive_ipv6(struct ip_fw_chain *);
>
> -#ifdef INET6
> -       if (IS_IP6_FLOW_ID(id))
> -               i = hash_packet6(id);
> -       else
> +static struct dyn_ipv6_state *dyn_lookup_ipv6_parent(
> +    const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
> +    uint32_t);
> +static struct dyn_ipv6_state *dyn_lookup_ipv6_parent_locked(
> +    const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
> +    uint32_t);
> +static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint16_t,
> +    uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t,
> +    uint16_t);
>  #endif /* INET6 */
> -       i = (id->dst_ip) ^ (id->src_ip);
> -       i ^= (id->dst_port) ^ (id->src_port);
> -       return (i & (buckets - 1));
> -}
>
> -#if 0
> -#define        DYN_DEBUG(fmt, ...)     do {                    \
> -       printf("%s: " fmt "\n", __func__, __VA_ARGS__); \
> -} while (0)
> -#else
> -#define        DYN_DEBUG(fmt, ...)
> -#endif
> +/* Functions to work with limit states */
> +static void *dyn_get_parent_state(const struct ipfw_flow_id *, uint32_t,
> +    struct ip_fw *, uint32_t, uint32_t, uint16_t);
> +static struct dyn_ipv4_state *dyn_lookup_ipv4_parent(
> +    const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
> +static struct dyn_ipv4_state *dyn_lookup_ipv4_parent_locked(
> +    const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
> +static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint16_t,
> +    uint8_t, uint32_t);
> +static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint16_t,
> +    uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t);
>
> +static void dyn_tick(void *);
> +static void dyn_expire_states(struct ip_fw_chain *, ipfw_range_tlv *);
> +static void dyn_free_states(struct ip_fw_chain *);
> +static void dyn_export_parent(const struct dyn_parent *, uint16_t,
> +    ipfw_dyn_rule *);
> +static void dyn_export_data(const struct dyn_data *, uint16_t, uint8_t,
> +    ipfw_dyn_rule *);
> +static uint32_t dyn_update_tcp_state(struct dyn_data *,
> +    const struct ipfw_flow_id *, const struct tcphdr *, int);
> +static void dyn_update_proto_state(struct dyn_data *,
> +    const struct ipfw_flow_id *, const void *, int, int);
> +
> +/* Functions to work with IPv4 states */
> +struct dyn_ipv4_state *dyn_lookup_ipv4_state(const struct ipfw_flow_id *,
> +    const void *, struct ipfw_dyn_info *, int);
> +static int dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *,
> +    const void *, int, const void *, uint32_t, uint16_t, uint32_t, uint16_t);
> +static struct dyn_ipv4_state *dyn_alloc_ipv4_state(
> +    const struct ipfw_flow_id *, uint16_t, uint8_t);
> +static int dyn_add_ipv4_state(void *, uint32_t, uint16_t, uint8_t,
> +    const struct ipfw_flow_id *, const void *, int, uint32_t,
> +    struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
> +static void dyn_export_ipv4_state(const struct dyn_ipv4_state *,
> +    ipfw_dyn_rule *);
> +
> +/*
> + * Named states support.
> + */
>  static char *default_state_name = "default";
>  struct dyn_state_obj {
>         struct named_object     no;
> @@ -438,7 +707,6 @@ dyn_destroy(struct ip_fw_chain *ch, struct named_objec
>         KASSERT(no->refcnt == 1,
>             ("Destroying object '%s' (type %u, idx %u) with refcnt %u",
>             no->name, no->etlv, no->kidx, no->refcnt));
> -
>         DYN_DEBUG("kidx %d", no->kidx);
>         obj = SRV_OBJECT(ch, no->kidx);
>         SRV_OBJECT(ch, no->kidx) = NULL;
> @@ -474,7 +742,137 @@ static struct opcode_obj_rewrite dyn_opcodes[] = {
>                 dyn_create, dyn_destroy
>         },
>  };
> -/**
> +
> +/*
> + * IMPORTANT: the hash function for dynamic rules must be commutative
> + * in source and destination (ip,port), because rules are bidirectional
> + * and we want to find both in the same bucket.
> + */
> +#ifndef IPFIREWALL_JENKINSHASH
> +static __inline uint32_t
> +hash_packet(const struct ipfw_flow_id *id)
> +{
> +       uint32_t i;
> +
> +#ifdef INET6
> +       if (IS_IP6_FLOW_ID(id))
> +               i = ntohl((id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
> +                   (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
> +                   (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
> +                   (id->src_ip6.__u6_addr.__u6_addr32[3]));
> +       else
> +#endif /* INET6 */
> +       i = (id->dst_ip) ^ (id->src_ip);
> +       i ^= (id->dst_port) ^ (id->src_port);
> +       return (i);
> +}
> +
> +static __inline uint32_t
> +hash_parent(const struct ipfw_flow_id *id, const void *rule)
> +{
> +
> +       return (hash_packet(id) ^ ((uintptr_t)rule));
> +}
> +
> +#else /* IPFIREWALL_JENKINSHASH */
> +
> +static VNET_DEFINE(uint32_t, dyn_hashseed);
> +#define        V_dyn_hashseed          VNET(dyn_hashseed)
> +
> +static __inline int
> +addrcmp4(const struct ipfw_flow_id *id)
> +{
> +
> +       if (id->src_ip < id->dst_ip)
> +               return (0);
> +       if (id->src_ip > id->dst_ip)
> +               return (1);
> +       if (id->src_port <= id->dst_port)
> +               return (0);
> +       return (1);
> +}
> +
> +#ifdef INET6
> +static __inline int
> +addrcmp6(const struct ipfw_flow_id *id)
> +{
> +       int ret;
> +
> +       ret = memcmp(&id->src_ip6, &id->dst_ip6, sizeof(struct in6_addr));
> +       if (ret < 0)
> +               return (0);
> +       if (ret > 0)
> +               return (1);
> +       if (id->src_port <= id->dst_port)
> +               return (0);
> +       return (1);
> +}
> +
> +static __inline uint32_t
> +hash_packet6(const struct ipfw_flow_id *id)
> +{
> +       struct tuple6 {
> +               struct in6_addr addr[2];
> +               uint16_t        port[2];
> +       } t6;
> +
> +       if (addrcmp6(id) == 0) {
> +               t6.addr[0] = id->src_ip6;
> +               t6.addr[1] = id->dst_ip6;
> +               t6.port[0] = id->src_port;
> +               t6.port[1] = id->dst_port;
> +       } else {
> +               t6.addr[0] = id->dst_ip6;
> +               t6.addr[1] = id->src_ip6;
> +               t6.port[0] = id->dst_port;
> +               t6.port[1] = id->src_port;
> +       }
> +       return (jenkins_hash32((const uint32_t *)&t6,
> +           sizeof(t6) / sizeof(uint32_t), V_dyn_hashseed));
> +}
> +#endif
> +
> +static __inline uint32_t
> +hash_packet(const struct ipfw_flow_id *id)
> +{
> +       struct tuple4 {
> +               in_addr_t       addr[2];
> +               uint16_t        port[2];
> +       } t4;
> +
> +       if (IS_IP4_FLOW_ID(id)) {
> +               /* All fields are in host byte order */
> +               if (addrcmp4(id) == 0) {
> +                       t4.addr[0] = id->src_ip;
> +                       t4.addr[1] = id->dst_ip;
> +                       t4.port[0] = id->src_port;
> +                       t4.port[1] = id->dst_port;
> +               } else {
> +                       t4.addr[0] = id->dst_ip;
> +                       t4.addr[1] = id->src_ip;
> +                       t4.port[0] = id->dst_port;
> +                       t4.port[1] = id->src_port;
> +               }
> +               return (jenkins_hash32((const uint32_t *)&t4,
> +                   sizeof(t4) / sizeof(uint32_t), V_dyn_hashseed));
> +       } else
> +#ifdef INET6
> +       if (IS_IP6_FLOW_ID(id))
> +               return (hash_packet6(id));
> +#endif
> +       return (0);
> +}
> +
> +static __inline uint32_t
> +hash_parent(const struct ipfw_flow_id *id, const void *rule)
> +{
> +
> +       return (jenkins_hash32((const uint32_t *)&rule,
> +           sizeof(rule) / sizeof(uint32_t), hash_packet(id)));
> +}
> +#endif /* IPFIREWALL_JENKINSHASH */
> +
> +/*
>   * Print customizable flow id description via log(9) facility.
>   */
>  static void
> @@ -502,903 +900,1809 @@ print_dyn_rule_flags(const struct ipfw_flow_id *id, in
>         }
>         log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n",
>             prefix, dyn_type, src, id->src_port, dst,
> -           id->dst_port, DYN_COUNT, postfix);
> +           id->dst_port, V_dyn_count, postfix);
>  }
>
>  #define        print_dyn_rule(id, dtype, prefix, postfix)      \
>         print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix)
>
> -#define TIME_LEQ(a,b)       ((int)((a)-(b)) <= 0)
> -#define TIME_LE(a,b)       ((int)((a)-(b)) < 0)
> +#define        TIME_LEQ(a,b)   ((int)((a)-(b)) <= 0)
> +#define        TIME_LE(a,b)    ((int)((a)-(b)) < 0)
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>


More information about the svn-src-all mailing list