svn commit: r184320 - user/kmacy/HEAD_fast_xmit/sys/net

Kip Macy kmacy at FreeBSD.org
Mon Oct 27 05:19:26 UTC 2008


Author: kmacy
Date: Mon Oct 27 05:19:26 2008
New Revision: 184320
URL: http://svn.freebsd.org/changeset/base/184320

Log:
  Generalize the flowtable code support ipv6, host route caching, and pcpu tables

Added:
  user/kmacy/HEAD_fast_xmit/sys/net/flowtable.h   (contents, props changed)
Modified:
  user/kmacy/HEAD_fast_xmit/sys/net/flowtable.c

Modified: user/kmacy/HEAD_fast_xmit/sys/net/flowtable.c
==============================================================================
--- user/kmacy/HEAD_fast_xmit/sys/net/flowtable.c	Mon Oct 27 02:36:03 2008	(r184319)
+++ user/kmacy/HEAD_fast_xmit/sys/net/flowtable.c	Mon Oct 27 05:19:26 2008	(r184320)
@@ -16,9 +16,14 @@
 
 #include <net/route.h> 
 #include <net/vnet.h>
+#include <net/flowtable.h>
+#include <net/if.h>
+#include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
@@ -172,59 +177,141 @@ uint32_t        initval)         /* the 
 }
 
 
-struct ip_tuple {
+struct ipv4_tuple {
+	uint16_t 	ip_sport;	/* source port */
+	uint16_t 	ip_dport;	/* destination port */
 	in_addr_t 	ip_saddr;	/* source address */
 	in_addr_t 	ip_daddr;	/* destination address */
+};
+
+union ipv4_flow {
+	struct ipv4_tuple ipf_ipt;
+	uint32_t 	ipf_key[3];
+};
+
+struct ipv6_tuple {
 	uint16_t 	ip_sport;	/* source port */
 	uint16_t 	ip_dport;	/* destination port */
+	struct in6_addr	ip_saddr;	/* source address */
+	struct in6_addr	ip_daddr;	/* destination address */
 };
 
-union ip_flow {
-	struct ip_tuple ipf_ipt;
-	uint32_t 	ipf_key[3];
+union ipv6_flow {
+	struct ipv6_tuple ipf_ipt;
+	uint32_t 	ipf_key[9];
+};
+
+struct flentry {
+	uint32_t	f_fhash;	/* hash flowing forward */
+	uint16_t	f_flags;	/* flow flags */
+	uint8_t		f_pad;
+	uint8_t		f_proto;	/* protocol */
+	time_t		f_uptime;	/* last time this flow was accessed */
+	struct rtentry *f_rt;		/* rtentry for flow */
+	u_char		f_desten[ETHER_ADDR_LEN];	
 };
 
 struct flentry_v4 {
-	uint32_t	fl_fhash;	/* hash flowing forward */
-	uint32_t	fl_ticks;	/* last time this flow was accessed */
-	uint16_t	fl_flags;	/* flow flags */
-	uint8_t		fl_pad;
-	uint8_t		fl_proto;	/* protocol */
-	union ip_flow	fl_flow;
-	struct rtentry *fl_rt;		/* rtentry for flow */
-	uint32_t	fl_refcnt;
-	uint32_t	fl_hash_next;	/* needed for GC */
-	uint32_t	fl_hash_prev;
+	struct flentry	fl_entry;
+	union ipv4_flow	fl_flow;
 };
 
-#define	TICKS_PER_MINUTE	(60*hz)
-#define	TICKS_PER_HOUR		(60*TICKS_PER_MINUTE)
-#define	TICKS_PER_DAY		(24*TICKS_PER_HOUR)
-
-
-#define SYN_IDLE		(5*TICKS_PER_MINUTE)
-#define UDP_IDLE		(5*TICKS_PER_MINUTE)
-#define FIN_WAIT_IDLE		(10*TICKS_PER_MINUTE)
-#define TCP_IDLE		TICKS_PER_DAY
-
-
-static struct flentry_v4 *ipv4_flow_table;
-static int ipv4_flow_table_size;
-static bitstr_t *ipv4_flow_bitstring;
-static int ipv4_flow_allocated;
-struct mtx *ipv4_flow_locks;
-static int ipv4_flow_lock_count;
-extern uint32_t hashjitter;
-static uint32_t ipv4_flow_route_lookup_fail;
-static uint32_t	ipv4_flow_collisions;
-struct callout ipv4_flow_callout;
-static int ipv4_flow_max_count;
-
-
-#define FL_ENTRY_INDEX(hash)((hash) % ipv4_flow_table_size)
-#define FL_ENTRY(hash) (&ipv4_flow_table[FL_ENTRY_INDEX((hash))])
-#define FL_ENTRY_LOCK(hash) mtx_lock(&ipv4_flow_locks[(hash)&(ipv4_flow_lock_count - 1)])
-#define FL_ENTRY_UNLOCK(hash) mtx_lock(&ipv4_flow_locks[(hash)&(ipv4_flow_lock_count - 1)])
+struct flentry_v6 {
+	struct flentry	fl_entry;
+	union ipv6_flow	fl_flow;
+};
+
+#define	fl_fhash	fl_entry.fl_fhash
+#define	fl_flags	fl_entry.fl_flags
+#define	fl_proto	fl_entry.fl_proto
+#define	fl_uptime	fl_entry.fl_uptime
+#define	fl_rt		fl_entry.fl_rt
+#define	fl_desten		fl_entry.fl_desten
+
+#define	SECS_PER_HOUR		3600
+#define	SECS_PER_DAY		(24*SECS_PER_HOUR)
+
+#define	SYN_IDLE		300
+#define	UDP_IDLE		300
+#define	FIN_WAIT_IDLE		600
+#define	TCP_IDLE		SECS_PER_DAY
+
+
+typedef	void fl_lock_t(struct flowtable *, uint32_t);
+typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
+
+union flentryp {
+	struct flentry_v4	*v4;
+	struct flentry_v6	*v6;
+	struct flentry_v4	*v4_pcpu[MAXCPU];
+	struct flentry_v6	*v6_pcpu[MAXCPU];
+};
+
+struct flowtable {
+	union flentryp	ft_table;
+	int 		ft_size;
+	bitstr_t 	*ft_masks[MAXCPU];
+	struct mtx	*ft_locks;
+	int 		ft_lock_count;
+	uint32_t	ft_flags;
+	uint32_t	ft_collisions;
+	uint32_t	ft_allocated;
+	uint64_t	ft_hits;
+
+	uint32_t	ft_udp_idle;
+	uint32_t	ft_fin_wait_idle;
+	uint32_t	ft_syn_idle;
+	uint32_t	ft_tcp_idle;
+
+	fl_lock_t	*ft_lock;
+	fl_lock_t 	*ft_unlock;
+	fl_rtalloc_t	*ft_rtalloc;
+
+};
+
+extern	uint32_t hashjitter;
+
+static void
+in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fib)
+{
+
+	in_rtalloc_ign(ro, 0, fib);
+}
+
+static void
+flowtable_global_lock(struct flowtable *table, uint32_t hash)
+{	
+	int lock_index = (hash)&(table->ft_lock_count - 1);
+
+	mtx_lock(&table->ft_locks[lock_index]);
+}
+
+static void
+flowtable_global_unlock(struct flowtable *table, uint32_t hash)
+{	
+	int lock_index = (hash)&(table->ft_lock_count - 1);
+
+	mtx_unlock(&table->ft_locks[lock_index]);
+}
+
+static void
+flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
+{
+
+	critical_enter();
+}
+
+static void
+flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
+{
+
+	critical_exit();
+}
+
+#define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
+#define FL_ENTRY(table, hash) flowtable_entry((table), (hash))
+#define FL_ENTRY_LOCK(table, hash)  (table)->ft_lock((table), (hash))
+#define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
 
 #define FL_STALE (1<<8)
 
@@ -241,8 +328,9 @@ ipv4_flow_lookup_hash_internal(struct mb
 	struct udphdr *uh;
 	struct sctphdr *sh;
 
-	key[0] = ip->ip_src.s_addr;
-	key[1] = ip->ip_dst.s_addr;	
+	key[0] = 0;
+	key[1] = ip->ip_src.s_addr;
+	key[2] = ip->ip_dst.s_addr;	
 
 	sin = (struct sockaddr_in *)&ro->ro_dst;
 	sin->sin_family = AF_INET;
@@ -254,7 +342,7 @@ ipv4_flow_lookup_hash_internal(struct mb
 		th = (struct tcphdr *)((caddr_t)ip + iphlen);
 		sport = th->th_sport;
 		dport = th->th_dport;
-		*flags = th->th_flags;
+		*flags |= th->th_flags;
 		if (*flags & TH_RST)
 			*flags |= FL_STALE;
 	break;
@@ -269,270 +357,272 @@ ipv4_flow_lookup_hash_internal(struct mb
 		dport = sh->dest_port;
 	break;
 	default:
+		goto noop;
 		/* no port - hence not a protocol we care about */
 		break;;
 	
 	}
-	((uint16_t *)key)[4] = sport;
-	((uint16_t *)key)[5] = dport;
-
 	*protop = proto;
+
+	/*
+	 * If this is a transmit route cache then 
+	 * hash all flows to a given destination to
+	 * the same bucket
+	 */
+	if (*flags & FL_LOCAL_XMIT)
+		proto = sport = dport = 0;
+
+	((uint16_t *)key)[0] = sport;
+	((uint16_t *)key)[1] = dport;
+	
 	return (hashword(key, 3, hashjitter + proto));
+
+noop:
+	*protop = proto;
+	return (0);
 }
 
-uint32_t
-ipv4_flow_lookup_hash(struct mbuf *m)
+static bitstr_t *
+flowtable_mask(struct flowtable *ft)
 {
-	struct route ro;
-	uint32_t key[3];
-	uint16_t flags;
-	uint8_t proto;
+	bitstr_t *mask;
 	
-	bzero(&ro, sizeof(ro));
-	return (ipv4_flow_lookup_hash_internal(m, &ro, key, &flags, &proto));
+	if (ft->ft_flags & FL_PCPU)
+		mask = ft->ft_masks[curcpu];
+	else
+		mask = ft->ft_masks[0];
+
+	return (mask);
 }
 
-static void
-ipv4_flow_insert(uint32_t hash, uint32_t *key, uint8_t proto,
-    struct rtentry *rt, uint16_t flags)
+static struct flentry *
+flowtable_entry(struct flowtable *ft, uint32_t hash)
 {
-	struct flentry_v4 *fle, *fle2;
-	uint32_t *hashkey;
-	
-	fle = FL_ENTRY(hash);
-	hashkey = fle->fl_flow.ipf_key;
-
-	hashkey[0] = key[0];
-	hashkey[1] = key[1];
-	hashkey[2] = key[2];
-
-	bit_set(ipv4_flow_bitstring, FL_ENTRY_INDEX(hash));
-	if (rt->rt_flow_head == 0) {
-		rt->rt_flow_head = hash;
-		fle->fl_hash_next = fle->fl_hash_prev = 0;
+	struct flentry *fle;
+	int index = (ft->ft_size % hash);
+ 
+	if ((ft->ft_flags & FL_IPV6) == 0) {
+		if (ft->ft_flags & FL_PCPU)
+			fle = (struct flentry *)
+			    &ft->ft_table.v4_pcpu[curcpu][index];
+		else
+			fle = (struct flentry *)&ft->ft_table.v4[index];
 	} else {
-		fle->fl_hash_next = rt->rt_flow_head;
-		fle2 = FL_ENTRY(rt->rt_flow_head);
-		rt->rt_flow_head = hash;
-		fle2->fl_hash_prev = hash;
+		if (ft->ft_flags & FL_PCPU)
+			fle = (struct flentry *)
+			    &ft->ft_table.v6_pcpu[curcpu][index];
+		else
+			fle = (struct flentry *)&ft->ft_table.v6[index];
 	}
-	fle->fl_proto = proto;
-	fle->fl_rt = rt;
-	fle->fl_fhash = hash;
-	fle->fl_ticks = ticks;
-	rt->rt_refcnt++;
-	ipv4_flow_allocated++;
+
+	return (fle);
 }
 
-uint32_t
-ipv4_flow_alloc(struct mbuf *m, struct route *ro)
-{
-	uint32_t key[3], hash, *hashkey;
-	struct flentry_v4 *fle;
-	uint16_t flags = 0;
-	uint8_t proto;
-	
-	/*
-	 * Only handle IPv4 for now
-	 *
-	 */
-	hash = ipv4_flow_lookup_hash_internal(m, ro, key, &flags, &proto);
+static int
+flow_stale(struct flowtable *ft, struct flentry *fle)
+{
+	time_t idle_time;
 
-	/*
-	 * Ports are zero - thus not a protocol for which 
-	 * we need to keep state
-	 */
-	if (key[3] == 0)
-		return (hash);
+	if (fle->f_fhash == 0)
+		return (1);
 	
-	FL_ENTRY_LOCK(hash);
-	fle = FL_ENTRY(hash);
+	idle_time = time_uptime - fle->f_uptime;
 
-	hashkey = fle->fl_flow.ipf_key;
-	
-	if (fle->fl_fhash == 0) {
-		FL_ENTRY_UNLOCK(hash);
-		rtalloc_mpath_fib(ro, hash, M_GETFIB(m));
-		if (ro->ro_rt) {
-			FL_ENTRY_LOCK(hash);
-			ipv4_flow_insert(hash, key, proto, ro->ro_rt, flags);
-			RT_UNLOCK(ro->ro_rt);
-		} else
-			ipv4_flow_route_lookup_fail++;
-	} else if (fle->fl_fhash == hash
-	    && key[0] == hashkey[0] 
-	    && key[1] == hashkey[1]
-	    && key[2] == hashkey[2]
-	    && proto == fle->fl_proto) {
-		fle->fl_ticks = ticks;
-		fle->fl_flags |= flags;
-		fle->fl_refcnt++;
-		ro->ro_rt = fle->fl_rt;
-	} else 
-		ipv4_flow_collisions++;
-		
-	FL_ENTRY_UNLOCK(hash);
+	if ((fle->f_flags & FL_STALE) ||
+	    ((fle->f_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
+		&& (idle_time > ft->ft_udp_idle)) ||
+	    ((fle->f_flags & TH_FIN)
+		&& (idle_time > ft->ft_fin_wait_idle)) ||
+	    ((fle->f_flags & (TH_SYN|TH_ACK)) == TH_SYN
+		&& (idle_time > ft->ft_syn_idle)) ||
+	    ((fle->f_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
+		&& (idle_time > ft->ft_tcp_idle)) ||
+	    ((fle->f_rt->rt_flags & RTF_UP) == 0 || 
+		(fle->f_rt->rt_ifp == NULL)))
+		return (1);
 
-	return (hash);
+	return (0);
 }
 
-/*
- * Internal helper routine
- * hash - the hash of the entry to free
- * stale - indicates to only free the entry if it is marked stale
- */
-
-static uint32_t
-ipv4_flow_free_internal(uint32_t hash, int staleonly)
+static void
+flowtable_set_hashkey(struct flowtable *ft, struct flentry *fle, uint32_t *key)
 {
-	struct flentry_v4 *fle, *fleprev, *flenext;
-	uint32_t hash_next;
+	uint32_t *hashkey;
+	int i, nwords;
 
-	fle = FL_ENTRY(hash);
-	hash_next = fle->fl_hash_next;
-	
-	if (staleonly && ((fle->fl_flags & FL_STALE) == 0))
-	    return (hash_next);
-	
-	if (fle->fl_hash_next) {
-		flenext = FL_ENTRY(fle->fl_hash_next);
-		flenext->fl_hash_prev = fle->fl_hash_prev;
-	}
-	if (fle->fl_hash_prev) {
-		fleprev = FL_ENTRY(fle->fl_hash_prev);
-		fleprev->fl_hash_next = fle->fl_hash_next;
+	if (ft->ft_flags & FL_IPV6) {
+		nwords = 9;
+		hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
+	} else {
+		nwords = 3;
+		hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
 	}
-	fle->fl_hash_next = fle->fl_hash_prev = 0;
-	    
-	if (fle->fl_refcnt == 0) {
-		fle->fl_rt->rt_refcnt--;
-		ipv4_flow_allocated--;
-		bit_clear(ipv4_flow_bitstring, FL_ENTRY_INDEX(hash));
-		bzero(fle, sizeof(struct flentry_v4));
-	} else if (!staleonly) 
-		fle->fl_flags |= FL_STALE;
-
-	return (hash_next);
+	
+	for (i = 0; i < nwords; i++) 
+		hashkey[i] = key[i];
 }
 
-/*
- * drops the refcount on the flow after alloc was called and 
- * checks if the flow has become stale since alloc was called
- *
- */
-void
-ipv4_flow_free(uint32_t hash)
+static void
+flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
+    uint8_t proto, struct rtentry *rt, u_char *desten, uint16_t flags)
 {
-	struct flentry_v4 *fle;
-	struct rtentry *rt;
+	struct flentry *fle;
+	struct rtentry *rt0 = NULL;
 	int stale;
+	bitstr_t *mask;
+	
+retry:	
+	FL_ENTRY_LOCK(ft, hash);
+	mask = flowtable_mask(ft);
+	fle = flowtable_entry(ft, hash);
+	if (fle->f_fhash) {
+		if ((stale = flow_stale(ft, fle)) != 0) {
+			fle->f_fhash = 0;
+			rt0 = fle->f_rt;
+			fle->f_rt = NULL;
+			bit_clear(mask, FL_ENTRY_INDEX(ft, hash));
+		}
+		FL_ENTRY_UNLOCK(ft, hash);
+		if (!stale)
+			return;
+		RTFREE(rt0);
+		/*
+		 * We might end up on a different cpu
+		 */
+		goto retry;
+	       
+	}
+	flowtable_set_hashkey(ft, fle, key);
+	bit_set(mask, FL_ENTRY_INDEX(ft, hash));
 
-	fle = FL_ENTRY(hash);
-	KASSERT(fle->fl_refcnt > 0,
-	    ("route referenced with flow refcount set to zero"));
-
-	stale = ((fle->fl_flags & FL_STALE) &&
-	    (fle->fl_refcnt == 1));
-
-	rt = fle->fl_rt;
-	if (stale)
-		RT_LOCK(rt);
-	
-	FL_ENTRY_LOCK(hash);
-	fle->fl_refcnt--;
-
-	if (stale) {
- 		ipv4_flow_free_internal(hash, 0);
-		RTFREE_LOCKED(rt);
-	} 
-	FL_ENTRY_UNLOCK(hash);
+	fle->f_proto = proto;
+	fle->f_rt = rt;
+	fle->f_fhash = hash;
+	fle->f_uptime = time_uptime;
+	memcpy(fle->f_desten, desten, ETHER_ADDR_LEN);
+	FL_ENTRY_UNLOCK(ft, hash);
 }
 
-/*
- *
- * Frees all flows that are linked to this rtentry
- *
- */
 void
-ipv4_flow_free_all(struct rtentry *rt)
+route_to_rtentry_info(struct route *ro, u_char *desten, struct rtentry_info *ri)
 {
-	uint32_t hash_next = rt->rt_flow_head;
+	struct sockaddr_in *sin = (struct sockaddr_in *)&ro->ro_dst;
+	struct rtentry *rt = ro->ro_rt;
+	
+	ri->ri_ifp = rt->rt_ifp;
+	ri->ri_ifa = rt->rt_ifa;
+	ri->ri_flags = rt->rt_flags;
+	ri->ri_mtu = rt->rt_rmx.rmx_mtu;
 
-	RT_LOCK_ASSERT(rt);
-	while (hash_next) 
-		hash_next = ipv4_flow_free_internal(hash_next, 0);
+	if (rt->rt_flags & RTF_GATEWAY && !IN_MULTICAST(sin->sin_addr.s_addr))
+		memcpy(&ri->ri_dst, sin, sizeof(struct sockaddr));
+	else
+		memcpy(&ri->ri_dst, rt->rt_gateway, sizeof(struct sockaddr));
+
+	if (desten) {
+		memcpy(ri->ri_desten, desten, ETHER_ADDR_LEN);
+		ri->ri_flags |= RTF_DESTEN_VALID;
+	}
 }
 
-/*
- * Frees all flows tied to this rt that 
- * have been marked stale
- *
- */
 static int
-ipv4_flow_free_stale(struct radix_node *rn, void *unused)
+flowtable_key_equal(struct flentry *fle, uint32_t *key, int flags)
 {
-	struct rtentry *rt = (struct rtentry *)rn;
-	uint32_t hash_next; 
-
-	if (rt->rt_flow_head == 0)
-		return (0);
+	uint32_t *hashkey;
+	int i, nwords;
 
-	RT_LOCK(rt);
-	hash_next = rt->rt_flow_head;
-	while (hash_next)
-		hash_next = ipv4_flow_free_internal(hash_next, 1);
-	RT_UNLOCK(rt);
+	if (flags & FL_IPV6) {
+		nwords = 9;
+		hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
+	} else {
+		nwords = 3;
+		hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
+	}
+	
+	for (i = 0; i < nwords; i++) 
+		if (hashkey[i] != key[i])
+			return (0);
 
-	return (0);
+	return (1);
 }
 
-struct radix_node_head *ipv4_flow_rnh_list[100];
-static void
-ipv4_flow_check_stale(struct flentry_v4 *fle,
-    struct radix_node_head **rnh_list, int *rnh_count)
+int
+flowtable_lookup(struct flowtable *ft, struct mbuf *m,
+    struct rtentry_info *ri)
 {
-	int count = *rnh_count;
-	uint32_t idle_ticks;
-	struct radix_node_head *rnh;
-	struct rtentry *rt;
-	int i, stale = 0, found = 0;
-	
-	if (ticks > fle->fl_ticks)
-		idle_ticks = ticks - fle->fl_ticks;
-	else
-		idle_ticks = (INT_MAX - fle->fl_ticks) + ticks ;
+	uint32_t key[9], hash;
+	struct flentry *fle;
+	uint16_t flags;
+	uint8_t proto;
+	struct route ro;
+	int cache = 1, error = 0;
+	u_char desten[ETHER_ADDR_LEN];
+
+	flags = ft ? ft->ft_flags : FL_LOCAL_XMIT;
+
+	/*
+	 * The internal hash lookup is the only IPv4 specific bit
+	 * remaining
+	 */
+	hash = ipv4_flow_lookup_hash_internal(m, &ro, key,
+	    &flags, &proto);
+
 	
-	if ((fle->fl_flags & FL_STALE) ||
-	    ((fle->fl_flags & (TH_SYN|TH_ACK|TH_FIN)) == 0
-		&& (idle_ticks > UDP_IDLE)) ||
-	    ((fle->fl_flags & TH_FIN)
-		&& (idle_ticks > FIN_WAIT_IDLE)) ||
-	    ((fle->fl_flags & (TH_SYN|TH_ACK)) == TH_SYN
-		&& (idle_ticks > SYN_IDLE)) ||
-	    ((fle->fl_flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)
-		&& (idle_ticks > TCP_IDLE)))
-		stale = 1;
-
-	if (stale == 0)
-		return;
-
-	fle->fl_flags |= FL_STALE;
-	rt = fle->fl_rt;
-	rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
-
-	for (i = 0; i < count; i++) 
-		if (rnh_list[i] == rnh) {
-			found  = 1;
-			break;
-		}
-	if (found == 0) {
-		rnh_list[count] = rnh;
-		count++;
-		*rnh_count = count;
+	/*
+	 * Ports are zero and this isn't a transmit cache
+	 * - thus not a protocol for which we need to keep 
+	 * statex
+	 * FL_LOCAL_XMIT => key[0] == 0 
+	 */
+	if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_LOCAL_XMIT) == 0)) {
+		cache = 0;
+		goto uncached;
 	}
-}
 
+	FL_ENTRY_LOCK(ft, hash);
+	fle = FL_ENTRY(ft, hash);
+	if (fle->f_fhash != hash) {
+		cache = !flow_stale(ft, fle);
+		FL_ENTRY_UNLOCK(ft, hash);
+	} else if (fle->f_fhash == hash
+	    && flowtable_key_equal(fle, key, flags)
+	    && (proto == fle->f_proto)
+	    && (fle->f_rt->rt_flags & RTF_UP)
+	    && (fle->f_uptime > fle->f_rt->rt_llinfo_uptime)) {
+
+		if ((fle->f_rt->rt_flags & RTF_GATEWAY) &&
+		    ((fle->f_rt->rt_gwroute->rt_flags & RTF_UP) == 0))
+			goto uncached;
+
+		fle->f_uptime = time_uptime;
+		fle->f_flags |= flags;
+		fle->f_rt->rt_rmx.rmx_pksent++;
+		route_to_rtentry_info(&ro, fle->f_desten, ri);
+		FL_ENTRY_UNLOCK(ft, hash);
+		return (0);
+	} 
+uncached:
+	ft->ft_rtalloc(&ro, hash, M_GETFIB(m));
+	if (ro.ro_rt == NULL) 
+		error = ENETUNREACH;
+	else {
+		RT_UNLOCK(ro.ro_rt);
+		error = arpresolve(ro.ro_rt->rt_ifp, ro.ro_rt, m,
+		    &ro.ro_dst, desten);
+		route_to_rtentry_info(&ro, error ? NULL : desten, ri);
+
+		if (error == 0 && cache)
+			flowtable_insert(ft, hash, key, proto,
+			    ro.ro_rt, desten, flags);
+		else
+			RTFREE(ro.ro_rt);
+		error = 0;
+	} 
 
+	return (error);
+}
+
+#ifdef notyet
 static __inline int
 bit_fns(bitstr_t *name, int nbits, int lastbit)
 {
@@ -545,60 +635,65 @@ bit_fns(bitstr_t *name, int nbits, int l
 
 	return (value);
 }
+#endif
 
-
-static int ipv4_flow_last_index;
-static void
-ipv4_flow_timeout(void *arg)
+struct flowtable *
+flowtable_alloc(int nentry, int flags)
 {
-	int i, idx, rnh_count = 0;
-	struct radix_node_head *rnh;
-	
-	/*
-	 * scan 1/4th of the table once a second
-	 */
-	for (i = 0; i < (ipv4_flow_allocated >> 2); i++) {
-		idx = bit_fns(ipv4_flow_bitstring, ipv4_flow_table_size,
-		    ipv4_flow_last_index);
-		if (idx == -1) {
-			ipv4_flow_last_index = 0;
-			break;
+	struct flowtable *ft;
+	int i;
+
+	ft = malloc(sizeof(struct flowtable),
+	    M_RTABLE, M_WAITOK | M_ZERO);
+
+	ft->ft_flags = flags;
+	ft->ft_size = nentry;
+#ifdef RADIX_MPATH
+	ft->ft_rtalloc = rtalloc_mpath_fib;
+#else
+	ft->ft_rtalloc = in_rtalloc_ign_wrapper;
+#endif
+	if (flags & FL_PCPU) {
+		ft->ft_lock = flowtable_pcpu_lock;
+		ft->ft_unlock = flowtable_pcpu_unlock;
+
+		for (i = 0; i < mp_ncpus; i++) {
+			ft->ft_table.v4_pcpu[i] =
+			    malloc(nentry*sizeof(struct flentry_v4),
+				M_RTABLE, M_WAITOK | M_ZERO);
+			ft->ft_masks[i] = bit_alloc(nentry);
 		}
+	} else {
+		ft->ft_lock_count = 2*(powerof2(mp_ncpus) ? mp_ncpus :
+		    (fls(mp_ncpus) << 1));
 		
-		FL_ENTRY_LOCK(idx);
-		ipv4_flow_check_stale(FL_ENTRY(idx), ipv4_flow_rnh_list, &rnh_count);
-		FL_ENTRY_UNLOCK(idx);
-	}
-	for (i = 0; i < rnh_count; i++) {
-		rnh = ipv4_flow_rnh_list[i];
-		RADIX_NODE_HEAD_LOCK(rnh);
-		rnh->rnh_walktree(rnh, ipv4_flow_free_stale, NULL);
-		RADIX_NODE_HEAD_UNLOCK(rnh);
-	}
+		ft->ft_lock = flowtable_global_lock;
+		ft->ft_unlock = flowtable_global_unlock;
+		ft->ft_table.v4 =
+			    malloc(nentry*sizeof(struct flentry_v4),
+				M_RTABLE, M_WAITOK | M_ZERO);
+		ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
+				M_RTABLE, M_WAITOK | M_ZERO);
+		for (i = 0; i < ft->ft_lock_count; i++)
+			mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF);
 
-	callout_reset(&ipv4_flow_callout, hz, ipv4_flow_timeout, NULL);
-}
-
-static void
-flowtable_init(void *unused) 
-{
-	int i, nentry;
+		ft->ft_masks[0] = bit_alloc(nentry);
+	}
 
-	nentry = ipv4_flow_max_count;
 	/*
-	 * round mp_ncpus up to the next power of 2 and double
-	 * to determine the number of locks
+	 * In the local transmit case the table truly is 
+	 * just a cache - so everything is eligible for
+	 * replacement after 5s of non-use
 	 */
-	ipv4_flow_lock_count = (1 << fls(mp_ncpus)) << 1;
-	
-	ipv4_flow_table_size = nentry;
-	ipv4_flow_table = malloc(nentry*sizeof(struct flentry_v4),
-	    M_RTABLE, M_WAITOK | M_ZERO);
-	ipv4_flow_bitstring = bit_alloc(nentry);
-	ipv4_flow_locks = malloc(ipv4_flow_lock_count*sizeof(struct mtx),
-	    M_RTABLE, M_WAITOK | M_ZERO);
-	for (i = 0; i < ipv4_flow_lock_count; i++)
-		mtx_init(&ipv4_flow_locks[i], "ipv4_flow", NULL, MTX_DEF);
+	if (flags & FL_LOCAL_XMIT)
+		ft->ft_udp_idle = ft->ft_fin_wait_idle =
+		    ft->ft_syn_idle = ft->ft_tcp_idle = 5;
+	else {
+		ft->ft_udp_idle = UDP_IDLE;
+		ft->ft_syn_idle = SYN_IDLE;
+		ft->ft_fin_wait_idle = FIN_WAIT_IDLE;
+		ft->ft_tcp_idle = TCP_IDLE;
+	}
 	
+	return (ft);
 }
-SYSINIT(flowtable, SI_SUB_INIT_IF, SI_ORDER_ANY, flowtable_init, NULL);

Added: user/kmacy/HEAD_fast_xmit/sys/net/flowtable.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/kmacy/HEAD_fast_xmit/sys/net/flowtable.h	Mon Oct 27 05:19:26 2008	(r184320)
@@ -0,0 +1,49 @@
+#ifndef _NET_FLOWTABLE_H_
+#define	_NET_FLOWTABLE_H_
+
+#ifdef _KERNEL
+#include <net/ethernet.h>
+#include <netinet/in.h>
+
+#define FL_LOCAL_XMIT 	(1<<0)	/* per host, don't hash ports */ 
+#define FL_PCPU		(1<<1)	/* pcpu cache */
+#define FL_IPV6		(1<<2)	/* IPv6 table */
+
+struct flowtable;
+struct flowtable *flowtable_alloc(int nentry, int flags);
+
+struct rtentry_info {
+	struct ifnet		*ri_ifp;
+	struct ifaddr 		*ri_ifa;
+	int			ri_flags;
+	int			ri_mtu;
+	u_char			ri_desten[ETHER_ADDR_LEN];
+	struct sockaddr_in	ri_dst; /* rt_gateway if RTF_GATEWAY */
+};
+
+struct rtentry_info6 {
+	struct ifnet		*ri_ifp;
+	struct ifaddr 		*ri_ifa;
+	int			ri_flags;
+	int			ri_mtu;
+	u_char			ri_desten[ETHER_ADDR_LEN];
+	struct sockaddr_in6	ri_dst; /* rt_gateway if RTF_GATEWAY */
+};
+
+/*
+ * Given a flow table, look up the L3 and L2 information and
+ * return it in ri
+ *
+ */
+int flowtable_lookup(struct flowtable *ft, struct mbuf *m,
+    struct rtentry_info *ri);
+/*
+ * Convert a route and an (optional) L2 address to an 
+ * rtentry_info
+ *
+ */
+void route_to_rtentry_info(struct route *ro, u_char *desten,
+    struct rtentry_info *ri);
+
+#endif
+#endif


More information about the svn-src-user mailing list