svn commit: r261601 - in head: sys/net sys/netinet sys/netinet6 usr.bin/netstat

Gleb Smirnoff glebius at FreeBSD.org
Fri Feb 7 15:18:23 UTC 2014


Author: glebius
Date: Fri Feb  7 15:18:23 2014
New Revision: 261601
URL: http://svnweb.freebsd.org/changeset/base/261601

Log:
  o Revamp API between flowtable and netinet, netinet6.
    - ip_output() and ip6_output() simply call flowtable_lookup(),
      passing the mbuf and address family. That's the only code under
      #ifdef FLOWTABLE in the protocols code now (a rough sketch of the
      new call site follows the log).
  o Revamp statistics gathering and export.
    - Remove hand-made pcpu stats and utilize counter(9) (a sketch of
      the counter layout precedes the flowtable.c diff below).
    - A snapshot of the statistics is available via 'netstat -rs'.
    - All sysctls are moved into the net.flowtable namespace, since
      spreading them over net.inet isn't correct.
  o Properly separate the INET and INET6 parts at compile time.
  o General cleanup.
    - Remove chain of multiple flowtables. We simply have one for
      IPv4 and one for IPv6.
    - Flowtables are allocated in flowtable.c; their symbols are static.
    - With a proper argument to SYSINIT() we no longer need flowtable_ready.
    - Hash salt doesn't need to be per-VNET.
    - Remove the rudimentary debugging, which is quite useless in the
      dtrace era.
  
  The runtime behavior of flowtable shouldn't be changed by this commit.
  
  Sponsored by:	Netflix
  Sponsored by:	Nginx, Inc.
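
  As a rough illustration (the netinet hunks fall past the truncation
  point of the diff below, so this is a sketch rather than the committed
  code, and flow_to_route() is assumed from the existing flowtable API),
  the simplified call site in ip_output() now reduces to something like:

	#ifdef FLOWTABLE
		if (ro->ro_rt == NULL) {
			struct flentry *fle;

			/*
			 * Look up (or create) a cached flow for this mbuf
			 * in the IPv4 flowtable; on a hit, populate the
			 * route and llentry from the flow entry.
			 */
			fle = flowtable_lookup(AF_INET, m);
			if (fle != NULL)
				flow_to_route(fle, ro);
		}
	#endif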

Added:
  head/usr.bin/netstat/flowtable.c   (contents, props changed)
Modified:
  head/sys/net/flowtable.c
  head/sys/net/flowtable.h
  head/sys/net/route.c
  head/sys/netinet/ip_input.c
  head/sys/netinet/ip_output.c
  head/sys/netinet6/in6_proto.c
  head/sys/netinet6/ip6_input.c
  head/sys/netinet6/ip6_output.c
  head/usr.bin/netstat/Makefile
  head/usr.bin/netstat/main.c
  head/usr.bin/netstat/netstat.h

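The statistics conversion below indexes a flat array of counter(9)
counters by a field's offset within struct flowtable_stat (see the
FLOWSTAT_* macros in the first flowtable.c hunk). A minimal sketch of
how such an array can be allocated and used follows; the exact struct
layout and the flowtable_init_stats() helper name are illustrative
assumptions, not taken from the commit:

	#include <sys/param.h>
	#include <sys/malloc.h>
	#include <sys/counter.h>

	struct flowtable_stat {
		uint64_t	ft_collisions;
		uint64_t	ft_misses;
		uint64_t	ft_free_checks;
		uint64_t	ft_frees;
		uint64_t	ft_hits;
		uint64_t	ft_lookups;
	};
	#define	FT_NCOUNTERS	(sizeof(struct flowtable_stat) / sizeof(uint64_t))

	static void
	flowtable_init_stats(struct flowtable *ft)
	{

		/* One per-CPU counter(9) counter per statistic field. */
		ft->ft_stat = malloc(FT_NCOUNTERS * sizeof(counter_u64_t),
		    M_RTABLE, M_WAITOK | M_ZERO);
		for (u_int i = 0; i < FT_NCOUNTERS; i++)
			ft->ft_stat[i] = counter_u64_alloc(M_WAITOK);
	}

With that layout, FLOWSTAT_INC(ft, ft_hits) becomes a counter_u64_add()
on ft->ft_stat[offsetof(struct flowtable_stat, ft_hits) / sizeof(uint64_t)],
so the fast path only touches a per-CPU counter and a userland snapshot
for netstat can be produced by fetching each counter in turn.
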
Modified: head/sys/net/flowtable.c
==============================================================================
--- head/sys/net/flowtable.c	Fri Feb  7 15:10:24 2014	(r261600)
+++ head/sys/net/flowtable.c	Fri Feb  7 15:18:23 2014	(r261601)
@@ -146,23 +146,13 @@ union flentryp {
 	struct flentry		**pcpu[MAXCPU];
 };
 
-struct flowtable_stats {
-	uint64_t	ft_collisions;
-	uint64_t	ft_allocated;
-	uint64_t	ft_misses;
-	uint64_t	ft_max_depth;
-	uint64_t	ft_free_checks;
-	uint64_t	ft_frees;
-	uint64_t	ft_hits;
-	uint64_t	ft_lookups;
-} __aligned(CACHE_LINE_SIZE);
-
 struct flowtable {
-	struct	flowtable_stats ft_stats[MAXCPU];
+	counter_u64_t	*ft_stat;
+	uma_zone_t	ft_zone;
 	int 		ft_size;
 	int 		ft_lock_count;
 	uint32_t	ft_flags;
-	char		*ft_name;
+	uint32_t	ft_max_depth;
 	fl_lock_t	*ft_lock;
 	fl_lock_t 	*ft_unlock;
 	fl_rtalloc_t	*ft_rtalloc;
@@ -173,9 +163,7 @@ struct flowtable {
 	union flentryp	ft_table;
 	bitstr_t 	*ft_masks[MAXCPU];
 	bitstr_t	*ft_tmpmask;
-	struct flowtable *ft_next;
 
-	uint32_t	ft_count __aligned(CACHE_LINE_SIZE);
 	uint32_t	ft_udp_idle __aligned(CACHE_LINE_SIZE);
 	uint32_t	ft_fin_wait_idle;
 	uint32_t	ft_syn_idle;
@@ -183,17 +171,12 @@ struct flowtable {
 	boolean_t	ft_full;
 } __aligned(CACHE_LINE_SIZE);
 
-static struct proc *flowcleanerproc;
-static VNET_DEFINE(struct flowtable *, flow_list_head);
-static VNET_DEFINE(uint32_t, flow_hashjitter);
-static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
-static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
-
-#define	V_flow_list_head	VNET(flow_list_head)
-#define	V_flow_hashjitter	VNET(flow_hashjitter)
-#define	V_flow_ipv4_zone	VNET(flow_ipv4_zone)
-#define	V_flow_ipv6_zone	VNET(flow_ipv6_zone)
+#define	FLOWSTAT_ADD(ft, name, v)	\
+	counter_u64_add((ft)->ft_stat[offsetof(struct flowtable_stat, name) / sizeof(uint64_t)], (v))
+#define	FLOWSTAT_INC(ft, name)	FLOWSTAT_ADD(ft, name, 1)
 
+static struct proc *flowcleanerproc;
+static uint32_t flow_hashjitter;
 
 static struct cv 	flowclean_f_cv;
 static struct cv 	flowclean_c_cv;
@@ -201,24 +184,8 @@ static struct mtx	flowclean_lock;
 static uint32_t		flowclean_cycles;
 static uint32_t		flowclean_freq;
 
-#ifdef FLOWTABLE_DEBUG
-#define FLDPRINTF(ft, flags, fmt, ...) 		\
-do {		  				\
-	if ((ft)->ft_flags & (flags))		\
-		printf((fmt), __VA_ARGS__);	\
-} while (0);					\
-
-#else
-#define FLDPRINTF(ft, flags, fmt, ...)
-
-#endif
-
-
 /*
  * TODO:
- * - Make flowtable stats per-cpu, aggregated at sysctl call time,
- *   to avoid extra cache evictions caused by incrementing a shared
- *   counter
  * - add sysctls to resize && flush flow tables
  * - Add per flowtable sysctls for statistics and configuring timeouts
  * - add saturation counter to rtentry to support per-packet load-balancing
@@ -230,148 +197,51 @@ do {		  				\
  * - support explicit connection state (currently only ad-hoc for DSR)
  * - idetach() cleanup for options VIMAGE builds.
  */
-VNET_DEFINE(int, flowtable_enable) = 1;
-static VNET_DEFINE(int, flowtable_debug);
+#ifdef INET
+static VNET_DEFINE(struct flowtable, ip4_ft);
+#define V_ip4_ft	VNET(ip4_ft)
+static uma_zone_t	flow_ipv4_zone;
+#endif
+#ifdef INET6
+static VNET_DEFINE(struct flowtable, ip6_ft);
+#define	V_ip6_ft	VNET(ip6_ft)
+static uma_zone_t	flow_ipv6_zone;
+#endif
+
+static VNET_DEFINE(int, flowtable_enable) = 1;
 static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
 static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
 static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
 static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
-static VNET_DEFINE(int, flowtable_nmbflows);
-static VNET_DEFINE(int, flowtable_ready) = 0;
 
 #define	V_flowtable_enable		VNET(flowtable_enable)
-#define	V_flowtable_debug		VNET(flowtable_debug)
 #define	V_flowtable_syn_expire		VNET(flowtable_syn_expire)
 #define	V_flowtable_udp_expire		VNET(flowtable_udp_expire)
 #define	V_flowtable_fin_wait_expire	VNET(flowtable_fin_wait_expire)
 #define	V_flowtable_tcp_expire		VNET(flowtable_tcp_expire)
-#define	V_flowtable_nmbflows		VNET(flowtable_nmbflows)
-#define	V_flowtable_ready		VNET(flowtable_ready)
 
-static SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
+static SYSCTL_NODE(_net, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
     "flowtable");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
-    &VNET_NAME(flowtable_debug), 0, "print debug info.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, enable, CTLFLAG_RW,
     &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
 
 /*
  * XXX This does not end up updating timeouts at runtime
  * and only reflects the value for the last table added :-/
  */
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
     &VNET_NAME(flowtable_syn_expire), 0,
     "seconds after which to remove syn allocated flow.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
     &VNET_NAME(flowtable_udp_expire), 0,
     "seconds after which to remove flow allocated to UDP.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
     &VNET_NAME(flowtable_fin_wait_expire), 0,
     "seconds after which to remove a flow in FIN_WAIT.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
     &VNET_NAME(flowtable_tcp_expire), 0,
     "seconds after which to remove flow allocated to a TCP connection.");
 
-
-/*
- * Maximum number of flows that can be allocated of a given type.
- *
- * The table is allocated at boot time (for the pure caching case
- * there is no reason why this could not be changed at runtime)
- * and thus (currently) needs to be set with a tunable.
- */
-static int
-sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
-{
-	int error, newnmbflows;
-
-	newnmbflows = V_flowtable_nmbflows;
-	error = sysctl_handle_int(oidp, &newnmbflows, 0, req);
-	if (error == 0 && req->newptr) {
-		if (newnmbflows > V_flowtable_nmbflows) {
-			V_flowtable_nmbflows = newnmbflows;
-			uma_zone_set_max(V_flow_ipv4_zone,
-			    V_flowtable_nmbflows);
-			uma_zone_set_max(V_flow_ipv6_zone,
-			    V_flowtable_nmbflows);
-		} else
-			error = EINVAL;
-	}
-	return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
-    CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
-    "Maximum number of flows allowed");
-
-
-
-#define FS_PRINT(sb, field)	sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
-
-static void
-fs_print(struct sbuf *sb, struct flowtable_stats *fs)
-{
-
-	FS_PRINT(sb, collisions);
-	FS_PRINT(sb, allocated);
-	FS_PRINT(sb, misses);
-	FS_PRINT(sb, max_depth);
-	FS_PRINT(sb, free_checks);
-	FS_PRINT(sb, frees);
-	FS_PRINT(sb, hits);
-	FS_PRINT(sb, lookups);
-}
-
-static void
-flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
-{
-	int i;
-	struct flowtable_stats fs, *pfs;
-
-	if (ft->ft_flags & FL_PCPU) {
-		bzero(&fs, sizeof(fs));
-		pfs = &fs;
-		CPU_FOREACH(i) {
-			pfs->ft_collisions  += ft->ft_stats[i].ft_collisions;
-			pfs->ft_allocated   += ft->ft_stats[i].ft_allocated;
-			pfs->ft_misses      += ft->ft_stats[i].ft_misses;
-			pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
-			pfs->ft_frees       += ft->ft_stats[i].ft_frees;
-			pfs->ft_hits        += ft->ft_stats[i].ft_hits;
-			pfs->ft_lookups     += ft->ft_stats[i].ft_lookups;
-			if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
-				pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
-		}
-	} else {
-		pfs = &ft->ft_stats[0];
-	}
-	fs_print(sb, pfs);
-}
-
-static int
-sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
-{
-	struct flowtable *ft;
-	struct sbuf *sb;
-	int error;
-
-	sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
-
-	ft = V_flow_list_head;
-	while (ft != NULL) {
-		sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
-		flowtable_show_stats(sb, ft);
-		ft = ft->ft_next;
-	}
-	sbuf_finish(sb);
-	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
-	sbuf_delete(sb);
-
-	return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
-    NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
-
-
 #ifndef RADIX_MPATH
 static void
 rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
@@ -419,12 +289,8 @@ flowtable_pcpu_unlock(struct flowtable *
 #define FL_STALE 	(1<<8)
 #define FL_OVERWRITE	(1<<10)
 
-void
-flow_invalidate(struct flentry *fle)
-{
-
-	fle->f_flags |= FL_STALE;
-}
+static struct flentry *flowtable_lookup_common(struct flowtable *,
+    struct sockaddr_storage *, struct sockaddr_storage *, struct mbuf *, int);
 
 static __inline int
 proto_to_flags(uint8_t proto)
@@ -495,8 +361,8 @@ ipv4_flow_print_tuple(int flags, int pro
 #endif
 
 static int
-ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
-    struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
+ipv4_mbuf_demarshal(struct mbuf *m, struct sockaddr_in *ssin,
+    struct sockaddr_in *dsin, uint16_t *flags)
 {
 	struct ip *ip;
 	uint8_t proto;
@@ -516,11 +382,8 @@ ipv4_mbuf_demarshal(struct flowtable *ft
 	ssin->sin_addr = ip->ip_src;	
 
 	proto = ip->ip_p;
-	if ((*flags & FL_HASH_ALL) == 0) {
-		FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
-		    *flags);
+	if ((*flags & FL_HASH_ALL) == 0)
 		goto skipports;
-	}
 
 	iphlen = ip->ip_hl << 2; /* XXX options? */
 
@@ -544,7 +407,6 @@ ipv4_mbuf_demarshal(struct flowtable *ft
 		dport = sh->dest_port;
 		break;
 	default:
-		FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
 		return (ENOTSUP);
 		/* no port - hence not a protocol we care about */
 		break;
@@ -559,7 +421,7 @@ skipports:
 }
 
 static uint32_t
-ipv4_flow_lookup_hash_internal(
+ipv4_flow_lookup_hash(
 	struct sockaddr_in *ssin, struct sockaddr_in *dsin,
 	    uint32_t *key, uint16_t flags)
 {
@@ -567,8 +429,6 @@ ipv4_flow_lookup_hash_internal(
 	uint8_t proto;
 	int offset = 0;
 
-	if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
-		return (0);
 	proto = flags_to_proto(flags);
 	sport = dport = key[2] = key[1] = key[0] = 0;
 	if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
@@ -583,13 +443,13 @@ ipv4_flow_lookup_hash_internal(
 		((uint16_t *)key)[0] = sport;
 		((uint16_t *)key)[1] = dport;
 	} else
-		offset = V_flow_hashjitter + proto;
+		offset = flow_hashjitter + proto;
 
 	return (jenkins_hash32(key, 3, offset));
 }
 
 static struct flentry *
-flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
+flowtable_lookup_ipv4(struct mbuf *m)
 {
 	struct sockaddr_storage ssa, dsa;
 	uint16_t flags;
@@ -599,11 +459,11 @@ flowtable_lookup_mbuf4(struct flowtable 
 	ssin = (struct sockaddr_in *)&ssa;
 	bzero(dsin, sizeof(*dsin));
 	bzero(ssin, sizeof(*ssin));
-	flags = ft->ft_flags;
-	if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
+	flags = V_ip4_ft.ft_flags;
+	if (ipv4_mbuf_demarshal(m, ssin, dsin, &flags) != 0)
 		return (NULL);
 
-	return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
+	return (flowtable_lookup_common(&V_ip4_ft, &ssa, &dsa, m, flags));
 }
 
 void
@@ -644,8 +504,8 @@ do {									\
 #define	UDP(p)		((struct udphdr *)(p))
 
 static int
-ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
-    struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
+ipv6_mbuf_demarshal(struct mbuf *m, struct sockaddr_in6 *ssin6,
+    struct sockaddr_in6 *dsin6, uint16_t *flags)
 {
 	struct ip6_hdr *ip6;
 	uint8_t proto;
@@ -763,7 +623,7 @@ do {				\
 } while (0)
 	
 static uint32_t
-ipv6_flow_lookup_hash_internal(
+ipv6_flow_lookup_hash(
 	struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6,
 	    uint32_t *key, uint16_t flags)
 {
@@ -771,9 +631,6 @@ ipv6_flow_lookup_hash_internal(
 	uint8_t proto;
 	int offset = 0;
 
-	if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
-		return (0);
-
 	proto = flags_to_proto(flags);
 	zero_key(key);
 	sport = dport = 0;
@@ -789,13 +646,13 @@ ipv6_flow_lookup_hash_internal(
 		((uint16_t *)key)[0] = sport;
 		((uint16_t *)key)[1] = dport;
 	} else
-		offset = V_flow_hashjitter + proto;
+		offset = flow_hashjitter + proto;
 
 	return (jenkins_hash32(key, 9, offset));
 }
 
 static struct flentry *
-flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
+flowtable_lookup_ipv6(struct mbuf *m)
 {
 	struct sockaddr_storage ssa, dsa;
 	struct sockaddr_in6 *dsin6, *ssin6;	
@@ -805,12 +662,12 @@ flowtable_lookup_mbuf6(struct flowtable 
 	ssin6 = (struct sockaddr_in6 *)&ssa;
 	bzero(dsin6, sizeof(*dsin6));
 	bzero(ssin6, sizeof(*ssin6));
-	flags = ft->ft_flags;
+	flags = V_ip6_ft.ft_flags;
 	
-	if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
+	if (ipv6_mbuf_demarshal(m, ssin6, dsin6, &flags) != 0)
 		return (NULL);
 
-	return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
+	return (flowtable_lookup_common(&V_ip6_ft, &ssa, &dsa, m, flags));
 }
 
 void
@@ -910,43 +767,19 @@ flowtable_set_hashkey(struct flentry *fl
 		hashkey[i] = key[i];
 }
 
-static struct flentry *
-flow_alloc(struct flowtable *ft)
-{
-	struct flentry *newfle;
-	uma_zone_t zone;
-
-	newfle = NULL;
-	zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
-
-	newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
-	if (newfle != NULL)
-		atomic_add_int(&ft->ft_count, 1);
-	return (newfle);
-}
-
-static void
-flow_free(struct flentry *fle, struct flowtable *ft)
-{
-	uma_zone_t zone;
-
-	zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
-	atomic_add_int(&ft->ft_count, -1);
-	uma_zfree(zone, fle);
-}
-
 static int
 flow_full(struct flowtable *ft)
 {
 	boolean_t full;
-	uint32_t count;
+	int count, max;
 	
 	full = ft->ft_full;
-	count = ft->ft_count;
+	count = uma_zone_get_cur(ft->ft_zone);
+	max = uma_zone_get_max(ft->ft_zone);
 
-	if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
+	if (full && (count < (max - (max >> 3))))
 		ft->ft_full = FALSE;
-	else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
+	else if (!full && (count > (max - (max >> 5))))
 		ft->ft_full = TRUE;
 	
 	if (full && !ft->ft_full) {
@@ -970,12 +803,11 @@ flowtable_insert(struct flowtable *ft, u
     uint32_t fibnum, struct route *ro, uint16_t flags)
 {
 	struct flentry *fle, *fletail, *newfle, **flep;
-	struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 	int depth;
 	bitstr_t *mask;
 	uint8_t proto;
 
-	newfle = flow_alloc(ft);
+	newfle = uma_zalloc(ft->ft_zone, M_NOWAIT | M_ZERO);
 	if (newfle == NULL)
 		return (ENOMEM);
 
@@ -994,7 +826,7 @@ flowtable_insert(struct flowtable *ft, u
 	}
 	
 	depth = 0;
-	fs->ft_collisions++;
+	FLOWSTAT_INC(ft, ft_collisions);
 	/*
 	 * find end of list and make sure that we were not
 	 * preempted by another thread handling this flow
@@ -1006,7 +838,7 @@ flowtable_insert(struct flowtable *ft, u
 			 * or we lost a race to insert
 			 */
 			FL_ENTRY_UNLOCK(ft, hash);
-			flow_free(newfle, ft);
+			uma_zfree(ft->ft_zone, newfle);
 			
 			if (flags & FL_OVERWRITE)
 				goto skip;
@@ -1022,8 +854,8 @@ flowtable_insert(struct flowtable *ft, u
 		fle = fle->f_next;
 	}
 
-	if (depth > fs->ft_max_depth)
-		fs->ft_max_depth = depth;
+	if (depth > ft->ft_max_depth)
+		ft->ft_max_depth = depth;
 	fletail->f_next = newfle;
 	fle = newfle;
 skip:
@@ -1039,35 +871,6 @@ skip:
 	return (0);
 }
 
-int
-kern_flowtable_insert(struct flowtable *ft,
-    struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
-    struct route *ro, uint32_t fibnum, int flags)
-{
-	uint32_t key[9], hash;
-
-	flags = (ft->ft_flags | flags | FL_OVERWRITE);
-	hash = 0;
-
-#ifdef INET
-	if (ssa->ss_family == AF_INET)
-		hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
-		    (struct sockaddr_in *)dsa, key, flags);
-#endif
-#ifdef INET6
-	if (ssa->ss_family == AF_INET6)
-		hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
-		    (struct sockaddr_in6 *)dsa, key, flags);
-#endif	
-	if (ro->ro_rt == NULL || ro->ro_lle == NULL)
-		return (EINVAL);
-
-	FLDPRINTF(ft, FL_DEBUG,
-	    "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
-	    key[0], key[1], key[2], hash, fibnum, flags);
-	return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
-}
-
 static int
 flowtable_key_equal(struct flentry *fle, uint32_t *key)
 {
@@ -1090,49 +893,54 @@ flowtable_key_equal(struct flentry *fle,
 }
 
 struct flentry *
-flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
+flowtable_lookup(sa_family_t sa, struct mbuf *m)
 {
-	struct flentry *fle = NULL;
 
+	switch (sa) {
 #ifdef INET
-	if (af == AF_INET)
-		fle = flowtable_lookup_mbuf4(ft, m);
+	case AF_INET:
+		return (flowtable_lookup_ipv4(m));
 #endif
 #ifdef INET6
-	if (af == AF_INET6)
-		fle = flowtable_lookup_mbuf6(ft, m);
-#endif	
-	if (fle != NULL && m != NULL && (m->m_flags & M_FLOWID) == 0) {
-		m->m_flags |= M_FLOWID;
-		m->m_pkthdr.flowid = fle->f_fhash;
+	case AF_INET6:
+		return (flowtable_lookup_ipv6(m));
+#endif
+	default:
+		panic("%s: sa %d", __func__, sa);
 	}
-	return (fle);
 }
-	
-struct flentry *
-flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
-    struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
+
+static struct flentry *
+flowtable_lookup_common(struct flowtable *ft, struct sockaddr_storage *ssa,
+    struct sockaddr_storage *dsa, struct mbuf *m, int flags)
 {
-	uint32_t key[9], hash;
+	struct route_in6 sro6;
+	struct route sro, *ro;
 	struct flentry *fle;
-	struct flowtable_stats *fs = &ft->ft_stats[curcpu];
-	uint8_t proto = 0;
-	int error = 0;
 	struct rtentry *rt;
 	struct llentry *lle;
-	struct route sro, *ro;
-	struct route_in6 sro6;
+	struct sockaddr_storage *l3addr;
+	struct ifnet *ifp;
+	uint32_t key[9], hash, fibnum;
+	uint8_t proto;
+
+	if (V_flowtable_enable == 0)
+		return (NULL);
 
 	sro.ro_rt = sro6.ro_rt = NULL;
 	sro.ro_lle = sro6.ro_lle = NULL;
-	ro = NULL;
-	hash = 0;
 	flags |= ft->ft_flags;
 	proto = flags_to_proto(flags);
+	fibnum = M_GETFIB(m);
+
+	switch (ssa->ss_family) {
 #ifdef INET
-	if (ssa->ss_family == AF_INET) {
+	case AF_INET: {
 		struct sockaddr_in *ssin, *dsin;
 
+		KASSERT(dsa->ss_family == AF_INET,
+		    ("%s: dsa family %d\n", __func__, dsa->ss_family));
+
 		ro = &sro;
 		memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
 		/*
@@ -1150,13 +958,17 @@ flowtable_lookup(struct flowtable *ft, s
 		    (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
 			return (NULL);
 
-		hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
+		hash = ipv4_flow_lookup_hash(ssin, dsin, key, flags);
+		break;
 	}
 #endif
 #ifdef INET6
-	if (ssa->ss_family == AF_INET6) {
+	case AF_INET6: {
 		struct sockaddr_in6 *ssin6, *dsin6;
 
+		KASSERT(dsa->ss_family == AF_INET6,
+		    ("%s: dsa family %d\n", __func__, dsa->ss_family));
+
 		ro = (struct route *)&sro6;
 		memcpy(&sro6.ro_dst, dsa,
 		    sizeof(struct sockaddr_in6));
@@ -1165,19 +977,24 @@ flowtable_lookup(struct flowtable *ft, s
 		ssin6 = (struct sockaddr_in6 *)ssa;
 
 		flags |= FL_IPV6;
-		hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
+		hash = ipv6_flow_lookup_hash(ssin6, dsin6, key, flags);
+		break;
 	}
 #endif
+	default:
+		panic("%s: ssa family %d", __func__, ssa->ss_family);
+	}
+
 	/*
 	 * Ports are zero and this isn't a transmit cache
 	 * - thus not a protocol for which we need to keep
 	 * state
 	 * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
 	 */
-	if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
+	if (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL))
 		return (NULL);
 
-	fs->ft_lookups++;
+	FLOWSTAT_INC(ft, ft_lookups);
 	FL_ENTRY_LOCK(ft, hash);
 	if ((fle = FL_ENTRY(ft, hash)) == NULL) {
 		FL_ENTRY_UNLOCK(ft, hash);
@@ -1195,11 +1012,11 @@ keycheck:	
 	    && (rt->rt_flags & RTF_UP)
 	    && (rt->rt_ifp != NULL)
 	    && (lle->la_flags & LLE_VALID)) {
-		fs->ft_hits++;
+		FLOWSTAT_INC(ft, ft_hits);
 		fle->f_uptime = time_uptime;
 		fle->f_flags |= flags;
 		FL_ENTRY_UNLOCK(ft, hash);
-		return (fle);
+		goto success;
 	} else if (fle->f_next != NULL) {
 		fle = fle->f_next;
 		goto keycheck;
@@ -1209,7 +1026,7 @@ uncached:
 	if (flags & FL_NOAUTO || flow_full(ft))
 		return (NULL);
 
-	fs->ft_misses++;
+	FLOWSTAT_INC(ft, ft_misses);
 	/*
 	 * This bit of code ends up locking the
 	 * same route 3 times (just like ip_output + ether_output)
@@ -1222,73 +1039,66 @@ uncached:
 	 * receive the route locked
 	 */
 
-#ifdef INVARIANTS
-	if ((ro->ro_dst.sa_family != AF_INET) &&
-	    (ro->ro_dst.sa_family != AF_INET6))
-		panic("sa_family == %d\n", ro->ro_dst.sa_family);
-#endif
-
 	ft->ft_rtalloc(ro, hash, fibnum);
 	if (ro->ro_rt == NULL)
-		error = ENETUNREACH;
-	else {
-		struct llentry *lle = NULL;
-		struct sockaddr_storage *l3addr;
-		struct rtentry *rt = ro->ro_rt;
-		struct ifnet *ifp = rt->rt_ifp;
+		return (NULL);
 
-		if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
-			RTFREE(rt);
-			ro->ro_rt = NULL;
-			return (NULL);
-		}
-#ifdef INET6
-		if (ssa->ss_family == AF_INET6) {
-			struct sockaddr_in6 *dsin6;
+	rt = ro->ro_rt;
+	ifp = rt->rt_ifp;
 
-			dsin6 = (struct sockaddr_in6 *)dsa;			
-			if (in6_localaddr(&dsin6->sin6_addr)) {
-				RTFREE(rt);
-				ro->ro_rt = NULL;
-				return (NULL);				
-			}
+	if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
+		RTFREE(rt);
+		return (NULL);
+	}
 
-			if (rt->rt_flags & RTF_GATEWAY)
-				l3addr = (struct sockaddr_storage *)rt->rt_gateway;
-			
-			else
-				l3addr = (struct sockaddr_storage *)&ro->ro_dst;
-			lle = llentry_alloc(ifp, LLTABLE6(ifp), l3addr);
-		}
-#endif	
+	switch (ssa->ss_family) {
 #ifdef INET
-		if (ssa->ss_family == AF_INET) {
-			if (rt->rt_flags & RTF_GATEWAY)
-				l3addr = (struct sockaddr_storage *)rt->rt_gateway;
-			else
-				l3addr = (struct sockaddr_storage *)&ro->ro_dst;
-			lle = llentry_alloc(ifp, LLTABLE(ifp), l3addr);	
-		}
-			
+	case AF_INET:
+		if (rt->rt_flags & RTF_GATEWAY)
+			l3addr = (struct sockaddr_storage *)rt->rt_gateway;
+		else
+			l3addr = (struct sockaddr_storage *)&ro->ro_dst;
+		lle = llentry_alloc(ifp, LLTABLE(ifp), l3addr);	
+		break;
 #endif
-		ro->ro_lle = lle;
+#ifdef INET6
+	case AF_INET6: {
+		struct sockaddr_in6 *dsin6;
 
-		if (lle == NULL) {
+		dsin6 = (struct sockaddr_in6 *)dsa;			
+		if (in6_localaddr(&dsin6->sin6_addr)) {
 			RTFREE(rt);
-			ro->ro_rt = NULL;
-			return (NULL);
+			return (NULL);				
 		}
-		error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
 
-		if (error) {
-			RTFREE(rt);
-			LLE_FREE(lle);
-			ro->ro_rt = NULL;
-			ro->ro_lle = NULL;
-		}
+		if (rt->rt_flags & RTF_GATEWAY)
+			l3addr = (struct sockaddr_storage *)rt->rt_gateway;
+		else
+			l3addr = (struct sockaddr_storage *)&ro->ro_dst;
+		lle = llentry_alloc(ifp, LLTABLE6(ifp), l3addr);
+		break;
+	}
+#endif	
+	}
+
+	if (lle == NULL) {
+		RTFREE(rt);
+		return (NULL);
+	}
+	ro->ro_lle = lle;
+
+	if (flowtable_insert(ft, hash, key, fibnum, ro, flags) != 0) {
+		RTFREE(rt);
+		LLE_FREE(lle);
+		return (NULL);
 	}
 
-	return ((error) ? NULL : fle);
+success:
+	if (fle != NULL && (m->m_flags & M_FLOWID) == 0) {
+		m->m_flags |= M_FLOWID;
+		m->m_pkthdr.flowid = fle->f_fhash;
+	}
+	return (fle);
 }
 
 /*
@@ -1296,37 +1106,24 @@ uncached:
  */
 #define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
 	
-struct flowtable *
-flowtable_alloc(char *name, int nentry, int flags)
+static void
+flowtable_alloc(struct flowtable *ft)
 {
-	struct flowtable *ft, *fttail;
-	int i;
-
-	if (V_flow_hashjitter == 0)
-		V_flow_hashjitter = arc4random();
 
-	KASSERT(nentry > 0, ("nentry must be > 0, is %d\n", nentry));
-
-	ft = malloc(sizeof(struct flowtable),
-	    M_RTABLE, M_WAITOK | M_ZERO);
-
-	ft->ft_name = name;
-	ft->ft_flags = flags;
-	ft->ft_size = nentry;
 #ifdef RADIX_MPATH
 	ft->ft_rtalloc = rtalloc_mpath_fib;
 #else
 	ft->ft_rtalloc = rtalloc_ign_wrapper;
 #endif
-	if (flags & FL_PCPU) {
+	if (ft->ft_flags & FL_PCPU) {
 		ft->ft_lock = flowtable_pcpu_lock;
 		ft->ft_unlock = flowtable_pcpu_unlock;
 
-		for (i = 0; i <= mp_maxid; i++) {
+		for (int i = 0; i <= mp_maxid; i++) {
 			ft->ft_table.pcpu[i] =
-			    malloc(nentry*sizeof(struct flentry *),
+			    malloc(ft->ft_size * sizeof(struct flentry *),
 				M_RTABLE, M_WAITOK | M_ZERO);
-			ft->ft_masks[i] = bit_alloc(nentry);
+			ft->ft_masks[i] = bit_alloc(ft->ft_size);
 		}
 	} else {
 		ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
@@ -1335,23 +1132,24 @@ flowtable_alloc(char *name, int nentry, 
 		ft->ft_lock = flowtable_global_lock;
 		ft->ft_unlock = flowtable_global_unlock;
 		ft->ft_table.global =
-			    malloc(nentry*sizeof(struct flentry *),
+			    malloc(ft->ft_size * sizeof(struct flentry *),
 				M_RTABLE, M_WAITOK | M_ZERO);
 		ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
 				M_RTABLE, M_WAITOK | M_ZERO);
-		for (i = 0; i < ft->ft_lock_count; i++)
-			mtx_init(&ft->ft_locks[i], "flow", NULL, MTX_DEF|MTX_DUPOK);
+		for (int i = 0; i < ft->ft_lock_count; i++)
+			mtx_init(&ft->ft_locks[i], "flow", NULL,
+			    MTX_DEF | MTX_DUPOK);
 
-		ft->ft_masks[0] = bit_alloc(nentry);
+		ft->ft_masks[0] = bit_alloc(ft->ft_size);
 	}
-	ft->ft_tmpmask = bit_alloc(nentry);
+	ft->ft_tmpmask = bit_alloc(ft->ft_size);
 
 	/*
 	 * In the local transmit case the table truly is
 	 * just a cache - so everything is eligible for
 	 * replacement after 5s of non-use
 	 */
-	if (flags & FL_HASH_ALL) {
+	if (ft->ft_flags & FL_HASH_ALL) {
 		ft->ft_udp_idle = V_flowtable_udp_expire;
 		ft->ft_syn_idle = V_flowtable_syn_expire;
 		ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
@@ -1361,20 +1159,6 @@ flowtable_alloc(char *name, int nentry, 
 		    ft->ft_syn_idle = ft->ft_tcp_idle = 30;
 		
 	}
-
-	/*
-	 * hook in to the cleaner list
-	 */
-	if (V_flow_list_head == NULL)
-		V_flow_list_head = ft;
-	else {
-		fttail = V_flow_list_head;
-		while (fttail->ft_next != NULL)
-			fttail = fttail->ft_next;
-		fttail->ft_next = ft;
-	}
-
-	return (ft);
 }
 
 /*
@@ -1395,17 +1179,16 @@ fle_free(struct flentry *fle, struct flo
 		RTFREE(rt);
 	if (lle != NULL)
 		LLE_FREE(lle);
-	flow_free(fle, ft);
+	uma_zfree(ft->ft_zone, fle);
 }
 
 static void
 flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
 {
-	int curbit = 0, count, tmpsize;
+	int curbit = 0, tmpsize;
 	struct flentry *fle,  **flehead, *fleprev;
 	struct flentry *flefreehead, *flefreetail, *fletmp;
 	bitstr_t *mask, *tmpmask;
-	struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 
 	flefreehead = flefreetail = NULL;
 	mask = flowtable_mask(ft);
@@ -1429,7 +1212,7 @@ flowtable_free_stale(struct flowtable *f
 		flehead = flowtable_entry(ft, curbit);
 		fle = fleprev = *flehead;
 
-		fs->ft_free_checks++;
+		FLOWSTAT_INC(ft, ft_free_checks);
 #ifdef DIAGNOSTIC
 		if (fle == NULL && curbit > 0) {
 			log(LOG_ALERT,
@@ -1484,22 +1267,34 @@ flowtable_free_stale(struct flowtable *f
 		tmpsize -= (curbit / 8) * 8;
 		bit_ffs(tmpmask, tmpsize, &curbit);
 	}
-	count = 0;
 	while ((fle = flefreehead) != NULL) {
 		flefreehead = fle->f_next;
-		count++;
-		fs->ft_frees++;
+		FLOWSTAT_INC(ft, ft_frees);
 		fle_free(fle, ft);
 	}
-	if (V_flowtable_debug && count)
-		log(LOG_DEBUG, "freed %d flow entries\n", count);
 }
 
 void
-flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
+flowtable_route_flush(sa_family_t sa, struct rtentry *rt)
 {
+	struct flowtable *ft;
 	int i;
 
+	switch (sa) {
+#ifdef INET
+	case AF_INET:
+		ft = &V_ip4_ft;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		ft = &V_ip6_ft;
+		break;
+#endif
+	default:
+		panic("%s: sa %d", __func__, sa);
+	}
+
 	if (ft->ft_flags & FL_PCPU) {
 		CPU_FOREACH(i) {
 			if (smp_started == 1) {
@@ -1522,34 +1317,29 @@ flowtable_route_flush(struct flowtable *
 }
 
 static void
-flowtable_clean_vnet(void)
+flowtable_clean_vnet(struct flowtable *ft)
 {
-	struct flowtable *ft;
-	int i;
-
-	ft = V_flow_list_head;
-	while (ft != NULL) {
-		if (ft->ft_flags & FL_PCPU) {
-			CPU_FOREACH(i) {
-				if (smp_started == 1) {
-					thread_lock(curthread);
-					sched_bind(curthread, i);
-					thread_unlock(curthread);
-				}
 
-				flowtable_free_stale(ft, NULL);
+	if (ft->ft_flags & FL_PCPU) {
+		int i;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

