svn commit: r205488 - in head/sys: net netinet

Kip Macy kmacy at FreeBSD.org
Mon Mar 22 23:04:13 UTC 2010


Author: kmacy
Date: Mon Mar 22 23:04:12 2010
New Revision: 205488
URL: http://svn.freebsd.org/changeset/base/205488

Log:
  - size the ipv4 flowtable and the maximum number of flows at boot time
  - increase flow cleaning frequency and decrease flow caching time
    when near the flow limit
  - stop allocating new flows when within 3% of maxflows; don't start
    allocating again until the flow count drops 12.5% below maxflows
  
  MFC after:	7 days

Modified:
  head/sys/net/flowtable.c
  head/sys/netinet/ip_input.c

Modified: head/sys/net/flowtable.c
==============================================================================
--- head/sys/net/flowtable.c	Mon Mar 22 22:39:32 2010	(r205487)
+++ head/sys/net/flowtable.c	Mon Mar 22 23:04:12 2010	(r205488)
@@ -155,30 +155,33 @@ struct flowtable_stats {
 	uint64_t	ft_frees;
 	uint64_t	ft_hits;
 	uint64_t	ft_lookups;
-} __aligned(128);
+} __aligned(CACHE_LINE_SIZE);
 
 struct flowtable {
 	struct	flowtable_stats ft_stats[MAXCPU];
 	int 		ft_size;
 	int 		ft_lock_count;
 	uint32_t	ft_flags;
-
-	uint32_t	ft_udp_idle;
-	uint32_t	ft_fin_wait_idle;
-	uint32_t	ft_syn_idle;
-	uint32_t	ft_tcp_idle;
-
 	char		*ft_name;
 	fl_lock_t	*ft_lock;
 	fl_lock_t 	*ft_unlock;
 	fl_rtalloc_t	*ft_rtalloc;
+	/*
+	 * XXX need to pad out 
+	 */ 
 	struct mtx	*ft_locks;
-
 	union flentryp	ft_table;
 	bitstr_t 	*ft_masks[MAXCPU];
 	bitstr_t	*ft_tmpmask;
 	struct flowtable *ft_next;
-} __aligned(128);
+
+	uint32_t	ft_count __aligned(CACHE_LINE_SIZE);
+	uint32_t	ft_udp_idle __aligned(CACHE_LINE_SIZE);
+	uint32_t	ft_fin_wait_idle;
+	uint32_t	ft_syn_idle;
+	uint32_t	ft_tcp_idle;
+	boolean_t	ft_full;
+} __aligned(CACHE_LINE_SIZE);
 
 static struct proc *flowcleanerproc;
 static VNET_DEFINE(struct flowtable *, flow_list_head);
@@ -191,9 +194,11 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6
 #define	V_flow_ipv4_zone	VNET(flow_ipv4_zone)
 #define	V_flow_ipv6_zone	VNET(flow_ipv6_zone)
 
+
 static struct cv 	flowclean_cv;
 static struct mtx	flowclean_lock;
 static uint32_t		flowclean_cycles;
+static uint32_t		flowclean_freq;
 
 #ifdef FLOWTABLE_DEBUG
 #define FLDPRINTF(ft, flags, fmt, ...) 		\
@@ -230,7 +235,7 @@ static VNET_DEFINE(int, flowtable_syn_ex
 static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
 static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
 static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
-static VNET_DEFINE(int, flowtable_nmbflows) = 4096;
+static VNET_DEFINE(int, flowtable_nmbflows);
 static VNET_DEFINE(int, flowtable_ready) = 0;
 
 #define	V_flowtable_enable		VNET(flowtable_enable)
@@ -905,6 +910,61 @@ flowtable_set_hashkey(struct flentry *fl
 		hashkey[i] = key[i];
 }
 
+static struct flentry *
+flow_alloc(struct flowtable *ft)
+{
+	struct flentry *newfle;
+	uma_zone_t zone;
+
+	newfle = NULL;
+	zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
+
+	newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
+	if (newfle != NULL)
+		atomic_add_int(&ft->ft_count, 1);
+	return (newfle);
+}
+
+static void
+flow_free(struct flentry *fle, struct flowtable *ft)
+{
+	uma_zone_t zone;
+
+	zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
+	atomic_add_int(&ft->ft_count, -1);
+	uma_zfree(zone, fle);
+}
+
+static int
+flow_full(struct flowtable *ft)
+{
+	boolean_t full;
+	uint32_t count;
+	
+	full = ft->ft_full;
+	count = ft->ft_count;
+
+	if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
+		ft->ft_full = FALSE;
+	else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
+		ft->ft_full = TRUE;
+	
+	if (full && !ft->ft_full) {
+		flowclean_freq = 4*hz;
+		if ((ft->ft_flags & FL_HASH_ALL) == 0)
+			ft->ft_udp_idle = ft->ft_fin_wait_idle =
+			    ft->ft_syn_idle = ft->ft_tcp_idle = 5;
+		cv_broadcast(&flowclean_cv);
+	} else if (!full && ft->ft_full) {
+		flowclean_freq = 20*hz;
+		if ((ft->ft_flags & FL_HASH_ALL) == 0)
+			ft->ft_udp_idle = ft->ft_fin_wait_idle =
+			    ft->ft_syn_idle = ft->ft_tcp_idle = 30;
+	}
+
+	return (ft->ft_full);
+}
+
 static int
 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
     uint32_t fibnum, struct route *ro, uint16_t flags)
@@ -912,12 +972,10 @@ flowtable_insert(struct flowtable *ft, u
 	struct flentry *fle, *fletail, *newfle, **flep;
 	struct flowtable_stats *fs = &ft->ft_stats[curcpu];
 	int depth;
-	uma_zone_t flezone;
 	bitstr_t *mask;
 	uint8_t proto;
 
-	flezone = (flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
-	newfle = uma_zalloc(flezone, M_NOWAIT | M_ZERO);
+	newfle = flow_alloc(ft);
 	if (newfle == NULL)
 		return (ENOMEM);
 
@@ -948,9 +1006,8 @@ flowtable_insert(struct flowtable *ft, u
 			 * or we lost a race to insert
 			 */
 			FL_ENTRY_UNLOCK(ft, hash);
-			uma_zfree((newfle->f_flags & FL_IPV6) ?
-			    V_flow_ipv6_zone : V_flow_ipv4_zone, newfle);
-
+			flow_free(newfle, ft);
+			
 			if (flags & FL_OVERWRITE) 
 				goto skip;
 			return (EEXIST);
@@ -1147,7 +1204,7 @@ keycheck:	
 	}
 	FL_ENTRY_UNLOCK(ft, hash);
 uncached:
-	if (flags & FL_NOAUTO)
+	if (flags & FL_NOAUTO || flow_full(ft))
 		return (NULL);
 
 	fs->ft_misses++;
@@ -1325,7 +1382,7 @@ flowtable_alloc(char *name, int nentry, 
  * 
  */
 static void
-fle_free(struct flentry *fle)
+fle_free(struct flentry *fle, struct flowtable *ft)
 {
 	struct rtentry *rt;
 	struct llentry *lle;
@@ -1334,8 +1391,7 @@ fle_free(struct flentry *fle)
 	lle = __DEVOLATILE(struct llentry *, fle->f_lle);
 	RTFREE(rt);
 	LLE_FREE(lle);
-	uma_zfree((fle->f_flags & FL_IPV6) ?
-	    V_flow_ipv6_zone : V_flow_ipv4_zone, fle);
+	flow_free(fle, ft);
 }
 
 static void
@@ -1426,7 +1482,7 @@ flowtable_free_stale(struct flowtable *f
 		flefreehead = fle->f_next;
 		count++;
 		fs->ft_frees++;
-		fle_free(fle);
+		fle_free(fle, ft);
 	}
 	if (V_flowtable_debug && count)
 		log(LOG_DEBUG, "freed %d flow entries\n", count);
@@ -1518,7 +1574,7 @@ flowtable_cleaner(void)
 		 */
 		mtx_lock(&flowclean_lock);
 		cv_broadcast(&flowclean_cv);
-		cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz);
+		cv_timedwait(&flowclean_cv, &flowclean_lock, flowclean_freq);
 		mtx_unlock(&flowclean_lock);
 	}
 }
@@ -1548,6 +1604,7 @@ static void
 flowtable_init_vnet(const void *unused __unused)
 {
 
+	V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
 	V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
 	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
 	V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
@@ -1556,7 +1613,7 @@ flowtable_init_vnet(const void *unused _
 	uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
 	V_flowtable_ready = 1;
 }
-VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE,
+VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
     flowtable_init_vnet, NULL);
 
 static void
@@ -1567,8 +1624,9 @@ flowtable_init(const void *unused __unus
 	mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
 	EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
 	    EVENTHANDLER_PRI_ANY);
+	flowclean_freq = 20*hz;
 }
-SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+SYSINIT(flowtable_init, SI_SUB_SMP, SI_ORDER_MIDDLE,
     flowtable_init, NULL);
 
 

Modified: head/sys/netinet/ip_input.c
==============================================================================
--- head/sys/netinet/ip_input.c	Mon Mar 22 22:39:32 2010	(r205487)
+++ head/sys/netinet/ip_input.c	Mon Mar 22 23:04:12 2010	(r205488)
@@ -327,8 +327,20 @@ ip_init(void)
 			"error %d\n", __func__, i);
 
 #ifdef FLOWTABLE
-	TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
-	    &V_ip_output_flowtable_size);
+	if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
+		&V_ip_output_flowtable_size)) {
+		if (V_ip_output_flowtable_size < 256)
+			V_ip_output_flowtable_size = 256;
+		if (!powerof2(V_ip_output_flowtable_size)) {
+			printf("flowtable must be power of 2 size\n");
+			V_ip_output_flowtable_size = 2048;
+		}
+	} else {
+		/*
+		 * round up to the next power of 2
+		 */
+		V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
+	}
 	V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
 #endif
 


More information about the svn-src-head mailing list