svn commit: r242910 - in user/andre/tcp_workqueue/sys: kern sys

Andre Oppermann andre at FreeBSD.org
Mon Nov 12 08:47:15 UTC 2012


Author: andre
Date: Mon Nov 12 08:47:13 2012
New Revision: 242910
URL: http://svnweb.freebsd.org/changeset/base/242910

Log:
  Base the mbuf related limits on the available physical memory or
  kernel memory, whichever is lower.
  
  Set maxfiles to a memory-derived value with a floor based on
  maxusers.
  
  Tidy up ordering in init_param2() and check up on some users of
  those values calculated here.

Modified:
  user/andre/tcp_workqueue/sys/kern/kern_mbuf.c
  user/andre/tcp_workqueue/sys/kern/subr_param.c
  user/andre/tcp_workqueue/sys/kern/uipc_socket.c
  user/andre/tcp_workqueue/sys/sys/eventhandler.h
  user/andre/tcp_workqueue/sys/sys/mbuf.h

Modified: user/andre/tcp_workqueue/sys/kern/kern_mbuf.c
==============================================================================
--- user/andre/tcp_workqueue/sys/kern/kern_mbuf.c	Mon Nov 12 07:47:19 2012	(r242909)
+++ user/andre/tcp_workqueue/sys/kern/kern_mbuf.c	Mon Nov 12 08:47:13 2012	(r242910)
@@ -96,6 +96,7 @@ __FBSDID("$FreeBSD$");
  *
  */
 
+int nmbufs;			/* limits number of mbufs */
 int nmbclusters;		/* limits number of mbuf clusters */
 int nmbjumbop;			/* limits number of page size jumbo clusters */
 int nmbjumbo9;			/* limits number of 9k jumbo clusters */
@@ -106,27 +107,38 @@ struct mbstat mbstat;
  * tunable_mbinit() has to be run before init_maxsockets() thus
  * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
  * runs at SI_ORDER_ANY.
+ *
+ * NB: This has to be done before VM init.
  */
 static void
 tunable_mbinit(void *dummy)
 {
 
-	/* This has to be done before VM init. */
 	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
 	if (nmbclusters == 0)
-		nmbclusters = 1024 + maxusers * 64;
+		nmbclusters = maxmbufmem / MCLBYTES / 4;
 
 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
 	if (nmbjumbop == 0)
-		nmbjumbop = nmbclusters / 2;
+		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;
 
 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
 	if (nmbjumbo9 == 0)
-		nmbjumbo9 = nmbclusters / 4;
+		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;
 
 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
 	if (nmbjumbo16 == 0)
-		nmbjumbo16 = nmbclusters / 8;
+		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;
+
+	/*
+	 * We need at least as many mbufs as we have clusters of the various
+	 * types added together; a lower kern.ipc.nmbufs setting is replaced.
+	 */
+	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
+	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
+		nmbufs = lmax(maxmbufmem / MSIZE / 5,
+			      nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
+
 }
 SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
 
@@ -138,9 +150,11 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
 	newnmbclusters = nmbclusters;
 	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 
 	if (error == 0 && req->newptr) {
-		if (newnmbclusters > nmbclusters) {
+		if (newnmbclusters > nmbclusters &&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbclusters = newnmbclusters;
 			uma_zone_set_max(zone_clust, nmbclusters);
+			nmbclusters = uma_zone_get_max(zone_clust);
 			EVENTHANDLER_INVOKE(nmbclusters_change);
 		} else
 			error = EINVAL;
@@ -159,9 +173,11 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
 	newnmbjumbop = nmbjumbop;
 	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 
 	if (error == 0 && req->newptr) {
-		if (newnmbjumbop> nmbjumbop) {
+		if (newnmbjumbop > nmbjumbop &&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbjumbop = newnmbjumbop;
 			uma_zone_set_max(zone_jumbop, nmbjumbop);
+			nmbjumbop = uma_zone_get_max(zone_jumbop);
 		} else
 			error = EINVAL;
 	}
@@ -180,9 +196,11 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
 	newnmbjumbo9 = nmbjumbo9;
 	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 
 	if (error == 0 && req->newptr) {
-		if (newnmbjumbo9> nmbjumbo9) {
+		if (newnmbjumbo9 > nmbjumbo9&&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbjumbo9 = newnmbjumbo9;
 			uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+			nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
 		} else
 			error = EINVAL;
 	}
@@ -200,9 +218,11 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
 	newnmbjumbo16 = nmbjumbo16;
 	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 
 	if (error == 0 && req->newptr) {
-		if (newnmbjumbo16> nmbjumbo16) {
+		if (newnmbjumbo16 > nmbjumbo16 &&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbjumbo16 = newnmbjumbo16;
 			uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+			nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
 		} else
 			error = EINVAL;
 	}
@@ -212,6 +232,27 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumb
 &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
     "Maximum number of mbuf 16k jumbo clusters allowed");
 
+/* Sysctl handler: report the mbuf limit, or raise (never lower) it. */
+static int
+sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
+{
+	int error, newnmbufs;
+
+	newnmbufs = nmbufs;
+	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);		/* plain read, or an error */
+	if (newnmbufs <= nmbufs)
+		return (EINVAL);	/* shrinking is not supported */
+	nmbufs = newnmbufs;
+	uma_zone_set_max(zone_mbuf, nmbufs);
+	nmbufs = uma_zone_get_max(zone_mbuf);	/* not nmbclusters */
+	EVENTHANDLER_INVOKE(nmbufs_change);
+	return (0);
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW,
+&nmbufs, 0, sysctl_nmbufs, "IU",
+    "Maximum number of mbufs allowed");
 
 
 SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
@@ -266,6 +307,10 @@ mbuf_init(void *dummy)
 	    NULL, NULL,
 #endif
 	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
+	if (nmbufs > 0) {
+		uma_zone_set_max(zone_mbuf, nmbufs);
+		nmbufs = uma_zone_get_max(zone_mbuf);
+	}
 
 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -275,8 +320,10 @@ mbuf_init(void *dummy)
 	    NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
-	if (nmbclusters > 0)
+	if (nmbclusters > 0) {
 		uma_zone_set_max(zone_clust, nmbclusters);
+		nmbclusters = uma_zone_get_max(zone_clust);
+	}
 
 	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
 	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
@@ -290,8 +337,10 @@ mbuf_init(void *dummy)
 	    NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
-	if (nmbjumbop > 0)
+	if (nmbjumbop > 0) {
 		uma_zone_set_max(zone_jumbop, nmbjumbop);
+		nmbjumbop = uma_zone_get_max(zone_jumbop);
+	}
 
 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -301,9 +350,11 @@ mbuf_init(void *dummy)
 	    NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
-	if (nmbjumbo9 > 0)
-		uma_zone_set_max(zone_jumbo9, nmbjumbo9);
 	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
+	if (nmbjumbo9 > 0) {
+		uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+		nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
+	}
 
 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -313,9 +364,11 @@ mbuf_init(void *dummy)
 	    NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
-	if (nmbjumbo16 > 0)
-		uma_zone_set_max(zone_jumbo16, nmbjumbo16);
 	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
+	if (nmbjumbo16 > 0) {
+		uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+		nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
+	}
 
 	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
 	    NULL, NULL,

Modified: user/andre/tcp_workqueue/sys/kern/subr_param.c
==============================================================================
--- user/andre/tcp_workqueue/sys/kern/subr_param.c	Mon Nov 12 07:47:19 2012	(r242909)
+++ user/andre/tcp_workqueue/sys/kern/subr_param.c	Mon Nov 12 08:47:13 2012	(r242910)
@@ -93,6 +93,7 @@ int	ncallout;			/* maximum # of timer ev
 int	nbuf;
 int	ngroups_max;			/* max # groups per process */
 int	nswbuf;
+long	maxmbufmem;			/* max mbuf memory */
 pid_t	pid_max = PID_MAX;
 long	maxswzone;			/* max swmeta KVA storage */
 long	maxbcache;			/* max buffer cache KVA storage */
@@ -270,6 +271,7 @@ init_param1(void)
 void
 init_param2(long physpages)
 {
+	long realmem;
 
 	/* Base parameters */
 	maxusers = MAXUSERS;
@@ -293,18 +295,24 @@ init_param2(long physpages)
 	/*
 	 * The following can be overridden after boot via sysctl.  Note:
 	 * unless overriden, these macros are ultimately based on maxusers.
-	 */
-	maxproc = NPROC;
-	TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
-	/*
 	 * Limit maxproc so that kmap entries cannot be exhausted by
 	 * processes.
 	 */
+	maxproc = NPROC;
+	TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
 	if (maxproc > (physpages / 12))
 		maxproc = physpages / 12;
-	maxfiles = MAXFILES;
-	TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
 	maxprocperuid = (maxproc * 9) / 10;
+
+	/*
+	 * The default limit for maxfiles is 1/12 of the number of
+	 * physical pages but not less than 16 times maxusers.
+	 * At most it can be 1/6 the number of physical pages.
+	 */
+	maxfiles = imax(MAXFILES, physpages / 12);
+	TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
+	if (maxfiles > (physpages / 6))
+		maxfiles = physpages / 6;
 	maxfilesperproc = (maxfiles * 9) / 10;
 	
 	/*
@@ -313,20 +321,36 @@ init_param2(long physpages)
 	nbuf = NBUF;
 	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
 
+	/*
+	 * XXXAO: This can get really large, does the callout wheel have
+	 * to be so big?
+	 */
 	ncallout = 16 + maxproc + maxfiles;
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
 	/*
+	 * The default limit for all mbuf related memory is 1/2 of all
+	 * available kernel memory (physical or kmem).
+	 * At most it can be 3/4 of available kernel memory.
+	 */
+	realmem = lmin(physpages * PAGE_SIZE,
+			VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
+	maxmbufmem = realmem / 2;
+	TUNABLE_LONG_FETCH("kern.maxmbufmem", &maxmbufmem);
+	if (maxmbufmem > realmem / 4 * 3)
+		maxmbufmem = realmem / 4 * 3;
+
+	/*
 	 * The default for maxpipekva is min(1/64 of the kernel address space,
 	 * max(1/64 of main memory, 512KB)).  See sys_pipe.c for more details.
 	 */
 	maxpipekva = (physpages / 64) * PAGE_SIZE;
+	TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
 	if (maxpipekva < 512 * 1024)
 		maxpipekva = 512 * 1024;
 	if (maxpipekva > (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64)
 		maxpipekva = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) /
 		    64;
-	TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
 }
 
 /*

Modified: user/andre/tcp_workqueue/sys/kern/uipc_socket.c
==============================================================================
--- user/andre/tcp_workqueue/sys/kern/uipc_socket.c	Mon Nov 12 07:47:19 2012	(r242909)
+++ user/andre/tcp_workqueue/sys/kern/uipc_socket.c	Mon Nov 12 08:47:13 2012	(r242910)
@@ -290,7 +290,7 @@ init_maxsockets(void *ignored)
 {
 
 	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
-	maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
+	maxsockets = imax(maxsockets, maxfiles);
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
 
@@ -308,10 +308,6 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
 	if (error == 0 && req->newptr) {
 		if (newmaxsockets > maxsockets) {
 			maxsockets = newmaxsockets;
-			if (maxsockets > ((maxfiles / 4) * 3)) {
-				maxfiles = (maxsockets * 5) / 4;
-				maxfilesperproc = (maxfiles * 9) / 10;
-			}
 			EVENTHANDLER_INVOKE(maxsockets_change);
 		} else
 			error = EINVAL;

Modified: user/andre/tcp_workqueue/sys/sys/eventhandler.h
==============================================================================
--- user/andre/tcp_workqueue/sys/sys/eventhandler.h	Mon Nov 12 07:47:19 2012	(r242909)
+++ user/andre/tcp_workqueue/sys/sys/eventhandler.h	Mon Nov 12 08:47:13 2012	(r242910)
@@ -253,6 +253,7 @@ EVENTHANDLER_DECLARE(thread_fini, thread
 
 typedef void (*uma_zone_chfn)(void *);
 EVENTHANDLER_DECLARE(nmbclusters_change, uma_zone_chfn);
+EVENTHANDLER_DECLARE(nmbufs_change, uma_zone_chfn);
 EVENTHANDLER_DECLARE(maxsockets_change, uma_zone_chfn);
 
 #endif /* SYS_EVENTHANDLER_H */

Modified: user/andre/tcp_workqueue/sys/sys/mbuf.h
==============================================================================
--- user/andre/tcp_workqueue/sys/sys/mbuf.h	Mon Nov 12 07:47:19 2012	(r242909)
+++ user/andre/tcp_workqueue/sys/sys/mbuf.h	Mon Nov 12 08:47:13 2012	(r242910)
@@ -396,7 +396,7 @@ struct mbstat {
  *
  * The rest of it is defined in kern/kern_mbuf.c
  */
-
+extern long		maxmbufmem;
 extern uma_zone_t	zone_mbuf;
 extern uma_zone_t	zone_clust;
 extern uma_zone_t	zone_pack;


More information about the svn-src-user mailing list