Kernel tuning for large maxsockets

Scot Loach sloach at sandvine.com
Tue Jul 29 07:00:42 PDT 2003


Here is my patch for this.  I've added the new settings to uipc_socket2.c
instead of subr_param.c because they need to be initialized with maxsockets
to keep the current behavior by default.

This patch adds four new boot-time tunable variables.  Each one defaults to
maxsockets and is also exported as a read-only sysctl:
kern.ipc.maxripcb - maximum number of raw pcbs
kern.ipc.maxdivcb - maximum number of divert pcbs
kern.ipc.maxudpcb - maximum number of udp pcbs
kern.ipc.maxtcpcb - maximum number of tcp pcbs


Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.55.2.17
diff -U3 -r1.55.2.17 uipc_socket2.c
--- kern/uipc_socket2.c	31 Aug 2002 19:04:55 -0000	1.55.2.17
+++ kern/uipc_socket2.c	23 Jul 2003 20:40:53 -0000
@@ -54,6 +54,10 @@
 #include <sys/event.h>
 
 int	maxsockets;
+int	maxripcb;			/* max raw pcbs to preallocate */
+int	maxdivcb;			/* max divert pcbs to preallocate */
+int	maxtcpcb;			/* max tcp pcbs to preallocate */
+int	maxudpcb;			/* max udp pcbs to preallocate */
 
 /*
  * Primitive routines for operating on sockets and socket buffers
@@ -998,6 +1002,16 @@
 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
     &sb_efficiency, 0, "");
 
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxripcb, CTLFLAG_RD,
+    &maxripcb, 0, "Maximum number of raw sockets available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxdivcb, CTLFLAG_RD,
+    &maxdivcb, 0, "Maximum number of divert sockets available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxtcpcb, CTLFLAG_RD,
+    &maxtcpcb, 0, "Maximum number of TCP sockets available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxudpcb, CTLFLAG_RD,
+    &maxudpcb, 0, "Maximum number of UDP sockets available");
+
+
 /*
  * Initialise maxsockets 
  */
@@ -1005,5 +1019,14 @@
 {
     TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
     maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
+
+    maxripcb = maxsockets;
+    TUNABLE_INT_FETCH("kern.ipc.maxripcb", &maxripcb);
+    maxdivcb = maxsockets;
+    TUNABLE_INT_FETCH("kern.ipc.maxdivcb", &maxdivcb);
+    maxtcpcb = maxsockets;
+    TUNABLE_INT_FETCH("kern.ipc.maxtcpcb", &maxtcpcb);
+    maxudpcb = maxsockets;
+    TUNABLE_INT_FETCH("kern.ipc.maxudpcb", &maxudpcb);
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
Index: netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.42.2.5
diff -U3 -r1.42.2.5 ip_divert.c
--- netinet/ip_divert.c	9 Jul 2002 09:11:42 -0000	1.42.2.5
+++ netinet/ip_divert.c	23 Jul 2003 20:10:30 -0000
@@ -125,7 +125,7 @@
 	divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
 	divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
 	divcbinfo.ipi_zone = zinit("divcb", sizeof(struct inpcb),
-				   maxsockets, ZONE_INTERRUPT, 0);
+				   maxdivcb, ZONE_INTERRUPT, 0);
 }
 
 /*
Index: netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.64.2.10
diff -U3 -r1.64.2.10 raw_ip.c
--- netinet/raw_ip.c	26 Nov 2001 10:07:57 -0000	1.64.2.10
+++ netinet/raw_ip.c	23 Jul 2003 20:10:43 -0000
@@ -103,7 +103,7 @@
 	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
 	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
 	ripcbinfo.ipi_zone = zinit("ripcb", sizeof(struct inpcb),
-				   maxsockets, ZONE_INTERRUPT, 0);
+				   maxripcb, ZONE_INTERRUPT, 0);
 }
 
 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
Index: netinet/tcp_subr.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.73.2.28.1000.1
diff -U3 -r1.73.2.28.1000.1 tcp_subr.c
--- netinet/tcp_subr.c	2 Jan 2003 18:07:54 -0000	1.73.2.28.1000.1
+++ netinet/tcp_subr.c	23 Jul 2003 22:55:12 -0000
@@ -231,7 +231,7 @@
 	tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
 	tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
 					&tcbinfo.porthashmask);
-	tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
+	tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxtcpcb,
 				 ZONE_INTERRUPT, 0);
 #ifdef INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.64.2.16.1000.3
diff -U3 -r1.64.2.16.1000.3 udp_usrreq.c
--- netinet/udp_usrreq.c	29 May 2003 16:35:50 -0000	1.64.2.16.1000.3
+++ netinet/udp_usrreq.c	23 Jul 2003 22:54:55 -0000
@@ -144,7 +144,7 @@
 	udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
 	udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
 					&udbinfo.porthashmask);
-	udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets,
+	udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxudpcb,
 				 ZONE_INTERRUPT, 0);
 }
 
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.46.2.9
diff -U3 -r1.46.2.9 socketvar.h
--- sys/socketvar.h	14 Aug 2002 22:23:10 -0000	1.46.2.9
+++ sys/socketvar.h	28 Jul 2003 02:28:40 -0000
@@ -297,6 +297,10 @@
 extern u_long	sb_max;
 extern struct	vm_zone *socket_zone;
 extern so_gen_t so_gencnt;
+extern int      maxripcb;
+extern int      maxdivcb;
+extern int      maxtcpcb;
+extern int      maxudpcb;
 
 struct file;
 struct filedesc;


-----Original Message-----
From: Mike Silbersack [mailto:silby at silby.com]
Sent: Tuesday, July 15, 2003 6:39 PM
To: Scot Loach
Cc: 'freebsd-net at freebsd.org'
Subject: Re: Kernel tuning for large maxsockets



On Tue, 15 Jul 2003, Scot Loach wrote:

> Is there any reason I should not modify the kernel code to only let a small,
> fixed number of raw and divert pcbs be preallocated instead of having them
> scale with maxsockets?

Your idea is sound.

> Next, does this seem like a generally useful thing that could be rolled back
> into the source tree?  I could make this a kernel option or a tunable sysctl
> variable.
>
> thanks
>
> Scot Loach

A tunable maximum for each of those settings sounds good, that should fit
well in subr_param.c.  Send me your patch when it's done, and I'll look
into incorporating it.

Mike "Silby" Silbersack


More information about the freebsd-net mailing list