svn commit: r243631 - in head/sys: kern sys

Andre Oppermann andre at freebsd.org
Mon Jan 14 15:00:54 UTC 2013


On 13.01.2013 11:10, Alan Cox wrote:
> On 01/07/2013 12:47, Oleksandr Tymoshenko wrote:
>> On 12/27/2012 6:46 PM, Oleksandr Tymoshenko wrote:
>>> On 12/18/2012 1:59 AM, Alan Cox wrote:
>>>> On 12/17/2012 23:40, Oleksandr Tymoshenko wrote:
>>>>> On 2012-12-08, at 1:21 PM, Alan Cox <alc at rice.edu> wrote:
>>>> That makes sense.  However, "virtual_avail" isn't the start of the
>>>> kernel address space.  The kernel map always starts at
>>>> VM_MIN_KERNEL_ADDRESS.  (See kmem_init().)  "virtual_avail" represents
>>>> the next unallocated virtual address in the kernel address space at an
>>>> early point in initialization.  "virtual_avail" and "virtual_end"
>>>> aren't used after that, or outside the VM system.  Please use
>>>> vm_map_min(kernel_map) and vm_map_max(kernel_map) instead.
>>>
>>> I checked: kernel_map is not available (NULL) at this point.  So we
>>> can't use it to determine the real KVA size.  The closest thing we can
>>> get is the virtual_avail/virtual_end pair.
>>>
>>> Andre, could you approve the attached patch for commit or suggest a
>>> better solution?
>>
>> Any update on this one? Can I proceed with commit?
>>
>
> Yes, I've now spent a little bit of time looking at this, and I don't
> see why these calculations and tunable_mbinit() need to be performed
> before the kernel map is initialized.
>
> Let me summarize what I found:
>
> 1. The function tunable_mbinit() now has a dependency on the global
> variable maxmbufmem.  tunable_mbinit() is executed under
> SI_SUB_TUNABLES.  tunable_mbinit() defines the global variable
> nmbclusters.  The statements made in the comment at the head of
> tunable_mbinit() all appear to be false:
>
> /*
>   * tunable_mbinit() has to be run before init_maxsockets() thus
>   * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
>   * runs at SI_ORDER_ANY.
>   *
>   * NB: This has to be done before VM init.
>   */
>
> I don't see anything in init_maxsockets() that depends on
> tunable_mbinit().  Moreover, the statement about "VM init" is only
> correct if you regard the initialization of the kernel's malloc as "VM
> init".

This seems to be historic cruft.  The dependency on maxsockets was
removed recently with the autotuning improvements.

A patch moving the maxmbufmem calculation into tunable_mbinit() and
changing the SYSINIT of tunable_mbinit() from SI_SUB_TUNABLES to
SI_SUB_KMEM, which comes after the VM initialization, is attached.

> 2. The function kmeminit() in kern/kern_malloc.c has a dependency on the
> global variable nmbclusters.  kmeminit() is executed under SI_SUB_KMEM,
> which comes after the initialization of the virtual memory system,
> including the kernel map.

The use of nmbclusters in kmeminit() seems to be bogus.  I think it comes
from the times when the mbuf allocator was directly layered on top of
the VM, that is, before UMA.

kmeminit() should not use nmbclusters.  The computations done in kmeminit()
do not make a whole lot of sense to me. But I'm no expert in that area.

> 3. The function vm_ksubmap_init() has a dependency on the global
> variable maxpipekva.  vm_ksubmap_init() is executed under SI_SUB_CPU,
> which comes after SI_SUB_KMEM.
>
> Am I missing anything?
>
> I'm attaching a patch that defers the calculation of maxpipekva until we
> actually need it in vm_ksubmap_init().  Any comments on this patch are
> welcome.

Looks good to me.  Perhaps the whole calculation and setup of the pipe_map
could be moved to kern/sys_pipe.c:pipeinit() to have it all together.

-- 
Andre

-------------- next part --------------
Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h	(revision 245423)
+++ sys/mbuf.h	(working copy)
@@ -384,7 +384,6 @@
  *
  * The rest of it is defined in kern/kern_mbuf.c
  */
-extern quad_t		maxmbufmem;
 extern uma_zone_t	zone_mbuf;
 extern uma_zone_t	zone_clust;
 extern uma_zone_t	zone_pack;
Index: kern/kern_mbuf.c
===================================================================
--- kern/kern_mbuf.c	(revision 245423)
+++ kern/kern_mbuf.c	(working copy)
@@ -47,6 +47,7 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
+#include <vm/vm_map.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
@@ -104,16 +105,25 @@
 struct mbstat mbstat;
 
 /*
- * tunable_mbinit() has to be run before init_maxsockets() thus
- * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
- * runs at SI_ORDER_ANY.
- *
- * NB: This has to be done before VM init.
+ * tunable_mbinit() has to be run before any mbuf allocations are done.
  */
 static void
 tunable_mbinit(void *dummy)
 {
+	quad_t realmem, maxmbufmem;
 
+	/*
+	 * The default limit for all mbuf related memory is 1/2 of all
+	 * available kernel memory (physical or kmem).
+	 * At most it can be 3/4 of available kernel memory.
+	 */
+	realmem = qmin((quad_t)physmem * PAGE_SIZE,
+	    vm_map_max(kernel_map) - vm_map_min(kernel_map));
+	maxmbufmem = realmem / 2;
+	TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
+	if (maxmbufmem > realmem / 4 * 3)
+		maxmbufmem = realmem / 4 * 3;
+
 	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
 	if (nmbclusters == 0)
 		nmbclusters = maxmbufmem / MCLBYTES / 4;
@@ -139,7 +149,7 @@
 		nmbufs = lmax(maxmbufmem / MSIZE / 5,
 		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
 }
-SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
+SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
 
 static int
 sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
@@ -279,16 +289,14 @@
 static void	mb_zfini_pack(void *, int);
 
 static void	mb_reclaim(void *);
-static void	mbuf_init(void *);
 static void    *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
 
-/* Ensure that MSIZE must be a power of 2. */
+/* Ensure that MSIZE is a power of 2. */
 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
 
 /*
  * Initialize FreeBSD Network buffer allocation.
  */
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
 static void
 mbuf_init(void *dummy)
 {
@@ -396,6 +404,7 @@
 	mbstat.sf_iocnt = 0;
 	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
 }
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
 
 /*
  * UMA backend page allocator for the jumbo frame zones.
Index: kern/subr_param.c
===================================================================
--- kern/subr_param.c	(revision 245423)
+++ kern/subr_param.c	(working copy)
@@ -93,7 +93,6 @@
 int	nbuf;
 int	ngroups_max;			/* max # groups per process */
 int	nswbuf;
-quad_t	maxmbufmem;			/* max mbuf memory */
 pid_t	pid_max = PID_MAX;
 long	maxswzone;			/* max swmeta KVA storage */
 long	maxbcache;			/* max buffer cache KVA storage */
@@ -272,7 +271,6 @@
 void
 init_param2(long physpages)
 {
-	quad_t realmem;
 
 	/* Base parameters */
 	maxusers = MAXUSERS;
@@ -329,18 +327,6 @@
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
 	/*
-	 * The default limit for all mbuf related memory is 1/2 of all
-	 * available kernel memory (physical or kmem).
-	 * At most it can be 3/4 of available kernel memory.
-	 */
-	realmem = qmin((quad_t)physpages * PAGE_SIZE,
-	    VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
-	maxmbufmem = realmem / 2;
-	TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
-	if (maxmbufmem > (realmem / 4) * 3)
-		maxmbufmem = (realmem / 4) * 3;
-
-	/*
 	 * The default for maxpipekva is min(1/64 of the kernel address space,
 	 * max(1/64 of main memory, 512KB)).  See sys_pipe.c for more details.
 	 */


More information about the svn-src-all mailing list