svn commit: r233034 - in user/attilio/vmcontention/sys: kern vm

Attilio Rao attilio at FreeBSD.org
Fri Mar 16 15:41:08 UTC 2012


Author: attilio
Date: Fri Mar 16 15:41:07 2012
New Revision: 233034
URL: http://svn.freebsd.org/changeset/base/233034

Log:
  Fix the nodes allocator in architectures without direct-mapping:
  - Fix bugs in the free path where the pages were not unwired and
    relevant locking wasn't acquired.
  - Introduce rnode_map, a submap of kernel_map, to allocate from.
    The reason is that, in architectures without direct-mapping,
    kmem_alloc*() will try to insert the newly created mapping while
    holding the vm_object lock introducing a LOR or lock recursion.
    rnode_map is, however, used only as a leaf submap, thus there cannot
    be any deadlock.
    Notes: Size the submap in order to be, by default, around 64 MB and
    decrease the size of the nodes as the allocation will be much smaller
    (and when the compacting code in the vm_radix will be implemented this
    will aim for much less space to be used).  However note that the
    size of the submap can be changed at boot time via the
    hw.rnode_map_scale scaling factor.
  - Use uma_zone_set_max() covering the size of the submap.
  
  Tested by:	flo

Modified:
  user/attilio/vmcontention/sys/kern/subr_witness.c
  user/attilio/vmcontention/sys/vm/vm_radix.c

Modified: user/attilio/vmcontention/sys/kern/subr_witness.c
==============================================================================
--- user/attilio/vmcontention/sys/kern/subr_witness.c	Fri Mar 16 13:46:54 2012	(r233033)
+++ user/attilio/vmcontention/sys/kern/subr_witness.c	Fri Mar 16 15:41:07 2012	(r233034)
@@ -602,6 +602,7 @@ static struct witness_order_list_entry o
 	 * VM
 	 * 
 	 */
+	{ "system map", &lock_class_mtx_sleep },
 	{ "vm object", &lock_class_mtx_sleep },
 	{ "page lock", &lock_class_mtx_sleep },
 	{ "vm page queue mutex", &lock_class_mtx_sleep },

Modified: user/attilio/vmcontention/sys/vm/vm_radix.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_radix.c	Fri Mar 16 13:46:54 2012	(r233033)
+++ user/attilio/vmcontention/sys/vm/vm_radix.c	Fri Mar 16 15:41:07 2012	(r233034)
@@ -49,12 +49,30 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
+#ifndef UMA_MD_SMALL_ALLOC
+#include <vm/vm_map.h>
+#endif
 #include <vm/vm_radix.h>
 #include <vm/vm_object.h>
 
 #include <sys/kdb.h>
 
+#ifndef UMA_MD_SMALL_ALLOC
+#define	VM_RADIX_RNODE_MAP_SCALE	(1024 * 1024 / 2)
+#define	VM_RADIX_WIDTH	4
+
+/*
+ * Bits of height in root.
+ * The mask of the smallest power of 2 containing VM_RADIX_LIMIT.
+ */
+#define	VM_RADIX_HEIGHT	0x1f
+#else
 #define	VM_RADIX_WIDTH	5
+
+/* See the comment above. */
+#define	VM_RADIX_HEIGHT	0xf
+#endif
+
 #define	VM_RADIX_COUNT	(1 << VM_RADIX_WIDTH)
 #define	VM_RADIX_MASK	(VM_RADIX_COUNT - 1)
 #define	VM_RADIX_MAXVAL	((vm_pindex_t)-1)
@@ -63,9 +81,6 @@
 /* Flag bits stored in node pointers. */
 #define VM_RADIX_FLAGS	0x3
 
-/* Bits of height in root. */
-#define VM_RADIX_HEIGHT	0xf
-
 /* Calculates maximum value for a tree of height h. */
 #define	VM_RADIX_MAX(h)							\
 	    ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL :			\
@@ -84,6 +99,9 @@ CTASSERT(sizeof(struct vm_radix_node) < 
 static uma_zone_t vm_radix_node_zone;
 
 #ifndef UMA_MD_SMALL_ALLOC
+static vm_map_t rnode_map;
+static u_long rnode_map_scale;
+
 static void *
 vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
 {
@@ -91,7 +109,7 @@ vm_radix_node_zone_allocf(uma_zone_t zon
 	vm_page_t m;
 	int pflags;
 
-	/* Inform UMA that this allocator uses kernel_map. */
+	/* Inform UMA that this allocator uses rnode_map. */
 	*flags = UMA_SLAB_KERNEL;
 
 	pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
@@ -104,7 +122,7 @@ vm_radix_node_zone_allocf(uma_zone_t zon
 	    VM_ALLOC_SYSTEM;
 	if ((wait & M_ZERO) != 0)
 		pflags |= VM_ALLOC_ZERO; 
-	addr = kmem_alloc_nofault(kernel_map, size);
+	addr = kmem_alloc_nofault(rnode_map, size);
 	if (addr == 0)
 		return (NULL);
 
@@ -112,7 +130,7 @@ vm_radix_node_zone_allocf(uma_zone_t zon
 	m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
 	    pflags);
 	if (m == NULL) {
-		kmem_free(kernel_map, addr, size);
+		kmem_free(rnode_map, addr, size);
 		return (NULL);
 	}
 	if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
@@ -133,14 +151,18 @@ vm_radix_node_zone_freef(void *item, int
 	voitem = (vm_offset_t)item;
 	m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
 	pmap_qremove(voitem, 1);
+	vm_page_lock(m);
+	vm_page_unwire(m, 0);
 	vm_page_free(m);
-	kmem_free(kernel_map, voitem, size);
+	vm_page_unlock(m);
+	kmem_free(rnode_map, voitem, size);
 }
 
 static void
 init_vm_radix_alloc(void *dummy __unused)
 {
 
+	uma_zone_set_max(vm_radix_node_zone, rnode_map_scale);
 	uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
 	uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
 }
@@ -193,9 +215,31 @@ vm_radix_slot(vm_pindex_t index, int lev
 	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
 }
 
+/*
+ * Initialize the radix node submap (for architectures not supporting
+ * direct-mapping) and the radix node zone.
+ *
+ * WITNESS reports a lock order reversal, for architectures not
+ * supporting direct-mapping,  between the "system map" lock
+ * and the "vm object" lock.  This is because the well established ordering
+ * "system map" -> "vm object" is not honoured in this case as allocating
+ * from the radix node submap ends up adding a mapping entry to it, meaning
+ * it is necessary to lock the submap.  However, the radix node submap is
+ * a leaf and self-contained, thus a deadlock cannot happen here and
+ * adding MTX_NOWITNESS to all map locks would be largely sub-optimal.
+ */
 void
 vm_radix_init(void)
 {
+#ifndef UMA_MD_SMALL_ALLOC
+	vm_offset_t maxaddr, minaddr;
+
+	rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE;
+	TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale);
+	rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
+	    rnode_map_scale * sizeof(struct vm_radix_node), FALSE);
+	rnode_map->system_map = 1;
+#endif
 
 	vm_radix_node_zone = uma_zcreate("RADIX NODE",
 	    sizeof(struct vm_radix_node), NULL,


More information about the svn-src-user mailing list