svn commit: r212427 - in stable/8: share/man/man9 sys/kern sys/vm

Matthew D Fleming mdf at FreeBSD.org
Fri Sep 10 17:26:42 UTC 2010


Author: mdf
Date: Fri Sep 10 17:26:41 2010
New Revision: 212427
URL: http://svn.freebsd.org/changeset/base/212427

Log:
  MFC r211194, r211229, r212058, r212063: memguard(9) rewrite
  
  r211194:
  
  Rework memguard(9) to reserve significantly more KVA to detect
  use-after-free over a longer time.  Also release the backing pages of
  a guarded allocation at free(9) time to reduce the overhead of using
  memguard(9).  Allow setting and varying the malloc type at run-time.
  Add knobs to allow:
  
   - randomly guarding memory
   - adding un-backed KVA guard pages to detect underflow and overflow
   - a lower limit on the size of allocations that are guarded
  
  r211229:
  
  Fix compile.  It seemed better to have memguard.c include opt_vm.h in
  case future compile-time knobs are added that it wants to use.
  Also add include guards and forward declarations to vm/memguard.h.
  
  r212058:
  
  The realloc case for memguard(9) will copy too many bytes when
  reallocating to a smaller-sized allocation.  Fix this issue.
  
  r212063:
  
  Have memguard(9) crash with an easier-to-debug message on double-free.
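
  As a quick illustration of how these knobs combine (the values and the
  "temp" malloc type below are only examples; any short description passed
  to MALLOC_DEFINE(9) works), on a kernel built with DEBUG_MEMGUARD:

	# /boot/loader.conf -- vm.memguard.divisor is boot-time only
	vm.memguard.divisor=10     # reserve kmem_max/10 of KVA for MemGuard
	vm.memguard.options=3      # 0x1: guard pages, 0x2: guard all >= PAGE_SIZE
	vm.memguard.frequency=500  # randomly guard ~500 in 100000 allocations

	# at run-time
	sysctl vm.memguard.desc=temp   # monitor the M_TEMP malloc type
	sysctl vm.memguard.minsize=64  # don't guard anything smaller

  Only allocations made after vm.memguard.desc is set become candidates
  for guarding; blocks that were already guarded are still released
  correctly by free(9).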

Modified:
  stable/8/share/man/man9/memguard.9
  stable/8/sys/kern/kern_malloc.c
  stable/8/sys/vm/memguard.c
  stable/8/sys/vm/memguard.h
  stable/8/sys/vm/vm_extern.h
  stable/8/sys/vm/vm_kern.c
Directory Properties:
  stable/8/share/man/   (props changed)
  stable/8/share/man/man1/   (props changed)
  stable/8/share/man/man3/   (props changed)
  stable/8/share/man/man4/   (props changed)
  stable/8/share/man/man5/   (props changed)
  stable/8/share/man/man7/   (props changed)
  stable/8/share/man/man8/   (props changed)
  stable/8/share/man/man9/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/share/man/man9/memguard.9
==============================================================================
--- stable/8/share/man/man9/memguard.9	Fri Sep 10 17:00:48 2010	(r212426)
+++ stable/8/share/man/man9/memguard.9	Fri Sep 10 17:26:41 2010	(r212427)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 31, 2006
+.Dd August 2, 2010
 .Dt MEMGUARD 9
 .Os
 .Sh NAME
@@ -41,54 +41,107 @@ multithreaded kernels where race conditi
 .Pp
 Currently,
 .Nm
-can only take over
+can take over
 .Fn malloc ,
 .Fn realloc
 and
 .Fn free
-for a particular malloc type.
+for a single malloc type.
+.Nm
+can also guard all allocations larger than
+.Dv PAGE_SIZE ,
+and can guard a random fraction of all allocations.
+There is also a knob to prevent allocations smaller than a specified
+size from being guarded, to limit memory waste.
 .Sh EXAMPLES
 To use
 .Nm
-for memory type compiled into the kernel, one has to add the
-following line to the
+for a memory type, either add an entry to
 .Pa /boot/loader.conf :
 .Bd -literal -offset indent
 vm.memguard.desc=<memory_type>
 .Ed
 .Pp
-Where
-.Ar memory_type
-is a short description of memory type to monitor.
-The short description of memory type is the second argument to
-.Xr MALLOC_DEFINE 9 ,
-so one has to find it in the kernel source.
-.Pp
-To use
-.Nm
-for memory type defined in a kernel module, one has to set
+Or set the
 .Va vm.memguard.desc
 .Xr sysctl 8
-variable before loading the module:
+variable at run-time:
 .Bd -literal -offset indent
 sysctl vm.memguard.desc=<memory_type>
 .Ed
 .Pp
+Where
+.Ar memory_type
+is a short description of the memory type to monitor.
+Only allocations from that
+.Ar memory_type
+made after
+.Va vm.memguard.desc
+is set will potentially be guarded.
+If
+.Va vm.memguard.desc
+is modified at run-time then only allocations of the new
+.Ar memory_type
+will potentially be guarded once the
+.Xr sysctl 8
+is set.
+Existing guarded allocations will still be properly released by
+.Xr free 9 .
+.Pp
+The short description of a
+.Xr malloc 9
+type is the second argument to
+.Xr MALLOC_DEFINE 9 ,
+so one has to find it in the kernel source.
+.Pp
 The
 .Va vm.memguard.divisor
-boot-time tunable is used to scale how much of
-.Va kmem_map
-one wants to allocate for
-.Nm .
-The default is 10, so
-.Va kmem_size Ns /10
-bytes will be used.
-The
-.Va kmem_size
-value can be obtained via the
-.Va vm.kmem_size
-.Xr sysctl 8
-variable.
+boot-time tunable is used to scale how much of the system's physical
+memory
+.Nm
+is allowed to consume.
+The default is 10, so up to
+.Va cnt.v_page_count Ns /10
+pages can be used.
+.Nm
+will reserve
+.Va vm_kmem_max
+/
+.Va vm.memguard.divisor
+bytes of virtual address space, limited by twice the physical memory
+size.
+The physical limit is reported as
+.Va vm.memguard.phys_limit
+and the virtual space reserved for
+.Nm
+is reported as
+.Va vm.memguard.mapsize .
+.Pp
+.Nm
+will not do page promotions for any allocation smaller than
+.Va vm.memguard.minsize
+bytes.
+The default is 0, meaning all allocations can potentially be guarded.
+.Nm
+can guard sufficiently large allocations randomly, with average
+frequency of every one in 100000 /
+.Va vm.memguard.frequency
+allocations.
+The default is 0, meaning no allocations are randomly guarded.
+.Pp
+.Nm
+can optionally add unmapped guard pages around each allocation to
+detect overflow and underflow, if
+.Va vm.memguard.options
+has the 1 bit set.
+This option is enabled by default.
+.Nm
+will optionally guard all allocations of
+.Dv PAGE_SIZE
+or larger if
+.Va vm.memguard.options
+has the 2 bit set.
+This option is off by default.
 .Sh SEE ALSO
 .Xr sysctl 8 ,
 .Xr vmstat 8 ,
@@ -102,10 +155,13 @@ first appeared in
 .Sh AUTHORS
 .An -nosplit
 .Nm
-was written by
+was originally written by
 .An Bosko Milekic Aq bmilekic at FreeBSD.org .
-This manual page was written by
+This manual page was originally written by
 .An Christian Brueffer Aq brueffer at FreeBSD.org .
+Additions have been made by
+.An Matthew Fleming Aq mdf at FreeBSD.org
+to both the implementation and the documentation.
 .Sh BUGS
 Currently, it is not possible to override UMA
 .Xr zone 9

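The man page's advice to find the short description "in the kernel
source" refers to the second argument of MALLOC_DEFINE(9).  For
instance, sys/kern/kern_malloc.c contains, roughly:

	MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");

so allocations made with M_TEMP are monitored after
"sysctl vm.memguard.desc=temp".
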
Modified: stable/8/sys/kern/kern_malloc.c
==============================================================================
--- stable/8/sys/kern/kern_malloc.c	Fri Sep 10 17:00:48 2010	(r212426)
+++ stable/8/sys/kern/kern_malloc.c	Fri Sep 10 17:26:41 2010	(r212427)
@@ -365,8 +365,12 @@ malloc(unsigned long size, struct malloc
 		   ("malloc(M_WAITOK) in interrupt context"));
 
 #ifdef DEBUG_MEMGUARD
-	if (memguard_cmp(mtp))
-		return memguard_alloc(size, flags);
+	if (memguard_cmp(mtp, size)) {
+		va = memguard_alloc(size, flags);
+		if (va != NULL)
+			return (va);
+		/* This is unfortunate but should not be fatal. */
+	}
 #endif
 
 #ifdef DEBUG_REDZONE
@@ -427,7 +431,7 @@ free(void *addr, struct malloc_type *mtp
 		return;
 
 #ifdef DEBUG_MEMGUARD
-	if (memguard_cmp(mtp)) {
+	if (is_memguard_addr(addr)) {
 		memguard_free(addr);
 		return;
 	}
@@ -496,10 +500,8 @@ realloc(void *addr, unsigned long size, 
 	 */
 
 #ifdef DEBUG_MEMGUARD
-if (memguard_cmp(mtp)) {
-	slab = NULL;
-	alloc = size;
-} else {
+	if (is_memguard_addr(addr))
+		return (memguard_realloc(addr, size, mtp, flags));
 #endif
 
 #ifdef DEBUG_REDZONE
@@ -524,10 +526,6 @@ if (memguard_cmp(mtp)) {
 		return (addr);
 #endif /* !DEBUG_REDZONE */
 
-#ifdef DEBUG_MEMGUARD
-}
-#endif
-
 	/* Allocate a new, bigger (or smaller) block */
 	if ((newaddr = malloc(size, mtp, flags)) == NULL)
 		return (NULL);
@@ -559,7 +557,7 @@ static void
 kmeminit(void *dummy)
 {
 	u_int8_t indx;
-	u_long mem_size;
+	u_long mem_size, tmp;
 	int i;
  
 	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
@@ -619,8 +617,13 @@ kmeminit(void *dummy)
 	 */
 	init_param3(vm_kmem_size / PAGE_SIZE);
 
+#ifdef DEBUG_MEMGUARD
+	tmp = memguard_fudge(vm_kmem_size, vm_kmem_size_max);
+#else
+	tmp = vm_kmem_size;
+#endif
 	kmem_map = kmem_suballoc(kernel_map, &kmembase, &kmemlimit,
-	    vm_kmem_size, TRUE);
+	    tmp, TRUE);
 	kmem_map->system_map = 1;
 
 #ifdef DEBUG_MEMGUARD
@@ -629,14 +632,7 @@ kmeminit(void *dummy)
 	 * replacement allocator used for detecting tamper-after-free
 	 * scenarios as they occur.  It is only used for debugging.
 	 */
-	vm_memguard_divisor = 10;
-	TUNABLE_INT_FETCH("vm.memguard.divisor", &vm_memguard_divisor);
-
-	/* Pick a conservative value if provided value sucks. */
-	if ((vm_memguard_divisor <= 0) ||
-	    ((vm_kmem_size / vm_memguard_divisor) == 0))
-		vm_memguard_divisor = 10;
-	memguard_init(kmem_map, vm_kmem_size / vm_memguard_divisor);
+	memguard_init(kmem_map);
 #endif
 
 	uma_startup2();

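Two details of the kern_malloc.c hunks above deserve a note.  free() and
realloc() now recognize guarded memory by address (is_memguard_addr())
rather than by malloc type, since vm.memguard.desc may change at
run-time after a guarded block has been handed out.  And because free(9)
of a guarded block releases its backing pages immediately, a stale
pointer faults on the very next access.  A contrived sketch, assuming
M_TEMP is the monitored type:

	void *p = malloc(100, M_TEMP, M_WAITOK); /* guarded allocation */
	free(p, M_TEMP);            /* backing pages unmapped right away */
	((char *)p)[0] = 1;         /* faults: the KVA is left unmapped
	                               until the cursor wraps back to it */
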
Modified: stable/8/sys/vm/memguard.c
==============================================================================
--- stable/8/sys/vm/memguard.c	Fri Sep 10 17:00:48 2010	(r212426)
+++ stable/8/sys/vm/memguard.c	Fri Sep 10 17:26:41 2010	(r212427)
@@ -1,6 +1,7 @@
 /*
- * Copyright (c) 2005,
- *     Bosko Milekic <bmilekic at FreeBSD.org>.  All rights reserved.
+ * Copyright (c) 2005, Bosko Milekic <bmilekic at FreeBSD.org>.
+ * Copyright (c) 2010 Isilon Systems, Inc. (http://www.isilon.com/)
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -36,6 +37,8 @@ __FBSDID("$FreeBSD$");
  * See the memguard(9) man page for more information on using MemGuard.
  */
 
+#include "opt_vm.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -47,26 +50,22 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
+#include <vm/uma.h>
 #include <vm/vm_param.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
+#include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/memguard.h>
 
-/*
- * The maximum number of pages allowed per allocation.  If you're using
- * MemGuard to override very large items (> MAX_PAGES_PER_ITEM in size),
- * you need to increase MAX_PAGES_PER_ITEM.
- */
-#define	MAX_PAGES_PER_ITEM	64
-
 SYSCTL_NODE(_vm, OID_AUTO, memguard, CTLFLAG_RW, NULL, "MemGuard data");
 /*
  * The vm_memguard_divisor variable controls how much of kmem_map should be
  * reserved for MemGuard.
  */
-u_int vm_memguard_divisor;
-SYSCTL_UINT(_vm_memguard, OID_AUTO, divisor, CTLFLAG_RD, &vm_memguard_divisor,
+static u_int vm_memguard_divisor;
+SYSCTL_UINT(_vm_memguard, OID_AUTO, divisor, CTLFLAG_RDTUN,
+    &vm_memguard_divisor,
     0, "(kmem_size/memguard_divisor) == memguard submap size");     
 
 /*
@@ -78,233 +77,372 @@ TUNABLE_STR("vm.memguard.desc", vm_memgu
 static int
 memguard_sysctl_desc(SYSCTL_HANDLER_ARGS)
 {
-	struct malloc_type_internal *mtip;
-	struct malloc_type_stats *mtsp;
-	struct malloc_type *mtp;
-	char desc[128];
-	long bytes;
-	int error, i;
+	char desc[sizeof(vm_memguard_desc)];
+	int error;
 
 	strlcpy(desc, vm_memguard_desc, sizeof(desc));
 	error = sysctl_handle_string(oidp, desc, sizeof(desc), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
+	mtx_lock(&malloc_mtx);
 	/*
-	 * We can change memory type when no memory has been allocated for it
-	 * or when there is no such memory type yet (ie. it will be loaded with
-	 * kernel module).
+	 * If mtp is NULL, it will be initialized in memguard_cmp().
 	 */
-	bytes = 0;
-	mtx_lock(&malloc_mtx);
-	mtp = malloc_desc2type(desc);
-	if (mtp != NULL) {
-		mtip = mtp->ks_handle;
-		for (i = 0; i < MAXCPU; i++) {
-			mtsp = &mtip->mti_stats[i];
-			bytes += mtsp->mts_memalloced;
-			bytes -= mtsp->mts_memfreed;
-		}
-	}
-	if (bytes > 0)
-		error = EBUSY;
-	else {
-		/*
-		 * If mtp is NULL, it will be initialized in memguard_cmp().
-		 */
-		vm_memguard_mtype = mtp;
-		strlcpy(vm_memguard_desc, desc, sizeof(vm_memguard_desc));
-	}
+	vm_memguard_mtype = malloc_desc2type(desc);
+	strlcpy(vm_memguard_desc, desc, sizeof(vm_memguard_desc));
 	mtx_unlock(&malloc_mtx);
 	return (error);
 }
-SYSCTL_PROC(_vm_memguard, OID_AUTO, desc, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
+SYSCTL_PROC(_vm_memguard, OID_AUTO, desc,
+    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
     memguard_sysctl_desc, "A", "Short description of memory type to monitor");
 
+static vm_map_t memguard_map = NULL;
+static vm_offset_t memguard_cursor;
+static vm_size_t memguard_mapsize;
+static vm_size_t memguard_physlimit;
+static u_long memguard_wasted;
+static u_long memguard_wrap;
+static u_long memguard_succ;
+static u_long memguard_fail_kva;
+static u_long memguard_fail_pgs;
+
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, cursor, CTLFLAG_RD,
+    &memguard_cursor, 0, "MemGuard cursor");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, mapsize, CTLFLAG_RD,
+    &memguard_mapsize, 0, "MemGuard private vm_map size");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, phys_limit, CTLFLAG_RD,
+    &memguard_physlimit, 0, "Limit on MemGuard memory consumption");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, wasted, CTLFLAG_RD,
+    &memguard_wasted, 0, "Excess memory used through page promotion");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, wrapcnt, CTLFLAG_RD,
+    &memguard_wrap, 0, "MemGuard cursor wrap count");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, numalloc, CTLFLAG_RD,
+    &memguard_succ, 0, "Count of successful MemGuard allocations");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, fail_kva, CTLFLAG_RD,
+    &memguard_fail_kva, 0, "MemGuard failures due to lack of KVA");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, fail_pgs, CTLFLAG_RD,
+    &memguard_fail_pgs, 0, "MemGuard failures due to lack of pages");
+
+#define MG_GUARD	0x001
+#define MG_ALLLARGE	0x002
+static int memguard_options = MG_GUARD;
+TUNABLE_INT("vm.memguard.options", &memguard_options);
+SYSCTL_INT(_vm_memguard, OID_AUTO, options, CTLFLAG_RW,
+    &memguard_options, 0,
+    "MemGuard options:\n"
+    "\t0x001 - add guard pages around each allocation\n"
+    "\t0x002 - always use MemGuard for allocations over a page");
+
+static u_int memguard_minsize;
+static u_long memguard_minsize_reject;
+SYSCTL_UINT(_vm_memguard, OID_AUTO, minsize, CTLFLAG_RW,
+    &memguard_minsize, 0, "Minimum size for page promotion");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, minsize_reject, CTLFLAG_RD,
+    &memguard_minsize_reject, 0, "# times rejected for size");
+
+static u_int memguard_frequency;
+static u_long memguard_frequency_hits;
+TUNABLE_INT("vm.memguard.frequency", &memguard_frequency);
+SYSCTL_UINT(_vm_memguard, OID_AUTO, frequency, CTLFLAG_RW,
+    &memguard_frequency, 0, "Times in 100000 that MemGuard will randomly run");
+SYSCTL_ULONG(_vm_memguard, OID_AUTO, frequency_hits, CTLFLAG_RD,
+    &memguard_frequency_hits, 0, "# times MemGuard randomly chose");
+
+
 /*
- * Global MemGuard data.
+ * Return a fudged value to be used for vm_kmem_size for allocating
+ * the kmem_map.  The memguard memory will be a submap.
  */
-static vm_map_t memguard_map;
-static unsigned long memguard_mapsize;
-static unsigned long memguard_mapused;
-struct memguard_entry {
-	STAILQ_ENTRY(memguard_entry) entries;
-	void *ptr;
-};
-static struct memguard_fifo {
-	struct memguard_entry *stqh_first;
-	struct memguard_entry **stqh_last;
-	int index;
-} memguard_fifo_pool[MAX_PAGES_PER_ITEM];
-
-/*
- * Local prototypes.
- */
-static void memguard_guard(void *addr, int numpgs);
-static void memguard_unguard(void *addr, int numpgs);
-static struct memguard_fifo *vtomgfifo(vm_offset_t va);
-static void vsetmgfifo(vm_offset_t va, struct memguard_fifo *mgfifo);
-static void vclrmgfifo(vm_offset_t va);
-
-/*
- * Local macros.  MemGuard data is global, so replace these with whatever
- * your system uses to protect global data (if it is kernel-level
- * parallelized).  This is for porting among BSDs.
- */
-#define	MEMGUARD_CRIT_SECTION_DECLARE	static struct mtx memguard_mtx
-#define	MEMGUARD_CRIT_SECTION_INIT				\
-	mtx_init(&memguard_mtx, "MemGuard mtx", NULL, MTX_DEF)
-#define	MEMGUARD_CRIT_SECTION_ENTER	mtx_lock(&memguard_mtx)
-#define	MEMGUARD_CRIT_SECTION_EXIT	mtx_unlock(&memguard_mtx)
-MEMGUARD_CRIT_SECTION_DECLARE;
+unsigned long
+memguard_fudge(unsigned long km_size, unsigned long km_max)
+{
+	u_long mem_pgs = cnt.v_page_count;
+
+	vm_memguard_divisor = 10;
+	TUNABLE_INT_FETCH("vm.memguard.divisor", &vm_memguard_divisor);
+
+	/* Pick a conservative value if provided value sucks. */
+	if ((vm_memguard_divisor <= 0) ||
+	    ((km_size / vm_memguard_divisor) == 0))
+		vm_memguard_divisor = 10;
+	/*
+	 * Limit consumption of physical pages to
+	 * 1/vm_memguard_divisor of system memory.  If the KVA is
+	 * smaller than this then the KVA limit comes into play first.
+	 * This prevents memguard's page promotions from completely
+	 * using up memory, since most malloc(9) calls are sub-page.
+	 */
+	memguard_physlimit = (mem_pgs / vm_memguard_divisor) * PAGE_SIZE;
+	/*
+	 * We want as much KVA as we can take safely.  Use at most our
+	 * allotted fraction of kmem_max.  Limit this to twice the
+	 * physical memory to avoid using too much memory as pagetable
+	 * pages.
+	 */
+	memguard_mapsize = km_max / vm_memguard_divisor;
+	/* size must be multiple of PAGE_SIZE */
+	memguard_mapsize = round_page(memguard_mapsize);
+	if (memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs)
+		memguard_mapsize = mem_pgs * 2 * PAGE_SIZE;
+	if (km_size + memguard_mapsize > km_max)
+		return (km_max);
+	return (km_size + memguard_mapsize);
+}
 
 /*
  * Initialize the MemGuard mock allocator.  All objects from MemGuard come
  * out of a single VM map (contiguous chunk of address space).
  */
 void
-memguard_init(vm_map_t parent_map, unsigned long size)
+memguard_init(vm_map_t parent_map)
 {
-	char *base, *limit;
-	int i;
-
-	/* size must be multiple of PAGE_SIZE */
-	size /= PAGE_SIZE;
-	size++;
-	size *= PAGE_SIZE;
+	vm_offset_t base, limit;
 
-	memguard_map = kmem_suballoc(parent_map, (vm_offset_t *)&base,
-	    (vm_offset_t *)&limit, (vm_size_t)size, FALSE);
+	memguard_map = kmem_suballoc(parent_map, &base, &limit,
+	    memguard_mapsize, FALSE);
 	memguard_map->system_map = 1;
-	memguard_mapsize = size;
-	memguard_mapused = 0;
-
-	MEMGUARD_CRIT_SECTION_INIT;
-	MEMGUARD_CRIT_SECTION_ENTER;
-	for (i = 0; i < MAX_PAGES_PER_ITEM; i++) {
-		STAILQ_INIT(&memguard_fifo_pool[i]);
-		memguard_fifo_pool[i].index = i;
-	}
-	MEMGUARD_CRIT_SECTION_EXIT;
+	KASSERT(memguard_mapsize == limit - base,
+	    ("Expected %lu, got %lu", (u_long)memguard_mapsize,
+	     (u_long)(limit - base)));
+	memguard_cursor = base;
 
 	printf("MEMGUARD DEBUGGING ALLOCATOR INITIALIZED:\n");
-	printf("\tMEMGUARD map base: %p\n", base);
-	printf("\tMEMGUARD map limit: %p\n", limit);
-	printf("\tMEMGUARD map size: %ld (Bytes)\n", size);
+	printf("\tMEMGUARD map base: 0x%lx\n", (u_long)base);
+	printf("\tMEMGUARD map limit: 0x%lx\n", (u_long)limit);
+	printf("\tMEMGUARD map size: %jd KBytes\n",
+	    (uintmax_t)memguard_mapsize >> 10);
 }
 
 /*
- * Allocate a single object of specified size with specified flags (either
- * M_WAITOK or M_NOWAIT).
+ * Run things that can't be done as early as memguard_init().
+ */
+static void
+memguard_sysinit(void)
+{
+	struct sysctl_oid_list *parent;
+
+	parent = SYSCTL_STATIC_CHILDREN(_vm_memguard);
+
+	SYSCTL_ADD_ULONG(NULL, parent, OID_AUTO, "mapstart", CTLFLAG_RD,
+	    &memguard_map->min_offset, "MemGuard KVA base");
+	SYSCTL_ADD_ULONG(NULL, parent, OID_AUTO, "maplimit", CTLFLAG_RD,
+	    &memguard_map->max_offset, "MemGuard KVA end");
+	SYSCTL_ADD_ULONG(NULL, parent, OID_AUTO, "mapused", CTLFLAG_RD,
+	    &memguard_map->size, "MemGuard KVA used");
+}
+SYSINIT(memguard, SI_SUB_KLD, SI_ORDER_ANY, memguard_sysinit, NULL);
+
+/*
+ * v2sizep() converts a virtual address of the first page allocated for
+ * an item to a pointer to u_long recording the size of the original
+ * allocation request.
+ *
+ * This routine is very similar to those defined by UMA in uma_int.h.
+ * The difference is that this routine stores the originally allocated
+ * size in one of the page's fields that is unused when the page is
+ * wired rather than the object field, which is used.
+ */
+static u_long *
+v2sizep(vm_offset_t va)
+{
+	vm_paddr_t pa;
+	struct vm_page *p;
+
+	pa = pmap_kextract(va);
+	if (pa == 0)
+		panic("MemGuard detected double-free of %p", (void *)va);
+	p = PHYS_TO_VM_PAGE(pa);
+	KASSERT(p->wire_count != 0 && p->queue == PQ_NONE,
+	    ("MEMGUARD: Expected wired page %p in vtomgfifo!", p));
+	return ((u_long *)&p->pageq.tqe_next);
+}
+
+/*
+ * Allocate a single object of specified size with specified flags
+ * (either M_WAITOK or M_NOWAIT).
  */
 void *
-memguard_alloc(unsigned long size, int flags)
+memguard_alloc(unsigned long req_size, int flags)
 {
-	void *obj;
-	struct memguard_entry *e = NULL;
-	int numpgs;
-
-	numpgs = size / PAGE_SIZE;
-	if ((size % PAGE_SIZE) != 0)
-		numpgs++;
-	if (numpgs > MAX_PAGES_PER_ITEM)
-		panic("MEMGUARD: You must increase MAX_PAGES_PER_ITEM " \
-		    "in memguard.c (requested: %d pages)", numpgs);
-	if (numpgs == 0)
-		return NULL;
-
-	/*
-	 * If we haven't exhausted the memguard_map yet, allocate from
-	 * it and grab a new page, even if we have recycled pages in our
-	 * FIFO.  This is because we wish to allow recycled pages to live
-	 * guarded in the FIFO for as long as possible in order to catch
-	 * even very late tamper-after-frees, even though it means that
-	 * we end up wasting more memory, this is only a DEBUGGING allocator
-	 * after all.
-	 */
-	MEMGUARD_CRIT_SECTION_ENTER;
-	if (memguard_mapused >= memguard_mapsize) {
-		e = STAILQ_FIRST(&memguard_fifo_pool[numpgs - 1]);
-		if (e != NULL) {
-			STAILQ_REMOVE(&memguard_fifo_pool[numpgs - 1], e,
-			    memguard_entry, entries);
-			MEMGUARD_CRIT_SECTION_EXIT;
-			obj = e->ptr;
-			free(e, M_TEMP);
-			memguard_unguard(obj, numpgs);
-			if (flags & M_ZERO)
-				bzero(obj, PAGE_SIZE * numpgs);
-			return obj;
+	vm_offset_t addr;
+	u_long size_p, size_v;
+	int do_guard, rv;
+
+	size_p = round_page(req_size);
+	if (size_p == 0)
+		return (NULL);
+	/*
+	 * To ensure there are holes on both sides of the allocation,
+	 * request 2 extra pages of KVA.  We will only actually add a
+	 * vm_map_entry and get pages for the original request.  Save
+	 * the value of memguard_options so we have a consistent
+	 * value.
+	 */
+	size_v = size_p;
+	do_guard = (memguard_options & MG_GUARD) != 0;
+	if (do_guard)
+		size_v += 2 * PAGE_SIZE;
+
+	vm_map_lock(memguard_map);
+	/*
+	 * When we pass our memory limit, reject sub-page allocations.
+	 * Page-size and larger allocations will use the same amount
+	 * of physical memory whether we allocate or hand off to
+	 * uma_large_alloc(), so keep those.
+	 */
+	if (memguard_map->size >= memguard_physlimit &&
+	    req_size < PAGE_SIZE) {
+		addr = (vm_offset_t)NULL;
+		memguard_fail_pgs++;
+		goto out;
+	}
+	/*
+	 * Keep a moving cursor so we don't recycle KVA as long as
+	 * possible.  It's not perfect, since we don't know in what
+	 * order previous allocations will be free'd, but it's simple
+	 * and fast, and requires O(1) additional storage if guard
+	 * pages are not used.
+	 *
+	 * XXX This scheme will lead to greater fragmentation of the
+	 * map, unless vm_map_findspace() is tweaked.
+	 */
+	for (;;) {
+		rv = vm_map_findspace(memguard_map, memguard_cursor,
+		    size_v, &addr);
+		if (rv == KERN_SUCCESS)
+			break;
+		/*
+		 * The map has no space.  This may be due to
+		 * fragmentation, or because the cursor is near the
+		 * end of the map.
+		 */
+		if (memguard_cursor == vm_map_min(memguard_map)) {
+			memguard_fail_kva++;
+			addr = (vm_offset_t)NULL;
+			goto out;
 		}
-		MEMGUARD_CRIT_SECTION_EXIT;
-		if (flags & M_WAITOK)
-			panic("MEMGUARD: Failed with M_WAITOK: " \
-			    "memguard_map too small");
-		return NULL;
+		memguard_wrap++;
+		memguard_cursor = vm_map_min(memguard_map);
 	}
-	memguard_mapused += (PAGE_SIZE * numpgs);
-	MEMGUARD_CRIT_SECTION_EXIT;
-
-	obj = (void *)kmem_malloc(memguard_map, PAGE_SIZE * numpgs, flags);
-	if (obj != NULL) {
-		vsetmgfifo((vm_offset_t)obj, &memguard_fifo_pool[numpgs - 1]);
-		if (flags & M_ZERO)
-			bzero(obj, PAGE_SIZE * numpgs);
-	} else {
-		MEMGUARD_CRIT_SECTION_ENTER;
-		memguard_mapused -= (PAGE_SIZE * numpgs);
-		MEMGUARD_CRIT_SECTION_EXIT;
+	if (do_guard)
+		addr += PAGE_SIZE;
+	rv = kmem_back(memguard_map, addr, size_p, flags);
+	if (rv != KERN_SUCCESS) {
+		memguard_fail_pgs++;
+		addr = (vm_offset_t)NULL;
+		goto out;
 	}
-	return obj;
+	memguard_cursor = addr + size_p;
+	*v2sizep(trunc_page(addr)) = req_size;
+	memguard_succ++;
+	if (req_size < PAGE_SIZE) {
+		memguard_wasted += (PAGE_SIZE - req_size);
+		if (do_guard) {
+			/*
+			 * Align the request to 16 bytes, and return
+			 * an address near the end of the page, to
+			 * better detect array overrun.
+			 */
+			req_size = roundup2(req_size, 16);
+			addr += (PAGE_SIZE - req_size);
+		}
+	}
+out:
+	vm_map_unlock(memguard_map);
+	return ((void *)addr);
+}
+
+int
+is_memguard_addr(void *addr)
+{
+	vm_offset_t a = (vm_offset_t)(uintptr_t)addr;
+
+	return (a >= memguard_map->min_offset && a < memguard_map->max_offset);
 }
 
 /*
  * Free specified single object.
  */
 void
-memguard_free(void *addr)
+memguard_free(void *ptr)
 {
-	struct memguard_entry *e;
-	struct memguard_fifo *mgfifo;
-	int idx;
-	int *temp;
-
-	addr = (void *)trunc_page((unsigned long)addr);
-
-	/*
-	 * Page should not be guarded by now, so force a write.
-	 * The purpose of this is to increase the likelihood of catching a
-	 * double-free, but not necessarily a tamper-after-free (the second
-	 * thread freeing might not write before freeing, so this forces it
-	 * to and, subsequently, trigger a fault).
-	 */
-	temp = (int *)((unsigned long)addr + (PAGE_SIZE/2)); 	/* in page */
-	*temp = 0xd34dc0d3;
-
-	mgfifo = vtomgfifo((vm_offset_t)addr);
-	idx = mgfifo->index;
-	memguard_guard(addr, idx + 1);
-	e = malloc(sizeof(struct memguard_entry), M_TEMP, M_NOWAIT);
-	if (e == NULL) {
-		MEMGUARD_CRIT_SECTION_ENTER;
-		memguard_mapused -= (PAGE_SIZE * (idx + 1));
-		MEMGUARD_CRIT_SECTION_EXIT;
-		memguard_unguard(addr, idx + 1);	/* just in case */
-		vclrmgfifo((vm_offset_t)addr);
-		kmem_free(memguard_map, (vm_offset_t)addr,
-		    PAGE_SIZE * (idx + 1));
-		return;
-	}
-	e->ptr = addr;
-	MEMGUARD_CRIT_SECTION_ENTER;
-	STAILQ_INSERT_TAIL(mgfifo, e, entries);
-	MEMGUARD_CRIT_SECTION_EXIT;
+	vm_offset_t addr;
+	u_long req_size, size;
+	char *temp;
+	int i;
+
+	addr = trunc_page((uintptr_t)ptr);
+	req_size = *v2sizep(addr);
+	size = round_page(req_size);
+
+	/*
+	 * Page should not be guarded right now, so force a write.
+	 * The purpose of this is to increase the likelihood of
+	 * catching a double-free, but not necessarily a
+	 * tamper-after-free (the second thread freeing might not
+	 * write before freeing, so this forces it to and,
+	 * subsequently, trigger a fault).
+	 */
+	temp = ptr;
+	for (i = 0; i < size; i += PAGE_SIZE)
+		temp[i] = 'M';
+
+	/*
+	 * This requires carnal knowledge of the implementation of
+	 * kmem_free(), but since we've already replaced kmem_malloc()
+	 * above, it's not really any worse.  We want to use the
+	 * vm_map lock to serialize updates to memguard_wasted, since
+	 * we had the lock at increment.
+	 */
+	vm_map_lock(memguard_map);
+	if (req_size < PAGE_SIZE)
+		memguard_wasted -= (PAGE_SIZE - req_size);
+	(void)vm_map_delete(memguard_map, addr, addr + size);
+	vm_map_unlock(memguard_map);
+}
+
+/*
+ * Re-allocate an allocation that was originally guarded.
+ */
+void *
+memguard_realloc(void *addr, unsigned long size, struct malloc_type *mtp,
+    int flags)
+{
+	void *newaddr;
+	u_long old_size;
+
+	/*
+	 * Allocate the new block.  Force the allocation to be guarded
+	 * as the original may have been guarded through random
+	 * chance, and that should be preserved.
+	 */
+	if ((newaddr = memguard_alloc(size, flags)) == NULL)
+		return (NULL);
+
+	/* Copy over original contents. */
+	old_size = *v2sizep(trunc_page((uintptr_t)addr));
+	bcopy(addr, newaddr, min(size, old_size));
+	memguard_free(addr);
+	return (newaddr);
 }
 
 int
-memguard_cmp(struct malloc_type *mtp)
+memguard_cmp(struct malloc_type *mtp, unsigned long size)
 {
 
+	if (size < memguard_minsize) {
+		memguard_minsize_reject++;
+		return (0);
+	}
+	if ((memguard_options & MG_ALLLARGE) != 0 && size >= PAGE_SIZE)
+		return (1);
+	if (memguard_frequency > 0 &&
+	    (random() % 100000) < memguard_frequency) {
+		memguard_frequency_hits++;
+		return (1);
+	}
 #if 1
 	/*
 	 * The safest way of comparison is to always compare short description
@@ -328,78 +466,3 @@ memguard_cmp(struct malloc_type *mtp)
 	return (0);
 #endif
 }
-
-/*
- * Guard a page containing specified object (make it read-only so that
- * future writes to it fail).
- */
-static void
-memguard_guard(void *addr, int numpgs)
-{
-	void *a = (void *)trunc_page((unsigned long)addr);
-	if (vm_map_protect(memguard_map, (vm_offset_t)a,
-	    (vm_offset_t)((unsigned long)a + (PAGE_SIZE * numpgs)),
-	    VM_PROT_READ, FALSE) != KERN_SUCCESS)
-		panic("MEMGUARD: Unable to guard page!");
-}
-
-/*
- * Unguard a page containing specified object (make it read-and-write to
- * allow full data access).
- */
-static void
-memguard_unguard(void *addr, int numpgs)
-{
-	void *a = (void *)trunc_page((unsigned long)addr);
-	if (vm_map_protect(memguard_map, (vm_offset_t)a,
-	    (vm_offset_t)((unsigned long)a + (PAGE_SIZE * numpgs)),
-	    VM_PROT_DEFAULT, FALSE) != KERN_SUCCESS)
-		panic("MEMGUARD: Unable to unguard page!");
-}
-
-/*
- * vtomgfifo() converts a virtual address of the first page allocated for
- * an item to a memguard_fifo_pool reference for the corresponding item's
- * size.
- *
- * vsetmgfifo() sets a reference in an underlying page for the specified
- * virtual address to an appropriate memguard_fifo_pool.
- *
- * These routines are very similar to those defined by UMA in uma_int.h.
- * The difference is that these routines store the mgfifo in one of the
- * page's fields that is unused when the page is wired rather than the
- * object field, which is used.
- */
-static struct memguard_fifo *
-vtomgfifo(vm_offset_t va)
-{
-	vm_page_t p;
-	struct memguard_fifo *mgfifo;
-
-	p = PHYS_TO_VM_PAGE(pmap_kextract(va));
-	KASSERT(p->wire_count != 0 && p->queue == PQ_NONE,
-	    ("MEMGUARD: Expected wired page in vtomgfifo!"));
-	mgfifo = (struct memguard_fifo *)p->pageq.tqe_next;
-	return mgfifo;
-}
-
-static void
-vsetmgfifo(vm_offset_t va, struct memguard_fifo *mgfifo)
-{
-	vm_page_t p;
-
-	p = PHYS_TO_VM_PAGE(pmap_kextract(va));
-	KASSERT(p->wire_count != 0 && p->queue == PQ_NONE,
-	    ("MEMGUARD: Expected wired page in vsetmgfifo!"));
-	p->pageq.tqe_next = (vm_page_t)mgfifo;
-}
-
-static void vclrmgfifo(vm_offset_t va)
-{
-	vm_page_t p;
-
-	p = PHYS_TO_VM_PAGE(pmap_kextract(va));
-	KASSERT(p->wire_count != 0 && p->queue == PQ_NONE,
-	    ("MEMGUARD: Expected wired page in vclrmgfifo!"));
-	p->pageq.tqe_next = NULL;
-}

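To make the sizing and layout logic in memguard.c concrete, here is a
worked example with illustrative numbers: 4 GiB of RAM (2^20 4 KiB
pages), a vm_kmem_size_max of 8 GiB, and the default divisor of 10.
memguard_fudge() then computes roughly

	memguard_physlimit = (1048576 / 10) * 4096 bytes  (~410 MiB of pages)
	memguard_mapsize   = 8 GiB / 10                   (~820 MiB of KVA)

The 2x-physical-memory cap on memguard_mapsize does not bite here, so
kmem_map is grown by ~820 MiB of address space.  Once MemGuard's map
holds ~410 MiB it stops accepting sub-page allocations (page-sized and
larger ones would use the same amount of physical memory either way).
With the MG_GUARD option (on by default), each allocation also reserves
two extra unbacked pages of KVA so that both underflow and overflow
fault:

	+---------+--------------------------+---------+
	|  guard  |   round_page(req_size)   |  guard  |
	| (no pg) |      backed pages        | (no pg) |
	+---------+--------------------------+---------+

and sub-page requests are returned 16-byte aligned near the end of
their page, to catch small overruns as early as possible.
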
Modified: stable/8/sys/vm/memguard.h
==============================================================================
--- stable/8/sys/vm/memguard.h	Fri Sep 10 17:00:48 2010	(r212426)
+++ stable/8/sys/vm/memguard.h	Fri Sep 10 17:26:41 2010	(r212427)
@@ -26,9 +26,30 @@
  * $FreeBSD$
  */
 
-extern u_int vm_memguard_divisor;
+#ifndef _VM_MEMGUARD_H_
+#define	_VM_MEMGUARD_H_
 
-void	memguard_init(vm_map_t parent_map, unsigned long size);
-void 	*memguard_alloc(unsigned long size, int flags);
-void	memguard_free(void *addr);
-int	memguard_cmp(struct malloc_type *mtp);
+#include "opt_vm.h"
+
+struct malloc_type;
+struct vm_map;
+
+#ifdef DEBUG_MEMGUARD
+unsigned long	memguard_fudge(unsigned long, unsigned long);
+void	memguard_init(struct vm_map *);
+void 	*memguard_alloc(unsigned long, int);
+void	*memguard_realloc(void *, unsigned long, struct malloc_type *, int);
+void	memguard_free(void *);
+int	memguard_cmp(struct malloc_type *, unsigned long);
+int	is_memguard_addr(void *);
+#else
+#define	memguard_fudge(size, xxx)	(size)
+#define	memguard_init(map)		do { } while (0)
+#define	memguard_alloc(size, flags)	NULL
+#define	memguard_realloc(a, s, mtp, f)	NULL
+#define	memguard_free(addr)		do { } while (0)
+#define	memguard_cmp(mtp, size)		0
+#define	is_memguard_addr(addr)		0
+#endif
+
+#endif /* _VM_MEMGUARD_H_ */

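A side effect of the new stub macros in memguard.h: on a kernel built
without DEBUG_MEMGUARD, memguard_cmp() and is_memguard_addr() expand to
the constant 0 and memguard_init()/memguard_free() to empty statements,
so a call site could in principle be written without its own #ifdef and
the compiler would drop the dead branch:

	if (is_memguard_addr(addr)) {   /* constant 0 w/o DEBUG_MEMGUARD */
		memguard_free(addr);    /* expands to a no-op */
		return;
	}
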
Modified: stable/8/sys/vm/vm_extern.h
==============================================================================
--- stable/8/sys/vm/vm_extern.h	Fri Sep 10 17:00:48 2010	(r212426)
+++ stable/8/sys/vm/vm_extern.h	Fri Sep 10 17:26:41 2010	(r212427)
@@ -52,6 +52,7 @@ void kmem_free(vm_map_t, vm_offset_t, vm
 void kmem_free_wakeup(vm_map_t, vm_offset_t, vm_size_t);
 void kmem_init(vm_offset_t, vm_offset_t);
 vm_offset_t kmem_malloc(vm_map_t map, vm_size_t size, int flags);
+int kmem_back(vm_map_t, vm_offset_t, vm_size_t, int);
 vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t,
     boolean_t);
 void swapout_procs(int);

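The kmem_back() prototype is exported here because memguard_alloc()
picks its own KVA with vm_map_findspace() (to implement the cursor and
the guard-page holes) and then only needs pages wired behind that exact
range; kmem_malloc() would choose the address itself.  Judging by the
(truncated) vm_kern.c hunk below, the backing step was factored out of
kmem_malloc() for this purpose.  The pairing used in memguard.c above
reduces to:

	rv = vm_map_findspace(memguard_map, memguard_cursor, size_v, &addr);
	...
	rv = kmem_back(memguard_map, addr, size_p, flags);
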
Modified: stable/8/sys/vm/vm_kern.c
==============================================================================
--- stable/8/sys/vm/vm_kern.c	Fri Sep 10 17:00:48 2010	(r212426)
+++ stable/8/sys/vm/vm_kern.c	Fri Sep 10 17:26:41 2010	(r212427)
@@ -272,11 +272,8 @@ kmem_malloc(map, size, flags)
 	vm_size_t size;
 	int flags;
 {
-	vm_offset_t offset, i;
-	vm_map_entry_t entry;
 	vm_offset_t addr;
-	vm_page_t m;
-	int pflags;
+	int i, rv;
 
 	size = round_page(size);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

