git: 8978608832c2 - main - amd64: Populate the KMSAN shadow maps and integrate with the VM

Mark Johnston markj at FreeBSD.org
Wed Aug 11 01:31:06 UTC 2021


The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=8978608832c28572bbf5adadb9cfb077e8f15255

commit 8978608832c28572bbf5adadb9cfb077e8f15255
Author:     Mark Johnston <markj at FreeBSD.org>
AuthorDate: 2021-08-10 20:52:36 +0000
Commit:     Mark Johnston <markj at FreeBSD.org>
CommitDate: 2021-08-11 01:27:53 +0000

    amd64: Populate the KMSAN shadow maps and integrate with the VM
    
    - During boot, allocate PDP pages for the shadow maps.  The region above
      KERNBASE is currently not shadowed.
    - Create a dummy shadow for the vm page array.  For now, this array is
      not protected by the shadow map to help reduce kernel memory usage.
    - Grow shadows when growing the kernel map.
    - Increase the default kernel stack size when KMSAN is enabled.  As with
      KASAN, sanitizer instrumentation appears to create stack frames large
      enough that the default value is not sufficient.
    - Disable UMA's use of the direct map when KMSAN is configured.  KMSAN
      cannot validate the direct map.
    - Disable unmapped I/O when KMSAN is configured.
    - Lower the limit on paging buffers when KMSAN is configured.  Each
      buffer has a static MAXPHYS-sized allocation of KVA, which in turn
      eats 2*MAXPHYS of space in the shadow map (see the illustrative
      sketch after the diffstat below).
    
    Reviewed by:    alc, kib
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D31295
---
 sys/amd64/amd64/pmap.c      | 122 +++++++++++++++++++++++++++++++++++++++++++-
 sys/amd64/include/param.h   |   2 +-
 sys/amd64/include/vmparam.h |   2 +-
 sys/kern/kern_malloc.c      |   8 +--
 sys/kern/vfs_bio.c          |  15 ++++--
 sys/vm/vm_pager.c           |   9 ++++
 6 files changed, 148 insertions(+), 10 deletions(-)
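
[Editor's note] For readers skimming the sizing changes, here is a minimal
userspace sketch of the layout that drives them.  It is not FreeBSD code:
the base addresses, the MAXPHYS value, and the helper names (addr_to_shad,
addr_to_orig) are made up for illustration.  KMSAN pairs every sanitized
kernel mapping with a same-sized shadow mapping and a same-sized origin
mapping, each reached by a constant offset, so a MAXPHYS-sized pbuf KVA
allocation consumes an extra 2*MAXPHYS of shadow/origin space, and roughly
a third of a given KVA or physical-memory budget is left for the data
itself -- which is what the /= 3 adjustments in kern_malloc.c and
vfs_bio.c account for.

/*
 * Illustrative sketch only: the base addresses, MAXPHYS value, and helper
 * names below are hypothetical and do not match the kernel's.  It shows
 * the constant-offset, same-size shadow and origin mappings that make
 * every byte of sanitized KVA cost roughly three bytes of address space.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	KVA_BASE	UINT64_C(0xfffffe0000000000)	/* hypothetical kernel VA base */
#define	SHAD_BASE	UINT64_C(0xffffff0000000000)	/* hypothetical shadow map base */
#define	ORIG_BASE	UINT64_C(0xffffff8000000000)	/* hypothetical origin map base */
#define	MAXPHYS_EX	(128 * 1024)			/* example MAXPHYS value */

static uint64_t
addr_to_shad(uint64_t addr)
{
	/* The shadow lives at a fixed offset from the sanitized address. */
	return (addr - KVA_BASE + SHAD_BASE);
}

static uint64_t
addr_to_orig(uint64_t addr)
{
	/* The origin map is the same size, at its own fixed offset. */
	return (addr - KVA_BASE + ORIG_BASE);
}

int
main(void)
{
	uint64_t buf, budget;

	buf = KVA_BASE + 0x1000;
	printf("data   %#" PRIx64 "\n", buf);
	printf("shadow %#" PRIx64 "\n", addr_to_shad(buf));
	printf("origin %#" PRIx64 "\n", addr_to_orig(buf));

	/* One MAXPHYS-sized pbuf KVA allocation costs 2*MAXPHYS extra. */
	printf("extra shadow/origin space per pbuf: %d bytes\n",
	    2 * MAXPHYS_EX);

	/* Only about a third of a KVA budget remains for the data itself. */
	budget = UINT64_C(3) * 1024 * 1024 * 1024;
	printf("usable after shadowing: %" PRIu64 " bytes\n", budget / 3);
	return (0);
}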

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index d7a5e16d3dd6..aae35c5d7e07 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -122,6 +122,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
+#include <sys/msan.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rangeset.h>
@@ -161,8 +162,8 @@ __FBSDID("$FreeBSD$");
 #include <x86/ifunc.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
-#include <machine/intr_machdep.h>
 #include <machine/md_var.h>
+#include <machine/msan.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #ifdef SMP
@@ -430,6 +431,17 @@ u_int64_t		KPML5phys;	/* phys addr of kernel level 5,
 #ifdef KASAN
 static uint64_t		KASANPDPphys;
 #endif
+#ifdef KMSAN
+static uint64_t		KMSANSHADPDPphys;
+static uint64_t		KMSANORIGPDPphys;
+
+/*
+ * To support systems with large amounts of memory, it is necessary to extend
+ * the maximum size of the direct map.  This could eat into the space reserved
+ * for the shadow map.
+ */
+_Static_assert(DMPML4I + NDMPML4E <= KMSANSHADPML4I, "direct map overflow");
+#endif
 
 static pml4_entry_t	*kernel_pml4;
 static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
@@ -1682,13 +1694,21 @@ create_pagetables(vm_paddr_t *firstaddr)
 		DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
 	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
 
-	/* Allocate pages */
+	/* Allocate pages. */
 	KPML4phys = allocpages(firstaddr, 1);
 	KPDPphys = allocpages(firstaddr, NKPML4E);
 #ifdef KASAN
 	KASANPDPphys = allocpages(firstaddr, NKASANPML4E);
 	KASANPDphys = allocpages(firstaddr, 1);
 #endif
+#ifdef KMSAN
+	/*
+	 * The KMSAN shadow maps are initially left unpopulated, since there is
+	 * no need to shadow memory above KERNBASE.
+	 */
+	KMSANSHADPDPphys = allocpages(firstaddr, NKMSANSHADPML4E);
+	KMSANORIGPDPphys = allocpages(firstaddr, NKMSANORIGPML4E);
+#endif
 
 	/*
 	 * Allocate the initial number of kernel page table pages required to
@@ -1826,6 +1846,20 @@ create_pagetables(vm_paddr_t *firstaddr)
 	}
 #endif
 
+#ifdef KMSAN
+	/* Connect the KMSAN shadow map slots up to the PML4. */
+	for (i = 0; i < NKMSANSHADPML4E; i++) {
+		p4_p[KMSANSHADPML4I + i] = KMSANSHADPDPphys + ptoa(i);
+		p4_p[KMSANSHADPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+	}
+
+	/* Connect the KMSAN origin map slots up to the PML4. */
+	for (i = 0; i < NKMSANORIGPML4E; i++) {
+		p4_p[KMSANORIGPML4I + i] = KMSANORIGPDPphys + ptoa(i);
+		p4_p[KMSANORIGPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+	}
+#endif
+
 	/* Connect the Direct Map slots up to the PML4. */
 	for (i = 0; i < ndmpdpphys; i++) {
 		p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
@@ -2498,6 +2532,14 @@ pmap_init(void)
 	TUNABLE_INT_FETCH("vm.pmap.large_map_pml4_entries", &lm_ents);
 	if (lm_ents > LMEPML4I - LMSPML4I + 1)
 		lm_ents = LMEPML4I - LMSPML4I + 1;
+#ifdef KMSAN
+	if (lm_ents > KMSANORIGPML4I - LMSPML4I) {
+		printf(
+	    "pmap: shrinking large map for KMSAN (%d slots to %ld slots)\n",
+		    lm_ents, KMSANORIGPML4I - LMSPML4I);
+		lm_ents = KMSANORIGPML4I - LMSPML4I;
+	}
+#endif
 	if (bootverbose)
 		printf("pmap: large map %u PML4 slots (%lu GB)\n",
 		    lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024));
@@ -4186,6 +4228,16 @@ pmap_pinit_pml4(vm_page_t pml4pg)
 		pm_pml4[KASANPML4I + i] = (KASANPDPphys + ptoa(i)) | X86_PG_RW |
 		    X86_PG_V | pg_nx;
 	}
+#endif
+#ifdef KMSAN
+	for (i = 0; i < NKMSANSHADPML4E; i++) {
+		pm_pml4[KMSANSHADPML4I + i] = (KMSANSHADPDPphys + ptoa(i)) |
+		    X86_PG_RW | X86_PG_V | pg_nx;
+	}
+	for (i = 0; i < NKMSANORIGPML4E; i++) {
+		pm_pml4[KMSANORIGPML4I + i] = (KMSANORIGPDPphys + ptoa(i)) |
+		    X86_PG_RW | X86_PG_V | pg_nx;
+	}
 #endif
 	for (i = 0; i < ndmpdpphys; i++) {
 		pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW |
@@ -4772,6 +4824,12 @@ pmap_release(pmap_t pmap)
 #ifdef KASAN
 		for (i = 0; i < NKASANPML4E; i++) /* KASAN shadow map */
 			pmap->pm_pmltop[KASANPML4I + i] = 0;
+#endif
+#ifdef KMSAN
+		for (i = 0; i < NKMSANSHADPML4E; i++) /* KMSAN shadow map */
+			pmap->pm_pmltop[KMSANSHADPML4I + i] = 0;
+		for (i = 0; i < NKMSANORIGPML4E; i++) /* KMSAN shadow map */
+			pmap->pm_pmltop[KMSANORIGPML4I + i] = 0;
 #endif
 		for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
 			pmap->pm_pmltop[DMPML4I + i] = 0;
@@ -4814,6 +4872,60 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
     0, 0, kvm_free, "LU",
     "Amount of KVM free");
 
+#ifdef KMSAN
+static void
+pmap_kmsan_shadow_map_page_array(vm_paddr_t pdppa, vm_size_t size)
+{
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+	pt_entry_t *pte;
+	vm_paddr_t dummypa, dummypd, dummypt;
+	int i, npde, npdpg;
+
+	npdpg = howmany(size, NBPDP);
+	npde = size / NBPDR;
+
+	dummypa = vm_phys_early_alloc(-1, PAGE_SIZE);
+	pagezero((void *)PHYS_TO_DMAP(dummypa));
+
+	dummypt = vm_phys_early_alloc(-1, PAGE_SIZE);
+	pagezero((void *)PHYS_TO_DMAP(dummypt));
+	dummypd = vm_phys_early_alloc(-1, PAGE_SIZE * npdpg);
+	for (i = 0; i < npdpg; i++)
+		pagezero((void *)PHYS_TO_DMAP(dummypd + ptoa(i)));
+
+	pte = (pt_entry_t *)PHYS_TO_DMAP(dummypt);
+	for (i = 0; i < NPTEPG; i++)
+		pte[i] = (pt_entry_t)(dummypa | X86_PG_V | X86_PG_RW |
+		    X86_PG_A | X86_PG_M | pg_nx);
+
+	pde = (pd_entry_t *)PHYS_TO_DMAP(dummypd);
+	for (i = 0; i < npde; i++)
+		pde[i] = (pd_entry_t)(dummypt | X86_PG_V | X86_PG_RW | pg_nx);
+
+	pdpe = (pdp_entry_t *)PHYS_TO_DMAP(pdppa);
+	for (i = 0; i < npdpg; i++)
+		pdpe[i] = (pdp_entry_t)(dummypd + ptoa(i) | X86_PG_V |
+		    X86_PG_RW | pg_nx);
+}
+
+static void
+pmap_kmsan_page_array_startup(vm_offset_t start, vm_offset_t end)
+{
+	vm_size_t size;
+
+	KASSERT(start % NBPDP == 0, ("unaligned page array start address"));
+
+	/*
+	 * The end of the page array's KVA region is 2MB aligned, see
+	 * kmem_init().
+	 */
+	size = round_2mpage(end) - start;
+	pmap_kmsan_shadow_map_page_array(KMSANSHADPDPphys, size);
+	pmap_kmsan_shadow_map_page_array(KMSANORIGPDPphys, size);
+}
+#endif
+
 /*
  * Allocate physical memory for the vm_page array and map it into KVA,
  * attempting to back the vm_pages with domain-local memory.
@@ -4854,6 +4966,10 @@ pmap_page_array_startup(long pages)
 		pde_store(pde, newpdir);
 	}
 	vm_page_array = (vm_page_t)start;
+
+#ifdef KMSAN
+	pmap_kmsan_page_array_startup(start, end);
+#endif
 }
 
 /*
@@ -4892,6 +5008,8 @@ pmap_growkernel(vm_offset_t addr)
 		addr = vm_map_max(kernel_map);
 	if (kernel_vm_end < addr)
 		kasan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
+	if (kernel_vm_end < addr)
+		kmsan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
 	while (kernel_vm_end < addr) {
 		pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
 		if ((*pdpe & X86_PG_V) == 0) {
diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h
index cf1d2bd0a586..a76be23bbe91 100644
--- a/sys/amd64/include/param.h
+++ b/sys/amd64/include/param.h
@@ -134,7 +134,7 @@
 #define	IOPERM_BITMAP_SIZE	(IOPAGES * PAGE_SIZE + 1)
 
 #ifndef	KSTACK_PAGES
-#ifdef KASAN
+#if defined(KASAN) || defined(KMSAN)
 #define	KSTACK_PAGES	6
 #else
 #define	KSTACK_PAGES	4	/* pages of kstack (with pcb) */
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 61d0dea54210..6cb8b3f0071a 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -75,7 +75,7 @@
  * of the direct mapped segment.  This uses 2MB pages for reduced
  * TLB pressure.
  */
-#ifndef KASAN
+#if !defined(KASAN) && !defined(KMSAN)
 #define	UMA_MD_SMALL_ALLOC
 #endif
 
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 0c2f1689d194..b30139830a1b 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -1175,13 +1175,15 @@ kmeminit(void)
 
 	vm_kmem_size = round_page(vm_kmem_size);
 
-#ifdef KASAN
 	/*
-	 * With KASAN enabled, dynamically allocated kernel memory is shadowed.
-	 * Account for this when setting the UMA limit.
+	 * With KASAN or KMSAN enabled, dynamically allocated kernel memory is
+	 * shadowed.  Account for this when setting the UMA limit.
 	 */
+#if defined(KASAN)
 	vm_kmem_size = (vm_kmem_size * KASAN_SHADOW_SCALE) /
 	    (KASAN_SHADOW_SCALE + 1);
+#elif defined(KMSAN)
+	vm_kmem_size /= 3;
 #endif
 
 #ifdef DEBUG_MEMGUARD
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 859ce3b58285..174892b374d1 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1044,13 +1044,22 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
 	int tuned_nbuf;
 	long maxbuf, maxbuf_sz, buf_sz,	biotmap_sz;
 
-#ifdef KASAN
 	/*
-	 * With KASAN enabled, the kernel map is shadowed.  Account for this
-	 * when sizing maps based on the amount of physical memory available.
+	 * With KASAN or KMSAN enabled, the kernel map is shadowed.  Account for
+	 * this when sizing maps based on the amount of physical memory
+	 * available.
 	 */
+#if defined(KASAN)
 	physmem_est = (physmem_est * KASAN_SHADOW_SCALE) /
 	    (KASAN_SHADOW_SCALE + 1);
+#elif defined(KMSAN)
+	physmem_est /= 3;
+
+	/*
+	 * KMSAN cannot reliably determine whether buffer data is initialized
+	 * unless it is updated through a KVA mapping.
+	 */
+	unmapped_buf_allowed = 0;
 #endif
 
 	/*
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 640e3d977e99..69f0a2dc2bbb 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -217,6 +217,15 @@ pbuf_zsecond_create(const char *name, int max)
 
 	zone = uma_zsecond_create(name, pbuf_ctor, pbuf_dtor, NULL, NULL,
 	    pbuf_zone);
+
+#ifdef KMSAN
+	/*
+	 * Shrink the size of the pbuf pools if KMSAN is enabled, otherwise the
+	 * shadows of the large KVA allocations eat up too much memory.
+	 */
+	max /= 3;
+#endif
+
 	/*
 	 * uma_prealloc() rounds up to items per slab. If we would prealloc
 	 * immediately on every pbuf_zsecond_create(), we may accumulate too

