git: 8978608832c2 - main - amd64: Populate the KMSAN shadow maps and integrate with the VM
Mark Johnston
markj at FreeBSD.org
Wed Aug 11 01:31:06 UTC 2021
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=8978608832c28572bbf5adadb9cfb077e8f15255
commit 8978608832c28572bbf5adadb9cfb077e8f15255
Author: Mark Johnston <markj at FreeBSD.org>
AuthorDate: 2021-08-10 20:52:36 +0000
Commit: Mark Johnston <markj at FreeBSD.org>
CommitDate: 2021-08-11 01:27:53 +0000
amd64: Populate the KMSAN shadow maps and integrate with the VM
- During boot, allocate PDP pages for the shadow maps. The region above
KERNBASE is currently not shadowed.
- Create a dummy shadow for the vm page array. For now, this array is
not protected by the shadow map to help reduce kernel memory usage.
- Grow shadows when growing the kernel map.
- Increase the default kernel stack size when KMSAN is enabled. As with
KASAN, sanitizer instrumentation appears to create stack frames large
enough that the default value is not sufficient.
- Disable UMA's use of the direct map when KMSAN is configured. KMSAN
cannot validate the direct map.
- Disable unmapped I/O when KMSAN is configured.
- Lower the limit on paging buffers when KMSAN is configured. Each
buffer has a static MAXPHYS-sized allocation of KVA, which in turn
eats 2*MAXPHYS of space in the shadow map.
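
  A rough sketch of the arithmetic in the last item, not part of the commit:
  KMSAN pairs every byte of kernel-map KVA with one shadow byte (initialization
  state) and one origin byte (allocation provenance), so each MAXPHYS-sized
  pbuf mapping pins 2*MAXPHYS of extra memory. The MAXPHYS and buffer-count
  values below are assumptions for illustration only.

    #include <stdio.h>

    int
    main(void)
    {
        unsigned long maxphys = 128UL * 1024;   /* assumed MAXPHYS: 128KB */
        unsigned long npbufs = 256;             /* assumed paging buffer count */

        /* One shadow byte and one origin byte per byte of pbuf KVA. */
        unsigned long overhead = npbufs * 2 * maxphys;

        printf("pbuf KVA %lu KB -> KMSAN overhead %lu KB\n",
            npbufs * maxphys / 1024, overhead / 1024);
        return (0);
    }
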
Reviewed by: alc, kib
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D31295
---
sys/amd64/amd64/pmap.c | 122 +++++++++++++++++++++++++++++++++++++++++++-
sys/amd64/include/param.h | 2 +-
sys/amd64/include/vmparam.h | 2 +-
sys/kern/kern_malloc.c | 8 +--
sys/kern/vfs_bio.c | 15 ++++--
sys/vm/vm_pager.c | 9 ++++
6 files changed, 148 insertions(+), 10 deletions(-)
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index d7a5e16d3dd6..aae35c5d7e07 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -122,6 +122,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
+#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangeset.h>
@@ -161,8 +162,8 @@ __FBSDID("$FreeBSD$");
#include <x86/ifunc.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
-#include <machine/intr_machdep.h>
#include <machine/md_var.h>
+#include <machine/msan.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#ifdef SMP
@@ -430,6 +431,17 @@ u_int64_t KPML5phys; /* phys addr of kernel level 5,
#ifdef KASAN
static uint64_t KASANPDPphys;
#endif
+#ifdef KMSAN
+static uint64_t KMSANSHADPDPphys;
+static uint64_t KMSANORIGPDPphys;
+
+/*
+ * To support systems with large amounts of memory, it is necessary to extend
+ * the maximum size of the direct map. This could eat into the space reserved
+ * for the shadow map.
+ */
+_Static_assert(DMPML4I + NDMPML4E <= KMSANSHADPML4I, "direct map overflow");
+#endif
static pml4_entry_t *kernel_pml4;
static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */
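
  As a rough illustration of the assertion above, with hypothetical values:
  under 4-level paging each PML4 slot maps 512GB, so the number of slots the
  direct map consumes grows with physical memory and must stop short of the
  first shadow-map slot, KMSANSHADPML4I.

    #include <stdio.h>

    int
    main(void)
    {
        unsigned long long nbpml4 = 1ULL << 39; /* 512GB mapped per PML4 slot */
        unsigned long long maxmem = 4ULL << 40; /* assume 4TB of RAM */

        /* PML4 slots needed by the direct map, rounded up. */
        unsigned long long ndmpml4e = (maxmem + nbpml4 - 1) / nbpml4;

        printf("direct map uses %llu PML4 slots\n", ndmpml4e);  /* 8 */
        return (0);
    }
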
@@ -1682,13 +1694,21 @@ create_pagetables(vm_paddr_t *firstaddr)
DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
- /* Allocate pages */
+ /* Allocate pages. */
KPML4phys = allocpages(firstaddr, 1);
KPDPphys = allocpages(firstaddr, NKPML4E);
#ifdef KASAN
KASANPDPphys = allocpages(firstaddr, NKASANPML4E);
KASANPDphys = allocpages(firstaddr, 1);
#endif
+#ifdef KMSAN
+ /*
+ * The KMSAN shadow maps are initially left unpopulated, since there is
+ * no need to shadow memory above KERNBASE.
+ */
+ KMSANSHADPDPphys = allocpages(firstaddr, NKMSANSHADPML4E);
+ KMSANORIGPDPphys = allocpages(firstaddr, NKMSANORIGPML4E);
+#endif
/*
* Allocate the initial number of kernel page table pages required to
@@ -1826,6 +1846,20 @@ create_pagetables(vm_paddr_t *firstaddr)
}
#endif
+#ifdef KMSAN
+ /* Connect the KMSAN shadow map slots up to the PML4. */
+ for (i = 0; i < NKMSANSHADPML4E; i++) {
+ p4_p[KMSANSHADPML4I + i] = KMSANSHADPDPphys + ptoa(i);
+ p4_p[KMSANSHADPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+ }
+
+ /* Connect the KMSAN origin map slots up to the PML4. */
+ for (i = 0; i < NKMSANORIGPML4E; i++) {
+ p4_p[KMSANORIGPML4I + i] = KMSANORIGPDPphys + ptoa(i);
+ p4_p[KMSANORIGPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+ }
+#endif
+
/* Connect the Direct Map slots up to the PML4. */
for (i = 0; i < ndmpdpphys; i++) {
p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
@@ -2498,6 +2532,14 @@ pmap_init(void)
TUNABLE_INT_FETCH("vm.pmap.large_map_pml4_entries", &lm_ents);
if (lm_ents > LMEPML4I - LMSPML4I + 1)
lm_ents = LMEPML4I - LMSPML4I + 1;
+#ifdef KMSAN
+ if (lm_ents > KMSANORIGPML4I - LMSPML4I) {
+ printf(
+ "pmap: shrinking large map for KMSAN (%d slots to %ld slots)\n",
+ lm_ents, KMSANORIGPML4I - LMSPML4I);
+ lm_ents = KMSANORIGPML4I - LMSPML4I;
+ }
+#endif
if (bootverbose)
printf("pmap: large map %u PML4 slots (%lu GB)\n",
lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024));
@@ -4186,6 +4228,16 @@ pmap_pinit_pml4(vm_page_t pml4pg)
pm_pml4[KASANPML4I + i] = (KASANPDPphys + ptoa(i)) | X86_PG_RW |
X86_PG_V | pg_nx;
}
+#endif
+#ifdef KMSAN
+ for (i = 0; i < NKMSANSHADPML4E; i++) {
+ pm_pml4[KMSANSHADPML4I + i] = (KMSANSHADPDPphys + ptoa(i)) |
+ X86_PG_RW | X86_PG_V | pg_nx;
+ }
+ for (i = 0; i < NKMSANORIGPML4E; i++) {
+ pm_pml4[KMSANORIGPML4I + i] = (KMSANORIGPDPphys + ptoa(i)) |
+ X86_PG_RW | X86_PG_V | pg_nx;
+ }
#endif
for (i = 0; i < ndmpdpphys; i++) {
pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW |
@@ -4772,6 +4824,12 @@ pmap_release(pmap_t pmap)
#ifdef KASAN
for (i = 0; i < NKASANPML4E; i++) /* KASAN shadow map */
pmap->pm_pmltop[KASANPML4I + i] = 0;
+#endif
+#ifdef KMSAN
+ for (i = 0; i < NKMSANSHADPML4E; i++) /* KMSAN shadow map */
+ pmap->pm_pmltop[KMSANSHADPML4I + i] = 0;
+ for (i = 0; i < NKMSANORIGPML4E; i++) /* KMSAN shadow map */
+ pmap->pm_pmltop[KMSANORIGPML4I + i] = 0;
#endif
for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
pmap->pm_pmltop[DMPML4I + i] = 0;
@@ -4814,6 +4872,60 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
0, 0, kvm_free, "LU",
"Amount of KVM free");
+#ifdef KMSAN
+static void
+pmap_kmsan_shadow_map_page_array(vm_paddr_t pdppa, vm_size_t size)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ vm_paddr_t dummypa, dummypd, dummypt;
+ int i, npde, npdpg;
+
+ npdpg = howmany(size, NBPDP);
+ npde = size / NBPDR;
+
+ dummypa = vm_phys_early_alloc(-1, PAGE_SIZE);
+ pagezero((void *)PHYS_TO_DMAP(dummypa));
+
+ dummypt = vm_phys_early_alloc(-1, PAGE_SIZE);
+ pagezero((void *)PHYS_TO_DMAP(dummypt));
+ dummypd = vm_phys_early_alloc(-1, PAGE_SIZE * npdpg);
+ for (i = 0; i < npdpg; i++)
+ pagezero((void *)PHYS_TO_DMAP(dummypd + ptoa(i)));
+
+ pte = (pt_entry_t *)PHYS_TO_DMAP(dummypt);
+ for (i = 0; i < NPTEPG; i++)
+ pte[i] = (pt_entry_t)(dummypa | X86_PG_V | X86_PG_RW |
+ X86_PG_A | X86_PG_M | pg_nx);
+
+ pde = (pd_entry_t *)PHYS_TO_DMAP(dummypd);
+ for (i = 0; i < npde; i++)
+ pde[i] = (pd_entry_t)(dummypt | X86_PG_V | X86_PG_RW | pg_nx);
+
+ pdpe = (pdp_entry_t *)PHYS_TO_DMAP(pdppa);
+ for (i = 0; i < npdpg; i++)
+ pdpe[i] = (pdp_entry_t)(dummypd + ptoa(i) | X86_PG_V |
+ X86_PG_RW | pg_nx);
+}
+
+static void
+pmap_kmsan_page_array_startup(vm_offset_t start, vm_offset_t end)
+{
+ vm_size_t size;
+
+ KASSERT(start % NBPDP == 0, ("unaligned page array start address"));
+
+ /*
+ * The end of the page array's KVA region is 2MB aligned, see
+ * kmem_init().
+ */
+ size = round_2mpage(end) - start;
+ pmap_kmsan_shadow_map_page_array(KMSANSHADPDPphys, size);
+ pmap_kmsan_shadow_map_page_array(KMSANORIGPDPphys, size);
+}
+#endif
+
/*
* Allocate physical memory for the vm_page array and map it into KVA,
* attempting to back the vm_pages with domain-local memory.
@@ -4854,6 +4966,10 @@ pmap_page_array_startup(long pages)
pde_store(pde, newpdir);
}
vm_page_array = (vm_page_t)start;
+
+#ifdef KMSAN
+ pmap_kmsan_page_array_startup(start, end);
+#endif
}
/*
@@ -4892,6 +5008,8 @@ pmap_growkernel(vm_offset_t addr)
addr = vm_map_max(kernel_map);
if (kernel_vm_end < addr)
kasan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
+ if (kernel_vm_end < addr)
+ kmsan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
while (kernel_vm_end < addr) {
pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
if ((*pdpe & X86_PG_V) == 0) {
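
  A back-of-the-envelope view of what pmap_kmsan_shadow_map_page_array() above
  saves, sketched with an assumed region size: every PTE in the dummy hierarchy
  points at the same physical page, so instead of 1:1 backing, the vm_page
  array's shadow costs one data page plus the paging structures aliasing it,
  once for the shadow map and once for the origin map.

    #include <stdio.h>

    int
    main(void)
    {
        unsigned long long pagesz = 4096;
        unsigned long long nbpdp = 1ULL << 30;  /* 1GB mapped per PDP entry */
        unsigned long long size = 8ULL << 30;   /* assumed page array region: 8GB */

        unsigned long long npdpg = (size + nbpdp - 1) / nbpdp;
        /* One dummy data page, one shared PT page, npdpg PD pages. */
        unsigned long long dummy = pagesz * (2 + npdpg);

        printf("1:1 shadow %llu MB vs. dummy shadow %llu KB, per map\n",
            size >> 20, dummy >> 10);
        return (0);
    }
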
diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h
index cf1d2bd0a586..a76be23bbe91 100644
--- a/sys/amd64/include/param.h
+++ b/sys/amd64/include/param.h
@@ -134,7 +134,7 @@
#define IOPERM_BITMAP_SIZE (IOPAGES * PAGE_SIZE + 1)
#ifndef KSTACK_PAGES
-#ifdef KASAN
+#if defined(KASAN) || defined(KMSAN)
#define KSTACK_PAGES 6
#else
#define KSTACK_PAGES 4 /* pages of kstack (with pcb) */
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 61d0dea54210..6cb8b3f0071a 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -75,7 +75,7 @@
* of the direct mapped segment. This uses 2MB pages for reduced
* TLB pressure.
*/
-#ifndef KASAN
+#if !defined(KASAN) && !defined(KMSAN)
#define UMA_MD_SMALL_ALLOC
#endif
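
  Why the direct map cannot be validated, sketched with hypothetical base
  addresses (not the kernel's real layout): KMSAN instrumentation locates a
  byte's shadow via a fixed linear remapping that is defined only for
  kernel-map addresses. With UMA_MD_SMALL_ALLOC, small UMA allocations are
  returned as direct-map addresses, which fall outside that range and
  therefore have no shadow.

    #include <stdint.h>
    #include <stdio.h>

    #define KVA_BASE    0xfffffe0000000000ULL   /* hypothetical kernel map base */
    #define SHAD_BASE   0xffffff0000000000ULL   /* hypothetical shadow map base */

    /* Hypothetical lookup: only meaningful for addresses above KVA_BASE. */
    static uint64_t
    shadow_of(uint64_t addr)
    {
        return (addr - KVA_BASE + SHAD_BASE);
    }

    int
    main(void)
    {
        uint64_t kva = KVA_BASE + 0x1000;

        printf("shadow of %#jx is %#jx\n", (uintmax_t)kva,
            (uintmax_t)shadow_of(kva));
        return (0);
    }
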
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 0c2f1689d194..b30139830a1b 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -1175,13 +1175,15 @@ kmeminit(void)
vm_kmem_size = round_page(vm_kmem_size);
-#ifdef KASAN
/*
- * With KASAN enabled, dynamically allocated kernel memory is shadowed.
- * Account for this when setting the UMA limit.
+ * With KASAN or KMSAN enabled, dynamically allocated kernel memory is
+ * shadowed. Account for this when setting the UMA limit.
*/
+#if defined(KASAN)
vm_kmem_size = (vm_kmem_size * KASAN_SHADOW_SCALE) /
(KASAN_SHADOW_SCALE + 1);
+#elif defined(KMSAN)
+ vm_kmem_size /= 3;
#endif
#ifdef DEBUG_MEMGUARD
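
  The divisor in the KMSAN branch above follows from a 1:1:1 split, sketched
  here with an assumed budget: each byte of kmem is accompanied by a shadow
  byte and an origin byte, so only a third of the memory reserved for kmem can
  hold data.

    #include <stdio.h>

    int
    main(void)
    {
        unsigned long long budget = 12ULL << 30;    /* assumed kmem budget: 12GB */
        unsigned long long data = budget / 3;       /* as in vm_kmem_size /= 3 */

        printf("data %llu GB + shadow %llu GB + origin %llu GB\n",
            data >> 30, data >> 30, data >> 30);
        return (0);
    }
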
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 859ce3b58285..174892b374d1 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1044,13 +1044,22 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
int tuned_nbuf;
long maxbuf, maxbuf_sz, buf_sz, biotmap_sz;
-#ifdef KASAN
/*
- * With KASAN enabled, the kernel map is shadowed. Account for this
- * when sizing maps based on the amount of physical memory available.
+ * With KASAN or KMSAN enabled, the kernel map is shadowed. Account for
+ * this when sizing maps based on the amount of physical memory
+ * available.
*/
+#if defined(KASAN)
physmem_est = (physmem_est * KASAN_SHADOW_SCALE) /
(KASAN_SHADOW_SCALE + 1);
+#elif defined(KMSAN)
+ physmem_est /= 3;
+
+ /*
+ * KMSAN cannot reliably determine whether buffer data is initialized
+ * unless it is updated through a KVA mapping.
+ */
+ unmapped_buf_allowed = 0;
#endif
/*
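
  A userland caricature of the unmapped-I/O problem described in the comment
  above, not kernel code: KMSAN learns that memory is initialized only by
  observing instrumented stores. If buffer contents arrive through an agent
  the compiler never instrumented (DMA into an unmapped buffer, in the kernel
  case), the shadow is never updated and a later read of valid data is
  reported as a use of uninitialized memory. Forcing mapped I/O keeps buffer
  updates visible to the instrumentation.

    #include <string.h>

    static char dma_page[512];  /* stands in for a device-written page */

    static void
    fill_via_mapping(char *buf, size_t len)
    {
        /* An instrumented store: the shadow for buf is marked initialized. */
        memcpy(buf, dma_page, len);
    }

    int
    main(void)
    {
        char buf[512];

        fill_via_mapping(buf, sizeof(buf)); /* without this, a sanitizer */
        return (buf[0]);                    /* would flag this read */
    }
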
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 640e3d977e99..69f0a2dc2bbb 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -217,6 +217,15 @@ pbuf_zsecond_create(const char *name, int max)
zone = uma_zsecond_create(name, pbuf_ctor, pbuf_dtor, NULL, NULL,
pbuf_zone);
+
+#ifdef KMSAN
+ /*
+ * Shrink the size of the pbuf pools if KMSAN is enabled, otherwise the
+ * shadows of the large KVA allocations eat up too much memory.
+ */
+ max /= 3;
+#endif
+
/*
* uma_prealloc() rounds up to items per slab. If we would prealloc
* immediately on every pbuf_zsecond_create(), we may accumulate too