git: 6faf45b34b14 - main - amd64: Implement a KASAN shadow map

Mark Johnston markj at FreeBSD.org
Tue Apr 13 21:42:34 UTC 2021


The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=6faf45b34b14da5f138774b43ec14fb5567ac584

commit 6faf45b34b14da5f138774b43ec14fb5567ac584
Author:     Mark Johnston <markj at FreeBSD.org>
AuthorDate: 2021-04-13 20:30:05 +0000
Commit:     Mark Johnston <markj at FreeBSD.org>
CommitDate: 2021-04-13 21:42:20 +0000

    amd64: Implement a KASAN shadow map
    
    The idea behind KASAN is to use a region of memory to track the validity
    of buffers in the kernel map.  This region is the shadow map.  The
    compiler inserts calls to the KASAN runtime for every emitted load
    and store, and the runtime uses the shadow map to decide whether the
    access is valid.  Various kernel allocators call kasan_mark() to update
    the shadow map.
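    
    As a rough illustration (not code from this commit, and with a
    hypothetical name), each compiler-inserted check boils down to reading
    the shadow byte for the accessed address and interpreting it the way
    ASAN does: 0 means the whole 8-byte granule is addressable, a negative
    code marks a redzone or freed memory, and 1..7 give the number of valid
    leading bytes.  kasan_md_addr_to_shad() is the amd64 helper added in
    machine/asan.h below.
    
        static inline bool
        kasan_access_ok(vm_offset_t addr, size_t size)
        {
                int8_t code;
    
                /* One shadow byte covers an 8-byte granule of the kernel map. */
                code = *(int8_t *)kasan_md_addr_to_shad(addr);
                if (code == 0)
                        return (true);          /* entire granule is addressable */
                if (code < 0)
                        return (false);         /* redzone, freed or uninitialized */
                /* Only the first 'code' bytes of the granule are valid. */
                return ((addr & (KASAN_SHADOW_SCALE - 1)) + size <= (size_t)code);
        }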
    
    Since the shadow map tracks only accesses to the kernel map, accesses to
    other kernel maps are not validated by KASAN.  UMA_MD_SMALL_ALLOC is
    disabled when KASAN is configured, to reduce usage of the direct map.
    Currently we have no mechanism to completely eliminate uses of the
    direct map, so KASAN's coverage is not comprehensive.
    
    The shadow map uses one byte per eight bytes in the kernel map.  In
    pmap_bootstrap() we create an initial set of page tables for the kernel
    and preloaded data.
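    
    Concretely, a kernel-map address maps to its shadow byte as in
    kasan_md_addr_to_shad() below:
    
        shadow(addr) = KASAN_MIN_ADDRESS + ((addr - VM_MIN_KERNEL_ADDRESS) >> 3)
    
    and the sizing follows from the 1:8 ratio: the kernel map occupies
    NKPML4E (4) PML4 slots, i.e. 4 * 512GB = 2TB, so its shadow needs
    2TB / 8 = 256GB, which fits in NKASANPML4E = (4 + 7) / 8 = 1 PML4 slot,
    the 512GB region placed just below the direct map in vmparam.h.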
    
    When pmap_growkernel() is called, we call kasan_shadow_map() to extend
    the shadow map.  kasan_shadow_map() uses pmap_kasan_enter() to allocate
    memory for the shadow region and map it.
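    
    kasan_shadow_map() itself lives in the MI KASAN runtime rather than in
    this diff; conceptually it reduces to mapping every shadow page that
    backs the newly grown range, roughly as in this sketch (name and
    details hypothetical):
    
        static void
        kasan_shadow_map_sketch(void *addr, size_t size)
        {
                vm_offset_t sva, eva;
    
                sva = trunc_page(kasan_md_addr_to_shad((vm_offset_t)addr));
                eva = round_page(kasan_md_addr_to_shad((vm_offset_t)addr + size));
                for (; sva < eva; sva += PAGE_SIZE)
                        pmap_kasan_enter(sva);  /* allocate and map one shadow page */
        }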
    
    Reviewed by:    kib
    MFC after:      2 weeks
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D29417
---
 sys/amd64/amd64/pmap.c      | 142 +++++++++++++++++++++++++++++++++++++++++++-
 sys/amd64/include/asan.h    |  71 ++++++++++++++++++++++
 sys/amd64/include/pmap.h    |  15 ++++-
 sys/amd64/include/vmparam.h |   8 ++-
 sys/kern/kern_malloc.c      |  10 ++++
 sys/kern/vfs_bio.c          |  10 ++++
 6 files changed, 252 insertions(+), 4 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index aa8810e05b63..77c486c0d00a 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -112,6 +112,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_vm.h"
 
 #include <sys/param.h>
+#include <sys/asan.h>
 #include <sys/bitstring.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
@@ -154,6 +155,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_dumpset.h>
 #include <vm/uma.h>
 
+#include <machine/asan.h>
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
 #include <x86/ifunc.h>
@@ -425,6 +427,10 @@ u_int64_t		KPML4phys;	/* phys addr of kernel level 4 */
 u_int64_t		KPML5phys;	/* phys addr of kernel level 5,
 					   if supported */
 
+#ifdef KASAN
+static uint64_t		KASANPDPphys;
+#endif
+
 static pml4_entry_t	*kernel_pml4;
 static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
@@ -1626,11 +1632,17 @@ bootaddr_rwx(vm_paddr_t pa)
 static void
 create_pagetables(vm_paddr_t *firstaddr)
 {
-	int i, j, ndm1g, nkpdpe, nkdmpde;
 	pd_entry_t *pd_p;
 	pdp_entry_t *pdp_p;
 	pml4_entry_t *p4_p;
 	uint64_t DMPDkernphys;
+#ifdef KASAN
+	pt_entry_t *pt_p;
+	uint64_t KASANPDphys, KASANPTphys, KASANphys;
+	vm_offset_t kasankernbase;
+	int kasankpdpi, kasankpdi, nkasanpte;
+#endif
+	int i, j, ndm1g, nkpdpe, nkdmpde;
 
 	/* Allocate page table pages for the direct map */
 	ndmpdp = howmany(ptoa(Maxmem), NBPDP);
@@ -1670,6 +1682,10 @@ create_pagetables(vm_paddr_t *firstaddr)
 	/* Allocate pages */
 	KPML4phys = allocpages(firstaddr, 1);
 	KPDPphys = allocpages(firstaddr, NKPML4E);
+#ifdef KASAN
+	KASANPDPphys = allocpages(firstaddr, NKASANPML4E);
+	KASANPDphys = allocpages(firstaddr, 1);
+#endif
 
 	/*
 	 * Allocate the initial number of kernel page table pages required to
@@ -1687,6 +1703,12 @@ create_pagetables(vm_paddr_t *firstaddr)
 	KPTphys = allocpages(firstaddr, nkpt);
 	KPDphys = allocpages(firstaddr, nkpdpe);
 
+#ifdef KASAN
+	nkasanpte = howmany(nkpt, KASAN_SHADOW_SCALE);
+	KASANPTphys = allocpages(firstaddr, nkasanpte);
+	KASANphys = allocpages(firstaddr, nkasanpte * NPTEPG);
+#endif
+
 	/*
 	 * Connect the zero-filled PT pages to their PD entries.  This
 	 * implicitly maps the PT pages at their correct locations within
@@ -1719,6 +1741,25 @@ create_pagetables(vm_paddr_t *firstaddr)
 	for (i = 0; i < nkpdpe; i++)
 		pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V;
 
+#ifdef KASAN
+	kasankernbase = kasan_md_addr_to_shad(KERNBASE);
+	kasankpdpi = pmap_pdpe_index(kasankernbase);
+	kasankpdi = pmap_pde_index(kasankernbase);
+
+	pdp_p = (pdp_entry_t *)KASANPDPphys;
+	pdp_p[kasankpdpi] = (KASANPDphys | X86_PG_RW | X86_PG_V | pg_nx);
+
+	pd_p = (pd_entry_t *)KASANPDphys;
+	for (i = 0; i < nkasanpte; i++)
+		pd_p[i + kasankpdi] = (KASANPTphys + ptoa(i)) | X86_PG_RW |
+		    X86_PG_V | pg_nx;
+
+	pt_p = (pt_entry_t *)KASANPTphys;
+	for (i = 0; i < nkasanpte * NPTEPG; i++)
+		pt_p[i] = (KASANphys + ptoa(i)) | X86_PG_RW | X86_PG_V |
+		    X86_PG_M | X86_PG_A | pg_nx;
+#endif
+
 	/*
 	 * Now, set up the direct map region using 2MB and/or 1GB pages.  If
 	 * the end of physical memory is not aligned to a 1GB page boundary,
@@ -1767,7 +1808,15 @@ create_pagetables(vm_paddr_t *firstaddr)
 	p4_p[PML4PML4I] = KPML4phys;
 	p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx;
 
-	/* Connect the Direct Map slot(s) up to the PML4. */
+#ifdef KASAN
+	/* Connect the KASAN shadow map slots up to the PML4. */
+	for (i = 0; i < NKASANPML4E; i++) {
+		p4_p[KASANPML4I + i] = KASANPDPphys + ptoa(i);
+		p4_p[KASANPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+	}
+#endif
+
+	/* Connect the Direct Map slots up to the PML4. */
 	for (i = 0; i < ndmpdpphys; i++) {
 		p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
 		p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
@@ -4131,6 +4180,12 @@ pmap_pinit_pml4(vm_page_t pml4pg)
 		pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) | X86_PG_RW |
 		    X86_PG_V;
 	}
+#ifdef KASAN
+	for (i = 0; i < NKASANPML4E; i++) {
+		pm_pml4[KASANPML4I + i] = (KASANPDPphys + ptoa(i)) | X86_PG_RW |
+		    X86_PG_V | pg_nx;
+	}
+#endif
 	for (i = 0; i < ndmpdpphys; i++) {
 		pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW |
 		    X86_PG_V;
@@ -4713,6 +4768,10 @@ pmap_release(pmap_t pmap)
 	} else {
 		for (i = 0; i < NKPML4E; i++)	/* KVA */
 			pmap->pm_pmltop[KPML4BASE + i] = 0;
+#ifdef KASAN
+		for (i = 0; i < NKASANPML4E; i++) /* KASAN shadow map */
+			pmap->pm_pmltop[KASANPML4I + i] = 0;
+#endif
 		for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
 			pmap->pm_pmltop[DMPML4I + i] = 0;
 		pmap->pm_pmltop[PML4PML4I] = 0;	/* Recursive Mapping */
@@ -4830,6 +4889,8 @@ pmap_growkernel(vm_offset_t addr)
 	addr = roundup2(addr, NBPDR);
 	if (addr - 1 >= vm_map_max(kernel_map))
 		addr = vm_map_max(kernel_map);
+	if (kernel_vm_end < addr)
+		kasan_shadow_map((void *)kernel_vm_end, addr - kernel_vm_end);
 	while (kernel_vm_end < addr) {
 		pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
 		if ((*pdpe & X86_PG_V) == 0) {
@@ -11190,6 +11251,78 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	return (error);
 }
 
+#ifdef KASAN
+static vm_page_t
+pmap_kasan_enter_alloc_4k(void)
+{
+	vm_page_t m;
+
+	m = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+	if (m == NULL)
+		panic("%s: no memory to grow shadow map", __func__);
+	if ((m->flags & PG_ZERO) == 0)
+		pmap_zero_page(m);
+	return (m);
+}
+
+static vm_page_t
+pmap_kasan_enter_alloc_2m(void)
+{
+	vm_page_t m;
+
+	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+	    VM_ALLOC_WIRED, NPTEPG, 0, ~0ul, NBPDR, 0, VM_MEMATTR_DEFAULT);
+	if (m != NULL)
+		memset((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), 0, NBPDR);
+	return (m);
+}
+
+/*
+ * Grow the shadow map by at least one 4KB page at the specified address.  Use
+ * 2MB pages when possible.
+ */
+void
+pmap_kasan_enter(vm_offset_t va)
+{
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+	pt_entry_t *pte;
+	vm_page_t m;
+
+	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+
+	pdpe = pmap_pdpe(kernel_pmap, va);
+	if ((*pdpe & X86_PG_V) == 0) {
+		m = pmap_kasan_enter_alloc_4k();
+		*pdpe = (pdp_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
+		    X86_PG_V | pg_nx);
+	}
+	pde = pmap_pdpe_to_pde(pdpe, va);
+	if ((*pde & X86_PG_V) == 0) {
+		m = pmap_kasan_enter_alloc_2m();
+		if (m != NULL) {
+			*pde = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
+			    X86_PG_PS | X86_PG_V | pg_nx);
+		} else {
+			m = pmap_kasan_enter_alloc_4k();
+			*pde = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
+			    X86_PG_V | pg_nx);
+		}
+	}
+	if ((*pde & X86_PG_PS) != 0)
+		return;
+	pte = pmap_pde_to_pte(pde, va);
+	if ((*pte & X86_PG_V) != 0)
+		return;
+	KASSERT((*pte & X86_PG_V) == 0,
+	    ("%s: shadow address %#lx is already mapped", __func__, va));
+	m = pmap_kasan_enter_alloc_4k();
+	*pte = (pt_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW | X86_PG_V |
+	    X86_PG_M | X86_PG_A | pg_nx);
+}
+#endif
+
 /*
  * Track a range of the kernel's virtual address space that is contiguous
  * in various mapping attributes.
@@ -11367,6 +11500,11 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS)
 		case DMPML4I:
 			sbuf_printf(sb, "\nDirect map:\n");
 			break;
+#ifdef KASAN
+		case KASANPML4I:
+			sbuf_printf(sb, "\nKASAN shadow map:\n");
+			break;
+#endif
 		case KPML4BASE:
 			sbuf_printf(sb, "\nKernel map:\n");
 			break;
diff --git a/sys/amd64/include/asan.h b/sys/amd64/include/asan.h
new file mode 100644
index 000000000000..03d57673d05e
--- /dev/null
+++ b/sys/amd64/include/asan.h
@@ -0,0 +1,71 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ *
+ * This software was developed by Mark Johnston under sponsorship from the
+ * FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _MACHINE_ASAN_H_
+#define	_MACHINE_ASAN_H_
+
+#ifdef KASAN
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <machine/vmparam.h>
+
+static inline vm_offset_t
+kasan_md_addr_to_shad(vm_offset_t addr)
+{
+	return (((addr - VM_MIN_KERNEL_ADDRESS) >> KASAN_SHADOW_SCALE_SHIFT) +
+	    KASAN_MIN_ADDRESS);
+}
+
+static inline bool
+kasan_md_unsupported(vm_offset_t addr)
+{
+	vm_offset_t kernmin;
+
+	/*
+	 * The vm_page array is mapped at the beginning of the kernel map, but
+	 * accesses to the array are not validated for now.  Handle the fact
+	 * that KASAN must validate accesses before the vm_page array is
+	 * initialized.
+	 */
+	kernmin = vm_page_array == NULL ? VM_MIN_KERNEL_ADDRESS :
+	    (vm_offset_t)(vm_page_array + vm_page_array_size);
+	return (addr < kernmin || addr >= VM_MAX_KERNEL_ADDRESS);
+}
+
+static inline void
+kasan_md_init(void)
+{
+}
+
+#endif /* KASAN */
+
+#endif /* !_MACHINE_ASAN_H_ */
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 8ba654cb2e7c..1704ecf903e9 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -195,6 +195,12 @@
  */
 #define NKPML4E		4
 
+/*
+ * Number of PML4 slots for the KASAN shadow map.  It requires 1 byte of memory
+ * for every 8 bytes of the kernel address space.
+ */
+#define	NKASANPML4E	((NKPML4E + 7) / 8)
+
 /*
  * We use the same numbering of the page table pages for 5-level and
  * 4-level paging structures.
@@ -243,9 +249,11 @@
 #define	KPML4I		(NPML4EPG-1)
 #define	KPDPI		(NPDPEPG-2)	/* kernbase at -2GB */
 
+#define	KASANPML4I	(DMPML4I - NKASANPML4E) /* Below the direct map */
+
 /* Large map: index of the first and max last pml4 entry */
 #define	LMSPML4I	(PML4PML4I + 1)
-#define	LMEPML4I	(DMPML4I - 1)
+#define	LMEPML4I	(KASANPML4I - 1)
 
 /*
  * XXX doesn't really belong here I guess...
@@ -501,6 +509,11 @@ int	pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
 void	pmap_thread_init_invl_gen(struct thread *td);
 int	pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
 void	pmap_page_array_startup(long count);
+
+#ifdef KASAN
+void	pmap_kasan_enter(vm_offset_t);
+#endif
+
 #endif /* _KERNEL */
 
 /* Return various clipped indexes for a given VA */
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index c7ffb218dd4a..6dd76063bf3f 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -75,7 +75,9 @@
  * of the direct mapped segment.  This uses 2MB pages for reduced
  * TLB pressure.
  */
+#ifndef KASAN
 #define	UMA_MD_SMALL_ALLOC
+#endif
 
 /*
  * The physical address space is densely populated.
@@ -165,7 +167,8 @@
  * 0xffff800000000000 - 0xffff804020100fff   recursive page table (512GB slot)
  * 0xffff804020100fff - 0xffff807fffffffff   unused
  * 0xffff808000000000 - 0xffff847fffffffff   large map (can be tuned up)
- * 0xffff848000000000 - 0xfffff7ffffffffff   unused (large map extends there)
+ * 0xffff848000000000 - 0xfffff77fffffffff   unused (large map extends there)
+ * 0xfffff78000000000 - 0xfffff7ffffffffff   512GB KASAN shadow map
  * 0xfffff80000000000 - 0xfffffbffffffffff   4TB direct map
  * 0xfffffc0000000000 - 0xfffffdffffffffff   unused
  * 0xfffffe0000000000 - 0xffffffffffffffff   2TB kernel map
@@ -183,6 +186,9 @@
 #define	DMAP_MIN_ADDRESS	KV4ADDR(DMPML4I, 0, 0, 0)
 #define	DMAP_MAX_ADDRESS	KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0)
 
+#define	KASAN_MIN_ADDRESS	KV4ADDR(KASANPML4I, 0, 0, 0)
+#define	KASAN_MAX_ADDRESS	KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0)
+
 #define	LARGEMAP_MIN_ADDRESS	KV4ADDR(LMSPML4I, 0, 0, 0)
 #define	LARGEMAP_MAX_ADDRESS	KV4ADDR(LMEPML4I + 1, 0, 0, 0)
 
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 0d6f9dcfcab7..7aa9bf79180c 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -1151,6 +1151,16 @@ kmeminit(void)
 		vm_kmem_size = 2 * mem_size * PAGE_SIZE;
 
 	vm_kmem_size = round_page(vm_kmem_size);
+
+#ifdef KASAN
+	/*
+	 * With KASAN enabled, dynamically allocated kernel memory is shadowed.
+	 * Account for this when setting the UMA limit.
+	 */
+	vm_kmem_size = (vm_kmem_size * KASAN_SHADOW_SCALE) /
+	    (KASAN_SHADOW_SCALE + 1);
+#endif
+
 #ifdef DEBUG_MEMGUARD
 	tmp = memguard_fudge(vm_kmem_size, kernel_map);
 #else
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 8e8cf3f083d4..859ce3b58285 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/asan.h>
 #include <sys/bio.h>
 #include <sys/bitset.h>
 #include <sys/conf.h>
@@ -1043,6 +1044,15 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
 	int tuned_nbuf;
 	long maxbuf, maxbuf_sz, buf_sz,	biotmap_sz;
 
+#ifdef KASAN
+	/*
+	 * With KASAN enabled, the kernel map is shadowed.  Account for this
+	 * when sizing maps based on the amount of physical memory available.
+	 */
+	physmem_est = (physmem_est * KASAN_SHADOW_SCALE) /
+	    (KASAN_SHADOW_SCALE + 1);
+#endif
+
 	/*
 	 * physmem_est is in pages.  Convert it to kilobytes (assumes
 	 * PAGE_SIZE is >= 1K)

