git: 0b29f5efcc7e - main - amd64: Make it possible to grow the KERNBASE region of KVA
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 24 Sep 2022 13:38:16 UTC
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=0b29f5efcc7ee8271ad2f6b6447898b489d618ec
commit 0b29f5efcc7ee8271ad2f6b6447898b489d618ec
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-09-24 13:19:21 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-09-24 13:27:50 +0000
amd64: Make it possible to grow the KERNBASE region of KVA
pmap_growkernel() may be called when mapping a region above KERNBASE,
typically for a kernel module. If we have enough PTPs left over from
bootstrap, pmap_growkernel() does nothing. However, it's possible to
run out, and in this case pmap_growkernel() will try to grow the kernel
map all the way from kernel_vm_end to somewhere past KERNBASE, which can
easily run the system out of memory. This happens with large kernel
modules such as the nvidia GPU driver. There is also a WIP dtrace
provider which needs to map KVA in the region above KERNBASE (to provide
trampolines which allow a copy of a traced kernel instruction to be
executed), and its allocations could potentially trigger this scenario.
This change modifies pmap_growkernel() to manage the two regions
separately, allowing them to grow independently. The end of the
KERNBASE region is tracked by modifying "nkpt".
PR: 265019
Reviewed by: alc, imp, kib
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D36673
---
sys/amd64/amd64/pmap.c | 68 +++++++++++++++++++++++++++++++++-----------------
1 file changed, 45 insertions(+), 23 deletions(-)
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index d7aeb8dcbd98..9a33298944cc 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -4997,13 +4997,21 @@ pmap_growkernel(vm_offset_t addr)
vm_page_t nkpg;
pd_entry_t *pde, newpdir;
pdp_entry_t *pdpe;
+ vm_offset_t end;
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
/*
- * Return if "addr" is within the range of kernel page table pages
- * that were preallocated during pmap bootstrap. Moreover, leave
- * "kernel_vm_end" and the kernel page table as they were.
+ * The kernel map covers two distinct regions of KVA: that used
+ * for dynamic kernel memory allocations, and the uppermost 2GB
+ * of the virtual address space. The latter is used to map the
+ * kernel and loadable kernel modules. This scheme enables the
+ * use of a special code generation model for kernel code which
+ * takes advantage of compact addressing modes in machine code.
+ *
+ * Both regions grow upwards; to avoid wasting memory, the gap
+ * in between is unmapped. If "addr" is above "KERNBASE", the
+ * kernel's region is grown, otherwise the kmem region is grown.
*
* The correctness of this action is based on the following
* argument: vm_map_insert() allocates contiguous ranges of the
@@ -5015,22 +5023,32 @@ pmap_growkernel(vm_offset_t addr)
* any new kernel page table pages between "kernel_vm_end" and
* "KERNBASE".
*/
- if (KERNBASE < addr && addr <= KERNBASE + nkpt * NBPDR)
- return;
+ if (KERNBASE < addr) {
+ end = KERNBASE + nkpt * NBPDR;
+ if (end == 0)
+ return;
+ } else {
+ end = kernel_vm_end;
+ }
addr = roundup2(addr, NBPDR);
if (addr - 1 >= vm_map_max(kernel_map))
addr = vm_map_max(kernel_map);
- if (kernel_vm_end < addr)
- kasan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
- if (kernel_vm_end < addr)
- kmsan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
- while (kernel_vm_end < addr) {
- pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
+ if (addr <= end) {
+ /*
+ * The grown region is already mapped, so there is
+ * nothing to do.
+ */
+ return;
+ }
+
+ kasan_shadow_map(end, addr - end);
+ kmsan_shadow_map(end, addr - end);
+ while (end < addr) {
+ pdpe = pmap_pdpe(kernel_pmap, end);
if ((*pdpe & X86_PG_V) == 0) {
- /* We need a new PDP entry */
nkpg = pmap_alloc_pt_page(kernel_pmap,
- kernel_vm_end >> PDPSHIFT, VM_ALLOC_WIRED |
+ pmap_pdpe_pindex(end), VM_ALLOC_WIRED |
VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
@@ -5039,31 +5057,35 @@ pmap_growkernel(vm_offset_t addr)
X86_PG_A | X86_PG_M);
continue; /* try again */
}
- pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
+ pde = pmap_pdpe_to_pde(pdpe, end);
if ((*pde & X86_PG_V) != 0) {
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
- kernel_vm_end = vm_map_max(kernel_map);
+ end = (end + NBPDR) & ~PDRMASK;
+ if (end - 1 >= vm_map_max(kernel_map)) {
+ end = vm_map_max(kernel_map);
break;
}
continue;
}
- nkpg = pmap_alloc_pt_page(kernel_pmap,
- pmap_pde_pindex(kernel_vm_end), VM_ALLOC_WIRED |
- VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO);
+ nkpg = pmap_alloc_pt_page(kernel_pmap, pmap_pde_pindex(end),
+ VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
paddr = VM_PAGE_TO_PHYS(nkpg);
newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
pde_store(pde, newpdir);
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
- kernel_vm_end = vm_map_max(kernel_map);
+ end = (end + NBPDR) & ~PDRMASK;
+ if (end - 1 >= vm_map_max(kernel_map)) {
+ end = vm_map_max(kernel_map);
break;
}
}
+
+ if (end <= KERNBASE)
+ kernel_vm_end = end;
+ else
+ nkpt = howmany(end - KERNBASE, NBPDR);
}
/***************************************************