git: 762a3224cde6 - main - riscv: smarter DMAP construction

From: Mitchell Horne <mhorne@FreeBSD.org>
Date: Thu, 20 Jun 2024 19:00:22 UTC
The branch main has been updated by mhorne:

URL: https://cgit.FreeBSD.org/src/commit/?id=762a3224cde67ce74ad6c0b0b6e434dd761d0baa

commit 762a3224cde67ce74ad6c0b0b6e434dd761d0baa
Author:     Mitchell Horne <mhorne@FreeBSD.org>
AuthorDate: 2024-06-20 18:32:07 +0000
Commit:     Mitchell Horne <mhorne@FreeBSD.org>
CommitDate: 2024-06-20 18:33:19 +0000

    riscv: smarter DMAP construction
    
    Currently we create the DMAP by mapping the entire range between the
    smallest and largest physical memory addresses with L1 superpages. This
    is obviously overkill, and we may end up mapping all kinds of ranges that
    are not real memory.
    
    In the case of the HiFive Unmatched (obsolete hardware), there is an
    erratum resulting in faults when a TLB mapping spans PMP (firmware)
    protection regions. So, when our DMAP mapping spans into the memory
    reserved by OpenSBI, we get a fatal fault. This highlights the need to
    be smarter here.
    
    Therefore, let's attempt to build the DMAP a little more correctly by
    walking the physmap array and mapping each range individually. It is
    not perfect, in that segment boundaries are still only respected to a
    2MB granularity, but this could be improved in the future.
    
    Reviewed by:    markj
    MFC after:      1 month
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D45328
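
To make the 2MB-granularity point above concrete, here is a small, self-contained
user-space sketch (not the kernel code) of how a single physmap segment is carved
into 2MiB (L2) and 1GiB (L1) superpage mappings, mirroring the walk that
pmap_bootstrap_dmap() performs in the diff below. The segment addresses, the
rounddown()/roundup() helpers, and the printed output are illustrative only.

/*
 * Illustrative sketch only: carve one hypothetical memory segment into
 * 2MiB (L2) and 1GiB (L1) superpage mappings, mirroring the walk done by
 * pmap_bootstrap_dmap().  Compile and run as a normal userland program;
 * none of these names are the kernel's.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	L1_SIZE		(UINT64_C(1) << 30)	/* 1 GiB superpage */
#define	L2_SIZE		(UINT64_C(1) << 21)	/* 2 MiB superpage */
#define	L1_OFFSET	(L1_SIZE - 1)

#define	rounddown(x, y)	(((x) / (y)) * (y))
#define	roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

int
main(void)
{
	/* Hypothetical segment; note the start is not 1GiB-aligned. */
	uint64_t pa = rounddown(UINT64_C(0xbfc00000), L2_SIZE);
	uint64_t endpa = UINT64_C(0x100400000);

	/* Use 1GiB pages only if at least one fits entirely in the segment. */
	if (roundup(pa, L1_SIZE) + L1_SIZE <= endpa) {
		/* 2MiB mappings up to the first 1GiB boundary. */
		while ((pa & L1_OFFSET) != 0) {
			printf("L2 map %#" PRIx64 " - %#" PRIx64 "\n",
			    pa, pa + L2_SIZE);
			pa += L2_SIZE;
		}
		/* 1GiB superpages for the bulk of the segment. */
		while (pa + L1_SIZE - 1 < endpa) {
			printf("L1 map %#" PRIx64 " - %#" PRIx64 "\n",
			    pa, pa + L1_SIZE);
			pa += L1_SIZE;
		}
	}
	/* Trailing 2MiB mappings for whatever remains. */
	while (pa < endpa) {
		printf("L2 map %#" PRIx64 " - %#" PRIx64 "\n",
		    pa, pa + L2_SIZE);
		pa += L2_SIZE;
	}
	return (0);
}

For this particular segment the program prints two leading 2MiB mappings, one
1GiB mapping, and two trailing 2MiB mappings; the rounddown() of the segment
start is also where the "2MB granularity" caveat in the commit message comes
from, since up to just under 2MiB of non-memory can be mapped at each end.
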
---
 sys/riscv/riscv/pmap.c | 154 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 100 insertions(+), 54 deletions(-)

diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 937bb22371e2..8839a944de32 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -540,37 +540,109 @@ pmap_early_alloc_tables(vm_paddr_t *freemempos, int npages)
 	return (pt);
 }
 
-static void
-pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
+/*
+ *	Construct the direct map -- a linear mapping of physical memory into
+ *	the kernel address space.
+ *
+ *	We walk the list of physical memory segments (of arbitrary size and
+ *	address) mapping each appropriately using L2 and L1 superpages.
+ *	Consequently, the DMAP address space will have unmapped regions
+ *	corresponding to any holes between physical memory segments.
+ *
+ *	The lowest usable physical address will always be mapped to
+ *	DMAP_MIN_ADDRESS.
+ */
+static vm_paddr_t
+pmap_bootstrap_dmap(pd_entry_t *l1, vm_paddr_t freemempos)
 {
+	vm_paddr_t physmap[PHYS_AVAIL_ENTRIES];
 	vm_offset_t va;
-	vm_paddr_t pa;
-	pd_entry_t *l1;
-	u_int l1_slot;
-	pt_entry_t entry;
-	pn_t pn;
+	vm_paddr_t min_pa, max_pa, pa, endpa;
+	pd_entry_t *l2;
+	u_int l1slot, l2slot;
+	int physmap_idx;
+
+	physmap_idx = physmem_avail(physmap, nitems(physmap));
+	min_pa = physmap[0];
+	max_pa = physmap[physmap_idx - 1];
 
-	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
-	va = DMAP_MIN_ADDRESS;
-	l1 = (pd_entry_t *)kern_l1;
-	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
+	printf("physmap_idx %u\n", physmap_idx);
+	printf("min_pa %lx\n", min_pa);
+	printf("max_pa %lx\n", max_pa);
 
-	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
-	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
-		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
+	/* Set the limits of the DMAP region. */
+	dmap_phys_base = rounddown(min_pa, L1_SIZE);
+	dmap_phys_max = max_pa;
 
-		/* superpages */
-		pn = (pa / PAGE_SIZE);
-		entry = PTE_KERN;
-		entry |= (pn << PTE_PPN0_S);
-		pmap_store(&l1[l1_slot], entry);
+	/* Walk the physmap table. */
+	l2 = NULL;
+	l1slot = Ln_ENTRIES; /* sentinel value */
+	for (int idx = 0; idx < physmap_idx; idx += 2) {
+		pa = rounddown(physmap[idx], L2_SIZE);
+		endpa = physmap[idx + 1];
+
+		/* Virtual address for this range. */
+		va = PHYS_TO_DMAP(pa);
+
+		/* Any 1GB possible for this range? */
+		if (roundup(pa, L1_SIZE) + L1_SIZE > endpa)
+			goto l2end;
+
+		/* Loop until the next 1GB boundary. */
+		while ((pa & L1_OFFSET) != 0) {
+			if (l2 == NULL || pmap_l1_index(va) != l1slot) {
+				/* Need to alloc another page table. */
+				l2 = pmap_early_alloc_tables(&freemempos, 1);
+
+				/* Link it. */
+				l1slot = pmap_l1_index(va);
+				pmap_store(&l1[l1slot],
+				    L1_PDE((vm_paddr_t)l2, PTE_V));
+			}
+
+			/* map l2 pages */
+			l2slot = pmap_l2_index(va);
+			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+
+			pa += L2_SIZE;
+			va += L2_SIZE;
+		}
+
+		/* Map what we can with 1GB superpages. */
+		while (pa + L1_SIZE - 1 < endpa) {
+			/* map l1 pages */
+			l1slot = pmap_l1_index(va);
+			pmap_store(&l1[l1slot], L1_PTE(pa, PTE_KERN));
+
+			pa += L1_SIZE;
+			va += L1_SIZE;
+		}
+
+l2end:
+		while (pa < endpa) {
+			if (l2 == NULL || pmap_l1_index(va) != l1slot) {
+				/* Need to alloc another page table. */
+				l2 = pmap_early_alloc_tables(&freemempos, 1);
+
+				/* Link it. */
+				l1slot = pmap_l1_index(va);
+				pmap_store(&l1[l1slot],
+				    L1_PDE((vm_paddr_t)l2, PTE_V));
+			}
+
+			/* map l2 pages */
+			l2slot = pmap_l2_index(va);
+			pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+
+			pa += L2_SIZE;
+			va += L2_SIZE;
+		}
 	}
 
-	/* Set the upper limit of the DMAP region */
-	dmap_phys_max = pa;
+	/* And finally, the limit on DMAP VA. */
 	dmap_max_addr = va;
 
-	sfence_vma();
+	return (freemempos);
 }
 
 /*
@@ -589,7 +661,7 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
  */
 static vm_paddr_t
 pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
-    vm_paddr_t min_pa, vm_paddr_t max_pa, vm_paddr_t *root_pt_phys)
+    vm_paddr_t *root_pt_phys)
 {
 	pt_entry_t *l0, *l1, *kern_l2, *kern_l3, *devmap_l3;
 	pd_entry_t *devmap_l2;
@@ -657,6 +729,9 @@ pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
 	nkernl3 = 16;
 	kern_l3 = pmap_early_alloc_tables(&freemempos, nkernl3);
 
+	/* Bootstrap the direct map. */
+	freemempos = pmap_bootstrap_dmap(l1, freemempos);
+
 	/* Allocations are done. */
 	if (freemempos < roundup2(kernend, L2_SIZE))
 		freemempos = roundup2(kernend, L2_SIZE);
@@ -714,9 +789,6 @@ pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
 	pa = (vm_paddr_t)devmap_l2;
 	pmap_store(&l1[slot], L1_PDE(pa, PTE_V));
 
-	/* Bootstrap the direct map. */
-	pmap_bootstrap_dmap((vm_offset_t)l1, min_pa, max_pa);
-
 	/* Return the next position of free memory */
 	return (freemempos);
 }
@@ -727,14 +799,11 @@ pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
 void
 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 {
-	vm_paddr_t physmap[PHYS_AVAIL_ENTRIES];
-	vm_paddr_t freemempos;
-	vm_paddr_t max_pa, min_pa, pa;
+	vm_paddr_t freemempos, pa;
 	vm_paddr_t root_pt_phys;
 	vm_offset_t freeva;
 	vm_offset_t dpcpu, msgbufpv;
 	pt_entry_t *pte;
-	u_int physmap_idx;
 	int i;
 
 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
@@ -752,31 +821,8 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 	 */
 	CPU_SET(PCPU_GET(hart), &kernel_pmap->pm_active);
 
-	/* Assume the address we were loaded to is a valid physical address. */
-	min_pa = max_pa = kernstart;
-
-	physmap_idx = physmem_avail(physmap, nitems(physmap));
-	physmap_idx /= 2;
-
-	/*
-	 * Find the minimum physical address. physmap is sorted,
-	 * but may contain empty ranges.
-	 */
-	for (i = 0; i < physmap_idx * 2; i += 2) {
-		if (physmap[i] == physmap[i + 1])
-			continue;
-		if (physmap[i] <= min_pa)
-			min_pa = physmap[i];
-		if (physmap[i + 1] > max_pa)
-			max_pa = physmap[i + 1];
-	}
-	printf("physmap_idx %u\n", physmap_idx);
-	printf("min_pa %lx\n", min_pa);
-	printf("max_pa %lx\n", max_pa);
-
 	/* Create a new set of pagetables to run the kernel in. */
-	freemempos = pmap_create_pagetables(kernstart, kernlen, min_pa, max_pa,
-	    &root_pt_phys);
+	freemempos = pmap_create_pagetables(kernstart, kernlen, &root_pt_phys);
 
 	/* Switch to the newly created page tables. */
 	kernel_pmap->pm_stage = PM_STAGE1;
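
The block comment added in this commit describes the DMAP as a linear mapping of
physical memory into the kernel address space, with unmapped holes between
segments. The following user-space sketch spells out the PA <-> DMAP VA
arithmetic that dmap_phys_base, dmap_phys_max and dmap_max_addr establish. The
DMAP_MIN_ADDRESS value and all addresses here are placeholders, and these
helpers are simplified stand-ins for the real PHYS_TO_DMAP()/DMAP_TO_PHYS()
macros, not their actual definitions.

/*
 * Simplified sketch of the linear PA <-> DMAP VA translation; the base
 * address and the example values are hypothetical, and the real kernel
 * macros additionally assert that the address lies inside the DMAP.
 */
#include <assert.h>
#include <stdint.h>

#define	DMAP_MIN_ADDRESS	UINT64_C(0xffffffd000000000)	/* placeholder */

static uint64_t dmap_phys_base;	/* lowest PA, rounded down to L1_SIZE */
static uint64_t dmap_phys_max;	/* highest PA covered by the DMAP */
static uint64_t dmap_max_addr;	/* first VA past the DMAP mappings */

/* PA -> DMAP VA: a constant offset from DMAP_MIN_ADDRESS. */
static uint64_t
phys_to_dmap(uint64_t pa)
{
	assert(pa >= dmap_phys_base && pa < dmap_phys_max);
	return (DMAP_MIN_ADDRESS + (pa - dmap_phys_base));
}

/* DMAP VA -> PA: the inverse offset. */
static uint64_t
dmap_to_phys(uint64_t va)
{
	assert(va >= DMAP_MIN_ADDRESS && va < dmap_max_addr);
	return (dmap_phys_base + (va - DMAP_MIN_ADDRESS));
}

int
main(void)
{
	/* Hypothetical values, in the spirit of the boot-time printfs. */
	dmap_phys_base = UINT64_C(0x80000000);		/* L1-aligned here */
	dmap_phys_max = UINT64_C(0x280000000);
	dmap_max_addr = DMAP_MIN_ADDRESS + (dmap_phys_max - dmap_phys_base);

	uint64_t pa = UINT64_C(0x80200000);		/* some usable page */
	assert(dmap_to_phys(phys_to_dmap(pa)) == pa);
	return (0);
}

The translation itself is unchanged by this commit; holes between physmap
segments simply remain unmapped within the DMAP VA window, which is why only
the construction loop needed to become smarter.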