git: 31218f3209ac - main - riscv: Add support for enabling SV48 mode

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Tue, 01 Mar 2022 14:40:09 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=31218f3209acf0f8f0dc54e436342c8fa604d279

commit 31218f3209acf0f8f0dc54e436342c8fa604d279
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-03-01 14:07:14 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-03-01 14:39:44 +0000

    riscv: Add support for enabling SV48 mode
    
    This increases the size of the user map from 256GB to 128TB.  The kernel
    map is left unchanged for now.
    
    For now SV48 mode is left disabled by default, but can be enabled with a
    tunable.  Note that extant hardware does not implement SV48, but QEMU
    does.
    
    - In pmap_bootstrap(), allocate an L0 page and attempt to enable SV48
      mode.  If the write to SATP doesn't take, the kernel continues to run
      in SV39 mode.
    - Define VM_MAX_USER_ADDRESS to refer to the SV48 limit.  In SV39 mode,
      the region [VM_MAX_USER_ADDRESS_SV39, VM_MAX_USER_ADDRESS_SV48] is not
      mappable.
    
    Reviewed by:    jhb
    MFC after:      1 week
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D34280
---
 sys/riscv/include/riscvreg.h |  6 ++---
 sys/riscv/include/vmparam.h  |  6 ++---
 sys/riscv/riscv/pmap.c       | 59 +++++++++++++++++++++++++++++++++++---------
 3 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/sys/riscv/include/riscvreg.h b/sys/riscv/include/riscvreg.h
index e7b52cae8042..91b6f9cb3ee9 100644
--- a/sys/riscv/include/riscvreg.h
+++ b/sys/riscv/include/riscvreg.h
@@ -150,11 +150,11 @@
 #define	SIP_STIP	(1 << 5)
 
 #define	SATP_PPN_S	0
-#define	SATP_PPN_M	(0xfffffffffff << SATP_PPN_S)
+#define	SATP_PPN_M	(0xfffffffffffUL << SATP_PPN_S)
 #define	SATP_ASID_S	44
-#define	SATP_ASID_M	(0xffff << SATP_ASID_S)
+#define	SATP_ASID_M	(0xffffUL << SATP_ASID_S)
 #define	SATP_MODE_S	60
-#define	SATP_MODE_M	(0xf << SATP_MODE_S)
+#define	SATP_MODE_M	(0xfUL << SATP_MODE_S)
 #define	SATP_MODE_SV39	(8ULL << SATP_MODE_S)
 #define	SATP_MODE_SV48	(9ULL << SATP_MODE_S)
 
diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h
index 35f82638cd70..f11f02dcb3e6 100644
--- a/sys/riscv/include/vmparam.h
+++ b/sys/riscv/include/vmparam.h
@@ -197,7 +197,7 @@
 #define	VM_MIN_USER_ADDRESS		(0x0000000000000000UL)
 #define	VM_MAX_USER_ADDRESS_SV39	(0x0000004000000000UL)
 #define	VM_MAX_USER_ADDRESS_SV48	(0x0000800000000000UL)
-#define	VM_MAX_USER_ADDRESS		VM_MAX_USER_ADDRESS_SV39
+#define	VM_MAX_USER_ADDRESS		VM_MAX_USER_ADDRESS_SV48
 
 #define	VM_MINUSER_ADDRESS	(VM_MIN_USER_ADDRESS)
 #define	VM_MAXUSER_ADDRESS	(VM_MAX_USER_ADDRESS)
@@ -209,10 +209,10 @@
 #define	KERNBASE		(VM_MIN_KERNEL_ADDRESS)
 #define	SHAREDPAGE_SV39		(VM_MAX_USER_ADDRESS_SV39 - PAGE_SIZE)
 #define	SHAREDPAGE_SV48		(VM_MAX_USER_ADDRESS_SV48 - PAGE_SIZE)
-#define	SHAREDPAGE		SHAREDPAGE_SV39
+#define	SHAREDPAGE		SHAREDPAGE_SV48
 #define	USRSTACK_SV39		SHAREDPAGE_SV39
 #define	USRSTACK_SV48		SHAREDPAGE_SV48
-#define	USRSTACK		USRSTACK_SV39
+#define	USRSTACK		USRSTACK_SV48
 #define	PS_STRINGS_SV39		(USRSTACK_SV39 - sizeof(struct ps_strings))
 #define	PS_STRINGS_SV48		(USRSTACK_SV48 - sizeof(struct ps_strings))
 
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 3757f6de3043..ef33ea6719b8 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -228,11 +228,17 @@ __FBSDID("$FreeBSD$");
 #define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
 			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
 
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+    "VM/pmap parameters");
+
 /* The list of all the user pmaps */
 LIST_HEAD(pmaplist, pmap);
 static struct pmaplist allpmaps = LIST_HEAD_INITIALIZER();
 
 enum pmap_mode __read_frequently pmap_mode = PMAP_MODE_SV39;
+SYSCTL_INT(_vm_pmap, OID_AUTO, mode, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+    &pmap_mode, 0,
+    "translation mode, 0 = SV39, 1 = SV48");
 
 struct pmap kernel_pmap_store;
 
@@ -251,9 +257,6 @@ CTASSERT((DMAP_MAX_ADDRESS  & ~L1_OFFSET) == DMAP_MAX_ADDRESS);
 static struct rwlock_padalign pvh_global_lock;
 static struct mtx_padalign allpmaps_lock;
 
-static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
-    "VM/pmap parameters");
-
 static int superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
     CTLFLAG_RDTUN, &superpages_enabled, 0,
@@ -617,12 +620,13 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 void
 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 {
-	u_int l1_slot, l2_slot;
-	vm_offset_t freemempos;
-	vm_offset_t dpcpu, msgbufpv;
-	vm_paddr_t max_pa, min_pa, pa;
+	uint64_t satp;
+	vm_offset_t dpcpu, freemempos, l0pv, msgbufpv;
+	vm_paddr_t l0pa, l1pa, max_pa, min_pa, pa;
+	pd_entry_t *l0p;
 	pt_entry_t *l2p;
-	int i;
+	u_int l1_slot, l2_slot;
+	int i, mode;
 
 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
 
@@ -696,6 +700,33 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 	freemempos += (np * PAGE_SIZE);					\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
+	mode = 0;
+	TUNABLE_INT_FETCH("vm.pmap.mode", &mode);
+	if (mode == PMAP_MODE_SV48) {
+		/*
+		 * Enable SV48 mode: allocate an L0 page and set SV48 mode in
+		 * SATP.  If the implementation does not provide SV48 mode,
+		 * the mode read back from the (WARL) SATP register will be
+		 * unchanged, and we continue in SV39 mode.
+		 */
+		alloc_pages(l0pv, 1);
+		l0p = (void *)l0pv;
+		l1pa = pmap_early_vtophys(l1pt, l1pt);
+		l0p[pmap_l0_index(KERNBASE)] = PTE_V | PTE_A | PTE_D |
+		    ((l1pa >> PAGE_SHIFT) << PTE_PPN0_S);
+
+		l0pa = pmap_early_vtophys(l1pt, l0pv);
+		csr_write(satp, (l0pa >> PAGE_SHIFT) | SATP_MODE_SV48);
+		satp = csr_read(satp);
+		if ((satp & SATP_MODE_M) == SATP_MODE_SV48) {
+			pmap_mode = PMAP_MODE_SV48;
+			kernel_pmap_store.pm_top = l0p;
+		} else {
+			/* Mode didn't change, give the page back. */
+			freemempos -= PAGE_SIZE;
+		}
+	}
+
 	/* Allocate dynamic per-cpu area. */
 	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu, 0);
@@ -1269,14 +1300,20 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
 	return (pmap_unwire_ptp(pmap, va, mpte, free));
 }
 
+static uint64_t
+pmap_satp_mode(void)
+{
+	return (pmap_mode == PMAP_MODE_SV39 ? SATP_MODE_SV39 : SATP_MODE_SV48);
+}
+
 void
 pmap_pinit0(pmap_t pmap)
 {
-
 	PMAP_LOCK_INIT(pmap);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 	pmap->pm_top = kernel_pmap->pm_top;
-	pmap->pm_satp = SATP_MODE_SV39 | (vtophys(pmap->pm_top) >> PAGE_SHIFT);
+	pmap->pm_satp = pmap_satp_mode() |
+	    (vtophys(pmap->pm_top) >> PAGE_SHIFT);
 	CPU_ZERO(&pmap->pm_active);
 	pmap_activate_boot(pmap);
 }
@@ -1293,7 +1330,7 @@ pmap_pinit(pmap_t pmap)
 
 	topphys = VM_PAGE_TO_PHYS(mtop);
 	pmap->pm_top = (pd_entry_t *)PHYS_TO_DMAP(topphys);
-	pmap->pm_satp = SATP_MODE_SV39 | (topphys >> PAGE_SHIFT);
+	pmap->pm_satp = pmap_satp_mode() | (topphys >> PAGE_SHIFT);
 
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));