expanding amd64 past the 1TB limit
Chris Torek
torek at torek.net
Mon Jul 15 07:36:50 UTC 2013
(Durn mailing list software, eating attachments... there are just
the two so I will just send them one at a time here. I took the
individual people off the to/cc since presumably you all got the
attachments already.)
Date: Thu, 27 Jun 2013 18:49:29 -0600
Subject: [PATCH 2/2] increase physical and virtual memory limits
Increase kernel VM space: go from .5 TB of KVA and 1 TB of direct
map, to 8 TB of KVA and 16 TB of direct map. However, we allocate
less direct map space for small physical-memory systems. Also, if
Maxmem is so large that there is not enough direct map space,
reduce Maxmem to fit, so that the system can boot unassisted.
---
amd64/amd64/pmap.c | 44 +++++++++++++++++++++++++++++++++-----------
amd64/include/pmap.h | 36 +++++++++++++++++++++++++++++-------
amd64/include/vmparam.h | 13 +++++++------
3 files changed, 69 insertions(+), 24 deletions(-)
diff --git a/amd64/amd64/pmap.c b/amd64/amd64/pmap.c
index 46f6940..5e43c93 100644
--- a/amd64/amd64/pmap.c
+++ b/amd64/amd64/pmap.c
@@ -232,6 +232,7 @@ u_int64_t KPML4phys; /* phys addr of kernel level 4 */
static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */
static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
+static int ndmpdpphys; /* number of DMPDPphys pages */
static struct rwlock_padalign pvh_global_lock;
@@ -540,7 +541,18 @@ create_pagetables(vm_paddr_t *firstaddr)
ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
if (ndmpdp < 4) /* Minimum 4GB of dirmap */
ndmpdp = 4;
- DMPDPphys = allocpages(firstaddr, NDMPML4E);
+ ndmpdpphys = howmany(ndmpdp, NPDPEPG);
+ if (ndmpdpphys > NDMPML4E) {
+ /*
+ * Each NDMPML4E allows 512 GB, so limit to that,
+ * and then readjust ndmpdp and ndmpdpphys.
+ */
+ printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512);
+ Maxmem = atop(NDMPML4E * NBPML4);
+ ndmpdpphys = NDMPML4E;
+ ndmpdp = NDMPML4E * NPDEPG;
+ }
+ DMPDPphys = allocpages(firstaddr, ndmpdpphys);
ndm1g = 0;
if ((amd_feature & AMDID_PAGE1GB) != 0)
ndm1g = ptoa(Maxmem) >> PDPSHIFT;
@@ -557,6 +569,10 @@ create_pagetables(vm_paddr_t *firstaddr)
* bootstrap. We defer this until after all memory-size dependent
* allocations are done (e.g. direct map), so that we don't have to
* build in too much slop in our estimate.
+ *
+ * Note that when NKPML4E > 1, we have an empty page underneath
+ * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed)
+ * pages. (pmap_enter requires a PD page to exist for each KPML4E.)
*/
nkpt_init(*firstaddr);
nkpdpe = NKPDPE(nkpt);
@@ -581,8 +597,8 @@ create_pagetables(vm_paddr_t *firstaddr)
for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G;
- /* And connect up the PD to the PDP */
- pdp_p = (pdp_entry_t *)KPDPphys;
+ /* And connect up the PD to the PDP (leaving room for L4 pages) */
+ pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE));
for (i = 0; i < nkpdpe; i++)
pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U;
@@ -619,14 +635,16 @@ create_pagetables(vm_paddr_t *firstaddr)
p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U;
/* Connect the Direct Map slot(s) up to the PML4. */
- for (i = 0; i < NDMPML4E; i++) {
+ for (i = 0; i < ndmpdpphys; i++) {
p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U;
}
- /* Connect the KVA slot up to the PML4 */
- p4_p[KPML4I] = KPDPphys;
- p4_p[KPML4I] |= PG_RW | PG_V | PG_U;
+ /* Connect the KVA slots up to the PML4 */
+ for (i = 0; i < NKPML4E; i++) {
+ p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
+ p4_p[KPML4BASE + i] |= PG_RW | PG_V | PG_U;
+ }
}
/*
@@ -1685,8 +1703,11 @@ pmap_pinit(pmap_t pmap)
pagezero(pmap->pm_pml4);
/* Wire in kernel global address entries. */
- pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
- for (i = 0; i < NDMPML4E; i++) {
+ for (i = 0; i < NKPML4E; i++) {
+ pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + (i << PAGE_SHIFT)) |
+ PG_RW | PG_V | PG_U;
+ }
+ for (i = 0; i < ndmpdpphys; i++) {
pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) |
PG_RW | PG_V | PG_U;
}
@@ -1941,8 +1962,9 @@ pmap_release(pmap_t pmap)
m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
- pmap->pm_pml4[KPML4I] = 0; /* KVA */
- for (i = 0; i < NDMPML4E; i++) /* Direct Map */
+ for (i = 0; i < NKPML4E; i++) /* KVA */
+ pmap->pm_pml4[KPML4BASE + i] = 0;
+ for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
pmap->pm_pml4[DMPML4I + i] = 0;
pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */
diff --git a/amd64/include/pmap.h b/amd64/include/pmap.h
index dc02e49..eda0295 100644
--- a/amd64/include/pmap.h
+++ b/amd64/include/pmap.h
@@ -113,28 +113,50 @@
((unsigned long)(l2) << PDRSHIFT) | \
((unsigned long)(l1) << PAGE_SHIFT))
-#define NKPML4E 1 /* number of kernel PML4 slots */
+/*
+ * Number of kernel PML4 slots. Can be anywhere from 1 to 64 or so,
+ * but setting it larger than NDMPML4E makes no sense.
+ *
+ * Each slot provides .5 TB of kernel virtual space.
+ */
+#define NKPML4E 16
#define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */
#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */
#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */
/*
- * NDMPML4E is the number of PML4 entries that are used to implement the
- * direct map. It must be a power of two.
+ * NDMPML4E is the maximum number of PML4 entries that will be
+ * used to implement the direct map. It must be a power of two,
+ * and should generally exceed NKPML4E. The maximum possible
+ * value is 64; using 128 will make the direct map intrude into
+ * the recursive page table map.
*/
-#define NDMPML4E 2
+#define NDMPML4E 32
/*
- * The *PDI values control the layout of virtual memory. The starting address
+ * These values control the layout of virtual memory. The starting address
* of the direct map, which is controlled by DMPML4I, must be a multiple of
* its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
+ *
+ * Note: KPML4I is the index of the (single) level 4 page that maps
+ * the KVA that holds KERNBASE, while KPML4BASE is the index of the
+ * first level 4 page that maps VM_MIN_KERNEL_ADDRESS. If NKPML4E
+ * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra
+ * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to
+ * KERNBASE. Similarly, if KPML4I < (base+N-1), extra level 2 PDEs are
+ * needed to map from somewhere-above-KERNBASE to VM_MAX_KERNEL_ADDRESS.
+ *
+ * (KPML4I combines with KPDPI to choose where KERNBASE starts.
+ * Or, in other words, KPML4I provides bits 39..47 of KERNBASE,
+ * and KPDPI provides bits 30..38.)
*/
#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
-#define KPML4I (NPML4EPG-1) /* Top 512GB for KVM */
-#define DMPML4I rounddown(KPML4I - NDMPML4E, NDMPML4E) /* Below KVM */
+#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */
+#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
+#define KPML4I (NPML4EPG-1)
#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */
/*
diff --git a/amd64/include/vmparam.h b/amd64/include/vmparam.h
index 33f62bd..cff2558 100644
--- a/amd64/include/vmparam.h
+++ b/amd64/include/vmparam.h
@@ -145,18 +145,19 @@
* 0x0000000000000000 - 0x00007fffffffffff user map
* 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole)
* 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot)
- * 0xffff804020101000 - 0xfffffdffffffffff unused
- * 0xfffffe0000000000 - 0xfffffeffffffffff 1TB direct map
- * 0xffffff0000000000 - 0xffffff7fffffffff unused
- * 0xffffff8000000000 - 0xffffffffffffffff 512GB kernel map
+ * 0xffff804020101000 - 0xffffdfffffffffff unused
+ * 0xffffe00000000000 - 0xffffefffffffffff 16TB direct map
+ * 0xfffff00000000000 - 0xfffff7ffffffffff unused
+ * 0xfffff80000000000 - 0xffffffffffffffff 8TB kernel map
*
* Within the kernel map:
*
* 0xffffffff80000000 KERNBASE
*/
-#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-1, NPDEPG-1, NPTEPG-1)
-#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-512, 0, 0)
+#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4BASE, 0, 0, 0)
+#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4BASE + NKPML4E - 1, \
+ NPDPEPG-1, NPDEPG-1, NPTEPG-1)
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
--
1.8.2.1
More information about the freebsd-hackers
mailing list