svn commit: r297446 - in head/sys/arm64: arm64 include

Andrew Turner andrew at FreeBSD.org
Thu Mar 31 11:07:26 UTC 2016


Author: andrew
Date: Thu Mar 31 11:07:24 2016
New Revision: 297446
URL: https://svnweb.freebsd.org/changeset/base/297446

Log:
  Add support for 4-level page tables. The userland address space has been
  increased to 256TiB. The kernel address space can also be increased to
  the same size, but this will be done in a later change.
  
  To help work with the extra level of page tables, two new functions have
  been added: one to find the lowest-level table entry, and one to find the
  lowest-level block or page entry. Both of these look up the entry for a
  given pmap and virtual address.
  
  This has been tested with a combination of buildworld, stress2 tests, and
  by using sort(1) on /dev/zero to consume a large amount of memory. No new
  issues are known to have been introduced by this change.
  
  Reviewed by:	kib
  Obtained from:	ABT Systems Ltd
  Relnotes:	yes
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D5720
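
For illustration only (not part of the committed diff): a minimal sketch of
how the new level-aware lookup is meant to be used, modelled on the
pmap_extract() change below. The wrapper name va_to_pa_sketch is
hypothetical; pmap_pte(), pmap_load() and the L*_OFFSET/ATTR_MASK macros
come from the patched arm64 pmap code.

/*
 * Translate a VA to a PA with the new helper.  pmap_pte() returns the
 * lowest valid block or page entry and reports its level (1, 2 or 3)
 * through *lvl, or NULL if the address is not mapped.
 */
static vm_paddr_t
va_to_pa_sketch(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	pa = 0;
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);
		pa = tpte & ~ATTR_MASK;
		switch (lvl) {
		case 1:				/* 1GiB L1 block */
			pa |= (va & L1_OFFSET);
			break;
		case 2:				/* 2MiB L2 block */
			pa |= (va & L2_OFFSET);
			break;
		case 3:				/* 4KiB L3 page */
			pa |= (va & L3_OFFSET);
			break;
		}
	}
	return (pa);
}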

Modified:
  head/sys/arm64/arm64/genassym.c
  head/sys/arm64/arm64/locore.S
  head/sys/arm64/arm64/machdep.c
  head/sys/arm64/arm64/minidump_machdep.c
  head/sys/arm64/arm64/pmap.c
  head/sys/arm64/arm64/swtch.S
  head/sys/arm64/arm64/vm_machdep.c
  head/sys/arm64/include/machdep.h
  head/sys/arm64/include/pcb.h
  head/sys/arm64/include/pmap.h
  head/sys/arm64/include/pte.h
  head/sys/arm64/include/vmparam.h

Modified: head/sys/arm64/arm64/genassym.c
==============================================================================
--- head/sys/arm64/arm64/genassym.c	Thu Mar 31 09:55:21 2016	(r297445)
+++ head/sys/arm64/arm64/genassym.c	Thu Mar 31 11:07:24 2016	(r297446)
@@ -52,7 +52,7 @@ ASSYM(PCB_SIZE, roundup2(sizeof(struct p
 ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT);
 ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x));
 ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp));
-ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr));
+ASSYM(PCB_L0ADDR, offsetof(struct pcb, pcb_l0addr));
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
 

Modified: head/sys/arm64/arm64/locore.S
==============================================================================
--- head/sys/arm64/arm64/locore.S	Thu Mar 31 09:55:21 2016	(r297445)
+++ head/sys/arm64/arm64/locore.S	Thu Mar 31 11:07:24 2016	(r297446)
@@ -35,7 +35,7 @@
 #include <machine/param.h>
 #include <machine/pte.h>
 
-#define	VIRT_BITS	39
+#define	VIRT_BITS	48
 
 	.globl	kernbase
 	.set	kernbase, KERNBASE
@@ -89,7 +89,8 @@ _start:
 	/*
 	 * At this point:
 	 * x27 = TTBR0 table
-	 * x26 = TTBR1 table
+	 * x26 = Kernel L1 table
+	 * x24 = TTBR1 table
 	 */
 
 	/* Enable the mmu */
@@ -100,16 +101,6 @@ _start:
 	br	x15
 
 virtdone:
-	/*
-	 * Now that we are in virtual address space,
-	 * we don't need the identity mapping in TTBR0 and
-	 * can set the TCR to a more useful value.
-	 */
-	ldr	x2, tcr
-	mrs	x3, id_aa64mmfr0_el1
-	bfi	x2, x3, #32, #3
-	msr	tcr_el1, x2
-
 	/* Set up the stack */
 	adr	x25, initstack_end
 	mov	sp, x25
@@ -128,6 +119,7 @@ virtdone:
 
 	/* Make the page table base a virtual address */
 	sub	x26, x26, x29
+	sub	x24, x24, x29
 
 	sub	sp, sp, #(64 * 4)
 	mov	x0, sp
@@ -139,6 +131,7 @@ virtdone:
 	str	x26, [x0, 8]	/* kern_l1pt */
 	str	x29, [x0, 16]	/* kern_delta */
 	str	x25, [x0, 24]	/* kern_stack */
+	str	x24, [x0, 32]	/* kern_l0pt */
 
 	/* trace back starts here */
 	mov	fp, #0
@@ -175,7 +168,7 @@ ENTRY(mpentry)
 	msr	contextidr_el1, x1
 
 	/* Load the kernel page table */
-	adr	x26, pagetable_l1_ttbr1
+	adr	x24, pagetable_l0_ttbr1
 	/* Load the identity page table */
 	adr	x27, pagetable_l0_ttbr0
 
@@ -187,16 +180,6 @@ ENTRY(mpentry)
 	br	x15
 
 mp_virtdone:
-	/*
-	 * Now that we are in virtual address space,
-	 * we don't need the identity mapping in TTBR0 and
-	 * can set the TCR to a more useful value.
-	 */
-	ldr	x2, tcr
-	mrs	x3, id_aa64mmfr0_el1
-	bfi	x2, x3, #32, #3
-	msr	tcr_el1, x2
-
 	ldr	x4, =secondary_stacks
 	mov	x5, #(PAGE_SIZE * KSTACK_PAGES)
 	mul	x5, x0, x5
@@ -388,11 +371,18 @@ create_pagetables:
 	mov	x6, x26
 	bl	link_l1_pagetable
 
+	/* Move to the l0 table */
+	add	x24, x26, #PAGE_SIZE
+
+	/* Link the l0 -> l1 table */
+	mov	x9, x6
+	mov	x6, x24
+	bl	link_l0_pagetable
 
 	/*
 	 * Build the TTBR0 maps.
 	 */
-	add	x27, x26, #PAGE_SIZE
+	add	x27, x24, #PAGE_SIZE
 
 	mov	x6, x27		/* The initial page table */
 #if defined(SOCDEV_PA) && defined(SOCDEV_VA)
@@ -440,7 +430,7 @@ link_l0_pagetable:
 	 */
 	/* Find the table index */
 	lsr	x11, x8, #L0_SHIFT
-	and	x11, x11, #Ln_ADDR_MASK
+	and	x11, x11, #L0_ADDR_MASK
 
 	/* Build the L0 block entry */
 	mov	x12, #L0_TABLE
@@ -582,7 +572,7 @@ start_mmu:
 
 	/* Load ttbr0 and ttbr1 */
 	msr	ttbr0_el1, x27
-	msr	ttbr1_el1, x26
+	msr	ttbr1_el1, x24
 	isb
 
 	/* Clear the Monitor Debug System control register */
@@ -596,11 +586,8 @@ start_mmu:
 
 	/*
 	 * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
-	 * Some machines have physical memory mapped >512GiB, which can not
-	 * be identity-mapped using the default 39 VA bits. Thus, use
-	 * 48 VA bits for now and switch back to 39 after the VA jump.
 	 */
-	ldr	x2, tcr_early
+	ldr	x2, tcr
 	mrs	x3, id_aa64mmfr0_el1
 	bfi	x2, x3, #32, #3
 	msr	tcr_el1, x2
@@ -623,9 +610,6 @@ mair:
 tcr:
 	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \
 	    TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
-tcr_early:
-	.quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
-	    TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
 sctlr_set:
 	/* Bits to set */
 	.quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
@@ -651,6 +635,8 @@ pagetable:
 	.space	PAGE_SIZE
 pagetable_l1_ttbr1:
 	.space	PAGE_SIZE
+pagetable_l0_ttbr1:
+	.space	PAGE_SIZE
 pagetable_l1_ttbr0:
 	.space	PAGE_SIZE
 pagetable_l0_ttbr0:

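For reference, a sketch of the 48-bit virtual address layout that the new
4-level (L0-L3) walk uses with the 4KiB granule.  The shift values follow
the standard arm64 pte.h definitions; the SKETCH_* macro names are
illustrative only and do not appear in this commit.

/*
 * bits 47-39: L0 index   (512 entries per table)
 * bits 38-30: L1 index
 * bits 29-21: L2 index
 * bits 20-12: L3 index
 * bits 11-0 : page offset
 *
 * 4 * 9 + 12 = 48 bits, giving the 256TiB address space per TTBR.
 */
#define	SKETCH_L0_INDEX(va)	(((va) >> 39) & 0x1ff)
#define	SKETCH_L1_INDEX(va)	(((va) >> 30) & 0x1ff)
#define	SKETCH_L2_INDEX(va)	(((va) >> 21) & 0x1ff)
#define	SKETCH_L3_INDEX(va)	(((va) >> 12) & 0x1ff)
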
Modified: head/sys/arm64/arm64/machdep.c
==============================================================================
--- head/sys/arm64/arm64/machdep.c	Thu Mar 31 09:55:21 2016	(r297445)
+++ head/sys/arm64/arm64/machdep.c	Thu Mar 31 11:07:24 2016	(r297446)
@@ -896,8 +896,8 @@ initarm(struct arm64_bootparams *abp)
 	cache_setup();
 
 	/* Bootstrap enough of pmap  to enter the kernel proper */
-	pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta,
-	    lastaddr - KERNBASE);
+	pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt,
+	    KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
 
 	arm_devmap_bootstrap(0, NULL);
 

Modified: head/sys/arm64/arm64/minidump_machdep.c
==============================================================================
--- head/sys/arm64/arm64/minidump_machdep.c	Thu Mar 31 09:55:21 2016	(r297445)
+++ head/sys/arm64/arm64/minidump_machdep.c	Thu Mar 31 11:07:24 2016	(r297446)
@@ -218,7 +218,7 @@ blk_write(struct dumperinfo *di, char *p
 int
 minidumpsys(struct dumperinfo *di)
 {
-	pd_entry_t *l1, *l2;
+	pd_entry_t *l0, *l1, *l2;
 	pt_entry_t *l3;
 	uint32_t pmapsize;
 	vm_offset_t va;
@@ -236,7 +236,7 @@ minidumpsys(struct dumperinfo *di)
 	pmapsize = 0;
 	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
 		pmapsize += PAGE_SIZE;
-		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3))
+		if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3))
 			continue;
 
 		/* We should always be using the l2 table for kvm */
@@ -335,7 +335,7 @@ minidumpsys(struct dumperinfo *di)
 	/* Dump kernel page directory pages */
 	bzero(&tmpbuffer, sizeof(tmpbuffer));
 	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
-		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) {
+		if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) {
 			/* We always write a page, even if it is zero */
 			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
 			if (error)

Modified: head/sys/arm64/arm64/pmap.c
==============================================================================
--- head/sys/arm64/arm64/pmap.c	Thu Mar 31 09:55:21 2016	(r297445)
+++ head/sys/arm64/arm64/pmap.c	Thu Mar 31 11:07:24 2016	(r297446)
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
- * Copyright (c) 2014 The FreeBSD Foundation
+ * Copyright (c) 2014-2016 The FreeBSD Foundation
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -142,9 +142,14 @@ __FBSDID("$FreeBSD$");
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 
-#define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
-#define	NUPDE			(NPDEPG * NPDEPG)
-#define	NUSERPGTBLS		(NUPDE + NPDEPG)
+#define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
+#define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
+#define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
+#define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))
+
+#define	NUL0E		L0_ENTRIES
+#define	NUL1E		(NUL0E * NL1PG)
+#define	NUL2E		(NUL1E * NL2PG)
 
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
@@ -273,15 +278,37 @@ pagezero(void *p)
 	bzero(p, PAGE_SIZE);
 }
 
+#define	pmap_l0_index(va)	(((va) >> L0_SHIFT) & L0_ADDR_MASK)
 #define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
 #define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
 #define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)
 
 static __inline pd_entry_t *
+pmap_l0(pmap_t pmap, vm_offset_t va)
+{
+
+	return (&pmap->pm_l0[pmap_l0_index(va)]);
+}
+
+static __inline pd_entry_t *
+pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
+{
+	pd_entry_t *l1;
+
+	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+	return (&l1[pmap_l1_index(va)]);
+}
+
+static __inline pd_entry_t *
 pmap_l1(pmap_t pmap, vm_offset_t va)
 {
+	pd_entry_t *l0;
+
+	l0 = pmap_l0(pmap, va);
+	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
+		return (NULL);
 
-	return (&pmap->pm_l1[pmap_l1_index(va)]);
+	return (pmap_l0_to_l1(l0, va));
 }
 
 static __inline pd_entry_t *
@@ -314,28 +341,103 @@ pmap_l2_to_l3(pd_entry_t *l2, vm_offset_
 	return (&l3[pmap_l3_index(va)]);
 }
 
+/*
+ * Returns the lowest valid pde for a given virtual address.
+ * The next level may or may not point to a valid page or block.
+ */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
+{
+	pd_entry_t *l0, *l1, *l2, desc;
+
+	l0 = pmap_l0(pmap, va);
+	desc = pmap_load(l0) & ATTR_DESCR_MASK;
+	if (desc != L0_TABLE) {
+		*level = -1;
+		return (NULL);
+	}
+
+	l1 = pmap_l0_to_l1(l0, va);
+	desc = pmap_load(l1) & ATTR_DESCR_MASK;
+	if (desc != L1_TABLE) {
+		*level = 0;
+		return (l0);
+	}
+
+	l2 = pmap_l1_to_l2(l1, va);
+	desc = pmap_load(l2) & ATTR_DESCR_MASK;
+	if (desc != L2_TABLE) {
+		*level = 1;
+		return (l1);
+	}
+
+	*level = 2;
+	return (l2);
+}
+
+/*
+ * Returns the lowest valid pte block or table entry for a given virtual
+ * address. If there are no valid entries return NULL and set the level to
+ * the first invalid level.
+ */
 static __inline pt_entry_t *
-pmap_l3(pmap_t pmap, vm_offset_t va)
+pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
 {
-	pd_entry_t *l2;
+	pd_entry_t *l1, *l2, desc;
+	pt_entry_t *l3;
 
-	l2 = pmap_l2(pmap, va);
-	if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
+	l1 = pmap_l1(pmap, va);
+	if (l1 == NULL) {
+		*level = 0;
 		return (NULL);
+	}
+	desc = pmap_load(l1) & ATTR_DESCR_MASK;
+	if (desc == L1_BLOCK) {
+		*level = 1;
+		return (l1);
+	}
 
-	return (pmap_l2_to_l3(l2, va));
+	if (desc != L1_TABLE) {
+		*level = 1;
+		return (NULL);
+	}
+
+	l2 = pmap_l1_to_l2(l1, va);
+	desc = pmap_load(l2) & ATTR_DESCR_MASK;
+	if (desc == L2_BLOCK) {
+		*level = 2;
+		return (l2);
+	}
+
+	if (desc != L2_TABLE) {
+		*level = 2;
+		return (NULL);
+	}
+
+	*level = 3;
+	l3 = pmap_l2_to_l3(l2, va);
+	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
+		return (NULL);
+
+	return (l3);
 }
 
 bool
-pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2,
-    pt_entry_t **l3)
+pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
+    pd_entry_t **l2, pt_entry_t **l3)
 {
-	pd_entry_t *l1p, *l2p;
+	pd_entry_t *l0p, *l1p, *l2p;
+
+	if (pmap->pm_l0 == NULL)
+		return (false);
+
+	l0p = pmap_l0(pmap, va);
+	*l0 = l0p;
 
-	if (pmap->pm_l1 == NULL)
+	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
 		return (false);
 
-	l1p = pmap_l1(pmap, va);
+	l1p = pmap_l0_to_l1(l0p, va);
 	*l1 = l1p;
 
 	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
@@ -544,7 +646,8 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_o
  *	Bootstrap the system enough to run with virtual memory.
  */
 void
-pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
+pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
+    vm_size_t kernlen)
 {
 	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
 	uint64_t kern_delta;
@@ -562,7 +665,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_padd
 	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
 
 	/* Set this early so we can use the pagetable walking functions */
-	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
+	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
 	PMAP_LOCK_INIT(kernel_pmap);
 
  	/*
@@ -805,30 +908,40 @@ pmap_invalidate_all(pmap_t pmap)
 vm_paddr_t 
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
-	pd_entry_t *l2p, l2;
-	pt_entry_t *l3p, l3;
+	pt_entry_t *pte, tpte;
 	vm_paddr_t pa;
+	int lvl;
 
 	pa = 0;
 	PMAP_LOCK(pmap);
 	/*
-	 * Start with the l2 tabel. We are unable to allocate
-	 * pages in the l1 table.
+	 * Find the block or page map for this virtual address. pmap_pte
+	 * will return either a valid block/page entry, or NULL.
 	 */
-	l2p = pmap_l2(pmap, va);
-	if (l2p != NULL) {
-		l2 = pmap_load(l2p);
-		if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
-			l3p = pmap_l2_to_l3(l2p, va);
-			if (l3p != NULL) {
-				l3 = pmap_load(l3p);
-
-				if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
-					pa = (l3 & ~ATTR_MASK) |
-					    (va & L3_OFFSET);
-			}
-		} else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
-			pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
+	pte = pmap_pte(pmap, va, &lvl);
+	if (pte != NULL) {
+		tpte = pmap_load(pte);
+		pa = tpte & ~ATTR_MASK;
+		switch(lvl) {
+		case 1:
+			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
+			    ("pmap_extract: Invalid L1 pte found: %lx",
+			    tpte & ATTR_DESCR_MASK));
+			pa |= (va & L1_OFFSET);
+			break;
+		case 2:
+			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
+			    ("pmap_extract: Invalid L2 pte found: %lx",
+			    tpte & ATTR_DESCR_MASK));
+			pa |= (va & L2_OFFSET);
+			break;
+		case 3:
+			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
+			    ("pmap_extract: Invalid L3 pte found: %lx",
+			    tpte & ATTR_DESCR_MASK));
+			pa |= (va & L3_OFFSET);
+			break;
+		}
 	}
 	PMAP_UNLOCK(pmap);
 	return (pa);
@@ -844,21 +957,31 @@ pmap_extract(pmap_t pmap, vm_offset_t va
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
-	pt_entry_t *l3p, l3;
+	pt_entry_t *pte, tpte;
 	vm_paddr_t pa;
 	vm_page_t m;
+	int lvl;
 
 	pa = 0;
 	m = NULL;
 	PMAP_LOCK(pmap);
 retry:
-	l3p = pmap_l3(pmap, va);
-	if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
-		if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
+	pte = pmap_pte(pmap, va, &lvl);
+	if (pte != NULL) {
+		tpte = pmap_load(pte);
+
+		KASSERT(lvl > 0 && lvl <= 3,
+		    ("pmap_extract_and_hold: Invalid level %d", lvl));
+		CTASSERT(L1_BLOCK == L2_BLOCK);
+		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
+		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
+		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
+		     tpte & ATTR_DESCR_MASK));
+		if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
 		    ((prot & VM_PROT_WRITE) == 0)) {
-			if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
+			if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa))
 				goto retry;
-			m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
+			m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
 			vm_page_hold(m);
 		}
 	}
@@ -870,25 +993,39 @@ retry:
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
-	pd_entry_t *l2p, l2;
-	pt_entry_t *l3;
+	pt_entry_t *pte, tpte;
 	vm_paddr_t pa;
+	int lvl;
 
 	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
 		pa = DMAP_TO_PHYS(va);
 	} else {
-		l2p = pmap_l2(kernel_pmap, va);
-		if (l2p == NULL)
-			panic("pmap_kextract: No l2");
-		l2 = pmap_load(l2p);
-		if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
-			return ((l2 & ~ATTR_MASK) |
-			    (va & L2_OFFSET));
-
-		l3 = pmap_l2_to_l3(l2p, va);
-		if (l3 == NULL)
-			panic("pmap_kextract: No l3...");
-		pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK);
+		pa = 0;
+		pte = pmap_pte(kernel_pmap, va, &lvl);
+		if (pte != NULL) {
+			tpte = pmap_load(pte);
+			pa = tpte & ~ATTR_MASK;
+			switch(lvl) {
+			case 1:
+				KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
+				    ("pmap_kextract: Invalid L1 pte found: %lx",
+				    tpte & ATTR_DESCR_MASK));
+				pa |= (va & L1_OFFSET);
+				break;
+			case 2:
+				KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
+				    ("pmap_kextract: Invalid L2 pte found: %lx",
+				    tpte & ATTR_DESCR_MASK));
+				pa |= (va & L2_OFFSET);
+				break;
+			case 3:
+				KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
+				    ("pmap_kextract: Invalid L3 pte found: %lx",
+				    tpte & ATTR_DESCR_MASK));
+				pa |= (va & L3_OFFSET);
+				break;
+			}
+		}
 	}
 	return (pa);
 }
@@ -900,8 +1037,10 @@ pmap_kextract(vm_offset_t va)
 void
 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 {
-	pt_entry_t *l3;
+	pd_entry_t *pde;
+	pt_entry_t *pte;
 	vm_offset_t va;
+	int lvl;
 
 	KASSERT((pa & L3_OFFSET) == 0,
 	   ("pmap_kenter_device: Invalid physical address"));
@@ -912,11 +1051,16 @@ pmap_kenter_device(vm_offset_t sva, vm_s
 
 	va = sva;
 	while (size != 0) {
-		l3 = pmap_l3(kernel_pmap, va);
-		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
-		pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
+		pde = pmap_pde(kernel_pmap, va, &lvl);
+		KASSERT(pde != NULL,
+		    ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va));
+		KASSERT(lvl == 2,
+		    ("pmap_kenter_device: Invalid level %d", lvl));
+
+		pte = pmap_l2_to_l3(pde, va);
+		pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
 		    ATTR_IDX(DEVICE_MEMORY) | L3_PAGE);
-		PTE_SYNC(l3);
+		PTE_SYNC(pte);
 
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
@@ -927,28 +1071,30 @@ pmap_kenter_device(vm_offset_t sva, vm_s
 
 /*
  * Remove a page from the kernel pagetables.
- * Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
-	pt_entry_t *l3;
+	pt_entry_t *pte;
+	int lvl;
 
-	l3 = pmap_l3(kernel_pmap, va);
-	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
+	pte = pmap_pte(kernel_pmap, va, &lvl);
+	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
+	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
 
-	if (pmap_l3_valid_cacheable(pmap_load(l3)))
+	if (pmap_l3_valid_cacheable(pmap_load(pte)))
 		cpu_dcache_wb_range(va, L3_SIZE);
-	pmap_load_clear(l3);
-	PTE_SYNC(l3);
+	pmap_load_clear(pte);
+	PTE_SYNC(pte);
 	pmap_invalidate_page(kernel_pmap, va);
 }
 
 void
 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
-	pt_entry_t *l3;
+	pt_entry_t *pte;
 	vm_offset_t va;
+	int lvl;
 
 	KASSERT((sva & L3_OFFSET) == 0,
 	   ("pmap_kremove_device: Invalid virtual address"));
@@ -957,10 +1103,12 @@ pmap_kremove_device(vm_offset_t sva, vm_
 
 	va = sva;
 	while (size != 0) {
-		l3 = pmap_l3(kernel_pmap, va);
-		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
-		pmap_load_clear(l3);
-		PTE_SYNC(l3);
+		pte = pmap_pte(kernel_pmap, va, &lvl);
+		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
+		KASSERT(lvl == 3,
+		    ("Invalid device pagetable level: %d != 3", lvl));
+		pmap_load_clear(pte);
+		PTE_SYNC(pte);
 
 		va += PAGE_SIZE;
 		size -= PAGE_SIZE;
@@ -999,19 +1147,26 @@ pmap_map(vm_offset_t *virt, vm_paddr_t s
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
-	pt_entry_t *l3, pa;
+	pd_entry_t *pde;
+	pt_entry_t *pte, pa;
 	vm_offset_t va;
 	vm_page_t m;
-	int i;
+	int i, lvl;
 
 	va = sva;
 	for (i = 0; i < count; i++) {
+		pde = pmap_pde(kernel_pmap, va, &lvl);
+		KASSERT(pde != NULL,
+		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
+		KASSERT(lvl == 2,
+		    ("pmap_qenter: Invalid level %d", lvl));
+
 		m = ma[i];
 		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
 		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
-		l3 = pmap_l3(kernel_pmap, va);
-		pmap_load_store(l3, pa);
-		PTE_SYNC(l3);
+		pte = pmap_l2_to_l3(pde, va);
+		pmap_load_store(pte, pa);
+		PTE_SYNC(pte);
 
 		va += L3_SIZE;
 	}
@@ -1021,25 +1176,27 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
- * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
-	pt_entry_t *l3;
+	pt_entry_t *pte;
 	vm_offset_t va;
+	int lvl;
 
 	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
 
 	va = sva;
 	while (count-- > 0) {
-		l3 = pmap_l3(kernel_pmap, va);
-		KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
-
-		if (pmap_l3_valid_cacheable(pmap_load(l3)))
-			cpu_dcache_wb_range(va, L3_SIZE);
-		pmap_load_clear(l3);
-		PTE_SYNC(l3);
+		pte = pmap_pte(kernel_pmap, va, &lvl);
+		KASSERT(lvl == 3,
+		    ("Invalid device pagetable level: %d != 3", lvl));
+		if (pte != NULL) {
+			if (pmap_l3_valid_cacheable(pmap_load(pte)))
+				cpu_dcache_wb_range(va, L3_SIZE);
+			pmap_load_clear(pte);
+			PTE_SYNC(pte);
+		}
 
 		va += PAGE_SIZE;
 	}
@@ -1104,26 +1261,47 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t
 	/*
 	 * unmap the page table page
 	 */
-	if (m->pindex >= NUPDE) {
-		/* PD page */
+	if (m->pindex >= (NUL2E + NUL1E)) {
+		/* l1 page */
+		pd_entry_t *l0;
+
+		l0 = pmap_l0(pmap, va);
+		pmap_load_clear(l0);
+		PTE_SYNC(l0);
+	} else if (m->pindex >= NUL2E) {
+		/* l2 page */
 		pd_entry_t *l1;
+
 		l1 = pmap_l1(pmap, va);
 		pmap_load_clear(l1);
 		PTE_SYNC(l1);
 	} else {
-		/* PTE page */
+		/* l3 page */
 		pd_entry_t *l2;
+
 		l2 = pmap_l2(pmap, va);
 		pmap_load_clear(l2);
 		PTE_SYNC(l2);
 	}
 	pmap_resident_count_dec(pmap, 1);
-	if (m->pindex < NUPDE) {
-		/* We just released a PT, unhold the matching PD */
-		vm_page_t pdpg;
+	if (m->pindex < NUL2E) {
+		/* We just released an l3, unhold the matching l2 */
+		pd_entry_t *l1, tl1;
+		vm_page_t l2pg;
 
-		pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK);
-		pmap_unwire_l3(pmap, va, pdpg, free);
+		l1 = pmap_l1(pmap, va);
+		tl1 = pmap_load(l1);
+		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+		pmap_unwire_l3(pmap, va, l2pg, free);
+	} else if (m->pindex < (NUL2E + NUL1E)) {
+		/* We just released an l2, unhold the matching l1 */
+		pd_entry_t *l0, tl0;
+		vm_page_t l1pg;
+
+		l0 = pmap_l0(pmap, va);
+		tl0 = pmap_load(l0);
+		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+		pmap_unwire_l3(pmap, va, l1pg, free);
 	}
 	pmap_invalidate_page(pmap, va);
 
@@ -1164,27 +1342,27 @@ pmap_pinit0(pmap_t pmap)
 
 	PMAP_LOCK_INIT(pmap);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
-	pmap->pm_l1 = kernel_pmap->pm_l1;
+	pmap->pm_l0 = kernel_pmap->pm_l0;
 }
 
 int
 pmap_pinit(pmap_t pmap)
 {
-	vm_paddr_t l1phys;
-	vm_page_t l1pt;
+	vm_paddr_t l0phys;
+	vm_page_t l0pt;
 
 	/*
-	 * allocate the l1 page
+	 * allocate the l0 page
 	 */
-	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
+	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 		VM_WAIT;
 
-	l1phys = VM_PAGE_TO_PHYS(l1pt);
-	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
+	l0phys = VM_PAGE_TO_PHYS(l0pt);
+	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
 
-	if ((l1pt->flags & PG_ZERO) == 0)
-		pagezero(pmap->pm_l1);
+	if ((l0pt->flags & PG_ZERO) == 0)
+		pagezero(pmap->pm_l0);
 
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 
@@ -1205,7 +1383,7 @@ pmap_pinit(pmap_t pmap)
 static vm_page_t
 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
-	vm_page_t m, /*pdppg, */pdpg;
+	vm_page_t m, l1pg, l2pg;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
@@ -1237,33 +1415,84 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t 
 	 * it isn't already there.
 	 */
 
-	if (ptepindex >= NUPDE) {
-		pd_entry_t *l1;
-		vm_pindex_t l1index;
+	if (ptepindex >= (NUL2E + NUL1E)) {
+		pd_entry_t *l0;
+		vm_pindex_t l0index;
+
+		l0index = ptepindex - (NUL2E + NUL1E);
+		l0 = &pmap->pm_l0[l0index];
+		pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
+		PTE_SYNC(l0);
+	} else if (ptepindex >= NUL2E) {
+		vm_pindex_t l0index, l1index;
+		pd_entry_t *l0, *l1;
+		pd_entry_t tl0;
+
+		l1index = ptepindex - NUL2E;
+		l0index = l1index >> L0_ENTRIES_SHIFT;
+
+		l0 = &pmap->pm_l0[l0index];
+		tl0 = pmap_load(l0);
+		if (tl0 == 0) {
+			/* recurse for allocating page dir */
+			if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
+			    lockp) == NULL) {
+				--m->wire_count;
+				/* XXX: release mem barrier? */
+				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+				vm_page_free_zero(m);
+				return (NULL);
+			}
+		} else {
+			l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+			l1pg->wire_count++;
+		}
 
-		l1index = ptepindex - NUPDE;
-		l1 = &pmap->pm_l1[l1index];
+		l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+		l1 = &l1[ptepindex & Ln_ADDR_MASK];
 		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
 		PTE_SYNC(l1);
-
 	} else {
-		vm_pindex_t l1index;
-		pd_entry_t *l1, *l2;
-
-		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
-		l1 = &pmap->pm_l1[l1index];
-		if (pmap_load(l1) == 0) {
+		vm_pindex_t l0index, l1index;
+		pd_entry_t *l0, *l1, *l2;
+		pd_entry_t tl0, tl1;
+
+		l1index = ptepindex >> Ln_ENTRIES_SHIFT;
+		l0index = l1index >> L0_ENTRIES_SHIFT;
+
+		l0 = &pmap->pm_l0[l0index];
+		tl0 = pmap_load(l0);
+		if (tl0 == 0) {
 			/* recurse for allocating page dir */
-			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
+			if (_pmap_alloc_l3(pmap, NUL2E + l1index,
 			    lockp) == NULL) {
 				--m->wire_count;
 				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
+			tl0 = pmap_load(l0);
+			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+			l1 = &l1[l1index & Ln_ADDR_MASK];
 		} else {
-			pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
-			pdpg->wire_count++;
+			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+			l1 = &l1[l1index & Ln_ADDR_MASK];
+			tl1 = pmap_load(l1);
+			if (tl1 == 0) {
+				/* recurse for allocating page dir */
+				if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+				    lockp) == NULL) {
+					--m->wire_count;
+					/* XXX: release mem barrier? */
+					atomic_subtract_int(
+					    &vm_cnt.v_wire_count, 1);
+					vm_page_free_zero(m);
+					return (NULL);
+				}
+			} else {
+				l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+				l2pg->wire_count++;
+			}
 		}
 
 		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
@@ -1281,8 +1510,9 @@ static vm_page_t
 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	vm_pindex_t ptepindex;
-	pd_entry_t *l2;
+	pd_entry_t *pde, tpde;
 	vm_page_t m;
+	int lvl;
 
 	/*
 	 * Calculate pagetable page index
@@ -1292,24 +1522,29 @@ retry:
 	/*
 	 * Get the page directory entry
 	 */
-	l2 = pmap_l2(pmap, va);
+	pde = pmap_pde(pmap, va, &lvl);
 
 	/*
-	 * If the page table page is mapped, we just increment the
-	 * hold count, and activate it.
+	 * If the page table page is mapped, we just increment the hold count,
+	 * and activate it. If we get a level 2 pde it will point to a level 3
+	 * table.
 	 */
-	if (l2 != NULL && pmap_load(l2) != 0) {
-		m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
-		m->wire_count++;
-	} else {
-		/*
-		 * Here if the pte page isn't mapped, or if it has been
-		 * deallocated.
-		 */
-		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
-		if (m == NULL && lockp != NULL)
-			goto retry;
+	if (lvl == 2) {
+		tpde = pmap_load(pde);
+		if (tpde != 0) {
+			m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
+			m->wire_count++;
+			return (m);
+		}
 	}
+
+	/*
+	 * Here if the pte page isn't mapped, or if it has been deallocated.
+	 */
+	m = _pmap_alloc_l3(pmap, ptepindex, lockp);
+	if (m == NULL && lockp != NULL)
+		goto retry;
+
 	return (m);
 }
 
@@ -1332,7 +1567,7 @@ pmap_release(pmap_t pmap)
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
 
-	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
+	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
 
 	m->wire_count--;
 	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
@@ -1369,7 +1604,7 @@ pmap_growkernel(vm_offset_t addr)
 {
 	vm_paddr_t paddr;
 	vm_page_t nkpg;
-	pd_entry_t *l1, *l2;
+	pd_entry_t *l0, *l1, *l2;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 
@@ -1377,7 +1612,11 @@ pmap_growkernel(vm_offset_t addr)
 	if (addr - 1 >= kernel_map->max_offset)
 		addr = kernel_map->max_offset;
 	while (kernel_vm_end < addr) {
-		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
+		l0 = pmap_l0(kernel_pmap, kernel_vm_end);
+		KASSERT(pmap_load(l0) != 0,
+		    ("pmap_growkernel: No level 0 kernel entry"));
+
+		l1 = pmap_l0_to_l1(l0, kernel_vm_end);
 		if (pmap_load(l1) == 0) {
 			/* We need a new PDP entry */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

