svn commit: r354286 - in head/sys/arm64: arm64 include

Alan Cox alc at FreeBSD.org
Sun Nov 3 17:45:34 UTC 2019


Author: alc
Date: Sun Nov  3 17:45:30 2019
New Revision: 354286
URL: https://svnweb.freebsd.org/changeset/base/354286

Log:
  Utilize ASIDs to reduce both the direct and indirect costs of context
  switching.  The indirect costs are the unnecessary TLB misses that are
  incurred when ASIDs are not used.  In fact, currently, when we perform a
  context switch on one processor, we issue a broadcast TLB invalidation that
  flushes the TLB contents on every processor.
  
  Mark all user-space ("ttbr0") page table entries with the non-global flag so
  that they are cached in the TLB under their ASID.
  
  Correct an error in pmap_pinit0().  The pointer to the root of the page
  table was being initialized to the root of the kernel-space page table
  rather than a user-space page table.  However, the root of the page table
  that was being cached in process 0's md_l0addr field correctly pointed to a
  user-space page table.  As long as ASIDs weren't being used, this was
  harmless, except that it led to some unnecessary page table switches in
  pmap_switch().  Specifically, other kernel processes besides process 0 would
  have their md_l0addr field set to the root of the kernel-space page table,
  and so pmap_switch() would actually change page tables when switching
  between process 0 and other kernel processes.
  
  Implement a workaround for Cavium erratum 27456 affecting ThunderX machines.
  (I would like to thank andrew@ for providing the code to detect the affected
  machines.)
  
  Address integer overflow in the definition of TCR_ASID_16.
  
  Set up TCR according to the PARange and ASIDBits fields from
  ID_AA64MMFR0_EL1.  Previously, TCR_ASID_16 was unconditionally set.
  
  Modify build_l1_block_pagetable so that lower attributes, such as ATTR_nG,
  can be specified as a parameter.
  
  Eliminate some unused code.
  
  Earlier versions were tested to varying degrees by: andrew, emaste, markj
  
  MFC after:	3 weeks
  Differential Revision:	https://reviews.freebsd.org/D21922

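For reference, the TTBR0_EL1 value that this change installs on context
switch packs the ASID into bits 63:48 above the physical address of the
level-0 page table, as the ASID_TO_OPERAND() and pmap_to_ttbr0() code in
the diff below does.  A minimal standalone sketch of that layout (the
asid and l0_paddr values are hypothetical):

	#include <stdint.h>

	#define	ASID_TO_OPERAND_SHIFT	48

	/* Compose TTBR0_EL1: ASID in bits 63:48, L0 table PA below. */
	static uint64_t
	make_ttbr0(uint64_t l0_paddr, uint16_t asid)
	{
		return (((uint64_t)asid << ASID_TO_OPERAND_SHIFT) | l0_paddr);
	}
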
Modified:
  head/sys/arm64/arm64/cpu_errata.c
  head/sys/arm64/arm64/cpufunc_asm.S
  head/sys/arm64/arm64/efirt_machdep.c
  head/sys/arm64/arm64/genassym.c
  head/sys/arm64/arm64/locore.S
  head/sys/arm64/arm64/machdep.c
  head/sys/arm64/arm64/mp_machdep.c
  head/sys/arm64/arm64/pmap.c
  head/sys/arm64/arm64/vm_machdep.c
  head/sys/arm64/include/armreg.h
  head/sys/arm64/include/cpufunc.h
  head/sys/arm64/include/pcpu.h
  head/sys/arm64/include/pmap.h
  head/sys/arm64/include/proc.h

Modified: head/sys/arm64/arm64/cpu_errata.c
==============================================================================
--- head/sys/arm64/arm64/cpu_errata.c	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/cpu_errata.c	Sun Nov  3 17:45:30 2019	(r354286)
@@ -59,6 +59,7 @@ static enum {
 
 static cpu_quirk_install install_psci_bp_hardening;
 static cpu_quirk_install install_ssbd_workaround;
+static cpu_quirk_install install_thunderx_bcast_tlbi_workaround;
 
 static struct cpu_quirks cpu_quirks[] = {
 	{
@@ -92,6 +93,18 @@ static struct cpu_quirks cpu_quirks[] = {
 		.midr_value = 0,
 		.quirk_install = install_ssbd_workaround,
 	},
+	{
+		.midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+		.midr_value =
+		    CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0),
+		.quirk_install = install_thunderx_bcast_tlbi_workaround,
+	},
+	{
+		.midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+		.midr_value =
+		    CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX_81XX, 0, 0),
+		.quirk_install = install_thunderx_bcast_tlbi_workaround,
+	},
 };
 
 static void
@@ -135,6 +148,30 @@ install_ssbd_workaround(void)
 	default:
 		PCPU_SET(ssbd, smccc_arch_workaround_2);
 		break;
+	}
+}
+
+/*
+ * Workaround Cavium erratum 27456.
+ *
+ * Invalidate the local icache when changing address spaces.
+ */
+static void
+install_thunderx_bcast_tlbi_workaround(void)
+{
+	u_int midr;
+
+	midr = get_midr();
+	if (CPU_PART(midr) == CPU_PART_THUNDERX_81XX)
+		PCPU_SET(bcast_tlbi_workaround, 1);
+	else if (CPU_PART(midr) == CPU_PART_THUNDERX) {
+		if (CPU_VAR(midr) == 0) {
+			/* ThunderX 1.x */
+			PCPU_SET(bcast_tlbi_workaround, 1);
+		} else if (CPU_VAR(midr) == 1 && CPU_REV(midr) <= 1) {
+			/* ThunderX 2.0 - 2.1 */
+			PCPU_SET(bcast_tlbi_workaround, 1);
+		}
 	}
 }
 

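For context, these table entries are matched by masking the running
CPU's MIDR register and comparing the result against each entry's value;
a minimal sketch of that matching loop, which lives elsewhere in
cpu_errata.c and is unchanged by this commit (the function name here is
illustrative):

	static void
	install_cpu_quirks_sketch(void)
	{
		u_int midr;
		size_t i;

		midr = get_midr();
		for (i = 0; i < nitems(cpu_quirks); i++) {
			if ((midr & cpu_quirks[i].midr_mask) ==
			    cpu_quirks[i].midr_value)
				cpu_quirks[i].quirk_install();
		}
	}

With CPU_IMPL_MASK | CPU_PART_MASK, the two new entries match any
variant or revision of a ThunderX or ThunderX 81xx part; the installer
above then narrows to the affected variants and revisions itself.
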
Modified: head/sys/arm64/arm64/cpufunc_asm.S
==============================================================================
--- head/sys/arm64/arm64/cpufunc_asm.S	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/cpufunc_asm.S	Sun Nov  3 17:45:30 2019	(r354286)
@@ -93,14 +93,6 @@ END(arm64_nullop)
  * Generic functions to read/modify/write the internal coprocessor registers
  */
 
-ENTRY(arm64_setttb)
-	dsb	ish
-	msr	ttbr0_el1, x0
-	dsb	ish
-	isb
-	ret
-END(arm64_setttb)
-
 ENTRY(arm64_tlb_flushID)
 	dsb	ishst
 #ifdef SMP

Modified: head/sys/arm64/arm64/efirt_machdep.c
==============================================================================
--- head/sys/arm64/arm64/efirt_machdep.c	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/efirt_machdep.c	Sun Nov  3 17:45:30 2019	(r354286)
@@ -62,9 +62,9 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_pager.h>
 
 static vm_object_t obj_1t1_pt;
-static vm_page_t efi_l0_page;
-static pd_entry_t *efi_l0;
 static vm_pindex_t efi_1t1_idx;
+static pd_entry_t *efi_l0;
+static uint64_t efi_ttbr0;
 
 void
 efi_destroy_1t1_map(void)
@@ -81,8 +81,9 @@ efi_destroy_1t1_map(void)
 	}
 
 	obj_1t1_pt = NULL;
+	efi_1t1_idx = 0;
 	efi_l0 = NULL;
-	efi_l0_page = NULL;
+	efi_ttbr0 = 0;
 }
 
 static vm_page_t
@@ -164,6 +165,7 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int 
 	struct efi_md *p;
 	pt_entry_t *l3, l3_attr;
 	vm_offset_t va;
+	vm_page_t efi_l0_page;
 	uint64_t idx;
 	int i, mode;
 
@@ -172,10 +174,11 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int 
 	    L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES * Ln_ENTRIES,
 	    VM_PROT_ALL, 0, NULL);
 	VM_OBJECT_WLOCK(obj_1t1_pt);
-	efi_1t1_idx = 0;
 	efi_l0_page = efi_1t1_page();
 	VM_OBJECT_WUNLOCK(obj_1t1_pt);
 	efi_l0 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_l0_page));
+	efi_ttbr0 = ASID_TO_OPERAND(ASID_RESERVED_FOR_EFI) |
+	    VM_PAGE_TO_PHYS(efi_l0_page);
 
 	for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p,
 	    descsz)) {
@@ -213,7 +216,7 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int 
 		printf("MAP %lx mode %x pages %lu\n", p->md_phys, mode, p->md_pages);
 
 		l3_attr = ATTR_DEFAULT | ATTR_IDX(mode) | ATTR_AP(ATTR_AP_RW) |
-		    L3_PAGE;
+		    ATTR_nG | L3_PAGE;
 		if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP)
 			l3_attr |= ATTR_UXN | ATTR_PXN;
 
@@ -236,42 +239,37 @@ int
 efi_arch_enter(void)
 {
 
-	__asm __volatile(
-	    "msr ttbr0_el1, %0	\n"
-	    "isb		\n"
-	    "dsb  ishst		\n"
-	    "tlbi vmalle1is	\n"
-	    "dsb  ish		\n"
-	    "isb		\n"
-	     : : "r"(VM_PAGE_TO_PHYS(efi_l0_page)));
+	CRITICAL_ASSERT(curthread);
 
+	/*
+	 * Temporarily switch to EFI's page table.  However, we leave curpmap
+	 * unchanged in order to prevent its ASID from being reclaimed before
+	 * we switch back to its page table in efi_arch_leave().
+	 */
+	set_ttbr0(efi_ttbr0);
+	if (PCPU_GET(bcast_tlbi_workaround) != 0)
+		invalidate_local_icache();
+
 	return (0);
 }
 
 void
 efi_arch_leave(void)
 {
-	struct thread *td;
 
 	/*
 	 * Restore the pcpu pointer. Some UEFI implementations trash it and
 	 * we don't store it before calling into them. To fix this we need
 	 * to restore it after returning to the kernel context. As reading
-	 * curthread will access x18 we need to restore it before loading
-	 * the thread pointer.
+	 * curpmap will access x18 we need to restore it before loading
+	 * the pmap pointer.
 	 */
 	__asm __volatile(
 	    "mrs x18, tpidr_el1	\n"
 	);
-	td = curthread;
-	__asm __volatile(
-	    "msr ttbr0_el1, %0	\n"
-	    "isb		\n"
-	    "dsb  ishst		\n"
-	    "tlbi vmalle1is	\n"
-	    "dsb  ish		\n"
-	    "isb		\n"
-	     : : "r"(td->td_proc->p_md.md_l0addr));
+	set_ttbr0(pmap_to_ttbr0(PCPU_GET(curpmap)));
+	if (PCPU_GET(bcast_tlbi_workaround) != 0)
+		invalidate_local_icache();
 }
 
 int

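The enter/leave pair above is meant to run inside a critical section
(hence the CRITICAL_ASSERT), so the thread cannot migrate or be
preempted while EFI's page table is installed and curpmap intentionally
disagrees with TTBR0.  A hedged sketch of the expected calling pattern
(efi_rt_call() is a hypothetical stand-in for the actual UEFI runtime
service invocation):

	critical_enter();
	if (efi_arch_enter() == 0) {
		/* EFI's 1:1 map is live; curpmap still names the old pmap. */
		status = efi_rt_call();		/* hypothetical */
		efi_arch_leave();		/* back to curpmap's ttbr0 */
	}
	critical_exit();
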
Modified: head/sys/arm64/arm64/genassym.c
==============================================================================
--- head/sys/arm64/arm64/genassym.c	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/genassym.c	Sun Nov  3 17:45:30 2019	(r354286)
@@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/frame.h>
 #include <machine/pcb.h>
-#include <machine/vmparam.h>
 
 ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
 ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
@@ -53,9 +52,6 @@ ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp));
 ASSYM(PCB_TPIDRRO, offsetof(struct pcb, pcb_tpidrro_el0));
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
-
-ASSYM(P_MD, offsetof(struct proc, p_md));
-ASSYM(MD_L0ADDR, offsetof(struct mdproc, md_l0addr));
 
 ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
 

Modified: head/sys/arm64/arm64/locore.S
==============================================================================
--- head/sys/arm64/arm64/locore.S	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/locore.S	Sun Nov  3 17:45:30 2019	(r354286)
@@ -392,14 +392,15 @@ create_pagetables:
 	bl	link_l0_pagetable
 
 	/*
-	 * Build the TTBR0 maps.
+	 * Build the TTBR0 maps.  As TTBR0 maps, they must specify ATTR_nG.
+	 * They are only needed early on, so the VA = PA map is uncached.
 	 */
 	add	x27, x24, #PAGE_SIZE
 
 	mov	x6, x27		/* The initial page table */
 #if defined(SOCDEV_PA) && defined(SOCDEV_VA)
 	/* Create a table for the UART */
-	mov	x7, #DEVICE_MEM
+	mov	x7, #(ATTR_nG | ATTR_IDX(DEVICE_MEM))
 	mov	x8, #(SOCDEV_VA)	/* VA start */
 	mov	x9, #(SOCDEV_PA)	/* PA start */
 	mov	x10, #1
@@ -407,7 +408,7 @@ create_pagetables:
 #endif
 
 	/* Create the VA = PA map */
-	mov	x7, #NORMAL_UNCACHED /* Uncached as it's only needed early on */
+	mov	x7, #(ATTR_nG | ATTR_IDX(NORMAL_UNCACHED))
 	mov	x9, x27
 	mov	x8, x9		/* VA start (== PA start) */
 	mov	x10, #1
@@ -497,7 +498,7 @@ link_l1_pagetable:
 /*
  * Builds count 1 GiB page table entry
  *  x6  = L1 table
- *  x7  = Type (0 = Device, 1 = Normal)
+ *  x7  = Variable lower block attributes
  *  x8  = VA start
  *  x9  = PA start (trashed)
  *  x10 = Entry count
@@ -512,8 +513,7 @@ build_l1_block_pagetable:
 	and	x11, x11, #Ln_ADDR_MASK
 
 	/* Build the L1 block entry */
-	lsl	x12, x7, #2
-	orr	x12, x12, #L1_BLOCK
+	orr	x12, x7, #L1_BLOCK
 	orr	x12, x12, #(ATTR_AF)
 #ifdef SMP
 	orr	x12, x12, ATTR_SH(ATTR_SH_IS)
@@ -599,11 +599,17 @@ start_mmu:
 	msr	mair_el1, x2
 
 	/*
-	 * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
+	 * Setup TCR according to the PARange and ASIDBits fields
+	 * from ID_AA64MMFR0_EL1.  More precisely, set TCR_EL1.AS
+	 * to 1 only if the ASIDBits field equals 0b0010.
 	 */
 	ldr	x2, tcr
 	mrs	x3, id_aa64mmfr0_el1
 	bfi	x2, x3, #32, #3
+	and	x3, x3, #0xF0
+	cmp	x3, #0x20
+	cset	x3, eq
+	bfi	x2, x3, #36, #1
 	msr	tcr_el1, x2
 
 	/* Setup SCTLR */
@@ -624,7 +630,7 @@ mair:
 		MAIR_ATTR(MAIR_NORMAL_WB, 2) |		\
 		MAIR_ATTR(MAIR_NORMAL_WT, 3)
 tcr:
-	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \
+	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | \
 	    TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
 sctlr_set:
 	/* Bits to set */

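In C, the new start_mmu TCR setup above amounts to the following sketch:
insert ID_AA64MMFR0_EL1.PARange (bits 2:0) into TCR_EL1.IPS (bits
34:32), and set TCR_EL1.AS (bit 36) only when the ASIDBits field (bits
7:4) reads 0b0010, i.e. when 16-bit ASIDs are implemented.
READ_SPECIALREG/WRITE_SPECIALREG and the TCR_* constants are the
kernel's; this fragment is for illustration only:

	uint64_t mmfr0, tcr;

	mmfr0 = READ_SPECIALREG(id_aa64mmfr0_el1);
	tcr = TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | TCR_CACHE_ATTRS |
	    TCR_SMP_ATTRS;
	tcr |= (mmfr0 & 0x7) << 32;		/* bfi x2, x3, #32, #3 */
	if ((mmfr0 & 0xf0) == 0x20)		/* ASIDBits == 0b0010 */
		tcr |= TCR_ASID_16;		/* TCR_EL1.AS = 1 */
	WRITE_SPECIALREG(tcr_el1, tcr);
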
Modified: head/sys/arm64/arm64/machdep.c
==============================================================================
--- head/sys/arm64/arm64/machdep.c	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/machdep.c	Sun Nov  3 17:45:30 2019	(r354286)
@@ -797,9 +797,6 @@ init_proc0(vm_offset_t kstack)
 	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
 	thread0.td_frame = &proc0_tf;
 	pcpup->pc_curpcb = thread0.td_pcb;
-
-	/* Set the base address of translation table 0. */
-	thread0.td_proc->p_md.md_l0addr = READ_SPECIALREG(ttbr0_el1);
 }
 
 typedef struct {

Modified: head/sys/arm64/arm64/mp_machdep.c
==============================================================================
--- head/sys/arm64/arm64/mp_machdep.c	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/mp_machdep.c	Sun Nov  3 17:45:30 2019	(r354286)
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
+#include <vm/vm_map.h>
 
 #include <machine/machdep.h>
 #include <machine/debug_monitor.h>
@@ -192,6 +193,7 @@ void
 init_secondary(uint64_t cpu)
 {
 	struct pcpu *pcpup;
+	pmap_t pmap0;
 
 	pcpup = &__pcpu[cpu];
 	/*
@@ -210,6 +212,12 @@ init_secondary(uint64_t cpu)
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pcpup->pc_curthread = pcpup->pc_idlethread;
 	pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
+
+	/* Initialize curpmap to match TTBR0's current setting. */
+	pmap0 = vmspace_pmap(&vmspace0);
+	KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1),
+	    ("pmap0 doesn't match cpu %ld's ttbr0", cpu));
+	pcpup->pc_curpmap = pmap0;
 
 	/*
 	 * Identify current CPU. This is necessary to setup

Modified: head/sys/arm64/arm64/pmap.c
==============================================================================
--- head/sys/arm64/arm64/pmap.c	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/pmap.c	Sun Nov  3 17:45:30 2019	(r354286)
@@ -113,6 +113,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
@@ -276,6 +277,48 @@ static u_int physmap_idx;
 
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
 
+/*
+ * This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs
+ * that it has currently allocated to a pmap, a cursor ("asid_next") to
+ * optimize its search for a free ASID in the bit vector, and an epoch number
+ * ("asid_epoch") to indicate when it has reclaimed all previously allocated
+ * ASIDs that are not currently active on a processor.
+ *
+ * The current epoch number is always in the range [0, INT_MAX).  Negative
+ * numbers and INT_MAX are reserved for special cases that are described
+ * below.
+ */
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, asid, CTLFLAG_RD, 0, "ASID allocator");
+static int asid_bits;
+SYSCTL_INT(_vm_pmap_asid, OID_AUTO, bits, CTLFLAG_RD, &asid_bits, 0,
+    "The number of bits in an ASID");
+static bitstr_t *asid_set;
+static int asid_set_size;
+static int asid_next;
+SYSCTL_INT(_vm_pmap_asid, OID_AUTO, next, CTLFLAG_RD, &asid_next, 0,
+    "The last allocated ASID plus one");
+static int asid_epoch;
+SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asid_epoch, 0,
+    "The current epoch number");
+static struct mtx asid_set_mutex;
+
+/*
+ * A pmap's cookie encodes an ASID and epoch number.  Cookies for reserved
+ * ASIDs have a negative epoch number, specifically, INT_MIN.  Cookies for
+ * dynamically allocated ASIDs have a non-negative epoch number.
+ *
+ * An invalid ASID is represented by -1.
+ *
+ * There are two special-case cookie values: (1) COOKIE_FROM(-1, INT_MIN),
+ * which indicates that an ASID should never be allocated to the pmap, and
+ * (2) COOKIE_FROM(-1, INT_MAX), which indicates that an ASID should be
+ * allocated when the pmap is next activated.
+ */
+#define	COOKIE_FROM(asid, epoch)	((long)((u_int)(asid) |	\
+					    ((u_long)(epoch) << 32)))
+#define	COOKIE_TO_ASID(cookie)		((int)(cookie))
+#define	COOKIE_TO_EPOCH(cookie)		((int)((u_long)(cookie) >> 32))
+
 static int superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
@@ -295,6 +338,8 @@ static void	pmap_pvh_free(struct md_page *pvh, pmap_t 
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 
+static bool pmap_activate_int(pmap_t pmap);
+static void pmap_alloc_asid(pmap_t pmap);
 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
 static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
@@ -308,6 +353,7 @@ static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2,
     pd_entry_t l1e, struct spglist *free, struct rwlock **lockp);
 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp);
+static void pmap_reset_asid_set(void);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m, struct rwlock **lockp);
 
@@ -786,6 +832,10 @@ pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_
 	uint64_t kern_delta;
 	int i;
 
+	/* Verify that the ASID is set through TTBR0. */
+	KASSERT((READ_SPECIALREG(tcr_el1) & TCR_A1) == 0,
+	    ("pmap_bootstrap: TCR_EL1.A1 != 0"));
+
 	kern_delta = KERNBASE - kernstart;
 
 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
@@ -795,6 +845,8 @@ pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_
 	/* Set this early so we can use the pagetable walking functions */
 	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
 	PMAP_LOCK_INIT(kernel_pmap);
+	kernel_pmap->pm_l0_paddr = l0pt - kern_delta;
+	kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN);
 
 	/* Assume the address we were loaded to is a valid physical address */
 	min_pa = KERNBASE - kern_delta;
@@ -908,6 +960,11 @@ pmap_init(void)
 	int i, pv_npg;
 
 	/*
+	 * Determine whether an ASID is 8 or 16 bits in size.
+	 */
+	asid_bits = (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8;
+
+	/*
 	 * Are large page mappings enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
@@ -918,6 +975,18 @@ pmap_init(void)
 	}
 
 	/*
+	 * Initialize the ASID allocator.  At this point, we are still too
+	 * early in the overall initialization process to use bit_alloc().
+	 */
+	asid_set_size = 1 << asid_bits;
+	asid_set = (bitstr_t *)kmem_malloc(bitstr_size(asid_set_size),
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < ASID_FIRST_AVAILABLE; i++)
+		bit_set(asid_set, i);
+	asid_next = ASID_FIRST_AVAILABLE;
+	mtx_init(&asid_set_mutex, "asid set", NULL, MTX_SPIN);
+
+	/*
 	 * Initialize the pv chunk list mutex.
 	 */
 	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
@@ -971,30 +1040,42 @@ SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLA
 static __inline void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
+	uint64_t r;
 
 	sched_pin();
-	__asm __volatile(
-	    "dsb  ishst		\n"
-	    "tlbi vaae1is, %0	\n"
-	    "dsb  ish		\n"
-	    "isb		\n"
-	    : : "r"(va >> PAGE_SHIFT));
+	dsb(ishst);
+	if (pmap == kernel_pmap) {
+		r = atop(va);
+		__asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+	} else {
+		r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | atop(va);
+		__asm __volatile("tlbi vae1is, %0" : : "r" (r));
+	}
+	dsb(ish);
+	isb();
 	sched_unpin();
 }
 
 static __inline void
 pmap_invalidate_range_nopin(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	vm_offset_t addr;
+	uint64_t end, r, start;
 
 	dsb(ishst);
-	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
-		__asm __volatile(
-		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
+	if (pmap == kernel_pmap) {
+		start = atop(sva);
+		end = atop(eva);
+		for (r = start; r < end; r++)
+			__asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+	} else {
+		start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
+		start |= atop(sva);
+		end |= atop(eva);
+		for (r = start; r < end; r++)
+			__asm __volatile("tlbi vae1is, %0" : : "r" (r));
 	}
-	__asm __volatile(
-	    "dsb  ish	\n"
-	    "isb	\n");
+	dsb(ish);
+	isb();
 }
 
 static __inline void
@@ -1009,13 +1090,18 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm
 static __inline void
 pmap_invalidate_all(pmap_t pmap)
 {
+	uint64_t r;
 
 	sched_pin();
-	__asm __volatile(
-	    "dsb  ishst		\n"
-	    "tlbi vmalle1is	\n"
-	    "dsb  ish		\n"
-	    "isb		\n");
+	dsb(ishst);
+	if (pmap == kernel_pmap) {
+		__asm __volatile("tlbi vmalle1is");
+	} else {
+		r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
+		__asm __volatile("tlbi aside1is, %0" : : "r" (r));
+	}
+	dsb(ish);
+	isb();
 	sched_unpin();
 }
 
@@ -1446,14 +1532,17 @@ pmap_pinit0(pmap_t pmap)
 
 	PMAP_LOCK_INIT(pmap);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
-	pmap->pm_l0 = kernel_pmap->pm_l0;
+	pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1);
+	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
 	pmap->pm_root.rt_root = 0;
+	pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN);
+
+	PCPU_SET(curpmap, pmap);
 }
 
 int
 pmap_pinit(pmap_t pmap)
 {
-	vm_paddr_t l0phys;
 	vm_page_t l0pt;
 
 	/*
@@ -1463,14 +1552,15 @@ pmap_pinit(pmap_t pmap)
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 		vm_wait(NULL);
 
-	l0phys = VM_PAGE_TO_PHYS(l0pt);
-	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
+	pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(l0pt);
+	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
 
 	if ((l0pt->flags & PG_ZERO) == 0)
 		pagezero(pmap->pm_l0);
 
 	pmap->pm_root.rt_root = 0;
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
+	pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
 
 	return (1);
 }
@@ -1712,6 +1802,7 @@ void
 pmap_release(pmap_t pmap)
 {
 	vm_page_t m;
+	int asid;
 
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
@@ -1719,8 +1810,16 @@ pmap_release(pmap_t pmap)
 	KASSERT(vm_radix_is_empty(&pmap->pm_root),
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
-	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
+	mtx_lock_spin(&asid_set_mutex);
+	if (COOKIE_TO_EPOCH(pmap->pm_cookie) == asid_epoch) {
+		asid = COOKIE_TO_ASID(pmap->pm_cookie);
+		KASSERT(asid >= ASID_FIRST_AVAILABLE && asid < asid_set_size,
+		    ("pmap_release: pmap cookie has out-of-range asid"));
+		bit_clear(asid_set, asid);
+	}
+	mtx_unlock_spin(&asid_set_mutex);
 
+	m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
 	vm_page_unwire_noq(m);
 	vm_page_free_zero(m);
 }
@@ -3198,6 +3297,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 		new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
 	else
 		new_l3 |= ATTR_UXN;
+	if (pmap != kernel_pmap)
+		new_l3 |= ATTR_nG;
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		new_l3 |= ATTR_SW_MANAGED;
 		if ((prot & VM_PROT_WRITE) != 0) {
@@ -3462,6 +3563,8 @@ pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page
 		new_l2 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
 	else
 		new_l2 |= ATTR_UXN;
+	if (pmap != kernel_pmap)
+		new_l2 |= ATTR_nG;
 	return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
 	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
 	    KERN_SUCCESS);
@@ -3762,6 +3865,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, v
 		l3_val |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
 	else
 		l3_val |= ATTR_UXN;
+	if (pmap != kernel_pmap)
+		l3_val |= ATTR_nG;
 
 	/*
 	 * Now validate mapping with RO protection
@@ -4299,6 +4404,8 @@ pmap_remove_pages(pmap_t pmap)
 	int allfree, field, freed, idx, lvl;
 	vm_paddr_t pa;
 
+	KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap));
+
 	lock = NULL;
 
 	SLIST_INIT(&free);
@@ -5671,24 +5778,134 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t
 	return (val);
 }
 
+/*
+ * Garbage collect every ASID that is neither active on a processor nor
+ * reserved.
+ */
+static void
+pmap_reset_asid_set(void)
+{
+	pmap_t pmap;
+	int asid, cpuid, epoch;
+
+	mtx_assert(&asid_set_mutex, MA_OWNED);
+
+	/*
+	 * Ensure that the store to asid_epoch is globally visible before the
+	 * loads from pc_curpmap are performed.
+	 */
+	epoch = asid_epoch + 1;
+	if (epoch == INT_MAX)
+		epoch = 0;
+	asid_epoch = epoch;
+	dsb(ishst);
+	__asm __volatile("tlbi vmalle1is");
+	dsb(ish);
+	bit_nclear(asid_set, ASID_FIRST_AVAILABLE, asid_set_size - 1);
+	CPU_FOREACH(cpuid) {
+		if (cpuid == curcpu)
+			continue;
+		pmap = pcpu_find(cpuid)->pc_curpmap;
+		asid = COOKIE_TO_ASID(pmap->pm_cookie);
+		if (asid == -1)
+			continue;
+		bit_set(asid_set, asid);
+		pmap->pm_cookie = COOKIE_FROM(asid, epoch);
+	}
+}
+
+/*
+ * Allocate a new ASID for the specified pmap.
+ */
+static void
+pmap_alloc_asid(pmap_t pmap)
+{
+	int new_asid;
+
+	mtx_lock_spin(&asid_set_mutex);
+
+	/*
+	 * While this processor was waiting to acquire the asid set mutex,
+	 * pmap_reset_asid_set() running on another processor might have
+	 * updated this pmap's cookie to the current epoch.  In which case, we
+	 * don't need to allocate a new ASID.
+	 */
+	if (COOKIE_TO_EPOCH(pmap->pm_cookie) == asid_epoch)
+		goto out;
+
+	bit_ffc_at(asid_set, asid_next, asid_set_size, &new_asid);
+	if (new_asid == -1) {
+		bit_ffc_at(asid_set, ASID_FIRST_AVAILABLE, asid_next,
+		    &new_asid);
+		if (new_asid == -1) {
+			pmap_reset_asid_set();
+			bit_ffc_at(asid_set, ASID_FIRST_AVAILABLE,
+			    asid_set_size, &new_asid);
+			KASSERT(new_asid != -1, ("ASID allocation failure"));
+		}
+	}
+	bit_set(asid_set, new_asid);
+	asid_next = new_asid + 1;
+	pmap->pm_cookie = COOKIE_FROM(new_asid, asid_epoch);
+out:
+	mtx_unlock_spin(&asid_set_mutex);
+}
+
+/*
+ * Compute the value that should be stored in ttbr0 to activate the specified
+ * pmap.  This value may change from time to time.
+ */
+uint64_t
+pmap_to_ttbr0(pmap_t pmap)
+{
+
+	return (ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) |
+	    pmap->pm_l0_paddr);
+}
+
+static bool
+pmap_activate_int(pmap_t pmap)
+{
+	int epoch;
+
+	KASSERT(PCPU_GET(curpmap) != NULL, ("no active pmap"));
+	KASSERT(pmap != kernel_pmap, ("kernel pmap activation"));
+	if (pmap == PCPU_GET(curpmap))
+		return (false);
+
+	/*
+	 * Ensure that the store to curpmap is globally visible before the
+	 * load from asid_epoch is performed.
+	 */
+	PCPU_SET(curpmap, pmap);
+	dsb(ish);
+	epoch = COOKIE_TO_EPOCH(pmap->pm_cookie);
+	if (epoch >= 0 && epoch != asid_epoch)
+		pmap_alloc_asid(pmap);
+
+	set_ttbr0(pmap_to_ttbr0(pmap));
+	if (PCPU_GET(bcast_tlbi_workaround) != 0)
+		invalidate_local_icache();
+	return (true);
+}
+
 void
 pmap_activate(struct thread *td)
 {
 	pmap_t	pmap;
 
-	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
-	td->td_proc->p_md.md_l0addr = vtophys(pmap->pm_l0);
-	__asm __volatile(
-	    "msr ttbr0_el1, %0	\n"
-	    "isb		\n"
-	    : : "r"(td->td_proc->p_md.md_l0addr));
-	pmap_invalidate_all(pmap);
+	critical_enter();
+	(void)pmap_activate_int(pmap);
 	critical_exit();
 }
 
+/*
+ * To eliminate the unused parameter "old", we would have to add an instruction
+ * to cpu_switch().
+ */
 struct pcb *
-pmap_switch(struct thread *old, struct thread *new)
+pmap_switch(struct thread *old __unused, struct thread *new)
 {
 	pcpu_bp_harden bp_harden;
 	struct pcb *pcb;
@@ -5705,20 +5922,7 @@ pmap_switch(struct thread *old, struct thread *new)
 	 * to a user process.
 	 */
 
-	if (old == NULL ||
-	    old->td_proc->p_md.md_l0addr != new->td_proc->p_md.md_l0addr) {
-		__asm __volatile(
-		    /* Switch to the new pmap */
-		    "msr	ttbr0_el1, %0	\n"
-		    "isb			\n"
-
-		    /* Invalidate the TLB */
-		    "dsb	ishst		\n"
-		    "tlbi	vmalle1is	\n"
-		    "dsb	ish		\n"
-		    "isb			\n"
-		    : : "r"(new->td_proc->p_md.md_l0addr));
-
+	if (pmap_activate_int(vmspace_pmap(new->td_proc->p_vmspace))) {
 		/*
 		 * Stop userspace from training the branch predictor against
 		 * other processes. This will call into a CPU specific

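To make the cookie encoding in pmap.c concrete, here is a minimal
sketch of a round trip through the COOKIE_* macros (the ASID and epoch
values are hypothetical; assert(), INT_MIN, and INT_MAX are from the
usual headers):

	long c;

	c = COOKIE_FROM(42, 7);		/* ASID 42, epoch 7 */
	assert(COOKIE_TO_ASID(c) == 42);
	assert(COOKIE_TO_EPOCH(c) == 7);

	/* The two special cases described in the comment block: */
	c = COOKIE_FROM(-1, INT_MIN);	/* never allocate an ASID */
	assert(COOKIE_TO_ASID(c) == -1 && COOKIE_TO_EPOCH(c) == INT_MIN);
	c = COOKIE_FROM(-1, INT_MAX);	/* allocate on next activation */
	assert(COOKIE_TO_EPOCH(c) == INT_MAX);

Because asid_epoch is kept in [0, INT_MAX), a negative cookie epoch
never compares equal to it, so pmap_activate_int() skips allocation for
reserved cookies, while the INT_MAX epoch always mismatches and forces
pmap_alloc_asid() on first activation.
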
Modified: head/sys/arm64/arm64/vm_machdep.c
==============================================================================
--- head/sys/arm64/arm64/vm_machdep.c	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/arm64/vm_machdep.c	Sun Nov  3 17:45:30 2019	(r354286)
@@ -91,9 +91,6 @@ cpu_fork(struct thread *td1, struct proc *p2, struct t
 	td2->td_pcb = pcb2;
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 
-	td2->td_proc->p_md.md_l0addr =
-	    vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0);
-
 	tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
 	bcopy(td1->td_frame, tf, sizeof(*tf));
 	tf->tf_x[0] = 0;

Modified: head/sys/arm64/include/armreg.h
==============================================================================
--- head/sys/arm64/include/armreg.h	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/include/armreg.h	Sun Nov  3 17:45:30 2019	(r354286)
@@ -619,7 +619,7 @@
 #define	PSR_FLAGS	0xf0000000
 
 /* TCR_EL1 - Translation Control Register */
-#define	TCR_ASID_16	(1 << 36)
+#define	TCR_ASID_16	(0x1UL << 36)
 
 #define	TCR_IPS_SHIFT	32
 #define	TCR_IPS_32BIT	(0 << TCR_IPS_SHIFT)
@@ -640,6 +640,8 @@
 #define	TCR_ORGN1_WBWA	(0x1UL << TCR_ORGN1_SHIFT)
 #define	TCR_IRGN1_SHIFT	24
 #define	TCR_IRGN1_WBWA	(0x1UL << TCR_IRGN1_SHIFT)
+#define	TCR_A1_SHIFT	22
+#define	TCR_A1		(0x1UL << TCR_A1_SHIFT)
 #define	TCR_SH0_SHIFT	12
 #define	TCR_SH0_IS	(0x3UL << TCR_SH0_SHIFT)
 #define	TCR_ORGN0_SHIFT	10

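The TCR_ASID_16 change above fixes real undefined behavior: the literal
1 has type int, and shifting a 32-bit int by 36 bits is undefined in C,
so bit 36 was never reliably produced.  A minimal sketch of the
before/after:

	/* Broken: 1 is a 32-bit int; << 36 is undefined behavior. */
	/* #define TCR_ASID_16	(1 << 36) */

	/* Fixed: a 64-bit unsigned operand makes the shift well defined. */
	#define	TCR_ASID_16	(0x1UL << 36)

	_Static_assert(TCR_ASID_16 == 0x1000000000UL, "TCR_EL1.AS bit");
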
Modified: head/sys/arm64/include/cpufunc.h
==============================================================================
--- head/sys/arm64/include/cpufunc.h	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/include/cpufunc.h	Sun Nov  3 17:45:30 2019	(r354286)
@@ -178,6 +178,27 @@ clrex(void)
 	__asm __volatile("clrex" : : : "memory");
 }
 
+static __inline void
+set_ttbr0(uint64_t ttbr0)
+{
+
+	__asm __volatile(
+	    "msr ttbr0_el1, %0 \n"
+	    "isb               \n"
+	    :
+	    : "r" (ttbr0));
+}
+
+static __inline void
+invalidate_local_icache(void)
+{
+
+	__asm __volatile(
+	    "ic iallu          \n"
+	    "dsb nsh           \n"
+	    "isb               \n");
+}
+
 extern int64_t dcache_line_size;
 extern int64_t icache_line_size;
 extern int64_t idcache_line_size;
@@ -185,7 +206,6 @@ extern int64_t dczva_line_size;
 
 #define	cpu_nullop()			arm64_nullop()
 #define	cpufunc_nullop()		arm64_nullop()
-#define	cpu_setttb(a)			arm64_setttb(a)
 
 #define	cpu_tlb_flushID()		arm64_tlb_flushID()
 
@@ -198,7 +218,6 @@ extern int64_t dczva_line_size;
 #define cpu_icache_sync_range_checked(a, s) arm64_icache_sync_range_checked((a), (s))
 
 void arm64_nullop(void);
-void arm64_setttb(vm_offset_t);
 void arm64_tlb_flushID(void);
 void arm64_icache_sync_range(vm_offset_t, vm_size_t);
 int arm64_icache_sync_range_checked(vm_offset_t, vm_size_t);

Modified: head/sys/arm64/include/pcpu.h
==============================================================================
--- head/sys/arm64/include/pcpu.h	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/include/pcpu.h	Sun Nov  3 17:45:30 2019	(r354286)
@@ -40,12 +40,14 @@ typedef int (*pcpu_ssbd)(int);
 struct debug_monitor_state;
 
 #define	PCPU_MD_FIELDS							\
-	u_int	pc_acpi_id;	/* ACPI CPU id */		\
-	u_int	pc_midr;	/* stored MIDR value */	\
+	u_int	pc_acpi_id;	/* ACPI CPU id */			\
+	u_int	pc_midr;	/* stored MIDR value */			\
 	uint64_t pc_clock;						\
 	pcpu_bp_harden pc_bp_harden;					\
 	pcpu_ssbd pc_ssbd;						\
-	char __pad[225]
+	struct pmap *pc_curpmap;					\
+	u_int	pc_bcast_tlbi_workaround;				\
+	char __pad[213]
 
 #ifdef _KERNEL
 

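The pad shrinks by exactly the size of the two new members, so
sizeof(struct pcpu) is unchanged: on LP64, a struct pmap * is 8 bytes
and a u_int is 4 bytes, and 225 - 12 = 213.  A hedged compile-time
check in that spirit (illustration only, not part of the commit):

	_Static_assert(sizeof(struct pmap *) + sizeof(u_int) == 225 - 213,
	    "pc_curpmap + pc_bcast_tlbi_workaround must fit the pad change");
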
Modified: head/sys/arm64/include/pmap.h
==============================================================================
--- head/sys/arm64/include/pmap.h	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/include/pmap.h	Sun Nov  3 17:45:30 2019	(r354286)
@@ -79,10 +79,12 @@ struct pv_addr {
 
 struct pmap {
 	struct mtx		pm_mtx;
-	struct pmap_statistics	pm_stats;	/* pmap statictics */
+	struct pmap_statistics	pm_stats;	/* pmap statistics */
+	vm_paddr_t		pm_l0_paddr;
 	pd_entry_t		*pm_l0;
 	TAILQ_HEAD(,pv_chunk)	pm_pvchunk;	/* list of mappings in pmap */
 	struct vm_radix		pm_root;	/* spare page table pages */
+	long			pm_cookie;	/* encodes the pmap's ASID */
 };
 typedef struct pmap *pmap_t;
 
@@ -132,6 +134,15 @@ extern struct pmap	kernel_pmap_store;
 #define	PMAP_TRYLOCK(pmap)	mtx_trylock(&(pmap)->pm_mtx)
 #define	PMAP_UNLOCK(pmap)	mtx_unlock(&(pmap)->pm_mtx)
 
+#define	ASID_RESERVED_FOR_PID_0	0
+#define	ASID_RESERVED_FOR_EFI	1
+#define	ASID_FIRST_AVAILABLE	(ASID_RESERVED_FOR_EFI + 1)
+#define	ASID_TO_OPERAND_SHIFT	48
+#define	ASID_TO_OPERAND(asid)	({					\
+	KASSERT((asid) != -1, ("invalid ASID"));			\
+	(uint64_t)(asid) << ASID_TO_OPERAND_SHIFT;			\
+})
+
 extern vm_offset_t virtual_avail;
 extern vm_offset_t virtual_end;
 
@@ -152,6 +163,7 @@ void	pmap_kremove_device(vm_offset_t, vm_size_t);
 void	*pmap_mapdev_attr(vm_offset_t pa, vm_size_t size, vm_memattr_t ma);
 bool	pmap_page_is_mapped(vm_page_t m);
 bool	pmap_ps_enabled(pmap_t pmap);
+uint64_t pmap_to_ttbr0(pmap_t pmap);
 
 void	*pmap_mapdev(vm_offset_t, vm_size_t);
 void	*pmap_mapbios(vm_paddr_t, vm_size_t);

Modified: head/sys/arm64/include/proc.h
==============================================================================
--- head/sys/arm64/include/proc.h	Sun Nov  3 15:42:08 2019	(r354285)
+++ head/sys/arm64/include/proc.h	Sun Nov  3 17:45:30 2019	(r354286)
@@ -40,7 +40,7 @@ struct mdthread {
 };
 
 struct mdproc {
-	vm_offset_t	md_l0addr;
+	long	md_dummy;
 };
 
 #define	KINFO_PROC_SIZE	1088

