git: 42f722e721cd - main - amd64: store pcids pmap data in pcpu zone

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Tue, 02 May 2023 11:39:55 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=42f722e721cd010ae5759a4b0d3b7b93c2b9cad2

commit 42f722e721cd010ae5759a4b0d3b7b93c2b9cad2
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-05-01 05:17:45 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-05-02 11:32:47 +0000

    amd64: store pcids pmap data in pcpu zone
    
    This change eliminates the struct pmap_pcid array embedded in struct
    pmap and sized by MAXCPU, which would bloat as MAXCPU increases.  It
    also removes false sharing of cache lines, since the array elements
    are mostly accessed locally by their corresponding CPUs.
    
    Suggested by:   mjg
    Reviewed by:    markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    Differential revision:  https://reviews.freebsd.org/D39890
---
 sys/amd64/amd64/pmap.c   | 38 +++++++++++++++++++++++++++++---------
 sys/amd64/include/pcpu.h |  4 +++-
 sys/amd64/include/pmap.h |  8 ++++++--
 3 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index a7716d7f394e..a09920d62961 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -384,7 +384,15 @@ pmap_pku_mask_bit(pmap_t pmap)
 #define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
 			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
 
-struct pmap kernel_pmap_store;
+/*
+ * Statically allocate kernel pmap memory.  However, memory for
+ * pm_pcids is obtained after the dynamic allocator is operational.
+ * Initialize it with a non-canonical pointer to catch early accesses
+ * regardless of the active mapping.
+ */
+struct pmap kernel_pmap_store = {
+	.pm_pcidp = (void *)0xdeadbeefdeadbeef,
+};
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
@@ -2026,9 +2034,14 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 
 	/* Initialize TLB Context Id. */
 	if (pmap_pcid_enabled) {
+		kernel_pmap->pm_pcidp = (void *)(uintptr_t)
+		    offsetof(struct pcpu, pc_kpmap_store);
 		for (i = 0; i < MAXCPU; i++) {
-			kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN;
-			kernel_pmap->pm_pcids[i].pm_gen = 1;
+			struct pmap_pcid *pcidp;
+
+			pcidp = zpcpu_get_cpu(kernel_pmap->pm_pcidp, i);
+			pcidp->pm_pcid = PMAP_PCID_KERN;
+			pcidp->pm_gen = 1;
 		}
 
 		/*
@@ -3031,6 +3044,7 @@ pmap_invalidate_ept(pmap_t pmap)
 static inline void
 pmap_invalidate_preipi_pcid(pmap_t pmap)
 {
+	struct pmap_pcid *pcidp;
 	u_int cpuid, i;
 
 	sched_pin();
@@ -3040,8 +3054,10 @@ pmap_invalidate_preipi_pcid(pmap_t pmap)
 		cpuid = 0xffffffff;	/* An impossible value */
 
 	CPU_FOREACH(i) {
-		if (cpuid != i)
-			pmap->pm_pcids[i].pm_gen = 0;
+		if (cpuid != i) {
+			pcidp = zpcpu_get_cpu(pmap->pm_pcidp, i);
+			pcidp->pm_gen = 0;
+		}
 	}
 
 	/*
@@ -4192,7 +4208,7 @@ pmap_pinit_pcids(pmap_t pmap, uint32_t pcid, int gen)
 	int i;
 
 	CPU_FOREACH(i) {
-		pcidp = &pmap->pm_pcids[i];
+		pcidp = zpcpu_get_cpu(pmap->pm_pcidp, i);
 		pcidp->pm_pcid = pcid;
 		pcidp->pm_gen = gen;
 	}
@@ -4215,6 +4231,7 @@ pmap_pinit0(pmap_t pmap)
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	pmap->pm_flags = pmap_flags;
+	pmap->pm_pcidp = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK);
 	pmap_pinit_pcids(pmap, PMAP_PCID_KERN + 1, 1);
 	pmap_activate_boot(pmap);
 	td = curthread;
@@ -4398,6 +4415,9 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
 	pmap->pm_pmltop = (pml5_entry_t *)PHYS_TO_DMAP(pmltop_phys);
 
 	if (pmap_pcid_enabled) {
+		if (pmap->pm_pcidp == NULL)
+			pmap->pm_pcidp = uma_zalloc_pcpu(pcpu_zone_8,
+			    M_WAITOK);
 		pmap_pinit_pcids(pmap, PMAP_PCID_NONE, 0);
 	}
 	pmap->pm_cr3 = PMAP_NO_CR3;	/* initialize to an invalid value */
@@ -9993,11 +10013,11 @@ pmap_activate_sw_pcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
 		PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK);
 		old_pmap = PCPU_GET(curpmap);
 		MPASS(old_pmap->pm_ucr3 != PMAP_NO_CR3);
-		old_pcidp = &old_pmap->pm_pcids[cpuid];
+		old_pcidp = zpcpu_get_cpu(old_pmap->pm_pcidp, cpuid);
 		old_pcidp->pm_gen = 0;
 	}
 
-	pcidp = &pmap->pm_pcids[cpuid];
+	pcidp = zpcpu_get_cpu(pmap->pm_pcidp, cpuid);
 	cached = pmap_pcid_alloc_checked(pmap, pcidp);
 	cr3 = rcr3();
 	if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
@@ -10027,7 +10047,7 @@ pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap,
 	KASSERT((read_rflags() & PSL_I) == 0,
 	    ("PCID needs interrupts disabled in pmap_activate_sw()"));
 
-	pcidp = &pmap->pm_pcids[cpuid];
+	pcidp = zpcpu_get_cpu(pmap->pm_pcidp, cpuid);
 	cached = pmap_pcid_alloc_checked(pmap, pcidp);
 	cr3 = rcr3();
 	if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3)
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index c0c35f4419e8..342bdde312be 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -35,6 +35,7 @@
 #ifndef _MACHINE_PCPU_H_
 #define	_MACHINE_PCPU_H_
 
+#include <machine/_pmap.h>
 #include <machine/segments.h>
 #include <machine/tss.h>
 
@@ -101,7 +102,8 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
 	uint64_t pc_ucr3_load_mask;					\
 	u_int	pc_small_core;						\
 	u_int	pc_pcid_invlpg_workaround;				\
-	char	__pad[2908]		/* pad to UMA_PCPU_ALLOC_SIZE */
+	struct pmap_pcid pc_kpmap_store;				\
+	char	__pad[2900]		/* pad to UMA_PCPU_ALLOC_SIZE */
 
 #define	PC_DBREG_CMD_NONE	0
 #define	PC_DBREG_CMD_LOAD	1
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index cef718fb50d5..1b3aeb248ebb 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -286,6 +286,7 @@
 
 #ifndef LOCORE
 
+#include <sys/kassert.h>
 #include <sys/queue.h>
 #include <sys/_cpuset.h>
 #include <sys/_lock.h>
@@ -390,7 +391,7 @@ struct pmap {
 	long			pm_eptgen;	/* EPT pmap generation id */
 	smr_t			pm_eptsmr;
 	int			pm_flags;
-	struct pmap_pcid	pm_pcids[MAXCPU];
+	struct pmap_pcid	*pm_pcidp;
 	struct rangeset		pm_pkru;
 };
 
@@ -537,8 +538,11 @@ pmap_invlpg(pmap_t pmap, vm_offset_t va)
 static __inline uint32_t
 pmap_get_pcid(pmap_t pmap)
 {
+	struct pmap_pcid *pcidp;
+
 	MPASS(pmap_pcid_enabled);
-	return (pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
+	pcidp = zpcpu_get(pmap->pm_pcidp);
+	return (pcidp->pm_pcid);
 }
 
 #endif /* _KERNEL */