git: ab12e8db292c - main - amd64: Reduce the amount of cpuset copying done for TLB shootdowns

From: Mark Johnston <markj@FreeBSD.org>
Date: Mon, 15 Nov 2021 18:03:05 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=ab12e8db292c386a33445dcd95fa629413954192

commit ab12e8db292c386a33445dcd95fa629413954192
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2021-11-15 17:52:03 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2021-11-15 18:01:31 +0000

    amd64: Reduce the amount of cpuset copying done for TLB shootdowns
    
    We use pmap_invalidate_cpu_mask() to get the set of active CPUs.  This
    (32-byte) set is copied by value through multiple frames until we get to
    smp_targeted_tlb_shootdown(), where it is copied yet again.
    
    Avoid this copying by having smp_targeted_tlb_shootdown() make a local
    copy of the active CPUs for the pmap, and drop the cpuset parameter,
    simplifying callers.  Also use the non-destructive CPU_FOREACH_ISSET
    to avoid unneeded copying within smp_targeted_tlb_shootdown().
    
    Reviewed by:    alc, kib
    Tested by:      pho
    MFC after:      1 month
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D32792
---
 sys/amd64/amd64/mp_machdep.c | 39 ++++++++++++++++++---------------------
 sys/amd64/amd64/pmap.c       |  8 +++-----
 sys/amd64/include/pmap.h     |  9 +++++++--
 sys/x86/include/x86_smp.h    | 15 ++++++++++++---
 4 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index fb8c2af271ed..5cd9d93dd0f1 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -613,10 +613,10 @@ invl_scoreboard_slot(u_int cpu)
  * completion.
  */
 static void
-smp_targeted_tlb_shootdown(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
-    vm_offset_t addr2, smp_invl_cb_t curcpu_cb, enum invl_op_codes op)
+smp_targeted_tlb_shootdown(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
+    smp_invl_cb_t curcpu_cb, enum invl_op_codes op)
 {
-	cpuset_t other_cpus;
+	cpuset_t mask;
 	uint32_t generation, *p_cpudone;
 	int cpu;
 	bool is_all;
@@ -631,10 +631,12 @@ smp_targeted_tlb_shootdown(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
 	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 
 	/*
-	 * Check for other cpus.  Return if none.
+	 * Make a stable copy of the set of CPUs on which the pmap is active.
+	 * See if we have to interrupt other CPUs.
 	 */
-	is_all = !CPU_CMP(&mask, &all_cpus);
-	CPU_CLR(PCPU_GET(cpuid), &mask);
+	CPU_COPY(pmap_invalidate_cpu_mask(pmap), &mask);
+	is_all = CPU_CMP(&mask, &all_cpus) == 0;
+	CPU_CLR(curcpu, &mask);
 	if (CPU_EMPTY(&mask))
 		goto local_cb;
 
@@ -663,7 +665,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
 	CPU_FOREACH_ISSET(cpu, &mask) {
 		KASSERT(*invl_scoreboard_slot(cpu) != 0,
 		    ("IPI scoreboard is zero, initiator %d target %d",
-		    PCPU_GET(cpuid), cpu));
+		    curcpu, cpu));
 		*invl_scoreboard_slot(cpu) = 0;
 	}
 
@@ -674,14 +676,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
 	 */
 	if (is_all) {
 		ipi_all_but_self(IPI_INVLOP);
-		other_cpus = all_cpus;
-		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 	} else {
-		other_cpus = mask;
 		ipi_selected(mask, IPI_INVLOP);
 	}
 	curcpu_cb(pmap, addr1, addr2);
-	CPU_FOREACH_ISSET(cpu, &other_cpus) {
+	CPU_FOREACH_ISSET(cpu, &mask) {
 		p_cpudone = invl_scoreboard_slot(cpu);
 		while (atomic_load_int(p_cpudone) != generation)
 			ia32_pause();
@@ -705,29 +704,28 @@ local_cb:
 }
 
 void
-smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
+smp_masked_invltlb(pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
-	smp_targeted_tlb_shootdown(mask, pmap, 0, 0, curcpu_cb, invl_op_tlb);
+	smp_targeted_tlb_shootdown(pmap, 0, 0, curcpu_cb, invl_op_tlb);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_global++;
 #endif
 }
 
 void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
-    smp_invl_cb_t curcpu_cb)
+smp_masked_invlpg(vm_offset_t addr, pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
-	smp_targeted_tlb_shootdown(mask, pmap, addr, 0, curcpu_cb, invl_op_pg);
+	smp_targeted_tlb_shootdown(pmap, addr, 0, curcpu_cb, invl_op_pg);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_page++;
 #endif
 }
 
 void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
-    pmap_t pmap, smp_invl_cb_t curcpu_cb)
+smp_masked_invlpg_range(vm_offset_t addr1, vm_offset_t addr2, pmap_t pmap,
+    smp_invl_cb_t curcpu_cb)
 {
-	smp_targeted_tlb_shootdown(mask, pmap, addr1, addr2, curcpu_cb,
+	smp_targeted_tlb_shootdown(pmap, addr1, addr2, curcpu_cb,
 	    invl_op_pgrng);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_range++;
@@ -738,8 +736,7 @@ smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
 void
 smp_cache_flush(smp_invl_cb_t curcpu_cb)
 {
-	smp_targeted_tlb_shootdown(all_cpus, NULL, 0, 0, curcpu_cb,
-	    INVL_OP_CACHE);
+	smp_targeted_tlb_shootdown(kernel_pmap, 0, 0, curcpu_cb, INVL_OP_CACHE);
 }
 
 /*
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f369ec360c83..e9973a420de3 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -3146,8 +3146,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
 
 	pmap_invalidate_preipi(pmap);
-	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap,
-	    pmap_invalidate_page_curcpu_cb);
+	smp_masked_invlpg(va, pmap, pmap_invalidate_page_curcpu_cb);
 }
 
 /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */
@@ -3245,7 +3244,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	    ("pmap_invalidate_range: invalid type %d", pmap->pm_type));
 
 	pmap_invalidate_preipi(pmap);
-	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap,
+	smp_masked_invlpg_range(sva, eva, pmap,
 	    pmap_invalidate_range_curcpu_cb);
 }
 
@@ -3331,8 +3330,7 @@ pmap_invalidate_all(pmap_t pmap)
 	    ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
 
 	pmap_invalidate_preipi(pmap);
-	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap,
-	    pmap_invalidate_all_curcpu_cb);
+	smp_masked_invltlb(pmap, pmap_invalidate_all_curcpu_cb);
 }
 
 static void
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index b327d04c8261..318e70e7794c 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -532,10 +532,15 @@ vm_page_t pmap_page_alloc_below_4g(bool zeroed);
 void	pmap_san_enter(vm_offset_t);
 #endif
 
-static __inline cpuset_t
+/*
+ * Returns a pointer to a set of CPUs on which the pmap is currently active.
+ * Note that the set can be modified without any mutual exclusion, so a copy
+ * must be made if a stable value is required.
+ */
+static __inline volatile cpuset_t *
 pmap_invalidate_cpu_mask(pmap_t pmap)
 {
-	return (pmap->pm_active);
+	return (&pmap->pm_active);
 }
 
 #endif /* _KERNEL */
diff --git a/sys/x86/include/x86_smp.h b/sys/x86/include/x86_smp.h
index b9a1febb70f2..2cf0ff97eae0 100644
--- a/sys/x86/include/x86_smp.h
+++ b/sys/x86/include/x86_smp.h
@@ -107,14 +107,23 @@ void	ipi_swi_handler(struct trapframe frame);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
 void	ipi_self_from_nmi(u_int vector);
 void	set_interrupt_apic_ids(void);
+void	mem_range_AP_init(void);
+void	topo_probe(void);
+
+/* functions in mp_machdep.c */
 void	smp_cache_flush(smp_invl_cb_t curcpu_cb);
+#ifdef __i386__
 void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap,
 	    smp_invl_cb_t curcpu_cb);
 void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
 	    vm_offset_t endva, struct pmap *pmap, smp_invl_cb_t curcpu_cb);
 void	smp_masked_invltlb(cpuset_t mask, struct pmap *pmap,
 	    smp_invl_cb_t curcpu_cb);
-void	mem_range_AP_init(void);
-void	topo_probe(void);
-
+#else
+void	smp_masked_invlpg(vm_offset_t addr, struct pmap *pmap,
+	    smp_invl_cb_t curcpu_cb);
+void	smp_masked_invlpg_range(vm_offset_t startva, vm_offset_t endva,
+	    struct pmap *pmap, smp_invl_cb_t curcpu_cb);
+void	smp_masked_invltlb(struct pmap *pmap, smp_invl_cb_t curcpu_cb);
+#endif
 #endif