git: 6fb848f2ff91 - main - amd64 pmap: Use INVPCID_CTXGLOB on Ryzen processors
Date: Sat, 02 Aug 2025 17:15:01 UTC
The branch main has been updated by alc:
URL: https://cgit.FreeBSD.org/src/commit/?id=6fb848f2ff91337dbb26024ab41103ee2b036021
commit 6fb848f2ff91337dbb26024ab41103ee2b036021
Author: Alan Cox <alc@FreeBSD.org>
AuthorDate: 2025-07-07 02:45:11 +0000
Commit: Alan Cox <alc@FreeBSD.org>
CommitDate: 2025-08-02 17:14:39 +0000
amd64 pmap: Use INVPCID_CTXGLOB on Ryzen processors
Recent AMD Ryzen processors support a limited form of the invpcid
instruction, even when they do not support PCID functionality. In
particular, they support the type 2 form of the instruction, which we
call INVPCID_CTXGLOB. Using it is faster than toggling PGE in %cr4.
Reviewed by: kib
MFC after: 1 week
Differential Revision: https://reviews.freebsd.org/D51565
---
sys/amd64/amd64/machdep.c | 7 ++---
sys/amd64/amd64/pmap.c | 76 +++++++++++++++++++++++++++--------------------
2 files changed, 45 insertions(+), 38 deletions(-)
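
For context, here is a minimal sketch of the two flush mechanisms the
commit message contrasts. This is an illustration, not code from the
commit: the helper name flush_all_tlb() is hypothetical, while
invpcid(), rcr4(), load_cr4(), struct invpcid_descr, and the
INVPCID_CTXGLOB and CR4_PGE constants are the existing definitions from
sys/amd64/include/cpufunc.h and specialreg.h.

	/* Sketch: flush every TLB entry, including global (PG_G) ones. */
	static void
	flush_all_tlb(void)
	{
		struct invpcid_descr d;
		uint64_t cr4;

		if (invpcid_works) {
			/*
			 * Type 2: a single instruction invalidates all
			 * contexts, global mappings included.
			 */
			bzero(&d, sizeof(d));
			invpcid(&d, INVPCID_CTXGLOB);
		} else {
			/*
			 * Legacy path: each write to %cr4 that changes
			 * PGE flushes the whole TLB and serializes the
			 * pipeline.
			 */
			cr4 = rcr4();
			load_cr4(cr4 & ~CR4_PGE);
			load_cr4(cr4 | CR4_PGE);
		}
	}

The single invpcid avoids the two serializing %cr4 writes, which is the
speedup the commit message refers to.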
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 37c7056f649c..9ff60439d1ec 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1336,12 +1336,9 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
pti = pti_get_default();
TUNABLE_INT_FETCH("vm.pmap.pti", &pti);
TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
- if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
- invpcid_works = (cpu_stdext_feature &
- CPUID_STDEXT_INVPCID) != 0;
- } else {
+ if ((cpu_feature2 & CPUID2_PCID) == 0)
pmap_pcid_enabled = 0;
- }
+ invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID) != 0;
/*
* Now we can do small core initialization, after the PCID
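
With this hunk, invpcid_works is derived from CPUID unconditionally
rather than only when PCID is enabled, which is what lets the Ryzen
case (INVPCID present, PCID absent) be detected. For reference, the two
features are reported in different CPUID leaves; a hedged sketch of the
probe follows, where the helper name is hypothetical but do_cpuid(),
cpuid_count(), and CPUID_STDEXT_INVPCID are existing FreeBSD
definitions:

	/*
	 * Sketch: PCID is reported in CPUID.01H:ECX[17]; INVPCID in
	 * CPUID.(EAX=07H,ECX=0):EBX[10].
	 */
	static bool
	cpu_has_invpcid(void)
	{
		u_int regs[4];

		do_cpuid(0, regs);
		if (regs[0] < 7)
			return (false);		/* No leaf 7. */
		cpuid_count(7, 0, regs);
		return ((regs[1] & CPUID_STDEXT_INVPCID) != 0);
	}

On the Ryzen parts this commit targets, the INVPCID bit is set even
though the PCID bit is clear, which is why the two tests are now
independent.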
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 6369498210d9..243a6625bece 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -2904,6 +2904,9 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
/*
* Promotion: flush every 4KB page mapping from the TLB,
* including any global (PG_G) mappings.
+ *
+ * This function is only used on older processors that
+ * do not support the invpcid instruction.
*/
invltlb_glob();
}
@@ -3050,13 +3053,13 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
* local user page: INVLPG
* local kernel page: INVLPG
* local user total: reload %cr3
- * local kernel total: invltlb_glob()
+ * local kernel total: INVPCID(CTXGLOB) or invltlb_glob()
* remote user page, inactive pmap: -
* remote user page, active pmap: IPI:INVLPG
* remote kernel page: IPI:INVLPG
* remote user total, inactive pmap: -
* remote user total, active pmap: IPI:(reload %cr3)
- * remote kernel total: IPI:invltlb_glob()
+ * remote kernel total: IPI:INVPCID(CTXGLOB) or invltlb_glob()
* Since on return to user mode, the reload of %cr3 with ucr3 causes
* TLB invalidation, no specific action is required for user page table.
*
@@ -3356,7 +3359,8 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
}
static inline void
-pmap_invalidate_all_pcid_cb(pmap_t pmap, bool invpcid_works1)
+pmap_invalidate_all_cb_template(pmap_t pmap, bool pmap_pcid_enabled1,
+ bool invpcid_works1)
{
struct invpcid_descr d;
uint64_t kcr3;
@@ -3370,57 +3374,63 @@ pmap_invalidate_all_pcid_cb(pmap_t pmap, bool invpcid_works1)
invltlb_glob();
}
} else if (pmap == PCPU_GET(curpmap)) {
- CRITICAL_ASSERT(curthread);
-
- pcid = pmap_get_pcid(pmap);
- if (invpcid_works1) {
- d.pcid = pcid;
- d.pad = 0;
- d.addr = 0;
- invpcid(&d, INVPCID_CTX);
+ if (pmap_pcid_enabled1) {
+ CRITICAL_ASSERT(curthread);
+
+ pcid = pmap_get_pcid(pmap);
+ if (invpcid_works1) {
+ d.pcid = pcid;
+ d.pad = 0;
+ d.addr = 0;
+ invpcid(&d, INVPCID_CTX);
+ } else {
+ kcr3 = pmap->pm_cr3 | pcid;
+ load_cr3(kcr3);
+ }
+ if (pmap->pm_ucr3 != PMAP_NO_CR3)
+ PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
} else {
- kcr3 = pmap->pm_cr3 | pcid;
- load_cr3(kcr3);
+ invltlb();
}
- if (pmap->pm_ucr3 != PMAP_NO_CR3)
- PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE);
}
}
static void
-pmap_invalidate_all_pcid_invpcid_cb(pmap_t pmap)
+pmap_invalidate_all_pcid_invpcid_cb(pmap_t pmap, vm_offset_t addr1 __unused,
+ vm_offset_t addr2 __unused)
{
- pmap_invalidate_all_pcid_cb(pmap, true);
+ pmap_invalidate_all_cb_template(pmap, true, true);
}
static void
-pmap_invalidate_all_pcid_noinvpcid_cb(pmap_t pmap)
+pmap_invalidate_all_pcid_noinvpcid_cb(pmap_t pmap, vm_offset_t addr1 __unused,
+ vm_offset_t addr2 __unused)
{
- pmap_invalidate_all_pcid_cb(pmap, false);
+ pmap_invalidate_all_cb_template(pmap, true, false);
}
static void
-pmap_invalidate_all_nopcid_cb(pmap_t pmap)
+pmap_invalidate_all_nopcid_invpcid_cb(pmap_t pmap, vm_offset_t addr1 __unused,
+ vm_offset_t addr2 __unused)
{
- if (pmap == kernel_pmap)
- invltlb_glob();
- else if (pmap == PCPU_GET(curpmap))
- invltlb();
+ pmap_invalidate_all_cb_template(pmap, false, true);
}
-DEFINE_IFUNC(static, void, pmap_invalidate_all_cb, (pmap_t))
+static void
+pmap_invalidate_all_nopcid_noinvpcid_cb(pmap_t pmap, vm_offset_t addr1 __unused,
+ vm_offset_t addr2 __unused)
{
- if (pmap_pcid_enabled)
- return (invpcid_works ? pmap_invalidate_all_pcid_invpcid_cb :
- pmap_invalidate_all_pcid_noinvpcid_cb);
- return (pmap_invalidate_all_nopcid_cb);
+ pmap_invalidate_all_cb_template(pmap, false, false);
}
-static void
-pmap_invalidate_all_curcpu_cb(pmap_t pmap, vm_offset_t addr1 __unused,
- vm_offset_t addr2 __unused)
+DEFINE_IFUNC(static, void, pmap_invalidate_all_curcpu_cb, (pmap_t, vm_offset_t,
+ vm_offset_t))
{
- pmap_invalidate_all_cb(pmap);
+ if (pmap_pcid_enabled)
+ return (invpcid_works ? pmap_invalidate_all_pcid_invpcid_cb :
+ pmap_invalidate_all_pcid_noinvpcid_cb);
+ return (invpcid_works ? pmap_invalidate_all_nopcid_invpcid_cb :
+ pmap_invalidate_all_nopcid_noinvpcid_cb);
}
void
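
The DEFINE_IFUNC resolver above runs once, when the kernel linker
processes the object, and binds pmap_invalidate_all_curcpu_cb directly
to one of the four specialized callbacks, so no feature flag is tested
on the invalidation path itself. A rough functional equivalent using an
ordinary function pointer (sketch only; the _ptr and _init names are
hypothetical):

	static void (*pmap_invalidate_all_curcpu_cb_ptr)(pmap_t, vm_offset_t,
	    vm_offset_t);

	static void
	pmap_invalidate_all_cb_init(void)
	{
		if (pmap_pcid_enabled)
			pmap_invalidate_all_curcpu_cb_ptr = invpcid_works ?
			    pmap_invalidate_all_pcid_invpcid_cb :
			    pmap_invalidate_all_pcid_noinvpcid_cb;
		else
			pmap_invalidate_all_curcpu_cb_ptr = invpcid_works ?
			    pmap_invalidate_all_nopcid_invpcid_cb :
			    pmap_invalidate_all_nopcid_noinvpcid_cb;
	}

Unlike the pointer, the ifunc is resolved once at boot and subsequent
calls are direct, and because pmap_invalidate_all_cb_template() is
inline with constant bool arguments, the compiler folds the
pmap_pcid_enabled/invpcid_works tests away in each of the four
callbacks.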