nginx and FreeBSD11
Konstantin Belousov
kostikbel at gmail.com
Thu Sep 22 07:59:39 UTC 2016
On Wed, Sep 21, 2016 at 12:15:17AM +0300, Konstantin Belousov wrote:
> > > diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
> > > index a23468e..f754652 100644
> > > --- a/sys/vm/vm_map.c
> > > +++ b/sys/vm/vm_map.c
> > > @@ -481,6 +481,7 @@ vmspace_switch_aio(struct vmspace *newvm)
> > > if (oldvm == newvm)
> > > return;
> > >
> > > + spinlock_enter();
> > > /*
> > > * Point to the new address space and refer to it.
> > > */
> > > @@ -489,6 +490,7 @@ vmspace_switch_aio(struct vmspace *newvm)
> > >
> > > /* Activate the new mapping. */
> > > pmap_activate(curthread);
> > > + spinlock_exit();
> > >
> > > /* Remove the daemon's reference to the old address space. */
> > > KASSERT(oldvm->vm_refcnt > 1,
Did you test the patch?
Below is, I believe, the committable fix, of course assuming that
the patch above worked. If you want to retest it on stable/11, ignore
the efirt.c chunks.
diff --git a/sys/amd64/amd64/efirt.c b/sys/amd64/amd64/efirt.c
index f1d67f7..c883af8 100644
--- a/sys/amd64/amd64/efirt.c
+++ b/sys/amd64/amd64/efirt.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
@@ -301,6 +302,17 @@ efi_enter(void)
PMAP_UNLOCK(curpmap);
return (error);
}
+
+ /*
+ * IPI TLB shootdown handler invltlb_pcid_handler() reloads
+ * %cr3 from curpmap->pm_cr3, which would disable the runtime
+ * segment mappings. Block the handler's action by setting
+ * curpmap to impossible value. See also comment in
+ * pmap.c:pmap_activate_sw().
+ */
+ if (pmap_pcid_enabled && !invpcid_works)
+ PCPU_SET(curpmap, NULL);
+
load_cr3(VM_PAGE_TO_PHYS(efi_pml4_page) | (pmap_pcid_enabled ?
curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
/*
@@ -317,7 +329,9 @@ efi_leave(void)
{
pmap_t curpmap;
- curpmap = PCPU_GET(curpmap);
+ curpmap = &curproc->p_vmspace->vm_pmap;
+ if (pmap_pcid_enabled && !invpcid_works)
+ PCPU_SET(curpmap, curpmap);
load_cr3(curpmap->pm_cr3 | (pmap_pcid_enabled ?
curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
if (!pmap_pcid_enabled)
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 63042e4..59e1b67 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6842,6 +6842,7 @@ pmap_activate_sw(struct thread *td)
{
pmap_t oldpmap, pmap;
uint64_t cached, cr3;
+ register_t rflags;
u_int cpuid;
oldpmap = PCPU_GET(curpmap);
@@ -6865,16 +6866,43 @@ pmap_activate_sw(struct thread *td)
pmap == kernel_pmap,
("non-kernel pmap thread %p pmap %p cpu %d pcid %#x",
td, pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid));
+
+ /*
+ * If the INVPCID instruction is not available,
+ * invltlb_pcid_handler() is used to handle the
+ * invalidate_all IPI, which checks for curpmap ==
+ * smp_tlb_pmap. The sequence of operations below has
+ * a window where %CR3 is loaded with the new pmap's
+ * PML4 address, but the curpmap value is not yet
+ * updated. This causes the invltlb IPI handler,
+ * called between the updates, to execute as a NOP,
+ * which leaves stale TLB entries.
+ *
+ * Note that the most typical use of
+ * pmap_activate_sw(), from the context switch, is
+ * immune to this race, because interrupts are
+ * disabled (while the thread lock is owned), and the
+ * IPI happens after curpmap is updated. Protect other
+ * callers in a similar way, by disabling interrupts
+ * around the %cr3 register reload and curpmap
+ * assignment.
+ */
+ if (!invpcid_works)
+ rflags = intr_disable();
+
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) {
load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
cached);
if (cached)
PCPU_INC(pm_save_cnt);
}
+ PCPU_SET(curpmap, pmap);
+ if (!invpcid_works)
+ intr_restore(rflags);
} else if (cr3 != pmap->pm_cr3) {
load_cr3(pmap->pm_cr3);
+ PCPU_SET(curpmap, pmap);
}
- PCPU_SET(curpmap, pmap);
#ifdef SMP
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
#else
More information about the freebsd-stable
mailing list