svn commit: r353149 - head/sys/amd64/amd64
Cy Schubert
Cy.Schubert at cschubert.com
Mon Oct 7 04:06:31 UTC 2019
In message <201910062213.x96MDZv3085523 at repo.freebsd.org>, Mateusz Guzik
writes:
> Author: mjg
> Date: Sun Oct 6 22:13:35 2019
> New Revision: 353149
> URL: https://svnweb.freebsd.org/changeset/base/353149
>
> Log:
> amd64 pmap: implement per-superpage locks
>
> The current 256-lock sized array is a problem in the following ways:
> - it's way too small
> - there are 2 locks per cacheline
> - it is not NUMA-aware
>
> Solve these issues by introducing per-superpage locks backed by pages
> allocated from respective domains.
>
> This significantly reduces contention e.g. during poudriere -j 104.
> See the review for results.
>
> Reviewed by: kib
> Discussed with: jeff
> Sponsored by: The FreeBSD Foundation
> Differential Revision: https://reviews.freebsd.org/D21833
>
> Modified:
> head/sys/amd64/amd64/pmap.c
>
> Modified: head/sys/amd64/amd64/pmap.c
> ==============================================================================
> --- head/sys/amd64/amd64/pmap.c Sun Oct 6 20:36:25 2019 (r353148)
> +++ head/sys/amd64/amd64/pmap.c Sun Oct 6 22:13:35 2019 (r353149)
> @@ -316,13 +316,25 @@ pmap_pku_mask_bit(pmap_t pmap)
> #define PV_STAT(x) do { } while (0)
> #endif
>
> -#define pa_index(pa) ((pa) >> PDRSHIFT)
> +#undef pa_index
> +#define pa_index(pa) ({ \
> + KASSERT((pa) <= vm_phys_segs[vm_phys_nsegs - 1].end, \
> + ("address %lx beyond the last segment", (pa))); \
> + (pa) >> PDRSHIFT; \
> +})
> +#if VM_NRESERVLEVEL > 0
> +#define pa_to_pmdp(pa) (&pv_table[pa_index(pa)])
> +#define pa_to_pvh(pa) (&(pa_to_pmdp(pa)->pv_page))
> +#define PHYS_TO_PV_LIST_LOCK(pa) \
> + (&(pa_to_pmdp(pa)->pv_lock))
> +#else
> #define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
>
> #define NPV_LIST_LOCKS MAXCPU
>
> #define PHYS_TO_PV_LIST_LOCK(pa) \
> (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
> +#endif
>
> #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
> struct rwlock **_lockp = (lockp); \
> @@ -400,14 +412,22 @@ static int pmap_initialized;
>
> /*
> * Data for the pv entry allocation mechanism.
> - * Updates to pv_invl_gen are protected by the pv_list_locks[]
> - * elements, but reads are not.
> * Updates to pv_invl_gen are protected by the pv list lock but reads are not.
> */
> static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
> static struct mtx __exclusive_cache_line pv_chunks_mutex;
> +#if VM_NRESERVLEVEL > 0
> +struct pmap_large_md_page {
> + struct rwlock pv_lock;
> + struct md_page pv_page;
> + u_long pv_invl_gen;
> +};
> +static struct pmap_large_md_page *pv_table;
> +#else
> static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
> static u_long pv_invl_gen[NPV_LIST_LOCKS];
> static struct md_page *pv_table;
> +#endif
> static struct md_page pv_dummy;
>
> /*
> @@ -918,12 +938,21 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLA
> "Number of slow invalidation waits for lockless DI");
> #endif
>
> +#if VM_NRESERVLEVEL > 0
> static u_long *
> pmap_delayed_invl_genp(vm_page_t m)
> {
>
> + return (&pa_to_pmdp(VM_PAGE_TO_PHYS(m))->pv_invl_gen);
> +}
> +#else
> +static u_long *
> +pmap_delayed_invl_genp(vm_page_t m)
> +{
> +
> return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
> }
> +#endif
>
> static void
> pmap_delayed_invl_callout_func(void *arg __unused)
> @@ -1803,6 +1832,112 @@ pmap_page_init(vm_page_t m)
> m->md.pat_mode = PAT_WRITE_BACK;
> }
>
> +#if VM_NRESERVLEVEL > 0
> +static void
> +pmap_init_pv_table(void)
> +{
> + struct pmap_large_md_page *pvd;
> + vm_size_t s;
> + long start, end, highest, pv_npg;
> + int domain, i, j, pages;
> +
> + /*
> + * We strongly depend on the size being a power of two, so the assert
> + * is overzealous. However, should the struct be resized to a
> + * different power of two, the code below needs to be revisited.
> + */
> + CTASSERT((sizeof(*pvd) == 64));
> +
> + /*
> + * Calculate the size of the array.
> + */
> + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> + s = (vm_size_t)pv_npg * sizeof(struct pmap_large_md_page);
> + s = round_page(s);
> + pv_table = (struct pmap_large_md_page *)kva_alloc(s);
> + if (pv_table == NULL)
> + panic("%s: kva_alloc failed\n", __func__);
> +
> + /*
> + * Iterate physical segments to allocate space for respective pages.
> + */
> + highest = -1;
> + s = 0;
> + for (i = 0; i < vm_phys_nsegs; i++) {
> + start = vm_phys_segs[i].start / NBPDR;
> + end = vm_phys_segs[i].end / NBPDR;
> + domain = vm_phys_segs[i].domain;
> +
> + if (highest >= end)
> + continue;
> +
> + if (start < highest) {
> + start = highest + 1;
> + pvd = &pv_table[start];
> + } else {
> + /*
> + * The lowest address may land somewhere in the middle
> + * of our page. Simplify the code by pretending it is
> + * at the beginning.
> + */
> + pvd = pa_to_pmdp(vm_phys_segs[i].start);
> + pvd = (struct pmap_large_md_page *)trunc_page(pvd);
> + start = pvd - pv_table;
> + }
> +
> + pages = end - start + 1;
> + s = round_page(pages * sizeof(*pvd));
> + highest = start + (s / sizeof(*pvd)) - 1;
> +
> + for (j = 0; j < s; j += PAGE_SIZE) {
> + vm_page_t m = vm_page_alloc_domain(NULL, 0,
> + domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
> + if (m == NULL)
> + panic("vm_page_alloc_domain failed for %lx\n", (vm_offset_t)pvd + j);
> + pmap_qenter((vm_offset_t)pvd + j, &m, 1);
> + }
> +
> + for (j = 0; j < s / sizeof(*pvd); j++) {
> + rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
> + TAILQ_INIT(&pvd->pv_page.pv_list);
> + pvd->pv_page.pv_gen = 0;
> + pvd->pv_page.pat_mode = 0;
> + pvd->pv_invl_gen = 0;
> + pvd++;
> + }
> + }
> + TAILQ_INIT(&pv_dummy.pv_list);
> +}
> +#else
> +static void
> +pmap_init_pv_table(void)
> +{
> + vm_size_t s;
> + long i, pv_npg;
> +
> + /*
> + * Initialize the pool of pv list locks.
> + */
> + for (i = 0; i < NPV_LIST_LOCKS; i++)
> + rw_init(&pv_list_locks[i], "pmap pv list");
> +
> + /*
> + * Calculate the size of the pv head table for superpages.
> + */
> + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> +
> + /*
> + * Allocate memory for the pv head table for superpages.
> + */
> + s = (vm_size_t)pv_npg * sizeof(struct md_page);
> + s = round_page(s);
> + pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
> + for (i = 0; i < pv_npg; i++)
> + TAILQ_INIT(&pv_table[i].pv_list);
> + TAILQ_INIT(&pv_dummy.pv_list);
> +}
> +#endif
> +
> /*
> * Initialize the pmap module.
> * Called by vm_init, to initialize any structures that the pmap
> @@ -1813,8 +1948,7 @@ pmap_init(void)
> {
> struct pmap_preinit_mapping *ppim;
> vm_page_t m, mpte;
> - vm_size_t s;
> - int error, i, pv_npg, ret, skz63;
> + int error, i, ret, skz63;
>
> /* L1TF, reserve page @0 unconditionally */
> vm_page_blacklist_add(0, bootverbose);
> @@ -1902,26 +2036,7 @@ pmap_init(void)
> */
> mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
>
> - /*
> - * Initialize the pool of pv list locks.
> - */
> - for (i = 0; i < NPV_LIST_LOCKS; i++)
> - rw_init(&pv_list_locks[i], "pmap pv list");
> -
> - /*
> - * Calculate the size of the pv head table for superpages.
> - */
> - pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> -
> - /*
> - * Allocate memory for the pv head table for superpages.
> - */
> - s = (vm_size_t)(pv_npg * sizeof(struct md_page));
> - s = round_page(s);
> - pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
> - for (i = 0; i < pv_npg; i++)
> - TAILQ_INIT(&pv_table[i].pv_list);
> - TAILQ_INIT(&pv_dummy.pv_list);
> + pmap_init_pv_table();
>
> pmap_initialized = 1;
> for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
>
This causes a page fault during X (xdm) startup, which loads
drm-current-kmod.
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame
0xfffffe0093e9c260
vpanic() at vpanic+0x19d/frame 0xfffffe0093e9c2b0
panic() at panic+0x43/frame 0xfffffe0093e9c310
vm_fault() at vm_fault+0x2126/frame 0xfffffe0093e9c460
vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c4b0
trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c510
trap() at trap+0x2a1/frame 0xfffffe0093e9c620
calltrap() at calltrap+0x8/frame 0xfffffe0093e9c620
--- trap 0xc, rip = 0xffffffff80a054b1, rsp = 0xfffffe0093e9c6f0, rbp =
0xfffffe0093e9c7a0 ---
pmap_enter() at pmap_enter+0x861/frame 0xfffffe0093e9c7a0
vm_fault() at vm_fault+0x1c69/frame 0xfffffe0093e9c8f0
vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c940
trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c9a0
trap() at trap+0x438/frame 0xfffffe0093e9cab0
calltrap() at calltrap+0x8/frame 0xfffffe0093e9cab0
--- trap 0xc, rip = 0x30e2a9c3, rsp = 0x7fffffffea50, rbp = 0x7fffffffeaa0
---
Uptime: 3m33s
Dumping 945 out of 7974 MB:..2%..11%..21%..31%..41%..51%..61%..72%..82%..92%
(kgdb) bt
#0 doadump (textdump=1) at pcpu_aux.h:55
#1 0xffffffff8068c5ed in kern_reboot (howto=260)
at /opt/src/svn-current/sys/kern/kern_shutdown.c:479
#2 0xffffffff8068caa9 in vpanic (fmt=<value optimized out>,
ap=<value optimized out>)
at /opt/src/svn-current/sys/kern/kern_shutdown.c:908
#3 0xffffffff8068c8a3 in panic (fmt=<value optimized out>)
at /opt/src/svn-current/sys/kern/kern_shutdown.c:835
#4 0xffffffff8098c966 in vm_fault (map=<value optimized out>,
vaddr=<value optimized out>, fault_type=<value optimized out>,
fault_flags=<value optimized out>, m_hold=<value optimized out>)
at /opt/src/svn-current/sys/vm/vm_fault.c:672
#5 0xffffffff8098a723 in vm_fault_trap (map=0xfffff80002001000,
vaddr=<value optimized out>, fault_type=2 '\002',
fault_flags=<value optimized out>, signo=0x0, ucode=0x0)
at /opt/src/svn-current/sys/vm/vm_fault.c:568
#6 0xffffffff80a18326 in trap_pfault (frame=0xfffffe0093e9c630,
signo=<value optimized out>, ucode=<value optimized out>)
at /opt/src/svn-current/sys/amd64/amd64/trap.c:828
#7 0xffffffff80a177f1 in trap (frame=0xfffffe0093e9c630)
at /opt/src/svn-current/sys/amd64/amd64/trap.c:407
#8 0xffffffff809f1aac in calltrap ()
at /opt/src/svn-current/sys/amd64/amd64/exception.S:289
---Type <return> to continue, or q <return> to quit---
#9 0xffffffff80a054b1 in pmap_enter (pmap=<value optimized out>,
va=851443712, m=0xfffffe0005b25ce8, prot=<value optimized out>,
flags=2677542912, psind=<value optimized out>) at atomic.h:221
#10 0xffffffff8098c4a9 in vm_fault (map=<value optimized out>,
vaddr=<value optimized out>, fault_type=232 '\ufffd',
fault_flags=<value optimized out>, m_hold=0x0)
at /opt/src/svn-current/sys/vm/vm_fault.c:489
#11 0xffffffff8098a723 in vm_fault_trap (map=0xfffff80173eb5000,
vaddr=<value optimized out>, fault_type=2 '\002',
fault_flags=<value optimized out>, signo=0xfffffe0093e9ca84,
ucode=0xfffffe0093e9ca80) at /opt/src/svn-current/sys/vm/vm_fault.c:568
#12 0xffffffff80a18326 in trap_pfault (frame=0xfffffe0093e9cac0,
signo=<value optimized out>, ucode=<value optimized out>)
at /opt/src/svn-current/sys/amd64/amd64/trap.c:828
#13 0xffffffff80a17988 in trap (frame=0xfffffe0093e9cac0)
at /opt/src/svn-current/sys/amd64/amd64/trap.c:347
#14 0xffffffff809f1aac in calltrap ()
at /opt/src/svn-current/sys/amd64/amd64/exception.S:289
#15 0x0000000030e2a9c3 in ?? ()
Previous frame inner to this frame (corrupt stack?)
Current language: auto; currently minimal
(kgdb) frame 9
#9 0xffffffff80a054b1 in pmap_enter (pmap=<value optimized out>,
va=851443712, m=0xfffffe0005b25ce8, prot=<value optimized out>,
flags=2677542912, psind=<value optimized out>) at atomic.h:221
221 ATOMIC_CMPSET(long);
(kgdb) l
216 }
217
218 ATOMIC_CMPSET(char);
219 ATOMIC_CMPSET(short);
220 ATOMIC_CMPSET(int);
221 ATOMIC_CMPSET(long);
222
223 /*
224 * Atomically add the value of v to the integer pointed to by p and
return
225 * the previous value of *p.
(kgdb)
--
Cheers,
Cy Schubert <Cy.Schubert at cschubert.com>
FreeBSD UNIX: <cy at FreeBSD.org> Web: http://www.FreeBSD.org
The need of the many outweighs the greed of the few.
More information about the svn-src-all
mailing list