git: 574ef6506950 - main - subr_pctrie: use ilog2(x) instead of fls(x)-1
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 03 Jun 2024 17:49:45 UTC
The branch main has been updated by dougm: URL: https://cgit.FreeBSD.org/src/commit/?id=574ef650695088d56ea12df7da76155370286f9f commit 574ef650695088d56ea12df7da76155370286f9f Author: Doug Moore <dougm@FreeBSD.org> AuthorDate: 2024-06-03 17:45:45 +0000 Commit: Doug Moore <dougm@FreeBSD.org> CommitDate: 2024-06-03 17:45:45 +0000 subr_pctrie: use ilog2(x) instead of fls(x)-1 In three instances where fls(x)-1 is used, the compiler does not know that x is nonzero and so adds needless zero checks. Using ilog(x) instead saves, in each instance, about 4 instructions, including a conditional, and 16 or so bytes, on an amd64 build. Reviewed by: alc Differential Revision: https://reviews.freebsd.org/D45330 --- sys/kern/subr_pctrie.c | 6 +-- sys/vm/_vm_phys.h | 4 +- sys/vm/vm_page.c | 5 +- sys/vm/vm_phys.c | 123 ++++++++++++++++++++++++++++++------------------- sys/vm/vm_phys.h | 2 +- sys/vm/vm_reserv.c | 33 +++++++------ 6 files changed, 104 insertions(+), 69 deletions(-) diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c index 85df0a9bf9e4..76f4ee17a8ca 100644 --- a/sys/kern/subr_pctrie.c +++ b/sys/kern/subr_pctrie.c @@ -350,7 +350,7 @@ pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val) "uint64 too wide"); _Static_assert(sizeof(uint64_t) * NBBY <= (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow"); - parent->pn_clev = rounddown(flsll(index ^ newind) - 1, PCTRIE_WIDTH); + parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH); parent->pn_owner = PCTRIE_COUNT; parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev); @@ -546,14 +546,14 @@ pctrie_lookup_le(struct pctrie *ptree, uint64_t index) KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0, ("%s: no popmap siblings before slot %d in node %p", __func__, slot, pred)); - slot = fls(pred->pn_popmap & ((1 << slot) - 1)) - 1; + slot = ilog2(pred->pn_popmap & ((1 << slot) - 1)); pred = pctrie_node_load(&pred->pn_child[slot], NULL, PCTRIE_LOCKED); } while (!pctrie_isleaf(pred)) { KASSERT(pred->pn_popmap != 0, ("%s: no popmap children in node %p", __func__, pred)); - slot = fls(pred->pn_popmap) - 1; + slot = ilog2(pred->pn_popmap); pred = pctrie_node_load(&pred->pn_child[slot], NULL, PCTRIE_LOCKED); } diff --git a/sys/vm/_vm_phys.h b/sys/vm/_vm_phys.h index 36e1074568e9..e3cf09081537 100644 --- a/sys/vm/_vm_phys.h +++ b/sys/vm/_vm_phys.h @@ -51,6 +51,8 @@ struct vm_freelist { int lcnt; }; +typedef struct vm_freelist vm_freelist_tbl[VM_NFREEPOOL][VM_NFREEORDER_MAX]; + struct vm_phys_seg { vm_paddr_t start; vm_paddr_t end; @@ -62,7 +64,7 @@ struct vm_phys_seg { void *md_first; #endif int domain; - struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX]; + vm_freelist_tbl *free_queues; }; extern struct vm_phys_seg vm_phys_segs[]; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 9ba31cb9e1b3..7c2bed8339ed 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -509,7 +509,7 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) m->psind = 0; m->segind = segind; m->order = VM_NFREEORDER; - m->pool = VM_FREEPOOL_DEFAULT; + m->pool = VM_NFREEPOOL; m->valid = m->dirty = 0; pmap_page_init(m); } @@ -785,7 +785,8 @@ vm_page_startup(vm_offset_t vaddr) m = seg->first_page + atop(startp - seg->start); vmd = VM_DOMAIN(seg->domain); vm_domain_free_lock(vmd); - vm_phys_enqueue_contig(m, pagecount); + vm_phys_enqueue_contig(m, VM_FREEPOOL_DEFAULT, + pagecount); vm_domain_free_unlock(vmd); vm_domain_freecnt_inc(vmd, pagecount); vm_cnt.v_page_count += (u_int)pagecount; diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index 66ad9c13dceb..945e018710a1 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -669,6 +669,7 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order, int tail) { vm_page_t m_buddy; + int pool = m->pool; while (oind > order) { oind--; @@ -676,6 +677,10 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order, KASSERT(m_buddy->order == VM_NFREEORDER, ("vm_phys_split_pages: page %p has unexpected order %d", m_buddy, m_buddy->order)); + KASSERT(m_buddy->pool == VM_NFREEPOOL, + ("vm_phys_split_pages: page %p has unexpected pool %d", + m_buddy, m_buddy->pool)); + m_buddy->pool = pool; vm_freelist_add(fl, m_buddy, oind, tail); } } @@ -693,7 +698,8 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order, * The physical page m's buddy must not be free. */ static void -vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) +vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool, + int tail) { int order; @@ -709,6 +715,7 @@ vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) order = fls(npages) - 1; KASSERT(order < VM_NFREEORDER, ("%s: order %d is out of range", __func__, order)); + m->pool = pool; vm_freelist_add(fl, m, order, tail); m += 1 << order; npages -= 1 << order; @@ -729,7 +736,8 @@ vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) * parameter m. Otherwise, the physical page m's buddy must not be free. */ static vm_page_t -vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) +vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool, + int tail) { int order; @@ -745,6 +753,7 @@ vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) order = ffs(npages) - 1; KASSERT(order < VM_NFREEORDER, ("vm_phys_enq_range: order %d is out of range", order)); + m->pool = pool; vm_freelist_add(fl, m, order, tail); m += 1 << order; npages -= 1 << order; @@ -752,18 +761,6 @@ vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) return (m); } -/* - * Set the pool for a contiguous, power of two-sized set of physical pages. - */ -static void -vm_phys_set_pool(int pool, vm_page_t m, int order) -{ - vm_page_t m_tmp; - - for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) - m_tmp->pool = pool; -} - /* * Tries to allocate the specified number of pages from the specified pool * within the specified domain. Returns the actual number of allocated pages @@ -772,7 +769,8 @@ vm_phys_set_pool(int pool, vm_page_t m, int order) * The returned pages may not be physically contiguous. However, in contrast * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0), * calling this function once to allocate the desired number of pages will - * avoid wasted time in vm_phys_split_pages(). + * avoid wasted time in vm_phys_split_pages(). Sets the pool field for + * every allocated page. * * The free page queues for the specified domain must be locked. */ @@ -801,14 +799,18 @@ vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]) vm_freelist_rem(fl, m, oind); avail = i + (1 << oind); end = imin(npages, avail); - while (i < end) + ma[i++] = m++; + while (i < end) { + m->pool = pool; ma[i++] = m++; + } if (i == npages) { /* * Return excess pages to fl. Its order * [0, oind) queues are empty. */ - vm_phys_enq_range(m, avail - i, fl, 1); + vm_phys_enq_range(m, avail - i, fl, + pool, 1); return (npages); } } @@ -819,11 +821,12 @@ vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]) while ((m = TAILQ_FIRST(&alt[oind].pl)) != NULL) { vm_freelist_rem(alt, m, oind); - vm_phys_set_pool(pool, m, oind); avail = i + (1 << oind); end = imin(npages, avail); - while (i < end) + do { + m->pool = pool; ma[i++] = m++; + } while (i < end); if (i == npages) { /* * Return excess pages to fl. @@ -831,7 +834,7 @@ vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]) * are empty. */ vm_phys_enq_range(m, avail - i, - fl, 1); + fl, pool, 1); return (npages); } } @@ -843,7 +846,7 @@ vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]) /* * Allocate a contiguous, power of two-sized set of physical pages - * from the free lists. + * from the free lists. Sets the pool field in the first page only. * * The free page queues must be locked. */ @@ -864,7 +867,8 @@ vm_phys_alloc_pages(int domain, int pool, int order) /* * Allocate a contiguous, power of two-sized set of physical pages from the * specified free list. The free list must be specified using one of the - * manifest constants VM_FREELIST_*. + * manifest constants VM_FREELIST_*. Sets the pool field in the first page + * only. * * The free page queues must be locked. */ @@ -915,7 +919,7 @@ vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order) m = TAILQ_FIRST(&alt[oind].pl); if (m != NULL) { vm_freelist_rem(alt, m, oind); - vm_phys_set_pool(pool, m, oind); + m->pool = pool; /* The order [order, oind) queues are empty. */ vm_phys_split_pages(m, oind, fl, order, 1); return (m); @@ -1122,7 +1126,8 @@ vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) } /* - * Free a contiguous, power of two-sized set of physical pages. + * Free a contiguous, power of two-sized set of physical pages. Assumes that + * only the first page has a valid pool field. * * The free page queues must be locked. */ @@ -1133,18 +1138,19 @@ vm_phys_free_pages(vm_page_t m, int order) struct vm_phys_seg *seg; vm_paddr_t pa; vm_page_t m_buddy; + int pool = m->pool; KASSERT(m->order == VM_NFREEORDER, ("vm_phys_free_pages: page %p has unexpected order %d", m, m->order)); - KASSERT(m->pool < VM_NFREEPOOL, - ("vm_phys_free_pages: page %p has unexpected pool %d", - m, m->pool)); + KASSERT(pool < VM_NFREEPOOL, + ("vm_phys_free_pages: page %p has unexpected pool %d", m, pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_free_pages: order %d is out of range", order)); seg = &vm_phys_segs[m->segind]; vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); if (order < VM_NFREEORDER - 1) { + vm_page_t m_start = m; pa = VM_PAGE_TO_PHYS(m); do { pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); @@ -1155,25 +1161,28 @@ vm_phys_free_pages(vm_page_t m, int order) break; fl = (*seg->free_queues)[m_buddy->pool]; vm_freelist_rem(fl, m_buddy, order); - if (m_buddy->pool != m->pool) - vm_phys_set_pool(m->pool, m_buddy, order); + m_buddy->pool = VM_NFREEPOOL; order++; pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); m = &seg->first_page[atop(pa - seg->start)]; } while (order < VM_NFREEORDER - 1); + if (m != m_start) { + m_start->pool = VM_NFREEPOOL; + m->pool = pool; + } } - fl = (*seg->free_queues)[m->pool]; + fl = (*seg->free_queues)[pool]; vm_freelist_add(fl, m, order, 1); } /* - * Free a contiguous, arbitrarily sized set of physical pages, without - * merging across set boundaries. + * Free a contiguous, arbitrarily sized set of physical pages, without merging + * across set boundaries. Assumes no pages have a valid pool field. * * The free page queues must be locked. */ void -vm_phys_enqueue_contig(vm_page_t m, u_long npages) +vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages) { struct vm_freelist *fl; struct vm_phys_seg *seg; @@ -1187,14 +1196,15 @@ vm_phys_enqueue_contig(vm_page_t m, u_long npages) */ vm_domain_free_assert_locked(vm_pagequeue_domain(m)); seg = &vm_phys_segs[m->segind]; - fl = (*seg->free_queues)[m->pool]; + fl = (*seg->free_queues)[pool]; m_end = m + npages; /* Free blocks of increasing size. */ lo = atop(VM_PAGE_TO_PHYS(m)); if (m < m_end && (diff = lo ^ (lo + npages - 1)) != 0) { order = min(flsll(diff) - 1, VM_NFREEORDER - 1); - m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl, 1); + m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl, + pool, 1); } /* Free blocks of maximum size. */ @@ -1203,15 +1213,17 @@ vm_phys_enqueue_contig(vm_page_t m, u_long npages) KASSERT(seg == &vm_phys_segs[m->segind], ("%s: page range [%p,%p) spans multiple segments", __func__, m_end - npages, m)); + m->pool = pool; vm_freelist_add(fl, m, order, 1); m += 1 << order; } /* Free blocks of diminishing size. */ - vm_phys_enq_beg(m, m_end - m, fl, 1); + vm_phys_enq_beg(m, m_end - m, fl, pool, 1); } /* * Free a contiguous, arbitrarily sized set of physical pages. + * Assumes that every page has the same, valid, pool field value. * * The free page queues must be locked. */ @@ -1221,17 +1233,22 @@ vm_phys_free_contig(vm_page_t m, u_long npages) vm_paddr_t lo; vm_page_t m_start, m_end; unsigned max_order, order_start, order_end; + int pool = m->pool; + + KASSERT(pool < VM_NFREEPOOL, + ("%s: pool %d is out of range", __func__, pool)); vm_domain_free_assert_locked(vm_pagequeue_domain(m)); lo = atop(VM_PAGE_TO_PHYS(m)); max_order = min(flsll(lo ^ (lo + npages)) - 1, VM_NFREEORDER - 1); - - m_start = m; + m_end = m + npages; + for (m_start = m; m < m_end; m++) + m->pool = VM_NFREEPOOL; + m = m_start; order_start = ffsll(lo) - 1; if (order_start < max_order) m_start += 1 << order_start; - m_end = m + npages; order_end = ffsll(lo + npages) - 1; if (order_end < max_order) m_end -= 1 << order_end; @@ -1240,11 +1257,15 @@ vm_phys_free_contig(vm_page_t m, u_long npages) * end of the range last. */ if (m_start < m_end) - vm_phys_enqueue_contig(m_start, m_end - m_start); - if (order_start < max_order) + vm_phys_enqueue_contig(m_start, pool, m_end - m_start); + if (order_start < max_order) { + m->pool = pool; vm_phys_free_pages(m, order_start); - if (order_end < max_order) + } + if (order_end < max_order) { + m_end->pool = pool; vm_phys_free_pages(m_end, order_end); + } } /* @@ -1292,7 +1313,7 @@ vm_phys_unfree_page(vm_page_t m) struct vm_phys_seg *seg; vm_paddr_t pa, pa_half; vm_page_t m_set, m_tmp; - int order; + int order, pool; /* * First, find the contiguous, power of two-sized set of free @@ -1324,7 +1345,8 @@ vm_phys_unfree_page(vm_page_t m) * is larger than a page, shrink "m_set" by returning the half * of "m_set" that does not contain "m" to the free lists. */ - fl = (*seg->free_queues)[m_set->pool]; + pool = m_set->pool; + fl = (*seg->free_queues)[pool]; order = m_set->order; vm_freelist_rem(fl, m_set, order); while (order > 0) { @@ -1336,8 +1358,10 @@ vm_phys_unfree_page(vm_page_t m) m_tmp = m_set; m_set = &seg->first_page[atop(pa_half - seg->start)]; } + m_tmp->pool = pool; vm_freelist_add(fl, m_tmp, order, 0); } + m_set->pool = pool; KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); return (true); } @@ -1477,7 +1501,8 @@ vm_phys_find_queues_contig( * alignment of the first physical page in the set. If the given value * "boundary" is non-zero, then the set of physical pages cannot cross * any physical address boundary that is a multiple of that value. Both - * "alignment" and "boundary" must be a power of two. + * "alignment" and "boundary" must be a power of two. Sets the pool + * field in every allocated page. */ vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, @@ -1536,14 +1561,16 @@ vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, fl = (*queues)[m->pool]; oind = m->order; vm_freelist_rem(fl, m, oind); - if (m->pool != VM_FREEPOOL_DEFAULT) - vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind); } /* Return excess pages to the free lists. */ fl = (*queues)[VM_FREEPOOL_DEFAULT]; - vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl, 0); + vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl, + VM_FREEPOOL_DEFAULT, 0); /* Return page verified to satisfy conditions of request. */ + for (m = m_run; m < &m_run[npages]; m++) + m->pool = VM_FREEPOOL_DEFAULT; + pa_start = VM_PAGE_TO_PHYS(m_run); KASSERT(low <= pa_start, ("memory allocated below minimum requested range")); diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h index 1f502ad745c4..8b2d2f7e2251 100644 --- a/sys/vm/vm_phys.h +++ b/sys/vm/vm_phys.h @@ -66,7 +66,7 @@ vm_page_t vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]); vm_page_t vm_phys_alloc_pages(int domain, int pool, int order); int vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high); -void vm_phys_enqueue_contig(vm_page_t m, u_long npages); +void vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages); int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, vm_memattr_t memattr); void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end); diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c index a4bbccdfb428..80f62981dee0 100644 --- a/sys/vm/vm_reserv.c +++ b/sys/vm/vm_reserv.c @@ -889,30 +889,35 @@ out: static void vm_reserv_break(vm_reserv_t rv) { - int hi, lo, pos; + int pool, pos, pos0, pos1; vm_reserv_assert_locked(rv); CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d", __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq); vm_reserv_remove(rv); rv->pages->psind = 0; - hi = lo = -1; - pos = 0; - for (;;) { - bit_ff_at(rv->popmap, pos, VM_LEVEL_0_NPAGES, lo != hi, &pos); - if (lo == hi) { - if (pos == -1) - break; - lo = pos; - continue; - } + pool = rv->pages->pool; + rv->pages->pool = VM_NFREEPOOL; + pos0 = bit_test(rv->popmap, 0) ? -1 : 0; + pos1 = -1 - pos0; + for (pos = 0; pos < VM_LEVEL_0_NPAGES; ) { + /* Find the first different bit after pos. */ + bit_ff_at(rv->popmap, pos + 1, VM_LEVEL_0_NPAGES, + pos1 < pos0, &pos); if (pos == -1) pos = VM_LEVEL_0_NPAGES; - hi = pos; + if (pos0 <= pos1) { + /* Set pool for pages from pos1 to pos. */ + pos0 = pos1; + while (pos0 < pos) + rv->pages[pos0++].pool = pool; + continue; + } + /* Free unused pages from pos0 to pos. */ + pos1 = pos; vm_domain_free_lock(VM_DOMAIN(rv->domain)); - vm_phys_enqueue_contig(&rv->pages[lo], hi - lo); + vm_phys_enqueue_contig(&rv->pages[pos0], pool, pos1 - pos0); vm_domain_free_unlock(VM_DOMAIN(rv->domain)); - lo = hi; } bit_nclear(rv->popmap, 0, VM_LEVEL_0_NPAGES - 1); rv->popcnt = 0;