svn commit: r249834 - in user/attilio/jeff-numa/sys: sys vm
Attilio Rao
attilio at FreeBSD.org
Wed Apr 24 08:51:16 UTC 2013
Author: attilio
Date: Wed Apr 24 08:51:15 2013
New Revision: 249834
URL: http://svnweb.freebsd.org/changeset/base/249834
Log:
o Add accessor functions to add and remove pages from a specific
freelist.
o Split the pool of free_queues really by domain and not relying on
VM_RAW_NFREELIST definition.
o For MAXDOMAIN > 1, wrap the RR allocation logic into a specific function
that is called when calculating the allocation domain.
The RR counter is kept per-thread.
In the future it is expected that such function is going to handle
different types of policies based on specific information retrieved from
curthread and backing vm_objects.
o Add the concept of "probed domains" under the form of vm_ndomains
(it is similar in concept to mp_ncpus but refers to memory domains).
Right now there are no probed domains for any architecture.
It is the responsibility of architecture maintainers to add the proper bits
to do domain probing.
Please note that vm_ndomains and td_dom_rr_idx are both int because
segs store domains as int. u_int would have made much more sense.
Probably we should clean them up altogether in the future.
o Apply RR domain selection also to vm_phys_zero_pages_idle().
Sponsored by: EMC / Isilon storage division
Partly obtained from: jeff
Modified:
user/attilio/jeff-numa/sys/sys/proc.h
user/attilio/jeff-numa/sys/vm/vm_phys.c
user/attilio/jeff-numa/sys/vm/vm_phys.h
Modified: user/attilio/jeff-numa/sys/sys/proc.h
==============================================================================
--- user/attilio/jeff-numa/sys/sys/proc.h Wed Apr 24 06:40:48 2013 (r249833)
+++ user/attilio/jeff-numa/sys/sys/proc.h Wed Apr 24 08:51:15 2013 (r249834)
@@ -274,6 +274,7 @@ struct thread {
pid_t td_dbg_forked; /* (c) Child pid for debugger. */
u_int td_vp_reserv; /* (k) Count of reserved vnodes. */
int td_no_sleeping; /* (k) Sleeping disabled count. */
+ int td_dom_rr_idx; /* (k) RR Numa domain selection. */
#define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */
Modified: user/attilio/jeff-numa/sys/vm/vm_phys.c
==============================================================================
--- user/attilio/jeff-numa/sys/vm/vm_phys.c Wed Apr 24 06:40:48 2013 (r249833)
+++ user/attilio/jeff-numa/sys/vm/vm_phys.c Wed Apr 24 08:51:15 2013 (r249834)
@@ -48,6 +48,9 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#if MAXDOMAIN > 1
+#include <sys/proc.h>
+#endif
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
@@ -62,13 +65,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
-/*
- * VM_FREELIST_DEFAULT is split into MAXDOMAIN lists, one for each
- * domain. These extra lists are stored at the end of the regular
- * free lists starting with VM_NFREELIST.
- */
-#define VM_RAW_NFREELIST (VM_NFREELIST + MAXDOMAIN - 1)
-
struct vm_freelist {
struct pglist pl;
int lcnt;
@@ -84,6 +80,8 @@ struct vm_phys_seg {
struct mem_affinity *mem_affinity;
+int vm_ndomains = 1;
+
static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
static int vm_phys_nsegs;
@@ -98,9 +96,7 @@ static struct mtx vm_phys_fictitious_reg
MALLOC_DEFINE(M_FICT_PAGES, "", "");
static struct vm_freelist
- vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
-static struct vm_freelist
-(*vm_phys_lookup_lists[MAXDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];
+ vm_phys_free_queues[MAXDOMAIN][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
@@ -116,11 +112,8 @@ static int sysctl_vm_phys_segs(SYSCTL_HA
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
-#if MAXDOMAIN > 1
-static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
-SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
- NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
-#endif
+SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
+ &vm_ndomains, 0, "Number of physical memory domains available.");
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
int domain);
@@ -129,6 +122,22 @@ static int vm_phys_paddr_to_segind(vm_pa
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
int order);
+static __inline int
+vm_rr_selectdomain(void)
+{
+#if MAXDOMAIN > 1
+ struct thread *td;
+
+ td = curthread;
+
+ td->td_dom_rr_idx++;
+ td->td_dom_rr_idx %= vm_ndomains;
+ return (td->td_dom_rr_idx);
+#else
+ return (0);
+#endif
+}
+
/*
* Outputs the state of the physical memory allocator, specifically,
* the amount of physical memory in each free list.
@@ -137,8 +146,7 @@ static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
struct sbuf sbuf;
- struct vm_freelist *fl;
- int error, flind, oind, pind;
+ int dom, error, flind, lcnt, oind, pind;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
@@ -158,8 +166,10 @@ sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
sbuf_printf(&sbuf, " %2d (%6dK)", oind,
1 << (PAGE_SHIFT - 10 + oind));
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
- fl = vm_phys_free_queues[flind][pind];
- sbuf_printf(&sbuf, " | %6d", fl[oind].lcnt);
+ lcnt = 0;
+ for (dom = 0; dom < vm_ndomains; dom++)
+ lcnt += vm_phys_free_queues[dom][flind][pind][oind].lcnt;
+ sbuf_printf(&sbuf, " | %6d", lcnt);
}
sbuf_printf(&sbuf, "\n");
}
@@ -198,33 +208,27 @@ sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
return (error);
}
-#if MAXDOMAIN > 1
-/*
- * Outputs the set of free list lookup lists.
- */
-static int
-sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
+static void
+vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{
- struct sbuf sbuf;
- int domain, error, flind, ndomains;
- error = sysctl_wire_old_buffer(req, 0);
- if (error != 0)
- return (error);
- sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
- ndomains = vm_nfreelists - VM_NFREELIST + 1;
- for (domain = 0; domain < ndomains; domain++) {
- sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
- for (flind = 0; flind < vm_nfreelists; flind++)
- sbuf_printf(&sbuf, " [%d]:\t%p\n", flind,
- vm_phys_lookup_lists[domain][flind]);
- }
- error = sbuf_finish(&sbuf);
- sbuf_delete(&sbuf);
- return (error);
+ m->order = order;
+ if (tail)
+ TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
+ else
+ TAILQ_INSERT_HEAD(&fl[order].pl, m, pageq);
+ fl[order].lcnt++;
}
-#endif
-
+
+static void
+vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
+{
+
+ TAILQ_REMOVE(&fl[order].pl, m, pageq);
+ fl[order].lcnt--;
+ m->order = VM_NFREEORDER;
+}
+
/*
* Create a physical memory segment.
*/
@@ -253,14 +257,7 @@ _vm_phys_create_seg(vm_paddr_t start, vm
#else
seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
-#if MAXDOMAIN > 1
- if (flind == VM_FREELIST_DEFAULT && domain != 0) {
- flind = VM_NFREELIST + (domain - 1);
- if (flind >= vm_nfreelists)
- vm_nfreelists = flind + 1;
- }
-#endif
- seg->free_queues = &vm_phys_free_queues[flind];
+ seg->free_queues = &vm_phys_free_queues[domain][flind];
}
static void
@@ -299,10 +296,7 @@ void
vm_phys_init(void)
{
struct vm_freelist *fl;
- int flind, i, oind, pind;
-#if MAXDOMAIN > 1
- int ndomains, j;
-#endif
+ int dom, flind, i, oind, pind;
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_FREELIST_ISADMA
@@ -338,45 +332,15 @@ vm_phys_init(void)
vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
VM_FREELIST_DEFAULT);
}
- for (flind = 0; flind < vm_nfreelists; flind++) {
- for (pind = 0; pind < VM_NFREEPOOL; pind++) {
- fl = vm_phys_free_queues[flind][pind];
- for (oind = 0; oind < VM_NFREEORDER; oind++)
- TAILQ_INIT(&fl[oind].pl);
+ for (dom = 0; dom < vm_ndomains; dom++) {
+ for (flind = 0; flind < vm_nfreelists; flind++) {
+ for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+ fl = vm_phys_free_queues[dom][flind][pind];
+ for (oind = 0; oind < VM_NFREEORDER; oind++)
+ TAILQ_INIT(&fl[oind].pl);
+ }
}
}
-#if MAXDOMAIN > 1
- /*
- * Build a free list lookup list for each domain. All of the
- * memory domain lists are inserted at the VM_FREELIST_DEFAULT
- * index in a round-robin order starting with the current
- * domain.
- */
- ndomains = vm_nfreelists - VM_NFREELIST + 1;
- for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
- for (i = 0; i < ndomains; i++)
- vm_phys_lookup_lists[i][flind] =
- &vm_phys_free_queues[flind];
- for (i = 0; i < ndomains; i++)
- for (j = 0; j < ndomains; j++) {
- flind = (i + j) % ndomains;
- if (flind == 0)
- flind = VM_FREELIST_DEFAULT;
- else
- flind += VM_NFREELIST - 1;
- vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
- &vm_phys_free_queues[flind];
- }
- for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
- flind++)
- for (i = 0; i < ndomains; i++)
- vm_phys_lookup_lists[i][flind + ndomains - 1] =
- &vm_phys_free_queues[flind];
-#else
- for (flind = 0; flind < vm_nfreelists; flind++)
- vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
-#endif
-
mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}
@@ -394,9 +358,7 @@ vm_phys_split_pages(vm_page_t m, int oin
KASSERT(m_buddy->order == VM_NFREEORDER,
("vm_phys_split_pages: page %p has unexpected order %d",
m_buddy, m_buddy->order));
- m_buddy->order = oind;
- TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
- fl[oind].lcnt++;
+ vm_freelist_add(fl, m_buddy, oind, 0);
}
}
@@ -426,35 +388,15 @@ vm_phys_add_page(vm_paddr_t pa)
}
/*
- * Allocate a contiguous, power of two-sized set of physical pages
- * from the free lists.
- *
- * The free page queues must be locked.
- */
-vm_page_t
-vm_phys_alloc_pages(int pool, int order)
-{
- vm_page_t m;
- int flind;
-
- for (flind = 0; flind < vm_nfreelists; flind++) {
- m = vm_phys_alloc_freelist_pages(flind, pool, order);
- if (m != NULL)
- return (m);
- }
- return (NULL);
-}
-
-/*
* Find and dequeue a free page on the given free list, with the
* specified pool and order
*/
-vm_page_t
-vm_phys_alloc_freelist_pages(int flind, int pool, int order)
+static vm_page_t
+vm_phys_alloc_freelist_pages_domain(int domain, int flind, int pool, int order)
{
struct vm_freelist *fl;
struct vm_freelist *alt;
- int domain, oind, pind;
+ int oind, pind;
vm_page_t m;
KASSERT(flind < VM_NFREELIST,
@@ -464,19 +406,12 @@ vm_phys_alloc_freelist_pages(int flind,
KASSERT(order < VM_NFREEORDER,
("vm_phys_alloc_freelist_pages: order %d is out of range", order));
-#if MAXDOMAIN > 1
- domain = PCPU_GET(domain);
-#else
- domain = 0;
-#endif
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- fl = (*vm_phys_lookup_lists[domain][flind])[pool];
+ fl = &vm_phys_free_queues[domain][flind][pool][0];
for (oind = order; oind < VM_NFREEORDER; oind++) {
m = TAILQ_FIRST(&fl[oind].pl);
if (m != NULL) {
- TAILQ_REMOVE(&fl[oind].pl, m, pageq);
- fl[oind].lcnt--;
- m->order = VM_NFREEORDER;
+ vm_freelist_rem(fl, m, oind);
vm_phys_split_pages(m, oind, fl, order);
return (m);
}
@@ -490,12 +425,10 @@ vm_phys_alloc_freelist_pages(int flind,
*/
for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
- alt = (*vm_phys_lookup_lists[domain][flind])[pind];
+ alt = &vm_phys_free_queues[domain][flind][pind][0];
m = TAILQ_FIRST(&alt[oind].pl);
if (m != NULL) {
- TAILQ_REMOVE(&alt[oind].pl, m, pageq);
- alt[oind].lcnt--;
- m->order = VM_NFREEORDER;
+ vm_freelist_rem(alt, m, oind);
vm_phys_set_pool(pool, m, oind);
vm_phys_split_pages(m, oind, fl, order);
return (m);
@@ -506,6 +439,40 @@ vm_phys_alloc_freelist_pages(int flind,
}
/*
+ * See the comments for vm_phys_alloc_freelist_pages_domain().
+ * When MAXDOMAIN is bumped picks up a domain in round-robin fashion.
+ */
+vm_page_t
+vm_phys_alloc_freelist_pages(int flind, int pool, int order)
+{
+
+ return (vm_phys_alloc_freelist_pages_domain(vm_rr_selectdomain(),
+ flind, pool, order));
+}
+
+/*
+ * Allocate a contiguous, power of two-sized set of physical pages
+ * from the free lists.
+ *
+ * The free page queues must be locked.
+ */
+vm_page_t
+vm_phys_alloc_pages(int pool, int order)
+{
+ vm_page_t m;
+ int domain, flind;
+
+ domain = vm_rr_selectdomain();
+ for (flind = 0; flind < vm_nfreelists; flind++) {
+ m = vm_phys_alloc_freelist_pages_domain(domain, flind, pool,
+ order);
+ if (m != NULL)
+ return (m);
+ }
+ return (NULL);
+}
+
+/*
* Find the vm_page corresponding to the given physical address.
*/
vm_page_t
@@ -679,9 +646,7 @@ vm_phys_free_pages(vm_page_t m, int orde
if (m_buddy->order != order)
break;
fl = (*seg->free_queues)[m_buddy->pool];
- TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
- fl[order].lcnt--;
- m_buddy->order = VM_NFREEORDER;
+ vm_freelist_rem(fl, m_buddy, order);
if (m_buddy->pool != m->pool)
vm_phys_set_pool(m->pool, m_buddy, order);
order++;
@@ -689,10 +654,8 @@ vm_phys_free_pages(vm_page_t m, int orde
m = &seg->first_page[atop(pa - seg->start)];
} while (order < VM_NFREEORDER - 1);
}
- m->order = order;
fl = (*seg->free_queues)[m->pool];
- TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
- fl[order].lcnt++;
+ vm_freelist_add(fl, m, order, 1);
}
/*
@@ -797,9 +760,7 @@ vm_phys_unfree_page(vm_page_t m)
*/
fl = (*seg->free_queues)[m_set->pool];
order = m_set->order;
- TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
- fl[order].lcnt--;
- m_set->order = VM_NFREEORDER;
+ vm_freelist_rem(fl, m_set, order);
while (order > 0) {
order--;
pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
@@ -809,9 +770,7 @@ vm_phys_unfree_page(vm_page_t m)
m_tmp = m_set;
m_set = &seg->first_page[atop(pa_half - seg->start)];
}
- m_tmp->order = order;
- TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
- fl[order].lcnt++;
+ vm_freelist_add(fl, m_tmp, order, 0);
}
KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
return (TRUE);
@@ -823,10 +782,13 @@ vm_phys_unfree_page(vm_page_t m)
boolean_t
vm_phys_zero_pages_idle(void)
{
- static struct vm_freelist *fl = vm_phys_free_queues[0][0];
+ static struct vm_freelist *fl;
static int flind, oind, pind;
vm_page_t m, m_tmp;
+ int domain;
+ domain = vm_rr_selectdomain();
+ fl = vm_phys_free_queues[domain][0][0];
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
for (;;) {
TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
@@ -856,7 +818,7 @@ vm_phys_zero_pages_idle(void)
if (flind == vm_nfreelists)
flind = 0;
}
- fl = vm_phys_free_queues[flind][pind];
+ fl = vm_phys_free_queues[domain][flind][pind];
}
}
}
@@ -883,12 +845,8 @@ vm_phys_alloc_contig(u_long npages, vm_p
int domain, flind, oind, order, pind;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-#if MAXDOMAIN > 1
- domain = PCPU_GET(domain);
-#else
- domain = 0;
-#endif
size = npages << PAGE_SHIFT;
+ domain = vm_rr_selectdomain();
KASSERT(size != 0,
("vm_phys_alloc_contig: size must not be 0"));
KASSERT((alignment & (alignment - 1)) == 0,
@@ -900,8 +858,7 @@ vm_phys_alloc_contig(u_long npages, vm_p
for (flind = 0; flind < vm_nfreelists; flind++) {
for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
- fl = (*vm_phys_lookup_lists[domain][flind])
- [pind];
+ fl = &vm_phys_free_queues[domain][flind][pind][0];
TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
/*
* A free list may contain physical pages
@@ -959,9 +916,7 @@ vm_phys_alloc_contig(u_long npages, vm_p
done:
for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
fl = (*seg->free_queues)[m->pool];
- TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
- fl[m->order].lcnt--;
- m->order = VM_NFREEORDER;
+ vm_freelist_rem(fl, m, m->order);
}
if (m_ret->pool != VM_FREEPOOL_DEFAULT)
vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
@@ -981,28 +936,30 @@ done:
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
struct vm_freelist *fl;
- int flind, oind, pind;
+ int flind, oind, pind, dom;
- for (flind = 0; flind < vm_nfreelists; flind++) {
- db_printf("FREE LIST %d:\n"
- "\n ORDER (SIZE) | NUMBER"
- "\n ", flind);
- for (pind = 0; pind < VM_NFREEPOOL; pind++)
- db_printf(" | POOL %d", pind);
- db_printf("\n-- ");
- for (pind = 0; pind < VM_NFREEPOOL; pind++)
- db_printf("-- -- ");
- db_printf("--\n");
- for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
- db_printf(" %2.2d (%6.6dK)", oind,
- 1 << (PAGE_SHIFT - 10 + oind));
- for (pind = 0; pind < VM_NFREEPOOL; pind++) {
- fl = vm_phys_free_queues[flind][pind];
- db_printf(" | %6.6d", fl[oind].lcnt);
+ for (dom = 0; dom < vm_ndomains; dom++) {
+ for (flind = 0; flind < vm_nfreelists; flind++) {
+ db_printf("DOMAIN %d FREE LIST %d:\n"
+ "\n ORDER (SIZE) | NUMBER"
+ "\n ", dom, flind);
+ for (pind = 0; pind < VM_NFREEPOOL; pind++)
+ db_printf(" | POOL %d", pind);
+ db_printf("\n-- ");
+ for (pind = 0; pind < VM_NFREEPOOL; pind++)
+ db_printf("-- -- ");
+ db_printf("--\n");
+ for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
+ db_printf(" %2.2d (%6.6dK)", oind,
+ 1 << (PAGE_SHIFT - 10 + oind));
+ for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+ fl = vm_phys_free_queues[dom][flind][pind];
+ db_printf(" | %6.6d", fl[oind].lcnt);
+ }
+ db_printf("\n");
}
db_printf("\n");
}
- db_printf("\n");
}
}
#endif
Modified: user/attilio/jeff-numa/sys/vm/vm_phys.h
==============================================================================
--- user/attilio/jeff-numa/sys/vm/vm_phys.h Wed Apr 24 06:40:48 2013 (r249833)
+++ user/attilio/jeff-numa/sys/vm/vm_phys.h Wed Apr 24 08:51:15 2013 (r249834)
@@ -48,6 +48,7 @@ struct mem_affinity {
};
extern struct mem_affinity *mem_affinity;
+extern int vm_ndomains;
/*
* The following functions are only to be used by the virtual memory system.
More information about the svn-src-user
mailing list