svn commit: r326889 - in user/jeff/numa: lib/libc/sys sys/compat/freebsd32 sys/conf sys/kern sys/sys sys/vm usr.bin/cpuset
Jeff Roberson
jeff at FreeBSD.org
Fri Dec 15 23:35:21 UTC 2017
Author: jeff
Date: Fri Dec 15 23:35:20 2017
New Revision: 326889
URL: https://svnweb.freebsd.org/changeset/base/326889
Log:
First cut of NUMA domain integration into cpuset.
Added:
user/jeff/numa/sys/sys/_domainset.h (contents, props changed)
user/jeff/numa/sys/sys/domainset.h (contents, props changed)
user/jeff/numa/sys/vm/vm_domainset.c (contents, props changed)
user/jeff/numa/sys/vm/vm_domainset.h (contents, props changed)
Modified:
user/jeff/numa/lib/libc/sys/Symbol.map
user/jeff/numa/sys/compat/freebsd32/syscalls.master
user/jeff/numa/sys/conf/files
user/jeff/numa/sys/kern/init_main.c
user/jeff/numa/sys/kern/init_sysent.c
user/jeff/numa/sys/kern/kern_cpuset.c
user/jeff/numa/sys/kern/kern_exit.c
user/jeff/numa/sys/kern/kern_fork.c
user/jeff/numa/sys/kern/kern_numa.c
user/jeff/numa/sys/kern/kern_thr.c
user/jeff/numa/sys/kern/kern_thread.c
user/jeff/numa/sys/kern/makesyscalls.sh
user/jeff/numa/sys/kern/sched_4bsd.c
user/jeff/numa/sys/kern/sched_ule.c
user/jeff/numa/sys/kern/syscalls.c
user/jeff/numa/sys/kern/syscalls.master
user/jeff/numa/sys/kern/systrace_args.c
user/jeff/numa/sys/sys/cpuset.h
user/jeff/numa/sys/sys/param.h
user/jeff/numa/sys/sys/proc.h
user/jeff/numa/sys/sys/syscall.h
user/jeff/numa/sys/sys/syscall.mk
user/jeff/numa/sys/sys/syscallsubr.h
user/jeff/numa/sys/sys/sysproto.h
user/jeff/numa/sys/vm/uma_core.c
user/jeff/numa/sys/vm/vm_fault.c
user/jeff/numa/sys/vm/vm_kern.c
user/jeff/numa/sys/vm/vm_object.c
user/jeff/numa/sys/vm/vm_object.h
user/jeff/numa/sys/vm/vm_page.c
user/jeff/numa/sys/vm/vm_phys.c
user/jeff/numa/usr.bin/cpuset/cpuset.c
Modified: user/jeff/numa/lib/libc/sys/Symbol.map
==============================================================================
--- user/jeff/numa/lib/libc/sys/Symbol.map Fri Dec 15 23:19:49 2017 (r326888)
+++ user/jeff/numa/lib/libc/sys/Symbol.map Fri Dec 15 23:35:20 2017 (r326889)
@@ -398,6 +398,8 @@ FBSD_1.5 {
mknodat;
stat;
statfs;
+ cpuset_getdomain;
+ cpuset_setdomain;
};
FBSDprivate_1.0 {
@@ -1022,4 +1024,8 @@ FBSDprivate_1.0 {
gssd_syscall;
__libc_interposing_slot;
__libc_sigwait;
+ _cpuset_getdomain;
+ __sys_cpuset_getdomain;
+ _cpuset_setdomain;
+ __sys_cpuset_setdomain;
};
Modified: user/jeff/numa/sys/compat/freebsd32/syscalls.master
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/syscalls.master Fri Dec 15 23:19:49 2017 (r326888)
+++ user/jeff/numa/sys/compat/freebsd32/syscalls.master Fri Dec 15 23:35:20 2017 (r326889)
@@ -1119,4 +1119,13 @@
struct kevent32 *eventlist, \
int nevents, \
const struct timespec32 *timeout); }
+561 AUE_NULL STD { int cpuset_getdomain(cpulevel_t level, \
+ cpuwhich_t which, id_t id, \
+ size_t domainsetsize, domainset_t *mask, \
+ int *policy); }
+562 AUE_NULL STD { int cpuset_setdomain(cpulevel_t level, \
+ cpuwhich_t which, id_t id, \
+ size_t domainsetsize, domainset_t *mask, \
+ int policy); }
+
; vim: syntax=off
Modified: user/jeff/numa/sys/conf/files
==============================================================================
--- user/jeff/numa/sys/conf/files Fri Dec 15 23:19:49 2017 (r326888)
+++ user/jeff/numa/sys/conf/files Fri Dec 15 23:35:20 2017 (r326889)
@@ -4816,7 +4816,7 @@ vm/swap_pager.c standard
vm/uma_core.c standard
vm/uma_dbg.c standard
vm/memguard.c optional DEBUG_MEMGUARD
-vm/vm_domain.c standard
+vm/vm_domainset.c standard
vm/vm_fault.c standard
vm/vm_glue.c standard
vm/vm_init.c standard
Modified: user/jeff/numa/sys/kern/init_main.c
==============================================================================
--- user/jeff/numa/sys/kern/init_main.c Fri Dec 15 23:19:49 2017 (r326888)
+++ user/jeff/numa/sys/kern/init_main.c Fri Dec 15 23:35:20 2017 (r326889)
@@ -493,10 +493,7 @@ proc0_init(void *dummy __unused)
td->td_flags = TDF_INMEM;
td->td_pflags = TDP_KTHREAD;
td->td_cpuset = cpuset_thread0();
- vm_domain_policy_init(&td->td_vm_dom_policy);
- vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1);
- vm_domain_policy_init(&p->p_vm_dom_policy);
- vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1);
+ td->td_domain.dr_policy = td->td_cpuset->cs_domain;
prison0_init();
p->p_peers = 0;
p->p_leader = p;
Modified: user/jeff/numa/sys/kern/init_sysent.c
==============================================================================
--- user/jeff/numa/sys/kern/init_sysent.c Fri Dec 15 23:19:49 2017 (r326888)
+++ user/jeff/numa/sys/kern/init_sysent.c Fri Dec 15 23:35:20 2017 (r326889)
@@ -612,4 +612,6 @@ struct sysent sysent[] = {
{ AS(fhstatfs_args), (sy_call_t *)sys_fhstatfs, AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 558 = fhstatfs */
{ AS(mknodat_args), (sy_call_t *)sys_mknodat, AUE_MKNODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 559 = mknodat */
{ AS(kevent_args), (sy_call_t *)sys_kevent, AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 560 = kevent */
+ { AS(cpuset_getdomain_args), (sy_call_t *)sys_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = cpuset_getdomain */
+ { AS(cpuset_setdomain_args), (sy_call_t *)sys_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = cpuset_setdomain */
};
Modified: user/jeff/numa/sys/kern/kern_cpuset.c
==============================================================================
--- user/jeff/numa/sys/kern/kern_cpuset.c Fri Dec 15 23:19:49 2017 (r326888)
+++ user/jeff/numa/sys/kern/kern_cpuset.c Fri Dec 15 23:35:20 2017 (r326889)
@@ -51,17 +51,21 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/capsicum.h>
#include <sys/cpuset.h>
+#include <sys/domainset.h>
#include <sys/sx.h>
#include <sys/queue.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <sys/vmmeter.h>
#include <vm/uma.h>
#include <vm/vm.h>
+#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -109,8 +113,10 @@ __FBSDID("$FreeBSD$");
* getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
*/
static uma_zone_t cpuset_zone;
+static uma_zone_t domainset_zone;
static struct mtx cpuset_lock;
static struct setlist cpuset_ids;
+static struct domainlist cpuset_domains;
static struct unrhdr *cpuset_unr;
static struct cpuset *cpuset_zero, *cpuset_default;
@@ -122,6 +128,30 @@ cpuset_t *cpuset_root;
cpuset_t cpuset_domain[MAXMEMDOM];
/*
+ * Find the first non-anonymous set starting from 'set'.
+ */
+static struct cpuset *
+cpuset_getbase(struct cpuset *set)
+{
+
+ if (set->cs_id == CPUSET_INVALID)
+ set = set->cs_parent;
+ return (set);
+}
+
+/*
+ * Walks up the tree from 'set' to find the root.
+ */
+static struct cpuset *
+cpuset_getroot(struct cpuset *set)
+{
+
+ while ((set->cs_flags & CPU_SET_ROOT) == 0 && set->cs_parent != NULL)
+ set = set->cs_parent;
+ return (set);
+}
+
+/*
* Acquire a reference to a cpuset, all pointers must be tracked with refs.
*/
struct cpuset *
@@ -140,12 +170,7 @@ static struct cpuset *
cpuset_refroot(struct cpuset *set)
{
- for (; set->cs_parent != NULL; set = set->cs_parent)
- if (set->cs_flags & CPU_SET_ROOT)
- break;
- cpuset_ref(set);
-
- return (set);
+ return cpuset_ref(cpuset_getroot(set));
}
/*
@@ -157,11 +182,7 @@ static struct cpuset *
cpuset_refbase(struct cpuset *set)
{
- if (set->cs_id == CPUSET_INVALID)
- set = set->cs_parent;
- cpuset_ref(set);
-
- return (set);
+ return cpuset_ref(cpuset_getbase(set));
}
/*
@@ -257,17 +278,25 @@ cpuset_lookup(cpusetid_t setid, struct thread *td)
* will have no valid cpu based on restrictions from the parent.
*/
static int
-_cpuset_create(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask,
- cpusetid_t id)
+_cpuset_create(struct cpuset *set, struct cpuset *parent,
+ const cpuset_t *mask, struct domainset *domain, cpusetid_t id)
{
+ if (domain == NULL)
+ domain = parent->cs_domain;
+ if (mask == NULL)
+ mask = &parent->cs_mask;
if (!CPU_OVERLAP(&parent->cs_mask, mask))
return (EDEADLK);
+ /* The domain must be prepared ahead of time. */
+ if (!DOMAINSET_SUBSET(&parent->cs_domain->ds_mask, &domain->ds_mask))
+ return (EDEADLK);
CPU_COPY(mask, &set->cs_mask);
LIST_INIT(&set->cs_children);
refcount_init(&set->cs_ref, 1);
set->cs_flags = 0;
mtx_lock_spin(&cpuset_lock);
+ set->cs_domain = domain;
CPU_AND(&set->cs_mask, &parent->cs_mask);
set->cs_id = id;
set->cs_parent = cpuset_ref(parent);
@@ -294,8 +323,8 @@ cpuset_create(struct cpuset **setp, struct cpuset *par
id = alloc_unr(cpuset_unr);
if (id == -1)
return (ENFILE);
- *setp = set = uma_zalloc(cpuset_zone, M_WAITOK);
- error = _cpuset_create(set, parent, mask, id);
+ *setp = set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
+ error = _cpuset_create(set, parent, mask, NULL, id);
if (error == 0)
return (0);
free_unr(cpuset_unr, id);
@@ -304,7 +333,187 @@ cpuset_create(struct cpuset **setp, struct cpuset *par
return (error);
}
+static void
+cpuset_freelist_add(struct setlist *list, int count)
+{
+ struct cpuset *set;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ set = uma_zalloc(cpuset_zone, M_ZERO | M_WAITOK);
+ LIST_INSERT_HEAD(list, set, cs_link);
+ }
+}
+
+static void
+cpuset_freelist_init(struct setlist *list, int count)
+{
+
+ LIST_INIT(list);
+ cpuset_freelist_add(list, count);
+}
+
+static void
+cpuset_freelist_free(struct setlist *list)
+{
+ struct cpuset *set;
+
+ while ((set = LIST_FIRST(list)) != NULL) {
+ LIST_REMOVE(set, cs_link);
+ uma_zfree(cpuset_zone, set);
+ }
+}
+
+static void
+domainset_freelist_add(struct domainlist *list, int count)
+{
+ struct domainset *set;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ set = uma_zalloc(domainset_zone, M_ZERO | M_WAITOK);
+ LIST_INSERT_HEAD(list, set, ds_link);
+ }
+}
+
+static void
+domainset_freelist_init(struct domainlist *list, int count)
+{
+
+ LIST_INIT(list);
+ domainset_freelist_add(list, count);
+}
+
+static void
+domainset_freelist_free(struct domainlist *list)
+{
+ struct domainset *set;
+
+ while ((set = LIST_FIRST(list)) != NULL) {
+ LIST_REMOVE(set, ds_link);
+ uma_zfree(domainset_zone, set);
+ }
+}
+
+/* Copy a domainset preserving mask and policy. */
+static void
+domainset_copy(const struct domainset *from, struct domainset *to)
+{
+
+ DOMAINSET_COPY(&from->ds_mask, &to->ds_mask);
+ to->ds_policy = from->ds_policy;
+}
+
+/* Return 1 if mask and policy are equal, otherwise 0. */
+static int
+domainset_equal(const struct domainset *one, const struct domainset *two)
+{
+
+ return (DOMAINSET_CMP(&one->ds_mask, &two->ds_mask) == 0 &&
+ one->ds_policy == two->ds_policy);
+}
+
/*
+ * Look up or create a domainset keyed by ds_mask and ds_policy. If it
+ * does not yet exist the storage in 'domain' is inserted. Otherwise the
+ * storage is returned to 'freelist', or freed to the domainset_zone when
+ * 'freelist' is NULL, and the existing domainset is returned.
+ */
+static struct domainset *
+_domainset_create(struct domainset *domain, struct domainlist *freelist)
+{
+ struct domainset *ndomain;
+
+ mtx_lock_spin(&cpuset_lock);
+ LIST_FOREACH(ndomain, &cpuset_domains, ds_link)
+ if (domainset_equal(ndomain, domain))
+ break;
+ /*
+ * If the domain does not yet exist we insert it and initialize
+ * various iteration helpers which are not part of the key.
+ */
+ if (ndomain == NULL) {
+ LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link);
+ domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask);
+ domain->ds_max = DOMAINSET_FLS(&domain->ds_mask) + 1;
+ }
+ mtx_unlock_spin(&cpuset_lock);
+ if (ndomain == NULL)
+ return (domain);
+ if (freelist != NULL)
+ LIST_INSERT_HEAD(freelist, domain, ds_link);
+ else
+ uma_zfree(domainset_zone, domain);
+ return (ndomain);
+
+}
+
+/*
+ * Create or lookup a domainset based on the key held in 'domain'.
+ */
+static struct domainset *
+domainset_create(const struct domainset *domain)
+{
+ struct domainset *ndomain;
+
+ ndomain = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO);
+ domainset_copy(domain, ndomain);
+ return _domainset_create(ndomain, NULL);
+}
+
+/*
+ * Update thread and kernel_object domainset pointers.
+ */
+static void
+domainset_notify(void)
+{
+ struct thread *td;
+ struct proc *p;
+
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ td->td_domain.dr_policy = td->td_cpuset->cs_domain;
+ thread_unlock(td);
+ }
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ kernel_object->domain.dr_policy = cpuset_default->cs_domain;
+}
+
+/*
+ * Create a new set that is a subset of a parent.
+ */
+static struct domainset *
+domainset_shadow(const struct domainset *pdomain,
+ const struct domainset *domain, struct domainlist *freelist)
+{
+ struct domainset *ndomain;
+
+ ndomain = LIST_FIRST(freelist);
+ LIST_REMOVE(ndomain, ds_link);
+
+ /*
+ * Initialize the key from the request.
+ */
+ domainset_copy(domain, ndomain);
+
+ /*
+ * Restrict the key by the parent.
+ */
+ DOMAINSET_AND(&ndomain->ds_mask, &pdomain->ds_mask);
+
+ return _domainset_create(ndomain, freelist);
+}
+
+/*
* Recursively check for errors that would occur from applying mask to
* the tree of sets starting at 'set'. Checks for sets that would become
* empty as well as RDONLY flags.
@@ -376,10 +585,12 @@ cpuset_modify(struct cpuset *set, cpuset_t *mask)
* Verify that we have access to this set of
* cpus.
*/
- root = set->cs_parent;
- if (root && !CPU_SUBSET(&root->cs_mask, mask))
- return (EINVAL);
+ root = cpuset_getroot(set);
mtx_lock_spin(&cpuset_lock);
+ if (root && !CPU_SUBSET(&root->cs_mask, mask)) {
+ error = EINVAL;
+ goto out;
+ }
error = cpuset_testupdate(set, mask, 0);
if (error)
goto out;
@@ -392,6 +603,136 @@ out:
}
/*
+ * Recursively check for errors that would occur from applying the domainset
+ * 'dset' to the tree of sets starting at 'set'. Checks for sets that would
+ * become empty as well as RDONLY flags, and counts the domains that change.
+ */
+static int
+cpuset_testupdate_domain(struct cpuset *set, struct domainset *dset,
+ struct domainset *orig, int *count, int check_mask)
+{
+ struct cpuset *nset;
+ struct domainset *domain;
+ struct domainset newset;
+ int error;
+
+ mtx_assert(&cpuset_lock, MA_OWNED);
+ if (set->cs_flags & CPU_SET_RDONLY)
+ return (EPERM);
+ domain = set->cs_domain;
+ domainset_copy(domain, &newset);
+ if (!domainset_equal(domain, orig)) {
+ if (!DOMAINSET_OVERLAP(&domain->ds_mask, &dset->ds_mask))
+ return (EDEADLK);
+ DOMAINSET_AND(&newset.ds_mask, &dset->ds_mask);
+ /* Count the number of domains that are changing. */
+ (*count)++;
+ }
+ error = 0;
+ LIST_FOREACH(nset, &set->cs_children, cs_siblings)
+ if ((error = cpuset_testupdate_domain(nset, &newset, domain,
+ count, 1)) != 0)
+ break;
+ return (error);
+}
+
+/*
+ * Applies domainset 'domain' without checking for empty sets or permissions.
+ */
+static void
+cpuset_update_domain(struct cpuset *set, struct domainset *domain,
+ struct domainset *orig, struct domainlist *domains)
+{
+ struct cpuset *nset;
+
+ mtx_assert(&cpuset_lock, MA_OWNED);
+ /*
+ * If this domainset has changed from the parent we must calculate
+ * a new set. Otherwise it simply inherits from the parent. When
+ * we inherit from the parent we get a new mask and policy. If the
+ * set is modified from the parent we keep the policy and only
+ * update the mask.
+ */
+ if (set->cs_domain != orig) {
+ orig = set->cs_domain;
+ set->cs_domain = domainset_shadow(domain, orig, domains);
+ } else
+ set->cs_domain = domain;
+ LIST_FOREACH(nset, &set->cs_children, cs_siblings)
+ cpuset_update_domain(nset, set->cs_domain, orig, domains);
+
+ return;
+}
+
+/*
+ * Modify the set 'set' to use a copy of the domainset provided. Apply this
+ * new mask to restrict all children in the tree. Checks for validity before
+ * applying the changes.
+ */
+static int
+cpuset_modify_domain(struct cpuset *set, struct domainset *domain)
+{
+ struct domainlist domains;
+ struct domainset temp;
+ struct domainset *dset;
+ struct cpuset *root;
+ int ndomains, needed;
+ int error;
+
+ error = priv_check(curthread, PRIV_SCHED_CPUSET);
+ if (error)
+ return (error);
+ /*
+ * In case we are called from within the jail
+ * we do not allow modifying the dedicated root
+ * cpuset of the jail but may still allow to
+ * change child sets.
+ */
+ if (jailed(curthread->td_ucred) &&
+ set->cs_flags & CPU_SET_ROOT)
+ return (EPERM);
+ domainset_freelist_init(&domains, 0);
+ domain = domainset_create(domain);
+ ndomains = needed = 0;
+ do {
+ if (ndomains < needed) {
+ domainset_freelist_add(&domains, needed - ndomains);
+ ndomains = needed;
+ }
+ root = cpuset_getroot(set);
+ mtx_lock_spin(&cpuset_lock);
+ dset = root->cs_domain;
+ /*
+ * Verify that we have access to this set of domains.
+ */
+ if (root &&
+ !DOMAINSET_SUBSET(&dset->ds_mask, &domain->ds_mask)) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Determine whether we can apply this set of domains and
+ * how many new domain structures it will require.
+ */
+ domainset_copy(domain, &temp);
+ needed = 0;
+ error = cpuset_testupdate_domain(set, &temp, set->cs_domain,
+ &needed, 0);
+ if (error)
+ goto out;
+ } while (ndomains < needed);
+ dset = set->cs_domain;
+ cpuset_update_domain(set, domain, dset, &domains);
+out:
+ mtx_unlock_spin(&cpuset_lock);
+ domainset_freelist_free(&domains);
+ if (error == 0)
+ domainset_notify();
+
+ return (error);
+}
+
+/*
* Resolve the 'which' parameter of several cpuset apis.
*
* For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid. Also
@@ -481,44 +822,204 @@ cpuset_which(cpuwhich_t which, id_t id, struct proc **
return (0);
}
+static int
+cpuset_testshadow(struct cpuset *set, const cpuset_t *mask,
+ const struct domainset *domain)
+{
+ struct cpuset *parent;
+ struct domainset *dset;
+
+ parent = cpuset_getbase(set);
+ /*
+ * If we are restricting a cpu mask it must be a subset of the
+ * parent or invalid CPUs have been specified.
+ */
+ if (mask != NULL && !CPU_SUBSET(&parent->cs_mask, mask))
+ return (EINVAL);
+
+ /*
+ * If we are restricting a domain mask it must be a subset of the
+ * parent or invalid domains have been specified.
+ */
+ dset = parent->cs_domain;
+ if (domain != NULL &&
+ !DOMAINSET_SUBSET(&dset->ds_mask, &domain->ds_mask))
+ return (EINVAL);
+
+ return (0);
+}
+
/*
* Create an anonymous set with the provided mask in the space provided by
- * 'fset'. If the passed in set is anonymous we use its parent otherwise
+ * 'nset'. If the passed in set is anonymous we use its parent otherwise
* the new set is a child of 'set'.
*/
static int
-cpuset_shadow(struct cpuset *set, struct cpuset *fset, const cpuset_t *mask)
+cpuset_shadow(struct cpuset *set, struct cpuset **nsetp,
+ const cpuset_t *mask, const struct domainset *domain,
+ struct setlist *cpusets, struct domainlist *domains)
{
struct cpuset *parent;
+ struct cpuset *nset;
+ struct domainset *dset;
+ struct domainset *d;
+ int error;
- if (set->cs_id == CPUSET_INVALID)
- parent = set->cs_parent;
+ error = cpuset_testshadow(set, mask, domain);
+ if (error)
+ return (error);
+
+ parent = cpuset_getbase(set);
+ dset = parent->cs_domain;
+ if (mask == NULL)
+ mask = &set->cs_mask;
+ if (domain != NULL)
+ d = domainset_shadow(dset, domain, domains);
else
- parent = set;
- if (!CPU_SUBSET(&parent->cs_mask, mask))
+ d = set->cs_domain;
+ nset = LIST_FIRST(cpusets);
+ error = _cpuset_create(nset, parent, mask, d, CPUSET_INVALID);
+ if (error == 0) {
+ LIST_REMOVE(nset, cs_link);
+ *nsetp = nset;
+ }
+ return (error);
+}
+
+static struct cpuset *
+cpuset_update_thread(struct thread *td, struct cpuset *nset)
+{
+ struct cpuset *tdset;
+
+ tdset = td->td_cpuset;
+ td->td_cpuset = nset;
+ td->td_domain.dr_policy = nset->cs_domain;
+ sched_affinity(td);
+
+ return (tdset);
+}
+
+static int
+cpuset_setproc_test_maskthread(struct cpuset *tdset, cpuset_t *mask,
+ struct domainset *domain)
+{
+ struct cpuset *parent;
+
+ parent = cpuset_getbase(tdset);
+ if (mask == NULL)
+ mask = &tdset->cs_mask;
+ if (domain == NULL)
+ domain = tdset->cs_domain;
+ return cpuset_testshadow(parent, mask, domain);
+}
+
+static int
+cpuset_setproc_maskthread(struct cpuset *tdset, cpuset_t *mask,
+ struct domainset *domain, struct cpuset **nsetp,
+ struct setlist *freelist, struct domainlist *domainlist)
+{
+ struct cpuset *parent;
+
+ parent = cpuset_getbase(tdset);
+ if (mask == NULL)
+ mask = &tdset->cs_mask;
+ if (domain == NULL)
+ domain = tdset->cs_domain;
+ return cpuset_shadow(parent, nsetp, mask, domain, freelist,
+ domainlist);
+}
+
+static int
+cpuset_setproc_setthread_mask(struct cpuset *tdset, struct cpuset *set,
+ cpuset_t *mask, struct domainset *domain)
+{
+ struct cpuset *parent;
+
+ parent = cpuset_getbase(tdset);
+
+ /*
+ * If the thread restricted its mask then apply that same
+ * restriction to the new set, otherwise take it wholesale.
+ */
+ if (CPU_CMP(&tdset->cs_mask, &parent->cs_mask) != 0) {
+ CPU_COPY(&tdset->cs_mask, mask);
+ CPU_AND(mask, &set->cs_mask);
+ } else
+ CPU_COPY(&set->cs_mask, mask);
+
+ /*
+ * If the thread restricted the domain then we apply the
+ * restriction to the new set but retain the policy.
+ */
+ if (tdset->cs_domain != parent->cs_domain) {
+ domainset_copy(tdset->cs_domain, domain);
+ DOMAINSET_AND(&domain->ds_mask, &set->cs_domain->ds_mask);
+ } else
+ domainset_copy(set->cs_domain, domain);
+
+ if (CPU_EMPTY(mask) || DOMAINSET_EMPTY(&domain->ds_mask))
return (EDEADLK);
- return (_cpuset_create(fset, parent, mask, CPUSET_INVALID));
+
+ return (0);
}
+static int
+cpuset_setproc_test_setthread(struct cpuset *tdset, struct cpuset *set)
+{
+ struct domainset domain;
+ cpuset_t mask;
+
+ if (tdset->cs_id != CPUSET_INVALID)
+ return (0);
+ return cpuset_setproc_setthread_mask(tdset, set, &mask, &domain);
+}
+
+static int
+cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set,
+ struct cpuset **nsetp, struct setlist *freelist,
+ struct domainlist *domainlist)
+{
+ struct domainset domain;
+ cpuset_t mask;
+ int error;
+
+ /*
+ * If we're replacing on a thread that has not constrained the
+ * original set we can simply accept the new set.
+ */
+ if (tdset->cs_id != CPUSET_INVALID) {
+ *nsetp = cpuset_ref(set);
+ return (0);
+ }
+ error = cpuset_setproc_setthread_mask(tdset, set, &mask, &domain);
+ if (error)
+ return (error);
+
+ return cpuset_shadow(tdset, nsetp, &mask, &domain, freelist,
+ domainlist);
+}
+
/*
- * Handle two cases for replacing the base set or mask of an entire process.
+ * Handle three cases for updating an entire process.
*
- * 1) Set is non-null and mask is null. This reparents all anonymous sets
- * to the provided set and replaces all non-anonymous td_cpusets with the
- * provided set.
- * 2) Mask is non-null and set is null. This replaces or creates anonymous
- * sets for every thread with the existing base as a parent.
+ * 1) Set is non-null. This reparents all anonymous sets to the provided
+ * set and replaces all non-anonymous td_cpusets with the provided set.
+ * 2) Mask is non-null. This replaces or creates anonymous sets for every
+ * thread with the existing base as a parent.
+ * 3) domain is non-null. This creates anonymous sets for every thread
+ * and replaces the domain set.
*
* This is overly complicated because we can't allocate while holding a
* spinlock and spinlocks must be held while changing and examining thread
* state.
*/
static int
-cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask)
+cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
+ struct domainset *domain)
{
struct setlist freelist;
struct setlist droplist;
- struct cpuset *tdset;
+ struct domainlist domainlist;
struct cpuset *nset;
struct thread *td;
struct proc *p;
@@ -533,7 +1034,9 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t
* 2) If enough cpusets have not been allocated release the locks and
* allocate them. Loop.
*/
- LIST_INIT(&freelist);
+ cpuset_freelist_init(&freelist, 1);
+ domainset_freelist_init(&domainlist, 1);
+ nfree = 1;
LIST_INIT(&droplist);
nfree = 0;
for (;;) {
@@ -544,39 +1047,27 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t
break;
threads = p->p_numthreads;
PROC_UNLOCK(p);
- for (; nfree < threads; nfree++) {
- nset = uma_zalloc(cpuset_zone, M_WAITOK);
- LIST_INSERT_HEAD(&freelist, nset, cs_link);
+ if (nfree < threads) {
+ cpuset_freelist_add(&freelist, threads - nfree);
+ domainset_freelist_add(&domainlist, threads - nfree);
+ nfree = threads;
}
}
PROC_LOCK_ASSERT(p, MA_OWNED);
/*
* Now that the appropriate locks are held and we have enough cpusets,
- * make sure the operation will succeed before applying changes. The
+ * make sure the operation will succeed before applying changes. The
* proc lock prevents td_cpuset from changing between calls.
*/
error = 0;
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
- tdset = td->td_cpuset;
- /*
- * Verify that a new mask doesn't specify cpus outside of
- * the set the thread is a member of.
- */
- if (mask) {
- if (tdset->cs_id == CPUSET_INVALID)
- tdset = tdset->cs_parent;
- if (!CPU_SUBSET(&tdset->cs_mask, mask))
- error = EDEADLK;
- /*
- * Verify that a new set won't leave an existing thread
- * mask without a cpu to run on. It can, however, restrict
- * the set.
- */
- } else if (tdset->cs_id == CPUSET_INVALID) {
- if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask))
- error = EDEADLK;
- }
+ if (set != NULL)
+ error = cpuset_setproc_test_setthread(td->td_cpuset,
+ set);
+ else
+ error = cpuset_setproc_test_maskthread(td->td_cpuset,
+ mask, domain);
thread_unlock(td);
if (error)
goto unlock_out;
@@ -588,33 +1079,17 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t
*/
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
- /*
- * If we presently have an anonymous set or are applying a
- * mask we must create an anonymous shadow set. That is
- * either parented to our existing base or the supplied set.
- *
- * If we have a base set with no anonymous shadow we simply
- * replace it outright.
- */
- tdset = td->td_cpuset;
- if (tdset->cs_id == CPUSET_INVALID || mask) {
- nset = LIST_FIRST(&freelist);
- LIST_REMOVE(nset, cs_link);
- if (mask)
- error = cpuset_shadow(tdset, nset, mask);
- else
- error = _cpuset_create(nset, set,
- &tdset->cs_mask, CPUSET_INVALID);
- if (error) {
- LIST_INSERT_HEAD(&freelist, nset, cs_link);
- thread_unlock(td);
- break;
- }
- } else
- nset = cpuset_ref(set);
- cpuset_rel_defer(&droplist, tdset);
- td->td_cpuset = nset;
- sched_affinity(td);
+ if (set != NULL)
+ error = cpuset_setproc_setthread(td->td_cpuset, set,
+ &nset, &freelist, &domainlist);
+ else
+ error = cpuset_setproc_maskthread(td->td_cpuset, mask,
+ domain, &nset, &freelist, &domainlist);
+ if (error) {
+ thread_unlock(td);
+ break;
+ }
+ cpuset_rel_defer(&droplist, cpuset_update_thread(td, nset));
thread_unlock(td);
}
unlock_out:
@@ -622,10 +1097,8 @@ unlock_out:
out:
while ((nset = LIST_FIRST(&droplist)) != NULL)
cpuset_rel_complete(nset);
- while ((nset = LIST_FIRST(&freelist)) != NULL) {
- LIST_REMOVE(nset, cs_link);
- uma_zfree(cpuset_zone, nset);
- }
+ cpuset_freelist_free(&freelist);
+ domainset_freelist_free(&domainlist);
return (error);
}
@@ -690,46 +1163,57 @@ cpusetobj_strscan(cpuset_t *set, const char *buf)
}
/*
- * Apply an anonymous mask to a single thread.
+ * Apply an anonymous mask or a domain to a single thread.
*/
-int
-cpuset_setthread(lwpid_t id, cpuset_t *mask)
+static int
+_cpuset_setthread(lwpid_t id, cpuset_t *mask, struct domainset *domain)
{
+ struct setlist cpusets;
+ struct domainlist domainlist;
struct cpuset *nset;
struct cpuset *set;
struct thread *td;
struct proc *p;
int error;
- nset = uma_zalloc(cpuset_zone, M_WAITOK);
+ cpuset_freelist_init(&cpusets, 1);
+ domainset_freelist_init(&domainlist, domain != NULL);
error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set);
if (error)
goto out;
set = NULL;
thread_lock(td);
- error = cpuset_shadow(td->td_cpuset, nset, mask);
- if (error == 0) {
- set = td->td_cpuset;
- td->td_cpuset = nset;
- sched_affinity(td);
- nset = NULL;
- }
+ error = cpuset_shadow(td->td_cpuset, &nset, mask, domain,
+ &cpusets, &domainlist);
+ if (error == 0)
+ set = cpuset_update_thread(td, nset);
thread_unlock(td);
PROC_UNLOCK(p);
if (set)
cpuset_rel(set);
out:
- if (nset)
- uma_zfree(cpuset_zone, nset);
+ cpuset_freelist_free(&cpusets);
+ domainset_freelist_free(&domainlist);
return (error);
}
/*
+ * Apply an anonymous mask to a single thread.
+ */
+int
+cpuset_setthread(lwpid_t id, cpuset_t *mask)
+{
+
+ return _cpuset_setthread(id, mask, NULL);
+}
+
+/*
* Apply new cpumask to the ithread.
*/
int
cpuset_setithread(lwpid_t id, int cpu)
{
+ struct setlist cpusets;
struct cpuset *nset, *rset;
struct cpuset *parent, *old_set;
struct thread *td;
@@ -738,8 +1222,8 @@ cpuset_setithread(lwpid_t id, int cpu)
cpuset_t mask;
int error;
- nset = uma_zalloc(cpuset_zone, M_WAITOK);
- rset = uma_zalloc(cpuset_zone, M_WAITOK);
+ cpuset_freelist_init(&cpusets, 1);
+ rset = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
cs_id = CPUSET_INVALID;
CPU_ZERO(&mask);
@@ -756,13 +1240,15 @@ cpuset_setithread(lwpid_t id, int cpu)
old_set = td->td_cpuset;
if (cpu == NOCPU) {
+ nset = LIST_FIRST(&cpusets);
+ LIST_REMOVE(nset, cs_link);
/*
* roll back to default set. We're not using cpuset_shadow()
* here because we can fail CPU_SUBSET() check. This can happen
* if default set does not contain all CPUs.
*/
- error = _cpuset_create(nset, cpuset_default, &mask,
+ error = _cpuset_create(nset, cpuset_default, &mask, NULL,
CPUSET_INVALID);
goto applyset;
@@ -779,7 +1265,7 @@ cpuset_setithread(lwpid_t id, int cpu)
* with any mask.
*/
error = _cpuset_create(rset, cpuset_zero,
- &cpuset_zero->cs_mask, cs_id);
+ &cpuset_zero->cs_mask, NULL, cs_id);
if (error != 0) {
PROC_UNLOCK(p);
goto out;
@@ -794,22 +1280,19 @@ cpuset_setithread(lwpid_t id, int cpu)
old_set = NULL;
}
- error = cpuset_shadow(parent, nset, &mask);
+ error = cpuset_shadow(parent, &nset, &mask, NULL, &cpusets, NULL);
applyset:
if (error == 0) {
thread_lock(td);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list