svn commit: r326889 - in user/jeff/numa: lib/libc/sys sys/compat/freebsd32 sys/conf sys/kern sys/sys sys/vm usr.bin/cpuset

Jeff Roberson jeff at FreeBSD.org
Fri Dec 15 23:35:21 UTC 2017


Author: jeff
Date: Fri Dec 15 23:35:20 2017
New Revision: 326889
URL: https://svnweb.freebsd.org/changeset/base/326889

Log:
  First cut of NUMA domain integration into cpuset.
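
In short: each cpuset now carries a pointer to a shared, immutable
"domainset" (a mask of NUMA memory domains plus an allocation policy), and
a new syscall pair, cpuset_getdomain() and cpuset_setdomain(), exposes it
with the same level/which/id addressing that cpuset_getaffinity() uses.  A
minimal userland sketch; the DOMAINSET_ZERO()/DOMAINSET_SET() macros and
the DOMAINSET_POLICY_PREFER constant are assumed from the newly added
sys/domainset.h, whose contents this diff does not show:

	#include <sys/param.h>
	#include <sys/cpuset.h>
	#include <sys/domainset.h>	/* added by this commit */
	#include <err.h>

	int
	main(void)
	{
		domainset_t mask;
		int policy;

		/* Prefer memory from domain 0 for the current process. */
		DOMAINSET_ZERO(&mask);
		DOMAINSET_SET(0, &mask);
		if (cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
		    sizeof(mask), &mask, DOMAINSET_POLICY_PREFER) != 0)
			err(1, "cpuset_setdomain");

		/* Read the effective mask and policy back. */
		if (cpuset_getdomain(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
		    sizeof(mask), &mask, &policy) != 0)
			err(1, "cpuset_getdomain");
		return (0);
	}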

Added:
  user/jeff/numa/sys/sys/_domainset.h   (contents, props changed)
  user/jeff/numa/sys/sys/domainset.h   (contents, props changed)
  user/jeff/numa/sys/vm/vm_domainset.c   (contents, props changed)
  user/jeff/numa/sys/vm/vm_domainset.h   (contents, props changed)
Modified:
  user/jeff/numa/lib/libc/sys/Symbol.map
  user/jeff/numa/sys/compat/freebsd32/syscalls.master
  user/jeff/numa/sys/conf/files
  user/jeff/numa/sys/kern/init_main.c
  user/jeff/numa/sys/kern/init_sysent.c
  user/jeff/numa/sys/kern/kern_cpuset.c
  user/jeff/numa/sys/kern/kern_exit.c
  user/jeff/numa/sys/kern/kern_fork.c
  user/jeff/numa/sys/kern/kern_numa.c
  user/jeff/numa/sys/kern/kern_thr.c
  user/jeff/numa/sys/kern/kern_thread.c
  user/jeff/numa/sys/kern/makesyscalls.sh
  user/jeff/numa/sys/kern/sched_4bsd.c
  user/jeff/numa/sys/kern/sched_ule.c
  user/jeff/numa/sys/kern/syscalls.c
  user/jeff/numa/sys/kern/syscalls.master
  user/jeff/numa/sys/kern/systrace_args.c
  user/jeff/numa/sys/sys/cpuset.h
  user/jeff/numa/sys/sys/param.h
  user/jeff/numa/sys/sys/proc.h
  user/jeff/numa/sys/sys/syscall.h
  user/jeff/numa/sys/sys/syscall.mk
  user/jeff/numa/sys/sys/syscallsubr.h
  user/jeff/numa/sys/sys/sysproto.h
  user/jeff/numa/sys/vm/uma_core.c
  user/jeff/numa/sys/vm/vm_fault.c
  user/jeff/numa/sys/vm/vm_kern.c
  user/jeff/numa/sys/vm/vm_object.c
  user/jeff/numa/sys/vm/vm_object.h
  user/jeff/numa/sys/vm/vm_page.c
  user/jeff/numa/sys/vm/vm_phys.c
  user/jeff/numa/usr.bin/cpuset/cpuset.c

Modified: user/jeff/numa/lib/libc/sys/Symbol.map
==============================================================================
--- user/jeff/numa/lib/libc/sys/Symbol.map	Fri Dec 15 23:19:49 2017	(r326888)
+++ user/jeff/numa/lib/libc/sys/Symbol.map	Fri Dec 15 23:35:20 2017	(r326889)
@@ -398,6 +398,8 @@ FBSD_1.5 {
 	mknodat;
 	stat;
 	statfs;
+	cpuset_getdomain;
+	cpuset_setdomain;
 };
 
 FBSDprivate_1.0 {
@@ -1022,4 +1024,8 @@ FBSDprivate_1.0 {
 	gssd_syscall;
 	__libc_interposing_slot;
 	__libc_sigwait;
+	_cpuset_getdomain;
+	__sys_cpuset_getdomain;
+	_cpuset_setdomain;
+	__sys_cpuset_setdomain;
 };

Modified: user/jeff/numa/sys/compat/freebsd32/syscalls.master
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/syscalls.master	Fri Dec 15 23:19:49 2017	(r326888)
+++ user/jeff/numa/sys/compat/freebsd32/syscalls.master	Fri Dec 15 23:35:20 2017	(r326889)
@@ -1119,4 +1119,13 @@
 				    struct kevent32 *eventlist, \
 				    int nevents, \
 				    const struct timespec32 *timeout); }
+561	AUE_NULL	STD	{ int cpuset_getdomain(cpulevel_t level, \
+				    cpuwhich_t which, id_t id, \
+				    size_t domainsetsize, domainset_t *mask, \
+				    int *policy); }
+562	AUE_NULL	STD	{ int cpuset_setdomain(cpulevel_t level, \
+				    cpuwhich_t which, id_t id, \
+				    size_t domainsetsize, domainset_t *mask, \
+				    int policy); }
+
 ; vim: syntax=off
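
The numbers 561 and 562 match the entries added to the native
sys/kern/syscalls.master.  Both tables are inputs to makesyscalls.sh (also
touched by this commit), and regenerating from the native table is what
produces the derived files seen in the Modified list above: init_sysent.c,
syscalls.c, systrace_args.c, syscall.h, syscall.mk and sysproto.h.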

Modified: user/jeff/numa/sys/conf/files
==============================================================================
--- user/jeff/numa/sys/conf/files	Fri Dec 15 23:19:49 2017	(r326888)
+++ user/jeff/numa/sys/conf/files	Fri Dec 15 23:35:20 2017	(r326889)
@@ -4816,7 +4816,7 @@ vm/swap_pager.c			standard
 vm/uma_core.c			standard
 vm/uma_dbg.c			standard
 vm/memguard.c			optional DEBUG_MEMGUARD
-vm/vm_domain.c			standard
+vm/vm_domainset.c		standard
 vm/vm_fault.c			standard
 vm/vm_glue.c			standard
 vm/vm_init.c			standard
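
This drops the old vm/vm_domain.c (the vm_domain_policy code being removed
from proc0_init() below) from the build in favor of the new
vm/vm_domainset.c, listed under Added above but not shown in this diff.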

Modified: user/jeff/numa/sys/kern/init_main.c
==============================================================================
--- user/jeff/numa/sys/kern/init_main.c	Fri Dec 15 23:19:49 2017	(r326888)
+++ user/jeff/numa/sys/kern/init_main.c	Fri Dec 15 23:35:20 2017	(r326889)
@@ -493,10 +493,7 @@ proc0_init(void *dummy __unused)
 	td->td_flags = TDF_INMEM;
 	td->td_pflags = TDP_KTHREAD;
 	td->td_cpuset = cpuset_thread0();
-	vm_domain_policy_init(&td->td_vm_dom_policy);
-	vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1);
-	vm_domain_policy_init(&p->p_vm_dom_policy);
-	vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1);
+	td->td_domain.dr_policy = td->td_cpuset->cs_domain;
 	prison0_init();
 	p->p_peers = 0;
 	p->p_leader = p;
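
Thread0's policy pointer now simply aliases its cpuset's domainset instead
of carrying a private vm_domain_policy.  The types behind td_domain live in
the added sys/_domainset.h, which this diff does not include; a
hypothetical sketch, inferred from the fields the rest of the patch uses
(ds_link, ds_mask, ds_policy, ds_cnt, ds_max, dr_policy):

	/* Sketch only; the real definitions are in sys/_domainset.h. */
	struct domainset {
		LIST_ENTRY(domainset)	ds_link;	/* cpuset_domains list */
		domainset_t		ds_mask;	/* allowed domains (key) */
		int			ds_policy;	/* allocation policy (key) */
		int			ds_cnt;		/* popcount of ds_mask */
		int			ds_max;		/* DOMAINSET_FLS() + 1 */
	};

	struct domainset_ref {
		struct domainset	*dr_policy;	/* shared, immutable */
	};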

Modified: user/jeff/numa/sys/kern/init_sysent.c
==============================================================================
--- user/jeff/numa/sys/kern/init_sysent.c	Fri Dec 15 23:19:49 2017	(r326888)
+++ user/jeff/numa/sys/kern/init_sysent.c	Fri Dec 15 23:35:20 2017	(r326889)
@@ -612,4 +612,6 @@ struct sysent sysent[] = {
 	{ AS(fhstatfs_args), (sy_call_t *)sys_fhstatfs, AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC },	/* 558 = fhstatfs */
 	{ AS(mknodat_args), (sy_call_t *)sys_mknodat, AUE_MKNODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC },	/* 559 = mknodat */
 	{ AS(kevent_args), (sy_call_t *)sys_kevent, AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC },	/* 560 = kevent */
+	{ AS(cpuset_getdomain_args), (sy_call_t *)sys_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 561 = cpuset_getdomain */
+	{ AS(cpuset_setdomain_args), (sy_call_t *)sys_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 562 = cpuset_setdomain */
 };

Modified: user/jeff/numa/sys/kern/kern_cpuset.c
==============================================================================
--- user/jeff/numa/sys/kern/kern_cpuset.c	Fri Dec 15 23:19:49 2017	(r326888)
+++ user/jeff/numa/sys/kern/kern_cpuset.c	Fri Dec 15 23:35:20 2017	(r326889)
@@ -51,17 +51,21 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/capsicum.h>
 #include <sys/cpuset.h>
+#include <sys/domainset.h>
 #include <sys/sx.h>
 #include <sys/queue.h>
 #include <sys/libkern.h>
 #include <sys/limits.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
+#include <sys/vmmeter.h>
 
 #include <vm/uma.h>
 #include <vm/vm.h>
+#include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
+#include <vm/vm_phys.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
@@ -109,8 +113,10 @@ __FBSDID("$FreeBSD$");
  * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
  */
 static uma_zone_t cpuset_zone;
+static uma_zone_t domainset_zone;
 static struct mtx cpuset_lock;
 static struct setlist cpuset_ids;
+static struct domainlist cpuset_domains;
 static struct unrhdr *cpuset_unr;
 static struct cpuset *cpuset_zero, *cpuset_default;
 
@@ -122,6 +128,30 @@ cpuset_t *cpuset_root;
 cpuset_t cpuset_domain[MAXMEMDOM];
 
 /*
+ * Find the first non-anonymous set starting from 'set'.
+ */
+static struct cpuset *
+cpuset_getbase(struct cpuset *set)
+{
+
+	if (set->cs_id == CPUSET_INVALID)
+		set = set->cs_parent;
+	return (set);
+}
+
+/*
+ * Walks up the tree from 'set' to find the root.
+ */
+static struct cpuset *
+cpuset_getroot(struct cpuset *set)
+{
+
+	while ((set->cs_flags & CPU_SET_ROOT) == 0 && set->cs_parent != NULL)
+		set = set->cs_parent;
+	return (set);
+}
+
+/*
  * Acquire a reference to a cpuset, all pointers must be tracked with refs.
  */
 struct cpuset *
@@ -140,12 +170,7 @@ static struct cpuset *
 cpuset_refroot(struct cpuset *set)
 {
 
-	for (; set->cs_parent != NULL; set = set->cs_parent)
-		if (set->cs_flags & CPU_SET_ROOT)
-			break;
-	cpuset_ref(set);
-
-	return (set);
+	return cpuset_ref(cpuset_getroot(set));
 }
 
 /*
@@ -157,11 +182,7 @@ static struct cpuset *
 cpuset_refbase(struct cpuset *set)
 {
 
-	if (set->cs_id == CPUSET_INVALID)
-		set = set->cs_parent;
-	cpuset_ref(set);
-
-	return (set);
+	return cpuset_ref(cpuset_getbase(set));
 }
 
 /*
@@ -257,17 +278,25 @@ cpuset_lookup(cpusetid_t setid, struct thread *td)
  * will have no valid cpu based on restrictions from the parent.
  */
 static int
-_cpuset_create(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask,
-    cpusetid_t id)
+_cpuset_create(struct cpuset *set, struct cpuset *parent,
+    const cpuset_t *mask, struct domainset *domain, cpusetid_t id)
 {
 
+	if (domain == NULL)
+		domain = parent->cs_domain;
+	if (mask == NULL)
+		mask = &parent->cs_mask;
 	if (!CPU_OVERLAP(&parent->cs_mask, mask))
 		return (EDEADLK);
+	/* The domain must be prepared ahead of time. */
+	if (!DOMAINSET_SUBSET(&parent->cs_domain->ds_mask, &domain->ds_mask))
+		return (EDEADLK);
 	CPU_COPY(mask, &set->cs_mask);
 	LIST_INIT(&set->cs_children);
 	refcount_init(&set->cs_ref, 1);
 	set->cs_flags = 0;
 	mtx_lock_spin(&cpuset_lock);
+	set->cs_domain = domain;
 	CPU_AND(&set->cs_mask, &parent->cs_mask);
 	set->cs_id = id;
 	set->cs_parent = cpuset_ref(parent);
@@ -294,8 +323,8 @@ cpuset_create(struct cpuset **setp, struct cpuset *par
 	id = alloc_unr(cpuset_unr);
 	if (id == -1)
 		return (ENFILE);
-	*setp = set = uma_zalloc(cpuset_zone, M_WAITOK);
-	error = _cpuset_create(set, parent, mask, id);
+	*setp = set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
+	error = _cpuset_create(set, parent, mask, NULL, id);
 	if (error == 0)
 		return (0);
 	free_unr(cpuset_unr, id);
@@ -304,7 +333,186 @@ cpuset_create(struct cpuset **setp, struct cpuset *par
 	return (error);
 }
 
+static void
+cpuset_freelist_add(struct setlist *list, int count)
+{
+	struct cpuset *set;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		set = uma_zalloc(cpuset_zone, M_ZERO | M_WAITOK);
+		LIST_INSERT_HEAD(list, set, cs_link);
+	}
+}
+
+static void
+cpuset_freelist_init(struct setlist *list, int count)
+{
+
+	LIST_INIT(list);
+	cpuset_freelist_add(list, count);
+}
+
+static void
+cpuset_freelist_free(struct setlist *list)
+{
+	struct cpuset *set;
+
+	while ((set = LIST_FIRST(list)) != NULL) {
+		LIST_REMOVE(set, cs_link);
+		uma_zfree(cpuset_zone, set);
+	}
+}
+
+static void
+domainset_freelist_add(struct domainlist *list, int count)
+{
+	struct domainset *set;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		set = uma_zalloc(domainset_zone, M_ZERO | M_WAITOK);
+		LIST_INSERT_HEAD(list, set, ds_link);
+	}
+}
+
+static void
+domainset_freelist_init(struct domainlist *list, int count)
+{
+
+	LIST_INIT(list);
+	domainset_freelist_add(list, count);
+}
+
+static void
+domainset_freelist_free(struct domainlist *list)
+{
+	struct domainset *set;
+
+	while ((set = LIST_FIRST(list)) != NULL) {
+		LIST_REMOVE(set, ds_link);
+		uma_zfree(domainset_zone, set);
+	}
+}
+
+/* Copy a domainset preserving mask and policy. */
+static void
+domainset_copy(const struct domainset *from, struct domainset *to)
+{
+
+	DOMAINSET_COPY(&from->ds_mask, &to->ds_mask);
+	to->ds_policy = from->ds_policy;
+}
+
+/* Return 1 if mask and policy are equal, otherwise 0. */
+static int
+domainset_equal(const struct domainset *one, const struct domainset *two)
+{
+
+	return (DOMAINSET_CMP(&one->ds_mask, &two->ds_mask) == 0 &&
+	    one->ds_policy == two->ds_policy);
+}
+
 /*
+ * Lookup or create a domainset.  The key is provided in ds_mask and
+ * ds_policy.  If the domainset does not yet exist, the storage in
+ * 'domain' is used to insert it.  Otherwise this storage is freed to
+ * the domainset_zone and the existing domainset is returned.
+ */
+static struct domainset *
+_domainset_create(struct domainset *domain, struct domainlist *freelist)
+{
+	struct domainset *ndomain;
+
+	mtx_lock_spin(&cpuset_lock);
+	LIST_FOREACH(ndomain, &cpuset_domains, ds_link)
+		if (domainset_equal(ndomain, domain))
+			break;
+	/*
+	 * If the domain does not yet exist we insert it and initialize
+	 * various iteration helpers which are not part of the key.
+	 */
+	if (ndomain == NULL) {
+		LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link);
+		domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask);
+		domain->ds_max = DOMAINSET_FLS(&domain->ds_mask) + 1;
+	}
+	mtx_unlock_spin(&cpuset_lock);
+	if (ndomain == NULL)
+		return (domain);
+	if (freelist != NULL)
+		LIST_INSERT_HEAD(freelist, domain, ds_link);
+	else
+		uma_zfree(domainset_zone, domain);
+	return (ndomain);
+}
+
+/*
+ * Create or lookup a domainset based on the key held in 'domain'.
+ */
+static struct domainset *
+domainset_create(const struct domainset *domain)
+{
+	struct domainset *ndomain;
+
+	ndomain = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO);
+	domainset_copy(domain, ndomain);
+	return _domainset_create(ndomain, NULL);
+}
+
+/*
+ * Update thread domainset pointers.
+ */
+static void
+domainset_notify(void)
+{
+	struct thread *td;
+	struct proc *p;
+
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		PROC_LOCK(p);
+		if (p->p_state == PRS_NEW) {
+			PROC_UNLOCK(p);
+			continue;
+		}
+		FOREACH_THREAD_IN_PROC(p, td) {
+			thread_lock(td);
+			td->td_domain.dr_policy = td->td_cpuset->cs_domain;
+			thread_unlock(td);
+		}
+		PROC_UNLOCK(p);
+	}
+	sx_sunlock(&allproc_lock);
+	kernel_object->domain.dr_policy = cpuset_default->cs_domain;
+}
+
+/*
+ * Create a new set that is a subset of a parent.
+ */
+static struct domainset *
+domainset_shadow(const struct domainset *pdomain,
+    const struct domainset *domain, struct domainlist *freelist)
+{
+	struct domainset *ndomain;
+
+	ndomain = LIST_FIRST(freelist);
+	LIST_REMOVE(ndomain, ds_link);
+
+	/*
+	 * Initialize the key from the request.
+	 */
+	domainset_copy(domain, ndomain);
+
+	/*
+	 * Restrict the key by the parent.
+	 */
+	DOMAINSET_AND(&ndomain->ds_mask, &pdomain->ds_mask);
+
+	return _domainset_create(ndomain, freelist);
+}
+
+/*
  * Recursively check for errors that would occur from applying mask to
  * the tree of sets starting at 'set'.  Checks for sets that would become
  * empty as well as RDONLY flags.
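
A note on _domainset_create() above: domainsets are interned.  The
(ds_mask, ds_policy) pair is the lookup key, duplicate storage is freed (or
parked on the caller's freelist), and every caller presenting an equal key
gets back the same canonical pointer, which is what lets later code compare
policies with pointer equality (set->cs_domain != orig).  A usage sketch
with hypothetical values:

	struct domainset key, *a, *b;

	DOMAINSET_ZERO(&key.ds_mask);
	DOMAINSET_SET(0, &key.ds_mask);
	DOMAINSET_SET(1, &key.ds_mask);
	key.ds_policy = 0;		/* hypothetical policy value */

	a = domainset_create(&key);	/* inserts a canonical copy */
	b = domainset_create(&key);	/* finds it; frees its own copy */
	/* a == b: pointer equality implies mask and policy equality. */
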
@@ -376,10 +585,12 @@ cpuset_modify(struct cpuset *set, cpuset_t *mask)
 	 * Verify that we have access to this set of
 	 * cpus.
 	 */
-	root = set->cs_parent;
-	if (root && !CPU_SUBSET(&root->cs_mask, mask))
-		return (EINVAL);
+	root = cpuset_getroot(set);
 	mtx_lock_spin(&cpuset_lock);
+	if (root && !CPU_SUBSET(&root->cs_mask, mask)) {
+		error = EINVAL;
+		goto out;
+	}
 	error = cpuset_testupdate(set, mask, 0);
 	if (error)
 		goto out;
@@ -392,6 +603,138 @@ out:
 }
 
 /*
+ * Recursively check for errors that would occur from applying the
+ * domainset to the tree of sets starting at 'set'.  Checks for sets
+ * that would become empty as well as RDONLY flags.
+ */
+static int
+cpuset_testupdate_domain(struct cpuset *set, struct domainset *dset,
+    struct domainset *orig, int *count, int check_mask)
+{
+	struct cpuset *nset;
+	struct domainset *domain;
+	struct domainset newset;
+	int error;
+
+	mtx_assert(&cpuset_lock, MA_OWNED);
+	if (set->cs_flags & CPU_SET_RDONLY)
+		return (EPERM);
+	domain = set->cs_domain;
+	domainset_copy(domain, &newset);
+	if (!domainset_equal(domain, orig)) {
+		if (!DOMAINSET_OVERLAP(&domain->ds_mask, &dset->ds_mask))
+			return (EDEADLK);
+		DOMAINSET_AND(&newset.ds_mask, &dset->ds_mask);
+		/* Count the number of domains that are changing. */
+		(*count)++;
+	}
+	error = 0;
+	LIST_FOREACH(nset, &set->cs_children, cs_siblings) 
+		if ((error = cpuset_testupdate_domain(nset, &newset, domain,
+		    count, 1)) != 0)
+			break;
+	return (error);
+}
+
+/*
+ * Applies domainset 'domain' without checking for empty sets or permissions.
+ */
+static void
+cpuset_update_domain(struct cpuset *set, struct domainset *domain,
+    struct domainset *orig, struct domainlist *domains)
+{
+	struct cpuset *nset;
+
+	mtx_assert(&cpuset_lock, MA_OWNED);
+	/*
+	 * If this domainset has changed from the parent we must calculate
+	 * a new set.  Otherwise it simply inherits from the parent.  When
+	 * we inherit from the parent we get a new mask and policy.  If the
+	 * set is modified from the parent we keep the policy and only
+	 * update the mask.
+	 */
+	if (set->cs_domain != orig) {
+		orig = set->cs_domain;
+		set->cs_domain = domainset_shadow(domain, orig, domains);
+	} else
+		set->cs_domain = domain;
+	LIST_FOREACH(nset, &set->cs_children, cs_siblings) 
+		cpuset_update_domain(nset, set->cs_domain, orig, domains);
+
+	return;
+}
+
+/*
+ * Modify the set 'set' to use a copy of the domainset provided.  Apply this
+ * new domainset to restrict all children in the tree.  Checks for validity
+ * before applying the changes.
+ */
+static int
+cpuset_modify_domain(struct cpuset *set, struct domainset *domain)
+{
+	struct domainlist domains;
+	struct domainset temp;
+	struct domainset *dset;
+	struct cpuset *root;
+	int ndomains, needed;
+	int error;
+
+	error = priv_check(curthread, PRIV_SCHED_CPUSET);
+	if (error)
+		return (error);
+	/*
+	 * In case we are called from within the jail
+	 * we do not allow modifying the dedicated root
+	 * cpuset of the jail but may still allow to
+	 * change child sets.
+	 */
+	if (jailed(curthread->td_ucred) &&
+	    set->cs_flags & CPU_SET_ROOT)
+		return (EPERM);
+	domainset_freelist_init(&domains, 0);
+	domain = domainset_create(domain);
+	ndomains = needed = 0;
+	do {
+		if (ndomains < needed) {
+			domainset_freelist_add(&domains, needed - ndomains);
+			ndomains = needed;
+		}
+		root = cpuset_getroot(set);
+		mtx_lock_spin(&cpuset_lock);
+		dset = root->cs_domain;
+		/*
+		 * Verify that we have access to this set of domains.
+		 */
+		if (root &&
+		    !DOMAINSET_SUBSET(&dset->ds_mask, &domain->ds_mask)) {
+			error = EINVAL;
+			goto out;
+		}
+		/*
+		 * Determine whether we can apply this set of domains and
+		 * how many new domain structures it will require.
+		 */
+		domainset_copy(domain, &temp);
+		needed = 0;
+		error = cpuset_testupdate_domain(set, &temp, set->cs_domain,
+		    &needed, 0);
+		if (error)
+			goto out;
+		if (ndomains < needed)
+			mtx_unlock_spin(&cpuset_lock);
+	} while (ndomains < needed);
+	dset = set->cs_domain;
+	cpuset_update_domain(set, domain, dset, &domains);
+out:
+	mtx_unlock_spin(&cpuset_lock);
+	domainset_freelist_free(&domains);
+	if (error == 0)
+		domainset_notify();
+
+	return (error);
+}
+
+/*
  * Resolve the 'which' parameter of several cpuset apis.
  *
  * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid.  Also
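
cpuset_modify_domain() above shows the allocation pattern this file now
uses throughout: cpuset_lock is a spin mutex, so nothing may sleep (and
uma_zalloc(M_WAITOK) may not be called) while it is held.  The code
therefore preallocates onto a freelist, takes the lock, lets
cpuset_testupdate_domain() count how many shadow domainsets the subtree
really needs, and retries with a larger freelist if the guess was short.
In outline, with count_needed() standing in for the locked validation pass:

	ndomains = needed = 0;
	do {
		if (ndomains < needed) {	/* grow while unlocked */
			domainset_freelist_add(&domains, needed - ndomains);
			ndomains = needed;
		}
		mtx_lock_spin(&cpuset_lock);
		needed = 0;
		if (count_needed(set, &needed) != 0)
			goto out;		/* out: unlocks and frees */
		if (ndomains < needed)		/* must drop before retry */
			mtx_unlock_spin(&cpuset_lock);
	} while (ndomains < needed);
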
@@ -481,44 +822,204 @@ cpuset_which(cpuwhich_t which, id_t id, struct proc **
 	return (0);
 }
 
+static int
+cpuset_testshadow(struct cpuset *set, const cpuset_t *mask,
+    const struct domainset *domain)
+{
+	struct cpuset *parent;
+	struct domainset *dset;
+
+	parent = cpuset_getbase(set);
+	/*
+	 * If we are restricting a cpu mask it must be a subset of the
+	 * parent or invalid CPUs have been specified.
+	 */
+	if (mask != NULL && !CPU_SUBSET(&parent->cs_mask, mask))
+		return (EINVAL);
+
+	/*
+	 * If we are restricting a domain mask it must be a subset of the
+	 * parent or invalid domains have been specified.
+	 */
+	dset = parent->cs_domain;
+	if (domain != NULL &&
+	    !DOMAINSET_SUBSET(&dset->ds_mask, &domain->ds_mask))
+		return (EINVAL);
+
+	return (0);
+}
+
 /*
  * Create an anonymous set with the provided mask in the space provided by
- * 'fset'.  If the passed in set is anonymous we use its parent otherwise
+ * 'nset'.  If the passed-in set is anonymous we use its parent, otherwise
  * the new set is a child of 'set'.
  */
 static int
-cpuset_shadow(struct cpuset *set, struct cpuset *fset, const cpuset_t *mask)
+cpuset_shadow(struct cpuset *set, struct cpuset **nsetp,
+   const cpuset_t *mask, const struct domainset *domain,
+   struct setlist *cpusets, struct domainlist *domains)
 {
 	struct cpuset *parent;
+	struct cpuset *nset;
+	struct domainset *dset;
+	struct domainset *d;
+	int error;
 
-	if (set->cs_id == CPUSET_INVALID)
-		parent = set->cs_parent;
+	error = cpuset_testshadow(set, mask, domain);
+	if (error)
+		return (error);
+
+	parent = cpuset_getbase(set);
+	dset = parent->cs_domain;
+	if (mask == NULL)
+		mask = &set->cs_mask;
+	if (domain != NULL)
+		d = domainset_shadow(dset, domain, domains);
 	else
-		parent = set;
-	if (!CPU_SUBSET(&parent->cs_mask, mask))
+		d = set->cs_domain;
+	nset = LIST_FIRST(cpusets);
+	error = _cpuset_create(nset, parent, mask, d, CPUSET_INVALID);
+	if (error == 0) {
+		LIST_REMOVE(nset, cs_link);
+		*nsetp = nset;
+	}
+	return (error);
+}
+
+static struct cpuset *
+cpuset_update_thread(struct thread *td, struct cpuset *nset)
+{
+	struct cpuset *tdset;
+
+	tdset = td->td_cpuset;
+	td->td_cpuset = nset;
+	td->td_domain.dr_policy = nset->cs_domain;
+	sched_affinity(td);
+
+	return (tdset);
+}
+
+static int
+cpuset_setproc_test_maskthread(struct cpuset *tdset, cpuset_t *mask,
+    struct domainset *domain)
+{
+	struct cpuset *parent;
+
+	parent = cpuset_getbase(tdset);
+	if (mask == NULL)
+		mask = &tdset->cs_mask;
+	if (domain == NULL)
+		domain = tdset->cs_domain;
+	return cpuset_testshadow(parent, mask, domain);
+}
+
+static int
+cpuset_setproc_maskthread(struct cpuset *tdset, cpuset_t *mask,
+    struct domainset *domain, struct cpuset **nsetp,
+    struct setlist *freelist, struct domainlist *domainlist)
+{
+	struct cpuset *parent;
+
+	parent = cpuset_getbase(tdset);
+	if (mask == NULL)
+		mask = &tdset->cs_mask;
+	if (domain == NULL)
+		domain = tdset->cs_domain;
+	return cpuset_shadow(parent, nsetp, mask, domain, freelist,
+	    domainlist);
+}
+
+static int
+cpuset_setproc_setthread_mask(struct cpuset *tdset, struct cpuset *set,
+    cpuset_t *mask, struct domainset *domain)
+{
+	struct cpuset *parent;
+
+	parent = cpuset_getbase(tdset);
+
+	/*
+	 * If the thread restricted its mask then apply that same
+	 * restriction to the new set, otherwise take it wholesale.
+	 */
+	if (CPU_CMP(&tdset->cs_mask, &parent->cs_mask) != 0) {
+		CPU_COPY(&tdset->cs_mask, mask);
+		CPU_AND(mask, &set->cs_mask);
+	} else
+		CPU_COPY(&set->cs_mask, mask);
+
+	/*
+	 * If the thread restricted the domain then we apply the
+	 * restriction to the new set but retain the policy.
+	 */
+	if (tdset->cs_domain != parent->cs_domain) {
+		domainset_copy(tdset->cs_domain, domain);
+		DOMAINSET_AND(&domain->ds_mask, &set->cs_domain->ds_mask);
+	} else
+		domainset_copy(set->cs_domain, domain);
+
+	if (CPU_EMPTY(mask) || DOMAINSET_EMPTY(&domain->ds_mask))
 		return (EDEADLK);
-	return (_cpuset_create(fset, parent, mask, CPUSET_INVALID));
+
+	return (0);
 }
 
+static int
+cpuset_setproc_test_setthread(struct cpuset *tdset, struct cpuset *set)
+{
+	struct domainset domain;
+	cpuset_t mask;
+
+	if (tdset->cs_id != CPUSET_INVALID)
+		return (0);
+	return cpuset_setproc_setthread_mask(tdset, set, &mask, &domain);
+}
+
+static int
+cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set,
+    struct cpuset **nsetp, struct setlist *freelist,
+    struct domainlist *domainlist)
+{
+	struct domainset domain;
+	cpuset_t mask;
+	int error;
+
+	/*
+	 * If we're replacing on a thread that has not constrained the
+	 * original set we can simply accept the new set.
+	 */
+	if (tdset->cs_id != CPUSET_INVALID) {
+		*nsetp = cpuset_ref(set);
+		return (0);
+	}
+	error = cpuset_setproc_setthread_mask(tdset, set, &mask, &domain);
+	if (error)
+		return (error);
+
+	return cpuset_shadow(tdset, nsetp, &mask, &domain, freelist,
+	    domainlist);
+}
+
 /*
- * Handle two cases for replacing the base set or mask of an entire process.
+ * Handle three cases for updating an entire process.
  *
- * 1) Set is non-null and mask is null.  This reparents all anonymous sets
- *    to the provided set and replaces all non-anonymous td_cpusets with the
- *    provided set.
- * 2) Mask is non-null and set is null.  This replaces or creates anonymous
- *    sets for every thread with the existing base as a parent.
+ * 1) Set is non-null.  This reparents all anonymous sets to the provided
+ *    set and replaces all non-anonymous td_cpusets with the provided set.
+ * 2) Mask is non-null.  This replaces or creates anonymous sets for every
+ *    thread with the existing base as a parent.
+ * 3) Domain is non-null.  This creates anonymous sets for every thread
+ *    and replaces the domain set.
  *
  * This is overly complicated because we can't allocate while holding a 
  * spinlock and spinlocks must be held while changing and examining thread
  * state.
  */
 static int
-cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask)
+cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
+    struct domainset *domain)
 {
 	struct setlist freelist;
 	struct setlist droplist;
-	struct cpuset *tdset;
+	struct domainlist domainlist;
 	struct cpuset *nset;
 	struct thread *td;
 	struct proc *p;
@@ -533,7 +1034,8 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t
 	 * 2) If enough cpusets have not been allocated release the locks and
 	 *    allocate them.  Loop.
 	 */
-	LIST_INIT(&freelist);
+	cpuset_freelist_init(&freelist, 1);
+	domainset_freelist_init(&domainlist, 1);
 	LIST_INIT(&droplist);
 	nfree = 0;
 	for (;;) {
@@ -544,39 +1047,27 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t
 			break;
 		threads = p->p_numthreads;
 		PROC_UNLOCK(p);
-		for (; nfree < threads; nfree++) {
-			nset = uma_zalloc(cpuset_zone, M_WAITOK);
-			LIST_INSERT_HEAD(&freelist, nset, cs_link);
+		if (nfree < threads) {
+			cpuset_freelist_add(&freelist, threads - nfree);
+			domainset_freelist_add(&domainlist, threads - nfree);
+			nfree = threads;
 		}
 	}
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * Now that the appropriate locks are held and we have enough cpusets,
-	 * make sure the operation will succeed before applying changes.  The
+	 * make sure the operation will succeed before applying changes. The
 	 * proc lock prevents td_cpuset from changing between calls.
 	 */
 	error = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
-		tdset = td->td_cpuset;
-		/*
-		 * Verify that a new mask doesn't specify cpus outside of
-		 * the set the thread is a member of.
-		 */
-		if (mask) {
-			if (tdset->cs_id == CPUSET_INVALID)
-				tdset = tdset->cs_parent;
-			if (!CPU_SUBSET(&tdset->cs_mask, mask))
-				error = EDEADLK;
-		/*
-		 * Verify that a new set won't leave an existing thread
-		 * mask without a cpu to run on.  It can, however, restrict
-		 * the set.
-		 */
-		} else if (tdset->cs_id == CPUSET_INVALID) {
-			if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask))
-				error = EDEADLK;
-		}
+		if (set != NULL)
+			error = cpuset_setproc_test_setthread(td->td_cpuset,
+			    set);
+		else
+			error = cpuset_setproc_test_maskthread(td->td_cpuset,
+			    mask, domain);
 		thread_unlock(td);
 		if (error)
 			goto unlock_out;
@@ -588,33 +1079,17 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t
 	 */
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
-		/*
-		 * If we presently have an anonymous set or are applying a
-		 * mask we must create an anonymous shadow set.  That is
-		 * either parented to our existing base or the supplied set.
-		 *
-		 * If we have a base set with no anonymous shadow we simply
-		 * replace it outright.
-		 */
-		tdset = td->td_cpuset;
-		if (tdset->cs_id == CPUSET_INVALID || mask) {
-			nset = LIST_FIRST(&freelist);
-			LIST_REMOVE(nset, cs_link);
-			if (mask)
-				error = cpuset_shadow(tdset, nset, mask);
-			else
-				error = _cpuset_create(nset, set,
-				    &tdset->cs_mask, CPUSET_INVALID);
-			if (error) {
-				LIST_INSERT_HEAD(&freelist, nset, cs_link);
-				thread_unlock(td);
-				break;
-			}
-		} else
-			nset = cpuset_ref(set);
-		cpuset_rel_defer(&droplist, tdset);
-		td->td_cpuset = nset;
-		sched_affinity(td);
+		if (set != NULL)
+			error = cpuset_setproc_setthread(td->td_cpuset, set,
+			    &nset, &freelist, &domainlist);
+		else
+			error = cpuset_setproc_maskthread(td->td_cpuset, mask,
+			    domain, &nset, &freelist, &domainlist);
+		if (error) {
+			thread_unlock(td);
+			break;
+		}
+		cpuset_rel_defer(&droplist, cpuset_update_thread(td, nset));
 		thread_unlock(td);
 	}
 unlock_out:
@@ -622,10 +1097,8 @@ unlock_out:
 out:
 	while ((nset = LIST_FIRST(&droplist)) != NULL)
 		cpuset_rel_complete(nset);
-	while ((nset = LIST_FIRST(&freelist)) != NULL) {
-		LIST_REMOVE(nset, cs_link);
-		uma_zfree(cpuset_zone, nset);
-	}
+	cpuset_freelist_free(&freelist);
+	domainset_freelist_free(&domainlist);
 	return (error);
 }
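
The helpers above preserve per-thread restrictions across a process-wide
update.  A thread's private set is anonymous (cs_id == CPUSET_INVALID) and
hangs off the named base set; when the process is moved to a new base,
cpuset_setproc_setthread_mask() intersects the thread's old restriction
with the new set rather than discarding it, and does the same for the
domain mask while keeping the thread's policy.  A worked example using the
same macros the kernel code uses:

	cpuset_t tdmask, setmask, merged;

	CPU_ZERO(&tdmask);		/* thread restricted itself to {0,1} */
	CPU_SET(0, &tdmask);
	CPU_SET(1, &tdmask);
	CPU_ZERO(&setmask);		/* process moved to a set with {1,2} */
	CPU_SET(1, &setmask);
	CPU_SET(2, &setmask);

	CPU_COPY(&tdmask, &merged);	/* start from the old restriction */
	CPU_AND(&merged, &setmask);	/* intersect: merged == {1} */
	/* An empty result here is the EDEADLK case in the test pass. */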
 
@@ -690,46 +1163,57 @@ cpusetobj_strscan(cpuset_t *set, const char *buf)
 }
 
 /*
- * Apply an anonymous mask to a single thread.
+ * Apply an anonymous mask or a domain to a single thread.
  */
-int
-cpuset_setthread(lwpid_t id, cpuset_t *mask)
+static int
+_cpuset_setthread(lwpid_t id, cpuset_t *mask, struct domainset *domain)
 {
+	struct setlist cpusets;
+	struct domainlist domainlist;
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *td;
 	struct proc *p;
 	int error;
 
-	nset = uma_zalloc(cpuset_zone, M_WAITOK);
+	cpuset_freelist_init(&cpusets, 1);
+	domainset_freelist_init(&domainlist, domain != NULL);
 	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set);
 	if (error)
 		goto out;
 	set = NULL;
 	thread_lock(td);
-	error = cpuset_shadow(td->td_cpuset, nset, mask);
-	if (error == 0) {
-		set = td->td_cpuset;
-		td->td_cpuset = nset;
-		sched_affinity(td);
-		nset = NULL;
-	}
+	error = cpuset_shadow(td->td_cpuset, &nset, mask, domain,
+	    &cpusets, &domainlist);
+	if (error == 0)
+		set = cpuset_update_thread(td, nset);
 	thread_unlock(td);
 	PROC_UNLOCK(p);
 	if (set)
 		cpuset_rel(set);
 out:
-	if (nset)
-		uma_zfree(cpuset_zone, nset);
+	cpuset_freelist_free(&cpusets);
+	domainset_freelist_free(&domainlist);
 	return (error);
 }
 
 /*
+ * Apply an anonymous mask to a single thread.
+ */
+int
+cpuset_setthread(lwpid_t id, cpuset_t *mask)
+{
+
+	return _cpuset_setthread(id, mask, NULL);
+}
+
+/*
  * Apply new cpumask to the ithread.
  */
 int
 cpuset_setithread(lwpid_t id, int cpu)
 {
+	struct setlist cpusets;
 	struct cpuset *nset, *rset;
 	struct cpuset *parent, *old_set;
 	struct thread *td;
@@ -738,8 +1222,8 @@ cpuset_setithread(lwpid_t id, int cpu)
 	cpuset_t mask;
 	int error;
 
-	nset = uma_zalloc(cpuset_zone, M_WAITOK);
-	rset = uma_zalloc(cpuset_zone, M_WAITOK);
+	cpuset_freelist_init(&cpusets, 1);
+	rset = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
 	cs_id = CPUSET_INVALID;
 
 	CPU_ZERO(&mask);
@@ -756,13 +1240,15 @@ cpuset_setithread(lwpid_t id, int cpu)
 	old_set = td->td_cpuset;
 
 	if (cpu == NOCPU) {
+		nset = LIST_FIRST(&cpusets);
+		LIST_REMOVE(nset, cs_link);
 
 		/*
 		 * roll back to default set. We're not using cpuset_shadow()
 		 * here because we can fail CPU_SUBSET() check. This can happen
 		 * if default set does not contain all CPUs.
 		 */
-		error = _cpuset_create(nset, cpuset_default, &mask,
+		error = _cpuset_create(nset, cpuset_default, &mask, NULL,
 		    CPUSET_INVALID);
 
 		goto applyset;
@@ -779,7 +1265,7 @@ cpuset_setithread(lwpid_t id, int cpu)
 		 * with any mask.
 		 */
 		error = _cpuset_create(rset, cpuset_zero,
-		    &cpuset_zero->cs_mask, cs_id);
+		    &cpuset_zero->cs_mask, NULL, cs_id);
 		if (error != 0) {
 			PROC_UNLOCK(p);
 			goto out;
@@ -794,22 +1280,19 @@ cpuset_setithread(lwpid_t id, int cpu)
 		old_set = NULL;
 	}
 
-	error = cpuset_shadow(parent, nset, &mask);
+	error = cpuset_shadow(parent, &nset, &mask, NULL, &cpusets, NULL);
 applyset:
 	if (error == 0) {
 		thread_lock(td);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

