svn commit: r331861 - in user/jeff/numa: lib/libc/sys sys/compat/freebsd32 sys/kern sys/sys sys/vm

Jeff Roberson jeff at FreeBSD.org
Sun Apr 1 04:11:41 UTC 2018


Author: jeff
Date: Sun Apr  1 04:11:38 2018
New Revision: 331861
URL: https://svnweb.freebsd.org/changeset/base/331861

Log:
  Experimental support for msetdomain(), a syscall similar to Linux's mbind()
  that allows you to set NUMA policy on memory ranges.

Modified:
  user/jeff/numa/lib/libc/sys/Symbol.map
  user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h
  user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c
  user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c
  user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c
  user/jeff/numa/sys/compat/freebsd32/syscalls.master
  user/jeff/numa/sys/kern/init_sysent.c
  user/jeff/numa/sys/kern/kern_cpuset.c
  user/jeff/numa/sys/kern/syscalls.c
  user/jeff/numa/sys/kern/syscalls.master
  user/jeff/numa/sys/kern/systrace_args.c
  user/jeff/numa/sys/sys/domainset.h
  user/jeff/numa/sys/sys/syscall.h
  user/jeff/numa/sys/sys/syscall.mk
  user/jeff/numa/sys/sys/syscallsubr.h
  user/jeff/numa/sys/sys/sysproto.h
  user/jeff/numa/sys/vm/vm_fault.c
  user/jeff/numa/sys/vm/vm_map.c
  user/jeff/numa/sys/vm/vm_map.h
  user/jeff/numa/sys/vm/vm_object.c

Modified: user/jeff/numa/lib/libc/sys/Symbol.map
==============================================================================
--- user/jeff/numa/lib/libc/sys/Symbol.map	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/lib/libc/sys/Symbol.map	Sun Apr  1 04:11:38 2018	(r331861)
@@ -401,6 +401,7 @@ FBSD_1.5 {
 	statfs;
 	cpuset_getdomain;
 	cpuset_setdomain;
+	msetdomain;
 };
 
 FBSDprivate_1.0 {
@@ -1029,4 +1030,6 @@ FBSDprivate_1.0 {
 	__sys_cpuset_getdomain;
 	_cpuset_setdomain;
 	__sys_cpuset_setdomain;
+	_msetdomain;
+	__sys_msetdomain;
 };

Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h	Sun Apr  1 04:11:38 2018	(r331861)
@@ -469,4 +469,5 @@
 #define	FREEBSD32_SYS_freebsd32_cpuset_getdomain	561
 #define	FREEBSD32_SYS_freebsd32_cpuset_setdomain	562
 #define	FREEBSD32_SYS_getrandom	563
-#define	FREEBSD32_SYS_MAXSYSCALL	564
+#define	FREEBSD32_SYS_msetdomain	564
+#define	FREEBSD32_SYS_MAXSYSCALL	565

Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -596,4 +596,5 @@ const char *freebsd32_syscallnames[] = {
 	"freebsd32_cpuset_getdomain",			/* 561 = freebsd32_cpuset_getdomain */
 	"freebsd32_cpuset_setdomain",			/* 562 = freebsd32_cpuset_setdomain */
 	"getrandom",			/* 563 = getrandom */
+	"msetdomain",			/* 564 = msetdomain */
 };

Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -645,4 +645,5 @@ struct sysent freebsd32_sysent[] = {
 	{ AS(freebsd32_cpuset_getdomain_args), (sy_call_t *)freebsd32_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 561 = freebsd32_cpuset_getdomain */
 	{ AS(freebsd32_cpuset_setdomain_args), (sy_call_t *)freebsd32_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 562 = freebsd32_cpuset_setdomain */
 	{ AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 563 = getrandom */
+	{ AS(msetdomain_args), (sy_call_t *)sys_msetdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 564 = msetdomain */
 };

Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -3283,6 +3283,18 @@ systrace_args(int sysnum, void *params, uint64_t *uarg
 		*n_args = 3;
 		break;
 	}
+	/* msetdomain */
+	case 564: {
+		struct msetdomain_args *p = params;
+		uarg[0] = (intptr_t) p->addr; /* void * */
+		uarg[1] = p->size; /* size_t */
+		uarg[2] = p->domainsetsize; /* size_t */
+		uarg[3] = (intptr_t) p->mask; /* domainset_t * */
+		iarg[4] = p->policy; /* int */
+		iarg[5] = p->flags; /* int */
+		*n_args = 6;
+		break;
+	}
 	default:
 		*n_args = 0;
 		break;
@@ -8825,6 +8837,31 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *d
 			break;
 		};
 		break;
+	/* msetdomain */
+	case 564:
+		switch(ndx) {
+		case 0:
+			p = "userland void *";
+			break;
+		case 1:
+			p = "size_t";
+			break;
+		case 2:
+			p = "size_t";
+			break;
+		case 3:
+			p = "userland domainset_t *";
+			break;
+		case 4:
+			p = "int";
+			break;
+		case 5:
+			p = "int";
+			break;
+		default:
+			break;
+		};
+		break;
 	default:
 		break;
 	};
@@ -10678,6 +10715,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *
 		break;
 	/* getrandom */
 	case 563:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
+	/* msetdomain */
+	case 564:
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;

Modified: user/jeff/numa/sys/compat/freebsd32/syscalls.master
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/syscalls.master	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/compat/freebsd32/syscalls.master	Sun Apr  1 04:11:38 2018	(r331861)
@@ -1118,5 +1118,9 @@
 				    int policy); }
 563	AUE_NULL	NOPROTO	{ int getrandom(void *buf, size_t buflen, \
 				    unsigned int flags); }
+564     AUE_NULL        NOPROTO	{ int msetdomain(void *addr, \
+				    size_t size, size_t domainsetsize, \
+				    domainset_t *mask, int policy, \
+				    int flags); }
 
 ; vim: syntax=off

Modified: user/jeff/numa/sys/kern/init_sysent.c
==============================================================================
--- user/jeff/numa/sys/kern/init_sysent.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/kern/init_sysent.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -615,4 +615,5 @@ struct sysent sysent[] = {
 	{ AS(cpuset_getdomain_args), (sy_call_t *)sys_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 561 = cpuset_getdomain */
 	{ AS(cpuset_setdomain_args), (sy_call_t *)sys_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 562 = cpuset_setdomain */
 	{ AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 563 = getrandom */
+	{ AS(msetdomain_args), (sy_call_t *)sys_msetdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC },	/* 564 = msetdomain */
 };

Modified: user/jeff/numa/sys/kern/kern_cpuset.c
==============================================================================
--- user/jeff/numa/sys/kern/kern_cpuset.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/kern/kern_cpuset.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -64,6 +64,9 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/uma.h>
 #include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
@@ -2005,6 +2008,57 @@ out:
 	return (error);
 }
 
+/*
+ * Copy a userspace domain mask into 'domain' and validate it with 'policy'.
+ * Returns 0, ERANGE for a bad mask size, EINVAL, or the copyin() error.
+ */
+static int
+domainset_copyin(struct domainset *domain, size_t domainsetsize,
+    const domainset_t *maskp, int policy)
+{
+	domainset_t *kmask;
+	char *tail, *limit;
+	int error;
+
+	if (domainsetsize > DOMAINSET_MAXSIZE / NBBY ||
+	    domainsetsize < sizeof(domainset_t))
+		return (ERANGE);
+
+	if (policy > DOMAINSET_POLICY_MAX ||
+	    policy <= DOMAINSET_POLICY_INVALID)
+		return (EINVAL);
+
+	memset(domain, 0, sizeof(*domain));
+	kmask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
+	error = copyin(maskp, kmask, domainsetsize);
+	if (error != 0)
+		goto done;
+
+	/* Bytes beyond the kernel's domainset_t must all be zero. */
+	tail = (char *)&kmask->__bits + sizeof(domainset_t);
+	limit = (char *)&kmask->__bits + domainsetsize;
+	for (; tail < limit; tail++) {
+		if (*tail != 0) {
+			error = EINVAL;
+			goto done;
+		}
+	}
+
+	DOMAINSET_COPY(kmask, &domain->ds_mask);
+	domain->ds_policy = policy;
+	if (policy == DOMAINSET_POLICY_PREFER) {
+		/* PREFER names exactly one domain; record its index. */
+		if (DOMAINSET_COUNT(&domain->ds_mask) != 1) {
+			error = EINVAL;
+			goto done;
+		}
+		domain->ds_prefer = DOMAINSET_FFS(&domain->ds_mask) - 1;
+	}
+done:
+	free(kmask, M_TEMP);
+	return (error);
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_setdomain_args {
 	cpulevel_t	level;
@@ -2015,6 +2069,7 @@ struct cpuset_setdomain_args {
 	int 		policy;
 };
 #endif
+
 int
 sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap)
 {
@@ -2032,12 +2087,8 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le
 	struct thread *ttd;
 	struct proc *p;
 	struct domainset domain;
-	domainset_t *mask;
 	int error;
 
-	if (domainsetsize < sizeof(domainset_t) ||
-	    domainsetsize > DOMAINSET_MAXSIZE / NBBY)
-		return (ERANGE);
 	/* In Capability mode, you can only set your own CPU set. */
 	if (IN_CAPABILITY_MODE(td)) {
 		if (level != CPU_LEVEL_WHICH)
@@ -2047,43 +2098,13 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le
 		if (id != -1)
 			return (ECAPMODE);
 	}
-	memset(&domain, 0, sizeof(domain));
-	mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
-	error = copyin(maskp, mask, domainsetsize);
-	if (error)
-		goto out;
-	/*
-	 * Verify that no high bits are set.
-	 */
-	if (domainsetsize > sizeof(domainset_t)) {
-		char *end;
-		char *cp;
 
-		end = cp = (char *)&mask->__bits;
-		end += domainsetsize;
-		cp += sizeof(domainset_t);
-		while (cp != end)
-			if (*cp++ != 0) {
-				error = EINVAL;
-				goto out;
-			}
-
-	}
-	DOMAINSET_COPY(mask, &domain.ds_mask);
-	domain.ds_policy = policy;
-	if (policy <= DOMAINSET_POLICY_INVALID ||
-	    policy > DOMAINSET_POLICY_MAX)
-		return (EINVAL);
-
-	/* Translate preferred policy into a mask and fallback. */
-	if (policy == DOMAINSET_POLICY_PREFER) {
-		/* Only support a single preferred domain. */
-		if (DOMAINSET_COUNT(&domain.ds_mask) != 1)
-			return (EINVAL);
-		domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1;
-		/* This will be constrained by domainset_shadow(). */
+	error = domainset_copyin(&domain, domainsetsize, maskp, policy);
+	if (error)
+		return (error);
+	/* This will be constrained by cpuset_shadow(). */
+	if (policy == DOMAINSET_POLICY_PREFER) 
 		DOMAINSET_FILL(&domain.ds_mask);
-	}
 
 	switch (level) {
 	case CPU_LEVEL_ROOT:
@@ -2146,12 +2167,106 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le
 		break;
 	}
 out:
-	free(mask, M_TEMP);
 	return (error);
 }
 
-#ifdef DDB
+#ifndef _SYS_SYSPROTO_H_
+struct msetdomain_args {
+	void 		*addr;
+	size_t		size;
+	size_t		domainsetsize;
+	domainset_t 	*mask;
+	int		policy;
+	int		flags;
+};
+#endif
 
+/* Syscall entry: forward the user arguments to kern_msetdomain(). */
+int
+sys_msetdomain(struct thread *td, struct msetdomain_args *uap)
+{
+	return (kern_msetdomain(td, (uintptr_t)uap->addr, uap->size,
+	    uap->domainsetsize, uap->mask, uap->policy, uap->flags));
+}
+
+/*
+ * Set the NUMA policy for the pages covering [addr0, addr0 + size) in the
+ * calling thread's address space.  DOMAINSET_POLICY_INVALID skips the mask
+ * copyin and hands a NULL set to vm_map_setdomain().  Returns 0, EFAULT if
+ * the map rejects the range, otherwise EINVAL, ERANGE, or the error from
+ * copyin() or cpuset_which().
+ */
+int
+kern_msetdomain(struct thread *td, uintptr_t addr0, size_t size,
+    size_t domainsetsize, const domainset_t *mask, int policy, int flags)
+{
+	struct domainset domain, *set, *nset;
+	struct cpuset *cset;
+	struct thread *ttd;
+	struct proc *p;
+	vm_offset_t addr;
+	vm_size_t pageoff;
+	int error;
+
+	/* Page-align the range; addr0's sub-page offset is added to size. */
+	addr = addr0;
+	pageoff = (addr & PAGE_MASK);
+	addr -= pageoff;
+	size += pageoff;
+	size = (vm_size_t)round_page(size);
+	if (addr + size < addr)
+		return (EINVAL);
+
+	/* Short-circuit for POLICY_INVALID == reset to default. */
+	if (policy == DOMAINSET_POLICY_INVALID) {
+		nset = NULL;
+		goto apply;
+	}
+
+	/* Copy in and initialize the domainset from the user arguments. */
+	error = domainset_copyin(&domain, domainsetsize, mask, policy);
+	if (error)
+		return (error);
+
+	/*
+	 * Grab the list of allowed domains from the numbered cpuset this
+	 * process is a member of.
+	 */
+	error = cpuset_which(CPU_WHICH_PID, -1, &p, &ttd, &cset);
+	if (error)
+		return (error);
+	thread_lock(ttd);
+	set = cpuset_getbase(ttd->td_cpuset)->cs_domain;
+	thread_unlock(ttd);
+	PROC_UNLOCK(p);
+
+	/* Validate the new policy against the allowed set. */
+	if (policy == DOMAINSET_POLICY_PREFER)
+		DOMAINSET_COPY(&set->ds_mask, &domain.ds_mask);
+	if (!domainset_valid(set, &domain))
+		return (EINVAL);
+
+	/* Attempt to create a new set based on this key. */
+	nset = domainset_create(&domain);
+	if (nset == NULL)
+		return (EINVAL);
+
+	/* Attempt to apply the new set to the memory range. */
+apply:
+	switch (vm_map_setdomain(&td->td_proc->p_vmspace->vm_map, addr,
+	    addr + size, nset, flags)) {
+	case KERN_SUCCESS:
+		break;
+	case KERN_INVALID_ADDRESS:
+		return (EFAULT);
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+#ifdef DDB
 static void
 ddb_display_bitset(const struct bitset *set, int size)
 {

Modified: user/jeff/numa/sys/kern/syscalls.c
==============================================================================
--- user/jeff/numa/sys/kern/syscalls.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/kern/syscalls.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -570,4 +570,5 @@ const char *syscallnames[] = {
 	"cpuset_getdomain",			/* 561 = cpuset_getdomain */
 	"cpuset_setdomain",			/* 562 = cpuset_setdomain */
 	"getrandom",			/* 563 = getrandom */
+	"msetdomain",			/* 564 = msetdomain */
 };

Modified: user/jeff/numa/sys/kern/syscalls.master
==============================================================================
--- user/jeff/numa/sys/kern/syscalls.master	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/kern/syscalls.master	Sun Apr  1 04:11:38 2018	(r331861)
@@ -1023,6 +1023,9 @@
 				    int policy); }
 563	AUE_NULL	STD	{ int getrandom(void *buf, size_t buflen, \
 				    unsigned int flags); }
+564	AUE_NULL	STD	{ int msetdomain(void *addr, size_t size, \
+				    size_t domainsetsize, domainset_t *mask, \
+				    int policy, int flags); }
 
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master

Modified: user/jeff/numa/sys/kern/systrace_args.c
==============================================================================
--- user/jeff/numa/sys/kern/systrace_args.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/kern/systrace_args.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -3291,6 +3291,18 @@ systrace_args(int sysnum, void *params, uint64_t *uarg
 		*n_args = 3;
 		break;
 	}
+	/* msetdomain */
+	case 564: {
+		struct msetdomain_args *p = params;
+		uarg[0] = (intptr_t) p->addr; /* void * */
+		uarg[1] = p->size; /* size_t */
+		uarg[2] = p->domainsetsize; /* size_t */
+		uarg[3] = (intptr_t) p->mask; /* domainset_t * */
+		iarg[4] = p->policy; /* int */
+		iarg[5] = p->flags; /* int */
+		*n_args = 6;
+		break;
+	}
 	default:
 		*n_args = 0;
 		break;
@@ -8777,6 +8789,31 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *d
 			break;
 		};
 		break;
+	/* msetdomain */
+	case 564:
+		switch(ndx) {
+		case 0:
+			p = "userland void *";
+			break;
+		case 1:
+			p = "size_t";
+			break;
+		case 2:
+			p = "size_t";
+			break;
+		case 3:
+			p = "userland domainset_t *";
+			break;
+		case 4:
+			p = "int";
+			break;
+		case 5:
+			p = "int";
+			break;
+		default:
+			break;
+		};
+		break;
 	default:
 		break;
 	};
@@ -10665,6 +10702,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *
 		break;
 	/* getrandom */
 	case 563:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
+	/* msetdomain */
+	case 564:
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;

Modified: user/jeff/numa/sys/sys/domainset.h
==============================================================================
--- user/jeff/numa/sys/sys/domainset.h	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/sys/domainset.h	Sun Apr  1 04:11:38 2018	(r331861)
@@ -114,6 +114,7 @@ int	cpuset_getdomain(cpulevel_t, cpuwhich_t, id_t, siz
 	    int *);
 int	cpuset_setdomain(cpulevel_t, cpuwhich_t, id_t, size_t,
 	    const domainset_t *, int);
+int	msetdomain(void *, size_t, size_t, domainset_t *, int, int);
 
 __END_DECLS
 #endif

Modified: user/jeff/numa/sys/sys/syscall.h
==============================================================================
--- user/jeff/numa/sys/sys/syscall.h	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/sys/syscall.h	Sun Apr  1 04:11:38 2018	(r331861)
@@ -479,4 +479,5 @@
 #define	SYS_cpuset_getdomain	561
 #define	SYS_cpuset_setdomain	562
 #define	SYS_getrandom	563
-#define	SYS_MAXSYSCALL	564
+#define	SYS_msetdomain	564
+#define	SYS_MAXSYSCALL	565

Modified: user/jeff/numa/sys/sys/syscall.mk
==============================================================================
--- user/jeff/numa/sys/sys/syscall.mk	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/sys/syscall.mk	Sun Apr  1 04:11:38 2018	(r331861)
@@ -406,4 +406,5 @@ MIASM =  \
 	kevent.o \
 	cpuset_getdomain.o \
 	cpuset_setdomain.o \
-	getrandom.o
+	getrandom.o \
+	msetdomain.o

Modified: user/jeff/numa/sys/sys/syscallsubr.h
==============================================================================
--- user/jeff/numa/sys/sys/syscallsubr.h	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/sys/syscallsubr.h	Sun Apr  1 04:11:38 2018	(r331861)
@@ -175,6 +175,9 @@ int	kern_mlock(struct proc *proc, struct ucred *cred, 
 int	kern_mmap(struct thread *td, uintptr_t addr, size_t size, int prot,
 	    int flags, int fd, off_t pos);
 int	kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot);
+int	kern_msetdomain(struct thread *td, uintptr_t addr,
+	    size_t size, size_t domainsetsize, const domainset_t *maskp,
+	    int policy, int flags);
 int	kern_msgctl(struct thread *, int, int, struct msqid_ds *);
 int	kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *);
 int	kern_msgsnd(struct thread *, int, const void *, size_t, int, long);

Modified: user/jeff/numa/sys/sys/sysproto.h
==============================================================================
--- user/jeff/numa/sys/sys/sysproto.h	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/sys/sysproto.h	Sun Apr  1 04:11:38 2018	(r331861)
@@ -1773,6 +1773,14 @@ struct getrandom_args {
 	char buflen_l_[PADL_(size_t)]; size_t buflen; char buflen_r_[PADR_(size_t)];
 	char flags_l_[PADL_(unsigned int)]; unsigned int flags; char flags_r_[PADR_(unsigned int)];
 };
+struct msetdomain_args {
+	char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
+	char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)];
+	char domainsetsize_l_[PADL_(size_t)]; size_t domainsetsize; char domainsetsize_r_[PADR_(size_t)];
+	char mask_l_[PADL_(domainset_t *)]; domainset_t * mask; char mask_r_[PADR_(domainset_t *)];
+	char policy_l_[PADL_(int)]; int policy; char policy_r_[PADR_(int)];
+	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
 int	nosys(struct thread *, struct nosys_args *);
 void	sys_sys_exit(struct thread *, struct sys_exit_args *);
 int	sys_fork(struct thread *, struct fork_args *);
@@ -2154,6 +2162,7 @@ int	sys_kevent(struct thread *, struct kevent_args *);
 int	sys_cpuset_getdomain(struct thread *, struct cpuset_getdomain_args *);
 int	sys_cpuset_setdomain(struct thread *, struct cpuset_setdomain_args *);
 int	sys_getrandom(struct thread *, struct getrandom_args *);
+int	sys_msetdomain(struct thread *, struct msetdomain_args *);
 
 #ifdef COMPAT_43
 
@@ -3047,6 +3056,7 @@ int	freebsd11_mknodat(struct thread *, struct freebsd1
 #define	SYS_AUE_cpuset_getdomain	AUE_NULL
 #define	SYS_AUE_cpuset_setdomain	AUE_NULL
 #define	SYS_AUE_getrandom	AUE_NULL
+#define	SYS_AUE_msetdomain	AUE_NULL
 
 #undef PAD_
 #undef PADL_

Modified: user/jeff/numa/sys/vm/vm_fault.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_fault.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/vm/vm_fault.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -1609,7 +1609,6 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map
 	KASSERT(upgrade || dst_entry->object.vm_object == NULL,
 	    ("vm_fault_copy_entry: vm_object not NULL"));
 	if (src_object != dst_object) {
-		dst_object->domain = src_object->domain;
 		dst_entry->object.vm_object = dst_object;
 		dst_entry->offset = 0;
 		dst_object->charge = dst_entry->end - dst_entry->start;

Modified: user/jeff/numa/sys/vm/vm_map.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_map.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/vm/vm_map.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/domainset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -848,6 +849,34 @@ vm_map_entry_create(vm_map_t map)
 }
 
 /*
+ *	vm_map_entry_object_allocate:	[ internal use only ]
+ *
+ *	Returns the object associated with a map entry, allocating
+ *	a default object if none presently exists.
+ */
+static vm_object_t
+vm_map_entry_object_allocate(vm_map_t map, vm_map_entry_t entry)
+{
+	vm_object_t obj;
+
+	VM_MAP_ASSERT_LOCKED(map);
+	obj = entry->object.vm_object;
+	if (obj == NULL) {
+		/* No backing object yet: create one sized to the entry. */
+		obj = vm_object_allocate(OBJT_DEFAULT,
+		    atop(entry->end - entry->start));
+		entry->object.vm_object = obj;
+		entry->offset = 0;
+		if (entry->cred != NULL) {
+			obj->cred = entry->cred;
+			obj->charge = entry->end - entry->start;
+			entry->cred = NULL;
+		}
+	}
+	return (obj);
+}
+
+/*
  *	vm_map_entry_set_behavior:
  *
  *	Set the expected access behavior, either normal, random, or
@@ -1773,16 +1802,7 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry,
 	 */
 	if (entry->object.vm_object == NULL && !map->system_map &&
 	    (entry->eflags & MAP_ENTRY_GUARD) == 0) {
-		vm_object_t object;
-		object = vm_object_allocate(OBJT_DEFAULT,
-				atop(entry->end - entry->start));
-		entry->object.vm_object = object;
-		entry->offset = 0;
-		if (entry->cred != NULL) {
-			object->cred = entry->cred;
-			object->charge = entry->end - entry->start;
-			entry->cred = NULL;
-		}
+		vm_map_entry_object_allocate(map, entry);
 	} else if (entry->object.vm_object != NULL &&
 		   ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
 		   entry->cred != NULL) {
@@ -1853,16 +1873,7 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, v
 	 */
 	if (entry->object.vm_object == NULL && !map->system_map &&
 	    (entry->eflags & MAP_ENTRY_GUARD) == 0) {
-		vm_object_t object;
-		object = vm_object_allocate(OBJT_DEFAULT,
-				atop(entry->end - entry->start));
-		entry->object.vm_object = object;
-		entry->offset = 0;
-		if (entry->cred != NULL) {
-			object->cred = entry->cred;
-			object->charge = entry->end - entry->start;
-			entry->cred = NULL;
-		}
+		vm_map_entry_object_allocate(map, entry);
 	} else if (entry->object.vm_object != NULL &&
 		   ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
 		   entry->cred != NULL) {
@@ -3449,21 +3460,11 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 
 		case VM_INHERIT_SHARE:
 			/*
-			 * Clone the entry, creating the shared object if necessary.
+			 * Clone the entry, creating the shared object if
+			 * necessary.
 			 */
-			object = old_entry->object.vm_object;
-			if (object == NULL) {
-				object = vm_object_allocate(OBJT_DEFAULT,
-					atop(old_entry->end - old_entry->start));
-				old_entry->object.vm_object = object;
-				old_entry->offset = 0;
-				if (old_entry->cred != NULL) {
-					object->cred = old_entry->cred;
-					object->charge = old_entry->end -
-					    old_entry->start;
-					old_entry->cred = NULL;
-				}
-			}
+			object = vm_map_entry_object_allocate(old_map,
+			    old_entry);
 
 			/*
 			 * Add the reference before calling vm_object_shadow
@@ -4195,16 +4196,7 @@ RetryLookupLocked:
 	    !map->system_map) {
 		if (vm_map_lock_upgrade(map))
 			goto RetryLookup;
-		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
-		    atop(size));
-		entry->offset = 0;
-		if (entry->cred != NULL) {
-			VM_OBJECT_WLOCK(entry->object.vm_object);
-			entry->object.vm_object->cred = entry->cred;
-			entry->object.vm_object->charge = size;
-			VM_OBJECT_WUNLOCK(entry->object.vm_object);
-			entry->cred = NULL;
-		}
+		vm_map_entry_object_allocate(map, entry);
 		vm_map_lock_downgrade(map);
 	}
 
@@ -4313,6 +4305,107 @@ vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
 	 * Unlock the main-level map
 	 */
 	vm_map_unlock_read(map);
+}
+
+/*
+ *	vm_map_setdomain:
+ *
+ *	Assigns the NUMA policy contained in 'domain' to all objects
+ *	overlapping [start, end); returns a KERN_* code ('flags' unused).
+ */
+int
+vm_map_setdomain(vm_map_t map, vm_offset_t start, vm_offset_t end,
+    struct domainset *domain, int flags)
+{
+	vm_map_entry_t current, entry;
+	vm_object_t object;
+	int error;
+
+	error = KERN_SUCCESS;
+	vm_map_lock(map);
+	if (start < vm_map_min(map) || end > vm_map_max(map) ||
+	    start >= end || map->system_map) {
+		error = KERN_INVALID_ADDRESS;
+		goto out;
+	}
+
+	/*
+	 * Locate starting entry and clip if necessary.
+	 */
+	if (!vm_map_lookup_entry(map, start, &entry)) {
+		error = KERN_INVALID_ADDRESS;
+		goto out;
+	}
+	if (entry->start > start) {
+		error = KERN_INVALID_ADDRESS;
+		goto out;
+	}
+	vm_map_clip_start(map, entry, start);
+
+	/*
+	 * Walk the range looking for holes before we apply policy.
+	 */
+	for (current = entry;
+	     (current != &map->header) && (current->start < end);
+	     current = current->next
+	) {
+		if (current->end >= end)
+			break;
+		/* We don't support gaps. */
+		if (current->end != current->next->start) {
+			error = KERN_INVALID_ADDRESS;
+			goto out;
+		}
+	}
+
+	/*
+	 * Walk each overlapping map entry and update the backing
+	 * object's memory policy.
+	 */
+	for (current = entry;
+	     (current != &map->header) && (current->start < end);
+	     current = current->next
+	) {
+		/* Skip guard and submap entries. */
+		if ((current->eflags &
+		    (MAP_ENTRY_GUARD | MAP_ENTRY_IS_SUB_MAP)) != 0)
+			continue;
+
+		/*
+		 * Clip the end and allocate the object so that we are
+		 * only modifying the requested range.
+		 */
+		vm_map_clip_end(map, current, end);
+		object = vm_map_entry_object_allocate(map, current);
+		if (current->eflags & MAP_ENTRY_NEEDS_COPY) {
+			vm_object_shadow(&current->object.vm_object,
+			    &current->offset, current->end - current->start);
+			current->eflags &= ~MAP_ENTRY_NEEDS_COPY;
+			object = current->object.vm_object;
+		}
+
+		/*
+		 * If the object is anonymous memory we need to split it
+		 * so that we can apply the unique allocation property to
+		 * this range.
+		 */
+		VM_OBJECT_WLOCK(object);
+		if (object->type == OBJT_DEFAULT ||
+		    object->type == OBJT_SWAP) {
+			vm_object_collapse(object);
+			if ((object->flags & OBJ_NOSPLIT) == 0) {
+				vm_object_split(current);
+				object = current->object.vm_object;
+			}
+		}
+		object->domain.dr_policy = domain;
+		VM_OBJECT_WUNLOCK(object);
+		vm_map_simplify_entry(map, current);
+	}
+out:
+	vm_map_unlock(map);
+
+	return (error);
 }
 
 #include "opt_ddb.h"

Modified: user/jeff/numa/sys/vm/vm_map.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_map.h	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/vm/vm_map.h	Sun Apr  1 04:11:38 2018	(r331861)
@@ -403,5 +403,8 @@ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_
 int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags);
 long vmspace_swap_count(struct vmspace *vmspace);
+struct domainset;
+int vm_map_setdomain(vm_map_t, vm_offset_t, vm_offset_t,
+    struct domainset *, int);
 #endif				/* _KERNEL */
 #endif				/* _VM_MAP_ */

Modified: user/jeff/numa/sys/vm/vm_object.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_object.c	Sun Apr  1 01:21:00 2018	(r331860)
+++ user/jeff/numa/sys/vm/vm_object.c	Sun Apr  1 04:11:38 2018	(r331861)
@@ -1328,7 +1328,6 @@ vm_object_shadow(
 	result->backing_object_offset = *offset;
 	if (source != NULL) {
 		VM_OBJECT_WLOCK(source);
-		result->domain = source->domain;
 		LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
 		source->shadow_count++;
 #if VM_NRESERVLEVEL > 0


More information about the svn-src-user mailing list