svn commit: r331861 - in user/jeff/numa: lib/libc/sys sys/compat/freebsd32 sys/kern sys/sys sys/vm
Jeff Roberson
jeff at FreeBSD.org
Sun Apr 1 04:11:41 UTC 2018
Author: jeff
Date: Sun Apr 1 04:11:38 2018
New Revision: 331861
URL: https://svnweb.freebsd.org/changeset/base/331861
Log:
Experimental support for msetdomain(), a syscall similar to Linux's mbind()
that allows you to set NUMA policy on memory ranges.
Modified:
user/jeff/numa/lib/libc/sys/Symbol.map
user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h
user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c
user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c
user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c
user/jeff/numa/sys/compat/freebsd32/syscalls.master
user/jeff/numa/sys/kern/init_sysent.c
user/jeff/numa/sys/kern/kern_cpuset.c
user/jeff/numa/sys/kern/syscalls.c
user/jeff/numa/sys/kern/syscalls.master
user/jeff/numa/sys/kern/systrace_args.c
user/jeff/numa/sys/sys/domainset.h
user/jeff/numa/sys/sys/syscall.h
user/jeff/numa/sys/sys/syscall.mk
user/jeff/numa/sys/sys/syscallsubr.h
user/jeff/numa/sys/sys/sysproto.h
user/jeff/numa/sys/vm/vm_fault.c
user/jeff/numa/sys/vm/vm_map.c
user/jeff/numa/sys/vm/vm_map.h
user/jeff/numa/sys/vm/vm_object.c
Modified: user/jeff/numa/lib/libc/sys/Symbol.map
==============================================================================
--- user/jeff/numa/lib/libc/sys/Symbol.map Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/lib/libc/sys/Symbol.map Sun Apr 1 04:11:38 2018 (r331861)
@@ -401,6 +401,7 @@ FBSD_1.5 {
statfs;
cpuset_getdomain;
cpuset_setdomain;
+ msetdomain;
};
FBSDprivate_1.0 {
@@ -1029,4 +1030,6 @@ FBSDprivate_1.0 {
__sys_cpuset_getdomain;
_cpuset_setdomain;
__sys_cpuset_setdomain;
+ _msetdomain;
+ __msetdomain;
};
Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h Sun Apr 1 04:11:38 2018 (r331861)
@@ -469,4 +469,5 @@
#define FREEBSD32_SYS_freebsd32_cpuset_getdomain 561
#define FREEBSD32_SYS_freebsd32_cpuset_setdomain 562
#define FREEBSD32_SYS_getrandom 563
-#define FREEBSD32_SYS_MAXSYSCALL 564
+#define FREEBSD32_SYS_msetdomain 564
+#define FREEBSD32_SYS_MAXSYSCALL 565
Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -596,4 +596,5 @@ const char *freebsd32_syscallnames[] = {
"freebsd32_cpuset_getdomain", /* 561 = freebsd32_cpuset_getdomain */
"freebsd32_cpuset_setdomain", /* 562 = freebsd32_cpuset_setdomain */
"getrandom", /* 563 = getrandom */
+ "msetdomain", /* 564 = msetdomain */
};
Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -645,4 +645,5 @@ struct sysent freebsd32_sysent[] = {
{ AS(freebsd32_cpuset_getdomain_args), (sy_call_t *)freebsd32_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = freebsd32_cpuset_getdomain */
{ AS(freebsd32_cpuset_setdomain_args), (sy_call_t *)freebsd32_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = freebsd32_cpuset_setdomain */
{ AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 563 = getrandom */
+ { AS(msetdomain_args), (sy_call_t *)sys_msetdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 564 = msetdomain */
};
Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -3283,6 +3283,18 @@ systrace_args(int sysnum, void *params, uint64_t *uarg
*n_args = 3;
break;
}
+ /* msetdomain */
+ case 564: {
+ struct msetdomain_args *p = params;
+ uarg[0] = (intptr_t) p->addr; /* void * */
+ uarg[1] = p->size; /* size_t */
+ uarg[2] = p->domainsetsize; /* size_t */
+ uarg[3] = (intptr_t) p->mask; /* domainset_t * */
+ iarg[4] = p->policy; /* int */
+ iarg[5] = p->flags; /* int */
+ *n_args = 6;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -8825,6 +8837,31 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *d
break;
};
break;
+ /* msetdomain */
+ case 564:
+ switch(ndx) {
+ case 0:
+ p = "userland void *";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ case 2:
+ p = "size_t";
+ break;
+ case 3:
+ p = "userland domainset_t *";
+ break;
+ case 4:
+ p = "int";
+ break;
+ case 5:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -10678,6 +10715,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *
break;
/* getrandom */
case 563:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* msetdomain */
+ case 564:
if (ndx == 0 || ndx == 1)
p = "int";
break;
Modified: user/jeff/numa/sys/compat/freebsd32/syscalls.master
==============================================================================
--- user/jeff/numa/sys/compat/freebsd32/syscalls.master Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/compat/freebsd32/syscalls.master Sun Apr 1 04:11:38 2018 (r331861)
@@ -1118,5 +1118,9 @@
int policy); }
563 AUE_NULL NOPROTO { int getrandom(void *buf, size_t buflen, \
unsigned int flags); }
+564 AUE_NULL NOPROTO { int msetdomain(void *addr, \
+ size_t size, size_t domainsetsize, \
+ domainset_t *mask, int policy, \
+ int flags); }
; vim: syntax=off
Modified: user/jeff/numa/sys/kern/init_sysent.c
==============================================================================
--- user/jeff/numa/sys/kern/init_sysent.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/kern/init_sysent.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -615,4 +615,5 @@ struct sysent sysent[] = {
{ AS(cpuset_getdomain_args), (sy_call_t *)sys_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = cpuset_getdomain */
{ AS(cpuset_setdomain_args), (sy_call_t *)sys_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = cpuset_setdomain */
{ AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 563 = getrandom */
+ { AS(msetdomain_args), (sy_call_t *)sys_msetdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 564 = msetdomain */
};
Modified: user/jeff/numa/sys/kern/kern_cpuset.c
==============================================================================
--- user/jeff/numa/sys/kern/kern_cpuset.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/kern/kern_cpuset.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -64,6 +64,9 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
@@ -2005,6 +2008,57 @@ out:
return (error);
}
+static int
+domainset_copyin(struct domainset *domain, size_t domainsetsize,
+ const domainset_t *maskp, int policy)
+{
+ domainset_t *mask;
+ char *end, *cp;
+ int error;
+
+ if (domainsetsize < sizeof(domainset_t) ||
+ domainsetsize > DOMAINSET_MAXSIZE / NBBY)
+ return (ERANGE);
+
+ if (policy <= DOMAINSET_POLICY_INVALID ||
+ policy > DOMAINSET_POLICY_MAX)
+ return (EINVAL);
+
+ memset(domain, 0, sizeof(*domain));
+ mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
+ error = copyin(maskp, mask, domainsetsize);
+ if (error != 0)
+ goto out;
+ /*
+ * Verify that no high bits are set.
+ */
+ if (domainsetsize > sizeof(domainset_t)) {
+ end = cp = (char *)&mask->__bits;
+ end += domainsetsize;
+ cp += sizeof(domainset_t);
+ while (cp != end)
+ if (*cp++ != 0) {
+ error = EINVAL;
+ goto out;
+ }
+
+ }
+ DOMAINSET_COPY(mask, &domain->ds_mask);
+ domain->ds_policy = policy;
+ /* Translate preferred policy into a mask and fallback. */
+ if (policy == DOMAINSET_POLICY_PREFER) {
+ /* Only support a single preferred domain. */
+ if (DOMAINSET_COUNT(&domain->ds_mask) != 1) {
+ error = EINVAL;
+ goto out;
+ }
+ domain->ds_prefer = DOMAINSET_FFS(&domain->ds_mask) - 1;
+ }
+out:
+ free(mask, M_TEMP);
+ return (error);
+}
+
#ifndef _SYS_SYSPROTO_H_
struct cpuset_setdomain_args {
cpulevel_t level;
@@ -2015,6 +2069,7 @@ struct cpuset_setdomain_args {
int policy;
};
#endif
+
int
sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap)
{
@@ -2032,12 +2087,8 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le
struct thread *ttd;
struct proc *p;
struct domainset domain;
- domainset_t *mask;
int error;
- if (domainsetsize < sizeof(domainset_t) ||
- domainsetsize > DOMAINSET_MAXSIZE / NBBY)
- return (ERANGE);
/* In Capability mode, you can only set your own CPU set. */
if (IN_CAPABILITY_MODE(td)) {
if (level != CPU_LEVEL_WHICH)
@@ -2047,43 +2098,13 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le
if (id != -1)
return (ECAPMODE);
}
- memset(&domain, 0, sizeof(domain));
- mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
- error = copyin(maskp, mask, domainsetsize);
- if (error)
- goto out;
- /*
- * Verify that no high bits are set.
- */
- if (domainsetsize > sizeof(domainset_t)) {
- char *end;
- char *cp;
- end = cp = (char *)&mask->__bits;
- end += domainsetsize;
- cp += sizeof(domainset_t);
- while (cp != end)
- if (*cp++ != 0) {
- error = EINVAL;
- goto out;
- }
-
- }
- DOMAINSET_COPY(mask, &domain.ds_mask);
- domain.ds_policy = policy;
- if (policy <= DOMAINSET_POLICY_INVALID ||
- policy > DOMAINSET_POLICY_MAX)
- return (EINVAL);
-
- /* Translate preferred policy into a mask and fallback. */
- if (policy == DOMAINSET_POLICY_PREFER) {
- /* Only support a single preferred domain. */
- if (DOMAINSET_COUNT(&domain.ds_mask) != 1)
- return (EINVAL);
- domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1;
- /* This will be constrained by domainset_shadow(). */
+ error = domainset_copyin(&domain, domainsetsize, maskp, policy);
+ if (error)
+ return (error);
+ /* This will be constrained by cpuset_shadow(). */
+ if (policy == DOMAINSET_POLICY_PREFER)
DOMAINSET_FILL(&domain.ds_mask);
- }
switch (level) {
case CPU_LEVEL_ROOT:
@@ -2146,12 +2167,106 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le
break;
}
out:
- free(mask, M_TEMP);
return (error);
}
-#ifdef DDB
+#ifndef _SYS_SYSPROTO_H_
+struct msetdomain_args {
+ void *addr;
+ size_t size;
+ size_t domainsetsize;
+ domainset_t *mask;
+ int policy;
+ int flags;
+};
+#endif
+int
+sys_msetdomain(struct thread *td, struct msetdomain_args *uap)
+{
+ return (kern_msetdomain(td, (uintptr_t)uap->addr, uap->size,
+ uap->domainsetsize, uap->mask, uap->policy, uap->flags));
+}
+
+int
+kern_msetdomain(struct thread *td, uintptr_t addr0, size_t size,
+ size_t domainsetsize, const domainset_t *mask, int policy, int flags)
+{
+ struct domainset domain, *set, *nset;
+ struct cpuset *cset;
+ struct thread *ttd;
+ struct proc *p;
+ vm_offset_t addr;
+ vm_size_t pageoff;
+ int error;
+
+ /* Normalize the addresses. */
+ addr = trunc_page(addr0);
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vm_size_t)round_page(size);
+ if (addr + size < addr)
+ return (EINVAL);
+
+ /* Short-circuit for POLICY_INVALID == reset to default. */
+ if (policy == DOMAINSET_POLICY_INVALID) {
+ nset = NULL;
+ goto apply;
+ }
+
+ /*
+ * Copy in and initialize the domainset from the user arguments.
+ */
+ error = domainset_copyin(&domain, domainsetsize, mask, policy);
+ if (error)
+ return (error);
+
+ /*
+ * Grab the list of allowed domains from the numbered cpuset this
+ * process is a member of.
+ */
+ error = cpuset_which(CPU_WHICH_PID, -1, &p, &ttd, &cset);
+ if (error)
+ return (error);
+ thread_lock(ttd);
+ set = cpuset_getbase(ttd->td_cpuset)->cs_domain;
+ thread_unlock(ttd);
+ PROC_UNLOCK(p);
+
+ /*
+ * Validate the new policy against the allowed set.
+ */
+ if (policy == DOMAINSET_POLICY_PREFER)
+ DOMAINSET_COPY(&set->ds_mask, &domain.ds_mask);
+ if (!domainset_valid(set, &domain))
+ return (EINVAL);
+
+ /*
+ * Attempt to create a new set based on this key.
+ */
+ nset = domainset_create(&domain);
+ if (nset == NULL)
+ return (EINVAL);
+
+ /*
+ * Attempt to apply the new set to the memory range.
+ */
+apply:
+ switch (vm_map_setdomain(&td->td_proc->p_vmspace->vm_map, addr,
+ addr + size, nset, flags)) {
+ case KERN_SUCCESS:
+ break;
+ case KERN_INVALID_ADDRESS:
+ return (EFAULT);
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+#ifdef DDB
static void
ddb_display_bitset(const struct bitset *set, int size)
{
Modified: user/jeff/numa/sys/kern/syscalls.c
==============================================================================
--- user/jeff/numa/sys/kern/syscalls.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/kern/syscalls.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -570,4 +570,5 @@ const char *syscallnames[] = {
"cpuset_getdomain", /* 561 = cpuset_getdomain */
"cpuset_setdomain", /* 562 = cpuset_setdomain */
"getrandom", /* 563 = getrandom */
+ "msetdomain", /* 564 = msetdomain */
};
Modified: user/jeff/numa/sys/kern/syscalls.master
==============================================================================
--- user/jeff/numa/sys/kern/syscalls.master Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/kern/syscalls.master Sun Apr 1 04:11:38 2018 (r331861)
@@ -1023,6 +1023,9 @@
int policy); }
563 AUE_NULL STD { int getrandom(void *buf, size_t buflen, \
unsigned int flags); }
+564 AUE_NULL STD { int msetdomain(void *addr, size_t size, \
+ size_t domainsetsize, domainset_t *mask, \
+ int policy, int flags); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Modified: user/jeff/numa/sys/kern/systrace_args.c
==============================================================================
--- user/jeff/numa/sys/kern/systrace_args.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/kern/systrace_args.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -3291,6 +3291,18 @@ systrace_args(int sysnum, void *params, uint64_t *uarg
*n_args = 3;
break;
}
+ /* msetdomain */
+ case 564: {
+ struct msetdomain_args *p = params;
+ uarg[0] = (intptr_t) p->addr; /* void * */
+ uarg[1] = p->size; /* size_t */
+ uarg[2] = p->domainsetsize; /* size_t */
+ uarg[3] = (intptr_t) p->mask; /* domainset_t * */
+ iarg[4] = p->policy; /* int */
+ iarg[5] = p->flags; /* int */
+ *n_args = 6;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -8777,6 +8789,31 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *d
break;
};
break;
+ /* msetdomain */
+ case 564:
+ switch(ndx) {
+ case 0:
+ p = "userland void *";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ case 2:
+ p = "size_t";
+ break;
+ case 3:
+ p = "userland domainset_t *";
+ break;
+ case 4:
+ p = "int";
+ break;
+ case 5:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -10665,6 +10702,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *
break;
/* getrandom */
case 563:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* msetdomain */
+ case 564:
if (ndx == 0 || ndx == 1)
p = "int";
break;
Modified: user/jeff/numa/sys/sys/domainset.h
==============================================================================
--- user/jeff/numa/sys/sys/domainset.h Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/sys/domainset.h Sun Apr 1 04:11:38 2018 (r331861)
@@ -114,6 +114,7 @@ int cpuset_getdomain(cpulevel_t, cpuwhich_t, id_t, siz
int *);
int cpuset_setdomain(cpulevel_t, cpuwhich_t, id_t, size_t,
const domainset_t *, int);
+int msetdomain(void *, size_t, size_t, domainset_t *, int, int);
__END_DECLS
#endif
Modified: user/jeff/numa/sys/sys/syscall.h
==============================================================================
--- user/jeff/numa/sys/sys/syscall.h Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/sys/syscall.h Sun Apr 1 04:11:38 2018 (r331861)
@@ -479,4 +479,5 @@
#define SYS_cpuset_getdomain 561
#define SYS_cpuset_setdomain 562
#define SYS_getrandom 563
-#define SYS_MAXSYSCALL 564
+#define SYS_msetdomain 564
+#define SYS_MAXSYSCALL 565
Modified: user/jeff/numa/sys/sys/syscall.mk
==============================================================================
--- user/jeff/numa/sys/sys/syscall.mk Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/sys/syscall.mk Sun Apr 1 04:11:38 2018 (r331861)
@@ -406,4 +406,5 @@ MIASM = \
kevent.o \
cpuset_getdomain.o \
cpuset_setdomain.o \
- getrandom.o
+ getrandom.o \
+ msetdomain.o
Modified: user/jeff/numa/sys/sys/syscallsubr.h
==============================================================================
--- user/jeff/numa/sys/sys/syscallsubr.h Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/sys/syscallsubr.h Sun Apr 1 04:11:38 2018 (r331861)
@@ -175,6 +175,9 @@ int kern_mlock(struct proc *proc, struct ucred *cred,
int kern_mmap(struct thread *td, uintptr_t addr, size_t size, int prot,
int flags, int fd, off_t pos);
int kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot);
+int kern_msetdomain(struct thread *td, uintptr_t addr,
+ size_t size, size_t domainsetsize, const domainset_t *maskp,
+ int policy, int flags);
int kern_msgctl(struct thread *, int, int, struct msqid_ds *);
int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *);
int kern_msgsnd(struct thread *, int, const void *, size_t, int, long);
Modified: user/jeff/numa/sys/sys/sysproto.h
==============================================================================
--- user/jeff/numa/sys/sys/sysproto.h Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/sys/sysproto.h Sun Apr 1 04:11:38 2018 (r331861)
@@ -1773,6 +1773,14 @@ struct getrandom_args {
char buflen_l_[PADL_(size_t)]; size_t buflen; char buflen_r_[PADR_(size_t)];
char flags_l_[PADL_(unsigned int)]; unsigned int flags; char flags_r_[PADR_(unsigned int)];
};
+struct msetdomain_args {
+ char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
+ char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)];
+ char domainsetsize_l_[PADL_(size_t)]; size_t domainsetsize; char domainsetsize_r_[PADR_(size_t)];
+ char mask_l_[PADL_(domainset_t *)]; domainset_t * mask; char mask_r_[PADR_(domainset_t *)];
+ char policy_l_[PADL_(int)]; int policy; char policy_r_[PADR_(int)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
int nosys(struct thread *, struct nosys_args *);
void sys_sys_exit(struct thread *, struct sys_exit_args *);
int sys_fork(struct thread *, struct fork_args *);
@@ -2154,6 +2162,7 @@ int sys_kevent(struct thread *, struct kevent_args *);
int sys_cpuset_getdomain(struct thread *, struct cpuset_getdomain_args *);
int sys_cpuset_setdomain(struct thread *, struct cpuset_setdomain_args *);
int sys_getrandom(struct thread *, struct getrandom_args *);
+int sys_msetdomain(struct thread *, struct msetdomain_args *);
#ifdef COMPAT_43
@@ -3047,6 +3056,7 @@ int freebsd11_mknodat(struct thread *, struct freebsd1
#define SYS_AUE_cpuset_getdomain AUE_NULL
#define SYS_AUE_cpuset_setdomain AUE_NULL
#define SYS_AUE_getrandom AUE_NULL
+#define SYS_AUE_msetdomain AUE_NULL
#undef PAD_
#undef PADL_
Modified: user/jeff/numa/sys/vm/vm_fault.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_fault.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/vm/vm_fault.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -1609,7 +1609,6 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map
KASSERT(upgrade || dst_entry->object.vm_object == NULL,
("vm_fault_copy_entry: vm_object not NULL"));
if (src_object != dst_object) {
- dst_object->domain = src_object->domain;
dst_entry->object.vm_object = dst_object;
dst_entry->offset = 0;
dst_object->charge = dst_entry->end - dst_entry->start;
Modified: user/jeff/numa/sys/vm/vm_map.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_map.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/vm/vm_map.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/domainset.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -848,6 +849,34 @@ vm_map_entry_create(vm_map_t map)
}
/*
+ * vm_map_entry_object_allocate: [ internal use only ]
+ *
+ * Returns the object associated with a map entry, allocating
+ * a default object if none presently exists.
+ */
+static vm_object_t
+vm_map_entry_object_allocate(vm_map_t map, vm_map_entry_t entry)
+{
+ vm_object_t object;
+
+ VM_MAP_ASSERT_LOCKED(map);
+ if (entry->object.vm_object != NULL)
+ return (entry->object.vm_object);
+
+ object = vm_object_allocate(OBJT_DEFAULT,
+ atop(entry->end - entry->start));
+ entry->object.vm_object = object;
+ entry->offset = 0;
+ if (entry->cred != NULL) {
+ object->cred = entry->cred;
+ object->charge = entry->end - entry->start;
+ entry->cred = NULL;
+ }
+
+ return (object);
+}
+
+/*
* vm_map_entry_set_behavior:
*
* Set the expected access behavior, either normal, random, or
@@ -1773,16 +1802,7 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry,
*/
if (entry->object.vm_object == NULL && !map->system_map &&
(entry->eflags & MAP_ENTRY_GUARD) == 0) {
- vm_object_t object;
- object = vm_object_allocate(OBJT_DEFAULT,
- atop(entry->end - entry->start));
- entry->object.vm_object = object;
- entry->offset = 0;
- if (entry->cred != NULL) {
- object->cred = entry->cred;
- object->charge = entry->end - entry->start;
- entry->cred = NULL;
- }
+ vm_map_entry_object_allocate(map, entry);
} else if (entry->object.vm_object != NULL &&
((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
entry->cred != NULL) {
@@ -1853,16 +1873,7 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, v
*/
if (entry->object.vm_object == NULL && !map->system_map &&
(entry->eflags & MAP_ENTRY_GUARD) == 0) {
- vm_object_t object;
- object = vm_object_allocate(OBJT_DEFAULT,
- atop(entry->end - entry->start));
- entry->object.vm_object = object;
- entry->offset = 0;
- if (entry->cred != NULL) {
- object->cred = entry->cred;
- object->charge = entry->end - entry->start;
- entry->cred = NULL;
- }
+ vm_map_entry_object_allocate(map, entry);
} else if (entry->object.vm_object != NULL &&
((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
entry->cred != NULL) {
@@ -3449,21 +3460,11 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
case VM_INHERIT_SHARE:
/*
- * Clone the entry, creating the shared object if necessary.
+ * Clone the entry, creating the shared object if
+ * necessary.
*/
- object = old_entry->object.vm_object;
- if (object == NULL) {
- object = vm_object_allocate(OBJT_DEFAULT,
- atop(old_entry->end - old_entry->start));
- old_entry->object.vm_object = object;
- old_entry->offset = 0;
- if (old_entry->cred != NULL) {
- object->cred = old_entry->cred;
- object->charge = old_entry->end -
- old_entry->start;
- old_entry->cred = NULL;
- }
- }
+ object = vm_map_entry_object_allocate(old_map,
+ old_entry);
/*
* Add the reference before calling vm_object_shadow
@@ -4195,16 +4196,7 @@ RetryLookupLocked:
!map->system_map) {
if (vm_map_lock_upgrade(map))
goto RetryLookup;
- entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
- atop(size));
- entry->offset = 0;
- if (entry->cred != NULL) {
- VM_OBJECT_WLOCK(entry->object.vm_object);
- entry->object.vm_object->cred = entry->cred;
- entry->object.vm_object->charge = size;
- VM_OBJECT_WUNLOCK(entry->object.vm_object);
- entry->cred = NULL;
- }
+ vm_map_entry_object_allocate(map, entry);
vm_map_lock_downgrade(map);
}
@@ -4313,6 +4305,107 @@ vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
* Unlock the main-level map
*/
vm_map_unlock_read(map);
+}
+
+/*
+ * vm_map_setdomain:
+ *
+ * Assigns the NUMA policy contained in 'domain' to all objects
+ * overlapping the requested address range.
+ */
+int
+vm_map_setdomain(vm_map_t map, vm_offset_t start, vm_offset_t end,
+ struct domainset *domain, int flags)
+{
+ vm_map_entry_t current, entry;
+ vm_object_t object;
+ int error;
+
+ error = KERN_SUCCESS;
+ vm_map_lock(map);
+ if (start < vm_map_min(map) || end > vm_map_max(map) ||
+ start >= end || map->system_map) {
+ error = KERN_INVALID_ADDRESS;
+ goto out;
+ }
+
+ /*
+ * Locate starting entry and clip if necessary.
+ */
+ if (!vm_map_lookup_entry(map, start, &entry)) {
+ error = KERN_INVALID_ADDRESS;
+ goto out;
+ }
+ if (entry->start > start) {
+ error = KERN_INVALID_ADDRESS;
+ goto out;
+ }
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Walk the range looking for holes before we apply policy.
+ */
+ for (current = entry;
+ (current != &map->header) && (current->start < end);
+ current = current->next
+ ) {
+ if (current->end >= end)
+ break;
+ /* We don't support gaps. */
+ if (current->end != current->next->start) {
+ error = KERN_INVALID_ADDRESS;
+ goto out;
+ }
+ }
+
+ /*
+ * Walk each overlapping map entry and update the backing
+ * object's memory policy.
+ */
+ for (current = entry;
+ (current != &map->header) && (current->start < end);
+ current = current->next
+ ) {
+ /* Skip incompatible entries. */
+ if ((current->eflags &
+ (MAP_ENTRY_GUARD | MAP_ENTRY_IS_SUB_MAP)) != 0)
+ continue;
+
+ /*
+ * Clip the end and allocate the object so that we are
+ * only modifying the requested range.
+ */
+ vm_map_clip_end(map, current, end);
+ object = vm_map_entry_object_allocate(map, current);
+ if (current->eflags & MAP_ENTRY_NEEDS_COPY) {
+ vm_object_shadow(&current->object.vm_object,
+ &current->offset, current->end - current->start);
+ current->eflags &= ~MAP_ENTRY_NEEDS_COPY;
+ object = current->object.vm_object;
+ }
+
+ /*
+ * If the object is anonymous memory we need to split it
+ * so that we can apply the unique allocation property to
+ * this range.
+ */
+ VM_OBJECT_WLOCK(object);
+ if (object->type == OBJT_DEFAULT ||
+ object->type == OBJT_SWAP) {
+ vm_object_collapse(object);
+ if ((object->flags & OBJ_NOSPLIT) == 0) {
+ vm_object_split(current);
+ object = current->object.vm_object;
+ }
+ }
+ object->domain.dr_policy = domain;
+ VM_OBJECT_WUNLOCK(object);
+ vm_map_simplify_entry(map, current);
+ }
+out:
+ vm_map_unlock(map);
+
+ return (error);
}
#include "opt_ddb.h"
Modified: user/jeff/numa/sys/vm/vm_map.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_map.h Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/vm/vm_map.h Sun Apr 1 04:11:38 2018 (r331861)
@@ -403,5 +403,8 @@ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_
int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
int flags);
long vmspace_swap_count(struct vmspace *vmspace);
+struct domainset;
+int vm_map_setdomain(vm_map_t, vm_offset_t, vm_offset_t,
+ struct domainset *, int);
#endif /* _KERNEL */
#endif /* _VM_MAP_ */
Modified: user/jeff/numa/sys/vm/vm_object.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_object.c Sun Apr 1 01:21:00 2018 (r331860)
+++ user/jeff/numa/sys/vm/vm_object.c Sun Apr 1 04:11:38 2018 (r331861)
@@ -1328,7 +1328,6 @@ vm_object_shadow(
result->backing_object_offset = *offset;
if (source != NULL) {
VM_OBJECT_WLOCK(source);
- result->domain = source->domain;
LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
source->shadow_count++;
#if VM_NRESERVLEVEL > 0
More information about the svn-src-user
mailing list