git: 72bc1e6806cc - stable/13 - cpuset: Byte swap cpuset for compat32 on big endian architectures

From: Dmitry Chagin <dchagin_at_FreeBSD.org>
Date: Fri, 17 Jun 2022 19:40:18 UTC
The branch stable/13 has been updated by dchagin:

URL: https://cgit.FreeBSD.org/src/commit/?id=72bc1e6806ccff0cc3e712c65090e59482b33357

commit 72bc1e6806ccff0cc3e712c65090e59482b33357
Author:     Dmitry Chagin <dchagin@FreeBSD.org>
AuthorDate: 2022-06-17 19:35:14 +0000
Commit:     Dmitry Chagin <dchagin@FreeBSD.org>
CommitDate: 2022-06-17 19:35:14 +0000

    cpuset: Byte swap cpuset for compat32 on big endian architectures
    
    Summary:
    BITSET uses long as its basic underlying type, whose width depends on
    the compilation target: on 32-bit builds the basic type is 32 bits, but
    on 64-bit builds it's 64 bits.  On little endian architectures this
    doesn't matter, because the LSB is always at the low bit, so the words
    get effectively concatenated moving between 32-bit and 64-bit, but on
    big-endian architectures it causes a mismatch, as setting bit 0 in
    32-bit mode is equivalent to setting bit 32 in 64-bit mode.  To
    demonstrate:
    
    32-bit mode:
    
    BIT_SET(foo, 0):        0x00000001
    
    64-bit sees: 0x0000000100000000
    
    cpuset is the only system interface that uses bitsets, so solve this
    by swapping the integer sub-components at the copyin/copyout points.
    
    Reviewed by:    kib
    Sponsored by:   Juniper Networks, Inc.
    Differential Revision:  https://reviews.freebsd.org/D35225
    
    (cherry picked from commit 47a57144af25a7bd768b29272d50a36fdf2874ba)
    
    Fix the build after 47a57144
    
    (cherry picked from commit 89737eb8290a10d96b77afac1b68e4740b43353b)
    
    cpuset: Fix the KASAN and KMSAN builds
    
    Rename the "copyin" and "copyout" fields of struct cpuset_copy_cb to
    something less generic, since sanitizers define interceptors for
    copyin() and copyout() using #define.
    
    Reported by:    syzbot+2db5d644097fc698fb6f@syzkaller.appspotmail.com
    Fixes:  47a57144af25 ("cpuset: Byte swap cpuset for compat32 on big endian architectures")
    Sponsored by:   The FreeBSD Foundation
    
    (cherry picked from commit 4a3e51335e86cee02569c04b9f1e95ca9abcb170)
    
    Use Linux semantics for the thread affinity syscalls.
    
    Linux has more tolerant checks of the user supplied cpuset_t's.
    
    The minimum cpuset_t size that the Linux kernel permits for
    getaffinity() is the maximum CPU id present in the system divided by
    NBBY; the maximum size is not limited.
    For setaffinity(), Linux does not limit the size of the user-provided
    cpuset_t; internally it uses only the meaningful part of the set, where
    the upper bound is the maximum CPU id present in the system, and no
    larger than the size of the kernel cpuset_t.
    Unlike FreeBSD, Linux ignores high bits set in the mask passed to
    setaffinity(), so clear them in sched_setaffinity() and in the
    Linuxulator itself.
    
    Reviewed by:            Pau Amma (man pages)
    In collaboration with:  jhb
    Differential revision:  https://reviews.freebsd.org/D34849
    MFC after:              2 weeks
    
    (cherry picked from commit f35093f8d6d8155ab2e56c11ee03d474688b16a2)
---
 lib/libc/gen/sched_getaffinity.c          |  27 +++-----
 lib/libc/gen/sched_setaffinity.c          |  29 ++++++--
 lib/libc/sys/cpuset_getaffinity.2         |  19 ++++--
 share/man/man3/pthread_attr_affinity_np.3 |  25 +++----
 sys/compat/freebsd32/freebsd32_misc.c     |   2 +-
 sys/compat/linux/linux_misc.c             |  45 ++++++++----
 sys/kern/kern_cpuset.c                    | 110 ++++++++++++++++++++----------
 sys/sys/syscallsubr.h                     |   2 +
 8 files changed, 162 insertions(+), 97 deletions(-)

diff --git a/lib/libc/gen/sched_getaffinity.c b/lib/libc/gen/sched_getaffinity.c
index 7d345eb82a3b..92135109156c 100644
--- a/lib/libc/gen/sched_getaffinity.c
+++ b/lib/libc/gen/sched_getaffinity.c
@@ -33,24 +33,15 @@
 int
 sched_getaffinity(pid_t pid, size_t cpusetsz, cpuset_t *cpuset)
 {
-	/*
-	 * Be more Linux-compatible:
-	 * - return EINVAL in passed size is less than size of cpuset_t
-	 *   in advance, instead of ERANGE from the syscall
-	 * - if passed size is larger than the size of cpuset_t, be
-	 *   permissive by claming it back to sizeof(cpuset_t) and
-	 *   zeroing the rest.
-	 */
-	if (cpusetsz < sizeof(cpuset_t)) {
+	int error;
+
+	error = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
+	    pid == 0 ? -1 : pid, cpusetsz, cpuset);
+	if (error == -1 && errno == ERANGE)
 		errno = EINVAL;
-		return (-1);
-	}
-	if (cpusetsz > sizeof(cpuset_t)) {
-		memset((char *)cpuset + sizeof(cpuset_t), 0,
-		    cpusetsz - sizeof(cpuset_t));
-		cpusetsz = sizeof(cpuset_t);
-	}
+	if (error == 0)
+		return (cpusetsz < sizeof(cpuset_t) ? cpusetsz :
+		    sizeof(cpuset_t));
 
-	return (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
-	    pid == 0 ? -1 : pid, cpusetsz, cpuset));
+	return (error);
 }
diff --git a/lib/libc/gen/sched_setaffinity.c b/lib/libc/gen/sched_setaffinity.c
index 09e2b9097d5a..0052521cd081 100644
--- a/lib/libc/gen/sched_setaffinity.c
+++ b/lib/libc/gen/sched_setaffinity.c
@@ -26,6 +26,8 @@
  * SUCH DAMAGE.
  */
 
+#include <sys/param.h>
+#include <sys/sysctl.h>
 #include <errno.h>
 #include <sched.h>
 #include <string.h>
@@ -33,15 +35,28 @@
 int
 sched_setaffinity(pid_t pid, size_t cpusetsz, const cpuset_t *cpuset)
 {
+	static int mp_maxid;
 	cpuset_t c;
-	int error;
+	int error, lbs, cpu;
+	size_t len, sz;
 
-	if (cpusetsz > sizeof(cpuset_t)) {
-		errno = EINVAL;
-		return (-1);
-	} else {
-		memset(&c, 0, sizeof(c));
-		memcpy(&c, cpuset, cpusetsz);
+	sz = cpusetsz > sizeof(cpuset_t) ? sizeof(cpuset_t) : cpusetsz;
+	memset(&c, 0, sizeof(c));
+	memcpy(&c, cpuset, sz);
+
+	/* Linux ignores high bits */
+	if (mp_maxid == 0) {
+		len = sizeof(mp_maxid);
+		error = sysctlbyname("kern.smp.maxid", &mp_maxid, &len,
+		    NULL, 0);
+		if (error == -1)
+			return (error);
+	}
+	lbs = CPU_FLS(&c) - 1;
+	if (lbs > mp_maxid) {
+		CPU_FOREACH_ISSET(cpu, &c)
+			if (cpu > mp_maxid)
+				CPU_CLR(cpu, &c);
 	}
 	error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
 	    pid == 0 ? -1 : pid, sizeof(cpuset_t), &c);
diff --git a/lib/libc/sys/cpuset_getaffinity.2 b/lib/libc/sys/cpuset_getaffinity.2
index bce9161a1880..f7ac3873a9be 100644
--- a/lib/libc/sys/cpuset_getaffinity.2
+++ b/lib/libc/sys/cpuset_getaffinity.2
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 23, 2017
+.Dd April 27, 2022
 .Dt CPUSET_GETAFFINITY 2
 .Os
 .Sh NAME
@@ -71,14 +71,19 @@ Masks of type
 are composed using the
 .Dv CPU_SET
 macros.
-The kernel tolerates large sets as long as all CPUs specified
-in the set exist.
-Sets smaller than the kernel uses generate an error on calls to
+If the user-supplied mask is not large enough to fit all of the matching CPUs,
 .Fn cpuset_getaffinity
-even if the result set would fit within the user supplied set.
+fails with
+.Er ERANGE .
 Calls to
 .Fn cpuset_setaffinity
-tolerate small sets with no restrictions.
+tolerate masks of any size with no restrictions.
+The kernel uses the meaningful part of the mask, where the upper bound is
+the maximum CPU id present in the system.
+If bits for non-existing CPUs are set, calls to
+.Fn cpuset_setaffinity
+fail with
+.Er EINVAL .
 .Pp
 The supplied mask should have a size of
 .Fa setsize
@@ -144,7 +149,7 @@ arguments could not be found.
 .It Bq Er ERANGE
 The
 .Fa cpusetsize
-was either preposterously large or smaller than the kernel set size.
+was smaller than needed to fit all of the matching CPUs.
 .It Bq Er EPERM
 The calling process did not have the credentials required to complete the
 operation.
diff --git a/share/man/man3/pthread_attr_affinity_np.3 b/share/man/man3/pthread_attr_affinity_np.3
index 7b1cd3dea0d9..2c85aee9ac19 100644
--- a/share/man/man3/pthread_attr_affinity_np.3
+++ b/share/man/man3/pthread_attr_affinity_np.3
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 12, 2021
+.Dd April 27, 2022
 .Dt PTHREAD_ATTR_AFFINITY_NP 3
 .Os
 .Sh NAME
@@ -51,14 +51,19 @@ Masks of type
 are composed using the
 .Dv CPU_SET
 macros.
-The kernel tolerates large sets as long as all CPUs specified
-in the set exist.
-Sets smaller than the kernel uses generate an error on calls to
-.Fn pthread_attr_getaffinity_np
-even if the result set would fit within the user supplied set.
+If the user-supplied mask is not large enough to fit all of the matching CPUs,
+.Fn cpuset_getaffinity
+fails with
+.Er ERANGE .
 Calls to
-.Fn pthread_attr_setaffinity_np
-tolerate small sets with no restrictions.
+.Fn cpuset_setaffinity
+tolerate masks of any size with no restrictions.
+The kernel uses the meaningful part of the mask, where the upper bound is
+the maximum CPU id present in the system.
+If bits for non-existing CPUs are set, calls to
+.Fn cpuset_setaffinity
+fail with
+.Er EINVAL .
 .Pp
 The supplied mask should have a size of
 .Fa cpusetsize
@@ -119,10 +124,6 @@ or the attribute specified by it is
 The
 .Fa cpusetp
 specified a CPU that was outside the set supported by the kernel.
-.It Bq Er ERANGE
-The
-.Fa cpusetsize
-is too small.
 .It Bq Er ENOMEM
 Insufficient memory exists to store the cpuset mask.
 .El
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index 185c74fd2b2f..9b54653d4489 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -3321,7 +3321,7 @@ freebsd32_cpuset_setaffinity(struct thread *td,
     struct freebsd32_cpuset_setaffinity_args *uap)
 {
 
-	return (kern_cpuset_setaffinity(td, uap->level, uap->which,
+	return (user_cpuset_setaffinity(td, uap->level, uap->which,
 	    PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask,
 	    &cpuset_copy32_cb));
 }
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index 96555f2e8d04..9328755a7130 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
+#include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
@@ -2261,23 +2262,22 @@ int
 linux_sched_getaffinity(struct thread *td,
     struct linux_sched_getaffinity_args *args)
 {
-	int error;
 	struct thread *tdt;
-
-	if (args->len < sizeof(cpuset_t))
-		return (EINVAL);
+	int error;
+	id_t tid;
 
 	tdt = linux_tdfind(td, args->pid, -1);
 	if (tdt == NULL)
 		return (ESRCH);
-
+	tid = tdt->td_tid;
 	PROC_UNLOCK(tdt->td_proc);
 
 	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
-	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr,
-	    &copy_set);
+	    tid, args->len, (cpuset_t *)args->user_mask_ptr, &copy_set);
+	if (error == ERANGE)
+		error = EINVAL;
 	if (error == 0)
-		td->td_retval[0] = sizeof(cpuset_t);
+		td->td_retval[0] = min(args->len, sizeof(cpuset_t));
 
 	return (error);
 }
@@ -2290,19 +2290,34 @@ linux_sched_setaffinity(struct thread *td,
     struct linux_sched_setaffinity_args *args)
 {
 	struct thread *tdt;
-
-	if (args->len < sizeof(cpuset_t))
-		return (EINVAL);
+	cpuset_t *mask;
+	int cpu, error;
+	size_t len;
+	id_t tid;
 
 	tdt = linux_tdfind(td, args->pid, -1);
 	if (tdt == NULL)
 		return (ESRCH);
-
+	tid = tdt->td_tid;
 	PROC_UNLOCK(tdt->td_proc);
 
-	return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
-	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr,
-	    &copy_set));
+	len = min(args->len, sizeof(cpuset_t));
+	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);;
+	error = copyin(args->user_mask_ptr, mask, len);
+	if (error != 0)
+		goto out;
+	/* Linux ignore high bits */
+	CPU_FOREACH_ISSET(cpu, mask)
+		if (cpu > mp_maxid)
+			CPU_CLR(cpu, mask);
+
+	error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
+	    tid, mask);
+	if (error == EDEADLK)
+		error = EINVAL;
+out:
+	free(mask, M_TEMP);
+	return (error);
 }
 
 struct linux_rlimit64 {
diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c
index 97e09557f08f..6cfc56294ab8 100644
--- a/sys/kern/kern_cpuset.c
+++ b/sys/kern/kern_cpuset.c
@@ -1902,13 +1902,10 @@ kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
 	int error;
 	size_t size;
 
-	if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
-		return (ERANGE);
 	error = cpuset_check_capabilities(td, level, which, id);
 	if (error != 0)
 		return (error);
-	size = cpusetsize;
-	mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
 	error = cpuset_which(which, id, &p, &ttd, &set);
 	if (error)
 		goto out;
@@ -1978,8 +1975,33 @@ kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
 		cpuset_rel(set);
 	if (p)
 		PROC_UNLOCK(p);
-	if (error == 0)
+	if (error == 0) {
+		if (cpusetsize < howmany(CPU_FLS(mask), NBBY)) {
+			error = ERANGE;
+			goto out;
+		}
+		size = min(cpusetsize, sizeof(cpuset_t));
 		error = cb->cpuset_copyout(mask, maskp, size);
+		if (error != 0)
+			goto out;
+		if (cpusetsize > size) {
+			char *end;
+			char *cp;
+			int rv;
+
+			end = cp = (char *)&maskp->__bits;
+			end += cpusetsize;
+			cp += size;
+			while (cp != end) {
+				rv = subyte(cp, 0);
+				if (rv == -1) {
+					error = EFAULT;
+					goto out;
+				}
+				cp++;
+			}
+		}
+	}
 out:
 	free(mask, M_TEMP);
 	return (error);
@@ -1998,51 +2020,25 @@ int
 sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap)
 {
 
-	return (kern_cpuset_setaffinity(td, uap->level, uap->which,
+	return (user_cpuset_setaffinity(td, uap->level, uap->which,
 	    uap->id, uap->cpusetsize, uap->mask, &copy_set));
 }
 
 int
 kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
-    id_t id, size_t cpusetsize, const cpuset_t *maskp,
-    const struct cpuset_copy_cb *cb)
+    id_t id, cpuset_t *mask)
 {
 	struct cpuset *nset;
 	struct cpuset *set;
 	struct thread *ttd;
 	struct proc *p;
-	cpuset_t *mask;
 	int error;
 
-	if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
-		return (ERANGE);
 	error = cpuset_check_capabilities(td, level, which, id);
 	if (error != 0)
 		return (error);
-	mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO);
-	error = cb->cpuset_copyin(maskp, mask, cpusetsize);
-	if (error)
-		goto out;
-	/*
-	 * Verify that no high bits are set.
-	 */
-	if (cpusetsize > sizeof(cpuset_t)) {
-		char *end;
-		char *cp;
-
-		end = cp = (char *)&mask->__bits;
-		end += cpusetsize;
-		cp += sizeof(cpuset_t);
-		while (cp != end)
-			if (*cp++ != 0) {
-				error = EINVAL;
-				goto out;
-			}
-	}
-	if (CPU_EMPTY(mask)) {
-		error = EDEADLK;
-		goto out;
-	}
+	if (CPU_EMPTY(mask))
+		return (EDEADLK);
 	switch (level) {
 	case CPU_LEVEL_ROOT:
 	case CPU_LEVEL_CPUSET:
@@ -2064,8 +2060,7 @@ kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
 		case CPU_WHICH_INTRHANDLER:
 		case CPU_WHICH_ITHREAD:
 		case CPU_WHICH_DOMAIN:
-			error = EINVAL;
-			goto out;
+			return (EINVAL);
 		}
 		if (level == CPU_LEVEL_ROOT)
 			nset = cpuset_refroot(set);
@@ -2105,6 +2100,47 @@ kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
 		error = EINVAL;
 		break;
 	}
+	return (error);
+}
+
+int
+user_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
+    id_t id, size_t cpusetsize, const cpuset_t *maskp, const struct cpuset_copy_cb *cb)
+{
+	cpuset_t *mask;
+	int error;
+	size_t size;
+
+	size = min(cpusetsize, sizeof(cpuset_t));
+	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
+	error = cb->cpuset_copyin(maskp, mask, size);
+	if (error)
+		goto out;
+	/*
+	 * Verify that no high bits are set.
+	 */
+	if (cpusetsize > sizeof(cpuset_t)) {
+		const char *end, *cp;
+		int val;
+		end = cp = (const char *)&maskp->__bits;
+		end += cpusetsize;
+		cp += sizeof(cpuset_t);
+
+		while (cp != end) {
+			val = fubyte(cp);
+			if (val == -1) {
+				error = EFAULT;
+				goto out;
+			}
+			if (val != 0) {
+				error = EINVAL;
+				goto out;
+			}
+			cp++;
+		}
+	}
+	error = kern_cpuset_setaffinity(td, level, which, id, mask);
+
 out:
 	free(mask, M_TEMP);
 	return (error);
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index a0fedf7fb49d..6c6f644ede33 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -120,6 +120,8 @@ int	kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp,
 	    const struct cpuset_copy_cb *cb);
 int	kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
+	    cpuwhich_t which, id_t id, cpuset_t *maskp);
+int	user_cpuset_setaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize,
 	    const cpuset_t *maskp, const struct cpuset_copy_cb *cb);
 int	kern_cpuset_getdomain(struct thread *td, cpulevel_t level,