Extending MADV_PROTECT

John Baldwin jhb at freebsd.org
Fri May 10 20:02:33 UTC 2013


On Thursday, May 09, 2013 8:31:47 am Konstantin Belousov wrote:
> On Thu, May 09, 2013 at 08:14:52AM -0400, John Baldwin wrote:
> > > You mentioned a priority, and I think ability to pass a structure to the
> > > sub-function of the syscall is better then carving bits in the int argument,
> > > or introducing a new syscall.
> > 
> > I think the priority would still be a pprotect operation.  In some ways it would
> > be nice to be able to do ioctls on processes and maybe this could be structured
> > similarly?
> > 
> > int procctl(int pid, unsigned long cmd, ...)
> > 
> > (So it's basically ioctl but with the 'fd' replaced with 'pid'.  This would also
> > mean that in the future with Robert's pdfork() you could perhaps have ioctl on
> > a process fd just foward the request to procctl).
> 
> Yes, this is exactly what I mean.

Ok, here is a patch for 8 that reworks this to use a procctl().  If this looks
reasonable I will port this to HEAD as two pieces: the first to add
procctl() and the second to add PROCSPROTECT.

Index: sys/cddl/contrib/opensolaris/uts/common/sys/procset.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/sys/procset.h	(revision 251038)
+++ sys/cddl/contrib/opensolaris/uts/common/sys/procset.h	(working copy)
@@ -51,6 +51,7 @@
 #define	P_INITUID	0
 #define	P_INITPGID	0
 
+#ifndef _IDTYPE_T_DECLARED
 
 /*
  *	The following defines the values for an identifier type.  It
@@ -81,7 +82,10 @@
 	P_PSETID	/* Processor set identifier		*/
 } idtype_t;
 
+#define	_IDTYPE_T_DECLARED
 
+#endif
+
 /*
  *	The following defines the operations which can be performed to
  *	combine two simple sets of processes to form another set of
Index: sys/compat/freebsd32/syscalls.master
===================================================================
--- sys/compat/freebsd32/syscalls.master	(revision 251038)
+++ sys/compat/freebsd32/syscalls.master	(working copy)
@@ -977,3 +977,15 @@
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2, \
 				    int advice); }
+532	AUE_NULL	UNIMPL	wait6
+533	AUE_NULL	UNIMPL	cap_rights_limit
+534	AUE_NULL	UNIMPL	cap_ioctls_limit
+535	AUE_NULL	UNIMPL	cap_ioctls_get
+536	AUE_NULL	UNIMPL	cap_fcntls_limit
+537	AUE_NULL	UNIMPL	cap_fcntls_get
+538	AUE_NULL	UNIMPL	bindat
+539	AUE_NULL	UNIMPL	connectat
+540	AUE_NULL	UNIMPL	chflagsat
+541	AUE_NULL	UNIMPL	accept4
+542	AUE_NULL	UNIMPL	pipe2
+543	AUE_NULL	UNIMPL	procctl
Index: sys/kern/makesyscalls.sh
===================================================================
--- sys/kern/makesyscalls.sh	(revision 251038)
+++ sys/kern/makesyscalls.sh	(working copy)
@@ -143,7 +143,8 @@
 		printf "#include <sys/acl.h>\n" > sysarg
 		printf "#include <sys/cpuset.h>\n" > sysarg
 		printf "#include <sys/_semaphore.h>\n" > sysarg
-		printf "#include <sys/ucontext.h>\n\n" > sysarg
+		printf "#include <sys/ucontext.h>\n" > sysarg
+		printf "#include <sys/wait.h>\n\n" > sysarg
 		printf "#include <bsm/audit_kevents.h>\n\n" > sysarg
 		printf "struct proc;\n\n" > sysarg
 		printf "struct thread;\n\n" > sysarg
Index: sys/kern/sys_process.c
===================================================================
--- sys/kern/sys_process.c	(revision 251038)
+++ sys/kern/sys_process.c	(working copy)
@@ -36,12 +36,17 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/vnode.h>
 #include <sys/ptrace.h>
 #include <sys/sx.h>
@@ -98,6 +103,8 @@
 
 #endif
 
+static MALLOC_DEFINE(M_PROCCTLOPS, "procctlops", "procctl data buffer");
+
 /*
  * Functions implemented using PROC_ACTION():
  *
@@ -1281,3 +1288,217 @@
 		msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
 	} while (p->p_step);
 }
+
+static int
+protect_setchild(struct thread *td, struct proc *p, int flags)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	if (p->p_flag & P_SYSTEM || p_cansee(td, p) != 0)
+		return (0);
+	if (flags & PPROT_SET)
+		p->p_flag |= P_PROTECTED;
+	else
+		p->p_flag &= ~P_PROTECTED;
+	return (1);
+}
+
+static int
+protect_setchildren(struct thread *td, struct proc *top, int flags)
+{
+	struct proc *p;
+	int ret;
+
+	p = top;
+	ret = 0;
+	sx_assert(&proctree_lock, SX_LOCKED);
+	for (;;) {
+		ret |= protect_setchild(td, p, flags);
+		PROC_UNLOCK(p);
+		/*
+		 * If this process has children, descend to them next,
+		 * otherwise do any siblings, and if done with this level,
+		 * follow back up the tree (but not past top).
+		 */
+		if (!LIST_EMPTY(&p->p_children))
+			p = LIST_FIRST(&p->p_children);
+		else for (;;) {
+			if (p == top) {
+				PROC_LOCK(p);
+				return (ret);
+			}
+			if (LIST_NEXT(p, p_sibling)) {
+				p = LIST_NEXT(p, p_sibling);
+				break;
+			}
+			p = p->p_pptr;
+		}
+		PROC_LOCK(p);
+	}
+}
+
+static int
+protect_set(struct thread *td, struct proc *p, int flags)
+{
+	int error, ret;
+
+	if ((flags & ~(PPROT_SET | PPROT_CLEAR | PPROT_DESCEND |
+	    PPROT_INHERIT)) != 0)
+		return (EINVAL);
+	if (flags & PPROT_INHERIT)
+		return (EOPNOTSUPP);
+
+	error = priv_check(td, PRIV_VM_MADV_PROTECT);
+	if (error)
+		return (error);
+
+	if (flags & PPROT_DESCEND)
+		ret = protect_setchildren(td, p, flags);
+	else
+		ret = protect_setchild(td, p, flags);
+	if (ret == 0)
+		return (EPERM);
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct procctl_args {
+	idtype_t idtype;
+	id_t	id;
+	u_long	com;
+	void	*data;
+};
+#endif
+/* ARGSUSED */
+int
+procctl(struct thread *td, struct procctl_args *uap)
+{
+	u_long com;
+	int arg, error;
+	u_int size;
+	void *data;
+
+	if (uap->com > 0xffffffff) {
+		printf(
+		    "WARNING pid %d (%s): procctl sign-extension procctl %lx\n",
+		    td->td_proc->p_pid, td->td_name, uap->com);
+		uap->com &= 0xffffffff;
+	}
+	com = uap->com;
+
+	/*
+	 * Interpret high order word to find amount of data to be
+	 * copied to/from the user's address space.
+	 */
+	size = IOCPARM_LEN(com);
+	if ((size > IOCPARM_MAX) ||
+	    ((com & (IOC_VOID  | IOC_IN | IOC_OUT)) == 0) ||
+	    ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
+	    ((com & IOC_VOID) && size > 0 && size != sizeof(int)))
+		return (ENOTTY);
+
+	if (size > 0) {
+		if (com & IOC_VOID) {
+			/* Integer argument. */
+			arg = (intptr_t)uap->data;
+			data = (void *)&arg;
+			size = 0;
+		} else
+			data = malloc((u_long)size, M_PROCCTLOPS, M_WAITOK);
+	} else
+		data = (void *)&uap->data;
+	if (com & IOC_IN) {
+		error = copyin(uap->data, data, (u_int)size);
+		if (error) {
+			if (size > 0)
+				free(data, M_PROCCTLOPS);
+			return (error);
+		}
+	} else if (com & IOC_OUT) {
+		/*
+		 * Zero the buffer so the user always
+		 * gets back something deterministic.
+		 */
+		bzero(data, size);
+	}
+
+	error = kern_procctl(td, uap->idtype, uap->id, com, data);
+
+	if (error == 0 && (com & IOC_OUT))
+		error = copyout(data, uap->data, (u_int)size);
+
+	if (size > 0)
+		free(data, M_PROCCTLOPS);
+	return (error);
+}
+
+static int
+kern_procctl_single(struct thread *td, struct proc *p, u_long com, void *data)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	switch (com) {
+	case PROCSPROTECT:
+		return (protect_set(td, p, *(int *)data));
+	default:
+		return (ENOTTY);
+	}
+}
+
+int
+kern_procctl(struct thread *td, idtype_t idtype, id_t id, u_long com,
+    void *data)
+{
+	struct pgrp *pg;
+	struct proc *p;
+	int error;
+
+	sx_slock(&proctree_lock);
+	switch (idtype) {
+	case P_PID:
+		p = pfind(id);
+		if (p == NULL) {
+			error = ESRCH;
+			break;
+		}
+		if (p->p_state == PRS_NEW)
+			error = ESRCH;
+		else
+			error = p_cansee(td, p);
+		if (error == 0)
+			error = kern_procctl_single(td, p, com, data);
+		PROC_UNLOCK(p);
+		break;
+	case P_PGID:
+		/*
+		 * Attempt to apply the operation to all members of the
+		 * group.  Ignore processes in the group that can't be
+		 * seen.  Stop on the first error encountered.
+		 */
+		pg = pgfind(id);
+		if (pg == NULL) {
+			error = ESRCH;
+			break;
+		}
+		PGRP_UNLOCK(pg);
+		error = ESRCH;
+		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+			PROC_LOCK(p);
+			if (p->p_state == PRS_NEW ||
+			    p_cansee(td, p) != 0) {
+				PROC_UNLOCK(p);
+				continue;
+			}
+			error = kern_procctl_single(td, p, com, data);
+			PROC_UNLOCK(p);
+			if (error)
+				break;
+		}
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	sx_sunlock(&proctree_lock);
+	return (error);
+}
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master	(revision 251038)
+++ sys/kern/syscalls.master	(working copy)
@@ -938,5 +938,18 @@
 				    off_t offset, off_t len); }
 531	AUE_NULL	STD	{ int posix_fadvise(int fd, off_t offset, \
 				    off_t len, int advice); }
+532	AUE_NULL	UNIMPL	wait6
+533	AUE_NULL	UNIMPL	cap_rights_limit
+534	AUE_NULL	UNIMPL	cap_ioctls_limit
+535	AUE_NULL	UNIMPL	cap_ioctls_get
+536	AUE_NULL	UNIMPL	cap_fcntls_limit
+537	AUE_NULL	UNIMPL	cap_fcntls_get
+538	AUE_NULL	UNIMPL	bindat
+539	AUE_NULL	UNIMPL	connectat
+540	AUE_NULL	UNIMPL	chflagsat
+541	AUE_NULL	UNIMPL	accept4
+542	AUE_NULL	UNIMPL	pipe2
+543	AUE_NULL	STD	{ int procctl(idtype_t idtype, id_t id, \
+				    u_long com, void *data); }
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master
Index: sys/sys/procctl.h
===================================================================
--- sys/sys/procctl.h	(revision 0)
+++ sys/sys/procctl.h	(working copy)
@@ -0,0 +1,29 @@
+/*-
+ * XXX: License
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SYS_PROCCTL_H_
+#define	_SYS_PROCCTL_H_
+
+#define	PROCSPROTECT	_IOW('p', 1, int)	/* set protected state */
+
+/* Flags for PROCSPROTECT (passed in integer arg). */
+#define	PPROT_SET	0x1
+#define	PPROT_CLEAR	0x0
+#define	PPROT_DESCEND	0x2
+#define	PPROT_INHERIT	0x4
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#include <sys/ioccom.h>
+#include <sys/wait.h>
+
+__BEGIN_DECLS
+int	procctl(idtype_t, id_t, unsigned long, void *);
+__END_DECLS
+
+#endif
+
+#endif /* !_SYS_PROCCTL_H_ */

Property changes on: sys/sys/procctl.h
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: sys/sys/syscallsubr.h
===================================================================
--- sys/sys/syscallsubr.h	(revision 251038)
+++ sys/sys/syscallsubr.h	(working copy)
@@ -33,6 +33,7 @@
 #include <sys/socket.h>
 #include <sys/mac.h>
 #include <sys/mount.h>
+#include <sys/wait.h>
 
 struct file;
 struct itimerval;
@@ -154,6 +155,8 @@
 	    int advice);
 int	kern_posix_fallocate(struct thread *td, int fd, off_t offset,
 	    off_t len);
+int	kern_procctl(struct thread *td, idtype_t idtype, id_t id, u_long com,
+	    void *data);
 int	kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
 	    fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);
Index: sys/sys/wait.h
===================================================================
--- sys/sys/wait.h	(revision 251038)
+++ sys/sys/wait.h	(working copy)
@@ -85,6 +85,46 @@
 #define	WLINUXCLONE 0x80000000	/* Wait for kthread spawned from linux_clone. */
 #endif
 
+#ifndef _IDTYPE_T_DECLARED
+typedef enum
+#if __BSD_VISIBLE
+	idtype		/* pollutes XPG4.2 namespace */
+#endif
+		{
+	/*
+	 * These names were mostly lifted from Solaris source code and
+	 * still use Solaris style naming to avoid breaking any
+	 * OpenSolaris code which has been ported to FreeBSD.  There
+	 * is no clear FreeBSD counterpart for all of the names, but
+	 * some have a clear correspondence to FreeBSD entities.
+	 *
+	 * The numerical values are kept synchronized with the Solaris
+	 * values.
+	 */
+	P_PID,			/* A process identifier. */
+	P_PPID,			/* A parent process identifier.	*/
+	P_PGID,			/* A process group identifier. */
+	P_SID,			/* A session identifier. */
+	P_CID,			/* A scheduling class identifier. */
+	P_UID,			/* A user identifier. */
+	P_GID,			/* A group identifier. */
+	P_ALL,			/* All processes. */
+	P_LWPID,		/* An LWP identifier. */
+	P_TASKID,		/* A task identifier. */
+	P_PROJID,		/* A project identifier. */
+	P_POOLID,		/* A pool identifier. */
+	P_JAILID,		/* A zone identifier. */
+	P_CTID,			/* A (process) contract identifier. */
+	P_CPUID,		/* CPU identifier. */
+	P_PSETID		/* Processor set identifier. */
+} idtype_t;			/* The type of id_t we are using. */
+
+#if __BSD_VISIBLE
+#define	P_ZONEID	P_JAILID
+#endif
+#define	_IDTYPE_T_DECLARED
+#endif
+
 /*
  * Tokens for special values of the "pid" parameter to wait4.
  */
Index: sys/vm/vm_mmap.c
===================================================================
--- sys/vm/vm_mmap.c	(revision 251038)
+++ sys/vm/vm_mmap.c	(working copy)
@@ -48,12 +48,14 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/ioccom.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/filedesc.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/vnode.h>
@@ -63,6 +65,7 @@
 #include <sys/mount.h>
 #include <sys/conf.h>
 #include <sys/stat.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/vmmeter.h>
 
@@ -668,23 +671,18 @@
 {
 	vm_offset_t start, end;
 	vm_map_t map;
-	struct proc *p;
-	int error;
+	int flags;
 
 	/*
 	 * Check for our special case, advising the swap pager we are
 	 * "immortal."
 	 */
 	if (uap->behav == MADV_PROTECT) {
-		error = priv_check(td, PRIV_VM_MADV_PROTECT);
-		if (error == 0) {
-			p = td->td_proc;
-			PROC_LOCK(p);
-			p->p_flag |= P_PROTECTED;
-			PROC_UNLOCK(p);
-		}
-		return (error);
+		flags = PPROT_SET;
+		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
+		    PROCSPROTECT, &flags));
 	}
+
 	/*
 	 * Check for illegal behavior
 	 */

-- 
John Baldwin


More information about the freebsd-arch mailing list