Extending MADV_PROTECT
John Baldwin
jhb at freebsd.org
Fri May 10 20:02:33 UTC 2013
On Thursday, May 09, 2013 8:31:47 am Konstantin Belousov wrote:
> On Thu, May 09, 2013 at 08:14:52AM -0400, John Baldwin wrote:
> > > You mentioned a priority, and I think ability to pass a structure to the
> > > sub-function of the syscall is better than carving bits in the int argument,
> > > or introducing a new syscall.
> >
> > I think the priority would still be a pprotect operation. In some ways it would
> > be nice to be able to do ioctls on processes and maybe this could be structured
> > similarly?
> >
> > int procctl(int pid, unsigned long cmd, ...)
> >
> > (So it's basically ioctl but with the 'fd' replaced with 'pid'. This would also
> > mean that in the future with Robert's pdfork() you could perhaps have ioctl on
> > a process fd just forward the request to procctl).
>
> Yes, this is exactly what I mean.
Ok, here is a patch for 8 that reworks this to use a procctl(). If this looks
reasonable I will port this to HEAD as two pieces: the first to add
procctl() and the second to add PROCSPROTECT.
Index: sys/cddl/contrib/opensolaris/uts/common/sys/procset.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/sys/procset.h (revision 251038)
+++ sys/cddl/contrib/opensolaris/uts/common/sys/procset.h (working copy)
@@ -51,6 +51,7 @@
#define P_INITUID 0
#define P_INITPGID 0
+#ifndef _IDTYPE_T_DECLARED
/*
* The following defines the values for an identifier type. It
@@ -81,7 +82,10 @@
P_PSETID /* Processor set identifier */
} idtype_t;
+#define _IDTYPE_T_DECLARED
+#endif
+
/*
* The following defines the operations which can be performed to
* combine two simple sets of processes to form another set of
Index: sys/compat/freebsd32/syscalls.master
===================================================================
--- sys/compat/freebsd32/syscalls.master (revision 251038)
+++ sys/compat/freebsd32/syscalls.master (working copy)
@@ -977,3 +977,15 @@
uint32_t offset1, uint32_t offset2,\
uint32_t len1, uint32_t len2, \
int advice); }
+532 AUE_NULL UNIMPL wait6
+533 AUE_NULL UNIMPL cap_rights_limit
+534 AUE_NULL UNIMPL cap_ioctls_limit
+535 AUE_NULL UNIMPL cap_ioctls_get
+536 AUE_NULL UNIMPL cap_fcntls_limit
+537 AUE_NULL UNIMPL cap_fcntls_get
+538 AUE_NULL UNIMPL bindat
+539 AUE_NULL UNIMPL connectat
+540 AUE_NULL UNIMPL chflagsat
+541 AUE_NULL UNIMPL accept4
+542 AUE_NULL UNIMPL pipe2
+543 AUE_NULL UNIMPL procctl
Index: sys/kern/makesyscalls.sh
===================================================================
--- sys/kern/makesyscalls.sh (revision 251038)
+++ sys/kern/makesyscalls.sh (working copy)
@@ -143,7 +143,8 @@
printf "#include <sys/acl.h>\n" > sysarg
printf "#include <sys/cpuset.h>\n" > sysarg
printf "#include <sys/_semaphore.h>\n" > sysarg
- printf "#include <sys/ucontext.h>\n\n" > sysarg
+ printf "#include <sys/ucontext.h>\n" > sysarg
+ printf "#include <sys/wait.h>\n\n" > sysarg
printf "#include <bsm/audit_kevents.h>\n\n" > sysarg
printf "struct proc;\n\n" > sysarg
printf "struct thread;\n\n" > sysarg
Index: sys/kern/sys_process.c
===================================================================
--- sys/kern/sys_process.c (revision 251038)
+++ sys/kern/sys_process.c (working copy)
@@ -36,12 +36,17 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
+#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/procctl.h>
#include <sys/vnode.h>
#include <sys/ptrace.h>
#include <sys/sx.h>
@@ -98,6 +103,8 @@
#endif
+static MALLOC_DEFINE(M_PROCCTLOPS, "procctlops", "procctl data buffer");
+
/*
* Functions implemented using PROC_ACTION():
*
@@ -1281,3 +1288,217 @@
msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
} while (p->p_step);
}
+
+static int
+protect_setchild(struct thread *td, struct proc *p, int flags)
+{
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ if (p->p_flag & P_SYSTEM || p_cansee(td, p) != 0)
+ return (0);
+ if (flags & PPROT_SET)
+ p->p_flag |= P_PROTECTED;
+ else
+ p->p_flag &= ~P_PROTECTED;
+ return (1);
+}
+
+static int
+protect_setchildren(struct thread *td, struct proc *top, int flags)
+{
+ struct proc *p;
+ int ret;
+
+ p = top;
+ ret = 0;
+ sx_assert(&proctree_lock, SX_LOCKED);
+ for (;;) {
+ ret |= protect_setchild(td, p, flags);
+ PROC_UNLOCK(p);
+ /*
+ * If this process has children, descend to them next,
+ * otherwise do any siblings, and if done with this level,
+ * follow back up the tree (but not past top).
+ */
+ if (!LIST_EMPTY(&p->p_children))
+ p = LIST_FIRST(&p->p_children);
+ else for (;;) {
+ if (p == top) {
+ PROC_LOCK(p);
+ return (ret);
+ }
+ if (LIST_NEXT(p, p_sibling)) {
+ p = LIST_NEXT(p, p_sibling);
+ break;
+ }
+ p = p->p_pptr;
+ }
+ PROC_LOCK(p);
+ }
+}
+
+static int
+protect_set(struct thread *td, struct proc *p, int flags)
+{
+ int error, ret;
+
+ if ((flags & ~(PPROT_SET | PPROT_CLEAR | PPROT_DESCEND |
+ PPROT_INHERIT)) != 0)
+ return (EINVAL);
+ if (flags & PPROT_INHERIT)
+ return (EOPNOTSUPP);
+
+ error = priv_check(td, PRIV_VM_MADV_PROTECT);
+ if (error)
+ return (error);
+
+ if (flags & PPROT_DESCEND)
+ ret = protect_setchildren(td, p, flags);
+ else
+ ret = protect_setchild(td, p, flags);
+ if (ret == 0)
+ return (EPERM);
+ return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct procctl_args {
+ idtype_t idtype;
+ id_t id;
+ u_long com;
+ void *data;
+};
+#endif
+/* ARGSUSED */
+int
+procctl(struct thread *td, struct procctl_args *uap)
+{
+ u_long com;
+ int arg, error;
+ u_int size;
+ void *data;
+
+ if (uap->com > 0xffffffff) {
+ printf(
+ "WARNING pid %d (%s): procctl sign-extension procctl %lx\n",
+ td->td_proc->p_pid, td->td_name, uap->com);
+ uap->com &= 0xffffffff;
+ }
+ com = uap->com;
+
+ /*
+ * Interpret high order word to find amount of data to be
+ * copied to/from the user's address space.
+ */
+ size = IOCPARM_LEN(com);
+ if ((size > IOCPARM_MAX) ||
+ ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0) ||
+ ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
+ ((com & IOC_VOID) && size > 0 && size != sizeof(int)))
+ return (ENOTTY);
+
+ if (size > 0) {
+ if (com & IOC_VOID) {
+ /* Integer argument. */
+ arg = (intptr_t)uap->data;
+ data = (void *)&arg;
+ size = 0;
+ } else
+ data = malloc((u_long)size, M_PROCCTLOPS, M_WAITOK);
+ } else
+ data = (void *)&uap->data;
+ if (com & IOC_IN) {
+ error = copyin(uap->data, data, (u_int)size);
+ if (error) {
+ if (size > 0)
+ free(data, M_PROCCTLOPS);
+ return (error);
+ }
+ } else if (com & IOC_OUT) {
+ /*
+ * Zero the buffer so the user always
+ * gets back something deterministic.
+ */
+ bzero(data, size);
+ }
+
+ error = kern_procctl(td, uap->idtype, uap->id, com, data);
+
+ if (error == 0 && (com & IOC_OUT))
+ error = copyout(data, uap->data, (u_int)size);
+
+ if (size > 0)
+ free(data, M_PROCCTLOPS);
+ return (error);
+}
+
+static int
+kern_procctl_single(struct thread *td, struct proc *p, u_long com, void *data)
+{
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ switch (com) {
+ case PROCSPROTECT:
+ return (protect_set(td, p, *(int *)data));
+ default:
+ return (ENOTTY);
+ }
+}
+
+int
+kern_procctl(struct thread *td, idtype_t idtype, id_t id, u_long com,
+ void *data)
+{
+ struct pgrp *pg;
+ struct proc *p;
+ int error;
+
+ sx_slock(&proctree_lock);
+ switch (idtype) {
+ case P_PID:
+ p = pfind(id);
+ if (p == NULL) {
+ error = ESRCH;
+ break;
+ }
+ if (p->p_state == PRS_NEW)
+ error = ESRCH;
+ else
+ error = p_cansee(td, p);
+ if (error == 0)
+ error = kern_procctl_single(td, p, com, data);
+ PROC_UNLOCK(p);
+ break;
+ case P_PGID:
+ /*
+ * Attempt to apply the operation to all members of the
+ * group. Ignore processes in the group that can't be
+ * seen. Stop on the first error encountered.
+ */
+ pg = pgfind(id);
+ if (pg == NULL) {
+ error = ESRCH;
+ break;
+ }
+ PGRP_UNLOCK(pg);
+ error = ESRCH;
+ LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW ||
+ p_cansee(td, p) != 0) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ error = kern_procctl_single(td, p, com, data);
+ PROC_UNLOCK(p);
+ if (error)
+ break;
+ }
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ sx_sunlock(&proctree_lock);
+ return (error);
+}
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master (revision 251038)
+++ sys/kern/syscalls.master (working copy)
@@ -938,5 +938,18 @@
off_t offset, off_t len); }
531 AUE_NULL STD { int posix_fadvise(int fd, off_t offset, \
off_t len, int advice); }
+532 AUE_NULL UNIMPL wait6
+533 AUE_NULL UNIMPL cap_rights_limit
+534 AUE_NULL UNIMPL cap_ioctls_limit
+535 AUE_NULL UNIMPL cap_ioctls_get
+536 AUE_NULL UNIMPL cap_fcntls_limit
+537 AUE_NULL UNIMPL cap_fcntls_get
+538 AUE_NULL UNIMPL bindat
+539 AUE_NULL UNIMPL connectat
+540 AUE_NULL UNIMPL chflagsat
+541 AUE_NULL UNIMPL accept4
+542 AUE_NULL UNIMPL pipe2
+543 AUE_NULL STD { int procctl(idtype_t idtype, id_t id, \
+ u_long com, void *data); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: sys/sys/procctl.h
===================================================================
--- sys/sys/procctl.h (revision 0)
+++ sys/sys/procctl.h (working copy)
@@ -0,0 +1,29 @@
+/*-
+ * XXX: License
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_PROCCTL_H_
+#define _SYS_PROCCTL_H_
+
+#define PROCSPROTECT _IOW('p', 1, int) /* set protected state */
+
+/* Flags for PROCSPROTECT (passed in integer arg). */
+#define PPROT_SET 0x1
+#define PPROT_CLEAR 0x0
+#define PPROT_DESCEND 0x2
+#define PPROT_INHERIT 0x4
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#include <sys/ioccom.h>
+#include <sys/wait.h>
+
+__BEGIN_DECLS
+int procctl(idtype_t, id_t, unsigned long, void *);
+__END_DECLS
+
+#endif
+
+#endif /* !_SYS_PROCCTL_H_ */
Property changes on: sys/sys/procctl.h
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: sys/sys/syscallsubr.h
===================================================================
--- sys/sys/syscallsubr.h (revision 251038)
+++ sys/sys/syscallsubr.h (working copy)
@@ -33,6 +33,7 @@
#include <sys/socket.h>
#include <sys/mac.h>
#include <sys/mount.h>
+#include <sys/wait.h>
struct file;
struct itimerval;
@@ -154,6 +155,8 @@
int advice);
int kern_posix_fallocate(struct thread *td, int fd, off_t offset,
off_t len);
+int kern_procctl(struct thread *td, idtype_t idtype, id_t id, u_long com,
+ void *data);
int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);
Index: sys/sys/wait.h
===================================================================
--- sys/sys/wait.h (revision 251038)
+++ sys/sys/wait.h (working copy)
@@ -85,6 +85,46 @@
#define WLINUXCLONE 0x80000000 /* Wait for kthread spawned from linux_clone. */
#endif
+#ifndef _IDTYPE_T_DECLARED
+typedef enum
+#if __BSD_VISIBLE
+ idtype /* pollutes XPG4.2 namespace */
+#endif
+ {
+ /*
+ * These names were mostly lifted from Solaris source code and
+ * still use Solaris style naming to avoid breaking any
+ * OpenSolaris code which has been ported to FreeBSD. There
+ * is no clear FreeBSD counterpart for all of the names, but
+ * some have a clear correspondence to FreeBSD entities.
+ *
+ * The numerical values are kept synchronized with the Solaris
+ * values.
+ */
+ P_PID, /* A process identifier. */
+ P_PPID, /* A parent process identifier. */
+ P_PGID, /* A process group identifier. */
+ P_SID, /* A session identifier. */
+ P_CID, /* A scheduling class identifier. */
+ P_UID, /* A user identifier. */
+ P_GID, /* A group identifier. */
+ P_ALL, /* All processes. */
+ P_LWPID, /* An LWP identifier. */
+ P_TASKID, /* A task identifier. */
+ P_PROJID, /* A project identifier. */
+ P_POOLID, /* A pool identifier. */
+ P_JAILID, /* A zone identifier. */
+ P_CTID, /* A (process) contract identifier. */
+ P_CPUID, /* CPU identifier. */
+ P_PSETID /* Processor set identifier. */
+} idtype_t; /* The type of id_t we are using. */
+
+#if __BSD_VISIBLE
+#define P_ZONEID P_JAILID
+#endif
+#define _IDTYPE_T_DECLARED
+#endif
+
/*
* Tokens for special values of the "pid" parameter to wait4.
*/
Index: sys/vm/vm_mmap.c
===================================================================
--- sys/vm/vm_mmap.c (revision 251038)
+++ sys/vm/vm_mmap.c (working copy)
@@ -48,12 +48,14 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/ioccom.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/procctl.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
@@ -63,6 +65,7 @@
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
+#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
@@ -668,23 +671,18 @@
{
vm_offset_t start, end;
vm_map_t map;
- struct proc *p;
- int error;
+ int flags;
/*
* Check for our special case, advising the swap pager we are
* "immortal."
*/
if (uap->behav == MADV_PROTECT) {
- error = priv_check(td, PRIV_VM_MADV_PROTECT);
- if (error == 0) {
- p = td->td_proc;
- PROC_LOCK(p);
- p->p_flag |= P_PROTECTED;
- PROC_UNLOCK(p);
- }
- return (error);
+ flags = PPROT_SET;
+ return (kern_procctl(td, P_PID, td->td_proc->p_pid,
+ PROCSPROTECT, &flags));
}
+
/*
* Check for illegal behavior
*/
--
John Baldwin
More information about the freebsd-arch
mailing list