svn commit: r285622 - in head/sys: compat/cloudabi kern sys

Ed Schouten ed at FreeBSD.org
Thu Jul 16 07:05:45 UTC 2015


Author: ed
Date: Thu Jul 16 07:05:42 2015
New Revision: 285622
URL: https://svnweb.freebsd.org/changeset/base/285622

Log:
  Implement CloudABI's exec() call.
  
  Summary:
  In a runtime that is purely based on capability-based security, there is
  a strong emphasis on how programs start their execution. We need to make
  sure that we execute a new program with an exact set of file
  descriptors, ensuring that credentials are not leaked into the process
  accidentally.
  
  Providing the right file descriptors is just half the problem. There
  also needs to be a framework in place that gives meaning to these file
  descriptors. How does a CloudABI mail server know which of the file
  descriptors corresponds to the socket that receives incoming emails?
  Furthermore, how will this mail server acquire its configuration
  parameters, as it cannot open a configuration file from a global path on
  disk?
  
  CloudABI solves this problem by replacing traditional string command
  line arguments by a tree-like data structure consisting of scalars,
  sequences and mappings (similar to YAML/JSON). In this structure, file
  descriptors are treated as a first-class citizen. When calling exec(),
  file descriptors are passed on to the new executable if and only if they
  are referenced from this tree structure. See the cloudabi-run(1) man
  page for more details and examples (sysutils/cloudabi-utils).
  
  Fortunately, the kernel does not need to care about this tree structure
  at all. The C library is responsible for serializing and deserializing,
  but also for extracting the list of referenced file descriptors. The
  system call only receives a copy of the serialized data and a layout of
  what the new file descriptor table should look like:
  
      int proc_exec(int execfd, const void *data, size_t datalen, const int *fds,
                size_t fdslen);
  
  This change introduces a set of fd*_remapped() functions:
  
  - fdcopy_remapped() pulls a copy of a file descriptor table, remapping
    all of the file descriptors according to the provided mapping table.
  - fdinstall_remapped() replaces the file descriptor table of the process
    by the copy created by fdcopy_remapped().
  - fdescfree_remapped() frees the table in case we aborted before
    fdinstall_remapped().
  
  We then add a function exec_copyin_data_fds() that builds on top of these
  functions. It copies in the data and constructs a new remapped file
  descriptor table. This is used by cloudabi_sys_proc_exec().
  
  Test Plan:
  cloudabi-run(1) is capable of spawning processes successfully, providing
  it data and file descriptors. procstat -f seems to confirm all is good.
  Regular FreeBSD processes also work properly.
  
  Reviewers: kib, mjg
  
  Reviewed By: mjg
  
  Subscribers: imp
  
  Differential Revision: https://reviews.freebsd.org/D3079

Modified:
  head/sys/compat/cloudabi/cloudabi_proc.c
  head/sys/kern/kern_descrip.c
  head/sys/kern/kern_exec.c
  head/sys/sys/filedesc.h
  head/sys/sys/imgact.h

Modified: head/sys/compat/cloudabi/cloudabi_proc.c
==============================================================================
--- head/sys/compat/cloudabi/cloudabi_proc.c	Thu Jul 16 05:14:20 2015	(r285621)
+++ head/sys/compat/cloudabi/cloudabi_proc.c	Thu Jul 16 07:05:42 2015	(r285622)
@@ -27,10 +27,12 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/imgact.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/signalvar.h>
+#include <sys/syscallsubr.h>
 
 #include <compat/cloudabi/cloudabi_proto.h>
 
@@ -38,9 +40,16 @@ int
 cloudabi_sys_proc_exec(struct thread *td,
     struct cloudabi_sys_proc_exec_args *uap)
 {
+	struct image_args args;
+	int error;
 
-	/* Not implemented. */
-	return (ENOSYS);
+	error = exec_copyin_data_fds(td, &args, uap->data, uap->datalen,
+	    uap->fds, uap->fdslen);
+	if (error == 0) {
+		args.fd = uap->fd;
+		error = kern_execve(td, &args, NULL);
+	}
+	return (error);
 }
 
 int

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c	Thu Jul 16 05:14:20 2015	(r285621)
+++ head/sys/kern/kern_descrip.c	Thu Jul 16 07:05:42 2015	(r285622)
@@ -1921,6 +1921,14 @@ fdunshare(struct thread *td)
 	p->p_fd = tmp;
 }
 
+void
+fdinstall_remapped(struct thread *td, struct filedesc *fdp)
+{
+
+	fdescfree(td);
+	td->td_proc->p_fd = fdp;
+}
+
 /*
  * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
  * this is to ease callers, not catch errors.
@@ -1960,6 +1968,65 @@ fdcopy(struct filedesc *fdp)
 }
 
 /*
+ * Copies a filedesc structure, while remapping all file descriptors
+ * stored inside using a translation table.
+ *
+ * File descriptors are copied over to the new file descriptor table,
+ * regardless of whether the close-on-exec flag is set.
+ */
+int
+fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
+    struct filedesc **ret)
+{
+	struct filedesc *newfdp;
+	struct filedescent *nfde, *ofde;
+	int error, i;
+
+	MPASS(fdp != NULL);
+
+	newfdp = fdinit(fdp, true);
+	if (nfds > fdp->fd_lastfile + 1) {
+		/* New table cannot be larger than the old one. */
+		error = E2BIG;
+		goto bad;
+	}
+	/* Copy all passable descriptors (i.e. not kqueue). */
+	newfdp->fd_freefile = nfds;
+	for (i = 0; i < nfds; ++i) {
+		if (fds[i] < 0 || fds[i] > fdp->fd_lastfile) {
+			/* File descriptor out of bounds. */
+			error = EBADF;
+			goto bad;
+		}
+		ofde = &fdp->fd_ofiles[fds[i]];
+		if (ofde->fde_file == NULL) {
+			/* Unused file descriptor. */
+			error = EBADF;
+			goto bad;
+		}
+		if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) {
+			/* File descriptor cannot be passed. */
+			error = EINVAL;
+			goto bad;
+		}
+		nfde = &newfdp->fd_ofiles[i];
+		*nfde = *ofde;
+		filecaps_copy(&ofde->fde_caps, &nfde->fde_caps);
+		fhold(nfde->fde_file);
+		fdused_init(newfdp, i);
+		newfdp->fd_lastfile = i;
+	}
+	newfdp->fd_cmask = fdp->fd_cmask;
+	FILEDESC_SUNLOCK(fdp);
+	*ret = newfdp;
+	return (0);
+bad:
+	FILEDESC_SUNLOCK(fdp);
+	fdescfree_remapped(newfdp);
+	return (error);
+}
+
+/*
  * Clear POSIX style locks. This is only used when fdp looses a reference (i.e.
  * one of processes using it exits) and the table used to be shared.
  */
@@ -2114,6 +2181,42 @@ fdescfree(struct thread *td)
 	fddrop(fdp);
 }
 
+void
+fdescfree_remapped(struct filedesc *fdp)
+{
+	struct filedesc0 *fdp0;
+	struct filedescent *fde;
+	struct file *fp;
+	struct freetable *ft, *tft;
+	int i;
+
+	for (i = 0; i <= fdp->fd_lastfile; i++) {
+		fde = &fdp->fd_ofiles[i];
+		fp = fde->fde_file;
+		if (fp != NULL) {
+			fdefree_last(fde);
+			(void) closef(fp, NULL);
+		}
+	}
+
+	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
+		free(fdp->fd_map, M_FILEDESC);
+	if (fdp->fd_nfiles > NDFILE)
+		free(fdp->fd_files, M_FILEDESC);
+
+	fdp0 = (struct filedesc0 *)fdp;
+	SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft)
+		free(ft->ft_table, M_FILEDESC);
+
+	if (fdp->fd_cdir != NULL)
+		vrele(fdp->fd_cdir);
+	if (fdp->fd_rdir != NULL)
+		vrele(fdp->fd_rdir);
+	if (fdp->fd_jdir != NULL)
+		vrele(fdp->fd_jdir);
+	fddrop(fdp);
+}
+
 /*
  * For setugid programs, we don't want to people to use that setugidness
  * to generate error messages which write to a file which otherwise would

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Thu Jul 16 05:14:20 2015	(r285621)
+++ head/sys/kern/kern_exec.c	Thu Jul 16 07:05:42 2015	(r285622)
@@ -580,13 +580,20 @@ interpret:
 	else
 		suword(--stack_base, imgp->args->argc);
 
-	/*
-	 * For security and other reasons, the file descriptor table cannot
-	 * be shared after an exec.
-	 */
-	fdunshare(td);
-	/* close files on exec */
-	fdcloseexec(td);
+	if (args->fdp != NULL) {
+		/* Install a brand new file descriptor table. */
+		fdinstall_remapped(td, args->fdp);
+		args->fdp = NULL;
+	} else {
+		/*
+		 * Keep on using the existing file descriptor table. For
+		 * security and other reasons, the file descriptor table
+		 * cannot be shared after an exec.
+		 */
+		fdunshare(td);
+		/* close files on exec */
+		fdcloseexec(td);
+	}
 
 	/*
 	 * Malloc things before we need locks.
@@ -1197,6 +1204,71 @@ err_exit:
 	return (error);
 }
 
+int
+exec_copyin_data_fds(struct thread *td, struct image_args *args,
+    const void *data, size_t datalen, const int *fds, size_t fdslen)
+{
+	struct filedesc *ofdp;
+	const char *p;
+	int *kfds;
+	int error;
+
+	memset(args, '\0', sizeof(*args));
+	ofdp = td->td_proc->p_fd;
+	if (datalen >= ARG_MAX || fdslen > ofdp->fd_lastfile + 1)
+		return (E2BIG);
+	error = exec_alloc_args(args);
+	if (error != 0)
+		return (error);
+
+	args->begin_argv = args->buf;
+	args->stringspace = ARG_MAX;
+
+	if (datalen > 0) {
+		/*
+		 * Argument buffer has been provided. Copy it into the
+		 * kernel as a single string and add a terminating null
+		 * byte.
+		 */
+		error = copyin(data, args->begin_argv, datalen);
+		if (error != 0)
+			goto err_exit;
+		args->begin_argv[datalen] = '\0';
+		args->endp = args->begin_argv + datalen + 1;
+		args->stringspace -= datalen + 1;
+
+		/*
+		 * Traditional argument counting. Count the number of
+		 * null bytes.
+		 */
+		for (p = args->begin_argv; p < args->endp; ++p)
+			if (*p == '\0')
+				++args->argc;
+	} else {
+		/* No argument buffer provided. */
+		args->endp = args->begin_argv;
+	}
+	/* There are no environment variables. */
+	args->begin_envv = args->endp;
+
+	/* Create new file descriptor table. */
+	kfds = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK);
+	error = copyin(fds, kfds, fdslen * sizeof(int));
+	if (error != 0) {
+		free(kfds, M_TEMP);
+		goto err_exit;
+	}
+	error = fdcopy_remapped(ofdp, kfds, fdslen, &args->fdp);
+	free(kfds, M_TEMP);
+	if (error != 0)
+		goto err_exit;
+
+	return (0);
+err_exit:
+	exec_free_args(args);
+	return (error);
+}
+
 /*
  * Allocate temporary demand-paged, zero-filled memory for the file name,
  * argument, and environment strings.  Returns zero if the allocation succeeds
@@ -1223,6 +1295,8 @@ exec_free_args(struct image_args *args)
 		free(args->fname_buf, M_TEMP);
 		args->fname_buf = NULL;
 	}
+	if (args->fdp != NULL)
+		fdescfree_remapped(args->fdp);
 }
 
 /*

Modified: head/sys/sys/filedesc.h
==============================================================================
--- head/sys/sys/filedesc.h	Thu Jul 16 05:14:20 2015	(r285621)
+++ head/sys/sys/filedesc.h	Thu Jul 16 07:05:42 2015	(r285622)
@@ -170,8 +170,12 @@ void	fdclose(struct thread *td, struct f
 void	fdcloseexec(struct thread *td);
 void	fdsetugidsafety(struct thread *td);
 struct	filedesc *fdcopy(struct filedesc *fdp);
+int	fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
+	    struct filedesc **newfdp);
+void	fdinstall_remapped(struct thread *td, struct filedesc *fdp);
 void	fdunshare(struct thread *td);
 void	fdescfree(struct thread *td);
+void	fdescfree_remapped(struct filedesc *fdp);
 struct	filedesc *fdinit(struct filedesc *fdp, bool prepfiles);
 struct	filedesc *fdshare(struct filedesc *fdp);
 struct filedesc_to_leader *

Modified: head/sys/sys/imgact.h
==============================================================================
--- head/sys/sys/imgact.h	Thu Jul 16 05:14:20 2015	(r285621)
+++ head/sys/sys/imgact.h	Thu Jul 16 07:05:42 2015	(r285622)
@@ -49,6 +49,7 @@ struct image_args {
 	int argc;		/* count of argument strings */
 	int envc;		/* count of environment strings */
 	int fd;			/* file descriptor of the executable */
+	struct filedesc *fdp;	/* new file descriptor table */
 };
 
 struct image_params {
@@ -99,6 +100,8 @@ void	exec_setregs(struct thread *, struc
 int	exec_shell_imgact(struct image_params *);
 int	exec_copyin_args(struct image_args *, char *, enum uio_seg,
 	char **, char **);
+int	exec_copyin_data_fds(struct thread *, struct image_args *, const void *,
+	size_t, const int *, size_t);
 int	pre_execve(struct thread *td, struct vmspace **oldvmspace);
 void	post_execve(struct thread *td, int error, struct vmspace *oldvmspace);
 #endif


More information about the svn-src-all mailing list