PERFORCE change 171017 for review

Jonathan Anderson jona at FreeBSD.org
Wed Nov 25 15:14:55 UTC 2009


http://p4web.freebsd.org/chv.cgi?CH=171017

Change 171017 by jona at jona-capsicum-kent on 2009/11/25 15:14:52

	Add openat(2) in capability mode.
	
	openat(2) is now permitted in capability mode, subject to the
	constraint that the relative path must not "escape" the directory
	FD that the lookup is being conducted relative to. A lookup that
	violates this constraint fails with EPERM when in capability mode
	(behaviour outside capability mode is unchanged).
	
	openat(2) also now wraps the resulting FD with a capability if the
	directory FD was a capability. The rights of the new capability
	are identical to those of the original.

Affected files ...

.. //depot/projects/trustedbsd/capabilities/src/sys/amd64/conf/CAPABILITIES#2 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#22 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/kern/init_sysent.c#42 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/kern/kern_descrip.c#30 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#27 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_lookup.c#13 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_syscalls.c#18 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#26 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/sys/filedesc.h#4 edit
.. //depot/projects/trustedbsd/capabilities/src/sys/sys/namei.h#6 edit

Differences ...

==== //depot/projects/trustedbsd/capabilities/src/sys/amd64/conf/CAPABILITIES#2 (text+ko) ====

@@ -1,4 +1,9 @@
 include GENERIC
+
 options CAPABILITIES
 options PROCDESC
 options KDTRACE_HOOKS
+options WITNESS
+options KDB
+options DDB
+

==== //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#22 (text+ko) ====

@@ -38,7 +38,7 @@
 ## - sys_exit(2), abort2(2) and close(2) are very important.
 ## - Sorted alphabetically, please keep it that way.
 ##
-## $P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#21 $
+## $P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#22 $
 ##
 
 ##
@@ -453,6 +453,12 @@
 olio_listio
 
 ##
+## Allow openat(2), which we have constrained to prevent accessing files
+## which are not "under" the directory FD given to the syscall.
+##
+openat
+
+##
 ## Allow poll(2), which will be scoped by capability rights.
 ##
 ## XXXRW: Perhaps we don't need the OpenBSD version?

==== //depot/projects/trustedbsd/capabilities/src/sys/kern/init_sysent.c#42 (text+ko) ====

@@ -533,7 +533,7 @@
 	{ AS(mkdirat_args), (sy_call_t *)mkdirat, AUE_MKDIRAT, NULL, 0, 0, 0 },	/* 496 = mkdirat */
 	{ AS(mkfifoat_args), (sy_call_t *)mkfifoat, AUE_MKFIFOAT, NULL, 0, 0, 0 },	/* 497 = mkfifoat */
 	{ AS(mknodat_args), (sy_call_t *)mknodat, AUE_MKNODAT, NULL, 0, 0, 0 },	/* 498 = mknodat */
-	{ AS(openat_args), (sy_call_t *)openat, AUE_OPENAT_RWTC, NULL, 0, 0, 0 },	/* 499 = openat */
+	{ AS(openat_args), (sy_call_t *)openat, AUE_OPENAT_RWTC, NULL, 0, 0, SYF_CAPENABLED },	/* 499 = openat */
 	{ AS(readlinkat_args), (sy_call_t *)readlinkat, AUE_READLINKAT, NULL, 0, 0, 0 },	/* 500 = readlinkat */
 	{ AS(renameat_args), (sy_call_t *)renameat, AUE_RENAMEAT, NULL, 0, 0, 0 },	/* 501 = renameat */
 	{ AS(symlinkat_args), (sy_call_t *)symlinkat, AUE_SYMLINKAT, NULL, 0, 0, 0 },	/* 502 = symlinkat */

==== //depot/projects/trustedbsd/capabilities/src/sys/kern/kern_descrip.c#30 (text+ko) ====

@@ -1543,13 +1543,31 @@
 int
 falloc(struct thread *td, struct file **resultfp, int *resultfd)
 {
+	return _falloc(td, resultfp, resultfd, 1);
+}
+
+/*
+ * Create a new open file structure and, optionally, allocate a file descriptor
+ * for the process that refers to it.
+ */
+int
+_falloc(struct thread *td, struct file **resultfp, int *resultfd,
+        int addfd)
+{
 	struct proc *p = td->td_proc;
 	struct file *fp;
-	int error, i;
+	int error, i = -1;
 	int maxuserfiles = maxfiles - (maxfiles / 20);
 	static struct timeval lastfail;
 	static int curfail;
 
+	/*
+	 * Cowardly refuse to create a referenceless file: if we're not adding
+	 * the file to the process descriptor array, then the calling code
+	 * MUST expect a pointer to be returned.
+	 */
+	if (!addfd && !resultfp) return (error = EINVAL);
+
 	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
 	if ((openfiles >= maxuserfiles &&
 	    priv_check(td, PRIV_MAXFILES) != 0) ||
@@ -1561,14 +1579,16 @@
 		uma_zfree(file_zone, fp);
 		return (ENFILE);
 	}
-	atomic_add_int(&openfiles, 1);
+	if (addfd)
+		atomic_add_int(&openfiles, 1);
 
 	/*
+	 * If addfd:
 	 * If the process has file descriptor zero open, add the new file
 	 * descriptor to the list of open files at that point, otherwise
 	 * put it at the front of the list of open files.
 	 */
-	refcount_init(&fp->f_count, 1);
+	refcount_init(&fp->f_count, (addfd > 0));
 	if (resultfp)
 		fhold(fp);
 	fp->f_cred = crhold(td->td_ucred);
@@ -1577,16 +1597,20 @@
 	fp->f_vnode = NULL;
 	LIST_INIT(&fp->f_caps);
 	fp->f_capcount = 0;
-	FILEDESC_XLOCK(p->p_fd);
-	if ((error = fdalloc(td, 0, &i))) {
+
+	if (addfd) {
+		FILEDESC_XLOCK(p->p_fd);
+		if ((error = fdalloc(td, 0, &i))) {
+			FILEDESC_XUNLOCK(p->p_fd);
+			fdrop(fp, td);
+			if (resultfp)
+				fdrop(fp, td);
+			return (error);
+		}
+		p->p_fd->fd_ofiles[i] = fp;
 		FILEDESC_XUNLOCK(p->p_fd);
-		fdrop(fp, td);
-		if (resultfp)
-			fdrop(fp, td);
-		return (error);
 	}
-	p->p_fd->fd_ofiles[i] = fp;
-	FILEDESC_XUNLOCK(p->p_fd);
+
 	if (resultfp)
 		*resultfp = fp;
 	if (resultfd)

==== //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#27 (text+ko) ====

@@ -50,7 +50,7 @@
 #include "opt_capabilities.h"
 
 #include <sys/cdefs.h>
-__FBSDID("$P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#26 $");
+__FBSDID("$P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#27 $");
 
 #include <sys/param.h>
 #include <sys/capability.h>
@@ -278,28 +278,52 @@
 int
 cap_new(struct thread *td, struct cap_new_args *uap)
 {
-	struct capability *c, *c_old;
-	struct file *fp, *fp_cap, *fp_object;
-	int error, fd_cap;
+	int error, capfd;
+	int fd = uap->fd;
+	struct file *fp, *cap;
+	cap_rights_t rights = uap->rights;
 
-	AUDIT_ARG_FD(uap->fd);
-	AUDIT_ARG_RIGHTS(uap->rights);
-	if ((uap->rights | CAP_MASK_VALID) != CAP_MASK_VALID)
-		return (EINVAL);
+	AUDIT_ARG_FD(fd);
+	AUDIT_ARG_RIGHTS(rights);
 
-	c = uma_zalloc(capability_zone, M_WAITOK | M_ZERO);
-
 	/*
 	 * We always allow creating a capability referencing an existing
 	 * descriptor or capability, even if it's not of much use to the
 	 * application.
 	 */
-	error = fget(td, uap->fd, 0, &fp);
-	if (error)
-		goto fail;
+	error = fget(td, fd, 0, &fp);
+	if (error) return (error);
 
 	AUDIT_ARG_FILE(td->td_proc, fp);
 
+	error = kern_capwrap(td, fp, rights, &cap, &capfd);
+
+	/*
+	 * Release our reference to the file (another one has been taken for
+	 * the capability's sake if necessary).
+	 */
+	fdrop(fp, td);
+
+	return error;
+}
+
+
+/*
+ * Create a capability to wrap around an existing file.
+ */
+int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
+                 struct file **cap, int *capfd)
+{
+	struct capability *c, *c_old;
+	struct file *fp_object;
+	int error;
+
+	if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID)
+		return (EINVAL);
+
+	c = uma_zalloc(capability_zone, M_WAITOK | M_ZERO);
+
+
 	/*
 	 * If a new capability is being derived from an existing capability,
 	 * then the new capability rights must be a subset of the existing
@@ -307,18 +331,18 @@
 	 */
 	if (fp->f_type == DTYPE_CAPABILITY) {
 		c_old = fp->f_data;
-		if ((c_old->cap_rights | uap->rights) != c_old->cap_rights) {
+		if ((c_old->cap_rights | rights) != c_old->cap_rights) {
 			error = ENOTCAPABLE;
-			goto fail2;
+			goto fail;
 		}
 	}
 
 	/*
 	 * Allocate a new file descriptor to hang the capability off.
 	 */
-	error = falloc(td, &fp_cap, &fd_cap);
+	error = falloc(td, cap, capfd);
 	if (error)
-		goto fail2;
+		goto fail;
 
 	/*
 	 * Rather than nesting capabilities, directly reference the object an
@@ -332,10 +356,10 @@
 	else
 		fp_object = fp;
 	fhold(fp_object);
-	c->cap_rights = uap->rights;
+	c->cap_rights = rights;
 	c->cap_object = fp_object;
-	c->cap_file = fp_cap;
-	finit(fp_cap, fp->f_flag, DTYPE_CAPABILITY, c, &capability_ops);
+	c->cap_file = *cap;
+	finit(*cap, fp->f_flag, DTYPE_CAPABILITY, c, &capability_ops);
 
 	/*
 	 * Add this capability to the per-file list of referencing
@@ -345,13 +369,15 @@
 	LIST_INSERT_HEAD(&fp_object->f_caps, c, cap_filelist);
 	fp_object->f_capcount++;
 	mtx_pool_unlock(mtxpool_sleep, fp_object);
-	td->td_retval[0] = fd_cap;
-	fdrop(fp, td);
-	fdrop(fp_cap, td);
+	td->td_retval[0] = *capfd;
+
+	/*
+	 * Release our private reference (the proc filedesc still has one).
+	 */
+	fdrop(*cap, td);
+
 	return (0);
 
-fail2:
-	fdrop(fp, td);
 fail:
 	uma_zfree(capability_zone, c);
 	return (error);

==== //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_lookup.c#13 (text+ko) ====

@@ -140,9 +140,11 @@
 	int vfslocked;
 
 #ifdef KDB
-	if (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) {
+	if ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE)
+	    && (ndp->ni_dirfd == AT_FDCWD))
+	{
 		printf("namei: pid %d proc %s performed namei in capability "
-		    "mode\n", p->p_pid, p->p_comm);
+		    "mode (and it's not *at())\n", p->p_pid, p->p_comm);
 		kdb_backtrace();
 	}
 #endif
@@ -478,6 +480,7 @@
 	int dvfslocked;			/* VFS Giant state for parent */
 	int tvfslocked;
 	int lkflags_save;
+	int insidebasedir = 0;		/* we're under the *at() base */
 	
 	/*
 	 * Setup: break out flag bits into variables.
@@ -504,6 +507,11 @@
 		cnp->cn_lkflags = LK_SHARED;
 	else
 		cnp->cn_lkflags = LK_EXCLUSIVE;
+
+	/* we do not allow absolute lookups in capability mode */
+	if(ndp->ni_basedir && (ndp->ni_startdir == ndp->ni_rootdir))
+		return (error = EPERM);
+
 	dp = ndp->ni_startdir;
 	ndp->ni_startdir = NULLVP;
 	vn_lock(dp,
@@ -572,6 +580,11 @@
 		goto bad;
 	}
 
+
+	/* Check to see if we're at the *at directory */
+	if(dp == ndp->ni_basedir) insidebasedir = 1;
+
+
 	/*
 	 * Check for degenerate name (e.g. / or "")
 	 * which is a way of talking about a directory,
@@ -626,6 +639,13 @@
 			goto bad;
 		}
 		for (;;) {
+			/* attempting to wander out of the *at root */
+			if(dp == ndp->ni_basedir)
+			{
+				error = EPERM;
+				goto bad;
+			}
+
 			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
 			     pr = pr->pr_parent)
 				if (dp == pr->pr_root)
@@ -886,6 +906,16 @@
 		VOP_UNLOCK(dp, 0);
 success:
 	/*
+	 * If we're in capability mode and the syscall was *at(), ensure
+	 * that the *at() base was part of the path
+	 */
+	if(ndp->ni_basedir && !insidebasedir)
+	{
+		error = EPERM;
+		goto bad;
+	}
+
+	/*
 	 * Because of lookup_shared we may have the vnode shared locked, but
 	 * the caller may want it to be exclusively locked.
 	 */

==== //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_syscalls.c#18 (text+ko) ====

@@ -1083,6 +1083,8 @@
 	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
 }
 
+
+
 int
 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     int flags, int mode)
@@ -1090,7 +1092,7 @@
 	struct proc *p = td->td_proc;
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
-	struct vnode *vp;
+	struct vnode *vp, *base = 0;
 	struct vattr vat;
 	struct mount *mp;
 	int cmode;
@@ -1099,6 +1101,7 @@
 	struct flock lf;
 	struct nameidata nd;
 	int vfslocked;
+	cap_rights_t baserights = -1;
 
 	AUDIT_ARG_FFLAGS(flags);
 	AUDIT_ARG_MODE(mode);
@@ -1115,16 +1118,69 @@
 	else
 		flags = FFLAGS(flags);
 
-	error = falloc(td, &nfp, &indx);
+	/* get capability info of base FD */
+	if (fd >= 0)
+	{
+		struct file *f;
+		const cap_rights_t LOOKUP_RIGHTS = CAP_LOOKUP | CAP_ATBASE;
+
+		FILEDESC_SLOCK(fdp);
+
+		error = fgetcap(td, fd, &f);
+		if (error == 0) {
+			/* FD is a capability; get rights and unwrap */
+			struct file *real_fp = NULL;
+
+			baserights = cap_rights(f);
+			error = cap_fextract(f, LOOKUP_RIGHTS, &real_fp);
+
+			/* hold the underlying file, not the capability */
+			if (error == 0) fhold(real_fp);
+			fdrop(f, td);
+
+			f = real_fp;
+		}
+		else if (error == EINVAL)
+			/* not a capability; get the real file pointer */
+			error = fget(td, fd, LOOKUP_RIGHTS, &f);
+
+
+
+		/* if in capability mode, get base vnode (for namei) */
+		if (!error && (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE)) {
+			base = f->f_vnode;
+			vref(base);
+		}
+
+
+		/* don't need to hold the base any more */
+		if (f != NULL) fdrop(f, td);
+
+		if (error) {
+			FILEDESC_SUNLOCK(fdp);
+			return (error);
+		}
+		else
+			FILEDESC_SUNLOCK(fdp);
+	}
+
+
+	/*
+	 * allocate the file descriptor, but only add it to the descriptor
+	 * array if fd isn't a capability (in which case we'll add the
+	 * capability instead, later)
+	 */
+	error = _falloc(td, &nfp, &indx, (baserights == -1));
 	if (error)
 		return (error);
-	/* An extra reference on `nfp' has been held for us by falloc(). */
+
+	/* An extra reference on `nfp' has been held for us by _falloc(). */
 	fp = nfp;
 	/* Set the flags early so the finit in devfs can pick them up. */
 	fp->f_flag = flags & FMASK;
 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
-	NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
-	    td);
+	NDINIT_ATBASE(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
+	              path, fd, base, td);
 	td->td_dupfd = -1;		/* XXX check for fdopen */
 	error = vn_open(&nd, &flags, cmode, fp);
 	if (error) {
@@ -1133,11 +1189,8 @@
 		 * wonderous happened deep below and we just pass it up
 		 * pretending we know what we do.
 		 */
-		if (error == ENXIO && fp->f_ops != &badfileops) {
-			fdrop(fp, td);
-			td->td_retval[0] = indx;
-			return (0);
-		}
+		if (error == ENXIO && fp->f_ops != &badfileops)
+			goto success;
 
 		/*
 		 * handle special fdopen() case.  bleh.  dupfdopen() is
@@ -1147,15 +1200,14 @@
 		if ((error == ENODEV || error == ENXIO) &&
 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
 		    (error =
-			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
-			td->td_retval[0] = indx;
-			fdrop(fp, td);
-			return (0);
-		}
+			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0)
+			goto success;
+
 		/*
 		 * Clean up the descriptor, but only if another thread hadn't
 		 * replaced or closed it.
 		 */
+		if (base) vrele(base);
 		fdclose(fdp, fp, indx, td);
 		fdrop(fp, td);
 
@@ -1213,15 +1265,28 @@
 			goto bad;
 	}
 	VFS_UNLOCK_GIANT(vfslocked);
+
+success:
+	if (baserights != -1) {
+		/* wrap the result in a capability */
+		struct file *cap;
+
+		error = kern_capwrap(td, fp, baserights, &cap, &indx);
+		if (error) goto bad_unlocked;
+	}
+
 	/*
 	 * Release our private reference, leaving the one associated with
 	 * the descriptor table intact.
 	 */
+	if (base) vrele(base);
 	fdrop(fp, td);
 	td->td_retval[0] = indx;
 	return (0);
 bad:
 	VFS_UNLOCK_GIANT(vfslocked);
+bad_unlocked:
+	if (base) vrele(base);
 	fdclose(fdp, fp, indx, td);
 	fdrop(fp, td);
 	return (error);

==== //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#26 (text+ko) ====

@@ -30,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $P4: //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#25 $
+ * $P4: //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#26 $
  */
 
 /*
@@ -96,7 +96,8 @@
 #define	CAP_TTYHOOK		0x0001000000000000ULL	/* register tty hook */
 #define	CAP_FCHDIR		0x0002000000000000ULL	/* fchdir(2) */
 #define	CAP_FSCK		0x0004000000000000ULL	/* sysctl_ffs_fsck */
-#define	CAP_MASK_VALID		0x0007ffffffffffffULL
+#define	CAP_ATBASE		0x0008000000000000ULL	/* openat(2), etc. */
+#define	CAP_MASK_VALID		0x000fffffffffffffULL
 
 /*
  * Notes:
@@ -138,6 +139,13 @@
 
 #ifdef _KERNEL
 struct file;
+struct thread;
+
+/*
+ * Create a capability to wrap a file object.
+ */
+int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
+                 struct file **cap, int *capfd);
 
 /*
  * Given a file descriptor that may be a capability, check the requested

==== //depot/projects/trustedbsd/capabilities/src/sys/sys/filedesc.h#4 (text+ko) ====

@@ -112,6 +112,8 @@
 int	dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd,
 	    int mode, int error);
 int	falloc(struct thread *td, struct file **resultfp, int *resultfd);
+int	_falloc(struct thread *td, struct file **resultfp, int *resultfd,
+	        int addfd);
 int	fdalloc(struct thread *td, int minfd, int *result);
 int	fdavail(struct thread *td, int n);
 int	fdcheckstd(struct thread *td);

==== //depot/projects/trustedbsd/capabilities/src/sys/sys/namei.h#6 (text+ko) ====

@@ -70,6 +70,7 @@
 	struct	vnode *ni_rootdir;	/* logical root directory */
 	struct	vnode *ni_topdir;	/* logical top directory */
 	int	ni_dirfd;		/* starting directory for *at functions */
+	struct	vnode *ni_basedir;	/* root for capability-mode *at */
 	/*
 	 * Results: returned from/manipulated by lookup
 	 */
@@ -151,11 +152,13 @@
  * Initialization of a nameidata structure.
  */
 #define	NDINIT(ndp, op, flags, segflg, namep, td)			\
-	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, td)
-#define	NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td)		\
-	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, td)
-#define	NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td)		\
-	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, td)
+	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, NULL, td)
+#define	NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td)	\
+	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, NULL, td)
+#define	NDINIT_ATBASE(ndp, op, flags, segflg, namep, dirfd, base, td)	\
+	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, base, td)
+#define	NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td)	\
+	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, NULL, td)
 
 static __inline void
 NDINIT_ALL(struct nameidata *ndp,
@@ -164,6 +167,7 @@
 	const char *namep,
 	int dirfd,
 	struct vnode *startdir,
+	struct vnode *basedir,
 	struct thread *td)
 {
 	ndp->ni_cnd.cn_nameiop = op;
@@ -172,6 +176,7 @@
 	ndp->ni_dirp = namep;
 	ndp->ni_dirfd = dirfd;
 	ndp->ni_startdir = startdir;
+	ndp->ni_basedir = basedir;
 	ndp->ni_cnd.cn_thread = td;
 }
 


More information about the p4-projects mailing list