svn commit: r358503 - in head: lib/libprocstat sys/compat/linprocfs sys/kern sys/security/audit sys/sys sys/ufs/ffs

Mateusz Guzik mjg at FreeBSD.org
Sun Mar 1 21:53:49 UTC 2020


Author: mjg
Date: Sun Mar  1 21:53:46 2020
New Revision: 358503
URL: https://svnweb.freebsd.org/changeset/base/358503

Log:
  fd: move vnodes out of filedesc into a dedicated structure
  
  The new structure is copy-on-write. Assuming that path lookups are
  significantly more frequent than chdir and chroot calls, this is a win.
  
  This provides stable root and jail root vnodes without the need to reference
  them on lookup, which in turn means less work on globally shared structures.
  Note this also happens to fix a bug where the jail vnode was never referenced,
  meaning a subsequent access on lookup could run into a use-after-free.
  
  Reviewed by:	kib
  Differential Revision:	https://reviews.freebsd.org/D23884

Modified:
  head/lib/libprocstat/libprocstat.c
  head/sys/compat/linprocfs/linprocfs.c
  head/sys/kern/kern_descrip.c
  head/sys/kern/kern_linker.c
  head/sys/kern/vfs_cache.c
  head/sys/kern/vfs_lookup.c
  head/sys/kern/vfs_mountroot.c
  head/sys/security/audit/audit_bsm_klib.c
  head/sys/sys/filedesc.h
  head/sys/ufs/ffs/ffs_alloc.c

Modified: head/lib/libprocstat/libprocstat.c
==============================================================================
--- head/lib/libprocstat/libprocstat.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/lib/libprocstat/libprocstat.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -459,6 +459,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struc
 {
 	struct file file;
 	struct filedesc filed;
+	struct pwd pwd;
 	struct vm_map_entry vmentry;
 	struct vm_object object;
 	struct vmspace vmspace;
@@ -473,6 +474,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struc
 	int i, fflags;
 	int prot, type;
 	unsigned int nfiles;
+	bool haspwd;
 
 	assert(procstat);
 	kd = procstat->kd;
@@ -485,6 +487,15 @@ procstat_getfiles_kvm(struct procstat *procstat, struc
 		warnx("can't read filedesc at %p", (void *)kp->ki_fd);
 		return (NULL);
 	}
+	haspwd = false;
+	if (filed.fd_pwd != NULL) {
+		if (!kvm_read_all(kd, (unsigned long)filed.fd_pwd, &pwd,
+		    sizeof(pwd))) {
+			warnx("can't read fd_pwd at %p", (void *)filed.fd_pwd);
+			return (NULL);
+		}
+		haspwd = true;
+	}
 
 	/*
 	 * Allocate list head.
@@ -495,25 +506,27 @@ procstat_getfiles_kvm(struct procstat *procstat, struc
 	STAILQ_INIT(head);
 
 	/* root directory vnode, if one. */
-	if (filed.fd_rdir) {
-		entry = filestat_new_entry(filed.fd_rdir, PS_FST_TYPE_VNODE, -1,
-		    PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL);
-		if (entry != NULL)
-			STAILQ_INSERT_TAIL(head, entry, next);
-	}
-	/* current working directory vnode. */
-	if (filed.fd_cdir) {
-		entry = filestat_new_entry(filed.fd_cdir, PS_FST_TYPE_VNODE, -1,
-		    PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL);
-		if (entry != NULL)
-			STAILQ_INSERT_TAIL(head, entry, next);
-	}
-	/* jail root, if any. */
-	if (filed.fd_jdir) {
-		entry = filestat_new_entry(filed.fd_jdir, PS_FST_TYPE_VNODE, -1,
-		    PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL);
-		if (entry != NULL)
-			STAILQ_INSERT_TAIL(head, entry, next);
+	if (haspwd) {
+		if (pwd.pwd_rdir) {
+			entry = filestat_new_entry(pwd.pwd_rdir, PS_FST_TYPE_VNODE, -1,
+			    PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL);
+			if (entry != NULL)
+				STAILQ_INSERT_TAIL(head, entry, next);
+		}
+		/* current working directory vnode. */
+		if (pwd.pwd_cdir) {
+			entry = filestat_new_entry(pwd.pwd_cdir, PS_FST_TYPE_VNODE, -1,
+			    PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL);
+			if (entry != NULL)
+				STAILQ_INSERT_TAIL(head, entry, next);
+		}
+		/* jail root, if any. */
+		if (pwd.pwd_jdir) {
+			entry = filestat_new_entry(pwd.pwd_jdir, PS_FST_TYPE_VNODE, -1,
+			    PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL);
+			if (entry != NULL)
+				STAILQ_INSERT_TAIL(head, entry, next);
+		}
 	}
 	/* ktrace vnode, if one */
 	if (kp->ki_tracep) {

Modified: head/sys/compat/linprocfs/linprocfs.c
==============================================================================
--- head/sys/compat/linprocfs/linprocfs.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/compat/linprocfs/linprocfs.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -1028,23 +1028,16 @@ linprocfs_doprocstatus(PFS_FILL_ARGS)
 static int
 linprocfs_doproccwd(PFS_FILL_ARGS)
 {
-	struct filedesc *fdp;
-	struct vnode *vp;
+	struct pwd *pwd;
 	char *fullpath = "unknown";
 	char *freepath = NULL;
 
-	fdp = p->p_fd;
-	FILEDESC_SLOCK(fdp);
-	vp = fdp->fd_cdir;
-	if (vp != NULL)
-		VREF(vp);
-	FILEDESC_SUNLOCK(fdp);
-	vn_fullpath(td, vp, &fullpath, &freepath);
-	if (vp != NULL)
-		vrele(vp);
+	pwd = pwd_hold(td);
+	vn_fullpath(td, pwd->pwd_cdir, &fullpath, &freepath);
 	sbuf_printf(sb, "%s", fullpath);
 	if (freepath)
 		free(freepath, M_TEMP);
+	pwd_drop(pwd);
 	return (0);
 }
 
@@ -1054,23 +1047,18 @@ linprocfs_doproccwd(PFS_FILL_ARGS)
 static int
 linprocfs_doprocroot(PFS_FILL_ARGS)
 {
-	struct filedesc *fdp;
+	struct pwd *pwd;
 	struct vnode *vp;
 	char *fullpath = "unknown";
 	char *freepath = NULL;
 
-	fdp = p->p_fd;
-	FILEDESC_SLOCK(fdp);
-	vp = jailed(p->p_ucred) ? fdp->fd_jdir : fdp->fd_rdir;
-	if (vp != NULL)
-		VREF(vp);
-	FILEDESC_SUNLOCK(fdp);
+	pwd = pwd_hold(td);
+	vp = jailed(p->p_ucred) ? pwd->pwd_jdir : pwd->pwd_rdir;
 	vn_fullpath(td, vp, &fullpath, &freepath);
-	if (vp != NULL)
-		vrele(vp);
 	sbuf_printf(sb, "%s", fullpath);
 	if (freepath)
 		free(freepath, M_TEMP);
+	pwd_drop(pwd);
 	return (0);
 }
 

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/kern/kern_descrip.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
 #include <ddb/ddb.h>
 
 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
+static MALLOC_DEFINE(M_PWD, "pwd", "Descriptor table vnodes");
 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
     "file desc to leader structures");
 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
@@ -116,6 +117,8 @@ static void	filecaps_copy_finish(const struct filecaps
 static u_long 	*filecaps_free_prep(struct filecaps *fcaps);
 static void	filecaps_free_finish(u_long *ioctls);
 
+static struct pwd *pwd_alloc(void);
+
 /*
  * Each process has:
  *
@@ -314,24 +317,6 @@ fdfree(struct filedesc *fdp, int fd)
 	fdunused(fdp, fd);
 }
 
-void
-pwd_ensure_dirs(void)
-{
-	struct filedesc *fdp;
-
-	fdp = curproc->p_fd;
-	FILEDESC_XLOCK(fdp);
-	if (fdp->fd_cdir == NULL) {
-		fdp->fd_cdir = rootvnode;
-		vrefact(rootvnode);
-	}
-	if (fdp->fd_rdir == NULL) {
-		fdp->fd_rdir = rootvnode;
-		vrefact(rootvnode);
-	}
-	FILEDESC_XUNLOCK(fdp);
-}
-
 /*
  * System calls on descriptors.
  */
@@ -2014,22 +1999,16 @@ fdinit(struct filedesc *fdp, bool prepfiles)
 	newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles;
 	newfdp->fd_files->fdt_nfiles = NDFILE;
 
-	if (fdp == NULL)
+	if (fdp == NULL) {
+		newfdp->fd_pwd = pwd_alloc();
 		return (newfdp);
+	}
 
 	if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles)
 		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
 
 	FILEDESC_SLOCK(fdp);
-	newfdp->fd_cdir = fdp->fd_cdir;
-	if (newfdp->fd_cdir)
-		vrefact(newfdp->fd_cdir);
-	newfdp->fd_rdir = fdp->fd_rdir;
-	if (newfdp->fd_rdir)
-		vrefact(newfdp->fd_rdir);
-	newfdp->fd_jdir = fdp->fd_jdir;
-	if (newfdp->fd_jdir)
-		vrefact(newfdp->fd_jdir);
+	newfdp->fd_pwd = pwd_hold_filedesc(fdp);
 
 	if (!prepfiles) {
 		FILEDESC_SUNLOCK(fdp);
@@ -2327,7 +2306,7 @@ fdescfree(struct thread *td)
 {
 	struct proc *p;
 	struct filedesc *fdp;
-	struct vnode *cdir, *jdir, *rdir;
+	struct pwd *pwd;
 
 	p = td->td_proc;
 	fdp = p->p_fd;
@@ -2349,20 +2328,11 @@ fdescfree(struct thread *td)
 		return;
 
 	FILEDESC_XLOCK(fdp);
-	cdir = fdp->fd_cdir;
-	fdp->fd_cdir = NULL;
-	rdir = fdp->fd_rdir;
-	fdp->fd_rdir = NULL;
-	jdir = fdp->fd_jdir;
-	fdp->fd_jdir = NULL;
+	pwd = fdp->fd_pwd;
+	pwd_set(fdp, NULL);
 	FILEDESC_XUNLOCK(fdp);
 
-	if (cdir != NULL)
-		vrele(cdir);
-	if (rdir != NULL)
-		vrele(rdir);
-	if (jdir != NULL)
-		vrele(jdir);
+	pwd_drop(pwd);
 
 	fdescfree_fds(td, fdp, 1);
 }
@@ -2371,13 +2341,7 @@ void
 fdescfree_remapped(struct filedesc *fdp)
 {
 
-	if (fdp->fd_cdir != NULL)
-		vrele(fdp->fd_cdir);
-	if (fdp->fd_rdir != NULL)
-		vrele(fdp->fd_rdir);
-	if (fdp->fd_jdir != NULL)
-		vrele(fdp->fd_jdir);
-
+	pwd_drop(fdp->fd_pwd);
 	fdescfree_fds(curthread, fdp, 0);
 }
 
@@ -3287,37 +3251,117 @@ chroot_refuse_vdir_fds(struct filedesc *fdp)
 	return (0);
 }
 
+static void
+pwd_fill(struct pwd *oldpwd, struct pwd *newpwd)
+{
+
+	if (newpwd->pwd_cdir == NULL && oldpwd->pwd_cdir != NULL) {
+		vrefact(oldpwd->pwd_cdir);
+		newpwd->pwd_cdir = oldpwd->pwd_cdir;
+	}
+
+	if (newpwd->pwd_rdir == NULL && oldpwd->pwd_rdir != NULL) {
+		vrefact(oldpwd->pwd_rdir);
+		newpwd->pwd_rdir = oldpwd->pwd_rdir;
+	}
+
+	if (newpwd->pwd_jdir == NULL && oldpwd->pwd_jdir != NULL) {
+		vrefact(oldpwd->pwd_jdir);
+		newpwd->pwd_jdir = oldpwd->pwd_jdir;
+	}
+}
+
+struct pwd *
+pwd_hold_filedesc(struct filedesc *fdp)
+{
+	struct pwd *pwd;
+
+	FILEDESC_LOCK_ASSERT(fdp);
+	pwd = fdp->fd_pwd;
+	if (pwd != NULL)
+		refcount_acquire(&pwd->pwd_refcount);
+	return (pwd);
+}
+
+struct pwd *
+pwd_hold(struct thread *td)
+{
+	struct filedesc *fdp;
+	struct pwd *pwd;
+
+	fdp = td->td_proc->p_fd;
+
+	FILEDESC_SLOCK(fdp);
+	pwd = fdp->fd_pwd;
+	MPASS(pwd != NULL);
+	refcount_acquire(&pwd->pwd_refcount);
+	FILEDESC_SUNLOCK(fdp);
+	return (pwd);
+}
+
+static struct pwd *
+pwd_alloc(void)
+{
+	struct pwd *pwd;
+
+	pwd = malloc(sizeof(*pwd), M_PWD, M_WAITOK | M_ZERO);
+	refcount_init(&pwd->pwd_refcount, 1);
+	return (pwd);
+}
+
+void
+pwd_drop(struct pwd *pwd)
+{
+
+	if (!refcount_release(&pwd->pwd_refcount))
+		return;
+
+	if (pwd->pwd_cdir != NULL)
+		vrele(pwd->pwd_cdir);
+	if (pwd->pwd_rdir != NULL)
+		vrele(pwd->pwd_rdir);
+	if (pwd->pwd_jdir != NULL)
+		vrele(pwd->pwd_jdir);
+	free(pwd, M_PWD);
+}
+
 /*
- * Common routine for kern_chroot() and jail_attach().  The caller is
- * responsible for invoking priv_check() and mac_vnode_check_chroot() to
- * authorize this operation.
- */
+* Common routine for kern_chroot() and jail_attach().  The caller is
+* responsible for invoking priv_check() and mac_vnode_check_chroot() to
+* authorize this operation.
+*/
 int
 pwd_chroot(struct thread *td, struct vnode *vp)
 {
 	struct filedesc *fdp;
-	struct vnode *oldvp;
+	struct pwd *newpwd, *oldpwd;
 	int error;
 
 	fdp = td->td_proc->p_fd;
+	newpwd = pwd_alloc();
 	FILEDESC_XLOCK(fdp);
+	oldpwd = fdp->fd_pwd;
 	if (chroot_allow_open_directories == 0 ||
-	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
+	    (chroot_allow_open_directories == 1 &&
+	    oldpwd->pwd_rdir != rootvnode)) {
 		error = chroot_refuse_vdir_fds(fdp);
 		if (error != 0) {
 			FILEDESC_XUNLOCK(fdp);
+			pwd_drop(newpwd);
 			return (error);
 		}
 	}
-	oldvp = fdp->fd_rdir;
+
 	vrefact(vp);
-	fdp->fd_rdir = vp;
-	if (fdp->fd_jdir == NULL) {
+	newpwd->pwd_rdir = vp;
+	if (oldpwd->pwd_jdir == NULL) {
 		vrefact(vp);
-		fdp->fd_jdir = vp;
+		newpwd->pwd_jdir = vp;
 	}
+	pwd_fill(oldpwd, newpwd);
+	pwd_set(fdp, newpwd);
 	FILEDESC_XUNLOCK(fdp);
-	vrele(oldvp);
+	pwd_drop(oldpwd);
 	return (0);
 }
 
@@ -3325,18 +3369,53 @@ void
 pwd_chdir(struct thread *td, struct vnode *vp)
 {
 	struct filedesc *fdp;
-	struct vnode *oldvp;
+	struct pwd *newpwd, *oldpwd;
 
+	VNPASS(vp->v_usecount > 0, vp);
+
+	newpwd = pwd_alloc();
 	fdp = td->td_proc->p_fd;
 	FILEDESC_XLOCK(fdp);
-	VNASSERT(vp->v_usecount > 0, vp,
-	    ("chdir to a vnode with zero usecount"));
-	oldvp = fdp->fd_cdir;
-	fdp->fd_cdir = vp;
+	oldpwd = fdp->fd_pwd;
+	newpwd->pwd_cdir = vp;
+	pwd_fill(oldpwd, newpwd);
+	pwd_set(fdp, newpwd);
 	FILEDESC_XUNLOCK(fdp);
-	vrele(oldvp);
+	pwd_drop(oldpwd);
 }
 
+void
+pwd_ensure_dirs(void)
+{
+	struct filedesc *fdp;
+	struct pwd *oldpwd, *newpwd;
+
+	fdp = curproc->p_fd;
+	FILEDESC_XLOCK(fdp);
+	oldpwd = fdp->fd_pwd;
+	if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) {
+		FILEDESC_XUNLOCK(fdp);
+		return;
+	}
+	FILEDESC_XUNLOCK(fdp);
+
+	newpwd = pwd_alloc();
+	FILEDESC_XLOCK(fdp);
+	oldpwd = fdp->fd_pwd;
+	pwd_fill(oldpwd, newpwd);
+	if (newpwd->pwd_cdir == NULL) {
+		vrefact(rootvnode);
+		newpwd->pwd_cdir = rootvnode;
+	}
+	if (newpwd->pwd_rdir == NULL) {
+		vrefact(rootvnode);
+		newpwd->pwd_rdir = rootvnode;
+	}
+	pwd_set(fdp, newpwd);
+	FILEDESC_XUNLOCK(fdp);
+	pwd_drop(oldpwd);
+}
+
 /*
  * Scan all active processes and prisons to see if any of them have a current
  * or root directory of `olddp'. If so, replace them with the new mount point.
@@ -3345,6 +3424,7 @@ void
 mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
 {
 	struct filedesc *fdp;
+	struct pwd *newpwd, *oldpwd;
 	struct prison *pr;
 	struct proc *p;
 	int nrele;
@@ -3352,6 +3432,7 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newd
 	if (vrefcnt(olddp) == 1)
 		return;
 	nrele = 0;
+	newpwd = pwd_alloc();
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
@@ -3360,25 +3441,36 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newd
 		if (fdp == NULL)
 			continue;
 		FILEDESC_XLOCK(fdp);
-		if (fdp->fd_cdir == olddp) {
+		oldpwd = fdp->fd_pwd;
+		if (oldpwd == NULL ||
+		    (oldpwd->pwd_cdir != olddp &&
+		    oldpwd->pwd_rdir != olddp &&
+		    oldpwd->pwd_jdir != olddp)) {
+			FILEDESC_XUNLOCK(fdp);
+			fddrop(fdp);
+			continue;
+		}
+		if (oldpwd->pwd_cdir == olddp) {
 			vrefact(newdp);
-			fdp->fd_cdir = newdp;
-			nrele++;
+			newpwd->pwd_cdir = newdp;
 		}
-		if (fdp->fd_rdir == olddp) {
+		if (oldpwd->pwd_rdir == olddp) {
 			vrefact(newdp);
-			fdp->fd_rdir = newdp;
-			nrele++;
+			newpwd->pwd_rdir = newdp;
 		}
-		if (fdp->fd_jdir == olddp) {
+		if (oldpwd->pwd_jdir == olddp) {
 			vrefact(newdp);
-			fdp->fd_jdir = newdp;
-			nrele++;
+			newpwd->pwd_jdir = newdp;
 		}
+		pwd_fill(oldpwd, newpwd);
+		pwd_set(fdp, newpwd);
 		FILEDESC_XUNLOCK(fdp);
+		pwd_drop(oldpwd);
 		fddrop(fdp);
+		newpwd = pwd_alloc();
 	}
 	sx_sunlock(&allproc_lock);
+	pwd_drop(newpwd);
 	if (rootvnode == olddp) {
 		vrefact(newdp);
 		rootvnode = newdp;
@@ -3714,6 +3806,7 @@ kern_proc_filedesc_out(struct proc *p,  struct sbuf *s
 	struct filedesc *fdp;
 	struct export_fd_buf *efbuf;
 	struct vnode *cttyvp, *textvp, *tracevp;
+	struct pwd *pwd;
 	int error, i;
 	cap_rights_t rights;
 
@@ -3754,21 +3847,25 @@ kern_proc_filedesc_out(struct proc *p,  struct sbuf *s
 		goto fail;
 	efbuf->fdp = fdp;
 	FILEDESC_SLOCK(fdp);
-	/* working directory */
-	if (fdp->fd_cdir != NULL) {
-		vrefact(fdp->fd_cdir);
-		export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
+	pwd = pwd_hold_filedesc(fdp);
+	if (pwd != NULL) {
+		/* working directory */
+		if (pwd->pwd_cdir != NULL) {
+			vrefact(pwd->pwd_cdir);
+			export_vnode_to_sb(pwd->pwd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
+		}
+		/* root directory */
+		if (pwd->pwd_rdir != NULL) {
+			vrefact(pwd->pwd_rdir);
+			export_vnode_to_sb(pwd->pwd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf);
+		}
+		/* jail directory */
+		if (pwd->pwd_jdir != NULL) {
+			vrefact(pwd->pwd_jdir);
+			export_vnode_to_sb(pwd->pwd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf);
+		}
+		pwd_drop(pwd);
 	}
-	/* root directory */
-	if (fdp->fd_rdir != NULL) {
-		vrefact(fdp->fd_rdir);
-		export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf);
-	}
-	/* jail directory */
-	if (fdp->fd_jdir != NULL) {
-		vrefact(fdp->fd_jdir);
-		export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf);
-	}
 	for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
 		if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
 			continue;
@@ -3882,6 +3979,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
 	struct kinfo_ofile *okif;
 	struct kinfo_file *kif;
 	struct filedesc *fdp;
+	struct pwd *pwd;
 	int error, i, *name;
 	struct file *fp;
 	struct proc *p;
@@ -3897,15 +3995,19 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
 	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
 	okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK);
 	FILEDESC_SLOCK(fdp);
-	if (fdp->fd_cdir != NULL)
-		export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
-		    okif, fdp, req);
-	if (fdp->fd_rdir != NULL)
-		export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
-		    okif, fdp, req);
-	if (fdp->fd_jdir != NULL)
-		export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
-		    okif, fdp, req);
+	pwd = pwd_hold_filedesc(fdp);
+	if (pwd != NULL) {
+		if (pwd->pwd_cdir != NULL)
+			export_vnode_for_osysctl(pwd->pwd_cdir, KF_FD_TYPE_CWD, kif,
+			    okif, fdp, req);
+		if (pwd->pwd_rdir != NULL)
+			export_vnode_for_osysctl(pwd->pwd_rdir, KF_FD_TYPE_ROOT, kif,
+			    okif, fdp, req);
+		if (pwd->pwd_jdir != NULL)
+			export_vnode_for_osysctl(pwd->pwd_jdir, KF_FD_TYPE_JAIL, kif,
+			    okif, fdp, req);
+		pwd_drop(pwd);
+	}
 	for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
 		if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
 			continue;
@@ -3973,6 +4075,7 @@ kern_proc_cwd_out(struct proc *p,  struct sbuf *sb, ss
 {
 	struct filedesc *fdp;
 	struct export_fd_buf *efbuf;
+	struct vnode *cdir;
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
@@ -3988,12 +4091,12 @@ kern_proc_cwd_out(struct proc *p,  struct sbuf *sb, ss
 	efbuf->remainder = maxlen;
 
 	FILEDESC_SLOCK(fdp);
-	if (fdp->fd_cdir == NULL)
+	cdir = fdp->fd_pwd->pwd_cdir;
+	if (cdir == NULL) {
 		error = EINVAL;
-	else {
-		vrefact(fdp->fd_cdir);
-		error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD,
-		    FREAD, efbuf);
+	} else {
+		vrefact(cdir);
+		error = export_vnode_to_sb(cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
 	}
 	FILEDESC_SUNLOCK(fdp);
 	fddrop(fdp);

Modified: head/sys/kern/kern_linker.c
==============================================================================
--- head/sys/kern/kern_linker.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/kern/kern_linker.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -2085,14 +2085,14 @@ linker_load_module(const char *kldname, const char *mo
 		KASSERT(verinfo == NULL, ("linker_load_module: verinfo"
 		    " is not NULL"));
 		/* check if root file system is not mounted */
-		if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL)
+		if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL)
 			return (ENXIO);
 		pathname = linker_search_kld(kldname);
 	} else {
 		if (modlist_lookup2(modname, verinfo) != NULL)
 			return (EEXIST);
 		/* check if root file system is not mounted */
-		if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL)
+		if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL)
 			return (ENXIO);
 		if (kldname != NULL)
 			pathname = strdup(kldname, M_LINKER);

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/kern/vfs_cache.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -2196,20 +2196,12 @@ sys___getcwd(struct thread *td, struct __getcwd_args *
 int
 vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen)
 {
-	struct filedesc *fdp;
-	struct vnode *cdir, *rdir;
+	struct pwd *pwd;
 	int error;
 
-	fdp = td->td_proc->p_fd;
-	FILEDESC_SLOCK(fdp);
-	cdir = fdp->fd_cdir;
-	vrefact(cdir);
-	rdir = fdp->fd_rdir;
-	vrefact(rdir);
-	FILEDESC_SUNLOCK(fdp);
-	error = vn_fullpath_any(td, cdir, rdir, buf, retbuf, buflen);
-	vrele(rdir);
-	vrele(cdir);
+	pwd = pwd_hold(td);
+	error = vn_fullpath_any(td, pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf, buflen);
+	pwd_drop(pwd);
 
 #ifdef KTRACE
 	if (KTRPOINT(curthread, KTR_NAMEI) && error == 0)
@@ -2256,9 +2248,8 @@ sys___realpathat(struct thread *td, struct __realpatha
 int
 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
 {
+	struct pwd *pwd;
 	char *buf;
-	struct filedesc *fdp;
-	struct vnode *rdir;
 	size_t buflen;
 	int error;
 
@@ -2267,13 +2258,9 @@ vn_fullpath(struct thread *td, struct vnode *vn, char 
 
 	buflen = MAXPATHLEN;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
-	fdp = td->td_proc->p_fd;
-	FILEDESC_SLOCK(fdp);
-	rdir = fdp->fd_rdir;
-	vrefact(rdir);
-	FILEDESC_SUNLOCK(fdp);
-	error = vn_fullpath_any(td, vn, rdir, buf, retbuf, &buflen);
-	vrele(rdir);
+	pwd = pwd_hold(td);
+	error = vn_fullpath_any(td, vn, pwd->pwd_rdir, buf, retbuf, &buflen);
+	pwd_drop(pwd);
 
 	if (!error)
 		*freebuf = buf;
@@ -2541,8 +2528,7 @@ vn_fullpath_hardlink(struct thread *td, struct nameida
     char **freebuf, size_t *buflen)
 {
 	char *buf, *tmpbuf;
-	struct filedesc *fdp;
-	struct vnode *rdir;
+	struct pwd *pwd;
 	struct componentname *cnp;
 	struct vnode *vp;
 	size_t addend;
@@ -2557,11 +2543,7 @@ vn_fullpath_hardlink(struct thread *td, struct nameida
 	slash_prefixed = false;
 
 	buf = malloc(*buflen, M_TEMP, M_WAITOK);
-	fdp = td->td_proc->p_fd;
-	FILEDESC_SLOCK(fdp);
-	rdir = fdp->fd_rdir;
-	vrefact(rdir);
-	FILEDESC_SUNLOCK(fdp);
+	pwd = pwd_hold(td);
 
 	addend = 0;
 	vp = ndp->ni_vp;
@@ -2582,16 +2564,17 @@ vn_fullpath_hardlink(struct thread *td, struct nameida
 	}
 
 	vref(vp);
-	error = vn_fullpath_dir(td, vp, rdir, buf, retbuf, buflen, slash_prefixed, addend);
+	error = vn_fullpath_dir(td, vp, pwd->pwd_rdir, buf, retbuf, buflen,
+	    slash_prefixed, addend);
 	if (error != 0)
 		goto out_bad;
 
-	vrele(rdir);
+	pwd_drop(pwd);
 	*freebuf = buf;
 
 	return (0);
 out_bad:
-	vrele(rdir);
+	pwd_drop(pwd);
 	free(buf, M_TEMP);
 	return (error);
 }

Modified: head/sys/kern/vfs_lookup.c
==============================================================================
--- head/sys/kern/vfs_lookup.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/kern/vfs_lookup.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -303,7 +303,6 @@ namei_handle_root(struct nameidata *ndp, struct vnode 
 int
 namei(struct nameidata *ndp)
 {
-	struct filedesc *fdp;	/* pointer to file descriptor state */
 	char *cp;		/* pointer into pathname argument */
 	struct vnode *dp;	/* the directory we are searching */
 	struct iovec aiov;		/* uio for reading symbolic links */
@@ -311,6 +310,7 @@ namei(struct nameidata *ndp)
 	struct file *dfp;
 	struct thread *td;
 	struct proc *p;
+	struct pwd *pwd;
 	cap_rights_t rights;
 	struct filecaps dirfd_caps;
 	struct uio auio;
@@ -327,7 +327,6 @@ namei(struct nameidata *ndp)
 	    ("namei: flags contaminated with nameiops"));
 	MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
 	    ndp->ni_startdir->v_type == VBAD);
-	fdp = p->p_fd;
 	TAILQ_INIT(&ndp->ni_cap_tracker);
 	ndp->ni_lcf = 0;
 
@@ -395,13 +394,13 @@ namei(struct nameidata *ndp)
 	/*
 	 * Get starting point for the translation.
 	 */
-	FILEDESC_SLOCK(fdp);
+	pwd = pwd_hold(td);
 	/*
 	 * The reference on ni_rootdir is acquired in the block below to avoid
 	 * back-to-back atomics for absolute lookups.
 	 */
-	ndp->ni_rootdir = fdp->fd_rdir;
-	ndp->ni_topdir = fdp->fd_jdir;
+	ndp->ni_rootdir = pwd->pwd_rdir;
+	ndp->ni_topdir = pwd->pwd_jdir;
 
 	startdir_used = 0;
 	dp = NULL;
@@ -422,7 +421,7 @@ namei(struct nameidata *ndp)
 			dp = ndp->ni_startdir;
 			startdir_used = 1;
 		} else if (ndp->ni_dirfd == AT_FDCWD) {
-			dp = fdp->fd_cdir;
+			dp = pwd->pwd_cdir;
 			if (dp == ndp->ni_rootdir) {
 				vrefactn(dp, 2);
 			} else {
@@ -442,7 +441,7 @@ namei(struct nameidata *ndp)
 			 * Effectively inlined fgetvp_rights, because we need to
 			 * inspect the file as well as grabbing the vnode.
 			 */
-			error = fget_cap_locked(fdp, ndp->ni_dirfd, &rights,
+			error = fget_cap(td, ndp->ni_dirfd, &rights,
 			    &dfp, &ndp->ni_filecaps);
 			if (error != 0) {
 				/*
@@ -450,16 +449,19 @@ namei(struct nameidata *ndp)
 				 * or capability-related, both of which can be
 				 * safely returned to the caller.
 				 */
-			} else if (dfp->f_ops == &badfileops) {
-				error = EBADF;
-			} else if (dfp->f_vnode == NULL) {
-				error = ENOTDIR;
 			} else {
-				dp = dfp->f_vnode;
-				vrefact(dp);
+				if (dfp->f_ops == &badfileops) {
+					error = EBADF;
+				} else if (dfp->f_vnode == NULL) {
+					error = ENOTDIR;
+				} else {
+					dp = dfp->f_vnode;
+					vrefact(dp);
 
-				if ((dfp->f_flag & FSEARCH) != 0)
-					cnp->cn_flags |= NOEXECCHECK;
+					if ((dfp->f_flag & FSEARCH) != 0)
+						cnp->cn_flags |= NOEXECCHECK;
+				}
+				fdrop(dfp, td);
 			}
 #ifdef CAPABILITIES
 			/*
@@ -481,7 +483,7 @@ namei(struct nameidata *ndp)
 	}
 	if (error == 0 && (cnp->cn_flags & BENEATH) != 0) {
 		if (ndp->ni_dirfd == AT_FDCWD) {
-			ndp->ni_beneath_latch = fdp->fd_cdir;
+			ndp->ni_beneath_latch = pwd->pwd_cdir;
 			vrefact(ndp->ni_beneath_latch);
 		} else {
 			rights = ndp->ni_rightsneeded;
@@ -496,7 +498,6 @@ namei(struct nameidata *ndp)
 		if (error == 0)
 			ndp->ni_lcf |= NI_LCF_LATCH;
 	}
-	FILEDESC_SUNLOCK(fdp);
 	/*
 	 * If we are auditing the kernel pathname, save the user pathname.
 	 */
@@ -542,6 +543,7 @@ namei(struct nameidata *ndp)
 			nameicap_cleanup(ndp, true);
 			SDT_PROBE2(vfs, namei, lookup, return, error,
 			    (error == 0 ? ndp->ni_vp : NULL));
+			pwd_drop(pwd);
 			return (error);
 		}
 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
@@ -617,6 +619,7 @@ out:
 	namei_cleanup_cnp(cnp);
 	nameicap_cleanup(ndp, true);
 	SDT_PROBE2(vfs, namei, lookup, return, error, NULL);
+	pwd_drop(pwd);
 	return (error);
 }
 

Modified: head/sys/kern/vfs_mountroot.c
==============================================================================
--- head/sys/kern/vfs_mountroot.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/kern/vfs_mountroot.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -237,27 +237,13 @@ root_mounted(void)
 static void
 set_rootvnode(void)
 {
-	struct proc *p;
 
 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
 		panic("set_rootvnode: Cannot find root vnode");
 
 	VOP_UNLOCK(rootvnode);
 
-	p = curthread->td_proc;
-	FILEDESC_XLOCK(p->p_fd);
-
-	if (p->p_fd->fd_cdir != NULL)
-		vrele(p->p_fd->fd_cdir);
-	p->p_fd->fd_cdir = rootvnode;
-	VREF(rootvnode);
-
-	if (p->p_fd->fd_rdir != NULL)
-		vrele(p->p_fd->fd_rdir);
-	p->p_fd->fd_rdir = rootvnode;
-	VREF(rootvnode);
-
-	FILEDESC_XUNLOCK(p->p_fd);
+	pwd_ensure_dirs();
 }
 
 static int

Modified: head/sys/security/audit/audit_bsm_klib.c
==============================================================================
--- head/sys/security/audit/audit_bsm_klib.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/security/audit/audit_bsm_klib.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -493,38 +493,35 @@ void
 audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
 {
 	struct vnode *cdir, *rdir;
-	struct filedesc *fdp;
+	struct pwd *pwd;
 	cap_rights_t rights;
 	int error;
+	bool vrele_cdir;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d",
 	    __func__,  __FILE__, __LINE__);
 
-	rdir = cdir = NULL;
-	fdp = td->td_proc->p_fd;
-	FILEDESC_SLOCK(fdp);
-	if (*path == '/') {
-		rdir = fdp->fd_rdir;
-		vrefact(rdir);
-	} else {
+	pwd = pwd_hold(td);
+	rdir = pwd->pwd_rdir;
+	cdir = NULL;
+	vrele_cdir = false;
+	if (*path != '/') {
 		if (dirfd == AT_FDCWD) {
-			cdir = fdp->fd_cdir;
-			vrefact(cdir);
+			cdir = pwd->pwd_cdir;
 		} else {
 			error = fgetvp(td, dirfd, cap_rights_init(&rights), &cdir);
 			if (error != 0) {
-				FILEDESC_SUNLOCK(fdp);
 				cpath[0] = '\0';
+				pwd_drop(pwd);
 				return;
 			}
+			vrele_cdir = true;
 		}
 	}
-	FILEDESC_SUNLOCK(fdp);
 
 	audit_canon_path_vp(td, rdir, cdir, path, cpath);
 
-	if (rdir != NULL)
-		vrele(rdir);
-	if (cdir != NULL)
+	pwd_drop(pwd);
+	if (vrele_cdir)
 		vrele(cdir);
 }

Modified: head/sys/sys/filedesc.h
==============================================================================
--- head/sys/sys/filedesc.h	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/sys/filedesc.h	Sun Mar  1 21:53:46 2020	(r358503)
@@ -76,11 +76,16 @@ struct fdescenttbl {
  */
 #define NDSLOTTYPE	u_long
 
+struct pwd {
+	volatile u_int pwd_refcount;
+	struct	vnode *pwd_cdir;		/* current directory */
+	struct	vnode *pwd_rdir;		/* root directory */
+	struct	vnode *pwd_jdir;		/* jail root directory */
+};
+
 struct filedesc {
 	struct	fdescenttbl *fd_files;	/* open files table */
-	struct	vnode *fd_cdir;		/* current directory */
-	struct	vnode *fd_rdir;		/* root directory */
-	struct	vnode *fd_jdir;		/* jail root directory */
+	struct	pwd *fd_pwd;		/* directories */
 	NDSLOTTYPE *fd_map;		/* bitmap of free fds */
 	int	fd_lastfile;		/* high-water mark of fd_ofiles */
 	int	fd_freefile;		/* approx. next free file */
@@ -252,6 +257,17 @@ fd_modified(struct filedesc *fdp, int fd, seqc_t seqc)
 void	pwd_chdir(struct thread *td, struct vnode *vp);
 int	pwd_chroot(struct thread *td, struct vnode *vp);
 void	pwd_ensure_dirs(void);
+
+struct pwd *pwd_hold_filedesc(struct filedesc *fdp);
+struct pwd *pwd_hold(struct thread *td);
+void	pwd_drop(struct pwd *pwd);
+static inline void
+pwd_set(struct filedesc *fdp, struct pwd *newpwd)
+{
+
+	FILEDESC_XLOCK_ASSERT(fdp);
+	fdp->fd_pwd = newpwd;
+}
 
 #endif /* _KERNEL */
 

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c	Sun Mar  1 21:50:13 2020	(r358502)
+++ head/sys/ufs/ffs/ffs_alloc.c	Sun Mar  1 21:53:46 2020	(r358503)
@@ -3190,6 +3190,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 	struct inode *ip, *dp;
 	struct mount *mp;
 	struct fs *fs;
+	struct pwd *pwd;
 	ufs2_daddr_t blkno;
 	long blkcnt, blksize;
 	u_long key;
@@ -3448,11 +3449,11 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		/*
 		 * Now we get and lock the child directory containing "..".
 		 */
-		FILEDESC_SLOCK(td->td_proc->p_fd);
-		dvp = td->td_proc->p_fd->fd_cdir;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list