svn commit: r202113 - in head/sys: compat/linux kern sys ufs/ffs ufs/ufs

Kirk McKusick mckusick at FreeBSD.org
Mon Jan 11 20:44:05 UTC 2010


Author: mckusick
Date: Mon Jan 11 20:44:05 2010
New Revision: 202113
URL: http://svn.freebsd.org/changeset/base/202113

Log:
  Background:
  
  When renaming a directory it passes through several intermediate
  states. First its new name will be created causing it to have two
  names (from possibly different parents). Next, if it has different
  parents, its value of ".." will be changed from pointing to the old
  parent to pointing to the new parent. Concurrently, its old name
  will be removed bringing it back into a consistent state. When fsck
  encounters an extra name for a directory, it offers to remove the
  "extraneous hard link"; when it finds that the names have been
  changed but the update to ".." has not happened, it offers to rewrite
  ".." to point at the correct parent. Both of these changes were
  considered unexpected so would cause fsck in preen mode or fsck in
  background mode to fail with the need to run fsck manually to fix
  these problems. Fsck running in preen mode or background mode now
  corrects these expected inconsistencies that arise during directory
  rename. The functionality added with this update is used by fsck
  running in background mode to make these fixes.
  
  Solution:
  
  This update adds three new fsck sysctl commands to support background
  fsck in correcting expected inconsistencies that arise from incomplete
  directory rename operations. They are:
  
  setcwd(dirinode) - set the current directory to dirinode in the
      filesystem associated with the snapshot.
  setdotdot(oldvalue, newvalue) - Verify that the inode number for ".."
      in the current directory is oldvalue then change it to newvalue.
  unlink(nameptr, oldvalue) - Verify that the inode number associated
      with nameptr in the current directory is oldvalue then unlink it.
  
  As with all other fsck sysctls, these new ones may only be used by
  processes with appropriate priviledge.
  
  Reported by:    	jeff
  Security issues:	rwatson

Modified:
  head/sys/compat/linux/linux_file.c
  head/sys/kern/vfs_syscalls.c
  head/sys/sys/syscallsubr.h
  head/sys/ufs/ffs/ffs_alloc.c
  head/sys/ufs/ffs/fs.h
  head/sys/ufs/ufs/ufs_lookup.c

Modified: head/sys/compat/linux/linux_file.c
==============================================================================
--- head/sys/compat/linux/linux_file.c	Mon Jan 11 20:32:51 2010	(r202112)
+++ head/sys/compat/linux/linux_file.c	Mon Jan 11 20:44:05 2010	(r202113)
@@ -652,7 +652,7 @@ linux_unlinkat(struct thread *td, struct
 	if (args->flag & LINUX_AT_REMOVEDIR)
 		error = kern_rmdirat(td, dfd, path, UIO_SYSSPACE);
 	else
-		error = kern_unlinkat(td, dfd, path, UIO_SYSSPACE);
+		error = kern_unlinkat(td, dfd, path, UIO_SYSSPACE, 0);
 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
 		/* Introduce POSIX noncompliant behaviour of Linux */
 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c	Mon Jan 11 20:32:51 2010	(r202112)
+++ head/sys/kern/vfs_syscalls.c	Mon Jan 11 20:44:05 2010	(r202113)
@@ -1815,23 +1815,25 @@ unlinkat(struct thread *td, struct unlin
 	if (flag & AT_REMOVEDIR)
 		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 	else
-		return (kern_unlinkat(td, fd, path, UIO_USERSPACE));
+		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 }
 
 int
 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
 {
 
-	return (kern_unlinkat(td, AT_FDCWD, path, pathseg));
+	return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
 }
 
 int
-kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
+kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
+    ino_t oldinum)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 	struct nameidata nd;
+	struct stat sb;
 	int vfslocked;
 
 restart:
@@ -1842,9 +1844,13 @@ restart:
 		return (error == EINVAL ? EPERM : error);
 	vfslocked = NDHASGIANT(&nd);
 	vp = nd.ni_vp;
-	if (vp->v_type == VDIR)
+	if (vp->v_type == VDIR && oldinum == 0) {
 		error = EPERM;		/* POSIX */
-	else {
+	} else if (oldinum != 0 &&
+		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
+		  sb.st_ino != oldinum) {
+			error = EIDRM;	/* Identifier removed */
+	} else {
 		/*
 		 * The root of a mounted filesystem cannot be deleted.
 		 *

Modified: head/sys/sys/syscallsubr.h
==============================================================================
--- head/sys/sys/syscallsubr.h	Mon Jan 11 20:32:51 2010	(r202112)
+++ head/sys/sys/syscallsubr.h	Mon Jan 11 20:44:05 2010	(r202113)
@@ -212,7 +212,7 @@ int	kern_truncate(struct thread *td, cha
 	    off_t length);
 int	kern_unlink(struct thread *td, char *path, enum uio_seg pathseg);
 int	kern_unlinkat(struct thread *td, int fd, char *path,
-	    enum uio_seg pathseg);
+	    enum uio_seg pathseg, ino_t oldinum);
 int	kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
 	    struct timeval *tptr, enum uio_seg tptrseg);
 int	kern_utimesat(struct thread *td, int fd, char *path,

Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c	Mon Jan 11 20:32:51 2010	(r202112)
+++ head/sys/ufs/ffs/ffs_alloc.c	Mon Jan 11 20:44:05 2010	(r202113)
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
+#include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/priv.h>
@@ -76,9 +77,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/kernel.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 
+#include <security/audit/audit.h>
+
+#include <ufs/ufs/dir.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
@@ -2328,7 +2333,7 @@ ffs_fserr(fs, inum, cp)
 
 /*
  * This function provides the capability for the fsck program to
- * update an active filesystem. Eleven operations are provided:
+ * update an active filesystem. Fourteen operations are provided:
  *
  * adjrefcnt(inode, amt) - adjusts the reference count on the
  *	specified inode by the specified amount. Under normal
@@ -2349,6 +2354,12 @@ ffs_fserr(fs, inum, cp)
  *	as in use.
  * setflags(flags, set/clear) - the fs_flags field has the specified
  *	flags set (second parameter +1) or cleared (second parameter -1).
+ * setcwd(dirinode) - set the current directory to dirinode in the
+ *	filesystem associated with the snapshot.
+ * setdotdot(oldvalue, newvalue) - Verify that the inode number for ".."
+ *	in the current directory is oldvalue then change it to newvalue.
+ * unlink(nameptr, oldvalue) - Verify that the inode number associated
+ *	with nameptr in the current directory is oldvalue then unlink it.
  */
 
 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
@@ -2386,6 +2397,15 @@ static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FRE
 static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Change Filesystem Flags");
 
+static SYSCTL_NODE(_vfs_ffs, FFS_SET_CWD, setcwd, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Set Current Working Directory");
+
+static SYSCTL_NODE(_vfs_ffs, FFS_SET_DOTDOT, setdotdot, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Change Value of .. Entry");
+
+static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Unlink a Duplicate Name");
+
 #ifdef DEBUG
 static int fsckcmds = 0;
 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
@@ -2394,16 +2414,18 @@ SYSCTL_INT(_debug, OID_AUTO, fsckcmds, C
 static int
 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 {
+	struct thread *td = curthread;
 	struct fsck_cmd cmd;
 	struct ufsmount *ump;
-	struct vnode *vp;
-	struct inode *ip;
+	struct vnode *vp, *vpold, *dvp, *fdvp;
+	struct inode *ip, *dp;
 	struct mount *mp;
 	struct fs *fs;
 	ufs2_daddr_t blkno;
 	long blkcnt, blksize;
+	struct filedesc *fdp;
 	struct file *fp;
-	int filetype, error;
+	int vfslocked, filetype, error;
 
 	if (req->newlen > sizeof cmd)
 		return (EBADRPC);
@@ -2413,15 +2435,20 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		return (ERPCMISMATCH);
 	if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
 		return (error);
-	vn_start_write(fp->f_data, &mp, V_WAIT);
+	vp = fp->f_data;
+	if (vp->v_type != VREG && vp->v_type != VDIR) {
+		fdrop(fp, td);
+		return (EINVAL);
+	}
+	vn_start_write(vp, &mp, V_WAIT);
 	if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
 		vn_finished_write(mp);
-		fdrop(fp, curthread);
+		fdrop(fp, td);
 		return (EINVAL);
 	}
 	if (mp->mnt_flag & MNT_RDONLY) {
 		vn_finished_write(mp);
-		fdrop(fp, curthread);
+		fdrop(fp, td);
 		return (EROFS);
 	}
 	ump = VFSTOUFS(mp);
@@ -2553,6 +2580,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_ndir += cmd.value;
 		break;
+
 	case FFS_ADJ_NBFREE:
 #ifdef DEBUG
 		if (fsckcmds) {
@@ -2562,6 +2590,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_nbfree += cmd.value;
 		break;
+
 	case FFS_ADJ_NIFREE:
 #ifdef DEBUG
 		if (fsckcmds) {
@@ -2571,6 +2600,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_nifree += cmd.value;
 		break;
+
 	case FFS_ADJ_NFFREE:
 #ifdef DEBUG
 		if (fsckcmds) {
@@ -2580,6 +2610,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_nffree += cmd.value;
 		break;
+
 	case FFS_ADJ_NUMCLUSTERS:
 #ifdef DEBUG
 		if (fsckcmds) {
@@ -2590,6 +2621,91 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		fs->fs_cstotal.cs_numclusters += cmd.value;
 		break;
 
+	case FFS_SET_CWD:
+#ifdef DEBUG
+		if (fsckcmds) {
+			printf("%s: set current directory to inode %jd\n",
+			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
+		}
+#endif /* DEBUG */
+		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_SHARED, &vp)))
+			break;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		AUDIT_ARG_VNODE1(vp);
+		if ((error = change_dir(vp, td)) != 0) {
+			vput(vp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			break;
+		}
+		VOP_UNLOCK(vp, 0);
+		VFS_UNLOCK_GIANT(vfslocked);
+		fdp = td->td_proc->p_fd;
+		FILEDESC_XLOCK(fdp);
+		vpold = fdp->fd_cdir;
+		fdp->fd_cdir = vp;
+		FILEDESC_XUNLOCK(fdp);
+		vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
+		vrele(vpold);
+		VFS_UNLOCK_GIANT(vfslocked);
+		break;
+
+	case FFS_SET_DOTDOT:
+#ifdef DEBUG
+		if (fsckcmds) {
+			printf("%s: change .. in cwd from %jd to %jd\n",
+			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
+			    (intmax_t)cmd.size);
+		}
+#endif /* DEBUG */
+		/*
+		 * First we have to get and lock the parent directory
+		 * to which ".." points.
+		 */
+		error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &fdvp);
+		if (error)
+			break;
+		/*
+		 * Now we get and lock the child directory containing "..".
+		 */
+		FILEDESC_SLOCK(td->td_proc->p_fd);
+		dvp = td->td_proc->p_fd->fd_cdir;
+		FILEDESC_SUNLOCK(td->td_proc->p_fd);
+		if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
+			vput(fdvp);
+			break;
+		}
+		dp = VTOI(dvp);
+		dp->i_offset = 12;	/* XXX mastertemplate.dot_reclen */
+		error = ufs_dirrewrite(dp, VTOI(fdvp), (ino_t)cmd.size,
+		    DT_DIR, 0);
+		cache_purge(fdvp);
+		cache_purge(dvp);
+		vput(dvp);
+		vput(fdvp);
+		break;
+
+	case FFS_UNLINK:
+#ifdef DEBUG
+		if (fsckcmds) {
+			char buf[32];
+
+			if (copyinstr((char *)(int)cmd.value, buf, 32, NULL))
+				strncpy(buf, "Name_too_long", 32);
+			printf("%s: unlink %s (inode %jd)\n",
+			    mp->mnt_stat.f_mntonname, buf, (intmax_t)cmd.size);
+		}
+#endif /* DEBUG */
+		/*
+		 * kern_unlinkat will do its own start/finish writes and
+		 * they do not nest, so drop ours here. Setting mp == NULL
+		 * indicates that vn_finished_write is not needed down below.
+		 */
+		vn_finished_write(mp);
+		mp = NULL;
+		error = kern_unlinkat(td, AT_FDCWD, (char *)(int)cmd.value,
+		    UIO_USERSPACE, (ino_t)cmd.size);
+		break;
+
 	default:
 #ifdef DEBUG
 		if (fsckcmds) {
@@ -2601,7 +2717,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		break;
 
 	}
-	fdrop(fp, curthread);
+	fdrop(fp, td);
 	vn_finished_write(mp);
 	return (error);
 }

Modified: head/sys/ufs/ffs/fs.h
==============================================================================
--- head/sys/ufs/ffs/fs.h	Mon Jan 11 20:32:51 2010	(r202112)
+++ head/sys/ufs/ffs/fs.h	Mon Jan 11 20:44:05 2010	(r202113)
@@ -211,7 +211,10 @@
 #define	FFS_ADJ_NIFREE		 9	/* adjust number of free inodes */
 #define	FFS_ADJ_NFFREE		10 	/* adjust number of free frags */
 #define	FFS_ADJ_NUMCLUSTERS	11	/* adjust number of free clusters */
-#define	FFS_MAXID		12	/* number of valid ffs ids */
+#define FFS_SET_CWD		12	/* set current directory */
+#define	FFS_SET_DOTDOT		13	/* set inode number for ".." */
+#define	FFS_UNLINK		14	/* remove a name in the filesystem */
+#define	FFS_MAXID		15	/* number of valid ffs ids */
 
 /*
  * Command structure passed in to the filesystem to adjust filesystem values.

Modified: head/sys/ufs/ufs/ufs_lookup.c
==============================================================================
--- head/sys/ufs/ufs/ufs_lookup.c	Mon Jan 11 20:32:51 2010	(r202112)
+++ head/sys/ufs/ufs/ufs_lookup.c	Mon Jan 11 20:44:05 2010	(r202113)
@@ -1224,6 +1224,11 @@ ufs_dirrewrite(dp, oip, newinum, newtype
 	error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
 	if (error)
 		return (error);
+	if (ep->d_namlen == 2 && ep->d_name[1] == '.' && ep->d_name[0] == '.' &&
+	    ep->d_ino != oip->i_number) {
+		brelse(bp);
+		return (EIDRM);
+	}
 	ep->d_ino = newinum;
 	if (!OFSFMT(vdp))
 		ep->d_type = newtype;


More information about the svn-src-all mailing list