kern/156545: commit references a PR

dfilter service dfilter at FreeBSD.ORG
Sun Apr 24 11:10:07 UTC 2011


The following reply was made to PR kern/156545; it has been noted by GNATS.

From: dfilter at FreeBSD.ORG (dfilter service)
To: bug-followup at FreeBSD.org
Cc:  
Subject: Re: kern/156545: commit references a PR
Date: Sun, 24 Apr 2011 11:02:04 +0000 (UTC)

 Author: kib
 Date: Sun Apr 24 11:01:42 2011
 New Revision: 220986
 URL: http://svn.freebsd.org/changeset/base/220986
 
 Log:
   Merge the part of r207141 that fixes the locking for ufs_rename() (and
   r218838 followup).
   
   Adopt the SU calls to the stable/8 SU implementation, with the help from
   Kirk.
   
   PR:	kern/156545
   Reviewed by:	mckusick
   Tested by:	pho
 
 Modified:
   stable/8/sys/ufs/ufs/inode.h
   stable/8/sys/ufs/ufs/ufs_extern.h
   stable/8/sys/ufs/ufs/ufs_lookup.c
   stable/8/sys/ufs/ufs/ufs_vnops.c
 Directory Properties:
   stable/8/sys/   (props changed)
   stable/8/sys/amd64/include/xen/   (props changed)
   stable/8/sys/cddl/contrib/opensolaris/   (props changed)
   stable/8/sys/contrib/dev/acpica/   (props changed)
   stable/8/sys/contrib/pf/   (props changed)
 
 Modified: stable/8/sys/ufs/ufs/inode.h
 ==============================================================================
 --- stable/8/sys/ufs/ufs/inode.h	Sun Apr 24 10:47:56 2011	(r220985)
 +++ stable/8/sys/ufs/ufs/inode.h	Sun Apr 24 11:01:42 2011	(r220986)
 @@ -120,7 +120,7 @@ struct inode {
  #define	IN_CHANGE	0x0002		/* Inode change time update request. */
  #define	IN_UPDATE	0x0004		/* Modification time update request. */
  #define	IN_MODIFIED	0x0008		/* Inode has been modified. */
 -#define	IN_RENAME	0x0010		/* Inode is being renamed. */
 +#define	IN_NEEDSYNC	0x0010		/* Inode requires fsync. */
  #define	IN_LAZYMOD	0x0040		/* Modified, but don't write yet. */
  #define	IN_SPACECOUNTED	0x0080		/* Blocks to be freed in free count. */
  #define	IN_LAZYACCESS	0x0100		/* Process IN_ACCESS after the
 
 Modified: stable/8/sys/ufs/ufs/ufs_extern.h
 ==============================================================================
 --- stable/8/sys/ufs/ufs/ufs_extern.h	Sun Apr 24 10:47:56 2011	(r220985)
 +++ stable/8/sys/ufs/ufs/ufs_extern.h	Sun Apr 24 11:01:42 2011	(r220986)
 @@ -57,7 +57,7 @@ int	 ufs_bmap(struct vop_bmap_args *);
  int	 ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
  	    struct buf *, int *, int *);
  int	 ufs_fhtovp(struct mount *, struct ufid *, struct vnode **);
 -int	 ufs_checkpath(ino_t, struct inode *, struct ucred *);
 +int	 ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *);
  void	 ufs_dirbad(struct inode *, doff_t, char *);
  int	 ufs_dirbadentry(struct vnode *, struct direct *, int);
  int	 ufs_dirempty(struct inode *, ino_t, struct ucred *);
 @@ -66,9 +66,11 @@ int	 ufs_extwrite(struct vop_write_args 
  void	 ufs_makedirentry(struct inode *, struct componentname *,
  	    struct direct *);
  int	 ufs_direnter(struct vnode *, struct vnode *, struct direct *,
 -	    struct componentname *, struct buf *);
 +	    struct componentname *, struct buf *, int);
  int	 ufs_dirremove(struct vnode *, struct inode *, int, int);
  int	 ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int);
 +int	 ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *,
 +	    ino_t *);
  int	 ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *);
  int	 ufs_inactive(struct vop_inactive_args *);
  int	 ufs_init(struct vfsconf *);
 
 Modified: stable/8/sys/ufs/ufs/ufs_lookup.c
 ==============================================================================
 --- stable/8/sys/ufs/ufs/ufs_lookup.c	Sun Apr 24 10:47:56 2011	(r220985)
 +++ stable/8/sys/ufs/ufs/ufs_lookup.c	Sun Apr 24 11:01:42 2011	(r220986)
 @@ -76,9 +76,6 @@ SYSCTL_INT(_debug, OID_AUTO, dircheck, C
  /* true if old FS format...*/
  #define OFSFMT(vp)	((vp)->v_mount->mnt_maxsymlinklen <= 0)
  
 -static int ufs_lookup_(struct vnode *, struct vnode **, struct componentname *,
 -    ino_t *);
 -
  #ifdef QUOTA
  static int
  ufs_lookup_upgrade_lock(struct vnode *vp)
 @@ -214,11 +211,11 @@ ufs_lookup(ap)
  	} */ *ap;
  {
  
 -	return (ufs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
 +	return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
  }
  
 -static int
 -ufs_lookup_(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
 +int
 +ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
      ino_t *dd_ino)
  {
  	struct inode *dp;		/* inode for directory being searched */
 @@ -556,6 +553,8 @@ notfound:
  	return (ENOENT);
  
  found:
 +	if (dd_ino != NULL)
 +		*dd_ino = ino;
  	if (numdirpasses == 2)
  		nchstats.ncs_pass2++;
  	/*
 @@ -578,11 +577,6 @@ found:
  	if ((flags & ISLASTCN) && nameiop == LOOKUP)
  		dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1);
  
 -	if (dd_ino != NULL) {
 -		*dd_ino = ino;
 -		return (0);
 -	}
 -
  	/*
  	 * If deleting, and at end of pathname, return
  	 * parameters which can be used to remove file.
 @@ -590,17 +584,6 @@ found:
  	if (nameiop == DELETE && (flags & ISLASTCN)) {
  		if (flags & LOCKPARENT)
  			ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
 -		if ((error = VFS_VGET(vdp->v_mount, ino,
 -		    LK_EXCLUSIVE, &tdp)) != 0)
 -			return (error);
 -
 -		error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
 -		if (error) {
 -			vput(tdp);
 -			return (error);
 -		}
 -
 -
  		/*
  		 * Return pointer to current entry in dp->i_offset,
  		 * and distance past previous entry (if there
 @@ -617,6 +600,16 @@ found:
  			dp->i_count = 0;
  		else
  			dp->i_count = dp->i_offset - prevoff;
 +		if (dd_ino != NULL)
 +			return (0);
 +		if ((error = VFS_VGET(vdp->v_mount, ino,
 +		    LK_EXCLUSIVE, &tdp)) != 0)
 +			return (error);
 +		error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
 +		if (error) {
 +			vput(tdp);
 +			return (error);
 +		}
  		if (dp->i_number == ino) {
  			VREF(vdp);
  			*vpp = vdp;
 @@ -648,6 +641,8 @@ found:
  		dp->i_offset = i_offset;
  		if (dp->i_number == ino)
  			return (EISDIR);
 +		if (dd_ino != NULL)
 +			return (0);
  		if ((error = VFS_VGET(vdp->v_mount, ino,
  		    LK_EXCLUSIVE, &tdp)) != 0)
  			return (error);
 @@ -682,6 +677,8 @@ found:
  		cnp->cn_flags |= SAVENAME;
  		return (0);
  	}
 +	if (dd_ino != NULL)
 +		return (0);
  
  	/*
  	 * Step through the translation in the name.  We do not `vput' the
 @@ -713,7 +710,7 @@ found:
  		 * to the inode we looked up before vdp lock was
  		 * dropped.
  		 */
 -		error = ufs_lookup_(pdp, NULL, cnp, &ino1);
 +		error = ufs_lookup_ino(pdp, NULL, cnp, &ino1);
  		if (error) {
  			vput(tdp);
  			return (error);
 @@ -865,12 +862,13 @@ ufs_makedirentry(ip, cnp, newdirp)
   * soft dependency code).
   */
  int
 -ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
 +ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename)
  	struct vnode *dvp;
  	struct vnode *tvp;
  	struct direct *dirp;
  	struct componentname *cnp;
  	struct buf *newdirbp;
 +	int isrename;
  {
  	struct ucred *cr;
  	struct thread *td;
 @@ -943,22 +941,30 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
  				blkoff += DIRBLKSIZ;
  			}
  			if (softdep_setup_directory_add(bp, dp, dp->i_offset,
 -			    dirp->d_ino, newdirbp, 1) == 0) {
 -				bdwrite(bp);
 +			    dirp->d_ino, newdirbp, 1))
 +				dp->i_flag |= IN_NEEDSYNC;
 +#ifdef JOURNALED_SOFTUPDATES
 +			if (newdirbp)
 +				bdwrite(newdirbp);
 +#endif
 +			bdwrite(bp);
 +			if ((dp->i_flag & IN_NEEDSYNC) == 0)
  				return (UFS_UPDATE(dvp, 0));
 -			}
 -			/* We have just allocated a directory block in an
 -			 * indirect block. Rather than tracking when it gets
 -			 * claimed by the inode, we simply do a VOP_FSYNC
 -			 * now to ensure that it is there (in case the user
 -			 * does a future fsync). Note that we have to unlock
 -			 * the inode for the entry that we just entered, as
 -			 * the VOP_FSYNC may need to lock other inodes which
 -			 * can lead to deadlock if we also hold a lock on
 -			 * the newly entered node.
 +			/*
 +			 * We have just allocated a directory block in an
 +			 * indirect block.  We must prevent holes in the
 +			 * directory created if directory entries are
 +			 * written out of order.  To accomplish this we
 +			 * fsync when we extend a directory into indirects.
 +			 * During rename it's not safe to drop the tvp lock
 +			 * so sync must be delayed until it is.
 +			 *
 +			 * This synchronous step could be removed if fsck and
 +			 * the kernel were taught to fill in sparse
 +			 * directories rather than panic.
  			 */
 -			if ((error = bwrite(bp)))
 -				return (error);
 +			if (isrename)
 +				return (0);
  			if (tvp != NULL)
  				VOP_UNLOCK(tvp, 0);
  			error = VOP_FSYNC(dvp, MNT_WAIT, td);
 @@ -1099,6 +1105,10 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
  		(void) softdep_setup_directory_add(bp, dp,
  		    dp->i_offset + (caddr_t)ep - dirbuf,
  		    dirp->d_ino, newdirbp, 0);
 +#ifdef JOURNALED_SOFTUPDATES
 +		if (newdirbp != NULL)
 +			bdwrite(newdirbp);
 +#endif
  		bdwrite(bp);
  	} else {
  		if (DOINGASYNC(dvp)) {
 @@ -1116,7 +1126,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
  	 * lock other inodes which can lead to deadlock if we also hold a
  	 * lock on the newly entered node.
  	 */
 -	if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) {
 +	if (isrename == 0 && error == 0 &&
 +	    dp->i_endoff && dp->i_endoff < dp->i_size) {
  		if (tvp != NULL)
  			VOP_UNLOCK(tvp, 0);
  #ifdef UFS_DIRHASH
 @@ -1386,25 +1397,25 @@ ufs_dir_dd_ino(struct vnode *vp, struct 
  
  /*
   * Check if source directory is in the path of the target directory.
 - * Target is supplied locked, source is unlocked.
 - * The target is always vput before returning.
   */
  int
 -ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred)
 +ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino)
  {
 -	struct vnode *vp, *vp1;
 +	struct mount *mp;
 +	struct vnode *tvp, *vp, *vp1;
  	int error;
  	ino_t dd_ino;
  
 -	vp = ITOV(target);
 -	if (target->i_number == source_ino) {
 -		error = EEXIST;
 -		goto out;
 -	}
 -	error = 0;
 +	vp = tvp = ITOV(target);
 +	mp = vp->v_mount;
 +	*wait_ino = 0;
 +	if (target->i_number == source_ino)
 +		return (EEXIST);
 +	if (target->i_number == parent_ino)
 +		return (0);
  	if (target->i_number == ROOTINO)
 -		goto out;
 -
 +		return (0);
 +	error = 0;
  	for (;;) {
  		error = ufs_dir_dd_ino(vp, cred, &dd_ino);
  		if (error != 0)
 @@ -1415,9 +1426,13 @@ ufs_checkpath(ino_t source_ino, struct i
  		}
  		if (dd_ino == ROOTINO)
  			break;
 -		error = vn_vget_ino(vp, dd_ino, LK_EXCLUSIVE, &vp1);
 -		if (error != 0)
 +		if (dd_ino == parent_ino)
 +			break;
 +		error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1);
 +		if (error != 0) {
 +			*wait_ino = dd_ino;
  			break;
 +		}
  		/* Recheck that ".." still points to vp1 after relock of vp */
  		error = ufs_dir_dd_ino(vp, cred, &dd_ino);
  		if (error != 0) {
 @@ -1429,14 +1444,14 @@ ufs_checkpath(ino_t source_ino, struct i
  			vput(vp1);
  			continue;
  		}
 -		vput(vp);
 +		if (vp != tvp)
 +			vput(vp);
  		vp = vp1;
  	}
  
 -out:
  	if (error == ENOTDIR)
 -		printf("checkpath: .. not a directory\n");
 -	if (vp != NULL)
 +		panic("checkpath: .. not a directory\n");
 +	if (vp != tvp)
  		vput(vp);
  	return (error);
  }
 
 Modified: stable/8/sys/ufs/ufs/ufs_vnops.c
 ==============================================================================
 --- stable/8/sys/ufs/ufs/ufs_vnops.c	Sun Apr 24 10:47:56 2011	(r220985)
 +++ stable/8/sys/ufs/ufs/ufs_vnops.c	Sun Apr 24 11:01:42 2011	(r220986)
 @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
  #include <sys/kernel.h>
  #include <sys/fcntl.h>
  #include <sys/stat.h>
 +#include <sys/sysctl.h>
  #include <sys/bio.h>
  #include <sys/buf.h>
  #include <sys/mount.h>
 @@ -114,6 +115,8 @@ static vop_close_t	ufsfifo_close;
  static vop_kqfilter_t	ufsfifo_kqfilter;
  static vop_pathconf_t	ufsfifo_pathconf;
  
 +SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
 +
  /*
   * A virgin directory (no blushing please).
   */
 @@ -992,7 +995,7 @@ ufs_link(ap)
  	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
  	if (!error) {
  		ufs_makedirentry(ip, cnp, &newdir);
 -		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
 +		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
  	}
  
  	if (error) {
 @@ -1043,7 +1046,7 @@ ufs_whiteout(ap)
  		newdir.d_namlen = cnp->cn_namelen;
  		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
  		newdir.d_type = DT_WHT;
 -		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
 +		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
  		break;
  
  	case DELETE:
 @@ -1062,6 +1065,11 @@ ufs_whiteout(ap)
  	return (error);
  }
  
 +static volatile int rename_restarts;
 +SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
 +    __DEVOLATILE(int *, &rename_restarts), 0,
 +    "Times rename had to restart due to lock contention");
 +
  /*
   * Rename system call.
   * 	rename("foo", "bar");
 @@ -1101,111 +1109,185 @@ ufs_rename(ap)
  	struct vnode *tdvp = ap->a_tdvp;
  	struct vnode *fvp = ap->a_fvp;
  	struct vnode *fdvp = ap->a_fdvp;
 +	struct vnode *nvp;
  	struct componentname *tcnp = ap->a_tcnp;
  	struct componentname *fcnp = ap->a_fcnp;
  	struct thread *td = fcnp->cn_thread;
 -	struct inode *ip, *xp, *dp;
 +	struct inode *fip, *tip, *tdp, *fdp;
  	struct direct newdir;
 -	int doingdirectory = 0, oldparent = 0, newparent = 0;
 +	off_t endoff;
 +	int doingdirectory, newparent;
  	int error = 0, ioflag;
 -	ino_t fvp_ino;
 +	struct mount *mp;
 +	ino_t ino;
  
  #ifdef INVARIANTS
  	if ((tcnp->cn_flags & HASBUF) == 0 ||
  	    (fcnp->cn_flags & HASBUF) == 0)
  		panic("ufs_rename: no name");
  #endif
 +	endoff = 0;
 +	mp = tdvp->v_mount;
 +	VOP_UNLOCK(tdvp, 0);
 +	if (tvp && tvp != tdvp)
 +		VOP_UNLOCK(tvp, 0);
  	/*
  	 * Check for cross-device rename.
  	 */
  	if ((fvp->v_mount != tdvp->v_mount) ||
  	    (tvp && (fvp->v_mount != tvp->v_mount))) {
  		error = EXDEV;
 -abortit:
 -		if (tdvp == tvp)
 -			vrele(tdvp);
 -		else
 -			vput(tdvp);
 -		if (tvp)
 -			vput(tvp);
 -		vrele(fdvp);
 +		mp = NULL;
 +		goto releout;
 +	}
 +	error = vfs_busy(mp, 0);
 +	if (error) {
 +		mp = NULL;
 +		goto releout;
 +	}
 +relock:
 +	/* 
 +	 * We need to acquire 2 to 4 locks depending on whether tvp is NULL
 +	 * and fdvp and tdvp are the same directory.  Subsequently we need
 +	 * to double-check all paths and in the directory rename case we
 +	 * need to verify that we are not creating a directory loop.  To
 +	 * handle this we acquire all but fdvp using non-blocking
 +	 * acquisitions.  If we fail to acquire any lock in the path we will
 +	 * drop all held locks, acquire the new lock in a blocking fashion,
 +	 * and then release it and restart the rename.  This acquire/release
 +	 * step ensures that we do not spin on a lock waiting for release.
 +	 */
 +	error = vn_lock(fdvp, LK_EXCLUSIVE);
 +	if (error)
 +		goto releout;
 +	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 +		VOP_UNLOCK(fdvp, 0);
 +		error = vn_lock(tdvp, LK_EXCLUSIVE);
 +		if (error)
 +			goto releout;
 +		VOP_UNLOCK(tdvp, 0);
 +		atomic_add_int(&rename_restarts, 1);
 +		goto relock;
 +	}
 +	/*
 +	 * Re-resolve fvp to be certain it still exists and fetch the
 +	 * correct vnode.
 +	 */
 +	error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 +	if (error) {
 +		VOP_UNLOCK(fdvp, 0);
 +		VOP_UNLOCK(tdvp, 0);
 +		goto releout;
 +	}
 +	error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 +	if (error) {
 +		VOP_UNLOCK(fdvp, 0);
 +		VOP_UNLOCK(tdvp, 0);
 +		if (error != EBUSY)
 +			goto releout;
 +		error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 +		if (error != 0)
 +			goto releout;
 +		VOP_UNLOCK(nvp, 0);
  		vrele(fvp);
 -		return (error);
 +		fvp = nvp;
 +		atomic_add_int(&rename_restarts, 1);
 +		goto relock;
 +	}
 +	vrele(fvp);
 +	fvp = nvp;
 +	/*
 +	 * Re-resolve tvp and acquire the vnode lock if present.
 +	 */
 +	error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
 +	if (error != 0 && error != EJUSTRETURN) {
 +		VOP_UNLOCK(fdvp, 0);
 +		VOP_UNLOCK(tdvp, 0);
 +		VOP_UNLOCK(fvp, 0);
 +		goto releout;
  	}
 -
 +	/*
 +	 * If tvp disappeared we just carry on.
 +	 */
 +	if (error == EJUSTRETURN && tvp != NULL) {
 +		vrele(tvp);
 +		tvp = NULL;
 +	}
 +	/*
 +	 * Get the tvp ino if the lookup succeeded.  We may have to restart
 +	 * if the non-blocking acquire fails.
 +	 */
 +	if (error == 0) {
 +		nvp = NULL;
 +		error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 +		if (tvp)
 +			vrele(tvp);
 +		tvp = nvp;
 +		if (error) {
 +			VOP_UNLOCK(fdvp, 0);
 +			VOP_UNLOCK(tdvp, 0);
 +			VOP_UNLOCK(fvp, 0);
 +			if (error != EBUSY)
 +				goto releout;
 +			error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 +			if (error != 0)
 +				goto releout;
 +			VOP_UNLOCK(nvp, 0);
 +			atomic_add_int(&rename_restarts, 1);
 +			goto relock;
 +		}
 +	}
 +	fdp = VTOI(fdvp);
 +	fip = VTOI(fvp);
 +	tdp = VTOI(tdvp);
 +	tip = NULL;
 +	if (tvp)
 +		tip = VTOI(tvp);
  	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
  	    (VTOI(tdvp)->i_flags & APPEND))) {
  		error = EPERM;
 -		goto abortit;
 +		goto unlockout;
  	}
 -
  	/*
  	 * Renaming a file to itself has no effect.  The upper layers should
 -	 * not call us in that case.  Temporarily just warn if they do.
 +	 * not call us in that case.  However, things could change after
 +	 * we drop the locks above.
  	 */
  	if (fvp == tvp) {
 -		printf("ufs_rename: fvp == tvp (can't happen)\n");
  		error = 0;
 -		goto abortit;
 +		goto unlockout;
  	}
 -
 -	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
 -		goto abortit;
 -	dp = VTOI(fdvp);
 -	ip = VTOI(fvp);
 -	if (ip->i_nlink >= LINK_MAX) {
 -		VOP_UNLOCK(fvp, 0);
 +	doingdirectory = 0;
 +	newparent = 0;
 +	ino = fip->i_number;
 +	if (fip->i_nlink >= LINK_MAX) {
  		error = EMLINK;
 -		goto abortit;
 +		goto unlockout;
  	}
 -	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 -	    || (dp->i_flags & APPEND)) {
 -		VOP_UNLOCK(fvp, 0);
 +	if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 +	    || (fdp->i_flags & APPEND)) {
  		error = EPERM;
 -		goto abortit;
 +		goto unlockout;
  	}
 -	if ((ip->i_mode & IFMT) == IFDIR) {
 +	if ((fip->i_mode & IFMT) == IFDIR) {
  		/*
  		 * Avoid ".", "..", and aliases of "." for obvious reasons.
  		 */
  		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 -		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
 -		    (ip->i_flag & IN_RENAME)) {
 -			VOP_UNLOCK(fvp, 0);
 +		    fdp == fip ||
 +		    (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
  			error = EINVAL;
 -			goto abortit;
 +			goto unlockout;
  		}
 -		ip->i_flag |= IN_RENAME;
 -		oldparent = dp->i_number;
 +		if (fdp->i_number != tdp->i_number)
 +			newparent = tdp->i_number;
  		doingdirectory = 1;
  	}
 -	vrele(fdvp);
 -
 -	/*
 -	 * When the target exists, both the directory
 -	 * and target vnodes are returned locked.
 -	 */
 -	dp = VTOI(tdvp);
 -	xp = NULL;
 -	if (tvp)
 -		xp = VTOI(tvp);
 -
 -	/*
 -	 * 1) Bump link count while we're moving stuff
 -	 *    around.  If we crash somewhere before
 -	 *    completing our work, the link count
 -	 *    may be wrong, but correctable.
 -	 */
 -	ip->i_effnlink++;
 -	ip->i_nlink++;
 -	DIP_SET(ip, i_nlink, ip->i_nlink);
 -	ip->i_flag |= IN_CHANGE;
 -	if (DOINGSOFTDEP(fvp))
 -		softdep_change_linkcnt(ip);
 -	if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
 -				       DOINGASYNC(fvp)))) != 0) {
 -		VOP_UNLOCK(fvp, 0);
 -		goto bad;
 +	if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
 +	    (tvp != NULL && tvp->v_type == VDIR &&
 +	    tvp->v_mountedhere != NULL)) {
 +		error = EXDEV;
 +		goto unlockout;
  	}
  
  	/*
 @@ -1214,35 +1296,55 @@ abortit:
  	 * directory hierarchy above the target, as this would
  	 * orphan everything below the source directory. Also
  	 * the user must have write permission in the source so
 -	 * as to be able to change "..". We must repeat the call
 -	 * to namei, as the parent directory is unlocked by the
 -	 * call to checkpath().
 -	 */
 -	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
 -	fvp_ino = ip->i_number;
 -	VOP_UNLOCK(fvp, 0);
 -	if (oldparent != dp->i_number)
 -		newparent = dp->i_number;
 +	 * as to be able to change "..".
 +	 */
  	if (doingdirectory && newparent) {
 -		if (error)	/* write access check above */
 -			goto bad;
 -		if (xp != NULL)
 -			vput(tvp);
 -		error = ufs_checkpath(fvp_ino, dp, tcnp->cn_cred);
 +		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
  		if (error)
 -			goto out;
 +			goto unlockout;
 +		error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
 +		    &ino);
 +		/*
 +		 * We encountered a lock that we have to wait for.  Unlock
 +		 * everything else and VGET before restarting.
 +		 */
 +		if (ino) {
 +			VOP_UNLOCK(fdvp, 0);
 +			VOP_UNLOCK(fvp, 0);
 +			VOP_UNLOCK(tdvp, 0);
 +			if (tvp)
 +				VOP_UNLOCK(tvp, 0);
 +			error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
 +			if (error == 0)
 +				vput(nvp);
 +			atomic_add_int(&rename_restarts, 1);
 +			goto relock;
 +		}
 +		if (error)
 +			goto unlockout;
  		if ((tcnp->cn_flags & SAVESTART) == 0)
  			panic("ufs_rename: lost to startdir");
 -		VREF(tdvp);
 -		error = relookup(tdvp, &tvp, tcnp);
 -		if (error)
 -			goto out;
 -		vrele(tdvp);
 -		dp = VTOI(tdvp);
 -		xp = NULL;
 -		if (tvp)
 -			xp = VTOI(tvp);
  	}
 +	if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
 +	    tdp->i_effnlink == 0)
 +		panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
 +
 +	/*
 +	 * 1) Bump link count while we're moving stuff
 +	 *    around.  If we crash somewhere before
 +	 *    completing our work, the link count
 +	 *    may be wrong, but correctable.
 +	 */
 +	fip->i_effnlink++;
 +	fip->i_nlink++;
 +	DIP_SET(fip, i_nlink, fip->i_nlink);
 +	fip->i_flag |= IN_CHANGE;
 +	if (DOINGSOFTDEP(fvp))
 +		softdep_change_linkcnt(fip);
 +	error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
 +	if (error)
 +		goto bad;
 +
  	/*
  	 * 2) If target doesn't exist, link the target
  	 *    to the source and unlink the source.
 @@ -1250,52 +1352,37 @@ abortit:
  	 *    entry to reference the source inode and
  	 *    expunge the original entry's existence.
  	 */
 -	if (xp == NULL) {
 -		if (dp->i_dev != ip->i_dev)
 +	if (tip == NULL) {
 +		if (tdp->i_dev != fip->i_dev)
  			panic("ufs_rename: EXDEV");
 -		/*
 -		 * Account for ".." in new directory.
 -		 * When source and destination have the same
 -		 * parent we don't fool with the link count.
 -		 */
  		if (doingdirectory && newparent) {
 -			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
 +			/*
 +			 * Account for ".." in new directory.
 +			 * When source and destination have the same
 +			 * parent we don't adjust the link count.  The
 +			 * actual link modification is completed when
 +			 * .. is rewritten below.
 +			 */
 +			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
  				error = EMLINK;
  				goto bad;
  			}
 -			dp->i_effnlink++;
 -			dp->i_nlink++;
 -			DIP_SET(dp, i_nlink, dp->i_nlink);
 -			dp->i_flag |= IN_CHANGE;
 -			if (DOINGSOFTDEP(tdvp))
 -				softdep_change_linkcnt(dp);
 -			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
 -						   DOINGASYNC(tdvp)));
 -			if (error)
 -				goto bad;
  		}
 -		ufs_makedirentry(ip, tcnp, &newdir);
 -		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
 -		if (error) {
 -			if (doingdirectory && newparent) {
 -				dp->i_effnlink--;
 -				dp->i_nlink--;
 -				DIP_SET(dp, i_nlink, dp->i_nlink);
 -				dp->i_flag |= IN_CHANGE;
 -				if (DOINGSOFTDEP(tdvp))
 -					softdep_change_linkcnt(dp);
 -				(void)UFS_UPDATE(tdvp, 1);
 -			}
 +		ufs_makedirentry(fip, tcnp, &newdir);
 +		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
 +		if (error)
  			goto bad;
 -		}
 -		vput(tdvp);
 +		/* Setup tdvp for directory compaction if needed. */
 +		if (tdp->i_count && tdp->i_endoff &&
 +		    tdp->i_endoff < tdp->i_size)
 +			endoff = tdp->i_endoff;
  	} else {
 -		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
 +		if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
  			panic("ufs_rename: EXDEV");
  		/*
  		 * Short circuit rename(foo, foo).
  		 */
 -		if (xp->i_number == ip->i_number)
 +		if (tip->i_number == fip->i_number)
  			panic("ufs_rename: same file");
  		/*
  		 * If the parent directory is "sticky", then the caller
 @@ -1303,7 +1390,7 @@ abortit:
  		 * destination of the rename.  This implements append-only
  		 * directories.
  		 */
 -		if ((dp->i_mode & S_ISTXT) &&
 +		if ((tdp->i_mode & S_ISTXT) &&
  		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
  		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
  			error = EPERM;
 @@ -1314,9 +1401,9 @@ abortit:
  		 * to it. Also, ensure source and target are compatible
  		 * (both directories, or both not directories).
  		 */
 -		if ((xp->i_mode&IFMT) == IFDIR) {
 -			if ((xp->i_effnlink > 2) ||
 -			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
 +		if ((tip->i_mode & IFMT) == IFDIR) {
 +			if ((tip->i_effnlink > 2) ||
 +			    !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
  				error = ENOTEMPTY;
  				goto bad;
  			}
 @@ -1329,20 +1416,30 @@ abortit:
  			error = EISDIR;
  			goto bad;
  		}
 -		error = ufs_dirrewrite(dp, xp, ip->i_number,
 -		    IFTODT(ip->i_mode),
 -		    (doingdirectory && newparent) ? newparent : doingdirectory);
 -		if (error)
 -			goto bad;
  		if (doingdirectory) {
  			if (!newparent) {
 -				dp->i_effnlink--;
 +				tdp->i_effnlink--;
  				if (DOINGSOFTDEP(tdvp))
 -					softdep_change_linkcnt(dp);
 +					softdep_change_linkcnt(tdp);
  			}
 -			xp->i_effnlink--;
 +			tip->i_effnlink--;
  			if (DOINGSOFTDEP(tvp))
 -				softdep_change_linkcnt(xp);
 +				softdep_change_linkcnt(tip);
 +		}
 +		error = ufs_dirrewrite(tdp, tip, fip->i_number,
 +		    IFTODT(fip->i_mode),
 +		    (doingdirectory && newparent) ? newparent : doingdirectory);
 +		if (error) {
 +			if (doingdirectory) {
 +				if (!newparent) {
 +					tdp->i_effnlink++;
 +					if (DOINGSOFTDEP(tdvp))
 +						softdep_change_linkcnt(tdp);
 +				}
 +				tip->i_effnlink++;
 +				if (DOINGSOFTDEP(tvp))
 +					softdep_change_linkcnt(tip);
 +			}
  		}
  		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
  			/*
 @@ -1357,115 +1454,107 @@ abortit:
  			 * them now.
  			 */
  			if (!newparent) {
 -				dp->i_nlink--;
 -				DIP_SET(dp, i_nlink, dp->i_nlink);
 -				dp->i_flag |= IN_CHANGE;
 +				tdp->i_nlink--;
 +				DIP_SET(tdp, i_nlink, tdp->i_nlink);
 +				tdp->i_flag |= IN_CHANGE;
  			}
 -			xp->i_nlink--;
 -			DIP_SET(xp, i_nlink, xp->i_nlink);
 -			xp->i_flag |= IN_CHANGE;
 +			tip->i_nlink--;
 +			DIP_SET(tip, i_nlink, tip->i_nlink);
 +			tip->i_flag |= IN_CHANGE;
  			ioflag = IO_NORMAL;
  			if (!DOINGASYNC(tvp))
  				ioflag |= IO_SYNC;
 +			/* Don't go to bad here as the new link exists. */
  			if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
  			    tcnp->cn_cred, tcnp->cn_thread)) != 0)
 -				goto bad;
 +				goto unlockout;
  		}
 -		vput(tdvp);
 -		vput(tvp);
 -		xp = NULL;
  	}
  
  	/*
 -	 * 3) Unlink the source.
 +	 * 3) Unlink the source.  We have to resolve the path again to
 +	 * fixup the directory offset and count for ufs_dirremove.
  	 */
 -	fcnp->cn_flags &= ~MODMASK;
 -	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
 -	if ((fcnp->cn_flags & SAVESTART) == 0)
 -		panic("ufs_rename: lost from startdir");
 -	VREF(fdvp);
 -	error = relookup(fdvp, &fvp, fcnp);
 -	if (error == 0)
 -		vrele(fdvp);
 -	if (fvp != NULL) {
 -		xp = VTOI(fvp);
 -		dp = VTOI(fdvp);
 -	} else {
 -		/*
 -		 * From name has disappeared.  IN_RENAME is not sufficient
 -		 * to protect against directory races due to timing windows,
 -		 * so we have to remove the panic.  XXX the only real way
 -		 * to solve this issue is at a much higher level.  By the
 -		 * time we hit ufs_rename() it's too late.
 -		 */
 -#if 0
 -		if (doingdirectory)
 -			panic("ufs_rename: lost dir entry");
 -#endif
 -		vrele(ap->a_fvp);
 -		return (0);
 +	if (fdvp == tdvp) {
 +		error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 +		if (error)
 +			panic("ufs_rename: from entry went away!");
 +		if (ino != fip->i_number)
 +			panic("ufs_rename: ino mismatch %d != %d\n", ino,
 +			    fip->i_number);
  	}
  	/*
 -	 * Ensure that the directory entry still exists and has not
 -	 * changed while the new name has been entered. If the source is
 -	 * a file then the entry may have been unlinked or renamed. In
 -	 * either case there is no further work to be done. If the source
 -	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
 -	 * flag ensures that it cannot be moved by another rename or removed
 -	 * by a rmdir.
 -	 */
 -	if (xp != ip) {
 -		/*
 -		 * From name resolves to a different inode.  IN_RENAME is
 -		 * not sufficient protection against timing window races
 -		 * so we can't panic here.  XXX the only real way
 -		 * to solve this issue is at a much higher level.  By the
 -		 * time we hit ufs_rename() it's too late.
 -		 */
 -#if 0
 -		if (doingdirectory)
 -			panic("ufs_rename: lost dir entry");
 -#endif
 -	} else {
 +	 * If the source is a directory with a
 +	 * new parent, the link count of the old
 +	 * parent directory must be decremented
 +	 * and ".." set to point to the new parent.
 +	 */
 +	if (doingdirectory && newparent) {
  		/*
 -		 * If the source is a directory with a
 -		 * new parent, the link count of the old
 -		 * parent directory must be decremented
 -		 * and ".." set to point to the new parent.
 +		 * If tip exists we simply use its link, otherwise we must
 +		 * add a new one.
  		 */
 -		if (doingdirectory && newparent) {
 -			xp->i_offset = mastertemplate.dot_reclen;
 -			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
 -			cache_purge(fdvp);
 -		}
 -		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
 -		xp->i_flag &= ~IN_RENAME;
 -	}
 -	if (dp)
 -		vput(fdvp);
 -	if (xp)
 -		vput(fvp);
 -	vrele(ap->a_fvp);
 +		if (tip == NULL) {
 +			tdp->i_effnlink++;
 +			tdp->i_nlink++;
 +			DIP_SET(tdp, i_nlink, tdp->i_nlink);
 +			tdp->i_flag |= IN_CHANGE;
 +			if (DOINGSOFTDEP(tdvp))
 +				softdep_change_linkcnt(tdp);
 +			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
 +						   DOINGASYNC(tdvp)));
 +			/* Don't go to bad here as the new link exists. */
 +			if (error)
 +				goto unlockout;
 +		}
 +		fip->i_offset = mastertemplate.dot_reclen;
 +		ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
 +		cache_purge(fdvp);
 +	}
 +	error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
 +
 +unlockout:
 +	vput(fdvp);
 +	vput(fvp);
 +	if (tvp)
 +		vput(tvp);
 +	/*
 +	 * If compaction or fsync was requested do it now that other locks
 +	 * are no longer needed.
 +	 */
 +	if (error == 0 && endoff != 0) {
 +#ifdef UFS_DIRHASH
 +		if (tdp->i_dirhash != NULL)
 +			ufsdirhash_dirtrunc(tdp, endoff);
 +#endif
 +		UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
 +		    td);
 +	}
 +	if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
 +		error = VOP_FSYNC(tdvp, MNT_WAIT, td);
 +	vput(tdvp);
 +	if (mp)
 +		vfs_unbusy(mp);
  	return (error);
  
  bad:
 -	if (xp)
 -		vput(ITOV(xp));
 -	vput(ITOV(dp));
 -out:
 -	if (doingdirectory)
 -		ip->i_flag &= ~IN_RENAME;
 -	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
 -		ip->i_effnlink--;
 -		ip->i_nlink--;
 -		DIP_SET(ip, i_nlink, ip->i_nlink);
 -		ip->i_flag |= IN_CHANGE;
 -		ip->i_flag &= ~IN_RENAME;
 -		if (DOINGSOFTDEP(fvp))
 -			softdep_change_linkcnt(ip);
 -		vput(fvp);
 -	} else
 -		vrele(fvp);
 +	fip->i_effnlink--;
 +	fip->i_nlink--;
 +	DIP_SET(fip, i_nlink, fip->i_nlink);
 +	fip->i_flag |= IN_CHANGE;
 +	if (DOINGSOFTDEP(fvp))
 +		softdep_change_linkcnt(fip);
 +	goto unlockout;
 +
 +releout:
 
 *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
 _______________________________________________
 svn-src-all at freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe at freebsd.org"
 


More information about the freebsd-fs mailing list