svn commit: r187838 - head/sys/fs/cd9660

John Baldwin jhb at FreeBSD.org
Wed Jan 28 10:54:58 PST 2009


Author: jhb
Date: Wed Jan 28 18:54:56 2009
New Revision: 187838
URL: http://svn.freebsd.org/changeset/base/187838

Log:
  Mark cd9660 MPSAFE and add support for using shared vnode locks during
  pathname lookups.
  - Remove 'i_offset' and 'i_ino' from the ISO node structure and replace
    them with local variables in the lookup routine instead.
  - Cache a copy of 'i_diroff' for use during a lookup in a local variable.
  - Save a copy of the found directory entry in a malloc'd buffer after a
    successfull lookup before getting the vnode.  This allows us to release
    the buffer holding the directory block before calling vget() which
    otherwise resulted in a LOR between "bufwait" and the vnode lock.
  - Use an inlined version of vn_vget_ino() to handle races with ..
    lookups.  I had to inline the code here since cd9660 uses an internal
    vget routine to save a disk I/O that would otherwise re-read the
    directory block.
  - Honor the requested locking flags during lookups to allow for shared
    locking.
  - Honor the requested locking flags passed to VFS_ROOT() and VFS_VGET()
    similar to UFS.
  - Don't make every ISO 9660 vnode hold a reference on the vnode of the
    underlying device vnode of the mountpoint.  The mountpoint already
    holds a suitable reference.

Modified:
  head/sys/fs/cd9660/cd9660_lookup.c
  head/sys/fs/cd9660/cd9660_node.c
  head/sys/fs/cd9660/cd9660_node.h
  head/sys/fs/cd9660/cd9660_vfsops.c

Modified: head/sys/fs/cd9660/cd9660_lookup.c
==============================================================================
--- head/sys/fs/cd9660/cd9660_lookup.c	Wed Jan 28 18:51:11 2009	(r187837)
+++ head/sys/fs/cd9660/cd9660_lookup.c	Wed Jan 28 18:54:56 2009	(r187838)
@@ -94,28 +94,33 @@ cd9660_lookup(ap)
 	struct iso_node *dp;		/* inode for directory being searched */
 	struct iso_mnt *imp;		/* filesystem that directory is in */
 	struct buf *bp;			/* a buffer of directory entries */
-	struct iso_directory_record *ep = 0;/* the current directory entry */
+	struct iso_directory_record *ep;/* the current directory entry */
+	struct iso_directory_record *ep2;/* copy of current directory entry */
 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
 	int saveoffset = 0;		/* offset of last directory entry in dir */
+	doff_t i_diroff;		/* cached i_diroff value. */
+	doff_t i_offset;		/* cached i_offset value. */
 	int numdirpasses;		/* strategy for directory search */
 	doff_t endsearch;		/* offset to end directory search */
 	struct vnode *pdp;		/* saved dp during symlink work */
 	struct vnode *tdp;		/* returned by cd9660_vget_internal */
 	u_long bmask;			/* block offset mask */
 	int error;
-	ino_t ino = 0, saved_ino;
-	int reclen;
+	ino_t ino, i_ino;
+	int ltype, reclen;
 	u_short namelen;
 	int isoflags;
 	char altname[NAME_MAX];
 	int res;
 	int assoc, len;
 	char *name;
+	struct mount *mp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 
+	ep2 = ep = NULL;
 	bp = NULL;
 	*vpp = NULL;
 	vdp = ap->a_dvp;
@@ -125,9 +130,11 @@ cd9660_lookup(ap)
 	/*
 	 * We now have a segment name to search for, and a directory to search.
 	 */
-
+	ino = reclen = 0;
+	i_diroff = dp->i_diroff;
 	len = cnp->cn_namelen;
 	name = cnp->cn_nameptr;
+
 	/*
 	 * A leading `=' means, we are looking for an associated file
 	 */
@@ -149,15 +156,14 @@ cd9660_lookup(ap)
 	 * of simplicity.
 	 */
 	bmask = imp->im_bmask;
-	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
-	    dp->i_diroff > dp->i_size) {
+	if (nameiop != LOOKUP || i_diroff == 0 || i_diroff > dp->i_size) {
 		entryoffsetinblock = 0;
-		dp->i_offset = 0;
+		i_offset = 0;
 		numdirpasses = 1;
 	} else {
-		dp->i_offset = dp->i_diroff;
-		if ((entryoffsetinblock = dp->i_offset & bmask) &&
-		    (error = cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)))
+		i_offset = i_diroff;
+		if ((entryoffsetinblock = i_offset & bmask) &&
+		    (error = cd9660_blkatoff(vdp, (off_t)i_offset, NULL, &bp)))
 				return (error);
 		numdirpasses = 2;
 		nchstats.ncs_2passes++;
@@ -165,17 +171,17 @@ cd9660_lookup(ap)
 	endsearch = dp->i_size;
 
 searchloop:
-	while (dp->i_offset < endsearch) {
+	while (i_offset < endsearch) {
 		/*
 		 * If offset is on a block boundary,
 		 * read the next directory block.
 		 * Release previous if it exists.
 		 */
-		if ((dp->i_offset & bmask) == 0) {
+		if ((i_offset & bmask) == 0) {
 			if (bp != NULL)
 				brelse(bp);
 			if ((error =
-			    cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
+			    cd9660_blkatoff(vdp, (off_t)i_offset, NULL, &bp)) != 0)
 				return (error);
 			entryoffsetinblock = 0;
 		}
@@ -188,8 +194,8 @@ searchloop:
 		reclen = isonum_711(ep->length);
 		if (reclen == 0) {
 			/* skip to next block, if any */
-			dp->i_offset =
-			    (dp->i_offset & ~bmask) + imp->logical_block_size;
+			i_offset =
+			    (i_offset & ~bmask) + imp->logical_block_size;
 			continue;
 		}
 
@@ -224,7 +230,7 @@ searchloop:
 						 * Save directory entry's inode number and
 						 * release directory buffer.
 						 */
-						dp->i_ino = isodirino(ep, imp);
+						i_ino = isodirino(ep, imp);
 						goto found;
 					}
 					if (namelen != 1
@@ -241,7 +247,7 @@ searchloop:
 					else
 						ino = dbtob(bp->b_blkno)
 							+ entryoffsetinblock;
-					saveoffset = dp->i_offset;
+					saveoffset = i_offset;
 				} else if (ino)
 					goto foundino;
 #ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
@@ -257,22 +263,22 @@ searchloop:
 				ino = isodirino(ep, imp);
 			else
 				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
-			dp->i_ino = ino;
-			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+			i_ino = ino;
+			cd9660_rrip_getname(ep, altname, &namelen, &i_ino, imp);
 			if (namelen == cnp->cn_namelen
 			    && !bcmp(name,altname,namelen))
 				goto found;
 			ino = 0;
 			break;
 		}
-		dp->i_offset += reclen;
+		i_offset += reclen;
 		entryoffsetinblock += reclen;
 	}
 	if (ino) {
 foundino:
-		dp->i_ino = ino;
-		if (saveoffset != dp->i_offset) {
-			if (lblkno(imp, dp->i_offset) !=
+		i_ino = ino;
+		if (saveoffset != i_offset) {
+			if (lblkno(imp, i_offset) !=
 			    lblkno(imp, saveoffset)) {
 				if (bp != NULL)
 					brelse(bp);
@@ -283,7 +289,8 @@ foundino:
 			entryoffsetinblock = saveoffset & bmask;
 			ep = (struct iso_directory_record *)
 				((char *)bp->b_data + entryoffsetinblock);
-			dp->i_offset = saveoffset;
+			reclen = isonum_711(ep->length);
+			i_offset = saveoffset;
 		}
 		goto found;
 	}
@@ -294,8 +301,8 @@ notfound:
 	 */
 	if (numdirpasses == 2) {
 		numdirpasses--;
-		dp->i_offset = 0;
-		endsearch = dp->i_diroff;
+		i_offset = 0;
+		endsearch = i_diroff;
 		goto searchloop;
 	}
 	if (bp != NULL)
@@ -320,10 +327,10 @@ found:
 	 * in the cache as to where the entry was found.
 	 */
 	if ((flags & ISLASTCN) && nameiop == LOOKUP)
-		dp->i_diroff = dp->i_offset;
+		dp->i_diroff = i_offset;
 
 	/*
-	 * Step through the translation in the name.  We do not `iput' the
+	 * Step through the translation in the name.  We do not `vput' the
 	 * directory because we may need it again if a symbolic link
 	 * is relative to the current directory.  Instead we save it
 	 * unlocked as "pdp".  We must get the target inode before unlocking
@@ -333,7 +340,7 @@ found:
 	 * when following backward pointers ".." we must unlock the
 	 * parent directory before getting the requested directory.
 	 * There is a potential race condition here if both the current
-	 * and parent directories are removed before the `iget' for the
+	 * and parent directories are removed before the `vget' for the
 	 * inode associated with ".." returns.  We hope that this occurs
 	 * infrequently since we cannot avoid this race condition without
 	 * implementing a sophisticated deadlock detection algorithm.
@@ -342,30 +349,75 @@ found:
 	 * that point backwards in the directory structure.
 	 */
 	pdp = vdp;
+
 	/*
-	 * If ino is different from dp->i_ino,
+	 * Make a copy of the directory entry for non "." lookups so
+	 * we can drop the buffer before calling vget() to avoid a
+	 * lock order reversal between the vnode lock and the buffer
+	 * lock.
+	 */
+	if (dp->i_number != i_ino) {
+		ep2 = malloc(reclen, M_TEMP, M_WAITOK);
+		bcopy(ep, ep2, reclen);
+		ep = ep2;
+	}
+	brelse(bp);
+
+	/*
+	 * If ino is different from i_ino,
 	 * it's a relocated directory.
 	 */
 	if (flags & ISDOTDOT) {
-		saved_ino = dp->i_ino;
-		VOP_UNLOCK(pdp, 0);	/* race to get the inode */
-		error = cd9660_vget_internal(vdp->v_mount, saved_ino,
-					     LK_EXCLUSIVE, &tdp,
-					     saved_ino != ino, ep);
-		brelse(bp);
-		vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY);
+		/*
+		 * Expanded copy of vn_vget_ino() so that we can use
+		 * cd9660_vget_internal().
+		 */
+		mp = pdp->v_mount;
+		ltype = VOP_ISLOCKED(pdp);
+		for (;;) {
+			error = vfs_busy(mp, MBF_NOWAIT);
+			if (error == 0)
+				break;
+			VOP_UNLOCK(pdp, 0);
+			pause("vn_vget", 1);
+			vn_lock(pdp, ltype | LK_RETRY);
+			if (pdp->v_iflag & VI_DOOMED)
+				return (ENOENT);
+		}
+		VOP_UNLOCK(pdp, 0);
+		error = cd9660_vget_internal(vdp->v_mount, i_ino,
+					     cnp->cn_lkflags, &tdp,
+					     i_ino != ino, ep);
+		free(ep2, M_TEMP);
+		vfs_unbusy(mp);
+		vn_lock(pdp, ltype | LK_RETRY);
+		if (pdp->v_iflag & VI_DOOMED) {
+			if (error == 0)
+				vput(tdp);
+			error = ENOENT;
+		}
 		if (error)
 			return (error);
 		*vpp = tdp;
-	} else if (dp->i_number == dp->i_ino) {
-		brelse(bp);
+	} else if (dp->i_number == i_ino) {
 		VREF(vdp);	/* we want ourself, ie "." */
+		/*
+		 * When we lookup "." we still can be asked to lock it
+		 * differently.
+		 */
+		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
+		if (ltype != VOP_ISLOCKED(vdp)) {
+			if (ltype == LK_EXCLUSIVE)
+				vn_lock(vdp, LK_UPGRADE | LK_RETRY);
+			else /* if (ltype == LK_SHARED) */
+				vn_lock(vdp, LK_DOWNGRADE | LK_RETRY);
+		}
 		*vpp = vdp;
 	} else {
-		error = cd9660_vget_internal(vdp->v_mount, dp->i_ino,
-					     LK_EXCLUSIVE, &tdp,
-					     dp->i_ino != ino, ep);
-		brelse(bp);
+		error = cd9660_vget_internal(vdp->v_mount, i_ino,
+					     cnp->cn_lkflags, &tdp,
+					     i_ino != ino, ep);
+		free(ep2, M_TEMP);
 		if (error)
 			return (error);
 		*vpp = tdp;

Modified: head/sys/fs/cd9660/cd9660_node.c
==============================================================================
--- head/sys/fs/cd9660/cd9660_node.c	Wed Jan 28 18:51:11 2009	(r187837)
+++ head/sys/fs/cd9660/cd9660_node.c	Wed Jan 28 18:54:56 2009	(r187838)
@@ -92,7 +92,6 @@ cd9660_reclaim(ap)
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
-	struct iso_node *ip = VTOI(vp);
 
 	if (prtactive && vrefcnt(vp) != 0)
 		vprint("cd9660_reclaim: pushing active", vp);
@@ -108,8 +107,6 @@ cd9660_reclaim(ap)
 	/*
 	 * Purge old data structures associated with the inode.
 	 */
-	if (ip->i_mnt->im_devvp)
-		vrele(ip->i_mnt->im_devvp);
 	free(vp->v_data, M_ISOFSNODE);
 	vp->v_data = NULL;
 	return (0);

Modified: head/sys/fs/cd9660/cd9660_node.h
==============================================================================
--- head/sys/fs/cd9660/cd9660_node.h	Wed Jan 28 18:51:11 2009	(r187837)
+++ head/sys/fs/cd9660/cd9660_node.h	Wed Jan 28 18:54:56 2009	(r187838)
@@ -64,8 +64,6 @@ struct iso_node {
 	struct	lockf *i_lockf;	/* head of byte-level lock list */
 	doff_t	i_endoff;	/* end of useful stuff in directory */
 	doff_t	i_diroff;	/* offset in dir, where we found last entry */
-	doff_t	i_offset;	/* offset of free space in directory */
-	ino_t	i_ino;		/* inode number of found directory */
 
 	long iso_extent;	/* extent of file */
 	unsigned long i_size;

Modified: head/sys/fs/cd9660/cd9660_vfsops.c
==============================================================================
--- head/sys/fs/cd9660/cd9660_vfsops.c	Wed Jan 28 18:51:11 2009	(r187837)
+++ head/sys/fs/cd9660/cd9660_vfsops.c	Wed Jan 28 18:54:56 2009	(r187838)
@@ -375,6 +375,7 @@ iso_mountfs(devvp, mp)
 	mp->mnt_maxsymlinklen = 0;
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED;
 	MNT_IUNLOCK(mp);
 	isomp->im_mountp = mp;
 	isomp->im_dev = dev;
@@ -545,7 +546,7 @@ cd9660_root(mp, flags, vpp, td)
 	 * With RRIP we must use the `.' entry of the root directory.
 	 * Simply tell vget, that it's a relocated directory.
 	 */
-	return (cd9660_vget_internal(mp, ino, LK_EXCLUSIVE, vpp,
+	return (cd9660_vget_internal(mp, ino, flags, vpp,
 	    imp->iso_ftype == ISO_FTYPE_RRIP, dp));
 }
 
@@ -659,6 +660,22 @@ cd9660_vget_internal(mp, ino, flags, vpp
 	if (error || *vpp != NULL)
 		return (error);
 
+	/*
+	 * We must promote to an exclusive lock for vnode creation.  This
+	 * can happen if lookup is passed LOCKSHARED.
+ 	 */
+	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
+		flags &= ~LK_TYPE_MASK;
+		flags |= LK_EXCLUSIVE;
+	}
+
+	/*
+	 * We do not lock vnode creation as it is believed to be too
+	 * expensive for such rare case as simultaneous creation of vnode
+	 * for same ino by different processes. We just allow them to race
+	 * and check later to decide who wins. Let the race begin!
+	 */
+
 	imp = VFSTOISOFS(mp);
 	dev = imp->im_dev;
 
@@ -739,7 +756,6 @@ cd9660_vget_internal(mp, ino, flags, vpp
 		bp = 0;
 
 	ip->i_mnt = imp;
-	VREF(imp->im_devvp);
 
 	if (relocated) {
 		/*
@@ -797,6 +813,7 @@ cd9660_vget_internal(mp, ino, flags, vpp
 		vp->v_op = &cd9660_fifoops;
 		break;
 	default:
+		VN_LOCK_ASHARE(vp);
 		break;
 	}
 


More information about the svn-src-all mailing list