svn commit: r238553 - stable/8/sys/fs/tmpfs
Xin LI
delphij at FreeBSD.org
Tue Jul 17 17:34:49 UTC 2012
Author: delphij
Date: Tue Jul 17 17:34:48 2012
New Revision: 238553
URL: http://svn.freebsd.org/changeset/base/238553
Log:
MFC the following revisions per request from gleb@:
r197953: Add locking around access to parent node, and bail out when
the parent node is already freed rather than panicking the system.
r227822: Avoid panics from recursive rename operations. Not a perfect
patch but good enough for now.
r232959: Don't enforce LK_RETRY to get existing vnode in
tmpfs_alloc_vp().
Doomed vnode is hardly of any use here, besides all callers handle error
case. vfs_hash_get() does the same.
Don't mess with vnode holdcount, vget() takes care of it already.
r232960: Prevent tmpfs_rename() deadlock in a way similar to UFS
Unlock vnodes and try to lock them one by one. Relookup fvp and tvp.
Modified:
stable/8/sys/fs/tmpfs/tmpfs.h
stable/8/sys/fs/tmpfs/tmpfs_subr.c
stable/8/sys/fs/tmpfs/tmpfs_vnops.c
Directory Properties:
stable/8/sys/ (props changed)
Modified: stable/8/sys/fs/tmpfs/tmpfs.h
==============================================================================
--- stable/8/sys/fs/tmpfs/tmpfs.h Tue Jul 17 14:36:40 2012 (r238552)
+++ stable/8/sys/fs/tmpfs/tmpfs.h Tue Jul 17 17:34:48 2012 (r238553)
@@ -304,10 +304,30 @@ LIST_HEAD(tmpfs_node_list, tmpfs_node);
#define TMPFS_NODE_LOCK(node) mtx_lock(&(node)->tn_interlock)
#define TMPFS_NODE_UNLOCK(node) mtx_unlock(&(node)->tn_interlock)
-#define TMPFS_NODE_MTX(node) (&(node)->tn_interlock)
+#define TMPFS_NODE_MTX(node) (&(node)->tn_interlock)
+
+#ifdef INVARIANTS
+#define TMPFS_ASSERT_LOCKED(node) do { \
+ MPASS(node != NULL); \
+ MPASS(node->tn_vnode != NULL); \
+ if (!VOP_ISLOCKED(node->tn_vnode) && \
+ !mtx_owned(TMPFS_NODE_MTX(node))) \
+ panic("tmpfs: node is not locked: %p", node); \
+ } while (0)
+#define TMPFS_ASSERT_ELOCKED(node) do { \
+ MPASS((node) != NULL); \
+ MPASS((node)->tn_vnode != NULL); \
+ mtx_assert(TMPFS_NODE_MTX(node), MA_OWNED); \
+ ASSERT_VOP_LOCKED((node)->tn_vnode, "tmpfs"); \
+ } while (0)
+#else
+#define TMPFS_ASSERT_LOCKED(node) (void)0
+#define TMPFS_ASSERT_ELOCKED(node) (void)0
+#endif
#define TMPFS_VNODE_ALLOCATING 1
#define TMPFS_VNODE_WANT 2
+#define TMPFS_VNODE_DOOMED 4
/* --------------------------------------------------------------------- */
/*
Modified: stable/8/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- stable/8/sys/fs/tmpfs/tmpfs_subr.c Tue Jul 17 14:36:40 2012 (r238552)
+++ stable/8/sys/fs/tmpfs/tmpfs_subr.c Tue Jul 17 17:34:48 2012 (r238553)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/systm.h>
+#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
@@ -55,6 +56,8 @@ __FBSDID("$FreeBSD$");
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_vnops.h>
+SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system");
+
/* --------------------------------------------------------------------- */
/*
@@ -124,7 +127,9 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
nnode->tn_dir.tn_readdir_lastn = 0;
nnode->tn_dir.tn_readdir_lastp = NULL;
nnode->tn_links++;
+ TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
nnode->tn_dir.tn_parent->tn_links++;
+ TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
break;
case VFIFO:
@@ -187,6 +192,7 @@ tmpfs_free_node(struct tmpfs_mount *tmp,
#ifdef INVARIANTS
TMPFS_NODE_LOCK(node);
MPASS(node->tn_vnode == NULL);
+ MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
TMPFS_NODE_UNLOCK(node);
#endif
@@ -314,11 +320,14 @@ tmpfs_alloc_vp(struct mount *mp, struct
loop:
TMPFS_NODE_LOCK(node);
if ((vp = node->tn_vnode) != NULL) {
+ MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
VI_LOCK(vp);
TMPFS_NODE_UNLOCK(node);
- vholdl(vp);
- (void) vget(vp, lkflag | LK_INTERLOCK | LK_RETRY, curthread);
- vdrop(vp);
+ error = vget(vp, lkflag | LK_INTERLOCK, curthread);
+ if (error != 0) {
+ vp = NULL;
+ goto out;
+ }
/*
* Make sure the vnode is still there after
@@ -332,6 +341,14 @@ loop:
goto out;
}
+ if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
+ (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
+ TMPFS_NODE_UNLOCK(node);
+ error = ENOENT;
+ vp = NULL;
+ goto out;
+ }
+
/*
* otherwise lock the vp list while we call getnewvnode
* since that can block.
@@ -377,6 +394,7 @@ loop:
vp->v_op = &tmpfs_fifoop_entries;
break;
case VDIR:
+ MPASS(node->tn_dir.tn_parent != NULL);
if (node->tn_dir.tn_parent == node)
vp->v_vflag |= VV_ROOT;
break;
@@ -407,11 +425,13 @@ unlock:
out:
*vpp = vp;
- MPASS(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
#ifdef INVARIANTS
- TMPFS_NODE_LOCK(node);
- MPASS(*vpp == node->tn_vnode);
- TMPFS_NODE_UNLOCK(node);
+ if (error == 0) {
+ MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp));
+ TMPFS_NODE_LOCK(node);
+ MPASS(*vpp == node->tn_vnode);
+ TMPFS_NODE_UNLOCK(node);
+ }
#endif
return error;
@@ -430,10 +450,9 @@ tmpfs_free_vp(struct vnode *vp)
node = VP_TO_TMPFS_NODE(vp);
- TMPFS_NODE_LOCK(node);
+ mtx_assert(TMPFS_NODE_MTX(node), MA_OWNED);
node->tn_vnode = NULL;
vp->v_data = NULL;
- TMPFS_NODE_UNLOCK(node);
}
/* --------------------------------------------------------------------- */
@@ -657,7 +676,18 @@ tmpfs_dir_getdotdotdent(struct tmpfs_nod
TMPFS_VALIDATE_DIR(node);
MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
+ /*
+ * Return ENOENT if the current node is already removed.
+ */
+ TMPFS_ASSERT_LOCKED(node);
+ if (node->tn_dir.tn_parent == NULL) {
+ return (ENOENT);
+ }
+
+ TMPFS_NODE_LOCK(node->tn_dir.tn_parent);
dent.d_fileno = node->tn_dir.tn_parent->tn_id;
+ TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent);
+
dent.d_type = DT_DIR;
dent.d_namlen = 2;
dent.d_name[0] = '.';
Modified: stable/8/sys/fs/tmpfs/tmpfs_vnops.c
==============================================================================
--- stable/8/sys/fs/tmpfs/tmpfs_vnops.c Tue Jul 17 14:36:40 2012 (r238552)
+++ stable/8/sys/fs/tmpfs/tmpfs_vnops.c Tue Jul 17 17:34:48 2012 (r238553)
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sf_buf.h>
#include <sys/stat.h>
#include <sys/systm.h>
+#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
@@ -60,6 +61,13 @@ __FBSDID("$FreeBSD$");
#include <fs/tmpfs/tmpfs_vnops.h>
#include <fs/tmpfs/tmpfs.h>
+SYSCTL_DECL(_vfs_tmpfs);
+
+static volatile int tmpfs_rename_restarts;
+SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
+ __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
+ "Times rename had to restart due to lock contention");
+
/* --------------------------------------------------------------------- */
static int
@@ -86,6 +94,11 @@ tmpfs_lookup(struct vop_cachedlookup_arg
dnode->tn_dir.tn_parent == dnode,
!(cnp->cn_flags & ISDOTDOT)));
+ TMPFS_ASSERT_LOCKED(dnode);
+ if (dnode->tn_dir.tn_parent == NULL) {
+ error = ENOENT;
+ goto out;
+ }
if (cnp->cn_flags & ISDOTDOT) {
int ltype = 0;
@@ -893,6 +906,139 @@ out:
/* --------------------------------------------------------------------- */
+/*
+ * We acquire all but fdvp locks using non-blocking acquisitions. If we
+ * fail to acquire any lock in the path we will drop all held locks,
+ * acquire the new lock in a blocking fashion, and then release it and
+ * restart the rename. This acquire/release step ensures that we do not
+ * spin on a lock waiting for release. On error release all vnode locks
+ * and decrement references the way tmpfs_rename() would do.
+ */
+static int
+tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
+ struct vnode *tdvp, struct vnode **tvpp,
+ struct componentname *fcnp, struct componentname *tcnp)
+{
+ struct vnode *nvp;
+ struct mount *mp;
+ struct tmpfs_dirent *de;
+ int error, restarts = 0;
+
+ VOP_UNLOCK(tdvp, 0);
+ if (*tvpp != NULL && *tvpp != tdvp)
+ VOP_UNLOCK(*tvpp, 0);
+ mp = fdvp->v_mount;
+
+relock:
+ restarts += 1;
+ error = vn_lock(fdvp, LK_EXCLUSIVE);
+ if (error)
+ goto releout;
+ if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+ VOP_UNLOCK(fdvp, 0);
+ error = vn_lock(tdvp, LK_EXCLUSIVE);
+ if (error)
+ goto releout;
+ VOP_UNLOCK(tdvp, 0);
+ goto relock;
+ }
+ /*
+ * Re-resolve fvp to be certain it still exists and fetch the
+ * correct vnode.
+ */
+ de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
+ if (de == NULL) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
+ (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
+ error = EINVAL;
+ else
+ error = ENOENT;
+ goto releout;
+ }
+ error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+ if (error != 0) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ if (error != EBUSY)
+ goto releout;
+ error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
+ if (error != 0)
+ goto releout;
+ VOP_UNLOCK(nvp, 0);
+ /*
+ * Concurrent rename race.
+ */
+ if (nvp == tdvp) {
+ vrele(nvp);
+ error = EINVAL;
+ goto releout;
+ }
+ vrele(*fvpp);
+ *fvpp = nvp;
+ goto relock;
+ }
+ vrele(*fvpp);
+ *fvpp = nvp;
+ VOP_UNLOCK(*fvpp, 0);
+ /*
+ * Re-resolve tvp and acquire the vnode lock if present.
+ */
+ de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
+ /*
+ * If tvp disappeared we just carry on.
+ */
+ if (de == NULL && *tvpp != NULL) {
+ vrele(*tvpp);
+ *tvpp = NULL;
+ }
+ /*
+ * Get the tvp ino if the lookup succeeded. We may have to restart
+ * if the non-blocking acquire fails.
+ */
+ if (de != NULL) {
+ nvp = NULL;
+ error = tmpfs_alloc_vp(mp, de->td_node,
+ LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+ if (*tvpp != NULL)
+ vrele(*tvpp);
+ *tvpp = nvp;
+ if (error != 0) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ if (error != EBUSY)
+ goto releout;
+ error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
+ &nvp);
+ if (error != 0)
+ goto releout;
+ VOP_UNLOCK(nvp, 0);
+ /*
+ * fdvp contains fvp, thus tvp (=fdvp) is not empty.
+ */
+ if (nvp == fdvp) {
+ error = ENOTEMPTY;
+ goto releout;
+ }
+ goto relock;
+ }
+ }
+ tmpfs_rename_restarts += restarts;
+
+ return (0);
+
+releout:
+ vrele(fdvp);
+ vrele(*fvpp);
+ vrele(tdvp);
+ if (*tvpp != NULL)
+ vrele(*tvpp);
+ tmpfs_rename_restarts += restarts;
+
+ return (error);
+}
+
static int
tmpfs_rename(struct vop_rename_args *v)
{
@@ -902,10 +1048,12 @@ tmpfs_rename(struct vop_rename_args *v)
struct vnode *tdvp = v->a_tdvp;
struct vnode *tvp = v->a_tvp;
struct componentname *tcnp = v->a_tcnp;
+ struct mount *mp = NULL;
char *newname;
int error;
struct tmpfs_dirent *de;
+ struct tmpfs_mount *tmp;
struct tmpfs_node *fdnode;
struct tmpfs_node *fnode;
struct tmpfs_node *tnode;
@@ -916,8 +1064,6 @@ tmpfs_rename(struct vop_rename_args *v)
MPASS(fcnp->cn_flags & HASBUF);
MPASS(tcnp->cn_flags & HASBUF);
- tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
-
/* Disallow cross-device renames.
* XXX Why isn't this done by the caller? */
if (fvp->v_mount != tdvp->v_mount ||
@@ -926,8 +1072,6 @@ tmpfs_rename(struct vop_rename_args *v)
goto out;
}
- tdnode = VP_TO_TMPFS_DIR(tdvp);
-
/* If source and target are the same file, there is nothing to do. */
if (fvp == tvp) {
error = 0;
@@ -936,11 +1080,37 @@ tmpfs_rename(struct vop_rename_args *v)
/* If we need to move the directory between entries, lock the
* source so that we can safely operate on it. */
- if (tdvp != fdvp) {
- error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
- if (error != 0)
- goto out;
+ if (fdvp != tdvp && fdvp != tvp) {
+ if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+ mp = tdvp->v_mount;
+ error = vfs_busy(mp, 0);
+ if (error != 0) {
+ mp = NULL;
+ goto out;
+ }
+ error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
+ fcnp, tcnp);
+ if (error != 0) {
+ vfs_unbusy(mp);
+ return (error);
+ }
+ ASSERT_VOP_ELOCKED(fdvp,
+ "tmpfs_rename: fdvp not locked");
+ ASSERT_VOP_ELOCKED(tdvp,
+ "tmpfs_rename: tdvp not locked");
+ if (tvp != NULL)
+ ASSERT_VOP_ELOCKED(tvp,
+ "tmpfs_rename: tvp not locked");
+ if (fvp == tvp) {
+ error = 0;
+ goto out_locked;
+ }
+ }
}
+
+ tmp = VFS_TO_TMPFS(tdvp->v_mount);
+ tdnode = VP_TO_TMPFS_DIR(tdvp);
+ tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
fdnode = VP_TO_TMPFS_DIR(fdvp);
fnode = VP_TO_TMPFS_NODE(fvp);
de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
@@ -1014,25 +1184,63 @@ tmpfs_rename(struct vop_rename_args *v)
* directory being moved. Otherwise, we'd end up
* with stale nodes. */
n = tdnode;
+ /* TMPFS_LOCK garanties that no nodes are freed while
+ * traversing the list. Nodes can only be marked as
+ * removed: tn_parent == NULL. */
+ TMPFS_LOCK(tmp);
+ TMPFS_NODE_LOCK(n);
while (n != n->tn_dir.tn_parent) {
+ struct tmpfs_node *parent;
+
if (n == fnode) {
+ TMPFS_NODE_UNLOCK(n);
+ TMPFS_UNLOCK(tmp);
error = EINVAL;
if (newname != NULL)
free(newname, M_TMPFSNAME);
goto out_locked;
}
- n = n->tn_dir.tn_parent;
+ parent = n->tn_dir.tn_parent;
+ TMPFS_NODE_UNLOCK(n);
+ if (parent == NULL) {
+ n = NULL;
+ break;
+ }
+ TMPFS_NODE_LOCK(parent);
+ if (parent->tn_dir.tn_parent == NULL) {
+ TMPFS_NODE_UNLOCK(parent);
+ n = NULL;
+ break;
+ }
+ n = parent;
}
+ TMPFS_UNLOCK(tmp);
+ if (n == NULL) {
+ error = EINVAL;
+ if (newname != NULL)
+ free(newname, M_TMPFSNAME);
+ goto out_locked;
+ }
+ TMPFS_NODE_UNLOCK(n);
/* Adjust the parent pointer. */
TMPFS_VALIDATE_DIR(fnode);
+ TMPFS_NODE_LOCK(de->td_node);
de->td_node->tn_dir.tn_parent = tdnode;
+ TMPFS_NODE_UNLOCK(de->td_node);
/* As a result of changing the target of the '..'
* entry, the link count of the source and target
* directories has to be adjusted. */
- fdnode->tn_links--;
+ TMPFS_NODE_LOCK(tdnode);
+ TMPFS_ASSERT_LOCKED(tdnode);
tdnode->tn_links++;
+ TMPFS_NODE_UNLOCK(tdnode);
+
+ TMPFS_NODE_LOCK(fdnode);
+ TMPFS_ASSERT_LOCKED(fdnode);
+ fdnode->tn_links--;
+ TMPFS_NODE_UNLOCK(fdnode);
}
/* Do the move: just remove the entry from the source directory
@@ -1078,7 +1286,7 @@ tmpfs_rename(struct vop_rename_args *v)
error = 0;
out_locked:
- if (fdnode != tdnode)
+ if (fdvp != tdvp && fdvp != tvp)
VOP_UNLOCK(fdvp, 0);
out:
@@ -1096,6 +1304,9 @@ out:
vrele(fdvp);
vrele(fvp);
+ if (mp != NULL)
+ vfs_unbusy(mp);
+
return error;
}
@@ -1166,17 +1377,28 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
goto out;
}
+
/* Detach the directory entry from the directory (dnode). */
tmpfs_dir_detach(dvp, de);
if (v->a_cnp->cn_flags & DOWHITEOUT)
tmpfs_dir_whiteout_add(dvp, v->a_cnp);
+ /* No vnode should be allocated for this entry from this point */
+ TMPFS_NODE_LOCK(node);
+ TMPFS_ASSERT_ELOCKED(node);
node->tn_links--;
+ node->tn_dir.tn_parent = NULL;
node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
TMPFS_NODE_MODIFIED;
- node->tn_dir.tn_parent->tn_links--;
- node->tn_dir.tn_parent->tn_status |= TMPFS_NODE_ACCESSED | \
+
+ TMPFS_NODE_UNLOCK(node);
+
+ TMPFS_NODE_LOCK(dnode);
+ TMPFS_ASSERT_ELOCKED(dnode);
+ dnode->tn_links--;
+ dnode->tn_status |= TMPFS_NODE_ACCESSED | \
TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
+ TMPFS_NODE_UNLOCK(dnode);
cache_purge(dvp);
cache_purge(vp);
@@ -1364,13 +1586,21 @@ tmpfs_reclaim(struct vop_reclaim_args *v
vnode_destroy_vobject(vp);
cache_purge(vp);
+
+ TMPFS_NODE_LOCK(node);
+ TMPFS_ASSERT_ELOCKED(node);
tmpfs_free_vp(vp);
/* If the node referenced by this vnode was deleted by the user,
* we must free its associated data structures (now that the vnode
* is being reclaimed). */
- if (node->tn_links == 0)
+ if (node->tn_links == 0 &&
+ (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
+ node->tn_vpstate = TMPFS_VNODE_DOOMED;
+ TMPFS_NODE_UNLOCK(node);
tmpfs_free_node(tmp, node);
+ } else
+ TMPFS_NODE_UNLOCK(node);
MPASS(vp->v_data == NULL);
return 0;
More information about the svn-src-stable
mailing list