RELENG_5 panic [nic: _mtx_lock_sleep: recursed on non-recursivemutex nfsd_mtx]

Robert Watson rwatson at freebsd.org
Mon Oct 18 04:01:50 PDT 2004


On Mon, 18 Oct 2004, Robert Watson wrote:

> If this is timing-related, it could be KTR disrupts the timing
> sufficiently to prevent it.  More likely, it went away because the work
> load on the NFS server changed, or the directory layout changed, or some
> other factor that caused the passage through the NFS code to change. 

Actually, I think I found it.  Did the drive or partition layout in your
NFS server change recently?  If so, that would cause the fsid of the file
system to change, resulting in ESTALE errors being returned to the client; 
I found one or two nits in the error handling for vfs_getvfs()  that would
result in the problem you saw.  The attached patch should correct, as well
as add additional assertions to catch related bugs; note that this patch
appears to work fine in my environment, but that the additional assertions
could detect bugs I'm not seeing in my environment for similar reasons to
the ones that prevented me from seeing the one you just reported. 

FYI, when the last client referencing the old FSID was rebooted, the
problem would have gone away as that set of ESTALE conditions would no
longer be triggered.

Robert N M Watson             FreeBSD Core Team, TrustedBSD Projects
robert at fledge.watson.org      Principal Research Scientist, McAfee Research

> >  - aW
> > 
> > 
> > 
> > 	0n Sun, Oct 17, 2004 at 10:19:34AM -0400, Robert Watson wrote: 
> > 	
> > 	On Fri, 15 Oct 2004, Wilkinson, Alex wrote:
> > 	
> > 	> Hi all,
> > 	> 
> > 	> I currently get a panic with "nfs_server_enable=YES" in /etc/rc.conf.
> > 	> 
> > 	> OS: FreeBSD 5.3-BETA4 #2: Tue Sep 14 13:55:30 UTC 2004
> > 	> 
> > 	> Backtrace
> > 	> ---------
> > 	> 
> > 	> panic: _mtx_lock_sleep: recursed on non-recursive mutex nfsd_mtx @ /usr /src/sys/nfsserver/nfs_serv.c:1947
> > 	
> > 	Is the NFS server code compiled into your kernel, or is it getting loaded
> > 	as a module?  Do you have any other NFS-related entries in /etc/rc.conf?
> > 	Could you show the output of "show locks" and "show witness" with witness
> > 	compiled into the kernel?
> > 	
> > 	Thanks!
> > 	
> > 	Robert N M Watson             FreeBSD Core Team, TrustedBSD Projects
> > 	robert at fledge.watson.org      Principal Research Scientist, McAfee Research
> > 	
> > 	
> > 	> cpuid = 0
> > 	> KDB: enter: panic
> > 	> [thread 100074]
> > 	> Stopped at      kdb_enter+0x32: leave
> > 	> db> tr
> > 	> kdb_enter(c068ba66,0,c068aed8,dd2ed90c,c1b6b340) at kdb_enter+0x32
> > 	> panic(c068aed8,c069885f,c0698617,79b,c0698617) at panic+0x1b0
> > 	> _mtx_lock_sleep(c071c940,c1b6b340,0,c0698617,79b) at _mtx_lock_sleep+0x16e
> > 	> _mtx_lock_flags(c071c940,0,c0698617,79b,0) at _mtx_lock_flags+0xb0
> > 	> nfsrv_create(c1eb6800,c1b98800,c1b6b340,dd2edc8c,0) at nfsrv_create+0x8c4
> > 	> nfssvc(c1b6b340,dd2edd14,8,0,2) at nfssvc+0x6ea
> > 	> syscall(2f,2f,2f,0,0) at syscall+0x13b
> > 	> Xint0x80_syscall() at Xint0x80_syscall+0x1f
> > 	> --- syscall (155, FreeBSD ELF32, nfssvc), eip = 0x280c60bf, esp = 0xbfbfeb1c, eb p = 0xbfbfeb38 ---
> > 	> 
> > 	> 
> > 	>  - aW
> > 	> _______________________________________________
> > 	> freebsd-current at freebsd.org mailing list
> > 	> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> > 	> To unsubscribe, send any mail to "freebsd-current-unsubscribe at freebsd.org"
> > 	> 
> > 	
> > _______________________________________________
> > freebsd-current at freebsd.org mailing list
> > http://lists.freebsd.org/mailman/listinfo/freebsd-current
> > To unsubscribe, send any mail to "freebsd-current-unsubscribe at freebsd.org"
> > 
> 
> _______________________________________________
> freebsd-current at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe at freebsd.org"
> 
-------------- next part --------------
Index: nfs_serv.c
===================================================================
RCS file: /home/ncvs/src/sys/nfsserver/nfs_serv.c,v
retrieving revision 1.148
diff -u -r1.148 nfs_serv.c
--- nfs_serv.c	25 Aug 2004 16:52:59 -0000	1.148
+++ nfs_serv.c	18 Oct 2004 09:53:52 -0000
@@ -225,6 +225,7 @@
 	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(nfsmode);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -285,6 +286,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -412,6 +414,7 @@
 		mtx_unlock(&Giant);	/* VFS */
 		NFSD_LOCK();
 	}
+	NFSD_LOCK_ASSERT();
 
 	/*
 	 * If the size is being changed write acces is required, otherwise
@@ -439,6 +442,7 @@
 	if (!error)
 		error = postat_ret;
 out:
+	NFSD_LOCK_ASSERT();
 	if (vp != NULL) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -460,6 +464,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (vp)
@@ -653,6 +658,7 @@
 	}
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (dirp)
@@ -771,6 +777,7 @@
 	mb->m_next = mp3;
 	mp3 = NULL;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (mp3)
 		m_freem(mp3);
 	if (vp) {
@@ -1040,6 +1047,7 @@
 	}
 	*tl = txdr_unsigned(cnt);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -1242,6 +1250,7 @@
 	if (!error)
 		error = aftat_ret;
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
 		2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
 	if (v3) {
@@ -1275,6 +1284,7 @@
 	}
 	error = 0;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (vp)
@@ -1386,6 +1396,7 @@
 	    }
 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
 nfsmout:
+		NFSD_LOCK_ASSERT();
 		m_freem(mrep);
 		error = EIO;
 		nfsm_writereply(2 * NFSX_UNSIGNED);
@@ -1719,7 +1730,7 @@
 	nfsm_srvmtofh(fhp);
 	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 		error = ESTALE;
-		goto ereply;
+		goto ereply_locked;
 	}
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
@@ -1943,8 +1954,11 @@
 		}
 	}
 ereply:
+	NFSD_UNLOCK_ASSERT();
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
+ereply_locked:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
 	if (v3) {
 		if (!error) {
@@ -1961,6 +1975,7 @@
 	error = 0;
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (nd.ni_startdir) {
@@ -2116,6 +2131,7 @@
 	 * send response, cleanup, return.
 	 */
 out:
+	NFSD_UNLOCK_ASSERT();
 	if (nd.ni_startdir) {
 		vrele(nd.ni_startdir);
 		nd.ni_startdir = NULL;
@@ -2146,9 +2162,10 @@
 		diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 		VOP_UNLOCK(dirp, 0, td);
 	}
-ereply:
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
+ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
 	if (v3) {
 		if (!error) {
@@ -2164,6 +2181,7 @@
 	NFSD_LOCK();
 	return (0);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (dirp)
@@ -2249,6 +2267,7 @@
 			goto out;
 		}
 out:
+		NFSD_UNLOCK_ASSERT();
 		if (!error) {
 			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 			NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -2280,12 +2299,14 @@
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_WCCDATA(v3));
 	if (v3) {
 		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 		error = 0;
 	}
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -2397,8 +2418,11 @@
 		vrele(tdirp);
 		tdirp = NULL;
 	}
-	if (error)
+	if (error) {
+		mtx_unlock(&Giant);	/* VFS */
+		NFSD_LOCK();
 		goto out1;
+	}
 
 	tdvp = tond.ni_dvp;
 	tvp = tond.ni_vp;
@@ -2455,6 +2479,7 @@
 	      fromnd.ni_cnd.cn_namelen))
 		error = -1;
 out:
+	NFSD_UNLOCK_ASSERT();
 	if (!error) {
 		/*
 		 * The VOP_RENAME function releases all vnode references &
@@ -2477,9 +2502,10 @@
 	}
 	/* fall through */
 
-out1:
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
+out1:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(2 * NFSX_WCCDATA(v3));
 	if (v3) {
 		/* Release existing locks to prevent deadlock. */
@@ -2518,6 +2544,7 @@
 	/*
 	 * Clear out tond related fields
 	 */
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (tdirp)
@@ -2680,6 +2707,7 @@
 	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 	if (v3) {
 		nfsm_srvpostop_attr(getret, &at);
@@ -2689,6 +2717,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -2744,6 +2773,8 @@
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
 	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
+		NFSD_UNLOCK();
+		mtx_lock(&Giant);	/* VFS */
 		error = ESTALE;
 		goto out;
 	}
@@ -2841,6 +2872,7 @@
 	    }
 	}
 out:
+	NFSD_UNLOCK_ASSERT();
 	/*
 	 * These releases aren't strictly required, does even doing them
 	 * make any sense? XXX can nfsm_reply() block?
@@ -2872,6 +2904,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -2930,6 +2963,8 @@
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
 	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
+		NFSD_UNLOCK();
+		mtx_lock(&Giant);	/* VFS */
 		error = ESTALE;
 		goto out;
 	}
@@ -3004,6 +3039,7 @@
 			error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
 	}
 out:
+	NFSD_UNLOCK_ASSERT();
 	if (dirp) {
 		if (dirp == nd.ni_dvp) {
 			diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
@@ -3048,6 +3084,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (dirp)
@@ -3152,6 +3189,7 @@
 	 * Issue or abort op.  Since SAVESTART is not set, path name
 	 * component is freed by the VOP after either.
 	 */
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (!error)
@@ -3187,6 +3225,7 @@
 	/* fall through */
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -3356,6 +3395,7 @@
 	 */
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
+	NFSD_UNLOCK_ASSERT();
 	iv.iov_base = rbuf;
 	iv.iov_len = fullsiz;
 	io.uio_iov = &iv;
@@ -3556,6 +3596,7 @@
 	FREE((caddr_t)cookies, M_TEMP);
 
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -3664,6 +3705,7 @@
 	VOP_UNLOCK(vp, 0, td);
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
+	NFSD_UNLOCK_ASSERT();
 	iv.iov_base = rbuf;
 	iv.iov_len = fullsiz;
 	io.uio_iov = &iv;
@@ -3897,6 +3939,7 @@
 			}
 		}
 invalid:
+		NFSD_UNLOCK_ASSERT();
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
@@ -3923,6 +3966,7 @@
 	FREE((caddr_t)cookies, M_TEMP);
 	FREE((caddr_t)rbuf, M_TEMP);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -4081,6 +4125,7 @@
 	vp = NULL;
 	NFSD_LOCK();
 ereply:
+	NFSD_LOCK_ASSERT();
 	nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
 	nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 	if (!error) {
@@ -4093,6 +4138,7 @@
 		error = 0;
 	}
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	if (vp)
@@ -4194,6 +4240,7 @@
 			sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
 	}
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -4280,6 +4327,7 @@
 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
 		NFSV3FSINFO_CANSETTIME);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -4361,6 +4409,7 @@
 	pc->pc_caseinsensitive = nfsrv_nfs_false;
 	pc->pc_casepreserving = nfsrv_nfs_true;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	if (vp) {
 		NFSD_UNLOCK();
 		mtx_lock(&Giant);	/* VFS */
@@ -4389,6 +4438,7 @@
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	nfsm_reply(0);
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	return (error);
 }
 
@@ -4415,6 +4465,7 @@
 	nfsm_reply(0);
 	error = 0;
 nfsmout:
+	NFSD_LOCK_ASSERT();
 	return (error);
 }
 
@@ -4440,7 +4491,6 @@
 	int error;
 
 	NFSD_LOCK_ASSERT();
-	NFSD_UNLOCK();
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	if (flags & VWRITE) {
@@ -4455,8 +4505,7 @@
 			case VREG:
 			case VDIR:
 			case VLNK:
-				error = EROFS;
-				goto out;
+				return (EROFS);
 			default:
 				break;
 			}
@@ -4465,15 +4514,14 @@
 		 * If there's shared text associated with
 		 * the inode, we can't allow writing.
 		 */
-		if (vp->v_vflag & VV_TEXT) {
-			NFSD_LOCK();
+		if (vp->v_vflag & VV_TEXT)
 			return (ETXTBSY);
-		}
 	}
+	NFSD_UNLOCK();
 	mtx_lock(&Giant);	/* VFS */
 	error = VOP_GETATTR(vp, &vattr, cred, td);
 	if (error)
-		goto out2;
+		goto out;
 	error = VOP_ACCESS(vp, flags, cred, td);
 	/*
 	 * Allow certain operations for the owner (reads and writes
@@ -4481,9 +4529,9 @@
 	 */
 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
 		error = 0;
-out2:
-	mtx_unlock(&Giant);	/* VFS */
 out:
+	NFSD_UNLOCK_ASSERT();
+	mtx_unlock(&Giant);	/* VFS */
 	NFSD_LOCK();
 	return error;
 }


More information about the freebsd-current mailing list