svn commit: r208287 - head/sys/ufs/ffs

Jeff Roberson jeff at FreeBSD.org
Wed May 19 06:18:02 UTC 2010


Author: jeff
Date: Wed May 19 06:18:01 2010
New Revision: 208287
URL: http://svn.freebsd.org/changeset/base/208287

Log:
   - Don't immediately re-run softdepflush if we didn't make any progress
     on the last iteration.  Re-running immediately can lead to a deadlock
     when we have worklist items that cannot be immediately satisfied.
  
  Reported by:	uqs, Dimitry Andric <dimitry at andric.com>
  
   - Remove some unnecessary debugging code and place some other under
     SUJ_DEBUG.
   - Examine the journal state in softdep_slowdown().
   - Re-format some comments so I may more easily add flag descriptions.

Modified:
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ffs/softdep.h

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Wed May 19 04:00:42 2010	(r208286)
+++ head/sys/ufs/ffs/ffs_softdep.c	Wed May 19 06:18:01 2010	(r208287)
@@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
 #ifndef DEBUG
 #define DEBUG
 #endif
-#define	SUJ_DEBUG
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -1200,6 +1199,7 @@ softdep_flush(void)
 	struct ufsmount *ump;
 	struct thread *td;
 	int remaining;
+	int progress;
 	int vfslocked;
 
 	td = curthread;
@@ -1224,7 +1224,7 @@ softdep_flush(void)
 		}
 		FREE_LOCK(&lk);
 		VFS_UNLOCK_GIANT(vfslocked);
-		remaining = 0;
+		remaining = progress = 0;
 		mtx_lock(&mountlist_mtx);
 		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp)  {
 			nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1233,7 +1233,7 @@ softdep_flush(void)
 			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 				continue;
 			vfslocked = VFS_LOCK_GIANT(mp);
-			softdep_process_worklist(mp, 0);
+			progress += softdep_process_worklist(mp, 0);
 			ump = VFSTOUFS(mp);
 			remaining += ump->softdep_on_worklist -
 				ump->softdep_on_worklist_inprogress;
@@ -1243,7 +1243,7 @@ softdep_flush(void)
 			vfs_unbusy(mp);
 		}
 		mtx_unlock(&mountlist_mtx);
-		if (remaining)
+		if (remaining && progress)
 			continue;
 		ACQUIRE_LOCK(&lk);
 		if (!req_pending)
@@ -1449,7 +1449,7 @@ process_worklist_item(mp, flags)
 	struct mount *mp;
 	int flags;
 {
-	struct worklist *wk, *wkXXX;
+	struct worklist *wk;
 	struct ufsmount *ump;
 	struct vnode *vp;
 	int matchcnt = 0;
@@ -1472,11 +1472,8 @@ process_worklist_item(mp, flags)
 	vp = NULL;
 	ump = VFSTOUFS(mp);
 	LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) {
-		if (wk->wk_state & INPROGRESS) {
-			wkXXX = wk;
+		if (wk->wk_state & INPROGRESS)
 			continue;
-		}
-		wkXXX = wk;	/* Record the last valid wk pointer. */
 		if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
 			break;
 		wk->wk_state |= INPROGRESS;
@@ -2364,7 +2361,7 @@ remove_from_journal(wk)
 
 	mtx_assert(&lk, MA_OWNED);
 	ump = VFSTOUFS(wk->wk_mp);
-#ifdef DEBUG	/* XXX Expensive, temporary. */
+#ifdef SUJ_DEBUG
 	{
 		struct worklist *wkn;
 
@@ -2401,16 +2398,15 @@ journal_space(ump, thresh)
 	struct jblocks *jblocks;
 	int avail;
 
+	jblocks = ump->softdep_jblocks;
+	if (jblocks == NULL)
+		return (1);
 	/*
 	 * We use a tighter restriction here to prevent request_cleanup()
 	 * running in threads from running into locks we currently hold.
 	 */
 	if (num_inodedep > (max_softdeps / 10) * 9)
 		return (0);
-
-	jblocks = ump->softdep_jblocks;
-	if (jblocks == NULL)
-		return (1);
 	if (thresh)
 		thresh = jblocks->jb_min;
 	else
@@ -2727,7 +2723,7 @@ softdep_process_journal(mp, flags)
 				break;
 			printf("softdep: Out of journal space!\n");
 			softdep_speedup();
-			msleep(jblocks, &lk, PRIBIO, "jblocks", 1);
+			msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
 		}
 		FREE_LOCK(&lk);
 		jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
@@ -10870,18 +10866,29 @@ int
 softdep_slowdown(vp)
 	struct vnode *vp;
 {
+	struct ufsmount *ump;
+	int jlow;
 	int max_softdeps_hard;
 
 	ACQUIRE_LOCK(&lk);
+	jlow = 0;
+	/*
+	 * Check for journal space if needed.
+	 */
+	if (DOINGSUJ(vp)) {
+		ump = VFSTOUFS(vp->v_mount);
+		if (journal_space(ump, 0) == 0)
+			jlow = 1;
+	}
 	max_softdeps_hard = max_softdeps * 11 / 10;
 	if (num_dirrem < max_softdeps_hard / 2 &&
 	    num_inodedep < max_softdeps_hard &&
 	    VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
-	    num_freeblkdep < max_softdeps_hard) {
+	    num_freeblkdep < max_softdeps_hard && jlow == 0) {
 		FREE_LOCK(&lk);
   		return (0);
 	}
-	if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps)
+	if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps || jlow)
 		softdep_speedup();
 	stat_sync_limit_hit += 1;
 	FREE_LOCK(&lk);

Modified: head/sys/ufs/ffs/softdep.h
==============================================================================
--- head/sys/ufs/ffs/softdep.h	Wed May 19 04:00:42 2010	(r208286)
+++ head/sys/ufs/ffs/softdep.h	Wed May 19 06:18:01 2010	(r208287)
@@ -46,51 +46,65 @@
  * copy of the data. A particular data dependency is eliminated when
  * it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE.
  * 
- * ATTACHED means that the data is not currently being written to
- * disk. UNDONE means that the data has been rolled back to a safe
+ * The ATTACHED flag means that the data is not currently being written
+ * to disk.
+ * 
+ * The UNDONE flag means that the data has been rolled back to a safe
  * state for writing to the disk. When the I/O completes, the data is
  * restored to its current form and the state reverts to ATTACHED.
  * The data must be locked throughout the rollback, I/O, and roll
  * forward so that the rolled back information is never visible to
- * user processes. The COMPLETE flag indicates that the item has been
- * written. For example, a dependency that requires that an inode be
- * written will be marked COMPLETE after the inode has been written
- * to disk. The DEPCOMPLETE flag indicates the completion of any other
+ * user processes.
+ *
+ * The COMPLETE flag indicates that the item has been written. For example,
+ * a dependency that requires that an inode be written will be marked
+ * COMPLETE after the inode has been written to disk.
+ * 
+ * The DEPCOMPLETE flag indicates the completion of any other
  * dependencies such as the writing of a cylinder group map has been
  * completed. A dependency structure may be freed only when both it
  * and its dependencies have completed and any rollbacks that are in
  * progress have finished as indicated by the set of ALLCOMPLETE flags
- * all being set. The two MKDIR flags indicate additional dependencies
- * that must be done when creating a new directory. MKDIR_BODY is
- * cleared when the directory data block containing the "." and ".."
- * entries has been written. MKDIR_PARENT is cleared when the parent
- * inode with the increased link count for ".." has been written. When
- * both MKDIR flags have been cleared, the DEPCOMPLETE flag is set to
- * indicate that the directory dependencies have been completed. The
- * writing of the directory inode itself sets the COMPLETE flag which
- * then allows the directory entry for the new directory to be written
- * to disk. The RMDIR flag marks a dirrem structure as representing
- * the removal of a directory rather than a file. When the removal
- * dependencies are completed, additional work needs to be done
- * (truncation of the "." and ".." entries, an additional decrement
- * of the associated inode, and a decrement of the parent inode). The
- * DIRCHG flag marks a diradd structure as representing the changing
+ * all being set.
+ * 
+ * The two MKDIR flags indicate additional dependencies that must be done
+ * when creating a new directory. MKDIR_BODY is cleared when the directory
+ * data block containing the "." and ".." entries has been written.
+ * MKDIR_PARENT is cleared when the parent inode with the increased link
+ * count for ".." has been written. When both MKDIR flags have been
+ * cleared, the DEPCOMPLETE flag is set to indicate that the directory
+ * dependencies have been completed. The writing of the directory inode
+ * itself sets the COMPLETE flag which then allows the directory entry for
+ * the new directory to be written to disk. The RMDIR flag marks a dirrem
+ * structure as representing the removal of a directory rather than a
+ * file. When the removal dependencies are completed, additional work needs
+ * to be done (an additional decrement of the associated inode, and a
+ * decrement of the parent inode).
+ *
+ * The DIRCHG flag marks a diradd structure as representing the changing
  * of an existing entry rather than the addition of a new one. When
  * the update is complete the dirrem associated with the inode for
  * the old name must be added to the worklist to do the necessary
- * reference count decrement. The GOINGAWAY flag indicates that the
- * data structure is frozen from further change until its dependencies
- * have been completed and its resources freed after which it will be
- * discarded. The IOSTARTED flag prevents multiple calls to the I/O
- * start routine from doing multiple rollbacks. The SPACECOUNTED flag
- * says that the files space has been accounted to the pending free
- * space count. The NEWBLOCK flag marks pagedep structures that have
- * just been allocated, so must be claimed by the inode before all
- * dependencies are complete. The INPROGRESS flag marks worklist
- * structures that are still on the worklist, but are being considered
- * for action by some process. The UFS1FMT flag indicates that the
- * inode being processed is a ufs1 format. The EXTDATA flag indicates
- * that the allocdirect describes an extended-attributes dependency.
+ * reference count decrement.
+ * 
+ * The GOINGAWAY flag indicates that the data structure is frozen from
+ * further change until its dependencies have been completed and its
+ * resources freed after which it will be discarded.
+ *
+ * The IOSTARTED flag prevents multiple calls to the I/O start routine from
+ * doing multiple rollbacks.
+ *
+ * The NEWBLOCK flag marks pagedep structures that have just been allocated,
+ * so must be claimed by the inode before all dependencies are complete.
+ *
+ * The INPROGRESS flag marks worklist structures that are still on the
+ * worklist, but are being considered for action by some process.
+ *
+ * The UFS1FMT flag indicates that the inode being processed is in ufs1 format.
+ *
+ * The EXTDATA flag indicates that the allocdirect describes an
+ * extended-attributes dependency.
+ *
  * The ONWORKLIST flag shows whether the structure is currently linked
  * onto a worklist.
  */


More information about the svn-src-all mailing list