svn commit: r202989 - projects/suj/head/sys/ufs/ffs

Jeff Roberson jeff at FreeBSD.org
Mon Jan 25 23:24:26 UTC 2010


Author: jeff
Date: Mon Jan 25 23:24:25 2010
New Revision: 202989
URL: http://svn.freebsd.org/changeset/base/202989

Log:
   - Change the journal format to place a segment header on every physical
     block so that there is no possibility that recovery confuses a record
     with a segment header.  Also add a block count to the segment header
     so this doesn't need to be calculated everywhere.
   - Switch the SUJ flag to the bit that was INDEXDIRS so that old kernels
     automatically clear the SUJ flag if the filesystem is mounted.  Old
     fsck can still create differences between the checker and the journal,
     so the filesystem must be checked all the way to clean with either the
     old or new fsck when returning to a new implementation from an old one.
   - We can immediately remove canceled jaddrefs from the inode list of refs
     unless they are NEWBLOCK references which modify a bitmap.  Adjust the
     ref counts appropriately so that dirty mounted filesystems don't panic
     on invalid link counts if the journal is ignored.
   - Require the force flag to mount a journaled filesystem if it is dirty.
     Tell the user that this will invalidate his journal and require a full
     fsck.
   - When using suj, don't permit truncate to change the size of the file
     until the truncate is done.  Changing the size early can confuse the
     number of frags in use when recovery evaluates the inode.

Modified:
  projects/suj/head/sys/ufs/ffs/ffs_inode.c
  projects/suj/head/sys/ufs/ffs/ffs_softdep.c
  projects/suj/head/sys/ufs/ffs/ffs_vfsops.c
  projects/suj/head/sys/ufs/ffs/fs.h

Modified: projects/suj/head/sys/ufs/ffs/ffs_inode.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_inode.c	Mon Jan 25 20:59:04 2010	(r202988)
+++ projects/suj/head/sys/ufs/ffs/ffs_inode.c	Mon Jan 25 23:24:25 2010	(r202989)
@@ -187,6 +187,7 @@ ffs_truncate(vp, length, flags, cred, td
 	 * (e.g., the file is being unlinked), then pick it off with
 	 * soft updates below.
 	 */
+	allerror = 0;
 	needextclean = 0;
 	softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp);
 	extblocks = 0;
@@ -412,7 +413,13 @@ ffs_truncate(vp, length, flags, cred, td
 			DIP_SET(ip, i_db[i], 0);
 	}
 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
-	allerror = ffs_update(vp, 1);
+	/*
+	 * When doing softupdate journaling we must preserve the size along
+	 * with the old pointers until they are freed or we might not
+	 * know how many fragments remain.
+	 */
+	if (!DOINGSUJ(vp))
+		allerror = ffs_update(vp, 1);
 	
 	/*
 	 * Having written the new inode to disk, save its new configuration

Modified: projects/suj/head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_softdep.c	Mon Jan 25 20:59:04 2010	(r202988)
+++ projects/suj/head/sys/ufs/ffs/ffs_softdep.c	Mon Jan 25 23:24:25 2010	(r202989)
@@ -566,23 +566,24 @@ static	int handle_written_indirdep(struc
 	    struct buf**);
 static	int handle_written_inodeblock(struct inodedep *, struct buf *);
 static	int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
-static	void handle_written_jaddref(struct jaddref *, struct jseg *);
-static	void handle_written_jremref(struct jremref *, struct jseg *);
+static	void handle_written_jaddref(struct jaddref *);
+static	void handle_written_jremref(struct jremref *);
 static	void handle_written_jseg(struct jseg *, struct buf *);
-static	void handle_written_jnewblk(struct jnewblk *, struct jseg *);
-static	void handle_written_jfreeblk(struct jfreeblk *, struct jseg *);
-static	void handle_written_jfreefrag(struct jfreefrag *, struct jseg *);
+static	void handle_written_jnewblk(struct jnewblk *);
+static	void handle_written_jfreeblk(struct jfreeblk *);
+static	void handle_written_jfreefrag(struct jfreefrag *);
 static	void complete_jseg(struct jseg *);
 static	void jseg_write(struct fs *, struct jblocks *, struct jseg *,
 	    uint8_t *);
-static	void jaddref_write(struct jaddref *, uint8_t *);
-static	void jremref_write(struct jremref *, uint8_t *);
-static	void jmvref_write(struct jmvref *, uint8_t *);
-static	void jtrunc_write(struct jtrunc *, uint8_t *);
-static	void jnewblk_write(struct jnewblk *, uint8_t *);
-static	void jfreeblk_write(struct jfreeblk *, uint8_t *);
-static	void jfreefrag_write(struct jfreefrag *, uint8_t *);
-static	inline void inoref_write(struct inoref *, struct jrefrec *);
+static	void jaddref_write(struct jaddref *, struct jseg *, uint8_t *);
+static	void jremref_write(struct jremref *, struct jseg *, uint8_t *);
+static	void jmvref_write(struct jmvref *, struct jseg *, uint8_t *);
+static	void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *);
+static	void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *);
+static	void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *);
+static	void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *);
+static	inline void inoref_write(struct inoref *, struct jseg *,
+	    struct jrefrec *);
 static	void handle_allocdirect_partdone(struct allocdirect *,
 	    struct workhead *);
 static	void cancel_newblk(struct newblk *, struct workhead *);
@@ -694,7 +695,7 @@ static	struct jaddref *newjaddref(struct
 	    uint16_t);
 static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
 	    uint16_t);
-static inline struct jsegdep *inoref_segattach(struct inoref *, struct jseg *);
+static inline struct jsegdep *inoref_jseg(struct inoref *);
 static	struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
 static	struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
 	    ufs2_daddr_t, int);
@@ -945,6 +946,10 @@ static int stat_indir_blk_ptrs;	/* bufs 
 static int stat_inode_bitmap;	/* bufs redirtied as inode bitmap not written */
 static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
 static int stat_dir_entry;	/* bufs redirtied as dir entry cannot write */
+static int stat_jaddref;	/* bufs redirtied as ino bitmap can not write */
+static int stat_jnewblk;	/* bufs redirtied as blk bitmap can not write */
+static int stat_journal_min;	/* Times hit journal min threshold */
+static int stat_journal_low;	/* Times hit journal low threshold */
 
 SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
     &max_softdeps, 0, "");
@@ -972,6 +977,14 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, dir
     &stat_direct_blk_ptrs, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, dir_entry, CTLFLAG_RW,
     &stat_dir_entry, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jaddref_rollback, CTLFLAG_RW,
+    &stat_jaddref, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jnewblk_rollback, CTLFLAG_RW,
+    &stat_jnewblk, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, journal_low, CTLFLAG_RW,
+    &stat_journal_low, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW,
+    &stat_journal_min, 0, "");
 
 SYSCTL_DECL(_vfs_ffs);
 
@@ -2172,6 +2185,7 @@ journal_suspend(ump)
 	jblocks = ump->softdep_jblocks;
 	MNT_ILOCK(mp);
 	if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
+		stat_journal_min++;
 		mp->mnt_kern_flag |= MNTK_SUSPEND;
 		mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc);
 	}
@@ -2208,6 +2222,7 @@ softdep_prealloc(vp, waitok)
 		FREE_LOCK(&lk);
 		return (0);
 	}
+	stat_journal_low++;
 	FREE_LOCK(&lk);
 	if (waitok == MNT_NOWAIT)
 		return (ENOSPC);
@@ -2241,6 +2256,7 @@ softdep_prelink(dvp, vp)
 	mtx_assert(&lk, MA_OWNED);
 	if (journal_space(ump, jblocks->jb_low))
 		return;
+	stat_journal_low++;
 	FREE_LOCK(&lk);
 	if (vp)
 		ffs_syncvnode(vp, MNT_NOWAIT);
@@ -2273,15 +2289,19 @@ jseg_write(fs, jblocks, jseg, data)
 	rec->jsr_seq = jseg->js_seq;
 	rec->jsr_oldest = jblocks->jb_oldestseq;
 	rec->jsr_cnt = jseg->js_cnt;
+	rec->jsr_blocks = jseg->js_size / DEV_BSIZE;
 	rec->jsr_crc = 0;
 	rec->jsr_time = fs->fs_mtime;
 }
 
 static inline void
-inoref_write(inoref, rec)
+inoref_write(inoref, jseg, rec)
 	struct inoref *inoref;
+	struct jseg *jseg;
 	struct jrefrec *rec;
 {
+
+	inoref->if_jsegdep->jd_seg = jseg;
 	rec->jr_ino = inoref->if_ino;
 	rec->jr_parent = inoref->if_parent;
 	rec->jr_nlink = inoref->if_nlink;
@@ -2290,32 +2310,35 @@ inoref_write(inoref, rec)
 }
 
 static void
-jaddref_write(jaddref, data)
+jaddref_write(jaddref, jseg, data)
 	struct jaddref *jaddref;
+	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jrefrec *rec;
 
 	rec = (struct jrefrec *)data;
 	rec->jr_op = JOP_ADDREF;
-	inoref_write(&jaddref->ja_ref, rec);
+	inoref_write(&jaddref->ja_ref, jseg, rec);
 }
 
 static void
-jremref_write(jremref, data)
+jremref_write(jremref, jseg, data)
 	struct jremref *jremref;
+	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jrefrec *rec;
 
 	rec = (struct jrefrec *)data;
 	rec->jr_op = JOP_REMREF;
-	inoref_write(&jremref->jr_ref, rec);
+	inoref_write(&jremref->jr_ref, jseg, rec);
 }
 
 static	void
-jmvref_write(jmvref, data)
+jmvref_write(jmvref, jseg, data)
 	struct jmvref *jmvref;
+	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jmvrec *rec;
@@ -2329,12 +2352,14 @@ jmvref_write(jmvref, data)
 }
 
 static void
-jnewblk_write(jnewblk, data)
+jnewblk_write(jnewblk, jseg, data)
 	struct jnewblk *jnewblk;
+	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jblkrec *rec;
 
+	jnewblk->jn_jsegdep->jd_seg = jseg;
 	rec = (struct jblkrec *)data;
 	rec->jb_op = JOP_NEWBLK;
 	rec->jb_ino = jnewblk->jn_ino;
@@ -2345,12 +2370,14 @@ jnewblk_write(jnewblk, data)
 }
 
 static void
-jfreeblk_write(jfreeblk, data)
+jfreeblk_write(jfreeblk, jseg, data)
 	struct jfreeblk *jfreeblk;
+	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jblkrec *rec;
 
+	jfreeblk->jf_jsegdep->jd_seg = jseg;
 	rec = (struct jblkrec *)data;
 	rec->jb_op = JOP_FREEBLK;
 	rec->jb_ino = jfreeblk->jf_ino;
@@ -2361,12 +2388,14 @@ jfreeblk_write(jfreeblk, data)
 }
 
 static void
-jfreefrag_write(jfreefrag, data)
+jfreefrag_write(jfreefrag, jseg, data)
 	struct jfreefrag *jfreefrag;
+	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jblkrec *rec;
 
+	jfreefrag->fr_jsegdep->jd_seg = jseg;
 	rec = (struct jblkrec *)data;
 	rec->jb_op = JOP_FREEBLK;
 	rec->jb_ino = jfreefrag->fr_ino;
@@ -2377,8 +2406,9 @@ jfreefrag_write(jfreefrag, data)
 }
 
 static void
-jtrunc_write(jtrunc, data)
+jtrunc_write(jtrunc, jseg, data)
 	struct jtrunc *jtrunc;
+	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jtrncrec *rec;
@@ -2406,10 +2436,11 @@ softdep_process_journal(mp, flags)
 	uint8_t *data;
 	struct fs *fs;
 	int segwritten;
-	int jrecmin;	/* Minimum write size. */
-	int jrecmax;	/* Maximum write size. */
+	int jrecmin;	/* Minimum records per block. */
+	int jrecmax;	/* Maximum records per block. */
 	int size;
 	int cnt;
+	int off;
 
 	if ((mp->mnt_flag & MNT_SUJ) == 0)
 		return;
@@ -2421,8 +2452,8 @@ softdep_process_journal(mp, flags)
 	 * bound is picked to prevent buffer cache fragmentation and limit
 	 * processing time per I/O.
 	 */
-	jrecmax = fs->fs_bsize / JREC_SIZE;
-	jrecmin = DEV_BSIZE / JREC_SIZE;
+	jrecmin = (DEV_BSIZE / JREC_SIZE) - 1; /* -1 for seg header */
+	jrecmax = (fs->fs_bsize / DEV_BSIZE) * jrecmin;
 	segwritten = 0;
 	while ((cnt = ump->softdep_on_journal) != 0) {
 		/*
@@ -2430,15 +2461,15 @@ softdep_process_journal(mp, flags)
 		 * entries and add them to the segment.  Notice cnt is
 		 * off by one to account for the space required by the
 		 * jsegrec.  If we don't have a full block to log skip it
-		 * unless we haven't written anything in 10 seconds.
+		 * unless we haven't written anything in 5 seconds.
 		 */
 		cnt++;
 		if (cnt < jrecmax) {
 			if (segwritten)
-				return;
-			if (flags != MNT_WAIT &&
-			   (ticks - jblocks->jb_age) > hz*10)
-			break;
+				break;
+			if (flags == MNT_NOWAIT &&
+			   (ticks - jblocks->jb_age) < hz*5)
+				break;
 		}
 		/*
 		 * Verify some free journal space.  softdep_prealloc() should
@@ -2458,9 +2489,7 @@ softdep_process_journal(mp, flags)
 		workitem_alloc(&jseg->js_list, D_JSEG, mp);
 		LIST_INIT(&jseg->js_entries);
 		jseg->js_state = ATTACHED;
-		jseg->js_refs = 1;	/* Self reference. */
 		jseg->js_jblocks = jblocks;
-		size = roundup2(cnt * JREC_SIZE, DEV_BSIZE);
 		bp = geteblk(fs->fs_bsize, 0);
 		ACQUIRE_LOCK(&lk);
 		/*
@@ -2482,31 +2511,31 @@ softdep_process_journal(mp, flags)
 		 * Calculate the disk block size required for the available
 		 * records rounded to the min size.
 		 */
-		cnt = ump->softdep_on_journal + 1;
+		cnt = ump->softdep_on_journal;
 		if (cnt < jrecmax)
-			cnt = roundup2(cnt, jrecmin);
+			size = howmany(cnt, jrecmin) * DEV_BSIZE;
 		else
-			cnt = jrecmax;
-		size = cnt * JREC_SIZE;
+			size = fs->fs_bsize;
 		/*
 		 * Allocate a disk block for this journal data and account
 		 * for truncation of the requested size if enough contiguous
 		 * space was not available.
 		 */
-		bp->b_blkno = bp->b_lblkno = jblocks_alloc(jblocks, size,
-		    &size);
+		bp->b_blkno = jblocks_alloc(jblocks, size, &size);
+		bp->b_lblkno = bp->b_blkno;
 		bp->b_offset = bp->b_blkno * DEV_BSIZE;
 		bp->b_bcount = size;
 		bp->b_bufobj = &ump->um_devvp->v_bufobj;
 		bp->b_flags &= ~B_INVAL;
 		/*
-		 * Initialize our jseg with as many as cnt - 1 records.
-		 * Assign the next sequence number to it and link it
-		 * in-order.
+		 * Initialize our jseg with cnt records.  Assign the next
+		 * sequence number to it and link it in-order.
 		 */
-		cnt = MIN(ump->softdep_on_journal, (size / JREC_SIZE) - 1);
+		cnt = MIN(ump->softdep_on_journal,
+		    (size / DEV_BSIZE) * jrecmin);
 		jseg->js_buf = bp;
 		jseg->js_cnt = cnt;
+		jseg->js_refs = cnt + 1;	/* Self ref. */
 		jseg->js_size = size;
 		jseg->js_seq = jblocks->jb_nextseq++;
 		if (TAILQ_EMPTY(&jblocks->jb_segs))
@@ -2518,43 +2547,49 @@ softdep_process_journal(mp, flags)
 		 * Start filling in records from the pending list.
 		 */
 		data = bp->b_data;
-		jseg_write(fs, jblocks, jseg, data);
-		data += JREC_SIZE;
+		off = 0;
 		while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
 		    != NULL) {
+			/* Place a segment header on every device block. */
+			if ((off % DEV_BSIZE) == 0) {
+				jseg_write(fs, jblocks, jseg, data);
+				off += JREC_SIZE;
+				data = bp->b_data + off;
+			}
 			remove_from_journal(wk);
 			wk->wk_state |= IOSTARTED;
 			WORKLIST_INSERT(&jseg->js_entries, wk);
 			switch (wk->wk_type) {
 			case D_JADDREF:
-				jaddref_write(WK_JADDREF(wk), data);
+				jaddref_write(WK_JADDREF(wk), jseg, data);
 				break;
 			case D_JREMREF:
-				jremref_write(WK_JREMREF(wk), data);
+				jremref_write(WK_JREMREF(wk), jseg, data);
 				break;
 			case D_JMVREF:
-				jmvref_write(WK_JMVREF(wk), data);
+				jmvref_write(WK_JMVREF(wk), jseg, data);
 				break;
 			case D_JNEWBLK:
-				jnewblk_write(WK_JNEWBLK(wk), data);
+				jnewblk_write(WK_JNEWBLK(wk), jseg, data);
 				break;
 			case D_JFREEBLK:
-				jfreeblk_write(WK_JFREEBLK(wk), data);
+				jfreeblk_write(WK_JFREEBLK(wk), jseg, data);
 				break;
 			case D_JFREEFRAG:
-				jfreefrag_write(WK_JFREEFRAG(wk), data);
+				jfreefrag_write(WK_JFREEFRAG(wk), jseg, data);
 				break;
 			case D_JTRUNC:
-				jtrunc_write(WK_JTRUNC(wk), data);
+				jtrunc_write(WK_JTRUNC(wk), jseg, data);
 				break;
 			default:
 				panic("process_journal: Unknown type %s",
 				    TYPENAME(wk->wk_type));
 				/* NOTREACHED */
 			}
-			data += JREC_SIZE;
 			if (--cnt == 0)
 				break;
+			off += JREC_SIZE;
+			data = bp->b_data + off;
 		}
 		/*
 		 * Write this one buffer and continue.
@@ -2621,29 +2656,29 @@ complete_jseg(jseg)
 		KASSERT(i < jseg->js_cnt,
 		    ("handle_written_jseg: overflow %d >= %d",
 		    i, jseg->js_cnt));
-		jseg->js_refs++; /* Ref goes to the jsegdep below. */
 		switch (wk->wk_type) {
 		case D_JADDREF:
-			handle_written_jaddref(WK_JADDREF(wk), jseg);
+			handle_written_jaddref(WK_JADDREF(wk));
 			break;
 		case D_JREMREF:
-			handle_written_jremref(WK_JREMREF(wk), jseg);
+			handle_written_jremref(WK_JREMREF(wk));
 			break;
 		case D_JMVREF:
-			jseg->js_refs--;	/* No jsegdep here. */
+			/* No jsegdep here. */
+			free_jseg(jseg);
 			jmvref = WK_JMVREF(wk);
 			LIST_REMOVE(jmvref, jm_deps);
 			free_pagedep(jmvref->jm_pagedep);
 			WORKITEM_FREE(jmvref, D_JMVREF);
 			break;
 		case D_JNEWBLK:
-			handle_written_jnewblk(WK_JNEWBLK(wk), jseg);
+			handle_written_jnewblk(WK_JNEWBLK(wk));
 			break;
 		case D_JFREEBLK:
-			handle_written_jfreeblk(WK_JFREEBLK(wk), jseg);
+			handle_written_jfreeblk(WK_JFREEBLK(wk));
 			break;
 		case D_JFREEFRAG:
-			handle_written_jfreefrag(WK_JFREEFRAG(wk), jseg);
+			handle_written_jfreefrag(WK_JFREEFRAG(wk));
 			break;
 		case D_JTRUNC:
 			WK_JTRUNC(wk)->jt_jsegdep->jd_seg = jseg;
@@ -2698,15 +2733,13 @@ handle_written_jseg(jseg, bp)
 }
 
 static inline struct jsegdep *
-inoref_segattach(inoref, jseg)
+inoref_jseg(inoref)
 	struct inoref *inoref;
-	struct jseg *jseg;
 {
 	struct jsegdep *jsegdep;
 
 	jsegdep = inoref->if_jsegdep;
 	inoref->if_jsegdep = NULL;
-	jsegdep->jd_seg = jseg;
 
 	return (jsegdep);
 }
@@ -2717,18 +2750,15 @@ inoref_segattach(inoref, jseg)
  * for the jremref to complete will be awoken by free_jremref.
  */
 static void
-handle_written_jremref(jremref, jseg)
+handle_written_jremref(jremref)
 	struct jremref *jremref;
-	struct jseg *jseg;
 {
 	struct inodedep *inodedep;
 	struct jsegdep *jsegdep;
 	struct dirrem *dirrem;
 
-	/*
-	 * Attach the jsegdep to the jseg.
-	 */
-	jsegdep = inoref_segattach(&jremref->jr_ref, jseg);
+	/* Grab the jsegdep. */
+	jsegdep = inoref_jseg(&jremref->jr_ref);
 	/*
 	 * Remove us from the inoref list.
 	 */
@@ -2758,19 +2788,16 @@ handle_written_jremref(jremref, jseg)
  * bmsafemap dependency and attempt to remove the jaddref from the bmsafemap.
  */
 static void
-handle_written_jaddref(jaddref, jseg)
+handle_written_jaddref(jaddref)
 	struct jaddref *jaddref;
-	struct jseg *jseg;
 {
 	struct jsegdep *jsegdep;
 	struct inodedep *inodedep;
 	struct diradd *diradd;
 	struct mkdir *mkdir;
 
-	/*
-	 * Attach the jsegdep to the jseg.
-	 */
-	jsegdep = inoref_segattach(&jaddref->ja_ref, jseg);
+	/* Grab the jsegdep. */
+	jsegdep = inoref_jseg(&jaddref->ja_ref);
 	mkdir = NULL;
 	diradd = NULL;
 	if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
@@ -2820,20 +2847,16 @@ handle_written_jaddref(jaddref, jseg)
  * is placed in the bmsafemap to await notification of a written bitmap.
  */
 static void
-handle_written_jnewblk(jnewblk, jseg)
+handle_written_jnewblk(jnewblk)
 	struct jnewblk *jnewblk;
-	struct jseg *jseg;
 {
 	struct bmsafemap *bmsafemap;
 	struct jsegdep *jsegdep;
 	struct newblk *newblk;
 
-	/*
-	 * Attach the jsegdep to the jseg.
-	 */
+	/* Grab the jsegdep. */
 	jsegdep = jnewblk->jn_jsegdep;
 	jnewblk->jn_jsegdep = NULL;
-	jsegdep->jd_seg = jseg;
 	/*
 	 * Add the written block to the bmsafemap so it can be notified when
 	 * the bitmap is on disk.
@@ -2896,19 +2919,15 @@ free_jfreefrag(jfreefrag)
  * freefrag is added to the worklist if this completes its dependencies.
  */
 static void
-handle_written_jfreefrag(jfreefrag, jseg)
+handle_written_jfreefrag(jfreefrag)
 	struct jfreefrag *jfreefrag;
-	struct jseg *jseg;
 {
 	struct jsegdep *jsegdep;
 	struct freefrag *freefrag;
 
-	/*
-	 * Attach the jsegdep to the jseg.
-	 */
+	/* Grab the jsegdep. */
 	jsegdep = jfreefrag->fr_jsegdep;
 	jfreefrag->fr_jsegdep = NULL;
-	jsegdep->jd_seg = jseg;
 	freefrag = jfreefrag->fr_freefrag;
 	if (freefrag == NULL)
 		panic("handle_written_jfreefrag: No freefrag.");
@@ -2928,17 +2947,15 @@ handle_written_jfreefrag(jfreefrag, jseg
  * have been reclaimed.
  */
 static void
-handle_written_jfreeblk(jfreeblk, jseg)
+handle_written_jfreeblk(jfreeblk)
 	struct jfreeblk *jfreeblk;
-	struct jseg *jseg;
 {
 	struct freeblks *freeblks;
 	struct jsegdep *jsegdep;
 
-	/* Attach the jsegdep to the jseg. */
+	/* Grab the jsegdep. */
 	jsegdep = jfreeblk->jf_jsegdep;
 	jfreeblk->jf_jsegdep = NULL;
-	jsegdep->jd_seg = jseg;
 	freeblks = jfreeblk->jf_freeblks;
 	LIST_REMOVE(jfreeblk, jf_deps);
 	WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list);
@@ -3185,10 +3202,6 @@ move_newblock_dep(jaddref, inodedep)
 	}
 	if (jaddrefn == NULL)
 		return;
-	if (inodedep == NULL)
-		if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
-		    0, &inodedep) == 0)
-			panic("move_newblock_dep: Lost inodedep");
 	jaddrefn->ja_state &= ~(ATTACHED | UNDONE);
 	jaddrefn->ja_state |= jaddref->ja_state &
 	    (ATTACHED | UNDONE | NEWBLOCK);
@@ -3217,6 +3230,7 @@ cancel_jaddref(jaddref, inodedep, wkhd)
 	struct workhead *wkhd;
 {
 	struct inoref *inoref;
+	struct jsegdep *jsegdep;
 	int needsj;
 
 	KASSERT((jaddref->ja_state & COMPLETE) == 0,
@@ -3225,19 +3239,22 @@ cancel_jaddref(jaddref, inodedep, wkhd)
 		needsj = 1;
 	else
 		needsj = 0;
+	if (inodedep == NULL)
+		if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
+		    0, &inodedep) == 0)
+			panic("cancel_jaddref: Lost inodedep");
 	/*
-	 * If we're not journaling this remove we must adjust the nlink of
-	 * any reference operation that follows us so that it is consistent
-	 * with the in-memory reference.
-	 */
-	if (needsj == 0)
-		for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
-		    inoref = TAILQ_NEXT(inoref, if_deps))
-			inoref->if_nlink--;
-	if (jaddref->ja_ref.if_jsegdep) {
-		free_jsegdep(jaddref->ja_ref.if_jsegdep);
-		jaddref->ja_ref.if_jsegdep = NULL;
-	}
+	 * We must adjust the nlink of any reference operation that follows
+	 * us so that it is consistent with the in-memory reference.  This
+	 * ensures that inode nlink rollbacks always have the correct link.
+	 * Entries which have already been copied into the journal buffer
+	 * will be unaltered on disk but the subsequent remove record will
+	 * correct them.
+	 */
+	for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
+	    inoref = TAILQ_NEXT(inoref, if_deps))
+		inoref->if_nlink--;
+	jsegdep = inoref_jseg(&jaddref->ja_ref);
 	if (jaddref->ja_state & NEWBLOCK)
 		move_newblock_dep(jaddref, inodedep);
 	if (jaddref->ja_state & IOWAITING) {
@@ -3248,8 +3265,24 @@ cancel_jaddref(jaddref, inodedep, wkhd)
 	if (jaddref->ja_state & IOSTARTED) {
 		jaddref->ja_state &= ~IOSTARTED;
 		WORKLIST_REMOVE(&jaddref->ja_list);
-	} else
+		WORKLIST_INSERT(wkhd, &jsegdep->jd_list);
+	} else {
+		free_jsegdep(jsegdep);
 		remove_from_journal(&jaddref->ja_list);
+	}
+	/*
+	 * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove
+	 * can arrange for them to be freed with the bitmap.  Otherwise we
+	 * no longer need this addref attached to the inoreflst and it
+	 * will incorrectly adjust nlink if we leave it.
+	 */
+	if ((jaddref->ja_state & NEWBLOCK) == 0) {
+		TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref,
+		    if_deps);
+		jaddref->ja_state |= COMPLETE;
+		free_jaddref(jaddref);
+		return (needsj);
+	}
 	jaddref->ja_state |= GOINGAWAY;
 	/*
 	 * Leave the head of the list for jsegdeps for fast merging.
@@ -3331,15 +3364,11 @@ cancel_jnewblk(jnewblk, wkhd)
 	struct jnewblk *jnewblk;
 	struct workhead *wkhd;
 {
+	struct jsegdep *jsegdep;
 
-	if (jnewblk->jn_jsegdep) {
-		free_jsegdep(jnewblk->jn_jsegdep);
-		jnewblk->jn_jsegdep = NULL;
-	}
-	if (jnewblk->jn_state & IOWAITING) {
-		jnewblk->jn_state &= ~IOWAITING;
-		wakeup(&jnewblk->jn_list);
-	}
+	jsegdep = jnewblk->jn_jsegdep;
+	jnewblk->jn_jsegdep  = NULL;
+	free_jsegdep(jsegdep);
 	jnewblk->jn_newblk = NULL;
 	jnewblk->jn_state |= GOINGAWAY;
 	if (jnewblk->jn_state & IOSTARTED) {
@@ -3355,6 +3384,10 @@ cancel_jnewblk(jnewblk, wkhd)
 		LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jnewblk->jn_list, wk_list);
 	} else
 		WORKLIST_INSERT(wkhd, &jnewblk->jn_list);
+	if (jnewblk->jn_state & IOWAITING) {
+		jnewblk->jn_state &= ~IOWAITING;
+		wakeup(&jnewblk->jn_list);
+	}
 }
 
 static void
@@ -6706,6 +6739,7 @@ cancel_mkdir_dotdot(ip, dirrem, jremref)
 		panic("cancel_mkdir_dotdot: Unable to find mkdir\n");
 	if ((jaddref = mkdir->md_jaddref) != NULL) {
 		mkdir->md_jaddref = NULL;
+		jaddref->ja_state &= ~MKDIR_PARENT;
 		if (inodedep_lookup(UFSTOVFS(ip->i_ump), jaddref->ja_ino, 0,
 		    &inodedep) == 0)
 			panic("cancel_mkdir_dotdot: Lost parent inodedep");
@@ -8536,6 +8570,7 @@ initiate_write_bmsafemap(bmsafemap, bp)
 				clrbit(inosused, ino);
 				jaddref->ja_state &= ~ATTACHED;
 				jaddref->ja_state |= UNDONE;
+				stat_jaddref++;
 			} else if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
 				panic("initiate_write_bmsafemap: inode %d "
 				    "marked free", jaddref->ja_ino);
@@ -8564,6 +8599,7 @@ initiate_write_bmsafemap(bmsafemap, bp)
 			 * it.
 			 */
 			if (cleared) {
+				stat_jnewblk++;
 				jnewblk->jn_state &= ~ATTACHED;
 				jnewblk->jn_state |= UNDONE;
 			} else if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
@@ -10581,6 +10617,7 @@ softdep_request_cleanup(fs, vp)
 		if (error != 0)
 			return (0);
 	}
+	process_removes(vp);
 	while (fs->fs_pendingblocks > 0 && fs->fs_cstotal.cs_nbfree <= needed) {
 		if (time_second > starttime)
 			return (0);

Modified: projects/suj/head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_vfsops.c	Mon Jan 25 20:59:04 2010	(r202988)
+++ projects/suj/head/sys/ufs/ffs/ffs_vfsops.c	Mon Jan 25 23:24:25 2010	(r202989)
@@ -299,7 +299,8 @@ ffs_mount(struct mount *mp)
 			if (fs->fs_clean == 0) {
 				fs->fs_flags |= FS_UNCLEAN;
 				if ((mp->mnt_flag & MNT_FORCE) ||
-				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
+				    ((fs->fs_flags &
+				     (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
 				     (fs->fs_flags & FS_DOSOFTDEP))) {
 					printf("WARNING: %s was not %s\n",
 					   fs->fs_fsmnt, "properly dismounted");
@@ -307,6 +308,9 @@ ffs_mount(struct mount *mp)
 					printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 					    fs->fs_fsmnt);
+					if (fs->fs_flags & FS_SUJ)
+						printf(
+"WARNING: Forced mount will invalidated journal contents\n");
 					return (EPERM);
 				}
 			}
@@ -707,7 +711,7 @@ ffs_mountfs(devvp, mp, td)
 	if (fs->fs_clean == 0) {
 		fs->fs_flags |= FS_UNCLEAN;
 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
-		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
+		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
 		     (fs->fs_flags & FS_DOSOFTDEP))) {
 			printf(
 "WARNING: %s was not properly dismounted\n",
@@ -716,6 +720,9 @@ ffs_mountfs(devvp, mp, td)
 			printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 			    fs->fs_fsmnt);
+			if (fs->fs_flags & FS_SUJ)
+				printf(
+"WARNING: Forced mount will invalidated journal contents\n");
 			error = EPERM;
 			goto out;
 		}

Modified: projects/suj/head/sys/ufs/ffs/fs.h
==============================================================================
--- projects/suj/head/sys/ufs/ffs/fs.h	Mon Jan 25 20:59:04 2010	(r202988)
+++ projects/suj/head/sys/ufs/ffs/fs.h	Mon Jan 25 23:24:25 2010	(r202989)
@@ -411,13 +411,13 @@ CTASSERT(sizeof(struct fs) == 1376);
 #define FS_UNCLEAN	0x0001	/* filesystem not clean at mount */
 #define FS_DOSOFTDEP	0x0002	/* filesystem using soft dependencies */
 #define FS_NEEDSFSCK	0x0004	/* filesystem needs sync fsck before mount */
-#define FS_INDEXDIRS	0x0008	/* kernel supports indexed directories */
+#define	FS_SUJ       	0x0008	/* Filesystem using softupdate journal */
 #define FS_ACLS		0x0010	/* file system has POSIX.1e ACLs enabled */
 #define FS_MULTILABEL	0x0020	/* file system is MAC multi-label */
 #define FS_GJOURNAL	0x0040	/* gjournaled file system */
 #define FS_FLAGS_UPDATED 0x0080	/* flags have been moved to new location */
 #define FS_NFS4ACLS	0x0100	/* file system has NFSv4 ACLs enabled */
-#define	FS_SUJ       0x200	/* Filesystem using softupdate journal */
+#define FS_INDEXDIRS	0x0200	/* kernel supports indexed directories */
 
 /*
  * Macros to access bits in the fs_active array.
@@ -657,17 +657,19 @@ lbn_level(ufs_lbn_t lbn)
 #define	JREC_SIZE	32	/* Record and segment header size. */
 
 #define	SUJ_MIN		(1 * 1024 * 1024)	/* Minimum journal size */
-#define	SUJ_MAX		(64 * SUJ_MIN)		/* Maximum journal size */
+#define	SUJ_MAX		(32 * SUJ_MIN)		/* Maximum journal size */
 
 /*
  * Size of the segment record header.  There is at most one for each disk
  * block and at least one for each filesystem block in the journal.  The
- * segment header is followed by an array of records.
+ * segment header is followed by an array of records.  fsck depends on
+ * the first element in each record being 'op' and the second being 'ino'.
  */
 struct jsegrec {
 	uint64_t	jsr_seq;	/* Our sequence number */
 	uint64_t	jsr_oldest;	/* Oldest valid sequence number */
-	uint32_t	jsr_cnt;	/* Count of valid records */
+	uint16_t	jsr_cnt;	/* Count of valid records */
+	uint16_t	jsr_blocks;	/* Count of DEV_BSIZE blocks. */
 	uint32_t	jsr_crc;	/* 32bit crc of the valid space */
 	ufs_time_t	jsr_time;	/* timestamp for mount instance */
 };


More information about the svn-src-projects mailing list