svn commit: r202989 - projects/suj/head/sys/ufs/ffs
Jeff Roberson
jeff at FreeBSD.org
Mon Jan 25 23:24:26 UTC 2010
Author: jeff
Date: Mon Jan 25 23:24:25 2010
New Revision: 202989
URL: http://svn.freebsd.org/changeset/base/202989
Log:
- Change the journal format to place a segment header on every physical
block so that there is no possibility that recovery confuses a record
with a segment header. Also add a block count to the segment header
so this doesn't need to be calculated everywhere.
- Switch the SUJ flag to the bit that was INDEXDIRS so that old kernels
automatically clear the SUJ flag if the filesystem is mounted. Old fsck can
still create differences between the checker and the journal, so the
filesystem must be checked all the way to clean with either the old or new
fsck when returning to a new implementation from an old one.
- We can immediately remove canceled jaddrefs from the inode list of refs
unless they are NEWBLOCK references which modify a bitmap. Adjust the
ref counts appropriately so that dirty mounted filesystems don't panic
on invalid link counts if the journal is ignored.
- Require the force flag to mount a journaled filesystem if it is dirty.
Tell the user that this will invalidate his journal and require a full
fsck.
- Don't permit truncate to change the size of the file until it's done
when using suj. This can confuse the number of frags in use when
recovery evaluates the inode.
Modified:
projects/suj/head/sys/ufs/ffs/ffs_inode.c
projects/suj/head/sys/ufs/ffs/ffs_softdep.c
projects/suj/head/sys/ufs/ffs/ffs_vfsops.c
projects/suj/head/sys/ufs/ffs/fs.h
Modified: projects/suj/head/sys/ufs/ffs/ffs_inode.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_inode.c Mon Jan 25 20:59:04 2010 (r202988)
+++ projects/suj/head/sys/ufs/ffs/ffs_inode.c Mon Jan 25 23:24:25 2010 (r202989)
@@ -187,6 +187,7 @@ ffs_truncate(vp, length, flags, cred, td
* (e.g., the file is being unlinked), then pick it off with
* soft updates below.
*/
+ allerror = 0;
needextclean = 0;
softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp);
extblocks = 0;
@@ -412,7 +413,13 @@ ffs_truncate(vp, length, flags, cred, td
DIP_SET(ip, i_db[i], 0);
}
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- allerror = ffs_update(vp, 1);
+ /*
+ * When doing softupdate journaling we must preserve the size along
+ * with the old pointers until they are freed or we might not
+ * know how many fragments remain.
+ */
+ if (!DOINGSUJ(vp))
+ allerror = ffs_update(vp, 1);
/*
* Having written the new inode to disk, save its new configuration
Modified: projects/suj/head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_softdep.c Mon Jan 25 20:59:04 2010 (r202988)
+++ projects/suj/head/sys/ufs/ffs/ffs_softdep.c Mon Jan 25 23:24:25 2010 (r202989)
@@ -566,23 +566,24 @@ static int handle_written_indirdep(struc
struct buf**);
static int handle_written_inodeblock(struct inodedep *, struct buf *);
static int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
-static void handle_written_jaddref(struct jaddref *, struct jseg *);
-static void handle_written_jremref(struct jremref *, struct jseg *);
+static void handle_written_jaddref(struct jaddref *);
+static void handle_written_jremref(struct jremref *);
static void handle_written_jseg(struct jseg *, struct buf *);
-static void handle_written_jnewblk(struct jnewblk *, struct jseg *);
-static void handle_written_jfreeblk(struct jfreeblk *, struct jseg *);
-static void handle_written_jfreefrag(struct jfreefrag *, struct jseg *);
+static void handle_written_jnewblk(struct jnewblk *);
+static void handle_written_jfreeblk(struct jfreeblk *);
+static void handle_written_jfreefrag(struct jfreefrag *);
static void complete_jseg(struct jseg *);
static void jseg_write(struct fs *, struct jblocks *, struct jseg *,
uint8_t *);
-static void jaddref_write(struct jaddref *, uint8_t *);
-static void jremref_write(struct jremref *, uint8_t *);
-static void jmvref_write(struct jmvref *, uint8_t *);
-static void jtrunc_write(struct jtrunc *, uint8_t *);
-static void jnewblk_write(struct jnewblk *, uint8_t *);
-static void jfreeblk_write(struct jfreeblk *, uint8_t *);
-static void jfreefrag_write(struct jfreefrag *, uint8_t *);
-static inline void inoref_write(struct inoref *, struct jrefrec *);
+static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *);
+static void jremref_write(struct jremref *, struct jseg *, uint8_t *);
+static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *);
+static void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *);
+static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *);
+static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *);
+static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *);
+static inline void inoref_write(struct inoref *, struct jseg *,
+ struct jrefrec *);
static void handle_allocdirect_partdone(struct allocdirect *,
struct workhead *);
static void cancel_newblk(struct newblk *, struct workhead *);
@@ -694,7 +695,7 @@ static struct jaddref *newjaddref(struct
uint16_t);
static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
uint16_t);
-static inline struct jsegdep *inoref_segattach(struct inoref *, struct jseg *);
+static inline struct jsegdep *inoref_jseg(struct inoref *);
static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
ufs2_daddr_t, int);
@@ -945,6 +946,10 @@ static int stat_indir_blk_ptrs; /* bufs
static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
+static int stat_jaddref; /* bufs redirtied as ino bitmap can not write */
+static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */
+static int stat_journal_min; /* Times hit journal min threshold */
+static int stat_journal_low; /* Times hit journal low threshold */
SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
&max_softdeps, 0, "");
@@ -972,6 +977,14 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, dir
&stat_direct_blk_ptrs, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, dir_entry, CTLFLAG_RW,
&stat_dir_entry, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jaddref_rollback, CTLFLAG_RW,
+ &stat_jaddref, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jnewblk_rollback, CTLFLAG_RW,
+ &stat_jnewblk, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, journal_low, CTLFLAG_RW,
+ &stat_journal_low, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW,
+ &stat_journal_min, 0, "");
SYSCTL_DECL(_vfs_ffs);
@@ -2172,6 +2185,7 @@ journal_suspend(ump)
jblocks = ump->softdep_jblocks;
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
+ stat_journal_min++;
mp->mnt_kern_flag |= MNTK_SUSPEND;
mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc);
}
@@ -2208,6 +2222,7 @@ softdep_prealloc(vp, waitok)
FREE_LOCK(&lk);
return (0);
}
+ stat_journal_low++;
FREE_LOCK(&lk);
if (waitok == MNT_NOWAIT)
return (ENOSPC);
@@ -2241,6 +2256,7 @@ softdep_prelink(dvp, vp)
mtx_assert(&lk, MA_OWNED);
if (journal_space(ump, jblocks->jb_low))
return;
+ stat_journal_low++;
FREE_LOCK(&lk);
if (vp)
ffs_syncvnode(vp, MNT_NOWAIT);
@@ -2273,15 +2289,19 @@ jseg_write(fs, jblocks, jseg, data)
rec->jsr_seq = jseg->js_seq;
rec->jsr_oldest = jblocks->jb_oldestseq;
rec->jsr_cnt = jseg->js_cnt;
+ rec->jsr_blocks = jseg->js_size / DEV_BSIZE;
rec->jsr_crc = 0;
rec->jsr_time = fs->fs_mtime;
}
static inline void
-inoref_write(inoref, rec)
+inoref_write(inoref, jseg, rec)
struct inoref *inoref;
+ struct jseg *jseg;
struct jrefrec *rec;
{
+
+ inoref->if_jsegdep->jd_seg = jseg;
rec->jr_ino = inoref->if_ino;
rec->jr_parent = inoref->if_parent;
rec->jr_nlink = inoref->if_nlink;
@@ -2290,32 +2310,35 @@ inoref_write(inoref, rec)
}
static void
-jaddref_write(jaddref, data)
+jaddref_write(jaddref, jseg, data)
struct jaddref *jaddref;
+ struct jseg *jseg;
uint8_t *data;
{
struct jrefrec *rec;
rec = (struct jrefrec *)data;
rec->jr_op = JOP_ADDREF;
- inoref_write(&jaddref->ja_ref, rec);
+ inoref_write(&jaddref->ja_ref, jseg, rec);
}
static void
-jremref_write(jremref, data)
+jremref_write(jremref, jseg, data)
struct jremref *jremref;
+ struct jseg *jseg;
uint8_t *data;
{
struct jrefrec *rec;
rec = (struct jrefrec *)data;
rec->jr_op = JOP_REMREF;
- inoref_write(&jremref->jr_ref, rec);
+ inoref_write(&jremref->jr_ref, jseg, rec);
}
static void
-jmvref_write(jmvref, data)
+jmvref_write(jmvref, jseg, data)
struct jmvref *jmvref;
+ struct jseg *jseg;
uint8_t *data;
{
struct jmvrec *rec;
@@ -2329,12 +2352,14 @@ jmvref_write(jmvref, data)
}
static void
-jnewblk_write(jnewblk, data)
+jnewblk_write(jnewblk, jseg, data)
struct jnewblk *jnewblk;
+ struct jseg *jseg;
uint8_t *data;
{
struct jblkrec *rec;
+ jnewblk->jn_jsegdep->jd_seg = jseg;
rec = (struct jblkrec *)data;
rec->jb_op = JOP_NEWBLK;
rec->jb_ino = jnewblk->jn_ino;
@@ -2345,12 +2370,14 @@ jnewblk_write(jnewblk, data)
}
static void
-jfreeblk_write(jfreeblk, data)
+jfreeblk_write(jfreeblk, jseg, data)
struct jfreeblk *jfreeblk;
+ struct jseg *jseg;
uint8_t *data;
{
struct jblkrec *rec;
+ jfreeblk->jf_jsegdep->jd_seg = jseg;
rec = (struct jblkrec *)data;
rec->jb_op = JOP_FREEBLK;
rec->jb_ino = jfreeblk->jf_ino;
@@ -2361,12 +2388,14 @@ jfreeblk_write(jfreeblk, data)
}
static void
-jfreefrag_write(jfreefrag, data)
+jfreefrag_write(jfreefrag, jseg, data)
struct jfreefrag *jfreefrag;
+ struct jseg *jseg;
uint8_t *data;
{
struct jblkrec *rec;
+ jfreefrag->fr_jsegdep->jd_seg = jseg;
rec = (struct jblkrec *)data;
rec->jb_op = JOP_FREEBLK;
rec->jb_ino = jfreefrag->fr_ino;
@@ -2377,8 +2406,9 @@ jfreefrag_write(jfreefrag, data)
}
static void
-jtrunc_write(jtrunc, data)
+jtrunc_write(jtrunc, jseg, data)
struct jtrunc *jtrunc;
+ struct jseg *jseg;
uint8_t *data;
{
struct jtrncrec *rec;
@@ -2406,10 +2436,11 @@ softdep_process_journal(mp, flags)
uint8_t *data;
struct fs *fs;
int segwritten;
- int jrecmin; /* Minimum write size. */
- int jrecmax; /* Maximum write size. */
+ int jrecmin; /* Minimum records per block. */
+ int jrecmax; /* Maximum records per block. */
int size;
int cnt;
+ int off;
if ((mp->mnt_flag & MNT_SUJ) == 0)
return;
@@ -2421,8 +2452,8 @@ softdep_process_journal(mp, flags)
* bound is picked to prevent buffer cache fragmentation and limit
* processing time per I/O.
*/
- jrecmax = fs->fs_bsize / JREC_SIZE;
- jrecmin = DEV_BSIZE / JREC_SIZE;
+ jrecmin = (DEV_BSIZE / JREC_SIZE) - 1; /* -1 for seg header */
+ jrecmax = (fs->fs_bsize / DEV_BSIZE) * jrecmin;
segwritten = 0;
while ((cnt = ump->softdep_on_journal) != 0) {
/*
@@ -2430,15 +2461,15 @@ softdep_process_journal(mp, flags)
* entries and add them to the segment. Notice cnt is
* off by one to account for the space required by the
* jsegrec. If we don't have a full block to log skip it
- * unless we haven't written anything in 10 seconds.
+ * unless we haven't written anything in 5 seconds.
*/
cnt++;
if (cnt < jrecmax) {
if (segwritten)
- return;
- if (flags != MNT_WAIT &&
- (ticks - jblocks->jb_age) > hz*10)
- break;
+ break;
+ if (flags == MNT_NOWAIT &&
+ (ticks - jblocks->jb_age) < hz*5)
+ break;
}
/*
* Verify some free journal space. softdep_prealloc() should
@@ -2458,9 +2489,7 @@ softdep_process_journal(mp, flags)
workitem_alloc(&jseg->js_list, D_JSEG, mp);
LIST_INIT(&jseg->js_entries);
jseg->js_state = ATTACHED;
- jseg->js_refs = 1; /* Self reference. */
jseg->js_jblocks = jblocks;
- size = roundup2(cnt * JREC_SIZE, DEV_BSIZE);
bp = geteblk(fs->fs_bsize, 0);
ACQUIRE_LOCK(&lk);
/*
@@ -2482,31 +2511,31 @@ softdep_process_journal(mp, flags)
* Calculate the disk block size required for the available
* records rounded to the min size.
*/
- cnt = ump->softdep_on_journal + 1;
+ cnt = ump->softdep_on_journal;
if (cnt < jrecmax)
- cnt = roundup2(cnt, jrecmin);
+ size = howmany(cnt, jrecmin) * DEV_BSIZE;
else
- cnt = jrecmax;
- size = cnt * JREC_SIZE;
+ size = fs->fs_bsize;
/*
* Allocate a disk block for this journal data and account
* for truncation of the requested size if enough contiguous
* space was not available.
*/
- bp->b_blkno = bp->b_lblkno = jblocks_alloc(jblocks, size,
- &size);
+ bp->b_blkno = jblocks_alloc(jblocks, size, &size);
+ bp->b_lblkno = bp->b_blkno;
bp->b_offset = bp->b_blkno * DEV_BSIZE;
bp->b_bcount = size;
bp->b_bufobj = &ump->um_devvp->v_bufobj;
bp->b_flags &= ~B_INVAL;
/*
- * Initialize our jseg with as many as cnt - 1 records.
- * Assign the next sequence number to it and link it
- * in-order.
+ * Initialize our jseg with cnt records. Assign the next
+ * sequence number to it and link it in-order.
*/
- cnt = MIN(ump->softdep_on_journal, (size / JREC_SIZE) - 1);
+ cnt = MIN(ump->softdep_on_journal,
+ (size / DEV_BSIZE) * jrecmin);
jseg->js_buf = bp;
jseg->js_cnt = cnt;
+ jseg->js_refs = cnt + 1; /* Self ref. */
jseg->js_size = size;
jseg->js_seq = jblocks->jb_nextseq++;
if (TAILQ_EMPTY(&jblocks->jb_segs))
@@ -2518,43 +2547,49 @@ softdep_process_journal(mp, flags)
* Start filling in records from the pending list.
*/
data = bp->b_data;
- jseg_write(fs, jblocks, jseg, data);
- data += JREC_SIZE;
+ off = 0;
while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
!= NULL) {
+ /* Place a segment header on every device block. */
+ if ((off % DEV_BSIZE) == 0) {
+ jseg_write(fs, jblocks, jseg, data);
+ off += JREC_SIZE;
+ data = bp->b_data + off;
+ }
remove_from_journal(wk);
wk->wk_state |= IOSTARTED;
WORKLIST_INSERT(&jseg->js_entries, wk);
switch (wk->wk_type) {
case D_JADDREF:
- jaddref_write(WK_JADDREF(wk), data);
+ jaddref_write(WK_JADDREF(wk), jseg, data);
break;
case D_JREMREF:
- jremref_write(WK_JREMREF(wk), data);
+ jremref_write(WK_JREMREF(wk), jseg, data);
break;
case D_JMVREF:
- jmvref_write(WK_JMVREF(wk), data);
+ jmvref_write(WK_JMVREF(wk), jseg, data);
break;
case D_JNEWBLK:
- jnewblk_write(WK_JNEWBLK(wk), data);
+ jnewblk_write(WK_JNEWBLK(wk), jseg, data);
break;
case D_JFREEBLK:
- jfreeblk_write(WK_JFREEBLK(wk), data);
+ jfreeblk_write(WK_JFREEBLK(wk), jseg, data);
break;
case D_JFREEFRAG:
- jfreefrag_write(WK_JFREEFRAG(wk), data);
+ jfreefrag_write(WK_JFREEFRAG(wk), jseg, data);
break;
case D_JTRUNC:
- jtrunc_write(WK_JTRUNC(wk), data);
+ jtrunc_write(WK_JTRUNC(wk), jseg, data);
break;
default:
panic("process_journal: Unknown type %s",
TYPENAME(wk->wk_type));
/* NOTREACHED */
}
- data += JREC_SIZE;
if (--cnt == 0)
break;
+ off += JREC_SIZE;
+ data = bp->b_data + off;
}
/*
* Write this one buffer and continue.
@@ -2621,29 +2656,29 @@ complete_jseg(jseg)
KASSERT(i < jseg->js_cnt,
("handle_written_jseg: overflow %d >= %d",
i, jseg->js_cnt));
- jseg->js_refs++; /* Ref goes to the jsegdep below. */
switch (wk->wk_type) {
case D_JADDREF:
- handle_written_jaddref(WK_JADDREF(wk), jseg);
+ handle_written_jaddref(WK_JADDREF(wk));
break;
case D_JREMREF:
- handle_written_jremref(WK_JREMREF(wk), jseg);
+ handle_written_jremref(WK_JREMREF(wk));
break;
case D_JMVREF:
- jseg->js_refs--; /* No jsegdep here. */
+ /* No jsegdep here. */
+ free_jseg(jseg);
jmvref = WK_JMVREF(wk);
LIST_REMOVE(jmvref, jm_deps);
free_pagedep(jmvref->jm_pagedep);
WORKITEM_FREE(jmvref, D_JMVREF);
break;
case D_JNEWBLK:
- handle_written_jnewblk(WK_JNEWBLK(wk), jseg);
+ handle_written_jnewblk(WK_JNEWBLK(wk));
break;
case D_JFREEBLK:
- handle_written_jfreeblk(WK_JFREEBLK(wk), jseg);
+ handle_written_jfreeblk(WK_JFREEBLK(wk));
break;
case D_JFREEFRAG:
- handle_written_jfreefrag(WK_JFREEFRAG(wk), jseg);
+ handle_written_jfreefrag(WK_JFREEFRAG(wk));
break;
case D_JTRUNC:
WK_JTRUNC(wk)->jt_jsegdep->jd_seg = jseg;
@@ -2698,15 +2733,13 @@ handle_written_jseg(jseg, bp)
}
static inline struct jsegdep *
-inoref_segattach(inoref, jseg)
+inoref_jseg(inoref)
struct inoref *inoref;
- struct jseg *jseg;
{
struct jsegdep *jsegdep;
jsegdep = inoref->if_jsegdep;
inoref->if_jsegdep = NULL;
- jsegdep->jd_seg = jseg;
return (jsegdep);
}
@@ -2717,18 +2750,15 @@ inoref_segattach(inoref, jseg)
* for the jremref to complete will be awoken by free_jremref.
*/
static void
-handle_written_jremref(jremref, jseg)
+handle_written_jremref(jremref)
struct jremref *jremref;
- struct jseg *jseg;
{
struct inodedep *inodedep;
struct jsegdep *jsegdep;
struct dirrem *dirrem;
- /*
- * Attach the jsegdep to the jseg.
- */
- jsegdep = inoref_segattach(&jremref->jr_ref, jseg);
+ /* Grab the jsegdep. */
+ jsegdep = inoref_jseg(&jremref->jr_ref);
/*
* Remove us from the inoref list.
*/
@@ -2758,19 +2788,16 @@ handle_written_jremref(jremref, jseg)
* bmsafemap dependency and attempt to remove the jaddref from the bmsafemap.
*/
static void
-handle_written_jaddref(jaddref, jseg)
+handle_written_jaddref(jaddref)
struct jaddref *jaddref;
- struct jseg *jseg;
{
struct jsegdep *jsegdep;
struct inodedep *inodedep;
struct diradd *diradd;
struct mkdir *mkdir;
- /*
- * Attach the jsegdep to the jseg.
- */
- jsegdep = inoref_segattach(&jaddref->ja_ref, jseg);
+ /* Grab the jsegdep. */
+ jsegdep = inoref_jseg(&jaddref->ja_ref);
mkdir = NULL;
diradd = NULL;
if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
@@ -2820,20 +2847,16 @@ handle_written_jaddref(jaddref, jseg)
* is placed in the bmsafemap to await notification of a written bitmap.
*/
static void
-handle_written_jnewblk(jnewblk, jseg)
+handle_written_jnewblk(jnewblk)
struct jnewblk *jnewblk;
- struct jseg *jseg;
{
struct bmsafemap *bmsafemap;
struct jsegdep *jsegdep;
struct newblk *newblk;
- /*
- * Attach the jsegdep to the jseg.
- */
+ /* Grab the jsegdep. */
jsegdep = jnewblk->jn_jsegdep;
jnewblk->jn_jsegdep = NULL;
- jsegdep->jd_seg = jseg;
/*
* Add the written block to the bmsafemap so it can be notified when
* the bitmap is on disk.
@@ -2896,19 +2919,15 @@ free_jfreefrag(jfreefrag)
* freefrag is added to the worklist if this completes its dependencies.
*/
static void
-handle_written_jfreefrag(jfreefrag, jseg)
+handle_written_jfreefrag(jfreefrag)
struct jfreefrag *jfreefrag;
- struct jseg *jseg;
{
struct jsegdep *jsegdep;
struct freefrag *freefrag;
- /*
- * Attach the jsegdep to the jseg.
- */
+ /* Grab the jsegdep. */
jsegdep = jfreefrag->fr_jsegdep;
jfreefrag->fr_jsegdep = NULL;
- jsegdep->jd_seg = jseg;
freefrag = jfreefrag->fr_freefrag;
if (freefrag == NULL)
panic("handle_written_jfreefrag: No freefrag.");
@@ -2928,17 +2947,15 @@ handle_written_jfreefrag(jfreefrag, jseg
* have been reclaimed.
*/
static void
-handle_written_jfreeblk(jfreeblk, jseg)
+handle_written_jfreeblk(jfreeblk)
struct jfreeblk *jfreeblk;
- struct jseg *jseg;
{
struct freeblks *freeblks;
struct jsegdep *jsegdep;
- /* Attach the jsegdep to the jseg. */
+ /* Grab the jsegdep. */
jsegdep = jfreeblk->jf_jsegdep;
jfreeblk->jf_jsegdep = NULL;
- jsegdep->jd_seg = jseg;
freeblks = jfreeblk->jf_freeblks;
LIST_REMOVE(jfreeblk, jf_deps);
WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list);
@@ -3185,10 +3202,6 @@ move_newblock_dep(jaddref, inodedep)
}
if (jaddrefn == NULL)
return;
- if (inodedep == NULL)
- if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
- 0, &inodedep) == 0)
- panic("move_newblock_dep: Lost inodedep");
jaddrefn->ja_state &= ~(ATTACHED | UNDONE);
jaddrefn->ja_state |= jaddref->ja_state &
(ATTACHED | UNDONE | NEWBLOCK);
@@ -3217,6 +3230,7 @@ cancel_jaddref(jaddref, inodedep, wkhd)
struct workhead *wkhd;
{
struct inoref *inoref;
+ struct jsegdep *jsegdep;
int needsj;
KASSERT((jaddref->ja_state & COMPLETE) == 0,
@@ -3225,19 +3239,22 @@ cancel_jaddref(jaddref, inodedep, wkhd)
needsj = 1;
else
needsj = 0;
+ if (inodedep == NULL)
+ if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
+ 0, &inodedep) == 0)
+ panic("cancel_jaddref: Lost inodedep");
/*
- * If we're not journaling this remove we must adjust the nlink of
- * any reference operation that follows us so that it is consistent
- * with the in-memory reference.
- */
- if (needsj == 0)
- for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
- inoref = TAILQ_NEXT(inoref, if_deps))
- inoref->if_nlink--;
- if (jaddref->ja_ref.if_jsegdep) {
- free_jsegdep(jaddref->ja_ref.if_jsegdep);
- jaddref->ja_ref.if_jsegdep = NULL;
- }
+ * We must adjust the nlink of any reference operation that follows
+ * us so that it is consistent with the in-memory reference. This
+ * ensures that inode nlink rollbacks always have the correct link.
+ * Entries which have already been copied into the journal buffer
+ * will be unaltered on disk but the subsequent remove record will
+ * correct them.
+ */
+ for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
+ inoref = TAILQ_NEXT(inoref, if_deps))
+ inoref->if_nlink--;
+ jsegdep = inoref_jseg(&jaddref->ja_ref);
if (jaddref->ja_state & NEWBLOCK)
move_newblock_dep(jaddref, inodedep);
if (jaddref->ja_state & IOWAITING) {
@@ -3248,8 +3265,24 @@ cancel_jaddref(jaddref, inodedep, wkhd)
if (jaddref->ja_state & IOSTARTED) {
jaddref->ja_state &= ~IOSTARTED;
WORKLIST_REMOVE(&jaddref->ja_list);
- } else
+ WORKLIST_INSERT(wkhd, &jsegdep->jd_list);
+ } else {
+ free_jsegdep(jsegdep);
remove_from_journal(&jaddref->ja_list);
+ }
+ /*
+ * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove
+ * can arrange for them to be freed with the bitmap. Otherwise we
+ * no longer need this addref attached to the inoreflst and it
+ * will incorrectly adjust nlink if we leave it.
+ */
+ if ((jaddref->ja_state & NEWBLOCK) == 0) {
+ TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref,
+ if_deps);
+ jaddref->ja_state |= COMPLETE;
+ free_jaddref(jaddref);
+ return (needsj);
+ }
jaddref->ja_state |= GOINGAWAY;
/*
* Leave the head of the list for jsegdeps for fast merging.
@@ -3331,15 +3364,11 @@ cancel_jnewblk(jnewblk, wkhd)
struct jnewblk *jnewblk;
struct workhead *wkhd;
{
+ struct jsegdep *jsegdep;
- if (jnewblk->jn_jsegdep) {
- free_jsegdep(jnewblk->jn_jsegdep);
- jnewblk->jn_jsegdep = NULL;
- }
- if (jnewblk->jn_state & IOWAITING) {
- jnewblk->jn_state &= ~IOWAITING;
- wakeup(&jnewblk->jn_list);
- }
+ jsegdep = jnewblk->jn_jsegdep;
+ jnewblk->jn_jsegdep = NULL;
+ free_jsegdep(jsegdep);
jnewblk->jn_newblk = NULL;
jnewblk->jn_state |= GOINGAWAY;
if (jnewblk->jn_state & IOSTARTED) {
@@ -3355,6 +3384,10 @@ cancel_jnewblk(jnewblk, wkhd)
LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jnewblk->jn_list, wk_list);
} else
WORKLIST_INSERT(wkhd, &jnewblk->jn_list);
+ if (jnewblk->jn_state & IOWAITING) {
+ jnewblk->jn_state &= ~IOWAITING;
+ wakeup(&jnewblk->jn_list);
+ }
}
static void
@@ -6706,6 +6739,7 @@ cancel_mkdir_dotdot(ip, dirrem, jremref)
panic("cancel_mkdir_dotdot: Unable to find mkdir\n");
if ((jaddref = mkdir->md_jaddref) != NULL) {
mkdir->md_jaddref = NULL;
+ jaddref->ja_state &= ~MKDIR_PARENT;
if (inodedep_lookup(UFSTOVFS(ip->i_ump), jaddref->ja_ino, 0,
&inodedep) == 0)
panic("cancel_mkdir_dotdot: Lost parent inodedep");
@@ -8536,6 +8570,7 @@ initiate_write_bmsafemap(bmsafemap, bp)
clrbit(inosused, ino);
jaddref->ja_state &= ~ATTACHED;
jaddref->ja_state |= UNDONE;
+ stat_jaddref++;
} else if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
panic("initiate_write_bmsafemap: inode %d "
"marked free", jaddref->ja_ino);
@@ -8564,6 +8599,7 @@ initiate_write_bmsafemap(bmsafemap, bp)
* it.
*/
if (cleared) {
+ stat_jnewblk++;
jnewblk->jn_state &= ~ATTACHED;
jnewblk->jn_state |= UNDONE;
} else if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
@@ -10581,6 +10617,7 @@ softdep_request_cleanup(fs, vp)
if (error != 0)
return (0);
}
+ process_removes(vp);
while (fs->fs_pendingblocks > 0 && fs->fs_cstotal.cs_nbfree <= needed) {
if (time_second > starttime)
return (0);
Modified: projects/suj/head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_vfsops.c Mon Jan 25 20:59:04 2010 (r202988)
+++ projects/suj/head/sys/ufs/ffs/ffs_vfsops.c Mon Jan 25 23:24:25 2010 (r202989)
@@ -299,7 +299,8 @@ ffs_mount(struct mount *mp)
if (fs->fs_clean == 0) {
fs->fs_flags |= FS_UNCLEAN;
if ((mp->mnt_flag & MNT_FORCE) ||
- ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
+ ((fs->fs_flags &
+ (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
(fs->fs_flags & FS_DOSOFTDEP))) {
printf("WARNING: %s was not %s\n",
fs->fs_fsmnt, "properly dismounted");
@@ -307,6 +308,9 @@ ffs_mount(struct mount *mp)
printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
fs->fs_fsmnt);
+ if (fs->fs_flags & FS_SUJ)
+ printf(
+"WARNING: Forced mount will invalidated journal contents\n");
return (EPERM);
}
}
@@ -707,7 +711,7 @@ ffs_mountfs(devvp, mp, td)
if (fs->fs_clean == 0) {
fs->fs_flags |= FS_UNCLEAN;
if (ronly || (mp->mnt_flag & MNT_FORCE) ||
- ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
+ ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
(fs->fs_flags & FS_DOSOFTDEP))) {
printf(
"WARNING: %s was not properly dismounted\n",
@@ -716,6 +720,9 @@ ffs_mountfs(devvp, mp, td)
printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
fs->fs_fsmnt);
+ if (fs->fs_flags & FS_SUJ)
+ printf(
+"WARNING: Forced mount will invalidated journal contents\n");
error = EPERM;
goto out;
}
Modified: projects/suj/head/sys/ufs/ffs/fs.h
==============================================================================
--- projects/suj/head/sys/ufs/ffs/fs.h Mon Jan 25 20:59:04 2010 (r202988)
+++ projects/suj/head/sys/ufs/ffs/fs.h Mon Jan 25 23:24:25 2010 (r202989)
@@ -411,13 +411,13 @@ CTASSERT(sizeof(struct fs) == 1376);
#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */
#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */
#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */
-#define FS_INDEXDIRS 0x0008 /* kernel supports indexed directories */
+#define FS_SUJ 0x0008 /* Filesystem using softupdate journal */
#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */
#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */
#define FS_GJOURNAL 0x0040 /* gjournaled file system */
#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */
#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */
-#define FS_SUJ 0x200 /* Filesystem using softupdate journal */
+#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */
/*
* Macros to access bits in the fs_active array.
@@ -657,17 +657,19 @@ lbn_level(ufs_lbn_t lbn)
#define JREC_SIZE 32 /* Record and segment header size. */
#define SUJ_MIN (1 * 1024 * 1024) /* Minimum journal size */
-#define SUJ_MAX (64 * SUJ_MIN) /* Maximum journal size */
+#define SUJ_MAX (32 * SUJ_MIN) /* Maximum journal size */
/*
* Size of the segment record header. There is at most one for each disk
* block and at least one for each filesystem block in the journal. The
- * segment header is followed by an array of records.
+ * segment header is followed by an array of records. fsck depends on
+ * the first element in each record being 'op' and the second being 'ino'.
*/
struct jsegrec {
uint64_t jsr_seq; /* Our sequence number */
uint64_t jsr_oldest; /* Oldest valid sequence number */
- uint32_t jsr_cnt; /* Count of valid records */
+ uint16_t jsr_cnt; /* Count of valid records */
+ uint16_t jsr_blocks; /* Count of DEV_BSIZE blocks. */
uint32_t jsr_crc; /* 32bit crc of the valid space */
ufs_time_t jsr_time; /* timestamp for mount instance */
};
More information about the svn-src-projects
mailing list