svn commit: r204991 - in projects/suj: 6/sbin/fsck_ffs
6/sbin/tunefs 6/sys/ufs/ffs 7/sbin/fsck_ffs 7/sbin/tunefs
7/sys/ufs/ffs 8/sbin/fsck_ffs 8/sbin/tunefs 8/sys/ufs/ffs
Jeff Roberson
jeff at FreeBSD.org
Thu Mar 11 01:02:28 UTC 2010
Author: jeff
Date: Thu Mar 11 01:02:27 2010
New Revision: 204991
URL: http://svn.freebsd.org/changeset/base/204991
Log:
- Catch up to changes made on the suj/head branch.
Modified:
projects/suj/6/sbin/fsck_ffs/suj.c
projects/suj/6/sbin/tunefs/tunefs.c
projects/suj/6/sys/ufs/ffs/ffs_softdep.c
projects/suj/7/sbin/fsck_ffs/suj.c
projects/suj/7/sbin/tunefs/tunefs.c
projects/suj/7/sys/ufs/ffs/ffs_softdep.c
projects/suj/8/sbin/fsck_ffs/suj.c
projects/suj/8/sbin/tunefs/tunefs.c
projects/suj/8/sys/ufs/ffs/ffs_softdep.c
Modified: projects/suj/6/sbin/fsck_ffs/suj.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/suj.c Wed Mar 10 23:02:06 2010 (r204990)
+++ projects/suj/6/sbin/fsck_ffs/suj.c Thu Mar 11 01:02:27 2010 (r204991)
@@ -142,10 +142,11 @@ uint64_t jbytes;
uint64_t jrecs;
typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int);
-static void ino_trunc(ino_t ino, off_t size);
+static void ino_trunc(ino_t, off_t);
static void ino_decr(ino_t);
static void ino_adjust(struct suj_ino *);
-static void ino_build(struct suj_ino *sino);
+static void ino_build(struct suj_ino *);
+static int blk_isfree(ufs2_daddr_t);
static void *
errmalloc(size_t n)
@@ -496,7 +497,7 @@ blk_setmask(struct jblkrec *brec, int *m
* to be freed. The mask value can be used to free partial blocks.
*/
static int
-blk_isfree(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
+blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
{
struct suj_blk *sblk;
struct suj_rec *srec;
@@ -532,7 +533,7 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino,
blk_setmask(brec, &mask);
}
if (debug)
- printf("blk_isfree: blk %jd sblk %jd off %d mask 0x%X\n",
+ printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n",
blk, sblk->sb_blk, off, mask);
return (mask >> off);
}
@@ -542,6 +543,9 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino,
* if any part of the indirect has been reallocated or the last journal
* entry was an allocation. Just allocated indirects may not have valid
* pointers yet and all of their children will have their own records.
+ * It is also not safe to follow an indirect if the cg bitmap has been
+ * cleared as a new allocation may write to the block prior to the journal
+ * being written.
*
* Returns 1 if it's safe to follow the indirect and 0 otherwise.
*/
@@ -559,7 +563,7 @@ blk_isindir(ufs2_daddr_t blk, ino_t ino,
brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec;
if (blk_equals(brec, ino, lbn, blk, fs->fs_frag))
if (brec->jb_op == JOP_FREEBLK)
- return (1);
+ return (!blk_isfree(blk));
return (0);
}
@@ -644,6 +648,19 @@ blk_free(ufs2_daddr_t bno, int mask, int
}
/*
+ * Returns 1 if the whole block starting at 'bno' is marked free and 0
+ * otherwise.
+ */
+static int
+blk_isfree(ufs2_daddr_t bno)
+{
+ struct suj_cg *sc;
+
+ sc = cg_lookup(dtog(fs, bno));
+ return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno));
+}
+
+/*
* Fetch an indirect block to find the block at a given lbn. The lbn
* may be negative to fetch a specific indirect block pointer or positive
* to fetch a specific block.
@@ -1059,7 +1076,7 @@ blk_free_visit(ino_t ino, ufs_lbn_t lbn,
{
int mask;
- mask = blk_isfree(blk, ino, lbn, frags);
+ mask = blk_freemask(blk, ino, lbn, frags);
if (debug)
printf("blk %jd freemask 0x%X\n", blk, mask);
blk_free(blk, mask, frags);
@@ -1076,7 +1093,7 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino
uint64_t resid;
int mask;
- mask = blk_isfree(blk, ino, lbn, frags);
+ mask = blk_freemask(blk, ino, lbn, frags);
if (debug)
printf("blk %jd freemask 0x%X\n", blk, mask);
resid = 0;
@@ -1615,7 +1632,7 @@ blk_check(struct suj_blk *sblk)
if (isat == 1) {
if (frags == brec->jb_frags)
continue;
- mask = blk_isfree(blk, brec->jb_ino, brec->jb_lbn,
+ mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn,
brec->jb_frags);
mask >>= frags;
blk += frags;
@@ -2259,7 +2276,8 @@ suj_verifyino(union dinode *ip)
return (-1);
}
- if (DIP(ip, di_flags) != (SF_IMMUTABLE | SF_NOUNLINK)) {
+ if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) !=
+ (SF_IMMUTABLE | SF_NOUNLINK)) {
printf("Invalid flags 0x%X for journal inode %d\n",
DIP(ip, di_flags), sujino);
return (-1);
@@ -2595,19 +2613,19 @@ suj_check(const char *filesys)
cg_apply(cg_check_blk);
cg_apply(cg_check_ino);
}
+ if (preen == 0 && reply("WRITE CHANGES") == 0)
+ return (0);
/*
* To remain idempotent with partial truncations the free bitmaps
* must be written followed by indirect blocks and lastly inode
* blocks. This preserves access to the modified pointers until
* they are freed.
*/
- if (preen || reply("WRITE CHANGES")) {
- cg_apply(cg_write);
- dblk_write();
- cg_apply(cg_write_inos);
- /* Write back superblock. */
- closedisk(filesys);
- }
+ cg_apply(cg_write);
+ dblk_write();
+ cg_apply(cg_write_inos);
+ /* Write back superblock. */
+ closedisk(filesys);
printf("** %jd journal records in %jd bytes for %.2f%% utilization\n",
jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100);
printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n",
Modified: projects/suj/6/sbin/tunefs/tunefs.c
==============================================================================
--- projects/suj/6/sbin/tunefs/tunefs.c Wed Mar 10 23:02:06 2010 (r204990)
+++ projects/suj/6/sbin/tunefs/tunefs.c Thu Mar 11 01:02:27 2010 (r204991)
@@ -529,6 +529,7 @@ journal_findfile(void)
{
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
+ ino_t ino;
int mode;
void *ip;
int i;
@@ -547,9 +548,9 @@ journal_findfile(void)
for (i = 0; i < NDADDR; i++) {
if (dp1->di_db[i] == 0)
break;
- if (dir_search(dp1->di_db[i],
- sblksize(&sblock, (off_t)dp1->di_size, i)) != 0)
- return (-1);
+ if ((ino = dir_search(dp1->di_db[i],
+ sblksize(&sblock, (off_t)dp1->di_size, i))) != 0)
+ return (ino);
}
} else {
if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) {
@@ -559,9 +560,9 @@ journal_findfile(void)
for (i = 0; i < NDADDR; i++) {
if (dp2->di_db[i] == 0)
break;
- if (dir_search(dp2->di_db[i],
- sblksize(&sblock, (off_t)dp2->di_size, i)) != 0)
- return (-1);
+ if ((ino = dir_search(dp2->di_db[i],
+ sblksize(&sblock, (off_t)dp2->di_size, i))) != 0)
+ return (ino);
}
}
@@ -760,10 +761,11 @@ journal_clear(void)
void *ip;
ino = journal_findfile();
- if (ino <= 0) {
+ if (ino == (ino_t)-1 || ino == 0) {
warnx("Journal file does not exist");
return;
}
+ printf("Clearing journal flags from inode %d\n", ino);
if (getino(&disk, &ip, ino, &mode) != 0) {
warn("Failed to get journal inode");
return;
@@ -801,11 +803,13 @@ journal_alloc(int64_t size)
* If the journal file exists we can't allocate it.
*/
ino = journal_findfile();
- if (ino > 0)
+ if (ino == (ino_t)-1)
+ return (-1);
+ if (ino > 0) {
warnx("Journal file %s already exists, please remove.",
SUJ_FILE);
- if (ino != 0)
return (-1);
+ }
/*
* If the user didn't supply a size pick one based on the filesystem
* size constrained with hardcoded MIN and MAX values. We opt for
@@ -859,13 +863,13 @@ journal_alloc(int64_t size)
dp1->di_size = size;
dp1->di_mode = IFREG | IREAD;
dp1->di_nlink = 1;
- dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
+ dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
} else {
bzero(dp2, sizeof(*dp2));
dp2->di_size = size;
dp2->di_mode = IFREG | IREAD;
dp2->di_nlink = 1;
- dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
+ dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
}
for (i = 0; i < NDADDR && resid; i++, resid--) {
blk = journal_balloc();
Modified: projects/suj/6/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/6/sys/ufs/ffs/ffs_softdep.c Wed Mar 10 23:02:06 2010 (r204990)
+++ projects/suj/6/sys/ufs/ffs/ffs_softdep.c Thu Mar 11 01:02:27 2010 (r204991)
@@ -1,5 +1,7 @@
/*-
- * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
+ * Copyright 1998, 2000 Marshall Kirk McKusick.
+ * Copyright 2009, 2010 Jeffrey W. Roberson <jeff at FreeBSD.org>
+ * All rights reserved.
*
* The soft updates code is derived from the appendix of a University
* of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
@@ -23,17 +25,16 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
*/
@@ -955,6 +956,11 @@ static int stat_jaddref; /* bufs redirti
static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */
static int stat_journal_min; /* Times hit journal min threshold */
static int stat_journal_low; /* Times hit journal low threshold */
+static int stat_journal_wait; /* Times blocked in jwait(). */
+static int stat_jwait_filepage; /* Times blocked in jwait() for filepage. */
+static int stat_jwait_freeblks; /* Times blocked in jwait() for freeblks. */
+static int stat_jwait_inode; /* Times blocked in jwait() for inodes. */
+static int stat_jwait_newblk; /* Times blocked in jwait() for newblks. */
SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
&max_softdeps, 0, "");
@@ -990,6 +996,16 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, jou
&stat_journal_low, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW,
&stat_journal_min, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW,
+ &stat_journal_wait, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW,
+ &stat_jwait_filepage, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW,
+ &stat_jwait_freeblks, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW,
+ &stat_jwait_inode, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW,
+ &stat_jwait_newblk, 0, "");
SYSCTL_DECL(_vfs_ffs);
@@ -2488,16 +2504,11 @@ softdep_process_journal(mp, flags)
* entries and add them to the segment. Notice cnt is
* off by one to account for the space required by the
* jsegrec. If we don't have a full block to log skip it
- * unless we haven't written anything in 5 seconds.
+ * unless we haven't written anything.
*/
cnt++;
- if (cnt < jrecmax) {
- if (segwritten)
- break;
- if (flags == MNT_NOWAIT &&
- (ticks - jblocks->jb_age) < hz*5)
- break;
- }
+ if (cnt < jrecmax && segwritten)
+ break;
/*
* Verify some free journal space. softdep_prealloc() should
* guarantee that we don't run out so this is indicative of
@@ -2621,23 +2632,16 @@ softdep_process_journal(mp, flags)
/*
* Write this one buffer and continue.
*/
-#if 1
WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
FREE_LOCK(&lk);
BO_LOCK(bp->b_bufobj);
bgetvp(ump->um_devvp, bp);
BO_UNLOCK(bp->b_bufobj);
- /* XXX Could bawrite here. */
- bwrite(bp);
- ACQUIRE_LOCK(&lk);
-#else
- /* This case simulates the write but does not log anything. */
- handle_written_jseg(jseg, bp);
- FREE_LOCK(&lk);
- brelse(bp);
+ if (flags == MNT_NOWAIT)
+ bawrite(bp);
+ else
+ bwrite(bp);
ACQUIRE_LOCK(&lk);
-#endif
- segwritten++;
}
/*
* If we've suspended the filesystem because we ran out of journal
@@ -3476,6 +3480,7 @@ jwait(wk)
struct worklist *wk;
{
+ stat_journal_wait++;
/*
* If IO has not started we process the journal. We can't mark the
* worklist item as IOWAITING because we drop the lock while
@@ -3544,8 +3549,10 @@ softdep_setup_trunc(vp, length, flags)
jtrunc->jt_size = DIP(ip, i_size);
ACQUIRE_LOCK(&lk);
add_to_journal(&jtrunc->jt_list);
- while (jsegdep->jd_seg == NULL)
+ while (jsegdep->jd_seg == NULL) {
+ stat_jwait_freeblks++;
jwait(&jtrunc->jt_list);
+ }
FREE_LOCK(&lk);
return (jsegdep);
@@ -4949,7 +4956,7 @@ softdep_setup_freeblocks(ip, length, fla
* for the allocations will suffice.
*/
inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
- if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED ||
+ if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED ||
(fs->fs_flags & FS_SUJ) == 0)
needj = 0;
else
@@ -5200,6 +5207,7 @@ deallocate_dependencies(bp, inodedep, fr
while ((jremref =
LIST_FIRST(&dirrem->dm_jremrefhd))
!= NULL) {
+ stat_jwait_filepage++;
jwait(&jremref->jr_list);
return (0);
}
@@ -5221,6 +5229,7 @@ deallocate_dependencies(bp, inodedep, fr
}
while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd))
!= NULL) {
+ stat_jwait_filepage++;
jwait(&jmvref->jm_list);
return (0);
}
@@ -5496,12 +5505,17 @@ softdep_freefile(pvp, ino, mode)
* will never be written.
*/
if (inodedep && inodedep->id_state & UNLINKED) {
+ /*
+ * Save the journal work to be freed with the bitmap
+ * before we clear UNLINKED. Otherwise it can be lost
+ * if the inode block is written.
+ */
+ handle_bufwait(inodedep, &freefile->fx_jwork);
clear_unlinked_inodedep(inodedep);
+ /* Re-acquire inodedep as we've dropped lk. */
inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
- if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) {
+ if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0)
inodedep->id_state |= GOINGAWAY;
- handle_bufwait(inodedep, &freefile->fx_jwork);
- }
}
if (inodedep == NULL || check_inode_unwritten(inodedep)) {
FREE_LOCK(&lk);
@@ -5621,21 +5635,24 @@ freework_freeblock(freework)
int complete;
int pending;
int bsize;
+ int needj;
freeblks = freework->fw_freeblks;
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
fs = ump->um_fs;
+ needj = freeblks->fb_list.wk_mp->mnt_kern_flag & MNTK_SUJ;
complete = 0;
LIST_INIT(&wkhd);
/*
* If we are canceling an existing jnewblk pass it to the free
* routine, otherwise pass the freeblk which will ultimately
- * release the freeblks
+ * release the freeblks. If we're not journaling, we can just
+ * free the freeblks immediately.
*/
if (!LIST_EMPTY(&freework->fw_jwork)) {
LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list);
complete = 1;
- } else
+ } else if (needj)
WORKLIST_INSERT_UNLOCKED(&wkhd, &freework->fw_list);
bsize = lfragtosize(fs, freework->fw_frags);
pending = btodb(bsize);
@@ -5652,7 +5669,7 @@ freework_freeblock(freework)
}
ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno,
bsize, freeblks->fb_previousinum, &wkhd);
- if (complete == 0)
+ if (complete == 0 && needj)
return;
/*
* The jnewblk will be discarded and the bits in the map never
@@ -5823,6 +5840,7 @@ indir_trunc(freework, dbn, lbn)
ufs2_daddr_t dbn;
ufs_lbn_t lbn;
{
+ struct freework *nfreework;
struct workhead wkhd;
struct jnewblk *jnewblk;
struct freeblks *freeblks;
@@ -5838,6 +5856,7 @@ indir_trunc(freework, dbn, lbn)
int i, nblocks, ufs1fmt;
int fs_pendingblocks;
int freedeps;
+ int needj;
int level;
int cnt;
@@ -5850,6 +5869,7 @@ indir_trunc(freework, dbn, lbn)
fs = ump->um_fs;
fs_pendingblocks = 0;
freedeps = 0;
+ needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ;
lbnadd = 1;
for (i = level; i > 0; i--)
lbnadd *= NINDIR(fs);
@@ -5941,7 +5961,8 @@ indir_trunc(freework, dbn, lbn)
cnt++;
}
ACQUIRE_LOCK(&lk);
- freework->fw_ref += NINDIR(fs) + 1;
+ if (needj)
+ freework->fw_ref += NINDIR(fs) + 1;
/* Any remaining journal work can be completed with freeblks. */
jwork_move(&freeblks->fb_jwork, &wkhd);
FREE_LOCK(&lk);
@@ -5950,6 +5971,7 @@ indir_trunc(freework, dbn, lbn)
nb = bap1[0];
else
nb = bap2[0];
+ nfreework = freework;
/*
* Reclaim on disk blocks.
*/
@@ -5965,13 +5987,14 @@ indir_trunc(freework, dbn, lbn)
continue;
cnt++;
if (level != 0) {
- struct freework *nfreework;
ufs_lbn_t nlbn;
nlbn = (lbn + 1) - (i * lbnadd);
- nfreework = newfreework(freeblks, freework, nlbn, nb,
- fs->fs_frag, 0);
- freedeps++;
+ if (needj != 0) {
+ nfreework = newfreework(freeblks, freework,
+ nlbn, nb, fs->fs_frag, 0);
+ freedeps++;
+ }
indir_trunc(nfreework, fsbtodb(fs, nb), nlbn);
} else {
struct freedep *freedep;
@@ -5981,7 +6004,8 @@ indir_trunc(freework, dbn, lbn)
* all blocks being released to the same CG.
*/
LIST_INIT(&wkhd);
- if (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb))) {
+ if (needj != 0 &&
+ (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) {
freedep = newfreedep(freework);
WORKLIST_INSERT_UNLOCKED(&wkhd,
&freedep->fd_list);
@@ -5989,22 +6013,37 @@ indir_trunc(freework, dbn, lbn)
}
ffs_blkfree(ump, fs, freeblks->fb_devvp, nb,
fs->fs_bsize, freeblks->fb_previousinum, &wkhd);
- fs_pendingblocks += nblocks;
}
}
- ACQUIRE_LOCK(&lk);
- freework->fw_off = i;
if (level == 0)
fs_pendingblocks = (nblocks * cnt);
- freework->fw_ref += freedeps;
- freework->fw_ref -= NINDIR(fs) + 1;
- if (freework->fw_ref != 0)
+ /*
+ * If we're not journaling we can free the indirect now. Otherwise
+ * setup the ref counts and offset so this indirect can be completed
+ * when its children are free.
+ */
+ if (needj == 0) {
+ fs_pendingblocks += nblocks;
+ dbn = dbtofsb(fs, dbn);
+ ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
+ freeblks->fb_previousinum, NULL);
+ ACQUIRE_LOCK(&lk);
+ freeblks->fb_chkcnt -= fs_pendingblocks;
+ if (freework->fw_blkno == dbn)
+ handle_written_freework(freework);
+ FREE_LOCK(&lk);
freework = NULL;
- FREE_LOCK(&lk);
- if (fs_pendingblocks) {
+ } else {
ACQUIRE_LOCK(&lk);
+ freework->fw_off = i;
+ freework->fw_ref += freedeps;
+ freework->fw_ref -= NINDIR(fs) + 1;
+ if (freework->fw_ref != 0)
+ freework = NULL;
freeblks->fb_chkcnt -= fs_pendingblocks;
FREE_LOCK(&lk);
+ }
+ if (fs_pendingblocks) {
UFS_LOCK(ump);
fs->fs_pendingblocks -= fs_pendingblocks;
UFS_UNLOCK(ump);
@@ -7870,10 +7909,14 @@ initiate_write_filepage(pagedep, bp)
* locked so the dependency can not go away.
*/
LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next)
- while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL)
+ while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) {
+ stat_jwait_filepage++;
jwait(&jremref->jr_list);
- while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL)
+ }
+ while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) {
+ stat_jwait_filepage++;
jwait(&jmvref->jm_list);
+ }
for (i = 0; i < DAHASHSZ; i++) {
LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
ep = (struct direct *)
@@ -9729,6 +9772,7 @@ again:
TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
== DEPCOMPLETE) {
+ stat_jwait_inode++;
jwait(&inoref->if_list);
goto again;
}
@@ -9867,6 +9911,7 @@ restart:
TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
== DEPCOMPLETE) {
+ stat_jwait_inode++;
jwait(&inoref->if_list);
goto restart;
}
@@ -10110,6 +10155,7 @@ loop:
case D_ALLOCINDIR:
newblk = WK_NEWBLK(wk);
if (newblk->nb_jnewblk != NULL) {
+ stat_jwait_newblk++;
jwait(&newblk->nb_jnewblk->jn_list);
goto restart;
}
@@ -10135,6 +10181,7 @@ loop:
&WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
newblk = (struct newblk *)aip;
if (newblk->nb_jnewblk != NULL) {
+ stat_jwait_newblk++;
jwait(&newblk->nb_jnewblk->jn_list);
goto restart;
}
@@ -10262,6 +10309,7 @@ restart:
TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
== DEPCOMPLETE) {
+ stat_jwait_inode++;
jwait(&inoref->if_list);
goto restart;
}
@@ -10304,6 +10352,7 @@ flush_deplist(listhead, waitfor, errorp)
TAILQ_FOREACH(adp, listhead, ad_next) {
newblk = (struct newblk *)adp;
if (newblk->nb_jnewblk != NULL) {
+ stat_jwait_newblk++;
jwait(&newblk->nb_jnewblk->jn_list);
return (1);
}
@@ -10368,6 +10417,7 @@ flush_newblk_dep(vp, mp, lbn)
* Flush the journal.
*/
if (newblk->nb_jnewblk != NULL) {
+ stat_jwait_newblk++;
jwait(&newblk->nb_jnewblk->jn_list);
continue;
}
@@ -10477,6 +10527,7 @@ restart:
TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
== DEPCOMPLETE) {
+ stat_jwait_inode++;
jwait(&inoref->if_list);
goto restart;
}
@@ -10636,12 +10687,12 @@ softdep_request_cleanup(fs, vp)
if (error != 0)
return (0);
}
- process_removes(vp);
while (fs->fs_pendingblocks > 0 && fs->fs_cstotal.cs_nbfree <= needed) {
if (time_second > starttime)
return (0);
UFS_UNLOCK(ump);
ACQUIRE_LOCK(&lk);
+ process_removes(vp);
if (ump->softdep_on_worklist > 0 &&
process_worklist_item(UFSTOVFS(ump), LK_NOWAIT) != -1) {
stat_worklist_push += 1;
@@ -10898,6 +10949,8 @@ softdep_count_dependencies(bp, wantcount
struct allocindir *aip;
struct pagedep *pagedep;
struct dirrem *dirrem;
+ struct newblk *newblk;
+ struct mkdir *mkdir;
struct diradd *dap;
int i, retval;
@@ -10992,12 +11045,30 @@ softdep_count_dependencies(bp, wantcount
}
continue;
- case D_FREEWORK:
- case D_FREEDEP:
- case D_JSEGDEP:
case D_ALLOCDIRECT:
case D_ALLOCINDIR:
+ newblk = WK_NEWBLK(wk);
+ if (newblk->nb_jnewblk) {
+ /* Journal allocate dependency. */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
case D_MKDIR:
+ mkdir = WK_MKDIR(wk);
+ if (mkdir->md_jaddref) {
+ /* Journal reference dependency. */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
+ continue;
+
+ case D_FREEWORK:
+ case D_FREEDEP:
+ case D_JSEGDEP:
case D_JSEG:
case D_SBDEP:
/* never a dependency on these blocks */
Modified: projects/suj/7/sbin/fsck_ffs/suj.c
==============================================================================
--- projects/suj/7/sbin/fsck_ffs/suj.c Wed Mar 10 23:02:06 2010 (r204990)
+++ projects/suj/7/sbin/fsck_ffs/suj.c Thu Mar 11 01:02:27 2010 (r204991)
@@ -142,10 +142,11 @@ uint64_t jbytes;
uint64_t jrecs;
typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int);
-static void ino_trunc(ino_t ino, off_t size);
+static void ino_trunc(ino_t, off_t);
static void ino_decr(ino_t);
static void ino_adjust(struct suj_ino *);
-static void ino_build(struct suj_ino *sino);
+static void ino_build(struct suj_ino *);
+static int blk_isfree(ufs2_daddr_t);
static void *
errmalloc(size_t n)
@@ -496,7 +497,7 @@ blk_setmask(struct jblkrec *brec, int *m
* to be freed. The mask value can be used to free partial blocks.
*/
static int
-blk_isfree(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
+blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
{
struct suj_blk *sblk;
struct suj_rec *srec;
@@ -532,7 +533,7 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino,
blk_setmask(brec, &mask);
}
if (debug)
- printf("blk_isfree: blk %jd sblk %jd off %d mask 0x%X\n",
+ printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n",
blk, sblk->sb_blk, off, mask);
return (mask >> off);
}
@@ -542,6 +543,9 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino,
* if any part of the indirect has been reallocated or the last journal
* entry was an allocation. Just allocated indirects may not have valid
* pointers yet and all of their children will have their own records.
+ * It is also not safe to follow an indirect if the cg bitmap has been
+ * cleared as a new allocation may write to the block prior to the journal
+ * being written.
*
* Returns 1 if it's safe to follow the indirect and 0 otherwise.
*/
@@ -559,7 +563,7 @@ blk_isindir(ufs2_daddr_t blk, ino_t ino,
brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec;
if (blk_equals(brec, ino, lbn, blk, fs->fs_frag))
if (brec->jb_op == JOP_FREEBLK)
- return (1);
+ return (!blk_isfree(blk));
return (0);
}
@@ -644,6 +648,19 @@ blk_free(ufs2_daddr_t bno, int mask, int
}
/*
+ * Returns 1 if the whole block starting at 'bno' is marked free and 0
+ * otherwise.
+ */
+static int
+blk_isfree(ufs2_daddr_t bno)
+{
+ struct suj_cg *sc;
+
+ sc = cg_lookup(dtog(fs, bno));
+ return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno));
+}
+
+/*
* Fetch an indirect block to find the block at a given lbn. The lbn
* may be negative to fetch a specific indirect block pointer or positive
* to fetch a specific block.
@@ -1059,7 +1076,7 @@ blk_free_visit(ino_t ino, ufs_lbn_t lbn,
{
int mask;
- mask = blk_isfree(blk, ino, lbn, frags);
+ mask = blk_freemask(blk, ino, lbn, frags);
if (debug)
printf("blk %jd freemask 0x%X\n", blk, mask);
blk_free(blk, mask, frags);
@@ -1076,7 +1093,7 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino
uint64_t resid;
int mask;
- mask = blk_isfree(blk, ino, lbn, frags);
+ mask = blk_freemask(blk, ino, lbn, frags);
if (debug)
printf("blk %jd freemask 0x%X\n", blk, mask);
resid = 0;
@@ -1615,7 +1632,7 @@ blk_check(struct suj_blk *sblk)
if (isat == 1) {
if (frags == brec->jb_frags)
continue;
- mask = blk_isfree(blk, brec->jb_ino, brec->jb_lbn,
+ mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn,
brec->jb_frags);
mask >>= frags;
blk += frags;
@@ -2259,7 +2276,8 @@ suj_verifyino(union dinode *ip)
return (-1);
}
- if (DIP(ip, di_flags) != (SF_IMMUTABLE | SF_NOUNLINK)) {
+ if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) !=
+ (SF_IMMUTABLE | SF_NOUNLINK)) {
printf("Invalid flags 0x%X for journal inode %d\n",
DIP(ip, di_flags), sujino);
return (-1);
@@ -2595,19 +2613,19 @@ suj_check(const char *filesys)
cg_apply(cg_check_blk);
cg_apply(cg_check_ino);
}
+ if (preen == 0 && reply("WRITE CHANGES") == 0)
+ return (0);
/*
* To remain idempotent with partial truncations the free bitmaps
* must be written followed by indirect blocks and lastly inode
* blocks. This preserves access to the modified pointers until
* they are freed.
*/
- if (preen || reply("WRITE CHANGES")) {
- cg_apply(cg_write);
- dblk_write();
- cg_apply(cg_write_inos);
- /* Write back superblock. */
- closedisk(filesys);
- }
+ cg_apply(cg_write);
+ dblk_write();
+ cg_apply(cg_write_inos);
+ /* Write back superblock. */
+ closedisk(filesys);
printf("** %jd journal records in %jd bytes for %.2f%% utilization\n",
jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100);
printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n",
Modified: projects/suj/7/sbin/tunefs/tunefs.c
==============================================================================
--- projects/suj/7/sbin/tunefs/tunefs.c Wed Mar 10 23:02:06 2010 (r204990)
+++ projects/suj/7/sbin/tunefs/tunefs.c Thu Mar 11 01:02:27 2010 (r204991)
@@ -562,6 +562,7 @@ journal_findfile(void)
{
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
+ ino_t ino;
int mode;
void *ip;
int i;
@@ -580,9 +581,9 @@ journal_findfile(void)
for (i = 0; i < NDADDR; i++) {
if (dp1->di_db[i] == 0)
break;
- if (dir_search(dp1->di_db[i],
- sblksize(&sblock, (off_t)dp1->di_size, i)) != 0)
- return (-1);
+ if ((ino = dir_search(dp1->di_db[i],
+ sblksize(&sblock, (off_t)dp1->di_size, i))) != 0)
+ return (ino);
}
} else {
if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) {
@@ -592,9 +593,9 @@ journal_findfile(void)
for (i = 0; i < NDADDR; i++) {
if (dp2->di_db[i] == 0)
break;
- if (dir_search(dp2->di_db[i],
- sblksize(&sblock, (off_t)dp2->di_size, i)) != 0)
- return (-1);
+ if ((ino = dir_search(dp2->di_db[i],
+ sblksize(&sblock, (off_t)dp2->di_size, i))) != 0)
+ return (ino);
}
}
@@ -793,10 +794,11 @@ journal_clear(void)
void *ip;
ino = journal_findfile();
- if (ino <= 0) {
+ if (ino == (ino_t)-1 || ino == 0) {
warnx("Journal file does not exist");
return;
}
+ printf("Clearing journal flags from inode %d\n", ino);
if (getino(&disk, &ip, ino, &mode) != 0) {
warn("Failed to get journal inode");
return;
@@ -834,11 +836,13 @@ journal_alloc(int64_t size)
* If the journal file exists we can't allocate it.
*/
ino = journal_findfile();
- if (ino > 0)
+ if (ino == (ino_t)-1)
+ return (-1);
+ if (ino > 0) {
warnx("Journal file %s already exists, please remove.",
SUJ_FILE);
- if (ino != 0)
return (-1);
+ }
/*
* If the user didn't supply a size pick one based on the filesystem
* size constrained with hardcoded MIN and MAX values. We opt for
@@ -892,13 +896,13 @@ journal_alloc(int64_t size)
dp1->di_size = size;
dp1->di_mode = IFREG | IREAD;
dp1->di_nlink = 1;
- dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
+ dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
} else {
bzero(dp2, sizeof(*dp2));
dp2->di_size = size;
dp2->di_mode = IFREG | IREAD;
dp2->di_nlink = 1;
- dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
+ dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
}
for (i = 0; i < NDADDR && resid; i++, resid--) {
blk = journal_balloc();
Modified: projects/suj/7/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/7/sys/ufs/ffs/ffs_softdep.c Wed Mar 10 23:02:06 2010 (r204990)
+++ projects/suj/7/sys/ufs/ffs/ffs_softdep.c Thu Mar 11 01:02:27 2010 (r204991)
@@ -1,5 +1,7 @@
/*-
- * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
+ * Copyright 1998, 2000 Marshall Kirk McKusick.
+ * Copyright 2009, 2010 Jeffrey W. Roberson <jeff at FreeBSD.org>
+ * All rights reserved.
*
* The soft updates code is derived from the appendix of a University
* of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
@@ -23,17 +25,16 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
*/
@@ -958,6 +959,11 @@ static int stat_jaddref; /* bufs redirti
static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */
static int stat_journal_min; /* Times hit journal min threshold */
static int stat_journal_low; /* Times hit journal low threshold */
+static int stat_journal_wait; /* Times blocked in jwait(). */
+static int stat_jwait_filepage; /* Times blocked in jwait() for filepage. */
+static int stat_jwait_freeblks; /* Times blocked in jwait() for freeblks. */
+static int stat_jwait_inode; /* Times blocked in jwait() for inodes. */
+static int stat_jwait_newblk; /* Times blocked in jwait() for newblks. */
SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
&max_softdeps, 0, "");
@@ -993,6 +999,16 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, jou
&stat_journal_low, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW,
&stat_journal_min, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW,
+ &stat_journal_wait, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW,
+ &stat_jwait_filepage, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW,
+ &stat_jwait_freeblks, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW,
+ &stat_jwait_inode, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW,
+ &stat_jwait_newblk, 0, "");
SYSCTL_DECL(_vfs_ffs);
@@ -2519,16 +2535,11 @@ softdep_process_journal(mp, flags)
* entries and add them to the segment. Notice cnt is
* off by one to account for the space required by the
* jsegrec. If we don't have a full block to log skip it
- * unless we haven't written anything in 5 seconds.
+ * unless we haven't written anything.
*/
cnt++;
- if (cnt < jrecmax) {
- if (segwritten)
- break;
- if (flags == MNT_NOWAIT &&
- (ticks - jblocks->jb_age) < hz*5)
- break;
- }
+ if (cnt < jrecmax && segwritten)
+ break;
/*
* Verify some free journal space. softdep_prealloc() should
* guarantee that we don't run out so this is indicative of
@@ -2652,23 +2663,16 @@ softdep_process_journal(mp, flags)
/*
* Write this one buffer and continue.
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list