svn commit: r203013 - in projects/suj: 6/lib/libufs 6/sbin/fsck_ffs
6/sbin/tunefs 6/sys/sys 6/sys/ufs/ffs 7/lib/libufs
7/sbin/fsck_ffs 7/sbin/mount 7/sbin/tunefs 7/sys/sys
7/sys/ufs/ffs 7/sys/ufs/u...
Jeff Roberson
jeff at FreeBSD.org
Tue Jan 26 06:45:39 UTC 2010
Author: jeff
Date: Tue Jan 26 06:45:38 2010
New Revision: 203013
URL: http://svn.freebsd.org/changeset/base/203013
Log:
- Merge r203012 from suj/head
Modified:
projects/suj/6/lib/libufs/cgroup.c
projects/suj/6/lib/libufs/libufs.h
projects/suj/6/sbin/fsck_ffs/pass4.c
projects/suj/6/sbin/fsck_ffs/suj.c
projects/suj/6/sbin/tunefs/tunefs.c
projects/suj/6/sys/sys/mount.h
projects/suj/6/sys/ufs/ffs/ffs_alloc.c
projects/suj/6/sys/ufs/ffs/ffs_softdep.c
projects/suj/6/sys/ufs/ffs/ffs_vfsops.c
projects/suj/6/sys/ufs/ffs/fs.h
projects/suj/7/lib/libufs/cgroup.c
projects/suj/7/lib/libufs/libufs.h
projects/suj/7/sbin/fsck_ffs/pass4.c
projects/suj/7/sbin/fsck_ffs/suj.c
projects/suj/7/sbin/mount/mount.c
projects/suj/7/sbin/tunefs/tunefs.c
projects/suj/7/sys/sys/mount.h
projects/suj/7/sys/ufs/ffs/ffs_alloc.c
projects/suj/7/sys/ufs/ffs/ffs_softdep.c
projects/suj/7/sys/ufs/ffs/ffs_vfsops.c
projects/suj/7/sys/ufs/ffs/fs.h
projects/suj/7/sys/ufs/ufs/inode.h
projects/suj/8/lib/libufs/cgroup.c
projects/suj/8/lib/libufs/libufs.h
projects/suj/8/sbin/fsck_ffs/pass4.c
projects/suj/8/sbin/fsck_ffs/suj.c
projects/suj/8/sbin/mount/mount.c
projects/suj/8/sbin/tunefs/tunefs.c
projects/suj/8/sys/sys/mount.h
projects/suj/8/sys/ufs/ffs/ffs_alloc.c
projects/suj/8/sys/ufs/ffs/ffs_softdep.c
projects/suj/8/sys/ufs/ffs/ffs_vfsops.c
projects/suj/8/sys/ufs/ffs/fs.h
projects/suj/8/sys/ufs/ufs/inode.h
Modified: projects/suj/6/lib/libufs/cgroup.c
==============================================================================
--- projects/suj/6/lib/libufs/cgroup.c Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/lib/libufs/cgroup.c Tue Jan 26 06:45:38 2010 (r203013)
@@ -71,6 +71,67 @@ gotit:
return (cgbase(fs, cgp->cg_cgx) + blkstofrags(fs, bno));
}
+int
+cgbfree(struct uufsd *disk, ufs2_daddr_t bno, long size)
+{
+ u_int8_t *blksfree;
+ struct fs *fs;
+ struct cg *cgp;
+ ufs1_daddr_t fragno, cgbno;
+ int i, cg, blk, frags, bbase;
+
+ fs = &disk->d_fs;
+ cg = dtog(fs, bno);
+ if (cgread1(disk, cg) != 1)
+ return (-1);
+ cgp = &disk->d_cg;
+ cgbno = dtogd(fs, bno);
+ blksfree = cg_blksfree(cgp);
+ if (size == fs->fs_bsize) {
+ fragno = fragstoblks(fs, cgbno);
+ ffs_setblock(fs, blksfree, fragno);
+ ffs_clusteracct(fs, cgp, fragno, 1);
+ cgp->cg_cs.cs_nbfree++;
+ fs->fs_cstotal.cs_nbfree++;
+ fs->fs_cs(fs, cg).cs_nbfree++;
+ } else {
+ bbase = cgbno - fragnum(fs, cgbno);
+ /*
+ * decrement the counts associated with the old frags
+ */
+ blk = blkmap(fs, blksfree, bbase);
+ ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+ /*
+ * deallocate the fragment
+ */
+ frags = numfrags(fs, size);
+ for (i = 0; i < frags; i++)
+ setbit(blksfree, cgbno + i);
+ cgp->cg_cs.cs_nffree += i;
+ fs->fs_cstotal.cs_nffree += i;
+ fs->fs_cs(fs, cg).cs_nffree += i;
+ /*
+ * add back in counts associated with the new frags
+ */
+ blk = blkmap(fs, blksfree, bbase);
+ ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+ /*
+ * if a complete block has been reassembled, account for it
+ */
+ fragno = fragstoblks(fs, bbase);
+ if (ffs_isblock(fs, blksfree, fragno)) {
+ cgp->cg_cs.cs_nffree -= fs->fs_frag;
+ fs->fs_cstotal.cs_nffree -= fs->fs_frag;
+ fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
+ ffs_clusteracct(fs, cgp, fragno, 1);
+ cgp->cg_cs.cs_nbfree++;
+ fs->fs_cstotal.cs_nbfree++;
+ fs->fs_cs(fs, cg).cs_nbfree++;
+ }
+ }
+ return cgwrite(disk);
+}
+
ino_t
cgialloc(struct uufsd *disk)
{
Modified: projects/suj/6/lib/libufs/libufs.h
==============================================================================
--- projects/suj/6/lib/libufs/libufs.h Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/lib/libufs/libufs.h Tue Jan 26 06:45:38 2010 (r203013)
@@ -110,6 +110,7 @@ ssize_t bwrite(struct uufsd *, ufs2_dadd
* cgroup.c
*/
ufs2_daddr_t cgballoc(struct uufsd *);
+int cgbfree(struct uufsd *, ufs2_daddr_t, long);
ino_t cgialloc(struct uufsd *);
int cgread(struct uufsd *);
int cgread1(struct uufsd *, int);
Modified: projects/suj/6/sbin/fsck_ffs/pass4.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/pass4.c Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/sbin/fsck_ffs/pass4.c Tue Jan 26 06:45:38 2010 (r203013)
@@ -72,9 +72,6 @@ pass4(void)
for (i = 0; i < inostathead[cg].il_numalloced; i++, inumber++) {
if (inumber < ROOTINO)
continue;
- if (sblock.fs_flags & FS_SUJ &&
- inumber == sblock.fs_sujournal)
- continue;
idesc.id_number = inumber;
switch (inoinfo(inumber)->ino_state) {
Modified: projects/suj/6/sbin/fsck_ffs/suj.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/suj.c Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/sbin/fsck_ffs/suj.c Tue Jan 26 06:45:38 2010 (r203013)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <stdint.h>
#include <libufs.h>
+#include <string.h>
#include <strings.h>
#include <err.h>
#include <assert.h>
@@ -63,6 +64,7 @@ struct suj_seg {
struct suj_rec {
TAILQ_ENTRY(suj_rec) sr_next;
union jrec *sr_rec;
+ int sr_alt; /* Is alternate address? */
};
TAILQ_HEAD(srechd, suj_rec);
@@ -127,6 +129,7 @@ TAILQ_HEAD(seghd, suj_seg) allsegs;
uint64_t oldseq;
static struct uufsd *disk = NULL;
static struct fs *fs = NULL;
+ino_t sujino;
/*
* Summary statistics.
@@ -191,8 +194,7 @@ closedisk(const char *devnam)
fs->fs_cstotal.cs_nifree += cgsum->cs_nifree;
fs->fs_cstotal.cs_ndir += cgsum->cs_ndir;
}
- /* XXX Don't set clean for now, we don't trust the journal. */
- /* fs->fs_clean = 1; */
+ fs->fs_clean = 1;
fs->fs_time = time(NULL);
fs->fs_mtime = time(NULL);
if (sbwrite(disk, 0) == -1)
@@ -1823,6 +1825,7 @@ ino_append(union jrec *rec)
sino->si_hasrecs = 1;
srec = errmalloc(sizeof(*srec));
srec->sr_rec = rec;
+ srec->sr_alt = 0;
TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next);
}
@@ -1844,9 +1847,10 @@ ino_build_ref(struct suj_ino *sino, stru
refrec = (struct jrefrec *)srec->sr_rec;
if (debug)
- printf("ino_build: op %d, ino %d, nlink %d, parent %d, diroff %jd\n",
- refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent,
- refrec->jr_diroff);
+ printf("ino_build: op %d, ino %d, nlink %d, "
+ "parent %d, diroff %jd\n",
+ refrec->jr_op, refrec->jr_ino, refrec->jr_nlink,
+ refrec->jr_parent, refrec->jr_diroff);
/*
* Search for a mvrec that matches this offset. Whether it's an add
@@ -1871,16 +1875,19 @@ ino_build_ref(struct suj_ino *sino, stru
rrn = errmalloc(sizeof(*refrec));
*rrn = *refrec;
rrn->jr_op = JOP_ADDREF;
+ rrn->jr_diroff = mvrec->jm_oldoff;
srn = errmalloc(sizeof(*srec));
+ srn->sr_alt = 1;
srn->sr_rec = (union jrec *)rrn;
ino_build_ref(sino, srn);
- refrec->jr_diroff = mvrec->jm_oldoff;
}
}
}
/*
* We walk backwards so that adds and removes are evaluated in the
- * correct order.
+ * correct order. If a primary record conflicts with an alt keep
+ * the primary and discard the alt. We must track this to keep
+ * the correct number of removes in the list.
*/
for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn;
srn = TAILQ_PREV(srn, srechd, sr_next)) {
@@ -1890,7 +1897,17 @@ ino_build_ref(struct suj_ino *sino, stru
continue;
if (debug)
printf("Discarding dup.\n");
- rrn->jr_mode = refrec->jr_mode;
+ if (srn->sr_alt == 0) {
+ rrn->jr_mode = refrec->jr_mode;
+ return;
+ }
+ /*
+ * Replace the record in place with the old nlink in case
+ * we replace the head of the list. Abandon srec as a dup.
+ */
+ refrec->jr_nlink = rrn->jr_nlink;
+ srn->sr_rec = srec->sr_rec;
+ srn->sr_alt = srec->sr_alt;
return;
}
TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next);
@@ -1930,9 +1947,12 @@ ino_move_ref(struct suj_ino *sino, struc
/*
* When an entry is moved we don't know whether the write
* to move has completed yet. To resolve this we create
- * a new add dependency in the new location as if it were added
- * twice. Only one will succeed.
+ * a new add dependency in the new location as if it were
+ * added twice. Only one will succeed. Consider the
+ * new offset the primary location for the inode and the
+ * old offset the alt.
*/
+ srn->sr_alt = 1;
refrec = errmalloc(sizeof(*refrec));
refrec->jr_op = JOP_ADDREF;
refrec->jr_ino = mvrec->jm_ino;
@@ -1941,12 +1961,14 @@ ino_move_ref(struct suj_ino *sino, struc
refrec->jr_mode = rrn->jr_mode;
refrec->jr_nlink = rrn->jr_nlink;
srn = errmalloc(sizeof(*srn));
+ srn->sr_alt = 0;
srn->sr_rec = (union jrec *)refrec;
ino_build_ref(sino, srn);
break;
}
/*
- * Add this mvrec to the queue of pending mvs.
+ * Add this mvrec to the queue of pending mvs, possibly collapsing
+ * it with a prior move for the same inode and offset.
*/
for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn;
srn = TAILQ_PREV(srn, srechd, sr_next)) {
@@ -2195,19 +2217,25 @@ suj_verifyino(union dinode *ip)
if (DIP(ip, di_nlink) != 1) {
printf("Invalid link count %d for journal inode %d\n",
- DIP(ip, di_nlink), fs->fs_sujournal);
+ DIP(ip, di_nlink), sujino);
+ return (-1);
+ }
+
+ if (DIP(ip, di_flags) != (SF_IMMUTABLE | SF_NOUNLINK)) {
+ printf("Invalid flags 0x%X for journal inode %d\n",
+ DIP(ip, di_flags), sujino);
return (-1);
}
- if (DIP(ip, di_mode) != IFREG) {
- printf("Invalid mode %d for journal inode %d\n",
- DIP(ip, di_mode), fs->fs_sujournal);
+ if (DIP(ip, di_mode) != (IFREG | IREAD)) {
+ printf("Invalid mode %o for journal inode %d\n",
+ DIP(ip, di_mode), sujino);
return (-1);
}
if (DIP(ip, di_size) < SUJ_MIN || DIP(ip, di_size) > SUJ_MAX) {
printf("Invalid size %jd for journal inode %d\n",
- DIP(ip, di_size), fs->fs_sujournal);
+ DIP(ip, di_size), sujino);
return (-1);
}
@@ -2447,20 +2475,60 @@ restart:
}
/*
+ * Search a directory block for the SUJ_FILE.
+ */
+static void
+suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
+{
+ char block[MAXBSIZE];
+ struct direct *dp;
+ int bytes;
+ int off;
+
+ if (sujino)
+ return;
+ bytes = lfragtosize(fs, frags);
+ if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0)
+ err(1, "Failed to read ROOTINO directory block %jd", blk);
+ for (off = 0; off < bytes; off += dp->d_reclen) {
+ dp = (struct direct *)&block[off];
+ if (dp->d_reclen == 0)
+ break;
+ if (dp->d_ino == 0)
+ continue;
+ if (dp->d_namlen != strlen(SUJ_FILE))
+ continue;
+ if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0)
+ continue;
+ sujino = dp->d_ino;
+ return;
+ }
+}
+
+/*
* Orchestrate the verification of a filesystem via the softupdates journal.
*/
int
suj_check(const char *filesys)
{
union dinode *jip;
+ union dinode *ip;
uint64_t blocks;
opendisk(filesys);
TAILQ_INIT(&allsegs);
/*
+ * Find the journal inode.
+ */
+ ip = ino_read(ROOTINO);
+ sujino = 0;
+ ino_visit(ip, ROOTINO, suj_find, 0);
+ if (sujino == 0)
+ errx(1, "Journal inode removed. Use tunefs to re-create.");
+ /*
* Fetch the journal inode and verify it.
*/
- jip = ino_read(fs->fs_sujournal);
+ jip = ino_read(sujino);
printf("** SU+J Recovering %s\n", filesys);
if (suj_verifyino(jip) != 0)
return (-1);
@@ -2469,11 +2537,11 @@ suj_check(const char *filesys)
* available journal blocks in with suj_read().
*/
printf("** Reading %jd byte journal from inode %d.\n",
- DIP(jip, di_size), fs->fs_sujournal);
+ DIP(jip, di_size), sujino);
suj_jblocks = jblocks_create();
- blocks = ino_visit(jip, fs->fs_sujournal, suj_add_block, 0);
+ blocks = ino_visit(jip, sujino, suj_add_block, 0);
if (blocks != numfrags(fs, DIP(jip, di_size)))
- errx(1, "Sparse journal inode %d.\n", fs->fs_sujournal);
+ errx(1, "Sparse journal inode %d.\n", sujino);
suj_read();
jblocks_destroy(suj_jblocks);
suj_jblocks = NULL;
Modified: projects/suj/6/sbin/tunefs/tunefs.c
==============================================================================
--- projects/suj/6/sbin/tunefs/tunefs.c Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/sbin/tunefs/tunefs.c Tue Jan 26 06:45:38 2010 (r203013)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>
+#include <ufs/ufs/dir.h>
#include <ctype.h>
#include <err.h>
@@ -74,6 +75,7 @@ struct uufsd disk;
void usage(void);
void printfs(void);
int journal_alloc(int64_t size);
+void journal_clear(void);
void sbdirty(void);
int
@@ -327,11 +329,11 @@ main(int argc, char *argv[])
if ((~sblock.fs_flags & FS_SUJ) == FS_SUJ) {
warnx("%s remains unchanged as disabled", name);
} else {
- sbdirty();
+ journal_clear();
sblock.fs_flags &= ~(FS_DOSOFTDEP | FS_SUJ);
- sblock.fs_sujournal = 0;
sblock.fs_sujfree = 0;
- warnx("%s cleared", name);
+ warnx("%s cleared, "
+ "remove .sujournal to reclaim space", name);
}
}
}
@@ -452,11 +454,9 @@ journal_balloc(void)
{
ufs2_daddr_t blk;
struct cg *cgp;
- struct fs *fs;
int valid;
cgp = &disk.d_cg;
- fs = &disk.d_fs;
for (;;) {
blk = cgballoc(&disk);
if (blk > 0)
@@ -482,13 +482,231 @@ journal_balloc(void)
warnx("Failed to find sufficient free blocks for the journal");
return -1;
}
- if (bwrite(&disk, fsbtodb(fs, blk), clrbuf, fs->fs_bsize) <= 0) {
+ if (bwrite(&disk, fsbtodb(&sblock, blk), clrbuf,
+ sblock.fs_bsize) <= 0) {
warn("Failed to initialize new block");
return -1;
}
return (blk);
}
+/*
+ * Search a directory block for the SUJ_FILE.
+ */
+static ino_t
+dir_search(ufs2_daddr_t blk, int bytes)
+{
+ char block[MAXBSIZE];
+ struct direct *dp;
+ int off;
+
+ if (bread(&disk, fsbtodb(&sblock, blk), block, bytes) <= 0) {
+ warn("Failed to read dir block");
+ return (-1);
+ }
+ for (off = 0; off < bytes; off += dp->d_reclen) {
+ dp = (struct direct *)&block[off];
+ if (dp->d_reclen == 0)
+ break;
+ if (dp->d_ino == 0)
+ continue;
+ if (dp->d_namlen != strlen(SUJ_FILE))
+ continue;
+ if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0)
+ continue;
+ return (dp->d_ino);
+ }
+
+ return (0);
+}
+
+/*
+ * Search in the ROOTINO for the SUJ_FILE. If it exists we can not enable
+ * journaling.
+ */
+static ino_t
+journal_findfile(void)
+{
+ struct ufs1_dinode *dp1;
+ struct ufs2_dinode *dp2;
+ int mode;
+ void *ip;
+ int i;
+
+ if (getino(&disk, &ip, ROOTINO, &mode) != 0) {
+ warn("Failed to get root inode");
+ return (-1);
+ }
+ dp2 = ip;
+ dp1 = ip;
+ if (sblock.fs_magic == FS_UFS1_MAGIC) {
+ if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) {
+ warnx("ROOTINO extends beyond direct blocks.");
+ return (-1);
+ }
+ for (i = 0; i < NDADDR; i++) {
+ if (dp1->di_db[i] == 0)
+ break;
+ if (dir_search(dp1->di_db[i],
+ sblksize(&sblock, (off_t)dp1->di_size, i)) != 0)
+ return (-1);
+ }
+ } else {
+ if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) {
+ warnx("ROOTINO extends beyond direct blocks.");
+ return (-1);
+ }
+ for (i = 0; i < NDADDR; i++) {
+ if (dp2->di_db[i] == 0)
+ break;
+ if (dir_search(dp2->di_db[i],
+ sblksize(&sblock, (off_t)dp2->di_size, i)) != 0)
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Insert the journal at inode 'ino' into directory blk 'blk' at the first
+ * free offset of 'off'. DIRBLKSIZ blocks after off are initialized as
+ * empty.
+ */
+static int
+dir_insert(ufs2_daddr_t blk, off_t off, ino_t ino)
+{
+ struct direct *dp;
+ char block[MAXBSIZE];
+
+ if (bread(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) {
+ warn("Failed to read dir block");
+ return (-1);
+ }
+ bzero(&block[off], sblock.fs_bsize - off);
+ dp = (struct direct *)&block[off];
+ dp->d_ino = ino;
+ dp->d_reclen = DIRBLKSIZ;
+ dp->d_type = DT_REG;
+ dp->d_namlen = strlen(SUJ_FILE);
+ bcopy(SUJ_FILE, &dp->d_name, strlen(SUJ_FILE));
+ off += DIRBLKSIZ;
+ for (; off < sblock.fs_bsize; off += DIRBLKSIZ) {
+ dp = (struct direct *)&block[off];
+ dp->d_ino = 0;
+ dp->d_reclen = DIRBLKSIZ;
+ dp->d_type = DT_UNKNOWN;
+ }
+ if (bwrite(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) {
+ warn("Failed to write dir block");
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * Extend a directory block in 'blk' by copying it to a full size block
+ * and inserting a .sujournal entry for the new journal inode.
+ */
+static int
+dir_extend(ufs2_daddr_t blk, ufs2_daddr_t nblk, off_t size, ino_t ino)
+{
+ char block[MAXBSIZE];
+
+ if (bread(&disk, fsbtodb(&sblock, blk), block, size) <= 0) {
+ warn("Failed to read dir block");
+ return (-1);
+ }
+ if (bwrite(&disk, fsbtodb(&sblock, nblk), block, size) <= 0) {
+ warn("Failed to write dir block");
+ return (-1);
+ }
+
+ return dir_insert(nblk, size, ino);
+}
+
+/*
+ * Insert the journal file into the ROOTINO directory. We always extend the
+ * last frag.
+ */
+static int
+journal_insertfile(ino_t ino)
+{
+ struct ufs1_dinode *dp1;
+ struct ufs2_dinode *dp2;
+ void *ip;
+ ufs2_daddr_t nblk;
+ ufs2_daddr_t blk;
+ ufs_lbn_t lbn;
+ int size;
+ int mode;
+ int off;
+
+ if (getino(&disk, &ip, ROOTINO, &mode) != 0) {
+ warn("Failed to get root inode");
+ sbdirty();
+ return (-1);
+ }
+ dp2 = ip;
+ dp1 = ip;
+ blk = 0;
+ size = 0;
+ nblk = journal_balloc();
+ if (nblk <= 0)
+ return (-1);
+ /*
+ * For simplicity's sake we always extend the ROOTINO into a new
+ * directory block rather than searching for space and inserting
+ * into an existing block. However, if the rootino has frags we
+ * have to free them and extend the block.
+ */
+ if (sblock.fs_magic == FS_UFS1_MAGIC) {
+ lbn = lblkno(&sblock, dp1->di_size);
+ off = blkoff(&sblock, dp1->di_size);
+ blk = dp1->di_db[lbn];
+ size = sblksize(&sblock, (off_t)dp1->di_size, lbn);
+ } else {
+ lbn = lblkno(&sblock, dp2->di_size);
+ off = blkoff(&sblock, dp2->di_size);
+ blk = dp2->di_db[lbn];
+ size = sblksize(&sblock, (off_t)dp2->di_size, lbn);
+ }
+ if (off != 0) {
+ if (dir_extend(blk, nblk, off, ino) == -1)
+ return (-1);
+ } else {
+ blk = 0;
+ if (dir_insert(nblk, 0, ino) == -1)
+ return (-1);
+ }
+ if (sblock.fs_magic == FS_UFS1_MAGIC) {
+ dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
+ dp1->di_db[lbn] = nblk;
+ dp1->di_size = lblktosize(&sblock, lbn+1);
+ } else {
+ dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
+ dp2->di_db[lbn] = nblk;
+ dp2->di_size = lblktosize(&sblock, lbn+1);
+ }
+ if (putino(&disk) < 0) {
+ warn("Failed to write root inode");
+ return (-1);
+ }
+ if (cgwrite(&disk) < 0) {
+ warn("Failed to write updated cg");
+ sbdirty();
+ return (-1);
+ }
+ if (blk) {
+ if (cgbfree(&disk, blk, size) < 0) {
+ warn("Failed to write cg");
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
static int
indir_fill(ufs2_daddr_t blk, int level, int *resid)
{
@@ -496,22 +714,20 @@ indir_fill(ufs2_daddr_t blk, int level,
ufs1_daddr_t *bap1;
ufs2_daddr_t *bap2;
ufs2_daddr_t nblk;
- struct fs *fs;
int ncnt;
int cnt;
int i;
- fs = &disk.d_fs;
bzero(indirbuf, sizeof(indirbuf));
bap1 = (ufs1_daddr_t *)indirbuf;
bap2 = (void *)bap1;
cnt = 0;
- for (i = 0; i < NINDIR(fs) && *resid != 0; i++) {
+ for (i = 0; i < NINDIR(&sblock) && *resid != 0; i++) {
nblk = journal_balloc();
if (nblk <= 0)
return (-1);
cnt++;
- if (fs->fs_magic == FS_UFS1_MAGIC)
+ if (sblock.fs_magic == FS_UFS1_MAGIC)
*bap1++ = nblk;
else
*bap2++ = nblk;
@@ -523,13 +739,47 @@ indir_fill(ufs2_daddr_t blk, int level,
} else
(*resid)--;
}
- if (bwrite(&disk, fsbtodb(fs, blk), indirbuf, fs->fs_bsize) <= 0) {
+ if (bwrite(&disk, fsbtodb(&sblock, blk), indirbuf,
+ sblock.fs_bsize) <= 0) {
warn("Failed to write indirect");
return (-1);
}
return (cnt);
}
+/*
+ * Clear the flag bits so the journal can be removed.
+ */
+void
+journal_clear(void)
+{
+ struct ufs1_dinode *dp1;
+ struct ufs2_dinode *dp2;
+ ino_t ino;
+ int mode;
+ void *ip;
+
+ ino = journal_findfile();
+ if (ino <= 0) {
+ warnx("Journal file does not exist");
+ return;
+ }
+ if (getino(&disk, &ip, ino, &mode) != 0) {
+ warn("Failed to get journal inode");
+ return;
+ }
+ dp2 = ip;
+ dp1 = ip;
+ if (sblock.fs_magic == FS_UFS1_MAGIC)
+ dp1->di_flags = 0;
+ else
+ dp2->di_flags = 0;
+ if (putino(&disk) < 0) {
+ warn("Failed to write journal inode");
+ return;
+ }
+}
+
int
journal_alloc(int64_t size)
{
@@ -538,32 +788,39 @@ journal_alloc(int64_t size)
ufs2_daddr_t blk;
void *ip;
struct cg *cgp;
- struct fs *fs;
int resid;
ino_t ino;
int blks;
int mode;
int i;
- fs = &disk.d_fs;
cgp = &disk.d_cg;
ino = 0;
/*
+ * If the journal file exists we can't allocate it.
+ */
+ ino = journal_findfile();
+ if (ino > 0)
+ warnx("Journal file %s already exists, please remove.",
+ SUJ_FILE);
+ if (ino != 0)
+ return (-1);
+ /*
* If the user didn't supply a size pick one based on the filesystem
* size constrained with hardcoded MIN and MAX values. We opt for
* 1/1024th of the filesystem up to MAX but not exceeding one CG and
* not less than the MIN.
*/
if (size == 0) {
- size = (fs->fs_size * fs->fs_bsize) / 1024;
+ size = (sblock.fs_size * sblock.fs_bsize) / 1024;
size = MIN(SUJ_MAX, size);
- if (size / fs->fs_fsize > fs->fs_fpg)
- size = fs->fs_fpg * fs->fs_fsize;
+ if (size / sblock.fs_fsize > sblock.fs_fpg)
+ size = sblock.fs_fpg * sblock.fs_fsize;
size = MAX(SUJ_MIN, size);
}
- resid = blocks = size / fs->fs_bsize;
- if (fs->fs_cstotal.cs_nbfree < blocks) {
+ resid = blocks = size / sblock.fs_bsize;
+ if (sblock.fs_cstotal.cs_nbfree < blocks) {
warn("Insufficient free space for %jd byte journal", size);
return (-1);
}
@@ -576,9 +833,9 @@ journal_alloc(int64_t size)
continue;
/*
* Try to minimize fragmentation by requiring at least a
- * 1/8th of the blocks be present in each cg we use.
+ * 1/16th of the blocks be present in each cg we use.
*/
- if (cgp->cg_cs.cs_nbfree < blocks / 8)
+ if (cgp->cg_cs.cs_nbfree < blocks / 16)
continue;
ino = cgialloc(&disk);
if (ino <= 0)
@@ -597,22 +854,24 @@ journal_alloc(int64_t size)
*/
dp2 = ip;
dp1 = ip;
- if (fs->fs_magic == FS_UFS1_MAGIC) {
+ if (sblock.fs_magic == FS_UFS1_MAGIC) {
bzero(dp1, sizeof(*dp1));
dp1->di_size = size;
- dp1->di_mode = IFREG;
+ dp1->di_mode = IFREG | IREAD;
dp1->di_nlink = 1;
+ dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
} else {
bzero(dp2, sizeof(*dp2));
dp2->di_size = size;
- dp2->di_mode = IFREG;
+ dp2->di_mode = IFREG | IREAD;
dp2->di_nlink = 1;
+ dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
}
for (i = 0; i < NDADDR && resid; i++, resid--) {
blk = journal_balloc();
if (blk <= 0)
goto out;
- if (fs->fs_magic == FS_UFS1_MAGIC) {
+ if (sblock.fs_magic == FS_UFS1_MAGIC) {
dp1->di_db[i] = blk;
dp1->di_blocks++;
} else {
@@ -629,7 +888,7 @@ journal_alloc(int64_t size)
sbdirty();
goto out;
}
- if (fs->fs_magic == FS_UFS1_MAGIC) {
+ if (sblock.fs_magic == FS_UFS1_MAGIC) {
dp1->di_ib[i] = blk;
dp1->di_blocks += blks;
} else {
@@ -637,10 +896,10 @@ journal_alloc(int64_t size)
dp2->di_blocks += blks;
}
}
- if (fs->fs_magic == FS_UFS1_MAGIC)
- dp1->di_blocks *= fs->fs_bsize / disk.d_bsize;
+ if (sblock.fs_magic == FS_UFS1_MAGIC)
+ dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize;
else
- dp2->di_blocks *= fs->fs_bsize / disk.d_bsize;
+ dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize;
if (putino(&disk) < 0) {
warn("Failed to write inode");
sbdirty();
@@ -651,8 +910,11 @@ journal_alloc(int64_t size)
sbdirty();
return (-1);
}
- fs->fs_sujournal = ino;
- fs->fs_sujfree = 0;
+ if (journal_insertfile(ino) < 0) {
+ sbdirty();
+ return (-1);
+ }
+ sblock.fs_sujfree = 0;
return (0);
}
warnx("Insufficient contiguous free space for the journal.");
Modified: projects/suj/6/sys/sys/mount.h
==============================================================================
--- projects/suj/6/sys/sys/mount.h Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/sys/sys/mount.h Tue Jan 26 06:45:38 2010 (r203013)
@@ -231,7 +231,6 @@ void __mnt_vnode_markerfree(str
#define MNT_NOATIME 0x10000000 /* disable update of file access time */
#define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */
#define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */
-#define MNT_SUJ 0x00000080 /* softdep journaling */
/*
* NFS export related mount flags.
@@ -267,7 +266,7 @@ void __mnt_vnode_markerfree(str
MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \
MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \
MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \
- MNT_JAILDEVFS | MNT_MULTILABEL | MNT_ACLS | MNT_SUJ)
+ MNT_JAILDEVFS | MNT_MULTILABEL | MNT_ACLS)
/* Mask of flags that can be updated. */
#define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \
@@ -303,6 +302,7 @@ void __mnt_vnode_markerfree(str
* with the unmount attempt (used by NFS).
*/
#define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */
+#define MNTK_SUJ 0x00000100 /* Softdep journaling enabled */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
#define MNTK_SUSPEND 0x08000000 /* request write suspension */
Modified: projects/suj/6/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- projects/suj/6/sys/ufs/ffs/ffs_alloc.c Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/sys/ufs/ffs/ffs_alloc.c Tue Jan 26 06:45:38 2010 (r203013)
@@ -1837,6 +1837,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, i
ino_t inum;
struct workhead *dephd;
{
+ struct mount *mp;
struct cg *cgp;
struct buf *bp;
ufs1_daddr_t fragno, cgbno;
@@ -1951,7 +1952,8 @@ ffs_blkfree(ump, fs, devvp, bno, size, i
fs->fs_fmod = 1;
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
- if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP)
+ mp = UFSTOVFS(ump);
+ if (mp->mnt_flag & MNT_SOFTDEP)
softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
numfrags(fs, size), dephd);
bdwrite(bp);
Modified: projects/suj/6/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/6/sys/ufs/ffs/ffs_softdep.c Tue Jan 26 06:36:10 2010 (r203012)
+++ projects/suj/6/sys/ufs/ffs/ffs_softdep.c Tue Jan 26 06:45:38 2010 (r203013)
@@ -1879,7 +1879,7 @@ softdep_unmount(mp)
struct mount *mp;
{
- if (mp->mnt_flag & MNT_SUJ)
+ if (mp->mnt_kern_flag & MNTK_SUJ)
journal_unmount(mp);
}
@@ -2021,16 +2021,36 @@ journal_mount(mp, fs, cred)
struct fs *fs;
struct ucred *cred;
{
+ struct componentname cnp;
struct jblocks *jblocks;
+ struct vnode *dvp;
struct vnode *vp;
struct inode *ip;
ufs2_daddr_t blkno;
+ ino_t sujournal;
int bcount;
int error;
int i;
- mp->mnt_flag |= MNT_SUJ;
- error = VFS_VGET(mp, fs->fs_sujournal, LK_EXCLUSIVE, &vp);
+ mp->mnt_kern_flag |= MNTK_SUJ;
+ error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp);
+ if (error)
+ return (error);
+ bzero(&cnp, sizeof(cnp));
+ cnp.cn_nameiop = LOOKUP;
+ cnp.cn_flags = ISLASTCN;
+ cnp.cn_thread = curthread;
+ cnp.cn_cred = curthread->td_ucred;
+ cnp.cn_pnbuf = SUJ_FILE;
+ cnp.cn_nameptr = SUJ_FILE;
+ cnp.cn_namelen = strlen(SUJ_FILE);
+ error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal);
+ vput(dvp);
+ if (error != 0) {
+ printf("Failed to find journal. Use tunefs to create one\n");
+ return (error);
+ }
+ error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, &vp);
if (error)
return (error);
ip = VTOI(vp);
@@ -2052,9 +2072,18 @@ journal_mount(mp, fs, cred)
}
jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */
jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */
- DIP_SET(ip, i_modrev, fs->fs_mtime);
- ip->i_flags |= IN_MODIFIED;
- ffs_update(vp, 1);
+ /*
+ * Only validate the journal contents if the filesystem is clean,
+ * otherwise we write the logs but they'll never be used. If the
+ * filesystem was still dirty when we mounted it the journal is
+ * invalid and a new journal can only be valid if it starts from a
+ * clean mount.
+ */
+ if (fs->fs_clean) {
+ DIP_SET(ip, i_modrev, fs->fs_mtime);
+ ip->i_flags |= IN_MODIFIED;
+ ffs_update(vp, 1);
+ }
VFSTOUFS(mp)->softdep_jblocks = jblocks;
out:
vput(vp);
@@ -2136,6 +2165,11 @@ remove_from_journal(wk)
ump->softdep_on_journal -= 1;
}
+/*
+ * Check for journal space as well as dependency limits so the prelink
+ * code can throttle both journaled and non-journaled filesystems.
+ * Threshold is 0 for low and 1 for min.
+ */
static int
journal_space(ump, thresh)
struct ufsmount *ump;
@@ -2144,7 +2178,20 @@ journal_space(ump, thresh)
struct jblocks *jblocks;
int avail;
+ /*
+ * We use a tighter restriction here to prevent request_cleanup()
+ * running in threads from running into locks we currently hold.
+ */
+ if (num_inodedep > (max_softdeps / 10) * 9)
+ return (0);
+
jblocks = ump->softdep_jblocks;
+ if (jblocks == NULL)
+ return (1);
+ if (thresh)
+ thresh = jblocks->jb_min;
+ else
+ thresh = jblocks->jb_low;
avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE;
avail = jblocks->jb_free - avail;
@@ -2187,15 +2234,13 @@ softdep_prealloc(vp, waitok)
struct vnode *vp;
int waitok;
{
- struct jblocks *jblocks;
struct ufsmount *ump;
if (DOINGSUJ(vp) == 0)
return (0);
ump = VFSTOUFS(vp->v_mount);
- jblocks = ump->softdep_jblocks;
ACQUIRE_LOCK(&lk);
- if (journal_space(ump, jblocks->jb_low)) {
+ if (journal_space(ump, 0)) {
FREE_LOCK(&lk);
return (0);
}
@@ -2210,9 +2255,9 @@ softdep_prealloc(vp, waitok)
ffs_syncvnode(vp, waitok);
ACQUIRE_LOCK(&lk);
process_removes(vp);
- if (journal_space(ump, jblocks->jb_low) == 0) {
+ if (journal_space(ump, 0) == 0) {
softdep_speedup();
- if (journal_space(ump, jblocks->jb_min) == 0)
+ if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
FREE_LOCK(&lk);
@@ -2220,18 +2265,22 @@ softdep_prealloc(vp, waitok)
return (0);
}
+/*
+ * Before adjusting a link count on a vnode verify that we have sufficient
+ * journal space. If not, process operations that depend on the currently
+ * locked pair of vnodes to try to flush space as the syncer, buf daemon,
+ * and softdep flush threads can not acquire these locks to reclaim space.
+ */
static void
softdep_prelink(dvp, vp)
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list