svn commit: r203013 - in projects/suj: 6/lib/libufs 6/sbin/fsck_ffs 6/sbin/tunefs 6/sys/sys 6/sys/ufs/ffs 7/lib/libufs 7/sbin/fsck_ffs 7/sbin/mount 7/sbin/tunefs 7/sys/sys 7/sys/ufs/ffs 7/sys/ufs/u...

Jeff Roberson jeff at FreeBSD.org
Tue Jan 26 06:45:39 UTC 2010


Author: jeff
Date: Tue Jan 26 06:45:38 2010
New Revision: 203013
URL: http://svn.freebsd.org/changeset/base/203013

Log:
   - Merge r203012 from suj/head

Modified:
  projects/suj/6/lib/libufs/cgroup.c
  projects/suj/6/lib/libufs/libufs.h
  projects/suj/6/sbin/fsck_ffs/pass4.c
  projects/suj/6/sbin/fsck_ffs/suj.c
  projects/suj/6/sbin/tunefs/tunefs.c
  projects/suj/6/sys/sys/mount.h
  projects/suj/6/sys/ufs/ffs/ffs_alloc.c
  projects/suj/6/sys/ufs/ffs/ffs_softdep.c
  projects/suj/6/sys/ufs/ffs/ffs_vfsops.c
  projects/suj/6/sys/ufs/ffs/fs.h
  projects/suj/7/lib/libufs/cgroup.c
  projects/suj/7/lib/libufs/libufs.h
  projects/suj/7/sbin/fsck_ffs/pass4.c
  projects/suj/7/sbin/fsck_ffs/suj.c
  projects/suj/7/sbin/mount/mount.c
  projects/suj/7/sbin/tunefs/tunefs.c
  projects/suj/7/sys/sys/mount.h
  projects/suj/7/sys/ufs/ffs/ffs_alloc.c
  projects/suj/7/sys/ufs/ffs/ffs_softdep.c
  projects/suj/7/sys/ufs/ffs/ffs_vfsops.c
  projects/suj/7/sys/ufs/ffs/fs.h
  projects/suj/7/sys/ufs/ufs/inode.h
  projects/suj/8/lib/libufs/cgroup.c
  projects/suj/8/lib/libufs/libufs.h
  projects/suj/8/sbin/fsck_ffs/pass4.c
  projects/suj/8/sbin/fsck_ffs/suj.c
  projects/suj/8/sbin/mount/mount.c
  projects/suj/8/sbin/tunefs/tunefs.c
  projects/suj/8/sys/sys/mount.h
  projects/suj/8/sys/ufs/ffs/ffs_alloc.c
  projects/suj/8/sys/ufs/ffs/ffs_softdep.c
  projects/suj/8/sys/ufs/ffs/ffs_vfsops.c
  projects/suj/8/sys/ufs/ffs/fs.h
  projects/suj/8/sys/ufs/ufs/inode.h

Modified: projects/suj/6/lib/libufs/cgroup.c
==============================================================================
--- projects/suj/6/lib/libufs/cgroup.c	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/lib/libufs/cgroup.c	Tue Jan 26 06:45:38 2010	(r203013)
@@ -71,6 +71,67 @@ gotit:
 	return (cgbase(fs, cgp->cg_cgx) + blkstofrags(fs, bno));
 }
 
+int
+cgbfree(struct uufsd *disk, ufs2_daddr_t bno, long size)
+{
+	u_int8_t *blksfree;
+	struct fs *fs;
+	struct cg *cgp;
+	ufs1_daddr_t fragno, cgbno;
+	int i, cg, blk, frags, bbase;
+	/* Return 'size' bytes at 'bno' to the cg free map and counters. */
+	fs = &disk->d_fs;
+	cg = dtog(fs, bno);
+	if (cgread1(disk, cg) != 1)
+		return (-1);	/* the cylinder group could not be read */
+	cgp = &disk->d_cg;
+	cgbno = dtogd(fs, bno);
+	blksfree = cg_blksfree(cgp);
+	if (size == fs->fs_bsize) {	/* freeing one whole block */
+		fragno = fragstoblks(fs, cgbno);
+		ffs_setblock(fs, blksfree, fragno);
+		ffs_clusteracct(fs, cgp, fragno, 1);
+		cgp->cg_cs.cs_nbfree++;
+		fs->fs_cstotal.cs_nbfree++;
+		fs->fs_cs(fs, cg).cs_nbfree++;
+	} else {
+		bbase = cgbno - fragnum(fs, cgbno);
+		/*
+		 * Back out the cg_frsum counts for the block as it is mapped now.
+		 */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+		/*
+		 * Set the free-map bit of every fragment being released.
+		 */
+		frags = numfrags(fs, size);
+		for (i = 0; i < frags; i++)
+			setbit(blksfree, cgbno + i);
+		cgp->cg_cs.cs_nffree += i;	/* i == frags after the loop */
+		fs->fs_cstotal.cs_nffree += i;
+		fs->fs_cs(fs, cg).cs_nffree += i;
+		/*
+		 * Add the cg_frsum counts back in for the updated free map.
+		 */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+		/*
+		 * Whole block free now?  Count one block, not fs_frag frags.
+		 */
+		fragno = fragstoblks(fs, bbase);
+		if (ffs_isblock(fs, blksfree, fragno)) {
+			cgp->cg_cs.cs_nffree -= fs->fs_frag;
+			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
+			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
+			ffs_clusteracct(fs, cgp, fragno, 1);
+			cgp->cg_cs.cs_nbfree++;
+			fs->fs_cstotal.cs_nbfree++;
+			fs->fs_cs(fs, cg).cs_nbfree++;
+		}
+	}
+	return cgwrite(disk);	/* the caller sees cgwrite()'s result */
+}
+
 ino_t
 cgialloc(struct uufsd *disk)
 {

Modified: projects/suj/6/lib/libufs/libufs.h
==============================================================================
--- projects/suj/6/lib/libufs/libufs.h	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/lib/libufs/libufs.h	Tue Jan 26 06:45:38 2010	(r203013)
@@ -110,6 +110,7 @@ ssize_t bwrite(struct uufsd *, ufs2_dadd
  * cgroup.c
  */
 ufs2_daddr_t cgballoc(struct uufsd *);
+int cgbfree(struct uufsd *, ufs2_daddr_t, long);
 ino_t cgialloc(struct uufsd *);
 int cgread(struct uufsd *);
 int cgread1(struct uufsd *, int);

Modified: projects/suj/6/sbin/fsck_ffs/pass4.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/pass4.c	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/sbin/fsck_ffs/pass4.c	Tue Jan 26 06:45:38 2010	(r203013)
@@ -72,9 +72,6 @@ pass4(void)
 		for (i = 0; i < inostathead[cg].il_numalloced; i++, inumber++) {
 			if (inumber < ROOTINO)
 				continue;
-			if (sblock.fs_flags & FS_SUJ &&
-			    inumber == sblock.fs_sujournal)
-				continue;
 			idesc.id_number = inumber;
 			switch (inoinfo(inumber)->ino_state) {
 

Modified: projects/suj/6/sbin/fsck_ffs/suj.c
==============================================================================
--- projects/suj/6/sbin/fsck_ffs/suj.c	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/sbin/fsck_ffs/suj.c	Tue Jan 26 06:45:38 2010	(r203013)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <stdlib.h>
 #include <stdint.h>
 #include <libufs.h>
+#include <string.h>
 #include <strings.h>
 #include <err.h>
 #include <assert.h>
@@ -63,6 +64,7 @@ struct suj_seg {
 struct suj_rec {
 	TAILQ_ENTRY(suj_rec) sr_next;
 	union jrec	*sr_rec;
+	int		sr_alt;	/* Is alternate address? */
 };
 TAILQ_HEAD(srechd, suj_rec);
 
@@ -127,6 +129,7 @@ TAILQ_HEAD(seghd, suj_seg) allsegs;
 uint64_t oldseq;
 static struct uufsd *disk = NULL;
 static struct fs *fs = NULL;
+ino_t sujino;
 
 /*
  * Summary statistics.
@@ -191,8 +194,7 @@ closedisk(const char *devnam)
 		fs->fs_cstotal.cs_nifree += cgsum->cs_nifree;
 		fs->fs_cstotal.cs_ndir += cgsum->cs_ndir;
 	}
-	/* XXX Don't set clean for now, we don't trust the journal. */
-	/* fs->fs_clean = 1; */
+	fs->fs_clean = 1;
 	fs->fs_time = time(NULL);
 	fs->fs_mtime = time(NULL);
 	if (sbwrite(disk, 0) == -1)
@@ -1823,6 +1825,7 @@ ino_append(union jrec *rec)
 	sino->si_hasrecs = 1;
 	srec = errmalloc(sizeof(*srec));
 	srec->sr_rec = rec;
+	srec->sr_alt = 0;
 	TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next);
 }
 
@@ -1844,9 +1847,10 @@ ino_build_ref(struct suj_ino *sino, stru
 
 	refrec = (struct jrefrec *)srec->sr_rec;
 	if (debug)
-		printf("ino_build: op %d, ino %d, nlink %d, parent %d, diroff %jd\n", 
-		    refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent,
-		    refrec->jr_diroff);
+		printf("ino_build: op %d, ino %d, nlink %d, "
+		    "parent %d, diroff %jd\n", 
+		    refrec->jr_op, refrec->jr_ino, refrec->jr_nlink,
+		    refrec->jr_parent, refrec->jr_diroff);
 
 	/*
 	 * Search for a mvrec that matches this offset.  Whether it's an add
@@ -1871,16 +1875,19 @@ ino_build_ref(struct suj_ino *sino, stru
 				rrn = errmalloc(sizeof(*refrec));
 				*rrn = *refrec;
 				rrn->jr_op = JOP_ADDREF;
+				rrn->jr_diroff = mvrec->jm_oldoff;
 				srn = errmalloc(sizeof(*srec));
+				srn->sr_alt = 1;
 				srn->sr_rec = (union jrec *)rrn;
 				ino_build_ref(sino, srn);
-				refrec->jr_diroff = mvrec->jm_oldoff;
 			}
 		}
 	}
 	/*
 	 * We walk backwards so that adds and removes are evaluated in the
-	 * correct order.
+	 * correct order.  If a primary record conflicts with an alt keep
+	 * the primary and discard the alt.  We must track this to keep
+	 * the correct number of removes in the list.
 	 */
 	for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn;
 	    srn = TAILQ_PREV(srn, srechd, sr_next)) {
@@ -1890,7 +1897,17 @@ ino_build_ref(struct suj_ino *sino, stru
 			continue;
 		if (debug)
 			printf("Discarding dup.\n");
-		rrn->jr_mode = refrec->jr_mode;
+		if (srn->sr_alt == 0) {
+			rrn->jr_mode = refrec->jr_mode;
+			return;
+		}
+		/*
+		 * Replace the record in place with the old nlink in case
+		 * we replace the head of the list.  Abandon srec as a dup.
+		 */
+		refrec->jr_nlink = rrn->jr_nlink;
+		srn->sr_rec = srec->sr_rec;
+		srn->sr_alt = srec->sr_alt;
 		return;
 	}
 	TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next);
@@ -1930,9 +1947,12 @@ ino_move_ref(struct suj_ino *sino, struc
 		/*
 		 * When an entry is moved we don't know whether the write
 		 * to move has completed yet.  To resolve this we create
-		 * a new add dependency in the new location as if it were added
-		 * twice.  Only one will succeed.
+		 * a new add dependency in the new location as if it were
+		 * added twice.  Only one will succeed.  Consider the
+		 * new offset the primary location for the inode and the
+		 * old offset the alt.
 		 */
+		srn->sr_alt = 1;
 		refrec = errmalloc(sizeof(*refrec));
 		refrec->jr_op = JOP_ADDREF;
 		refrec->jr_ino = mvrec->jm_ino;
@@ -1941,12 +1961,14 @@ ino_move_ref(struct suj_ino *sino, struc
 		refrec->jr_mode = rrn->jr_mode;
 		refrec->jr_nlink = rrn->jr_nlink;
 		srn = errmalloc(sizeof(*srn));
+		srn->sr_alt = 0;
 		srn->sr_rec = (union jrec *)refrec;
 		ino_build_ref(sino, srn);
 		break;
 	}
 	/*
-	 * Add this mvrec to the queue of pending mvs.
+	 * Add this mvrec to the queue of pending mvs, possibly collapsing
+	 * it with a prior move for the same inode and offset.
 	 */
 	for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn;
 	    srn = TAILQ_PREV(srn, srechd, sr_next)) {
@@ -2195,19 +2217,25 @@ suj_verifyino(union dinode *ip)
 
 	if (DIP(ip, di_nlink) != 1) {
 		printf("Invalid link count %d for journal inode %d\n",
-		    DIP(ip, di_nlink), fs->fs_sujournal);
+		    DIP(ip, di_nlink), sujino);
+		return (-1);
+	}
+
+	if (DIP(ip, di_flags) != (SF_IMMUTABLE | SF_NOUNLINK)) {
+		printf("Invalid flags 0x%X for journal inode %d\n",
+		    DIP(ip, di_flags), sujino);
 		return (-1);
 	}
 
-	if (DIP(ip, di_mode) != IFREG) {
-		printf("Invalid mode %d for journal inode %d\n",
-		    DIP(ip, di_mode), fs->fs_sujournal);
+	if (DIP(ip, di_mode) != (IFREG | IREAD)) {
+		printf("Invalid mode %o for journal inode %d\n",
+		    DIP(ip, di_mode), sujino);
 		return (-1);
 	}
 
 	if (DIP(ip, di_size) < SUJ_MIN || DIP(ip, di_size) > SUJ_MAX) {
 		printf("Invalid size %jd for journal inode %d\n",
-		    DIP(ip, di_size), fs->fs_sujournal);
+		    DIP(ip, di_size), sujino);
 		return (-1);
 	}
 
@@ -2447,20 +2475,60 @@ restart:
 }
 
 /*
+ * Search a directory block for the SUJ_FILE.
+ */
+static void
+suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
+{
+	char dirbuf[MAXBSIZE];
+	struct direct *ent;
+	int pos, len;
+
+	/* An earlier block already yielded the journal inode. */
+	if (sujino != 0)
+		return;
+	len = lfragtosize(fs, frags);
+	if (bread(disk, fsbtodb(fs, blk), dirbuf, len) <= 0)
+		err(1, "Failed to read ROOTINO directory block %jd", blk);
+	/* Walk the entries by d_reclen; a zero length ends the scan. */
+	pos = 0;
+	while (pos < len) {
+		ent = (struct direct *)&dirbuf[pos];
+		if (ent->d_reclen == 0)
+			break;
+		if (ent->d_ino != 0 && ent->d_namlen == strlen(SUJ_FILE) &&
+		    bcmp(ent->d_name, SUJ_FILE, ent->d_namlen) == 0) {
+			sujino = ent->d_ino;
+			return;
+		}
+		pos += ent->d_reclen;
+	}
+}
+
+/*
  * Orchestrate the verification of a filesystem via the softupdates journal.
  */
 int
 suj_check(const char *filesys)
 {
 	union dinode *jip;
+	union dinode *ip;
 	uint64_t blocks;
 
 	opendisk(filesys);
 	TAILQ_INIT(&allsegs);
 	/*
+	 * Find the journal inode.
+	 */
+	ip = ino_read(ROOTINO);
+	sujino = 0;
+	ino_visit(ip, ROOTINO, suj_find, 0);
+	if (sujino == 0)
+		errx(1, "Journal inode removed.  Use tunefs to re-create.");
+	/*
 	 * Fetch the journal inode and verify it.
 	 */
-	jip = ino_read(fs->fs_sujournal);
+	jip = ino_read(sujino);
 	printf("** SU+J Recovering %s\n", filesys);
 	if (suj_verifyino(jip) != 0)
 		return (-1);
@@ -2469,11 +2537,11 @@ suj_check(const char *filesys)
 	 * available journal blocks in with suj_read().
 	 */
 	printf("** Reading %jd byte journal from inode %d.\n",
-	    DIP(jip, di_size), fs->fs_sujournal);
+	    DIP(jip, di_size), sujino);
 	suj_jblocks = jblocks_create();
-	blocks = ino_visit(jip, fs->fs_sujournal, suj_add_block, 0);
+	blocks = ino_visit(jip, sujino, suj_add_block, 0);
 	if (blocks != numfrags(fs, DIP(jip, di_size)))
-		errx(1, "Sparse journal inode %d.\n", fs->fs_sujournal);
+		errx(1, "Sparse journal inode %d.\n", sujino);
 	suj_read();
 	jblocks_destroy(suj_jblocks);
 	suj_jblocks = NULL;

Modified: projects/suj/6/sbin/tunefs/tunefs.c
==============================================================================
--- projects/suj/6/sbin/tunefs/tunefs.c	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/sbin/tunefs/tunefs.c	Tue Jan 26 06:45:38 2010	(r203013)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/dinode.h>
 #include <ufs/ffs/fs.h>
+#include <ufs/ufs/dir.h>
 
 #include <ctype.h>
 #include <err.h>
@@ -74,6 +75,7 @@ struct uufsd disk;
 void usage(void);
 void printfs(void);
 int journal_alloc(int64_t size);
+void journal_clear(void);
 void sbdirty(void);
 
 int
@@ -327,11 +329,11 @@ main(int argc, char *argv[])
 			if ((~sblock.fs_flags & FS_SUJ) == FS_SUJ) {
 				warnx("%s remains unchanged as disabled", name);
 			} else {
-				sbdirty();
+				journal_clear();
  				sblock.fs_flags &= ~(FS_DOSOFTDEP | FS_SUJ);
-				sblock.fs_sujournal = 0;
 				sblock.fs_sujfree = 0;
- 				warnx("%s cleared", name);
+ 				warnx("%s cleared, "
+				    "remove .sujournal to reclaim space", name);
 			}
  		}
 	}
@@ -452,11 +454,9 @@ journal_balloc(void)
 {
 	ufs2_daddr_t blk;
 	struct cg *cgp;
-	struct fs *fs;
 	int valid;
 
 	cgp = &disk.d_cg;
-	fs = &disk.d_fs;
 	for (;;) {
 		blk = cgballoc(&disk);
 		if (blk > 0)
@@ -482,13 +482,231 @@ journal_balloc(void)
 		warnx("Failed to find sufficient free blocks for the journal");
 		return -1;
 	}
-	if (bwrite(&disk, fsbtodb(fs, blk), clrbuf, fs->fs_bsize) <= 0) {
+	if (bwrite(&disk, fsbtodb(&sblock, blk), clrbuf,
+	    sblock.fs_bsize) <= 0) {
 		warn("Failed to initialize new block");
 		return -1;
 	}
 	return (blk);
 }
 
+/*
+ * Look for SUJ_FILE in the 'bytes'-long directory block stored at 'blk'.
+ */
+static ino_t
+dir_search(ufs2_daddr_t blk, int bytes)
+{
+	char dirbuf[MAXBSIZE];
+	struct direct *ent;
+	int pos;
+
+	/* Result: the entry's inode, 0 if absent, (ino_t)-1 on read error. */
+	if (bread(&disk, fsbtodb(&sblock, blk), dirbuf, bytes) <= 0) {
+		warn("Failed to read dir block");
+		return (-1);
+	}
+	/* Step through the entries by d_reclen; zero length ends the scan. */
+	for (pos = 0; pos < bytes; pos += ent->d_reclen) {
+		ent = (struct direct *)&dirbuf[pos];
+		if (ent->d_reclen == 0)
+			break;
+		/* Skip unused slots and names that are not SUJ_FILE. */
+		if (ent->d_ino == 0 || ent->d_namlen != strlen(SUJ_FILE) ||
+		    bcmp(ent->d_name, SUJ_FILE, ent->d_namlen) != 0)
+			continue;
+		return (ent->d_ino);
+	}
+
+	return (0);
+}
+
+/*
+ * Search the direct blocks of ROOTINO for SUJ_FILE.  Returns its inode
+ * number, 0 if no such entry exists, or (ino_t)-1 on any failure.
+ */
+static ino_t
+journal_findfile(void)
+{
+	struct ufs1_dinode *dp1;
+	struct ufs2_dinode *dp2;
+	ino_t ino;
+	int i, mode;
+	void *ip;
+
+	if (getino(&disk, &ip, ROOTINO, &mode) != 0) {
+		warn("Failed to get root inode");
+		return (-1);
+	}
+	dp2 = ip;
+	dp1 = ip;
+	if (sblock.fs_magic == FS_UFS1_MAGIC) {
+		if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) {
+			warnx("ROOTINO extends beyond direct blocks.");
+			return (-1);
+		}
+		for (i = 0; i < NDADDR; i++) {
+			if (dp1->di_db[i] == 0)
+				break;
+			if ((ino = dir_search(dp1->di_db[i],
+			    sblksize(&sblock, (off_t)dp1->di_size, i))) != 0)
+				return (ino);	/* inode, or -1 on read error */
+		}
+	} else {
+		if ((off_t)dp2->di_size >= lblktosize(&sblock, NDADDR)) {
+			warnx("ROOTINO extends beyond direct blocks.");
+			return (-1);
+		}
+		for (i = 0; i < NDADDR; i++) {
+			if (dp2->di_db[i] == 0)
+				break;
+			if ((ino = dir_search(dp2->di_db[i],
+			    sblksize(&sblock, (off_t)dp2->di_size, i))) != 0)
+				return (ino);	/* inode, or -1 on read error */
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Write a directory entry naming SUJ_FILE for inode 'ino' at byte offset
+ * 'off' of directory block 'blk'.  Everything from 'off' to the end of the
+ * block is rewritten: the new entry, then empty DIRBLKSIZ-sized entries.
+ */
+static int
+dir_insert(ufs2_daddr_t blk, off_t off, ino_t ino)
+{
+	char buf[MAXBSIZE];
+	struct direct *ent;
+
+	if (bread(&disk, fsbtodb(&sblock, blk), buf, sblock.fs_bsize) <= 0) {
+		warn("Failed to read dir block");
+		return (-1);
+	}
+	/* Wipe the tail of the block, then build the journal's entry. */
+	bzero(&buf[off], sblock.fs_bsize - off);
+	ent = (struct direct *)&buf[off];
+	ent->d_namlen = strlen(SUJ_FILE);
+	bcopy(SUJ_FILE, &ent->d_name, strlen(SUJ_FILE));
+	ent->d_type = DT_REG;
+	ent->d_reclen = DIRBLKSIZ;
+	ent->d_ino = ino;
+	for (off += DIRBLKSIZ; off < sblock.fs_bsize; off += DIRBLKSIZ) {
+		ent = (struct direct *)&buf[off];
+		ent->d_type = DT_UNKNOWN;
+		ent->d_reclen = DIRBLKSIZ;
+		ent->d_ino = 0;
+	}
+	if (bwrite(&disk, fsbtodb(&sblock, blk), buf, sblock.fs_bsize) <= 0) {
+		warn("Failed to write dir block");
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Copy the first 'size' bytes of directory block 'blk' into 'nblk', then
+ * have dir_insert() add the journal entry for 'ino' at offset 'size'.
+ */
+static int
+dir_extend(ufs2_daddr_t blk, ufs2_daddr_t nblk, off_t size, ino_t ino)
+{
+	char block[MAXBSIZE];
+
+	if (bread(&disk, fsbtodb(&sblock, blk), block, size) <= 0) {
+		warn("Failed to read dir block");
+		return (-1);
+	}
+	if (bwrite(&disk, fsbtodb(&sblock, nblk), block, size) <= 0) {
+		warn("Failed to write dir block");
+		return (-1);
+	}
+
+	return dir_insert(nblk, size, ino);
+}
+
+/*
+ * Link journal inode 'ino' into the ROOTINO directory as SUJ_FILE using a
+ * newly allocated directory block.  Returns 0 on success, -1 on failure.
+ */
+static int
+journal_insertfile(ino_t ino)
+{
+	struct ufs1_dinode *dp1;
+	struct ufs2_dinode *dp2;
+	void *ip;
+	ufs2_daddr_t nblk;
+	ufs2_daddr_t blk;
+	ufs_lbn_t lbn;
+	int size;
+	int mode;
+	int off;
+
+	if (getino(&disk, &ip, ROOTINO, &mode) != 0) {
+		warn("Failed to get root inode");
+		sbdirty();
+		return (-1);
+	}
+	dp2 = ip;
+	dp1 = ip;
+	blk = 0;
+	size = 0;
+	nblk = journal_balloc();
+	if (nblk <= 0)
+		return (-1);
+	/*
+	 * For simplicity's sake we always extend the ROOTINO into a new
+	 * directory block rather than searching for space and inserting
+	 * into an existing block.  However, if the rootino ends in frags
+	 * we have to copy them into the new block and free the old ones.
+	 */
+	if (sblock.fs_magic == FS_UFS1_MAGIC) {
+		lbn = lblkno(&sblock, dp1->di_size);
+		off = blkoff(&sblock, dp1->di_size);
+		blk = dp1->di_db[lbn];
+		size = sblksize(&sblock, (off_t)dp1->di_size, lbn);
+	} else {
+		lbn = lblkno(&sblock, dp2->di_size);
+		off = blkoff(&sblock, dp2->di_size);
+		blk = dp2->di_db[lbn];
+		size = sblksize(&sblock, (off_t)dp2->di_size, lbn);
+	}
+	if (off != 0) {
+		if (dir_extend(blk, nblk, off, ino) == -1)
+			return (-1);
+	} else {
+		blk = 0;	/* size is block aligned: no old frags to free */
+		if (dir_insert(nblk, 0, ino) == -1)
+			return (-1);
+	}
+	if (sblock.fs_magic == FS_UFS1_MAGIC) {
+		dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
+		dp1->di_db[lbn] = nblk;
+		dp1->di_size = lblktosize(&sblock, lbn+1);
+	} else {
+		dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
+		dp2->di_db[lbn] = nblk;
+		dp2->di_size = lblktosize(&sblock, lbn+1);
+	}
+	if (putino(&disk) < 0) {
+		warn("Failed to write root inode");
+		return (-1);
+	}
+	if (cgwrite(&disk) < 0) {
+		warn("Failed to write updated cg");
+		sbdirty();
+		return (-1);
+	}
+	if (blk) {	/* release the frags that were copied into nblk */
+		if (cgbfree(&disk, blk, size) < 0) {
+			warn("Failed to write cg");
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
 static int
 indir_fill(ufs2_daddr_t blk, int level, int *resid)
 {
@@ -496,22 +714,20 @@ indir_fill(ufs2_daddr_t blk, int level, 
 	ufs1_daddr_t *bap1;
 	ufs2_daddr_t *bap2;
 	ufs2_daddr_t nblk;
-	struct fs *fs;
 	int ncnt;
 	int cnt;
 	int i;
 
-	fs = &disk.d_fs;
 	bzero(indirbuf, sizeof(indirbuf));
 	bap1 = (ufs1_daddr_t *)indirbuf;
 	bap2 = (void *)bap1;
 	cnt = 0;
-	for (i = 0; i < NINDIR(fs) && *resid != 0; i++) {
+	for (i = 0; i < NINDIR(&sblock) && *resid != 0; i++) {
 		nblk = journal_balloc();
 		if (nblk <= 0)
 			return (-1);
 		cnt++;
-		if (fs->fs_magic == FS_UFS1_MAGIC)
+		if (sblock.fs_magic == FS_UFS1_MAGIC)
 			*bap1++ = nblk;
 		else
 			*bap2++ = nblk;
@@ -523,13 +739,47 @@ indir_fill(ufs2_daddr_t blk, int level, 
 		} else 
 			(*resid)--;
 	}
-	if (bwrite(&disk, fsbtodb(fs, blk), indirbuf, fs->fs_bsize) <= 0) {
+	if (bwrite(&disk, fsbtodb(&sblock, blk), indirbuf,
+	    sblock.fs_bsize) <= 0) {
 		warn("Failed to write indirect");
 		return (-1);
 	}
 	return (cnt);
 }
 
+/*
+ * Zero the journal inode's di_flags so that SUJ_FILE can be removed.
+ */
+void
+journal_clear(void)
+{
+	struct ufs1_dinode *dp1;
+	struct ufs2_dinode *dp2;
+	ino_t ino;
+	int mode;
+	void *ip;
+	/* Neither 0 nor (ino_t)-1 is a usable inode number. */
+	ino = journal_findfile();
+	if (ino == 0 || ino == (ino_t)-1) {
+		warnx("Journal file does not exist");
+		return;
+	}
+	if (getino(&disk, &ip, ino, &mode) != 0) {
+		warn("Failed to get journal inode");
+		return;
+	}
+	dp2 = ip;
+	dp1 = ip;
+	if (sblock.fs_magic == FS_UFS1_MAGIC)
+		dp1->di_flags = 0;
+	else
+		dp2->di_flags = 0;
+	if (putino(&disk) < 0) {
+		warn("Failed to write journal inode");
+		return;
+	}
+}
+
 int
 journal_alloc(int64_t size)
 {
@@ -538,32 +788,39 @@ journal_alloc(int64_t size)
 	ufs2_daddr_t blk;
 	void *ip;
 	struct cg *cgp;
-	struct fs *fs;
 	int resid;
 	ino_t ino;
 	int blks;
 	int mode;
 	int i;
 
-	fs = &disk.d_fs;
 	cgp = &disk.d_cg;
 	ino = 0;
 
 	/*
+	 * If the journal file exists we can't allocate it.
+	 */
+	ino = journal_findfile();
+	if (ino > 0)
+		warnx("Journal file %s already exists, please remove.",
+		    SUJ_FILE);
+	if (ino != 0)
+		return (-1);
+	/*
 	 * If the user didn't supply a size pick one based on the filesystem
 	 * size constrained with hardcoded MIN and MAX values.  We opt for
 	 * 1/1024th of the filesystem up to MAX but not exceeding one CG and
 	 * not less than the MIN.
 	 */
 	if (size == 0) {
-		size = (fs->fs_size * fs->fs_bsize) / 1024;
+		size = (sblock.fs_size * sblock.fs_bsize) / 1024;
 		size = MIN(SUJ_MAX, size);
-		if (size / fs->fs_fsize > fs->fs_fpg)
-			size = fs->fs_fpg * fs->fs_fsize;
+		if (size / sblock.fs_fsize > sblock.fs_fpg)
+			size = sblock.fs_fpg * sblock.fs_fsize;
 		size = MAX(SUJ_MIN, size);
 	}
-	resid = blocks = size / fs->fs_bsize;
-	if (fs->fs_cstotal.cs_nbfree < blocks) {
+	resid = blocks = size / sblock.fs_bsize;
+	if (sblock.fs_cstotal.cs_nbfree < blocks) {
 		warn("Insufficient free space for %jd byte journal", size);
 		return (-1);
 	}
@@ -576,9 +833,9 @@ journal_alloc(int64_t size)
 			continue;
 		/*
 		 * Try to minimize fragmentation by requiring at least a
-		 * 1/8th of the blocks be present in each cg we use.
+		 * 1/16th of the blocks be present in each cg we use.
 		 */
-		if (cgp->cg_cs.cs_nbfree < blocks / 8)
+		if (cgp->cg_cs.cs_nbfree < blocks / 16)
 			continue;
 		ino = cgialloc(&disk);
 		if (ino <= 0)
@@ -597,22 +854,24 @@ journal_alloc(int64_t size)
 		 */
 		dp2 = ip;
 		dp1 = ip;
-		if (fs->fs_magic == FS_UFS1_MAGIC) {
+		if (sblock.fs_magic == FS_UFS1_MAGIC) {
 			bzero(dp1, sizeof(*dp1));
 			dp1->di_size = size;
-			dp1->di_mode = IFREG;
+			dp1->di_mode = IFREG | IREAD;
 			dp1->di_nlink = 1;
+			dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
 		} else {
 			bzero(dp2, sizeof(*dp2));
 			dp2->di_size = size;
-			dp2->di_mode = IFREG;
+			dp2->di_mode = IFREG | IREAD;
 			dp2->di_nlink = 1;
+			dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK;
 		}
 		for (i = 0; i < NDADDR && resid; i++, resid--) {
 			blk = journal_balloc();
 			if (blk <= 0)
 				goto out;
-			if (fs->fs_magic == FS_UFS1_MAGIC) {
+			if (sblock.fs_magic == FS_UFS1_MAGIC) {
 				dp1->di_db[i] = blk;
 				dp1->di_blocks++;
 			} else {
@@ -629,7 +888,7 @@ journal_alloc(int64_t size)
 				sbdirty();
 				goto out;
 			}
-			if (fs->fs_magic == FS_UFS1_MAGIC) {
+			if (sblock.fs_magic == FS_UFS1_MAGIC) {
 				dp1->di_ib[i] = blk;
 				dp1->di_blocks += blks;
 			} else {
@@ -637,10 +896,10 @@ journal_alloc(int64_t size)
 				dp2->di_blocks += blks;
 			}
 		}
-		if (fs->fs_magic == FS_UFS1_MAGIC)
-			dp1->di_blocks *= fs->fs_bsize / disk.d_bsize;
+		if (sblock.fs_magic == FS_UFS1_MAGIC)
+			dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize;
 		else
-			dp2->di_blocks *= fs->fs_bsize / disk.d_bsize;
+			dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize;
 		if (putino(&disk) < 0) {
 			warn("Failed to write inode");
 			sbdirty();
@@ -651,8 +910,11 @@ journal_alloc(int64_t size)
 			sbdirty();
 			return (-1);
 		}
-		fs->fs_sujournal = ino;
-		fs->fs_sujfree = 0;
+		if (journal_insertfile(ino) < 0) {
+			sbdirty();
+			return (-1);
+		}
+		sblock.fs_sujfree = 0;
 		return (0);
 	}
 	warnx("Insufficient contiguous free space for the journal.");

Modified: projects/suj/6/sys/sys/mount.h
==============================================================================
--- projects/suj/6/sys/sys/mount.h	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/sys/sys/mount.h	Tue Jan 26 06:45:38 2010	(r203013)
@@ -231,7 +231,6 @@ void          __mnt_vnode_markerfree(str
 #define	MNT_NOATIME	0x10000000	/* disable update of file access time */
 #define	MNT_NOCLUSTERR	0x40000000	/* disable cluster read */
 #define	MNT_NOCLUSTERW	0x80000000	/* disable cluster write */
-#define	MNT_SUJ		0x00000080	/* softdep journaling */
 
 /*
  * NFS export related mount flags.
@@ -267,7 +266,7 @@ void          __mnt_vnode_markerfree(str
 			MNT_ROOTFS	| MNT_NOATIME	| MNT_NOCLUSTERR| \
 			MNT_NOCLUSTERW	| MNT_SUIDDIR	| MNT_SOFTDEP	| \
 			MNT_IGNORE	| MNT_EXPUBLIC	| MNT_NOSYMFOLLOW | \
-			MNT_JAILDEVFS	| MNT_MULTILABEL | MNT_ACLS | MNT_SUJ)
+			MNT_JAILDEVFS	| MNT_MULTILABEL | MNT_ACLS)
 
 /* Mask of flags that can be updated. */
 #define	MNT_UPDATEMASK (MNT_NOSUID	| MNT_NOEXEC	| \
@@ -303,6 +302,7 @@ void          __mnt_vnode_markerfree(str
  * with the unmount attempt (used by NFS).
  */
 #define MNTK_UNMOUNTF	0x00000001	/* forced unmount in progress */
+#define	MNTK_SUJ	0x00000100	/* Softdep journaling enabled */
 #define MNTK_UNMOUNT	0x01000000	/* unmount in progress */
 #define	MNTK_MWAIT	0x02000000	/* waiting for unmount to finish */
 #define	MNTK_SUSPEND	0x08000000	/* request write suspension */

Modified: projects/suj/6/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- projects/suj/6/sys/ufs/ffs/ffs_alloc.c	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/sys/ufs/ffs/ffs_alloc.c	Tue Jan 26 06:45:38 2010	(r203013)
@@ -1837,6 +1837,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, i
 	ino_t inum;
 	struct workhead *dephd;
 {
+	struct mount *mp;
 	struct cg *cgp;
 	struct buf *bp;
 	ufs1_daddr_t fragno, cgbno;
@@ -1951,7 +1952,8 @@ ffs_blkfree(ump, fs, devvp, bno, size, i
 	fs->fs_fmod = 1;
 	ACTIVECLEAR(fs, cg);
 	UFS_UNLOCK(ump);
-	if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP)
+	mp = UFSTOVFS(ump);
+	if (mp->mnt_flag & MNT_SOFTDEP)
 		softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
 		    numfrags(fs, size), dephd);
 	bdwrite(bp);

Modified: projects/suj/6/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/6/sys/ufs/ffs/ffs_softdep.c	Tue Jan 26 06:36:10 2010	(r203012)
+++ projects/suj/6/sys/ufs/ffs/ffs_softdep.c	Tue Jan 26 06:45:38 2010	(r203013)
@@ -1879,7 +1879,7 @@ softdep_unmount(mp)
 	struct mount *mp;
 {
 
-	if (mp->mnt_flag & MNT_SUJ)
+	if (mp->mnt_kern_flag & MNTK_SUJ)
 		journal_unmount(mp);
 }
 
@@ -2021,16 +2021,36 @@ journal_mount(mp, fs, cred)
 	struct fs *fs;
 	struct ucred *cred;
 {
+	struct componentname cnp;
 	struct jblocks *jblocks;
+	struct vnode *dvp;
 	struct vnode *vp;
 	struct inode *ip;
 	ufs2_daddr_t blkno;
+	ino_t sujournal;
 	int bcount;
 	int error;
 	int i;
 
-	mp->mnt_flag |= MNT_SUJ;
-	error = VFS_VGET(mp, fs->fs_sujournal, LK_EXCLUSIVE, &vp);
+	mp->mnt_kern_flag |= MNTK_SUJ;
+	error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp);
+	if (error)
+		return (error);
+	bzero(&cnp, sizeof(cnp));
+	cnp.cn_nameiop = LOOKUP;
+	cnp.cn_flags = ISLASTCN;
+	cnp.cn_thread = curthread;
+	cnp.cn_cred = curthread->td_ucred;
+	cnp.cn_pnbuf = SUJ_FILE;
+	cnp.cn_nameptr = SUJ_FILE;
+	cnp.cn_namelen = strlen(SUJ_FILE);
+	error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal);
+	vput(dvp);
+	if (error != 0) {
+		printf("Failed to find journal.  Use tunefs to create one\n");
+		return (error);
+	}
+	error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, &vp);
 	if (error)
 		return (error);
 	ip = VTOI(vp);
@@ -2052,9 +2072,18 @@ journal_mount(mp, fs, cred)
 	}
 	jblocks->jb_low = jblocks->jb_free / 3;	/* Reserve 33%. */
 	jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */
-	DIP_SET(ip, i_modrev, fs->fs_mtime);
-	ip->i_flags |= IN_MODIFIED;
-	ffs_update(vp, 1);
+	/*
+	 * Only validate the journal contents if the filesystem is clean,
+	 * otherwise we write the logs but they'll never be used.  If the
+	 * filesystem was still dirty when we mounted it the journal is
+	 * invalid and a new journal can only be valid if it starts from a
+	 * clean mount.
+	 */
+	if (fs->fs_clean) {
+		DIP_SET(ip, i_modrev, fs->fs_mtime);
+		ip->i_flags |= IN_MODIFIED;
+		ffs_update(vp, 1);
+	}
 	VFSTOUFS(mp)->softdep_jblocks = jblocks;
 out:
 	vput(vp);
@@ -2136,6 +2165,11 @@ remove_from_journal(wk)
 	ump->softdep_on_journal -= 1;
 }
 
+/*
+ * Check for journal space as well as dependency limits so the prelink
+ * code can throttle both journaled and non-journaled filesystems.
+ * Threshold is 0 for low and 1 for min.
+ */
 static int
 journal_space(ump, thresh)
 	struct ufsmount *ump;
@@ -2144,7 +2178,20 @@ journal_space(ump, thresh)
 	struct jblocks *jblocks;
 	int avail;
 
+	/*
+	 * We use a tighter restriction here to prevent request_cleanup()
+	 * running in threads from running into locks we currently hold.
+	 */
+	if (num_inodedep > (max_softdeps / 10) * 9)
+		return (0);
+
 	jblocks = ump->softdep_jblocks;
+	if (jblocks == NULL)
+		return (1);
+	if (thresh)
+		thresh = jblocks->jb_min;
+	else
+		thresh = jblocks->jb_low;
 	avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE;
 	avail = jblocks->jb_free - avail;
 
@@ -2187,15 +2234,13 @@ softdep_prealloc(vp, waitok)
 	struct vnode *vp;
 	int waitok;
 {
-	struct jblocks *jblocks;
 	struct ufsmount *ump;
 
 	if (DOINGSUJ(vp) == 0)
 		return (0);
 	ump = VFSTOUFS(vp->v_mount);
-	jblocks = ump->softdep_jblocks;
 	ACQUIRE_LOCK(&lk);
-	if (journal_space(ump, jblocks->jb_low)) {
+	if (journal_space(ump, 0)) {
 		FREE_LOCK(&lk);
 		return (0);
 	}
@@ -2210,9 +2255,9 @@ softdep_prealloc(vp, waitok)
 	ffs_syncvnode(vp, waitok);
 	ACQUIRE_LOCK(&lk);
 	process_removes(vp);
-	if (journal_space(ump, jblocks->jb_low) == 0) {
+	if (journal_space(ump, 0) == 0) {
 		softdep_speedup();
-		if (journal_space(ump, jblocks->jb_min) == 0)
+		if (journal_space(ump, 1) == 0)
 			journal_suspend(ump);
 	}
 	FREE_LOCK(&lk);
@@ -2220,18 +2265,22 @@ softdep_prealloc(vp, waitok)
 	return (0);
 }
 
+/*
+ * Before adjusting a link count on a vnode verify that we have sufficient
+ * journal space.  If not, process operations that depend on the currently
+ * locked pair of vnodes to try to flush space as the syncer, buf daemon,
+ * and softdep flush threads can not acquire these locks to reclaim space.
+ */
 static void
 softdep_prelink(dvp, vp)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-projects mailing list