svn commit: r249785 - in stable/9/sbin: dumpfs newfs tunefs

Kirk McKusick mckusick at FreeBSD.org
Tue Apr 23 01:09:24 UTC 2013


Author: mckusick
Date: Tue Apr 23 01:09:22 2013
New Revision: 249785
URL: http://svnweb.freebsd.org/changeset/base/249785

Log:
  MFC of 248623:
  
  The purpose of this change to the FFS layout policy is to reduce the
  running time for a full fsck. It also reduces the random access time
  for large files and speeds the traversal time for directory tree walks.
  
  The key idea is to reserve a small area in each cylinder group
  immediately following the inode blocks for the use of metadata,
  specifically indirect blocks and directory contents. The new policy
  is to preferentially place metadata in the metadata area and
  everything else in the blocks that follow the metadata area.
  
  The size of this area can be set when creating a filesystem using
  newfs(8) or changed in an existing filesystem using tunefs(8).
  Both utilities use the `-k held-for-metadata-blocks' option to
  specify the amount of space to be held for metadata blocks in each
  cylinder group. By default, newfs(8) sets this area to half of
  minfree (typically 4% of the data area).
  
  This work was inspired by a paper presented at Usenix's FAST '13:
  www.usenix.org/conference/fast13/ffsck-fast-file-system-checker
  
  Details of this implementation appear in the April 2013 issue of ;login:
  www.usenix.org/publications/login/april-2013-volume-38-number-2.
  A copy of the April 2013 ;login: paper can also be downloaded
  from: www.mckusick.com/publications/faster_fsck.pdf.
  
  Reviewed by: kib
  Tested by:   Peter Holm

Modified:
  stable/9/sbin/dumpfs/dumpfs.c
  stable/9/sbin/newfs/mkfs.c
  stable/9/sbin/newfs/newfs.8
  stable/9/sbin/newfs/newfs.c
  stable/9/sbin/newfs/newfs.h
  stable/9/sbin/tunefs/tunefs.8
  stable/9/sbin/tunefs/tunefs.c
Directory Properties:
  stable/9/sbin/dumpfs/   (props changed)
  stable/9/sbin/newfs/   (props changed)
  stable/9/sbin/tunefs/   (props changed)

Modified: stable/9/sbin/dumpfs/dumpfs.c
==============================================================================
--- stable/9/sbin/dumpfs/dumpfs.c	Tue Apr 23 00:55:04 2013	(r249784)
+++ stable/9/sbin/dumpfs/dumpfs.c	Tue Apr 23 01:09:22 2013	(r249785)
@@ -241,8 +241,8 @@ dumpfs(const char *name)
 	    afs.fs_sblkno, afs.fs_cblkno, afs.fs_iblkno, afs.fs_dblkno);
 	printf("cgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t%d\n",
 	    afs.fs_cgrotor, afs.fs_fmod, afs.fs_ronly, afs.fs_clean);
-	printf("avgfpdir %d\tavgfilesize %d\n",
-	    afs.fs_avgfpdir, afs.fs_avgfilesize);
+	printf("metaspace %jd\tavgfpdir %d\tavgfilesize %d\n",
+	    afs.fs_metaspace, afs.fs_avgfpdir, afs.fs_avgfilesize);
 	printf("flags\t");
 	if (afs.fs_old_flags & FS_FLAGS_UPDATED)
 		fsflags = afs.fs_flags;

Modified: stable/9/sbin/newfs/mkfs.c
==============================================================================
--- stable/9/sbin/newfs/mkfs.c	Tue Apr 23 00:55:04 2013	(r249784)
+++ stable/9/sbin/newfs/mkfs.c	Tue Apr 23 01:09:22 2013	(r249785)
@@ -444,6 +444,12 @@ restart:
 	if (sblock.fs_sbsize > SBLOCKSIZE)
 		sblock.fs_sbsize = SBLOCKSIZE;
 	sblock.fs_minfree = minfree;
+	if (metaspace > 0 && metaspace < sblock.fs_fpg / 2)
+		sblock.fs_metaspace = blknum(&sblock, metaspace);
+	else if (metaspace != -1)
+		/* reserve half of minfree for metadata blocks */
+		sblock.fs_metaspace = blknum(&sblock,
+		    (sblock.fs_fpg * minfree) / 200);
 	if (maxbpg == 0)
 		sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize);
 	else

Modified: stable/9/sbin/newfs/newfs.8
==============================================================================
--- stable/9/sbin/newfs/newfs.8	Tue Apr 23 00:55:04 2013	(r249784)
+++ stable/9/sbin/newfs/newfs.8	Tue Apr 23 01:09:22 2013	(r249785)
@@ -50,6 +50,7 @@
 .Op Fl g Ar avgfilesize
 .Op Fl h Ar avgfpdir
 .Op Fl i Ar bytes
+.Op Fl k Ar held-for-metadata-blocks
 .Op Fl m Ar free-space
 .Op Fl o Ar optimization
 .Op Fl p Ar partition
@@ -163,6 +164,17 @@ This flag is implemented by running the
 .Xr tunefs 8
 utility found in the user's
 .Dv $PATH .
+.It Fl k Ar held-for-metadata-blocks
+Set the amount of space to be held for metadata blocks in each cylinder group.
+When set, the file system preference routines will try to save
+the specified amount of space immediately following the inode blocks
+in each cylinder group for use by metadata blocks.
+Clustering the metadata blocks speeds up random file access
+and decreases the running time of
+.Xr fsck 8 .
+By default
+.Xr newfs 8
+sets it to half of the space reserved to minfree.
 .It Fl l
 Enable multilabel MAC on the new file system.
 .It Fl m Ar free-space

Modified: stable/9/sbin/newfs/newfs.c
==============================================================================
--- stable/9/sbin/newfs/newfs.c	Tue Apr 23 00:55:04 2013	(r249784)
+++ stable/9/sbin/newfs/newfs.c	Tue Apr 23 01:09:22 2013	(r249785)
@@ -102,6 +102,7 @@ int	bsize = 0;		/* block size */
 int	maxbsize = 0;		/* maximum clustering */
 int	maxblkspercg = MAXBLKSPERCG; /* maximum blocks per cylinder group */
 int	minfree = MINFREE;	/* free space threshold */
+int	metaspace;		/* space held for metadata blocks */
 int	opt = DEFAULTOPT;	/* optimization preference (space or time) */
 int	density;		/* number of bytes per inode */
 int	maxcontig = 0;		/* max contiguous blocks to allocate */
@@ -141,7 +142,7 @@ main(int argc, char *argv[])
 	part_name = 'c';
 	reserved = 0;
 	while ((ch = getopt(argc, argv,
-	    "EJL:NO:RS:T:UXa:b:c:d:e:f:g:h:i:jlm:no:p:r:s:t")) != -1)
+	    "EJL:NO:RS:T:UXa:b:c:d:e:f:g:h:i:jk:lm:no:p:r:s:t")) != -1)
 		switch (ch) {
 		case 'E':
 			Eflag = 1;
@@ -248,6 +249,13 @@ main(int argc, char *argv[])
 		case 'l':
 			lflag = 1;
 			break;
+		case 'k':
+			if ((metaspace = atoi(optarg)) < 0)
+				errx(1, "%s: bad metadata space %%", optarg);
+			if (metaspace == 0)
+				/* force to stay zero in mkfs */
+				metaspace = -1;
+			break;
 		case 'm':
 			if ((minfree = atoi(optarg)) < 0 || minfree > 99)
 				errx(1, "%s: bad free space %%", optarg);
@@ -501,6 +509,7 @@ usage()
 	fprintf(stderr, "\t-h average files per directory\n");
 	fprintf(stderr, "\t-i number of bytes per inode\n");
 	fprintf(stderr, "\t-j enable soft updates journaling\n");
+	fprintf(stderr, "\t-k space to hold for metadata blocks\n");
 	fprintf(stderr, "\t-l enable multilabel MAC\n");
 	fprintf(stderr, "\t-n do not create .snap directory\n");
 	fprintf(stderr, "\t-m minimum free space %%\n");

Modified: stable/9/sbin/newfs/newfs.h
==============================================================================
--- stable/9/sbin/newfs/newfs.h	Tue Apr 23 00:55:04 2013	(r249784)
+++ stable/9/sbin/newfs/newfs.h	Tue Apr 23 01:09:22 2013	(r249785)
@@ -95,6 +95,7 @@ extern int	bsize;		/* block size */
 extern int	maxbsize;	/* maximum clustering */
 extern int	maxblkspercg;	/* maximum blocks per cylinder group */
 extern int	minfree;	/* free space threshold */
+extern int	metaspace;	/* space held for metadata blocks */
 extern int	opt;		/* optimization preference (space or time) */
 extern int	density;	/* number of bytes per inode */
 extern int	maxcontig;	/* max contiguous blocks to allocate */

Modified: stable/9/sbin/tunefs/tunefs.8
==============================================================================
--- stable/9/sbin/tunefs/tunefs.8	Tue Apr 23 00:55:04 2013	(r249784)
+++ stable/9/sbin/tunefs/tunefs.8	Tue Apr 23 01:09:22 2013	(r249785)
@@ -42,6 +42,7 @@
 .Op Fl f Ar avgfilesize
 .Op Fl j Cm enable | disable
 .Op Fl J Cm enable | disable
+.Op Fl k Ar held-for-metadata-blocks
 .Op Fl L Ar volname
 .Op Fl l Cm enable | disable
 .Op Fl m Ar minfree
@@ -96,6 +97,19 @@ Specify the expected average file size.
 Turn on/off soft updates journaling.
 .It Fl J Cm enable | disable
 Turn on/off gjournal flag.
+.It Fl k Ar held-for-metadata-blocks
+Set the amount of space to be held for metadata blocks.
+When set, the file system preference routines will try to save
+the specified amount of space immediately following the inode blocks
+in each cylinder group for use by metadata blocks.
+Clustering the metadata blocks speeds up random file access
+and decreases the running time of
+.Xr fsck 8 .
+While this option can be set at any time,
+it is most effective if set before any data is loaded into the file system.
+By default
+.Xr newfs 8
+sets it to half of the space reserved to minfree.
 .It Fl L Ar volname
 Add/modify an optional file system volume label.
 .It Fl l Cm enable | disable

Modified: stable/9/sbin/tunefs/tunefs.c
==============================================================================
--- stable/9/sbin/tunefs/tunefs.c	Tue Apr 23 00:55:04 2013	(r249784)
+++ stable/9/sbin/tunefs/tunefs.c	Tue Apr 23 01:09:22 2013	(r249785)
@@ -87,10 +87,9 @@ main(int argc, char *argv[])
 	const char *special, *on;
 	const char *name;
 	int active;
-	int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, Lflag;
-	int lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue, pflag, sflag;
-	int tflag;
-	int svalue, Svalue;
+	int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, kflag;
+	int kvalue, Lflag, lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue;
+	int pflag, sflag, svalue, Svalue, tflag;
 	int ch, found_arg, i;
 	const char *chg[2];
 	struct ufs_args args;
@@ -98,13 +97,13 @@ main(int argc, char *argv[])
 
 	if (argc < 3)
 		usage();
-	Aflag = aflag = eflag = fflag = jflag = Jflag = Lflag = lflag = 0;
-	mflag = Nflag = nflag = oflag = pflag = sflag = tflag = 0;
+	Aflag = aflag = eflag = fflag = jflag = Jflag = kflag = Lflag = 0;
+	lflag = mflag = Nflag = nflag = oflag = pflag = sflag = tflag = 0;
 	avalue = jvalue = Jvalue = Lvalue = lvalue = Nvalue = nvalue = NULL;
 	evalue = fvalue = mvalue = ovalue = svalue = Svalue = 0;
 	active = 0;
 	found_arg = 0;		/* At least one arg is required. */
-	while ((ch = getopt(argc, argv, "Aa:e:f:j:J:L:l:m:N:n:o:ps:S:t:"))
+	while ((ch = getopt(argc, argv, "Aa:e:f:j:J:k:L:l:m:N:n:o:ps:S:t:"))
 	    != -1)
 		switch (ch) {
 
@@ -169,6 +168,14 @@ main(int argc, char *argv[])
 			Jflag = 1;
 			break;
 
+		case 'k':
+			found_arg = 1;
+			name = "space to hold for metadata blocks";
+			kvalue = atoi(optarg);
+			if (mvalue < 0)
+				errx(10, "bad %s (%s)", name, optarg);
+			kflag = 1;
+			break;
 
 		case 'L':
 			found_arg = 1;
@@ -402,6 +409,22 @@ main(int argc, char *argv[])
 			}
 		}
 	}
+	if (kflag) {
+		name = "space to hold for metadata blocks";
+		if (sblock.fs_metaspace == kvalue)
+			warnx("%s remains unchanged as %d", name, kvalue);
+		else {
+			kvalue = blknum(&sblock, kvalue);
+			if (kvalue > sblock.fs_fpg / 2) {
+				kvalue = blknum(&sblock, sblock.fs_fpg / 2);
+				warnx("%s cannot exceed half the file system "
+				    "space", name);
+			}
+			warnx("%s changes from %jd to %d",
+				    name, sblock.fs_metaspace, kvalue);
+			sblock.fs_metaspace = kvalue;
+		}
+	}
 	if (lflag) {
 		name = "multilabel";
 		if (strcmp(lvalue, "enable") == 0) {
@@ -1065,7 +1088,7 @@ usage(void)
 {
 	fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n",
 "usage: tunefs [-A] [-a enable | disable] [-e maxbpg] [-f avgfilesize]",
-"              [-J enable | disable] [-j enable | disable]", 
+"              [-J enable | disable] [-j enable | disable] [-k metaspace]",
 "              [-L volname] [-l enable | disable] [-m minfree]",
 "              [-N enable | disable] [-n enable | disable]",
 "              [-o space | time] [-p] [-s avgfpdir] [-t enable | disable]",
@@ -1098,6 +1121,8 @@ printfs(void)
 	      sblock.fs_avgfpdir);
 	warnx("minimum percentage of free space: (-m)             %d%%",
 	      sblock.fs_minfree);
+	warnx("space to hold for metadata blocks: (-k)            %jd",
+	      sblock.fs_metaspace);
 	warnx("optimization preference: (-o)                      %s",
 	      sblock.fs_optim == FS_OPTSPACE ? "space" : "time");
 	if (sblock.fs_minfree >= MINFREE &&


More information about the svn-src-stable mailing list