svn commit: r202283 - in head/sys: conf fs/ext2fs gnu/fs/ext2fs gnu/fs/reiserfs modules/ext2fs

Ulf Lilleengen lulf at FreeBSD.org
Thu Jan 14 14:30:54 UTC 2010


Author: lulf
Date: Thu Jan 14 14:30:54 2010
New Revision: 202283
URL: http://svn.freebsd.org/changeset/base/202283

Log:
  Bring in the ext2fs work done by Aditya Sarawgi during and after Google Summer
  of Code 2009:
  
  - BSDL block and inode allocation policies for ext2fs. This involves the use
    of FFS1-style block and inode allocation for ext2fs. Preallocation was
    removed since it was GPL'd.
  - Make ext2fs MPSAFE by introducing locks to per-mount data structures.
  - Fixes for PR kern/122047.
  - Various small bugfixes.
  - Move out of gnu/ directory.
  
  Sponsored by:   Google Inc.
  Submitted by:	Aditya Sarawgi <sarawgi.aditya AT SPAMFREE gmail DOT com>

Added:
  head/sys/fs/ext2fs/
     - copied from r201427, head/sys/gnu/fs/ext2fs/
  head/sys/fs/ext2fs/ext2_dinode.h   (contents, props changed)
  head/sys/fs/ext2fs/ext2_dir.h   (contents, props changed)
  head/sys/fs/ext2fs/ext2fs.h   (contents, props changed)
Deleted:
  head/sys/fs/ext2fs/COPYRIGHT.INFO
  head/sys/fs/ext2fs/ext2_bitops.h
  head/sys/fs/ext2fs/ext2_fs.h
  head/sys/fs/ext2fs/ext2_fs_sb.h
  head/sys/fs/ext2fs/ext2_linux_balloc.c
  head/sys/fs/ext2fs/ext2_linux_ialloc.c
  head/sys/fs/ext2fs/i386-bitops.h
  head/sys/gnu/fs/ext2fs/
Modified:
  head/sys/conf/files
  head/sys/fs/ext2fs/ext2_alloc.c
  head/sys/fs/ext2fs/ext2_balloc.c
  head/sys/fs/ext2fs/ext2_bmap.c
  head/sys/fs/ext2fs/ext2_extern.h
  head/sys/fs/ext2fs/ext2_inode.c
  head/sys/fs/ext2fs/ext2_inode_cnv.c
  head/sys/fs/ext2fs/ext2_lookup.c
  head/sys/fs/ext2fs/ext2_mount.h
  head/sys/fs/ext2fs/ext2_readwrite.c
  head/sys/fs/ext2fs/ext2_subr.c
  head/sys/fs/ext2fs/ext2_vfsops.c
  head/sys/fs/ext2fs/ext2_vnops.c
  head/sys/fs/ext2fs/fs.h
  head/sys/fs/ext2fs/inode.h
  head/sys/gnu/fs/reiserfs/reiserfs_fs.h
  head/sys/modules/ext2fs/Makefile

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Thu Jan 14 11:03:26 2010	(r202282)
+++ head/sys/conf/files	Thu Jan 14 14:30:54 2010	(r202283)
@@ -1956,18 +1956,15 @@ geom/virstor/binstream.c	optional geom_v
 geom/virstor/g_virstor.c	optional geom_virstor
 geom/virstor/g_virstor_md.c	optional geom_virstor
 geom/zero/g_zero.c		optional geom_zero
-gnu/fs/ext2fs/ext2_alloc.c		optional ext2fs \
-	warning "kernel contains GPL contaminated ext2fs filesystem"
-gnu/fs/ext2fs/ext2_balloc.c	optional ext2fs
-gnu/fs/ext2fs/ext2_bmap.c	optional ext2fs
-gnu/fs/ext2fs/ext2_inode.c	optional ext2fs
-gnu/fs/ext2fs/ext2_inode_cnv.c	optional ext2fs
-gnu/fs/ext2fs/ext2_linux_balloc.c	optional ext2fs
-gnu/fs/ext2fs/ext2_linux_ialloc.c	optional ext2fs
-gnu/fs/ext2fs/ext2_lookup.c	optional ext2fs
-gnu/fs/ext2fs/ext2_subr.c	optional ext2fs
-gnu/fs/ext2fs/ext2_vfsops.c	optional ext2fs
-gnu/fs/ext2fs/ext2_vnops.c	optional ext2fs
+fs/ext2fs/ext2_alloc.c		optional ext2fs
+fs/ext2fs/ext2_balloc.c		optional ext2fs
+fs/ext2fs/ext2_bmap.c		optional ext2fs
+fs/ext2fs/ext2_inode.c		optional ext2fs
+fs/ext2fs/ext2_inode_cnv.c	optional ext2fs
+fs/ext2fs/ext2_lookup.c		optional ext2fs
+fs/ext2fs/ext2_subr.c		optional ext2fs
+fs/ext2fs/ext2_vfsops.c		optional ext2fs
+fs/ext2fs/ext2_vnops.c		optional ext2fs
 gnu/fs/reiserfs/reiserfs_hashes.c	optional reiserfs \
 	warning "kernel contains GPL contaminated ReiserFS filesystem"
 gnu/fs/reiserfs/reiserfs_inode.c	optional reiserfs

Modified: head/sys/fs/ext2fs/ext2_alloc.c
==============================================================================
--- head/sys/gnu/fs/ext2fs/ext2_alloc.c	Sun Jan  3 12:17:51 2010	(r201427)
+++ head/sys/fs/ext2fs/ext2_alloc.c	Thu Jan 14 14:30:54 2010	(r202283)
@@ -43,52 +43,54 @@
 #include <sys/stat.h>
 #include <sys/mount.h>
 #include <sys/syslog.h>
+#include <sys/buf.h>
 
-#include <gnu/fs/ext2fs/inode.h>
-#include <gnu/fs/ext2fs/ext2_mount.h>
-#include <gnu/fs/ext2fs/ext2_fs.h>
-#include <gnu/fs/ext2fs/ext2_fs_sb.h>
-#include <gnu/fs/ext2fs/fs.h>
-#include <gnu/fs/ext2fs/ext2_extern.h>
-
-static void	ext2_fserr(struct ext2_sb_info *, u_int, char *);
-
-/*
- * Linux calls this functions at the following locations:
- * (1) the inode is freed
- * (2) a preallocation miss occurs
- * (3) truncate is called
- * (4) release_file is called and f_mode & 2
- *
- * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2)
- * the call in vfree might be redundant
- */
-void
-ext2_discard_prealloc(ip)
-	struct inode * ip;
-{
-#ifdef EXT2_PREALLOCATE
-        if (ip->i_prealloc_count) {
-                int i = ip->i_prealloc_count;
-                ip->i_prealloc_count = 0;
-                ext2_free_blocks (ITOV(ip)->v_mount,
-                                  ip->i_prealloc_block,
-                                  i);
-        }
-#endif
-}
-
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/fs.h>
+#include <fs/ext2fs/ext2_extern.h>
+
+static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
+static u_long	ext2_dirpref(struct inode *);
+static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
+static u_long	ext2_hashalloc(struct inode *, int, long, int,
+				daddr_t (*)(struct inode *, int, daddr_t, 
+						int));
+static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
+static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
 /*
  * Allocate a block in the file system.
- * 
- * this takes the framework from ffs_alloc. To implement the
- * actual allocation, it calls ext2_new_block, the ported version
- * of the same Linux routine.
  *
- * we note that this is always called in connection with ext2_blkpref
+ * A preference may be optionally specified. If a preference is given
+ * the following hierarchy is used to allocate a block:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate a block in the same cylinder group.
+ *   4) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ *   1) allocate a block in the cylinder group that contains the
+ *        inode for the file.
+ *   2) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
  *
- * preallocation is done as Linux does it
+ * A preference may be optionally specified. If a preference is given
+ * the following hierarchy is used to allocate a block:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate a block in the same cylinder group.
+ *   4) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ *   1) allocate a block in the cylinder group that contains the
+ *        inode for the file.
+ *   2) quadradically rehash into other cylinder groups, until an
+ *        available block is located.
  */
+
 int
 ext2_alloc(ip, lbn, bpref, size, cred, bnp)
 	struct inode *ip;
@@ -97,75 +99,46 @@ ext2_alloc(ip, lbn, bpref, size, cred, b
 	struct ucred *cred;
 	int32_t *bnp;
 {
-	struct ext2_sb_info *fs;
+	struct m_ext2fs *fs;
+	struct ext2mount *ump;
 	int32_t bno;
-	
+	int cg;	
 	*bnp = 0;
 	fs = ip->i_e2fs;
+	ump = ip->i_ump;
+	mtx_assert(EXT2_MTX(ump), MA_OWNED);
 #ifdef DIAGNOSTIC
-	if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) {
+	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
 		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
-		    fs->s_blocksize, size, fs->fs_fsmnt);
+		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
 		panic("ext2_alloc: bad size");
 	}
 	if (cred == NOCRED)
 		panic("ext2_alloc: missing credential");
 #endif /* DIAGNOSTIC */
-	if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0)
+	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
 		goto nospace;
 	if (cred->cr_uid != 0 && 
-		fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count)
+		fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
 		goto nospace;
-	if (bpref >= fs->s_es->s_blocks_count)
+	if (bpref >= fs->e2fs->e2fs_bcount)
 		bpref = 0;
-	/* call the Linux code */
-#ifdef EXT2_PREALLOCATE
-	/* To have a preallocation hit, we must
-	 * - have at least one block preallocated
-	 * - and our preferred block must have that block number or one below
-	 */
-        if (ip->i_prealloc_count &&
-            (bpref == ip->i_prealloc_block ||
-             bpref + 1 == ip->i_prealloc_block))
-        {
-                bno = ip->i_prealloc_block++;
-                ip->i_prealloc_count--;
-                /* ext2_debug ("preallocation hit (%lu/%lu).\n",
-                            ++alloc_hits, ++alloc_attempts); */
-
-		/* Linux gets, clears, and releases the buffer at this
-		   point - we don't have to that; we leave it to the caller
-		 */
-        } else {
-                ext2_discard_prealloc (ip);
-                /* ext2_debug ("preallocation miss (%lu/%lu).\n",
-                            alloc_hits, ++alloc_attempts); */
-                if (S_ISREG(ip->i_mode))
-                        bno = ext2_new_block
-                                (ITOV(ip)->v_mount, bpref,
-                                 &ip->i_prealloc_count,
-                                 &ip->i_prealloc_block);
-                else
-			bno = (int32_t)ext2_new_block(ITOV(ip)->v_mount, 
-					bpref, 0, 0);
+	 if (bpref == 0)
+                cg = ino_to_cg(fs, ip->i_number);
+        else
+                cg = dtog(fs, bpref);
+        bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
+                                                 ext2_alloccg);
+        if (bno > 0) {
+                ip->i_blocks += btodb(fs->e2fs_bsize);
+                ip->i_flag |= IN_CHANGE | IN_UPDATE;
+                *bnp = bno;
+                return (0);
         }
-#else
-	bno = (int32_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0);
-#endif
-
-	if (bno > 0) {
-		/* set next_alloc fields as done in block_getblk */
-		ip->i_next_alloc_block = lbn;
-		ip->i_next_alloc_goal = bno;
-
-		ip->i_blocks += btodb(size);
-		ip->i_flag |= IN_CHANGE | IN_UPDATE;
-		*bnp = bno;
-		return (0);
-	}
 nospace:
+	EXT2_UNLOCK(ump);
 	ext2_fserr(fs, cred->cr_uid, "file system full");
-	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
 	return (ENOSPC);
 }
 
@@ -187,6 +160,8 @@ nospace:
 #ifdef FANCY_REALLOC
 #include <sys/sysctl.h>
 static int doasyncfree = 1;
+static int doreallocblks = 1;
+
 #ifdef	OPT_DEBUG
 SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
 #endif	/* OPT_DEBUG */
@@ -204,19 +179,21 @@ ext2_reallocblks(ap)
 return ENOSPC;
 #else
 
-	struct ext2_sb_info *fs;
+	struct m_ext2fs *fs;
 	struct inode *ip;
 	struct vnode *vp;
 	struct buf *sbp, *ebp;
-	int32_t *bap, *sbap, *ebap;
+	int32_t *bap, *sbap, *ebap = 0;
+	struct ext2mount *ump;
 	struct cluster_save *buflist;
-	int32_t start_lbn, end_lbn, soff, eoff, newblk, blkno;
 	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
+	int32_t start_lbn, end_lbn, soff, newblk, blkno =0;
 	int i, len, start_lvl, end_lvl, pref, ssize;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	fs = ip->i_e2fs;
+	ump = ip->i_ump;
 #ifdef UNKLAR
 	if (fs->fs_contigsumsize <= 0)
 		return (ENOSPC);
@@ -238,8 +215,8 @@ return ENOSPC;
 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
 		return (ENOSPC);
-	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
-	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
+	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
+	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
 		return (ENOSPC);
 	/*
 	 * Get the starting offset and block map for the first block.
@@ -249,7 +226,7 @@ return ENOSPC;
 		soff = start_lbn;
 	} else {
 		idp = &start_ap[start_lvl - 1];
-		if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &sbp)) {
+		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
 			brelse(sbp);
 			return (ENOSPC);
 		}
@@ -259,7 +236,8 @@ return ENOSPC;
 	/*
 	 * Find the preferred location for the cluster.
 	 */
-	pref = ext2_blkpref(ip, start_lbn, soff, sbap);
+	EXT2_LOCK(ump); 
+	pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno);
 	/*
 	 * If the block range spans two block maps, get the second map.
 	 */
@@ -271,16 +249,20 @@ return ENOSPC;
 			panic("ext2_reallocblk: start == end");
 #endif
 		ssize = len - (idp->in_off + 1);
-		if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &ebp))
+		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
+			EXT2_UNLOCK(ump);	
 			goto fail;
+		}
 		ebap = (int32_t *)ebp->b_data;
 	}
 	/*
 	 * Search the block map looking for an allocation of the desired size.
 	 */
-	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref,
-	    len, (u_long (*)())ext2_clusteralloc)) == 0)
+	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
+	    len, ext2_clusteralloc)) == 0){
+		EXT2_UNLOCK(ump);
 		goto fail;
+	}	
 	/*
 	 * We have found a new contiguous block.
 	 *
@@ -289,9 +271,10 @@ return ENOSPC;
 	 * with the file.
 	 */
 	blkno = newblk;
-	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
+	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
 		if (i == ssize)
 			bap = ebap;
+			soff = -i;
 #ifdef DIAGNOSTIC
 		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
 			panic("ext2_reallocblks: alloc mismatch");
@@ -322,17 +305,18 @@ return ENOSPC;
 		if (!doasyncfree)
 			ext2_update(vp, 1);
 	}
-	if (ssize < len)
+	if (ssize < len) {
 		if (doasyncfree)
 			bdwrite(ebp);
 		else
 			bwrite(ebp);
+	}
 	/*
 	 * Last, free the old blocks and assign the new blocks to the buffers.
 	 */
-	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
+	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
 		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
-		    fs->s_blocksize);
+		    fs->e2fs_bsize);
 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
 	}
 	return (0);
@@ -350,8 +334,6 @@ fail:
 /*
  * Allocate an inode in the file system.
  * 
- * we leave the actual allocation strategy to the (modified)
- * ext2_new_inode(), to make sure we get the policies right
  */
 int
 ext2_valloc(pvp, mode, cred, vpp)
@@ -361,21 +343,38 @@ ext2_valloc(pvp, mode, cred, vpp)
 	struct vnode **vpp;
 {
 	struct inode *pip;
-	struct ext2_sb_info *fs;
+	struct m_ext2fs *fs;
 	struct inode *ip;
-	ino_t ino;
-	int i, error;
+	struct ext2mount *ump;
+	ino_t ino, ipref;
+	int i, error, cg;
 	
 	*vpp = NULL;
 	pip = VTOI(pvp);
 	fs = pip->i_e2fs;
-	if (fs->s_es->s_free_inodes_count == 0)
-		goto noinodes;
+	ump = pip->i_ump;
 
-	/* call the Linux routine - it returns the inode number only */
-	ino = ext2_new_inode(pip, mode);
+	EXT2_LOCK(ump);
+	if (fs->e2fs->e2fs_ficount == 0)
+		goto noinodes;
+	/*
+	 * If it is a directory then obtain a cylinder group based on
+	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
+	 * always the next inode.
+	 */
+	if((mode & IFMT) == IFDIR) {
+		cg = ext2_dirpref(pip);
+		if (fs->e2fs_contigdirs[cg] < 255)
+			fs->e2fs_contigdirs[cg]++;
+	} else {
+		cg = ino_to_cg(fs, pip->i_number);
+		if (fs->e2fs_contigdirs[cg] > 0)
+			fs->e2fs_contigdirs[cg]--;
+	}
+	ipref = cg * fs->e2fs->e2fs_ipg + 1;
+	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
 
-	if (ino == 0)
+	if (ino == 0) 
 		goto noinodes;
 	error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
 	if (error) {
@@ -410,12 +409,126 @@ printf("ext2_valloc: allocated inode %d\
 */
 	return (0);
 noinodes:
+	EXT2_UNLOCK(ump);
 	ext2_fserr(fs, cred->cr_uid, "out of inodes");
-	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
+	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
 	return (ENOSPC);
 }
 
 /*
+ * Find a cylinder to place a directory.
+ *
+ * The policy implemented by this algorithm is to allocate a
+ * directory inode in the same cylinder group as its parent
+ * directory, but also to reserve space for its files inodes
+ * and data. Restrict the number of directories which may be
+ * allocated one after another in the same cylinder group
+ * without intervening allocation of files.
+ *
+ * If we allocate a first level directory then force allocation
+ * in another cylinder group.
+ *
+ */
+static u_long
+ext2_dirpref(struct inode *pip)
+{
+	struct m_ext2fs *fs;
+        int cg, prefcg, dirsize, cgsize;
+	int avgifree, avgbfree, avgndir, curdirsize;
+	int minifree, minbfree, maxndir;
+	int mincg, minndir;
+	int maxcontigdirs;
+
+	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
+	fs = pip->i_e2fs;
+
+ 	avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
+	avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
+	avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;
+
+	/*
+	 * Force allocation in another cg if creating a first level dir.
+	 */
+	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
+	if (ITOV(pip)->v_vflag & VV_ROOT) {
+		prefcg = arc4random() % fs->e2fs_gcount;
+		mincg = prefcg;
+		minndir = fs->e2fs_ipg;
+		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
+			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
+			    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
+			    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
+				mincg = cg;
+				minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
+			}
+		for (cg = 0; cg < prefcg; cg++)
+			if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
+                            fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
+                            fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
+                                mincg = cg;
+                                minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
+                        }
+
+		return (mincg);
+	}
+
+	/*
+	 * Count various limits which used for
+	 * optimal allocation of a directory inode.
+	 */
+	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
+	minifree = avgifree - avgifree / 4;
+	if (minifree < 1)
+		minifree = 1;
+	minbfree = avgbfree - avgbfree / 4;
+	if (minbfree < 1)
+		minbfree = 1;
+	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
+	dirsize = AVGDIRSIZE;
+	curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
+	if (dirsize < curdirsize)
+		dirsize = curdirsize;
+	if (dirsize <= 0)
+		maxcontigdirs = 0;		/* dirsize overflowed */
+	else
+		maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
+	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
+	if (maxcontigdirs == 0)
+		maxcontigdirs = 1;
+
+	/*
+	 * Limit number of dirs in one cg and reserve space for 
+	 * regular files, but only if we have no deficit in
+	 * inodes or space.
+	 */
+	prefcg = ino_to_cg(fs, pip->i_number);
+	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
+		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
+		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
+	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
+			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
+				return (cg);
+		}
+	for (cg = 0; cg < prefcg; cg++)
+		if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
+		    fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
+	    	    fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
+			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
+				return (cg);
+		}
+	/*
+	 * This is a backstop when we have deficit in space.
+	 */
+	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
+		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
+			return (cg);
+	for (cg = 0; cg < prefcg; cg++)
+		if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
+			break;
+	return (cg);
+}
+
+/*
  * Select the desired position for the next block in a file.  
  *
  * we try to mimic what Remy does in inode_getblk/block_getblk
@@ -437,11 +550,12 @@ ext2_blkpref(ip, lbn, indx, bap, blocknr
 	int32_t blocknr;
 {
 	int	tmp;
+	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
 
 	/* if the next block is actually what we thought it is,
 	   then set the goal to what we thought it should be
 	*/
-	if(ip->i_next_alloc_block == lbn)
+	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
 		return ip->i_next_alloc_goal;
 
 	/* now check whether we were provided with an array that basically
@@ -458,13 +572,230 @@ ext2_blkpref(ip, lbn, indx, bap, blocknr
 	return blocknr ? blocknr :
 			(int32_t)(ip->i_block_group * 
 			EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + 
-			ip->i_e2fs->s_es->s_first_data_block;
+			ip->i_e2fs->e2fs->e2fs_first_dblock;
+}
+
+/*
+ * Implement the cylinder overflow algorithm.
+ *
+ * The policy implemented by this algorithm is:
+ *   1) allocate the block in its requested cylinder group.
+ *   2) quadradically rehash on the cylinder group number.
+ *   3) brute force search for a free block.
+ */
+static u_long
+ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
+                daddr_t (*allocator)(struct inode *, int, daddr_t, int))
+{
+	struct m_ext2fs *fs;
+	ino_t result;
+	int i, icg = cg;
+
+	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
+	fs = ip->i_e2fs;
+	/*
+	 * 1: preferred cylinder group
+	 */
+	result = (*allocator)(ip, cg, pref, size);
+	if (result)
+		return (result);
+	/*
+	 * 2: quadratic rehash
+	 */
+	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
+		cg += i;
+		if (cg >= fs->e2fs_gcount)
+			cg -= fs->e2fs_gcount;
+		result = (*allocator)(ip, cg, 0, size);
+		if (result)
+			return (result);
+	}
+	/*
+	 * 3: brute force search
+	 * Note that we start at i == 2, since 0 was checked initially,
+	 * and 1 is always checked in the quadratic rehash.
+	 */
+	cg = (icg + 2) % fs->e2fs_gcount;
+	for (i = 2; i < fs->e2fs_gcount; i++) {
+		result = (*allocator)(ip, cg, 0, size);
+		if (result)
+			return (result);
+		cg++;
+		if (cg == fs->e2fs_gcount)
+			cg = 0;
+	}
+	return (0);
+}
+
+/*
+ * Determine whether a block can be allocated.
+ *
+ * Check to see if a block of the appropriate size is available,
+ * and if it is, allocate it.
+ */
+static daddr_t
+ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
+{
+	struct m_ext2fs *fs;
+	struct buf *bp;
+	struct ext2mount *ump;
+	int error, bno, start, end, loc;
+	char *bbp;
+	/* XXX ondisk32 */
+	fs = ip->i_e2fs;
+	ump = ip->i_ump;
+	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
+		return (0);
+	EXT2_UNLOCK(ump);
+	error = bread(ip->i_devvp, fsbtodb(fs,
+		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+		(int)fs->e2fs_bsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		EXT2_LOCK(ump);
+		return (0);
+	}
+	bbp = (char *)bp->b_data;
+
+	if (dtog(fs, bpref) != cg)
+		bpref = 0;
+	if (bpref != 0) {
+		bpref = dtogd(fs, bpref);
+		/*
+		 * if the requested block is available, use it
+		 */
+		if (isclr(bbp, bpref)) {
+			bno = bpref;
+			goto gotit;
+		}
+	}
+	/*
+	 * no blocks in the requested cylinder, so take next
+	 * available one in this cylinder group.
+	 * first try to get 8 contigous blocks, then fall back to a single
+	 * block.
+	 */
+	if (bpref)
+		start = dtogd(fs, bpref) / NBBY;
+	else
+		start = 0;
+	end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
+	for (loc = start; loc < end; loc++) {
+		if (bbp[loc] == 0) {
+			bno = loc * NBBY;
+			goto gotit;
+		}
+	}
+	for (loc = 0; loc < start; loc++) {
+		if (bbp[loc] == 0) {
+			bno = loc * NBBY;
+			goto gotit;
+		}
+	}
+
+	bno = ext2_mapsearch(fs, bbp, bpref);
+	if (bno < 0){
+		brelse(bp);
+		EXT2_LOCK(ump);
+		return (0);
+	}
+gotit:
+#ifdef DIAGNOSTIC
+	if (isset(bbp, (daddr_t)bno)) {
+		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
+			cg, bno, fs->e2fs_fsmnt);
+		panic("ext2fs_alloccg: dup alloc");
+	}
+#endif
+	setbit(bbp, (daddr_t)bno);
+	EXT2_LOCK(ump);
+	fs->e2fs->e2fs_fbcount--;
+	fs->e2fs_gd[cg].ext2bgd_nbfree--;
+	fs->e2fs_fmod = 1;
+	EXT2_UNLOCK(ump);
+	bdwrite(bp);
+	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
+}
+
+/*
+ * Determine whether an inode can be allocated.
+ *
+ * Check to see if an inode is available, and if it is,
+ * allocate it using tode in the specified cylinder group.
+ */
+static daddr_t
+ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
+{
+	struct m_ext2fs *fs;
+	struct buf *bp;
+	struct ext2mount *ump;
+	int error, start, len, loc, map, i;
+	char *ibp;
+	ipref--; /* to avoid a lot of (ipref -1) */
+	if (ipref == -1)
+		ipref = 0;
+	fs = ip->i_e2fs;
+	ump = ip->i_ump;
+	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
+		return (0);
+	EXT2_UNLOCK(ump);	
+	error = bread(ip->i_devvp, fsbtodb(fs,
+		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
+		(int)fs->e2fs_bsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		EXT2_LOCK(ump);
+		return (0);
+	}
+	ibp = (char *)bp->b_data;
+	if (ipref) {
+		ipref %= fs->e2fs->e2fs_ipg;
+		if (isclr(ibp, ipref))
+			goto gotit;
+	}
+	start = ipref / NBBY;
+	len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY);
+	loc = skpc(0xff, len, &ibp[start]);
+	if (loc == 0) {
+		len = start + 1;
+		start = 0;
+		loc = skpc(0xff, len, &ibp[0]);
+		if (loc == 0) {
+			printf("cg = %d, ipref = %lld, fs = %s\n",
+				cg, (long long)ipref, fs->e2fs_fsmnt);
+			panic("ext2fs_nodealloccg: map corrupted");
+			/* NOTREACHED */
+		}
+	} 
+	i = start + len - loc;
+	map = ibp[i];
+	ipref = i * NBBY;
+	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
+		if ((map & i) == 0) {
+			goto gotit;
+		}
+	}
+	printf("fs = %s\n", fs->e2fs_fsmnt);
+	panic("ext2fs_nodealloccg: block not in map");
+	/* NOTREACHED */
+gotit:
+	setbit(ibp, ipref);
+	EXT2_LOCK(ump);
+	fs->e2fs_gd[cg].ext2bgd_nifree--;
+	fs->e2fs->e2fs_ficount--;
+	fs->e2fs_fmod = 1;
+	if ((mode & IFMT) == IFDIR) {
+		fs->e2fs_gd[cg].ext2bgd_ndirs++;
+		fs->e2fs_total_dir++;
+	}
+	EXT2_UNLOCK(ump);
+	bdwrite(bp);
+	return (cg * fs->e2fs->e2fs_ipg + ipref +1);
 }
 
 /*
  * Free a block or fragment.
  *
- * pass on to the Linux code
  */
 void
 ext2_blkfree(ip, bno, size)
@@ -472,19 +803,47 @@ ext2_blkfree(ip, bno, size)
 	int32_t bno;
 	long size;
 {
-	struct ext2_sb_info *fs;
+	struct m_ext2fs *fs;
+	struct buf *bp;
+	struct ext2mount *ump;
+	int cg, error;
+	char *bbp;
 
 	fs = ip->i_e2fs;
-	/*
-	 *	call Linux code with mount *, block number, count
-	 */
-	ext2_free_blocks(ITOV(ip)->v_mount, bno, size / fs->s_frag_size);
+	ump = ip->i_ump;
+	cg = dtog(fs, bno);
+	if ((u_int)bno >= fs->e2fs->e2fs_bcount) {
+                printf("bad block %lld, ino %llu\n", (long long)bno,
+                    (unsigned long long)ip->i_number);
+                ext2_fserr(fs, ip->i_uid, "bad block");
+                return;
+        }
+        error = bread(ip->i_devvp,
+                fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+                (int)fs->e2fs_bsize, NOCRED, &bp);
+        if (error) {
+                brelse(bp);
+                return;
+        }
+        bbp = (char *)bp->b_data;
+        bno = dtogd(fs, bno);
+        if (isclr(bbp, bno)) {
+                printf("block = %lld, fs = %s\n",
+                     (long long)bno, fs->e2fs_fsmnt);
+                panic("blkfree: freeing free block");
+        }
+        clrbit(bbp, bno);
+	EXT2_LOCK(ump);
+        fs->e2fs->e2fs_fbcount++;
+        fs->e2fs_gd[cg].ext2bgd_nbfree++;
+        fs->e2fs_fmod = 1;
+	EXT2_UNLOCK(ump);
+        bdwrite(bp);
 }
 
 /*
  * Free an inode.
  *
- * the maintenance of the actual bitmaps is again up to the linux code
  */
 int
 ext2_vfree(pvp, ino, mode)
@@ -492,30 +851,94 @@ ext2_vfree(pvp, ino, mode)
 	ino_t ino;
 	int mode;
 {
-	struct ext2_sb_info *fs;
+	struct m_ext2fs *fs;
 	struct inode *pip;
-	mode_t save_i_mode;
+	struct buf *bp;
+	struct ext2mount *ump;
+	int error, cg;
+	char * ibp;
+/*	mode_t save_i_mode; */
 
 	pip = VTOI(pvp);
 	fs = pip->i_e2fs;
-	if ((u_int)ino > fs->s_inodes_per_group * fs->s_groups_count)
+	ump = pip->i_ump;
+	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
 		panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s",
-		    pip->i_devvp, ino, fs->fs_fsmnt);
+		    pip->i_devvp, ino, fs->e2fs_fsmnt);
 
-/* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, mode);
+	cg = ino_to_cg(fs, ino);
+	error = bread(pip->i_devvp,
+		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
+		(int)fs->e2fs_bsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (0);
+	}
+	ibp = (char *)bp->b_data;
+	ino = (ino - 1) % fs->e2fs->e2fs_ipg;
+	if (isclr(ibp, ino)) {
+		printf("ino = %llu, fs = %s\n",
+			 (unsigned long long)ino, fs->e2fs_fsmnt);
+		if (fs->e2fs_ronly == 0)
+			panic("ifree: freeing free inode");
+	}
+	clrbit(ibp, ino);
+	EXT2_LOCK(ump);
+	fs->e2fs->e2fs_ficount++;
+	fs->e2fs_gd[cg].ext2bgd_nifree++;
+	if ((mode & IFMT) == IFDIR) {
+		fs->e2fs_gd[cg].ext2bgd_ndirs--;
+		fs->e2fs_total_dir--;
+	}
+	fs->e2fs_fmod = 1;
+	EXT2_UNLOCK(ump);
+	bdwrite(bp);
+	return (0);
+}
+
+/*
+ * Find a block in the specified cylinder group.
+ *
+ * It is a panic if a request is made to find a block if none are
+ * available.
  */
-	ext2_discard_prealloc(pip);
+static daddr_t
+ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
+{
+	daddr_t bno;
+	int start, len, loc, i, map;
 
-	/* we need to make sure that ext2_free_inode can adjust the
-	   used_dir_counts in the group summary information - I'd
-	   really like to know what the rationale behind this
-	   'set i_mode to zero to denote an unused inode' is
+	/*
+	 * find the fragment by searching through the free block
+	 * map for an appropriate bit pattern
 	 */
-	save_i_mode = pip->i_mode;
-	pip->i_mode = mode;
-	ext2_free_inode(pip);	
-	pip->i_mode = save_i_mode;
-	return (0);
+	if (bpref)
+		start = dtogd(fs, bpref) / NBBY;
+	else
+		start = 0;
+	len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
+	loc = skpc(0xff, len, &bbp[start]);
+	if (loc == 0) {
+		len = start + 1;
+		start = 0;
+		loc = skpc(0xff, len, &bbp[start]);
+		if (loc == 0) {
+			printf("start = %d, len = %d, fs = %s\n",
+				start, len, fs->e2fs_fsmnt);
+			panic("ext2fs_alloccg: map corrupted");
+			/* NOTREACHED */
+		}
+	}
+	i = start + len - loc;
+	map = bbp[i];
+	bno = i * NBBY;
+	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
+		if ((map & i) == 0)
+			return (bno);
+	}
+	printf("fs = %s\n", fs->e2fs_fsmnt);
+	panic("ext2fs_mapsearch: block not in map");
+	/* NOTREACHED */
 }
 
 /*
@@ -526,10 +949,25 @@ ext2_vfree(pvp, ino, mode)
  */
 static void
 ext2_fserr(fs, uid, cp)
-	struct ext2_sb_info *fs;
-	u_int uid;
+	struct m_ext2fs *fs;
+	uid_t uid;
 	char *cp;
 {
 
-	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
+	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
+}
+
+int
+cg_has_sb(int i)
+{
+        int a3, a5, a7;
+
+        if (i == 0 || i == 1)
+                return 1;
+        for (a3 = 3, a5 = 5, a7 = 7;
+            a3 <= i || a5 <= i || a7 <= i;
+            a3 *= 3, a5 *= 5, a7 *= 7)
+                if (i == a3 || i == a5 || i == a7)
+                        return 1;
+        return 0;
 }

Modified: head/sys/fs/ext2fs/ext2_balloc.c
==============================================================================
--- head/sys/gnu/fs/ext2fs/ext2_balloc.c	Sun Jan  3 12:17:51 2010	(r201427)
+++ head/sys/fs/ext2fs/ext2_balloc.c	Thu Jan 14 14:30:54 2010	(r202283)
@@ -44,42 +44,39 @@
 #include <sys/ucred.h>
 #include <sys/vnode.h>
 
-#include <gnu/fs/ext2fs/inode.h>
-#include <gnu/fs/ext2fs/ext2_fs.h>
-#include <gnu/fs/ext2fs/ext2_fs_sb.h>
-#include <gnu/fs/ext2fs/fs.h>
-#include <gnu/fs/ext2fs/ext2_extern.h>
-
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/fs.h>
+#include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_mount.h>
 /*
  * Balloc defines the structure of file system storage
  * by allocating the physical blocks on a device given
  * the inode and the logical block number in a file.
  */
 int
-ext2_balloc(ip, bn, size, cred, bpp, flags)
+ext2_balloc(ip, lbn, size, cred, bpp, flags)
 	struct inode *ip;
-	int32_t bn;
+	int32_t lbn;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list