svn commit: r247387 - head/sys/ufs/ffs
Konstantin Belousov
kib at FreeBSD.org
Wed Feb 27 07:31:24 UTC 2013
Author: kib
Date: Wed Feb 27 07:31:23 2013
New Revision: 247387
URL: http://svnweb.freebsd.org/changeset/base/247387
Log:
An inode block must not be blockingly read while cg block is owned.
The order is inode buffer lock -> snaplk -> cg buffer lock, reversing
the order causes deadlocks.
Inode block must not be written while cg block buffer is owned. The
FFS copy on write needs to allocate a block to copy the content of the
inode block, and the cylinder group selected for the allocation might
be the same as the owned cg block. The reserved block detection code
in the ffs_copyonwrite() and ffs_bp_snapblk() is unable to detect the
situation, because the locked cg buffer is not exposed to it.
In order to maintain the dependency between initialized inode block
and the cg_initediblk pointer, look up the inode buffer in
non-blocking mode. If succeeded, brelse cg block, initialize the inode
block and write it. After the write is finished, reread cg block and
update the cg_initediblk.
If inode block is already locked by another thread, let the another
thread initialize it. If another thread raced with us after we
started writing inode block, the situation is detected by an update of
cg_initediblk. Note that double-initialization of the inode block is
harmless, the block cannot be used until cg_initediblk is incremented.
Sponsored by: The FreeBSD Foundation
In collaboration with: pho
Reviewed by: mckusick
MFC after: 1 month
X-MFC-note: after r246877
Modified:
head/sys/ufs/ffs/ffs_alloc.c
Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c Wed Feb 27 06:53:15 2013 (r247386)
+++ head/sys/ufs/ffs/ffs_alloc.c Wed Feb 27 07:31:23 2013 (r247387)
@@ -1790,6 +1790,17 @@ fail:
return (0);
}
+static inline struct buf *
+getinobuf(struct inode *ip, u_int cg, u_int32_t cginoblk, int gbflags)
+{
+ struct fs *fs;
+
+ fs = ip->i_fs;
+ return (getblk(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs,
+ cg * fs->fs_ipg + cginoblk)), (int)fs->fs_bsize, 0, 0,
+ gbflags));
+}
+
/*
* Determine whether an inode can be allocated.
*
@@ -1814,9 +1825,11 @@ ffs_nodealloccg(ip, cg, ipref, mode, unu
u_int8_t *inosused, *loc;
struct ufs2_dinode *dp2;
int error, start, len, i;
+ u_int32_t old_initediblk;
fs = ip->i_fs;
ump = ip->i_ump;
+check_nifree:
if (fs->fs_cs(fs, cg).cs_nifree == 0)
return (0);
UFS_UNLOCK(ump);
@@ -1828,13 +1841,13 @@ ffs_nodealloccg(ip, cg, ipref, mode, unu
return (0);
}
cgp = (struct cg *)bp->b_data;
+restart:
if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
brelse(bp);
UFS_LOCK(ump);
return (0);
}
bp->b_xflags |= BX_BKGRDWRITE;
- cgp->cg_old_time = cgp->cg_time = time_second;
inosused = cg_inosused(cgp);
if (ipref) {
ipref %= fs->fs_ipg;
@@ -1856,7 +1869,6 @@ ffs_nodealloccg(ip, cg, ipref, mode, unu
}
}
ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1;
- cgp->cg_irotor = ipref;
gotit:
/*
* Check to see if we need to initialize more inodes.
@@ -1864,9 +1876,37 @@ gotit:
if (fs->fs_magic == FS_UFS2_MAGIC &&
ipref + INOPB(fs) > cgp->cg_initediblk &&
cgp->cg_initediblk < cgp->cg_niblk) {
- ibp = getblk(ip->i_devvp, fsbtodb(fs,
- ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
- (int)fs->fs_bsize, 0, 0, 0);
+ old_initediblk = cgp->cg_initediblk;
+
+ /*
+ * Free the cylinder group lock before writing the
+ * initialized inode block. Entering the
+ * babarrierwrite() with the cylinder group lock
+ * causes lock order violation between the lock and
+ * snaplk.
+ *
+ * Another thread can decide to initialize the same
+ * inode block, but whichever thread first gets the
+ * cylinder group lock after writing the newly
+ * allocated inode block will update it and the other
+ * will realize that it has lost and leave the
+ * cylinder group unchanged.
+ */
+ ibp = getinobuf(ip, cg, old_initediblk, GB_LOCK_NOWAIT);
+ brelse(bp);
+ if (ibp == NULL) {
+ /*
+ * The inode block buffer is already owned by
+ * another thread, which must initialize it.
+ * Wait on the buffer to allow another thread
+ * to finish the updates, with dropped cg
+ * buffer lock, then retry.
+ */
+ ibp = getinobuf(ip, cg, old_initediblk, 0);
+ brelse(ibp);
+ UFS_LOCK(ump);
+ goto check_nifree;
+ }
bzero(ibp->b_data, (int)fs->fs_bsize);
dp2 = (struct ufs2_dinode *)(ibp->b_data);
for (i = 0; i < INOPB(fs); i++) {
@@ -1883,8 +1923,29 @@ gotit:
* loading of newly created filesystems.
*/
babarrierwrite(ibp);
- cgp->cg_initediblk += INOPB(fs);
+
+ /*
+ * After the inode block is written, try to update the
+ * cg initediblk pointer. If another thread beat us
+ * to it, then leave it unchanged as the other thread
+ * has already set it correctly.
+ */
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ UFS_LOCK(ump);
+ ACTIVECLEAR(fs, cg);
+ UFS_UNLOCK(ump);
+ if (error != 0) {
+ brelse(bp);
+ return (error);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (cgp->cg_initediblk == old_initediblk)
+ cgp->cg_initediblk += INOPB(fs);
+ goto restart;
}
+ cgp->cg_old_time = cgp->cg_time = time_second;
+ cgp->cg_irotor = ipref;
UFS_LOCK(ump);
ACTIVECLEAR(fs, cg);
setbit(inosused, ipref);
More information about the svn-src-head
mailing list