svn commit: r256817 - in head/sys/ufs: ffs ufs
Kirk McKusick
mckusick at FreeBSD.org
Mon Oct 21 00:28:03 UTC 2013
Author: mckusick
Date: Mon Oct 21 00:28:02 2013
New Revision: 256817
URL: http://svnweb.freebsd.org/changeset/base/256817
Log:
Restructure the soft updates code so that the single kernel-wide
soft-updates lock can be replaced with a
per-filesystem soft-updates lock. This per-filesystem lock will
allow each filesystem to have its own soft-updates flushing thread
rather than being limited to a single soft-updates flushing thread
for the entire kernel.
Move soft update variables out of the ufsmount structure and into
their own mount_softdeps structure referenced by ufsmount field
um_softdep. Eventually the per-filesystem lock will be in this
structure. For now the structure simply holds a pointer to the
kernel-wide soft-updates lock.
Change all instances of ACQUIRE_LOCK and FREE_LOCK to take the
ufsmount pointer, from which the macros obtain the lock pointer held
in the mount_softdeps structure, instead of taking a pointer to the
kernel-wide soft-updates lock.
Replace the five hash tables used by soft updates with per-filesystem
copies of these tables allocated in the mount_softdeps structure.
Several functions that flush dependencies when too many are allocated
in the kernel used to operate across all filesystems. They are now
parameterized to flush dependencies from a specified filesystem.
For now, we stick with the round-robin flushing strategy when the
kernel as a whole has too many dependencies allocated.
While there are many lines of changes, there should be no functional
change in the operation of soft updates.
Tested by: Peter Holm and Scott Long
Sponsored by: Netflix
Modified:
head/sys/ufs/ffs/ffs_softdep.c
head/sys/ufs/ffs/softdep.h
head/sys/ufs/ufs/ufsmount.h
Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c Mon Oct 21 00:04:26 2013 (r256816)
+++ head/sys/ufs/ffs/ffs_softdep.c Mon Oct 21 00:28:02 2013 (r256817)
@@ -616,48 +616,6 @@ softdep_freework(wkhd)
FEATURE(softupdates, "FFS soft-updates support");
-/*
- * These definitions need to be adapted to the system to which
- * this file is being ported.
- */
-
-#define M_SOFTDEP_FLAGS (M_WAITOK)
-
-#define D_PAGEDEP 0
-#define D_INODEDEP 1
-#define D_BMSAFEMAP 2
-#define D_NEWBLK 3
-#define D_ALLOCDIRECT 4
-#define D_INDIRDEP 5
-#define D_ALLOCINDIR 6
-#define D_FREEFRAG 7
-#define D_FREEBLKS 8
-#define D_FREEFILE 9
-#define D_DIRADD 10
-#define D_MKDIR 11
-#define D_DIRREM 12
-#define D_NEWDIRBLK 13
-#define D_FREEWORK 14
-#define D_FREEDEP 15
-#define D_JADDREF 16
-#define D_JREMREF 17
-#define D_JMVREF 18
-#define D_JNEWBLK 19
-#define D_JFREEBLK 20
-#define D_JFREEFRAG 21
-#define D_JSEG 22
-#define D_JSEGDEP 23
-#define D_SBDEP 24
-#define D_JTRUNC 25
-#define D_JFSYNC 26
-#define D_SENTINEL 27
-#define D_LAST D_SENTINEL
-
-unsigned long dep_current[D_LAST + 1];
-unsigned long dep_highuse[D_LAST + 1];
-unsigned long dep_total[D_LAST + 1];
-unsigned long dep_write[D_LAST + 1];
-
static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0,
"soft updates stats");
static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0,
@@ -669,6 +627,11 @@ static SYSCTL_NODE(_debug_softdep, OID_A
static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
"current dependencies written");
+unsigned long dep_current[D_LAST + 1];
+unsigned long dep_highuse[D_LAST + 1];
+unsigned long dep_total[D_LAST + 1];
+unsigned long dep_write[D_LAST + 1];
+
#define SOFTDEP_TYPE(type, str, long) \
static MALLOC_DEFINE(M_ ## type, #str, long); \
SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \
@@ -713,6 +676,9 @@ static MALLOC_DEFINE(M_SENTINEL, "sentin
static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
+static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data");
+
+#define M_SOFTDEP_FLAGS (M_WAITOK)
/*
* translate from workitem type to memory type
@@ -749,8 +715,6 @@ static struct malloc_type *memtype[] = {
M_SENTINEL
};
-static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
-
#define DtoM(type) (memtype[type])
/*
@@ -766,51 +730,16 @@ static LIST_HEAD(mkdirlist, mkdir) mkdir
#define DOT_OFFSET offsetof(struct dirtemplate, dot_ino)
/*
- * Forward declarations.
- */
-struct inodedep_hashhead;
-struct newblk_hashhead;
-struct pagedep_hashhead;
-struct bmsafemap_hashhead;
-
-/*
- * Private journaling structures.
- */
-struct jblocks {
- struct jseglst jb_segs; /* TAILQ of current segments. */
- struct jseg *jb_writeseg; /* Next write to complete. */
- struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */
- struct jextent *jb_extent; /* Extent array. */
- uint64_t jb_nextseq; /* Next sequence number. */
- uint64_t jb_oldestwrseq; /* Oldest written sequence number. */
- uint8_t jb_needseg; /* Need a forced segment. */
- uint8_t jb_suspended; /* Did journal suspend writes? */
- int jb_avail; /* Available extents. */
- int jb_used; /* Last used extent. */
- int jb_head; /* Allocator head. */
- int jb_off; /* Allocator extent offset. */
- int jb_blocks; /* Total disk blocks covered. */
- int jb_free; /* Total disk blocks free. */
- int jb_min; /* Minimum free space. */
- int jb_low; /* Low on space. */
- int jb_age; /* Insertion time of oldest rec. */
-};
-
-struct jextent {
- ufs2_daddr_t je_daddr; /* Disk block address. */
- int je_blocks; /* Disk block count. */
-};
-
-/*
* Internal function prototypes.
*/
+static void check_clear_deps(struct mount *);
static void softdep_error(char *, int);
static int softdep_process_worklist(struct mount *, int);
static int softdep_waitidle(struct mount *);
static void drain_output(struct vnode *);
static struct buf *getdirtybuf(struct buf *, struct rwlock *, int);
-static void clear_remove(void);
-static void clear_inodedeps(void);
+static void clear_remove(struct mount *);
+static void clear_inodedeps(struct mount *);
static void unlinked_inodedep(struct mount *, struct inodedep *);
static void clear_unlinked_inodedep(struct inodedep *);
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
@@ -954,20 +883,20 @@ static void allocdirect_merge(struct all
struct allocdirect *, struct allocdirect *);
static struct freefrag *allocindir_merge(struct allocindir *,
struct allocindir *);
-static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int,
+static int bmsafemap_find(struct bmsafemap_hashhead *, int,
struct bmsafemap **);
static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *,
int cg, struct bmsafemap *);
-static int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t,
- int, struct newblk **);
+static int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int,
+ struct newblk **);
static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **);
-static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
+static int inodedep_find(struct inodedep_hashhead *, ino_t,
struct inodedep **);
static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
int, struct pagedep **);
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
- struct mount *mp, int, struct pagedep **);
+ struct pagedep **);
static void pause_timer(void *);
static int request_cleanup(struct mount *, int);
static int process_worklist_item(struct mount *, int, int);
@@ -982,9 +911,9 @@ static void remove_from_worklist(struct
static void softdep_flush(void);
static void softdep_flushjournal(struct mount *);
static int softdep_speedup(void);
-static void worklist_speedup(void);
+static void worklist_speedup(struct mount *);
static int journal_mount(struct mount *, struct fs *, struct ucred *);
-static void journal_unmount(struct mount *);
+static void journal_unmount(struct ufsmount *);
static int journal_space(struct ufsmount *, int);
static void journal_suspend(struct ufsmount *);
static int journal_unsuspend(struct ufsmount *ump);
@@ -1030,15 +959,25 @@ static void softdep_disk_write_complete(
static void softdep_deallocate_dependencies(struct buf *);
static int softdep_count_dependencies(struct buf *bp, int);
+/*
+ * Global lock over all of soft updates.
+ */
static struct rwlock lk;
RW_SYSINIT(softdep_lock, &lk, "Softdep Lock");
-#define TRY_ACQUIRE_LOCK(lk) rw_try_wlock(lk)
-#define ACQUIRE_LOCK(lk) rw_wlock(lk)
-#define FREE_LOCK(lk) rw_wunlock(lk)
+/*
+ * Allow per-filesystem soft-updates locking.
+ * For now all use the same global lock defined above.
+ */
+#define LOCK_PTR(ump) ((ump)->um_softdep->sd_fslock)
+#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock((ump)->um_softdep->sd_fslock)
+#define ACQUIRE_LOCK(ump) rw_wlock((ump)->um_softdep->sd_fslock)
+#define FREE_LOCK(ump) rw_wunlock((ump)->um_softdep->sd_fslock)
+#define LOCK_OWNED(ump) rw_assert((ump)->um_softdep->sd_fslock, \
+ RA_WLOCKED)
-#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock)
-#define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock)
+#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock)
+#define BUF_NOREC(bp) lockdisablerecurse(&(bp)->b_lock)
/*
* Worklist queue management.
@@ -1073,7 +1012,7 @@ worklist_insert(head, item, locked)
{
if (locked)
- rw_assert(&lk, RA_WLOCKED);
+ LOCK_OWNED(VFSTOUFS(item->wk_mp));
if (item->wk_state & ONWORKLIST)
panic("worklist_insert: %p %s(0x%X) already on list",
item, TYPENAME(item->wk_type), item->wk_state);
@@ -1088,7 +1027,7 @@ worklist_remove(item, locked)
{
if (locked)
- rw_assert(&lk, RA_WLOCKED);
+ LOCK_OWNED(VFSTOUFS(item->wk_mp));
if ((item->wk_state & ONWORKLIST) == 0)
panic("worklist_remove: %p %s(0x%X) not on list",
item, TYPENAME(item->wk_type), item->wk_state);
@@ -1161,7 +1100,6 @@ jwork_move(dst, src)
freedep = freedep_merge(WK_FREEDEP(wk), freedep);
}
- rw_assert(&lk, RA_WLOCKED);
while ((wk = LIST_FIRST(src)) != NULL) {
WORKLIST_REMOVE(wk);
WORKLIST_INSERT(dst, wk);
@@ -1216,7 +1154,6 @@ workitem_free(item, type)
int type;
{
struct ufsmount *ump;
- rw_assert(&lk, RA_WLOCKED);
#ifdef DEBUG
if (item->wk_state & ONWORKLIST)
@@ -1229,6 +1166,7 @@ workitem_free(item, type)
if (item->wk_state & IOWAITING)
wakeup(item);
ump = VFSTOUFS(item->wk_mp);
+ LOCK_OWNED(ump);
KASSERT(ump->softdep_deps > 0,
("workitem_free: %s: softdep_deps going negative",
ump->um_fs->fs_fsmnt));
@@ -1237,7 +1175,11 @@ workitem_free(item, type)
KASSERT(dep_current[item->wk_type] > 0,
("workitem_free: %s: dep_current[%s] going negative",
ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+ ("workitem_free: %s: softdep_curdeps[%s] going negative",
+ ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
dep_current[item->wk_type]--;
+ ump->softdep_curdeps[item->wk_type] -= 1;
free(item, DtoM(type));
}
@@ -1254,14 +1196,15 @@ workitem_alloc(item, type, mp)
item->wk_state = 0;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
dep_current[type]++;
if (dep_current[type] > dep_highuse[type])
dep_highuse[type] = dep_current[type];
dep_total[type]++;
+ ump->softdep_curdeps[type] += 1;
ump->softdep_deps++;
ump->softdep_accdeps++;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
}
static void
@@ -1269,7 +1212,15 @@ workitem_reassign(item, newtype)
struct worklist *item;
int newtype;
{
+ struct ufsmount *ump;
+ ump = VFSTOUFS(item->wk_mp);
+ LOCK_OWNED(ump);
+ KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+ ("workitem_reassign: %s: softdep_curdeps[%s] going negative",
+ VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+ ump->softdep_curdeps[item->wk_type] -= 1;
+ ump->softdep_curdeps[newtype] += 1;
KASSERT(dep_current[item->wk_type] > 0,
("workitem_reassign: %s: dep_current[%s] going negative",
VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
@@ -1290,7 +1241,8 @@ static int tickdelay = 2; /* number of t
static int proc_waiting; /* tracks whether we have a timeout posted */
static int *stat_countp; /* statistic to count in proc_waiting timeout */
static struct callout softdep_callout;
-static int req_pending;
+static struct mount *req_pending;
+#define ALLCLEAN ((struct mount *)-1)
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
static int req_clear_remove; /* syncer process flush some freeblks */
static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1298,6 +1250,7 @@ static int softdep_flushcache = 0; /* Sh
/*
* runtime statistics
*/
+static int stat_softdep_mounts; /* number of softdep mounted filesystems */
static int stat_worklist_push; /* number of worklist cleanups */
static int stat_blk_limit_push; /* number of times block limit neared */
static int stat_ino_limit_push; /* number of times inode limit neared */
@@ -1329,6 +1282,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, tic
&tickdelay, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW,
&maxindirdeps, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, softdep_mounts, CTLFLAG_RD,
+ &stat_softdep_mounts, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW,
&stat_worklist_push, 0,"");
SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW,
@@ -1382,13 +1337,10 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, flu
SYSCTL_DECL(_vfs_ffs);
-LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl;
-static u_long bmsafemap_hash; /* size of hash table - 1 */
-
-static int compute_summary_at_mount = 0; /* Whether to recompute the summary at mount time */
+/* Whether to recompute the summary at mount time */
+static int compute_summary_at_mount = 0;
SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
&compute_summary_at_mount, 0, "Recompute summary at mount");
-
static struct proc *softdepproc;
static struct kproc_desc softdep_kp = {
"softdepflush",
@@ -1413,21 +1365,6 @@ softdep_flush(void)
for (;;) {
kproc_suspend_check(softdepproc);
- ACQUIRE_LOCK(&lk);
- /*
- * If requested, try removing inode or removal dependencies.
- */
- if (req_clear_inodedeps) {
- clear_inodedeps();
- req_clear_inodedeps -= 1;
- wakeup_one(&proc_waiting);
- }
- if (req_clear_remove) {
- clear_remove();
- req_clear_remove -= 1;
- wakeup_one(&proc_waiting);
- }
- FREE_LOCK(&lk);
remaining = progress = 0;
mtx_lock(&mountlist_mtx);
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
@@ -1436,8 +1373,8 @@ softdep_flush(void)
continue;
if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
continue;
- progress += softdep_process_worklist(mp, 0);
ump = VFSTOUFS(mp);
+ progress += softdep_process_worklist(mp, 0);
remaining += ump->softdep_on_worklist;
mtx_lock(&mountlist_mtx);
nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1446,20 +1383,21 @@ softdep_flush(void)
mtx_unlock(&mountlist_mtx);
if (remaining && progress)
continue;
- ACQUIRE_LOCK(&lk);
- if (!req_pending)
+ rw_wlock(&lk);
+ if (req_pending == NULL)
msleep(&req_pending, &lk, PVM, "sdflush", hz);
- req_pending = 0;
- FREE_LOCK(&lk);
+ req_pending = NULL;
+ rw_wunlock(&lk);
}
}
static void
-worklist_speedup(void)
+worklist_speedup(mp)
+ struct mount *mp;
{
rw_assert(&lk, RA_WLOCKED);
if (req_pending == 0) {
- req_pending = 1;
+ req_pending = mp;
wakeup(&req_pending);
}
}
@@ -1468,9 +1406,9 @@ static int
softdep_speedup(void)
{
- worklist_speedup();
+ worklist_speedup(ALLCLEAN);
bd_speedup();
- return speedup_syncer();
+ return (speedup_syncer());
}
/*
@@ -1491,8 +1429,8 @@ add_to_worklist(wk, flags)
{
struct ufsmount *ump;
- rw_assert(&lk, RA_WLOCKED);
ump = VFSTOUFS(wk->wk_mp);
+ LOCK_OWNED(ump);
if (wk->wk_state & ONWORKLIST)
panic("add_to_worklist: %s(0x%X) already on list",
TYPENAME(wk->wk_type), wk->wk_state);
@@ -1508,7 +1446,7 @@ add_to_worklist(wk, flags)
}
ump->softdep_on_worklist += 1;
if (flags & WK_NODELAY)
- worklist_speedup();
+ worklist_speedup(wk->wk_mp);
}
/*
@@ -1544,9 +1482,11 @@ wait_worklist(wk, wmesg)
struct worklist *wk;
char *wmesg;
{
+ struct ufsmount *ump;
+ ump = VFSTOUFS(wk->wk_mp);
wk->wk_state |= IOWAITING;
- msleep(wk, &lk, PVM, wmesg, 0);
+ msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0);
}
/*
@@ -1568,54 +1508,41 @@ softdep_process_worklist(mp, full)
long starttime;
KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp"));
- /*
- * Record the process identifier of our caller so that we can give
- * this process preferential treatment in request_cleanup below.
- */
+ if (MOUNTEDSOFTDEP(mp) == 0)
+ return (0);
matchcnt = 0;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
starttime = time_second;
- softdep_process_journal(mp, NULL, full?MNT_WAIT:0);
+ softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0);
+ check_clear_deps(mp);
while (ump->softdep_on_worklist > 0) {
if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0)
break;
else
matchcnt += cnt;
- /*
- * If requested, try removing inode or removal dependencies.
- */
- if (req_clear_inodedeps) {
- clear_inodedeps();
- req_clear_inodedeps -= 1;
- wakeup_one(&proc_waiting);
- }
- if (req_clear_remove) {
- clear_remove();
- req_clear_remove -= 1;
- wakeup_one(&proc_waiting);
- }
+ check_clear_deps(mp);
/*
* We do not generally want to stop for buffer space, but if
* we are really being a buffer hog, we will stop and wait.
*/
if (should_yield()) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
kern_yield(PRI_USER);
bwillwrite();
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
/*
* Never allow processing to run for more than one
- * second. Otherwise the other mountpoints may get
- * excessively backlogged.
+ * second. This gives the syncer thread the opportunity
+ * to pause if appropriate.
*/
if (!full && starttime != time_second)
break;
}
if (full == 0)
journal_unsuspend(ump);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (matchcnt);
}
@@ -1630,12 +1557,13 @@ process_removes(vp)
{
struct inodedep *inodedep;
struct dirrem *dirrem;
+ struct ufsmount *ump;
struct mount *mp;
ino_t inum;
- rw_assert(&lk, RA_WLOCKED);
-
mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
inum = VTOI(vp)->i_number;
for (;;) {
top:
@@ -1658,12 +1586,12 @@ top:
if (dirrem == NULL)
return;
remove_from_worklist(&dirrem->dm_list);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
panic("process_removes: suspended filesystem");
handle_workitem_remove(dirrem, 0);
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
}
}
@@ -1679,13 +1607,14 @@ process_truncates(vp)
{
struct inodedep *inodedep;
struct freeblks *freeblks;
+ struct ufsmount *ump;
struct mount *mp;
ino_t inum;
int cgwait;
- rw_assert(&lk, RA_WLOCKED);
-
mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
inum = VTOI(vp)->i_number;
for (;;) {
if (inodedep_lookup(mp, inum, 0, &inodedep) == 0)
@@ -1706,33 +1635,33 @@ process_truncates(vp)
}
/* Freeblks is waiting on a inode write. */
if ((freeblks->fb_state & COMPLETE) == 0) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
ffs_update(vp, 1);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) ==
(ALLCOMPLETE | ONWORKLIST)) {
remove_from_worklist(&freeblks->fb_list);
freeblks->fb_state |= INPROGRESS;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp,
V_NOWAIT))
panic("process_truncates: "
"suspended filesystem");
handle_workitem_freeblocks(freeblks, 0);
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
break;
}
if (freeblks->fb_cgwait)
cgwait++;
}
if (cgwait) {
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
sync_cgs(mp, MNT_WAIT);
ffs_sync_snap(mp, MNT_WAIT);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
continue;
}
if (freeblks == NULL)
@@ -1756,7 +1685,6 @@ process_worklist_item(mp, target, flags)
int matchcnt;
int error;
- rw_assert(&lk, RA_WLOCKED);
KASSERT(mp != NULL, ("process_worklist_item: NULL mp"));
/*
* If we are being called because of a process doing a
@@ -1767,6 +1695,7 @@ process_worklist_item(mp, target, flags)
return (-1);
PHOLD(curproc); /* Don't let the stack go away. */
ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
matchcnt = 0;
sentinel.wk_mp = NULL;
sentinel.wk_type = D_SENTINEL;
@@ -1783,7 +1712,7 @@ process_worklist_item(mp, target, flags)
wk);
wk->wk_state |= INPROGRESS;
remove_from_worklist(wk);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
panic("process_worklist_item: suspended filesystem");
switch (wk->wk_type) {
@@ -1816,7 +1745,7 @@ process_worklist_item(mp, target, flags)
/* NOTREACHED */
}
vn_finished_secondary_write(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
if (error == 0) {
if (++matchcnt == target)
break;
@@ -1850,6 +1779,7 @@ softdep_move_dependencies(oldbp, newbp)
struct buf *newbp;
{
struct worklist *wk, *wktail;
+ struct ufsmount *ump;
int dirty;
if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL)
@@ -1858,7 +1788,8 @@ softdep_move_dependencies(oldbp, newbp)
("softdep_move_dependencies called on non-softdep filesystem"));
dirty = 0;
wktail = NULL;
- ACQUIRE_LOCK(&lk);
+ ump = VFSTOUFS(wk->wk_mp);
+ ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
LIST_REMOVE(wk, wk_list);
if (wk->wk_type == D_BMSAFEMAP &&
@@ -1870,7 +1801,7 @@ softdep_move_dependencies(oldbp, newbp)
LIST_INSERT_AFTER(wktail, wk, wk_list);
wktail = wk;
}
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
return (dirty);
}
@@ -1916,15 +1847,15 @@ softdep_waitidle(struct mount *mp)
int i;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(&lk);
+ ACQUIRE_LOCK(ump);
for (i = 0; i < 10 && ump->softdep_deps; i++) {
ump->softdep_req = 1;
if (ump->softdep_on_worklist)
panic("softdep_waitidle: work added after flush.");
- msleep(&ump->softdep_deps, &lk, PVM, "softdeps", 1);
+ msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM, "softdeps", 1);
}
ump->softdep_req = 0;
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
error = 0;
if (i == 10) {
error = EBUSY;
@@ -2023,12 +1954,14 @@ retry_flush:
/*
* Structure hashing.
*
- * There are three types of structures that can be looked up:
+ * There are four types of structures that can be looked up:
* 1) pagedep structures identified by mount point, inode number,
* and logical block.
* 2) inodedep structures identified by mount point and inode number.
* 3) newblk structures identified by mount point and
* physical block number.
+ * 4) bmsafemap structures identified by mount point and
+ * cylinder group number.
*
* The "pagedep" and "inodedep" dependency structures are hashed
* separately from the file blocks and inodes to which they correspond.
@@ -2040,7 +1973,8 @@ retry_flush:
* their allocdirect or allocindir structure.
*
* The lookup routines optionally create and hash a new instance when
- * an existing entry is not found.
+ * an existing entry is not found. The bmsafemap lookup routine always
+ * allocates a new structure if an existing one is not found.
*/
#define DEPALLOC 0x0001 /* allocate structure if lookup fails */
#define NODELAY 0x0002 /* cannot do background work */
@@ -2048,26 +1982,20 @@ retry_flush:
/*
* Structures and routines associated with pagedep caching.
*/
-LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
-u_long pagedep_hash; /* size of hash table - 1 */
-#define PAGEDEP_HASH(mp, inum, lbn) \
- (&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
- pagedep_hash])
+#define PAGEDEP_HASH(ump, inum, lbn) \
+ (&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size])
static int
-pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp)
+pagedep_find(pagedephd, ino, lbn, pagedeppp)
struct pagedep_hashhead *pagedephd;
ino_t ino;
ufs_lbn_t lbn;
- struct mount *mp;
- int flags;
struct pagedep **pagedeppp;
{
struct pagedep *pagedep;
LIST_FOREACH(pagedep, pagedephd, pd_hash) {
- if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn &&
- mp == pagedep->pd_list.wk_mp) {
+ if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) {
*pagedeppp = pagedep;
return (1);
}
@@ -2093,10 +2021,12 @@ pagedep_lookup(mp, bp, ino, lbn, flags,
struct pagedep *pagedep;
struct pagedep_hashhead *pagedephd;
struct worklist *wk;
+ struct ufsmount *ump;
int ret;
int i;
- rw_assert(&lk, RA_WLOCKED);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
if (bp) {
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
if (wk->wk_type == D_PAGEDEP) {
@@ -2105,8 +2035,8 @@ pagedep_lookup(mp, bp, ino, lbn, flags,
}
}
}
- pagedephd = PAGEDEP_HASH(mp, ino, lbn);
- ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+ pagedephd = PAGEDEP_HASH(ump, ino, lbn);
+ ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
if (ret) {
if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp)
WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list);
@@ -2114,12 +2044,12 @@ pagedep_lookup(mp, bp, ino, lbn, flags,
}
if ((flags & DEPALLOC) == 0)
return (0);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
pagedep = malloc(sizeof(struct pagedep),
M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO);
workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp);
- ACQUIRE_LOCK(&lk);
- ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+ ACQUIRE_LOCK(ump);
+ ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
if (*pagedeppp) {
/*
* This should never happen since we only create pagedeps
@@ -2143,22 +2073,19 @@ pagedep_lookup(mp, bp, ino, lbn, flags,
/*
* Structures and routines associated with inodedep caching.
*/
-LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
-static u_long inodedep_hash; /* size of hash table - 1 */
-#define INODEDEP_HASH(fs, inum) \
- (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
+#define INODEDEP_HASH(ump, inum) \
+ (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size])
static int
-inodedep_find(inodedephd, fs, inum, inodedeppp)
+inodedep_find(inodedephd, inum, inodedeppp)
struct inodedep_hashhead *inodedephd;
- struct fs *fs;
ino_t inum;
struct inodedep **inodedeppp;
{
struct inodedep *inodedep;
LIST_FOREACH(inodedep, inodedephd, id_hash)
- if (inum == inodedep->id_ino && fs == inodedep->id_fs)
+ if (inum == inodedep->id_ino)
break;
if (inodedep) {
*inodedeppp = inodedep;
@@ -2183,13 +2110,15 @@ inodedep_lookup(mp, inum, flags, inodede
{
struct inodedep *inodedep;
struct inodedep_hashhead *inodedephd;
+ struct ufsmount *ump;
struct fs *fs;
- rw_assert(&lk, RA_WLOCKED);
- fs = VFSTOUFS(mp)->um_fs;
- inodedephd = INODEDEP_HASH(fs, inum);
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ fs = ump->um_fs;
+ inodedephd = INODEDEP_HASH(ump, inum);
- if (inodedep_find(inodedephd, fs, inum, inodedeppp))
+ if (inodedep_find(inodedephd, inum, inodedeppp))
return (1);
if ((flags & DEPALLOC) == 0)
return (0);
@@ -2198,12 +2127,12 @@ inodedep_lookup(mp, inum, flags, inodede
*/
if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0)
request_cleanup(mp, FLUSH_INODES);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
inodedep = malloc(sizeof(struct inodedep),
M_INODEDEP, M_SOFTDEP_FLAGS);
workitem_alloc(&inodedep->id_list, D_INODEDEP, mp);
- ACQUIRE_LOCK(&lk);
- if (inodedep_find(inodedephd, fs, inum, inodedeppp)) {
+ ACQUIRE_LOCK(ump);
+ if (inodedep_find(inodedephd, inum, inodedeppp)) {
WORKITEM_FREE(inodedep, D_INODEDEP);
return (1);
}
@@ -2235,15 +2164,12 @@ inodedep_lookup(mp, inum, flags, inodede
/*
* Structures and routines associated with newblk caching.
*/
-LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
-u_long newblk_hash; /* size of hash table - 1 */
-#define NEWBLK_HASH(fs, inum) \
- (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
+#define NEWBLK_HASH(ump, inum) \
+ (&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size])
static int
-newblk_find(newblkhd, mp, newblkno, flags, newblkpp)
+newblk_find(newblkhd, newblkno, flags, newblkpp)
struct newblk_hashhead *newblkhd;
- struct mount *mp;
ufs2_daddr_t newblkno;
int flags;
struct newblk **newblkpp;
@@ -2253,8 +2179,6 @@ newblk_find(newblkhd, mp, newblkno, flag
LIST_FOREACH(newblk, newblkhd, nb_hash) {
if (newblkno != newblk->nb_newblkno)
continue;
- if (mp != newblk->nb_list.wk_mp)
- continue;
/*
* If we're creating a new dependency don't match those that
* have already been converted to allocdirects. This is for
@@ -2286,18 +2210,21 @@ newblk_lookup(mp, newblkno, flags, newbl
{
struct newblk *newblk;
struct newblk_hashhead *newblkhd;
+ struct ufsmount *ump;
- newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno);
- if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp))
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ newblkhd = NEWBLK_HASH(ump, newblkno);
+ if (newblk_find(newblkhd, newblkno, flags, newblkpp))
return (1);
if ((flags & DEPALLOC) == 0)
return (0);
- FREE_LOCK(&lk);
+ FREE_LOCK(ump);
newblk = malloc(sizeof(union allblk), M_NEWBLK,
M_SOFTDEP_FLAGS | M_ZERO);
workitem_alloc(&newblk->nb_list, D_NEWBLK, mp);
- ACQUIRE_LOCK(&lk);
- if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) {
+ ACQUIRE_LOCK(ump);
+ if (newblk_find(newblkhd, newblkno, flags, newblkpp)) {
WORKITEM_FREE(newblk, D_NEWBLK);
return (1);
}
@@ -2315,10 +2242,8 @@ newblk_lookup(mp, newblkno, flags, newbl
/*
* Structures and routines associated with freed indirect block caching.
*/
-struct freeworklst *indir_hashtbl;
-u_long indir_hash; /* size of hash table - 1 */
-#define INDIR_HASH(mp, blkno) \
- (&indir_hashtbl[((((register_t)(mp)) >> 13) + (blkno)) & indir_hash])
+#define INDIR_HASH(ump, blkno) \
+ (&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size])
/*
* Lookup an indirect block in the indir hash table. The freework is
@@ -2331,14 +2256,14 @@ indirblk_lookup(mp, blkno)
ufs2_daddr_t blkno;
{
struct freework *freework;
- struct freeworklst *wkhd;
+ struct indir_hashhead *wkhd;
+ struct ufsmount *ump;
- wkhd = INDIR_HASH(mp, blkno);
+ ump = VFSTOUFS(mp);
+ wkhd = INDIR_HASH(ump, blkno);
TAILQ_FOREACH(freework, wkhd, fw_next) {
if (freework->fw_blkno != blkno)
continue;
- if (freework->fw_list.wk_mp != mp)
- continue;
indirblk_remove(freework);
return (1);
}
@@ -2356,15 +2281,17 @@ indirblk_insert(freework)
{
struct jblocks *jblocks;
struct jseg *jseg;
+ struct ufsmount *ump;
- jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
+ jblocks = ump->softdep_jblocks;
jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst);
if (jseg == NULL)
return;
LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs);
- TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp,
- freework->fw_blkno), freework, fw_next);
+ TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework,
+ fw_next);
freework->fw_state &= ~DEPCOMPLETE;
}
@@ -2372,10 +2299,11 @@ static void
indirblk_remove(freework)
struct freework *freework;
{
+ struct ufsmount *ump;
+ ump = VFSTOUFS(freework->fw_list.wk_mp);
LIST_REMOVE(freework, fw_segs);
- TAILQ_REMOVE(INDIR_HASH(freework->fw_list.wk_mp,
- freework->fw_blkno), freework, fw_next);
+ TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next);
freework->fw_state |= DEPCOMPLETE;
if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
WORKITEM_FREE(freework, D_FREEWORK);
@@ -2388,20 +2316,8 @@ indirblk_remove(freework)
void
softdep_initialize()
{
- int i;
- LIST_INIT(&mkdirlisthd);
max_softdeps = desiredvnodes * 4;
- pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash);
- inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash);
- newblk_hashtbl = hashinit(max_softdeps / 2, M_NEWBLK, &newblk_hash);
- bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash);
- i = 1 << (ffs(desiredvnodes / 10) - 1);
- indir_hashtbl = malloc(i * sizeof(indir_hashtbl[0]), M_FREEWORK,
- M_WAITOK);
- indir_hash = i - 1;
- for (i = 0; i <= indir_hash; i++)
- TAILQ_INIT(&indir_hashtbl[i]);
/* initialise bioops hack */
bioops.io_start = softdep_disk_io_initiation;
@@ -2421,12 +2337,13 @@ void
softdep_uninitialize()
{
+ /* clear bioops hack */
+ bioops.io_start = NULL;
+ bioops.io_complete = NULL;
+ bioops.io_deallocate = NULL;
+ bioops.io_countdeps = NULL;
+
callout_drain(&softdep_callout);
- hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash);
- hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash);
- hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash);
- hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash);
- free(indir_hashtbl, M_FREEWORK);
}
/*
@@ -2441,19 +2358,24 @@ softdep_mount(devvp, mp, fs, cred)
struct ucred *cred;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list