svn commit: r269533 - in head/sys: kern ufs/ffs
Kirk McKusick
mckusick at FreeBSD.org
Mon Aug 4 22:03:59 UTC 2014
Author: mckusick
Date: Mon Aug 4 22:03:58 2014
New Revision: 269533
URL: http://svnweb.freebsd.org/changeset/base/269533
Log:
Add support for multi-threading of soft updates.
Replace a single soft updates thread with a thread per FFS-filesystem
mount point. The threads are associated with the bufdaemon process.
Reviewed by: kib
Tested by: Peter Holm and Scott Long
MFC after: 2 weeks
Sponsored by: Netflix
Modified:
head/sys/kern/vfs_bio.c
head/sys/ufs/ffs/ffs_softdep.c
head/sys/ufs/ffs/softdep.h
Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c Mon Aug 4 21:41:01 2014 (r269532)
+++ head/sys/kern/vfs_bio.c Mon Aug 4 22:03:58 2014 (r269533)
@@ -98,7 +98,8 @@ struct buf_ops buf_ops_bio = {
struct buf *buf; /* buffer header pool */
caddr_t unmapped_buf;
-static struct proc *bufdaemonproc;
+/* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */
+struct proc *bufdaemonproc;
static int inmem(struct vnode *vp, daddr_t blkno);
static void vm_hold_free_pages(struct buf *bp, int newbsize);
Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c Mon Aug 4 21:41:01 2014 (r269532)
+++ head/sys/ufs/ffs/ffs_softdep.c Mon Aug 4 22:03:58 2014 (r269533)
@@ -908,9 +908,9 @@ static void add_to_worklist(struct workl
static void wake_worklist(struct worklist *);
static void wait_worklist(struct worklist *, char *);
static void remove_from_worklist(struct worklist *);
-static void softdep_flush(void);
+static void softdep_flush(void *);
static void softdep_flushjournal(struct mount *);
-static int softdep_speedup(void);
+static int softdep_speedup(struct ufsmount *);
static void worklist_speedup(struct mount *);
static int journal_mount(struct mount *, struct fs *, struct ucred *);
static void journal_unmount(struct ufsmount *);
@@ -962,18 +962,21 @@ static int softdep_count_dependencies(st
/*
* Global lock over all of soft updates.
*/
-static struct rwlock lk;
-RW_SYSINIT(softdep_lock, &lk, "Softdep Lock");
+static struct mtx lk;
+MTX_SYSINIT(softdep_lock, &lk, "Global Softdep Lock", MTX_DEF);
+
+#define ACQUIRE_GBLLOCK(lk) mtx_lock(lk)
+#define FREE_GBLLOCK(lk) mtx_unlock(lk)
+#define GBLLOCK_OWNED(lk) mtx_assert((lk), MA_OWNED)
/*
- * Allow per-filesystem soft-updates locking.
- * For now all use the same global lock defined above.
+ * Per-filesystem soft-updates locking.
*/
-#define LOCK_PTR(ump) ((ump)->um_softdep->sd_fslock)
-#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock((ump)->um_softdep->sd_fslock)
-#define ACQUIRE_LOCK(ump) rw_wlock((ump)->um_softdep->sd_fslock)
-#define FREE_LOCK(ump) rw_wunlock((ump)->um_softdep->sd_fslock)
-#define LOCK_OWNED(ump) rw_assert((ump)->um_softdep->sd_fslock, \
+#define LOCK_PTR(ump) (&(ump)->um_softdep->sd_fslock)
+#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock(&(ump)->um_softdep->sd_fslock)
+#define ACQUIRE_LOCK(ump) rw_wlock(&(ump)->um_softdep->sd_fslock)
+#define FREE_LOCK(ump) rw_wunlock(&(ump)->um_softdep->sd_fslock)
+#define LOCK_OWNED(ump) rw_assert(&(ump)->um_softdep->sd_fslock, \
RA_WLOCKED)
#define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock)
@@ -1178,7 +1181,7 @@ workitem_free(item, type)
KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
("workitem_free: %s: softdep_curdeps[%s] going negative",
ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
- dep_current[item->wk_type]--;
+ atomic_subtract_long(&dep_current[item->wk_type], 1);
ump->softdep_curdeps[item->wk_type] -= 1;
free(item, DtoM(type));
}
@@ -1196,11 +1199,13 @@ workitem_alloc(item, type, mp)
item->wk_state = 0;
ump = VFSTOUFS(mp);
- ACQUIRE_LOCK(ump);
+ ACQUIRE_GBLLOCK(&lk);
dep_current[type]++;
if (dep_current[type] > dep_highuse[type])
dep_highuse[type] = dep_current[type];
dep_total[type]++;
+ FREE_GBLLOCK(&lk);
+ ACQUIRE_LOCK(ump);
ump->softdep_curdeps[type] += 1;
ump->softdep_deps++;
ump->softdep_accdeps++;
@@ -1224,11 +1229,13 @@ workitem_reassign(item, newtype)
KASSERT(dep_current[item->wk_type] > 0,
("workitem_reassign: %s: dep_current[%s] going negative",
VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
- dep_current[item->wk_type]--;
+ ACQUIRE_GBLLOCK(&lk);
dep_current[newtype]++;
+ dep_current[item->wk_type]--;
if (dep_current[newtype] > dep_highuse[newtype])
dep_highuse[newtype] = dep_current[newtype];
dep_total[newtype]++;
+ FREE_GBLLOCK(&lk);
item->wk_type = newtype;
}
@@ -1236,13 +1243,10 @@ workitem_reassign(item, newtype)
* Workitem queue management
*/
static int max_softdeps; /* maximum number of structs before slowdown */
-static int maxindirdeps = 50; /* max number of indirdeps before slowdown */
static int tickdelay = 2; /* number of ticks to pause during slowdown */
static int proc_waiting; /* tracks whether we have a timeout posted */
static int *stat_countp; /* statistic to count in proc_waiting timeout */
static struct callout softdep_callout;
-static struct mount *req_pending;
-#define ALLCLEAN ((struct mount *)-1)
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
static int req_clear_remove; /* syncer process flush some freeblks */
static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1250,7 +1254,7 @@ static int softdep_flushcache = 0; /* Sh
/*
* runtime statistics
*/
-static int stat_softdep_mounts; /* number of softdep mounted filesystems */
+static int stat_flush_threads; /* number of softdep flushing threads */
static int stat_worklist_push; /* number of worklist cleanups */
static int stat_blk_limit_push; /* number of times block limit neared */
static int stat_ino_limit_push; /* number of times inode limit neared */
@@ -1281,10 +1285,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, max
&max_softdeps, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW,
&tickdelay, 0, "");
-SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW,
- &maxindirdeps, 0, "");
-SYSCTL_INT(_debug_softdep, OID_AUTO, softdep_mounts, CTLFLAG_RD,
- &stat_softdep_mounts, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, flush_threads, CTLFLAG_RD,
+ &stat_flush_threads, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW,
&stat_worklist_push, 0,"");
SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW,
@@ -1344,53 +1346,67 @@ SYSCTL_DECL(_vfs_ffs);
static int compute_summary_at_mount = 0;
SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
&compute_summary_at_mount, 0, "Recompute summary at mount");
-static struct proc *softdepproc;
-static struct kproc_desc softdep_kp = {
- "softdepflush",
- softdep_flush,
- &softdepproc
-};
-SYSINIT(sdproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
- &softdep_kp);
-
+static int print_threads = 0;
+SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW,
+ &print_threads, 0, "Notify flusher thread start/stop");
+
+/* List of all filesystems mounted with soft updates */
+static TAILQ_HEAD(, mount_softdeps) softdepmounts;
+
+/*
+ * This function cleans the worklist for a filesystem.
+ * Each filesystem running with soft dependencies gets its own
+ * thread to run in this function. The thread is started up in
+ * softdep_mount and shut down in softdep_unmount. They show up
+ * as part of the kernel "bufdaemon" process whose process
+ * entry is available in bufdaemonproc.
+ */
+static int searchfailed;
+extern struct proc *bufdaemonproc;
static void
-softdep_flush(void)
+softdep_flush(addr)
+ void *addr;
{
- struct mount *nmp;
struct mount *mp;
- struct ufsmount *ump;
struct thread *td;
- int remaining;
- int progress;
+ struct ufsmount *ump;
td = curthread;
td->td_pflags |= TDP_NORUNNINGBUF;
-
+ mp = (struct mount *)addr;
+ ump = VFSTOUFS(mp);
+ atomic_add_int(&stat_flush_threads, 1);
+ if (print_threads) {
+ if (stat_flush_threads == 1)
+ printf("Running %s at pid %d\n", bufdaemonproc->p_comm,
+ bufdaemonproc->p_pid);
+ printf("Start thread %s\n", td->td_name);
+ }
for (;;) {
- kproc_suspend_check(softdepproc);
- remaining = progress = 0;
- mtx_lock(&mountlist_mtx);
- for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
- nmp = TAILQ_NEXT(mp, mnt_list);
- if (MOUNTEDSOFTDEP(mp) == 0)
- continue;
- if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
- continue;
- ump = VFSTOUFS(mp);
- progress += softdep_process_worklist(mp, 0);
- remaining += ump->softdep_on_worklist;
- mtx_lock(&mountlist_mtx);
- nmp = TAILQ_NEXT(mp, mnt_list);
- vfs_unbusy(mp);
- }
- mtx_unlock(&mountlist_mtx);
- if (remaining && progress)
+ while (softdep_process_worklist(mp, 0) > 0 ||
+ (MOUNTEDSUJ(mp) &&
+ VFSTOUFS(mp)->softdep_jblocks->jb_suspended))
+ kthread_suspend_check();
+ ACQUIRE_LOCK(ump);
+ if ((ump->softdep_flags & FLUSH_CLEANUP) == 0)
+ msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM,
+ "sdflush", hz / 2);
+ ump->softdep_flags &= ~FLUSH_CLEANUP;
+ /*
+ * Check to see if we are done and need to exit.
+ */
+ if ((ump->softdep_flags & FLUSH_EXIT) == 0) {
+ FREE_LOCK(ump);
continue;
- rw_wlock(&lk);
- if (req_pending == NULL)
- msleep(&req_pending, &lk, PVM, "sdflush", hz);
- req_pending = NULL;
- rw_wunlock(&lk);
+ }
+ ump->softdep_flags &= ~FLUSH_EXIT;
+ FREE_LOCK(ump);
+ wakeup(&ump->softdep_flags);
+ if (print_threads)
+ printf("Stop thread %s: searchfailed %d, did cleanups %d\n", td->td_name, searchfailed, ump->um_softdep->sd_cleanups);
+ atomic_subtract_int(&stat_flush_threads, 1);
+ kthread_exit();
+ panic("kthread_exit failed\n");
}
}
@@ -1398,19 +1414,70 @@ static void
worklist_speedup(mp)
struct mount *mp;
{
- rw_assert(&lk, RA_WLOCKED);
- if (req_pending == 0) {
- req_pending = mp;
- wakeup(&req_pending);
+ struct ufsmount *ump;
+
+ ump = VFSTOUFS(mp);
+ LOCK_OWNED(ump);
+ if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) {
+ ump->softdep_flags |= FLUSH_CLEANUP;
+ if (ump->softdep_flushtd->td_wchan == &ump->softdep_flushtd)
+ wakeup(&ump->softdep_flushtd);
}
}
static int
-softdep_speedup(void)
+softdep_speedup(ump)
+ struct ufsmount *ump;
{
+ struct ufsmount *altump;
+ struct mount_softdeps *sdp;
- worklist_speedup(ALLCLEAN);
+ LOCK_OWNED(ump);
+ worklist_speedup(ump->um_mountp);
bd_speedup();
+ /*
+ * If we have global shortages, then we need other
+ * filesystems to help with the cleanup. Here we wakeup a
+ * flusher thread for a filesystem that is over its fair
+ * share of resources.
+ */
+ if (req_clear_inodedeps || req_clear_remove) {
+ ACQUIRE_GBLLOCK(&lk);
+ TAILQ_FOREACH(sdp, &softdepmounts, sd_next) {
+ if ((altump = sdp->sd_ump) == ump)
+ continue;
+ if (((req_clear_inodedeps &&
+ altump->softdep_curdeps[D_INODEDEP] >
+ max_softdeps / stat_flush_threads) ||
+ (req_clear_remove &&
+ altump->softdep_curdeps[D_DIRREM] >
+ (max_softdeps / 2) / stat_flush_threads)) &&
+ TRY_ACQUIRE_LOCK(altump))
+ break;
+ }
+ if (sdp == NULL) {
+ searchfailed++;
+ FREE_GBLLOCK(&lk);
+ } else {
+ /*
+ * Move to the end of the list so we pick a
+ * different one on our next try.
+ */
+ TAILQ_REMOVE(&softdepmounts, sdp, sd_next);
+ TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
+ FREE_GBLLOCK(&lk);
+ if ((altump->softdep_flags &
+ (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) {
+ altump->softdep_flags |= FLUSH_CLEANUP;
+ altump->um_softdep->sd_cleanups++;
+ if (altump->softdep_flushtd->td_wchan ==
+ &altump->softdep_flushtd) {
+ wakeup(&altump->softdep_flushtd);
+ }
+ }
+ FREE_LOCK(altump);
+ }
+ }
return (speedup_syncer());
}
@@ -2126,9 +2193,14 @@ inodedep_lookup(mp, inum, flags, inodede
if ((flags & DEPALLOC) == 0)
return (0);
/*
- * If we are over our limit, try to improve the situation.
- */
- if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0)
+ * If the system is over its limit and our filesystem is
+ * responsible for more than our share of that usage and
+ * we are not in a rush, request some inodedep cleanup.
+ */
+ while (dep_current[D_INODEDEP] > max_softdeps &&
+ (flags & NODELAY) == 0 &&
+ ump->softdep_curdeps[D_INODEDEP] >
+ max_softdeps / stat_flush_threads)
request_cleanup(mp, FLUSH_INODES);
FREE_LOCK(ump);
inodedep = malloc(sizeof(struct inodedep),
@@ -2320,6 +2392,7 @@ void
softdep_initialize()
{
+ TAILQ_INIT(&softdepmounts);
max_softdeps = desiredvnodes * 4;
/* initialise bioops hack */
@@ -2378,7 +2451,9 @@ softdep_mount(devvp, mp, fs, cred)
ump = VFSTOUFS(mp);
ump->um_softdep = sdp;
MNT_IUNLOCK(mp);
- LOCK_PTR(ump) = &lk;
+ rw_init(LOCK_PTR(ump), "Per-Filesystem Softdep Lock");
+ TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
+ sdp->sd_ump = ump;
LIST_INIT(&ump->softdep_workitem_pending);
LIST_INIT(&ump->softdep_journal_pending);
TAILQ_INIT(&ump->softdep_unlinked);
@@ -2409,7 +2484,12 @@ softdep_mount(devvp, mp, fs, cred)
softdep_unmount(mp);
return (error);
}
- atomic_add_int(&stat_softdep_mounts, 1);
+ /*
+ * Start our flushing thread in the bufdaemon process.
+ */
+ kproc_kthread_add(&softdep_flush, mp, &bufdaemonproc,
+ &ump->softdep_flushtd, 0, 0, "softdepflush", "%s worker",
+ mp->mnt_stat.f_mntonname);
/*
* When doing soft updates, the counters in the
* superblock may have gotten out of sync. Recomputation
@@ -2465,7 +2545,24 @@ softdep_unmount(mp)
MNT_IUNLOCK(mp);
journal_unmount(ump);
}
- atomic_subtract_int(&stat_softdep_mounts, 1);
+ /*
+ * Shut down our flushing thread. Check for NULL in case
+ * softdep_mount errors out before the thread has been created.
+ */
+ if (ump->softdep_flushtd != NULL) {
+ ACQUIRE_LOCK(ump);
+ ump->softdep_flags |= FLUSH_EXIT;
+ wakeup(&ump->softdep_flushtd);
+ msleep(&ump->softdep_flags, LOCK_PTR(ump), PVM | PDROP,
+ "sdwait", 0);
+ KASSERT((ump->softdep_flags & FLUSH_EXIT) == 0,
+ ("Thread shutdown failed"));
+ }
+ /*
+ * Free up our resources.
+ */
+ rw_destroy(LOCK_PTR(ump));
+ TAILQ_REMOVE(&softdepmounts, ump->um_softdep, sd_next);
hashdestroy(ump->pagedep_hashtbl, M_PAGEDEP, ump->pagedep_hash_size);
hashdestroy(ump->inodedep_hashtbl, M_INODEDEP, ump->inodedep_hash_size);
hashdestroy(ump->newblk_hashtbl, M_NEWBLK, ump->newblk_hash_size);
@@ -2788,7 +2885,7 @@ journal_space(ump, thresh)
*/
limit = (max_softdeps / 10) * 9;
if (dep_current[D_INODEDEP] > limit &&
- ump->softdep_curdeps[D_INODEDEP] > limit / stat_softdep_mounts)
+ ump->softdep_curdeps[D_INODEDEP] > limit / stat_flush_threads)
return (0);
if (thresh)
thresh = jblocks->jb_min;
@@ -2813,7 +2910,7 @@ journal_suspend(ump)
if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
stat_journal_min++;
mp->mnt_kern_flag |= MNTK_SUSPEND;
- mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc);
+ mp->mnt_susp_owner = ump->softdep_flushtd;
}
jblocks->jb_suspended = 1;
MNT_IUNLOCK(mp);
@@ -2888,7 +2985,7 @@ softdep_prealloc(vp, waitok)
process_removes(vp);
process_truncates(vp);
if (journal_space(ump, 0) == 0) {
- softdep_speedup();
+ softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
@@ -2932,10 +3029,10 @@ softdep_prelink(dvp, vp)
}
process_removes(dvp);
process_truncates(dvp);
- softdep_speedup();
+ softdep_speedup(ump);
process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
if (journal_space(ump, 0) == 0) {
- softdep_speedup();
+ softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
@@ -3257,7 +3354,7 @@ softdep_process_journal(mp, needwk, flag
if (flags != MNT_WAIT)
break;
printf("softdep: Out of journal space!\n");
- softdep_speedup();
+ softdep_speedup(ump);
msleep(jblocks, LOCK_PTR(ump), PRIBIO, "jblocks", hz);
}
FREE_LOCK(ump);
@@ -3970,7 +4067,7 @@ free_freedep(freedep)
/*
* Allocate a new freework structure that may be a level in an indirect
* when parent is not NULL or a top level block when it is. The top level
- * freework structures are allocated without the soft updates lock held
+ * freework structures are allocated without the per-filesystem lock held
* and before the freeblks is visible outside of softdep_setup_freeblocks().
*/
static struct freework *
@@ -4039,7 +4136,7 @@ cancel_jfreeblk(freeblks, blkno)
/*
* Allocate a new jfreeblk to journal top level block pointer when truncating
- * a file. The caller must add this to the worklist when the soft updates
+ * a file. The caller must add this to the worklist when the per-filesystem
* lock is held.
*/
static struct jfreeblk *
@@ -7419,7 +7516,7 @@ softdep_freefile(pvp, ino, mode)
clear_unlinked_inodedep(inodedep);
/*
* Re-acquire inodedep as we've dropped the
- * soft updates lock in clear_unlinked_inodedep().
+ * per-filesystem lock in clear_unlinked_inodedep().
*/
inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
}
@@ -7965,10 +8062,8 @@ indir_trunc(freework, dbn, lbn)
* If we're goingaway, free the indirdep. Otherwise it will
* linger until the write completes.
*/
- if (goingaway) {
+ if (goingaway)
free_indirdep(indirdep);
- ump->softdep_numindirdeps -= 1;
- }
}
FREE_LOCK(ump);
/* Initialize pointers depending on block size. */
@@ -8140,7 +8235,7 @@ cancel_allocindir(aip, bp, freeblks, tru
* Create the mkdir dependencies for . and .. in a new directory. Link them
* in to a newdirblk so any subsequent additions are tracked properly. The
* caller is responsible for adding the mkdir1 dependency to the journal
- * and updating id_mkdiradd. This function returns with the soft updates
+ * and updating id_mkdiradd. This function returns with the per-filesystem
* lock held.
*/
static struct mkdir *
@@ -8958,12 +9053,16 @@ newdirrem(bp, dp, ip, isrmdir, prevdirre
panic("newdirrem: whiteout");
dvp = ITOV(dp);
/*
- * If we are over our limit, try to improve the situation.
+ * If the system is over its limit and our filesystem is
+ * responsible for more than our share of that usage and
+ * we are not a snapshot, request some inodedep cleanup.
* Limiting the number of dirrem structures will also limit
* the number of freefile and freeblks structures.
*/
ACQUIRE_LOCK(ip->i_ump);
- if (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2)
+ while (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2 &&
+ ip->i_ump->softdep_curdeps[D_DIRREM] >
+ (max_softdeps / 2) / stat_flush_threads)
(void) request_cleanup(ITOV(dp)->v_mount, FLUSH_BLOCKS);
FREE_LOCK(ip->i_ump);
dirrem = malloc(sizeof(struct dirrem),
@@ -9914,7 +10013,7 @@ initiate_write_filepage(pagedep, bp)
* Wait for all journal remove dependencies to hit the disk.
* We can not allow any potentially conflicting directory adds
* to be visible before removes and rollback is too difficult.
- * The soft updates lock may be dropped and re-acquired, however
+ * The per-filesystem lock may be dropped and re-acquired, however
* we hold the buf locked so the dependency can not go away.
*/
LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next)
@@ -10378,7 +10477,6 @@ cancel_indirdep(indirdep, bp, freeblks)
LIST_REMOVE(indirdep, ir_next);
}
indirdep->ir_state |= GOINGAWAY;
- VFSTOUFS(indirdep->ir_list.wk_mp)->softdep_numindirdeps += 1;
/*
* Pass in bp for blocks still have journal writes
* pending so we can cancel them on their own.
@@ -10805,7 +10903,7 @@ softdep_disk_write_complete(bp)
ACQUIRE_LOCK(ump);
while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
WORKLIST_REMOVE(wk);
- dep_write[wk->wk_type]++;
+ atomic_add_long(&dep_write[wk->wk_type], 1);
if (wk == owk)
panic("duplicate worklist: %p\n", wk);
owk = wk;
@@ -11488,7 +11586,7 @@ diradd_inode_written(dap, inodedep)
/*
* Returns true if the bmsafemap will have rollbacks when written. Must only
- * be called with the soft updates lock and the buf lock on the cg held.
+ * be called with the per-filesystem lock and the buf lock on the cg held.
*/
static int
bmsafemap_backgroundwrite(bmsafemap, bp)
@@ -12912,18 +13010,42 @@ softdep_slowdown(vp)
if (journal_space(ump, 0) == 0)
jlow = 1;
}
+ /*
+ * If the system is under its limits and our filesystem is
+ * not responsible for more than our share of the usage and
+ * we are not low on journal space, then no need to slow down.
+ */
max_softdeps_hard = max_softdeps * 11 / 10;
if (dep_current[D_DIRREM] < max_softdeps_hard / 2 &&
dep_current[D_INODEDEP] < max_softdeps_hard &&
- VFSTOUFS(vp->v_mount)->softdep_numindirdeps < maxindirdeps &&
- dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0) {
+ dep_current[D_INDIRDEP] < max_softdeps_hard / 1000 &&
+ dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0 &&
+ ump->softdep_curdeps[D_DIRREM] <
+ (max_softdeps_hard / 2) / stat_flush_threads &&
+ ump->softdep_curdeps[D_INODEDEP] <
+ max_softdeps_hard / stat_flush_threads &&
+ ump->softdep_curdeps[D_INDIRDEP] <
+ (max_softdeps_hard / 1000) / stat_flush_threads &&
+ ump->softdep_curdeps[D_FREEBLKS] <
+ max_softdeps_hard / stat_flush_threads) {
FREE_LOCK(ump);
return (0);
}
- if (VFSTOUFS(vp->v_mount)->softdep_numindirdeps >= maxindirdeps || jlow)
- softdep_speedup();
+ /*
+ * If the journal is low or our filesystem is over its limit
+ * then speedup the cleanup.
+ */
+ if (ump->softdep_curdeps[D_INDIRDEP] <
+ (max_softdeps_hard / 1000) / stat_flush_threads || jlow)
+ softdep_speedup(ump);
stat_sync_limit_hit += 1;
FREE_LOCK(ump);
+ /*
+ * We only slow down the rate at which new dependencies are
+ * generated if we are not using journaling. With journaling,
+ * the cleanup should always be sufficient to keep things
+ * under control.
+ */
if (DOINGSUJ(vp))
return (0);
return (1);
@@ -12981,13 +13103,12 @@ softdep_request_cleanup(fs, vp, cred, re
return (0);
}
/*
- * If we are in need of resources, consider pausing for
- * tickdelay to give ourselves some breathing room.
+ * If we are in need of resources, start by cleaning up
+ * any block removals associated with our inode.
*/
ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
- request_cleanup(UFSTOVFS(ump), resource);
FREE_LOCK(ump);
/*
* Now clean up at least as many resources as we will need.
@@ -13120,7 +13241,7 @@ request_cleanup(mp, resource)
* Next, we attempt to speed up the syncer process. If that
* is successful, then we allow the process to continue.
*/
- if (softdep_speedup() &&
+ if (softdep_speedup(ump) &&
resource != FLUSH_BLOCKS_WAIT &&
resource != FLUSH_INODES_WAIT)
return(0);
@@ -13138,15 +13259,19 @@ request_cleanup(mp, resource)
case FLUSH_INODES:
case FLUSH_INODES_WAIT:
+ ACQUIRE_GBLLOCK(&lk);
stat_ino_limit_push += 1;
req_clear_inodedeps += 1;
+ FREE_GBLLOCK(&lk);
stat_countp = &stat_ino_limit_hit;
break;
case FLUSH_BLOCKS:
case FLUSH_BLOCKS_WAIT:
+ ACQUIRE_GBLLOCK(&lk);
stat_blk_limit_push += 1;
req_clear_remove += 1;
+ FREE_GBLLOCK(&lk);
stat_countp = &stat_blk_limit_hit;
break;
@@ -13157,6 +13282,8 @@ request_cleanup(mp, resource)
* Hopefully the syncer daemon will catch up and awaken us.
* We wait at most tickdelay before proceeding in any case.
*/
+ ACQUIRE_GBLLOCK(&lk);
+ FREE_LOCK(ump);
proc_waiting += 1;
if (callout_pending(&softdep_callout) == FALSE)
callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
@@ -13164,6 +13291,8 @@ request_cleanup(mp, resource)
msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
proc_waiting -= 1;
+ FREE_GBLLOCK(&lk);
+ ACQUIRE_LOCK(ump);
return (1);
}
@@ -13177,16 +13306,13 @@ pause_timer(arg)
void *arg;
{
- rw_assert(&lk, RA_WLOCKED);
+ GBLLOCK_OWNED(&lk);
/*
* The callout_ API has acquired mtx and will hold it around this
* function call.
*/
- *stat_countp += 1;
- wakeup_one(&proc_waiting);
- if (proc_waiting > 0)
- callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
- pause_timer, 0);
+ *stat_countp += proc_waiting;
+ wakeup(&proc_waiting);
}
/*
@@ -13197,7 +13323,6 @@ check_clear_deps(mp)
struct mount *mp;
{
- rw_assert(&lk, RA_WLOCKED);
/*
* If we are suspended, it may be because of our using
* too many inodedeps, so help clear them out.
@@ -13207,16 +13332,22 @@ check_clear_deps(mp)
/*
* General requests for cleanup of backed up dependencies
*/
+ ACQUIRE_GBLLOCK(&lk);
if (req_clear_inodedeps) {
req_clear_inodedeps -= 1;
+ FREE_GBLLOCK(&lk);
clear_inodedeps(mp);
- wakeup_one(&proc_waiting);
+ ACQUIRE_GBLLOCK(&lk);
+ wakeup(&proc_waiting);
}
if (req_clear_remove) {
req_clear_remove -= 1;
+ FREE_GBLLOCK(&lk);
clear_remove(mp);
- wakeup_one(&proc_waiting);
+ ACQUIRE_GBLLOCK(&lk);
+ wakeup(&proc_waiting);
}
+ FREE_GBLLOCK(&lk);
}
/*
Modified: head/sys/ufs/ffs/softdep.h
==============================================================================
--- head/sys/ufs/ffs/softdep.h Mon Aug 4 21:41:01 2014 (r269532)
+++ head/sys/ufs/ffs/softdep.h Mon Aug 4 22:03:58 2014 (r269533)
@@ -1025,7 +1025,7 @@ TAILQ_HEAD(indir_hashhead, freework);
* Allocated at mount and freed at unmount.
*/
struct mount_softdeps {
- struct rwlock *sd_fslock; /* softdep lock */
+ struct rwlock sd_fslock; /* softdep lock */
struct workhead sd_workitem_pending; /* softdep work queue */
struct worklist *sd_worklist_tail; /* Tail pointer for above */
struct workhead sd_journal_pending; /* journal work queue */
@@ -1046,15 +1046,24 @@ struct mount_softdeps {
u_long sd_bmhashsize; /* bmsafemap hash table size-1*/
struct indir_hashhead *sd_indirhash; /* indir hash table */
u_long sd_indirhashsize; /* indir hash table size-1 */
- long sd_numindirdeps; /* outstanding indirdeps */
int sd_on_journal; /* Items on the journal list */
int sd_on_worklist; /* Items on the worklist */
int sd_deps; /* Total dependency count */
int sd_accdeps; /* accumulated dep count */
int sd_req; /* Wakeup when deps hits 0. */
+ int sd_flags; /* comm with flushing thread */
+ int sd_cleanups; /* Calls to cleanup */
+ struct thread *sd_flushtd; /* thread handling flushing */
+ TAILQ_ENTRY(mount_softdeps) sd_next; /* List of softdep filesystem */
+ struct ufsmount *sd_ump; /* our ufsmount structure */
u_long sd_curdeps[D_LAST + 1]; /* count of current deps */
};
/*
+ * Flags for communicating with the syncer thread.
+ */
+#define FLUSH_EXIT 0x0001 /* time to exit */
+#define FLUSH_CLEANUP 0x0002 /* need to clear out softdep structures */
+/*
* Keep the old names from when these were in the ufsmount structure.
*/
#define softdep_workitem_pending um_softdep->sd_workitem_pending
@@ -1077,10 +1086,11 @@ struct mount_softdeps {
#define bmsafemap_hash_size um_softdep->sd_bmhashsize
#define indir_hashtbl um_softdep->sd_indirhash
#define indir_hash_size um_softdep->sd_indirhashsize
-#define softdep_numindirdeps um_softdep->sd_numindirdeps
#define softdep_on_journal um_softdep->sd_on_journal
#define softdep_on_worklist um_softdep->sd_on_worklist
#define softdep_deps um_softdep->sd_deps
#define softdep_accdeps um_softdep->sd_accdeps
#define softdep_req um_softdep->sd_req
+#define softdep_flags um_softdep->sd_flags
+#define softdep_flushtd um_softdep->sd_flushtd
#define softdep_curdeps um_softdep->sd_curdeps
More information about the svn-src-all
mailing list