svn commit: r354406 - stable/11/sys/kern

Alexander Motin mav at FreeBSD.org
Wed Nov 6 18:15:20 UTC 2019


Author: mav
Date: Wed Nov  6 18:15:20 2019
New Revision: 354406
URL: https://svnweb.freebsd.org/changeset/base/354406

Log:
  MFC r354241: Some more taskqueue optimizations.
  
   - Optimize enqueue for two task priority values by adding new tq_hint
  field, pointing to the last task inserted into the middle of the list.
  In case of more then two priority values it should halve average search.
   - Move tq_active insert/remove out of the taskqueue_run_locked loop.
  Instead of dirtying few shared cache lines per task introduce different
  mechanism to drain active tasks, based on task sequence number counter,
  that uses only cache lines already present in cache.  Since the new
  mechanism does not need ordering, switch tq_active from TAILQ to LIST.
   - Move static and dynamic struct taskqueue fields into different cache
  lines.  Move lock into its own cache line, so that heavy lock spinning
  by multiple waiting threads would not affect the running thread.
   - While there, correct some TQ_SLEEP() wait messages.
  
  This change fixes certain ZFS write workloads, causing huge congestion
  on taskqueue lock.  Those workloads combine some large block writes to
  saturate the pool and trigger allocation throttling, which uses higher
  priority tasks to requeue the delayed I/Os, with many small blocks to
  generate deep queue of small tasks for taskqueue to sort.
  
  Sponsored by:	iXsystems, Inc.

Modified:
  stable/11/sys/kern/subr_gtaskqueue.c
  stable/11/sys/kern/subr_taskqueue.c

Modified: stable/11/sys/kern/subr_gtaskqueue.c
==============================================================================
--- stable/11/sys/kern/subr_gtaskqueue.c	Wed Nov  6 18:02:18 2019	(r354405)
+++ stable/11/sys/kern/subr_gtaskqueue.c	Wed Nov  6 18:15:20 2019	(r354406)
@@ -55,24 +55,24 @@ static void	gtaskqueue_thread_loop(void *arg);
 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
 
 struct gtaskqueue_busy {
-	struct gtask	*tb_running;
-	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
+	struct gtask		*tb_running;
+	u_int			 tb_seq;
+	LIST_ENTRY(gtaskqueue_busy) tb_link;
 };
 
-static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
-
 struct gtaskqueue {
 	STAILQ_HEAD(, gtask)	tq_queue;
+	LIST_HEAD(, gtaskqueue_busy) tq_active;
+	u_int			tq_seq;
+	int			tq_callouts;
+	struct mtx_padalign	tq_mutex;
 	gtaskqueue_enqueue_fn	tq_enqueue;
 	void			*tq_context;
 	char			*tq_name;
-	TAILQ_HEAD(, gtaskqueue_busy) tq_active;
-	struct mtx		tq_mutex;
 	struct thread		**tq_threads;
 	int			tq_tcount;
 	int			tq_spin;
 	int			tq_flags;
-	int			tq_callouts;
 	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
 	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
 };
@@ -111,12 +111,11 @@ gtask_dump(struct gtask *gtask)
 #endif
 
 static __inline int
-TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
-    int t)
+TQ_SLEEP(struct gtaskqueue *tq, void *p, const char *wm)
 {
 	if (tq->tq_spin)
-		return (msleep_spin(p, m, wm, t));
-	return (msleep(p, m, pri, wm, t));
+		return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
+	return (msleep(p, &tq->tq_mutex, 0, wm, 0));
 }
 
 static struct gtaskqueue *
@@ -140,7 +139,7 @@ _gtaskqueue_create(const char *name, int mflags,
 	}
 
 	STAILQ_INIT(&queue->tq_queue);
-	TAILQ_INIT(&queue->tq_active);
+	LIST_INIT(&queue->tq_active);
 	queue->tq_enqueue = enqueue;
 	queue->tq_context = context;
 	queue->tq_name = tq_name;
@@ -163,7 +162,7 @@ gtaskqueue_terminate(struct thread **pp, struct gtaskq
 
 	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 		wakeup(tq);
-		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
+		TQ_SLEEP(tq, pp, "gtq_destroy");
 	}
 }
 
@@ -174,7 +173,7 @@ gtaskqueue_free(struct gtaskqueue *queue)
 	TQ_LOCK(queue);
 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 	gtaskqueue_terminate(queue->tq_threads, queue);
-	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
+	KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
 	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 	mtx_destroy(&queue->tq_mutex);
 	free(queue->tq_threads, M_GTASKQUEUE);
@@ -239,7 +238,7 @@ gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 	 * have completed or are currently executing.
 	 */
 	while (t_barrier.ta_flags & TASK_ENQUEUED)
-		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
+		TQ_SLEEP(queue, &t_barrier, "gtq_qdrain");
 }
 
 /*
@@ -250,32 +249,25 @@ gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 static void
 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
 {
-	struct gtaskqueue_busy tb_marker, *tb_first;
+	struct gtaskqueue_busy *tb;
+	u_int seq;
 
-	if (TAILQ_EMPTY(&queue->tq_active))
+	if (LIST_EMPTY(&queue->tq_active))
 		return;
 
 	/* Block taskq_terminate().*/
 	queue->tq_callouts++;
 
-	/*
-	 * Wait for all currently executing taskqueue threads
-	 * to go idle.
-	 */
-	tb_marker.tb_running = TB_DRAIN_WAITER;
-	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
-	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
-		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
-	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
+	/* Wait for any active task with sequence from the past. */
+	seq = queue->tq_seq;
+restart:
+	LIST_FOREACH(tb, &queue->tq_active, tb_link) {
+		if ((int)(tb->tb_seq - seq) <= 0) {
+			TQ_SLEEP(queue, tb->tb_running, "gtq_adrain");
+			goto restart;
+		}
+	}
 
-	/*
-	 * Wakeup any other drain waiter that happened to queue up
-	 * without any intervening active thread.
-	 */
-	tb_first = TAILQ_FIRST(&queue->tq_active);
-	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
-		wakeup(tb_first);
-
 	/* Release taskqueue_terminate(). */
 	queue->tq_callouts--;
 	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
@@ -306,40 +298,27 @@ static void
 gtaskqueue_run_locked(struct gtaskqueue *queue)
 {
 	struct gtaskqueue_busy tb;
-	struct gtaskqueue_busy *tb_first;
 	struct gtask *gtask;
 
 	KASSERT(queue != NULL, ("tq is NULL"));
 	TQ_ASSERT_LOCKED(queue);
 	tb.tb_running = NULL;
+	LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
 
-	while (STAILQ_FIRST(&queue->tq_queue)) {
-		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
-
-		/*
-		 * Carefully remove the first task from the queue and
-		 * clear its TASK_ENQUEUED flag
-		 */
-		gtask = STAILQ_FIRST(&queue->tq_queue);
-		KASSERT(gtask != NULL, ("task is NULL"));
+	while ((gtask = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
 		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 		gtask->ta_flags &= ~TASK_ENQUEUED;
 		tb.tb_running = gtask;
+		tb.tb_seq = ++queue->tq_seq;
 		TQ_UNLOCK(queue);
 
 		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
 		gtask->ta_func(gtask->ta_context);
 
 		TQ_LOCK(queue);
-		tb.tb_running = NULL;
 		wakeup(gtask);
-
-		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
-		tb_first = TAILQ_FIRST(&queue->tq_active);
-		if (tb_first != NULL &&
-		    tb_first->tb_running == TB_DRAIN_WAITER)
-			wakeup(tb_first);
 	}
+	LIST_REMOVE(&tb, tb_link);
 }
 
 static int
@@ -348,7 +327,7 @@ task_is_running(struct gtaskqueue *queue, struct gtask
 	struct gtaskqueue_busy *tb;
 
 	TQ_ASSERT_LOCKED(queue);
-	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
+	LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 		if (tb->tb_running == gtask)
 			return (1);
 	}
@@ -386,7 +365,7 @@ gtaskqueue_drain(struct gtaskqueue *queue, struct gtas
 
 	TQ_LOCK(queue);
 	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
-		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
+		TQ_SLEEP(queue, gtask, "gtq_drain");
 	TQ_UNLOCK(queue);
 }
 
@@ -511,7 +490,7 @@ gtaskqueue_thread_loop(void *arg)
 		 */
 		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 			break;
-		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
+		TQ_SLEEP(tq, tq, "-");
 	}
 	gtaskqueue_run_locked(tq);
 	/*
@@ -537,7 +516,7 @@ gtaskqueue_thread_enqueue(void *context)
 
 	tqp = context;
 	tq = *tqp;
-	wakeup_one(tq);
+	wakeup_any(tq);
 }
 
 

Modified: stable/11/sys/kern/subr_taskqueue.c
==============================================================================
--- stable/11/sys/kern/subr_taskqueue.c	Wed Nov  6 18:02:18 2019	(r354405)
+++ stable/11/sys/kern/subr_taskqueue.c	Wed Nov  6 18:15:20 2019	(r354406)
@@ -54,24 +54,25 @@ static void	 taskqueue_swi_enqueue(void *);
 static void	 taskqueue_swi_giant_enqueue(void *);
 
 struct taskqueue_busy {
-	struct task	*tb_running;
-	TAILQ_ENTRY(taskqueue_busy) tb_link;
+	struct task		*tb_running;
+	u_int			 tb_seq;
+	LIST_ENTRY(taskqueue_busy) tb_link;
 };
 
-struct task * const TB_DRAIN_WAITER = (struct task *)0x1;
-
 struct taskqueue {
 	STAILQ_HEAD(, task)	tq_queue;
+	LIST_HEAD(, taskqueue_busy) tq_active;
+	struct task		*tq_hint;
+	u_int			tq_seq;
+	int			tq_callouts;
+	struct mtx_padalign	tq_mutex;
 	taskqueue_enqueue_fn	tq_enqueue;
 	void			*tq_context;
 	char			*tq_name;
-	TAILQ_HEAD(, taskqueue_busy) tq_active;
-	struct mtx		tq_mutex;
 	struct thread		**tq_threads;
 	int			tq_tcount;
 	int			tq_spin;
 	int			tq_flags;
-	int			tq_callouts;
 	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
 	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
 };
@@ -114,12 +115,11 @@ _timeout_task_init(struct taskqueue *queue, struct tim
 }
 
 static __inline int
-TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
-    int t)
+TQ_SLEEP(struct taskqueue *tq, void *p, const char *wm)
 {
 	if (tq->tq_spin)
-		return (msleep_spin(p, m, wm, t));
-	return (msleep(p, m, pri, wm, t));
+		return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
+	return (msleep(p, &tq->tq_mutex, 0, wm, 0));
 }
 
 static struct taskqueue *
@@ -143,7 +143,7 @@ _taskqueue_create(const char *name, int mflags,
 	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 
 	STAILQ_INIT(&queue->tq_queue);
-	TAILQ_INIT(&queue->tq_active);
+	LIST_INIT(&queue->tq_active);
 	queue->tq_enqueue = enqueue;
 	queue->tq_context = context;
 	queue->tq_name = tq_name;
@@ -194,7 +194,7 @@ taskqueue_terminate(struct thread **pp, struct taskque
 
 	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 		wakeup(tq);
-		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
+		TQ_SLEEP(tq, pp, "tq_destroy");
 	}
 }
 
@@ -205,7 +205,7 @@ taskqueue_free(struct taskqueue *queue)
 	TQ_LOCK(queue);
 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 	taskqueue_terminate(queue->tq_threads, queue);
-	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
+	KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
 	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 	mtx_destroy(&queue->tq_mutex);
 	free(queue->tq_threads, M_TASKQUEUE);
@@ -231,21 +231,30 @@ taskqueue_enqueue_locked(struct taskqueue *queue, stru
 	}
 
 	/*
-	 * Optimise the case when all tasks have the same priority.
+	 * Optimise cases when all tasks use small set of priorities.
+	 * In case of only one priority we always insert at the end.
+	 * In case of two tq_hint typically gives the insertion point.
+	 * In case of more then two tq_hint should halve the search.
 	 */
 	prev = STAILQ_LAST(&queue->tq_queue, task, ta_link);
 	if (!prev || prev->ta_priority >= task->ta_priority) {
 		STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
 	} else {
-		prev = NULL;
-		for (ins = STAILQ_FIRST(&queue->tq_queue); ins;
-		     prev = ins, ins = STAILQ_NEXT(ins, ta_link))
+		prev = queue->tq_hint;
+		if (prev && prev->ta_priority >= task->ta_priority) {
+			ins = STAILQ_NEXT(prev, ta_link);
+		} else {
+			prev = NULL;
+			ins = STAILQ_FIRST(&queue->tq_queue);
+		}
+		for (; ins; prev = ins, ins = STAILQ_NEXT(ins, ta_link))
 			if (ins->ta_priority < task->ta_priority)
 				break;
 
-		if (prev)
+		if (prev) {
 			STAILQ_INSERT_AFTER(&queue->tq_queue, prev, task, ta_link);
-		else
+			queue->tq_hint = task;
+		} else
 			STAILQ_INSERT_HEAD(&queue->tq_queue, task, ta_link);
 	}
 
@@ -362,6 +371,7 @@ taskqueue_drain_tq_queue(struct taskqueue *queue)
 	 */
 	TASK_INIT(&t_barrier, USHRT_MAX, taskqueue_task_nop_fn, &t_barrier);
 	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
+	queue->tq_hint = &t_barrier;
 	t_barrier.ta_pending = 1;
 
 	/*
@@ -369,7 +379,7 @@ taskqueue_drain_tq_queue(struct taskqueue *queue)
 	 * have completed or are currently executing.
 	 */
 	while (t_barrier.ta_pending != 0)
-		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
+		TQ_SLEEP(queue, &t_barrier, "tq_qdrain");
 	return (1);
 }
 
@@ -381,32 +391,25 @@ taskqueue_drain_tq_queue(struct taskqueue *queue)
 static int
 taskqueue_drain_tq_active(struct taskqueue *queue)
 {
-	struct taskqueue_busy tb_marker, *tb_first;
+	struct taskqueue_busy *tb;
+	u_int seq;
 
-	if (TAILQ_EMPTY(&queue->tq_active))
+	if (LIST_EMPTY(&queue->tq_active))
 		return (0);
 
 	/* Block taskq_terminate().*/
 	queue->tq_callouts++;
 
-	/*
-	 * Wait for all currently executing taskqueue threads
-	 * to go idle.
-	 */
-	tb_marker.tb_running = TB_DRAIN_WAITER;
-	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
-	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
-		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
-	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
+	/* Wait for any active task with sequence from the past. */
+	seq = queue->tq_seq;
+restart:
+	LIST_FOREACH(tb, &queue->tq_active, tb_link) {
+		if ((int)(tb->tb_seq - seq) <= 0) {
+			TQ_SLEEP(queue, tb->tb_running, "tq_adrain");
+			goto restart;
+		}
+	}
 
-	/*
-	 * Wakeup any other drain waiter that happened to queue up
-	 * without any intervening active thread.
-	 */
-	tb_first = TAILQ_FIRST(&queue->tq_active);
-	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
-		wakeup(tb_first);
-
 	/* Release taskqueue_terminate(). */
 	queue->tq_callouts--;
 	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
@@ -438,42 +441,31 @@ static void
 taskqueue_run_locked(struct taskqueue *queue)
 {
 	struct taskqueue_busy tb;
-	struct taskqueue_busy *tb_first;
 	struct task *task;
 	int pending;
 
 	KASSERT(queue != NULL, ("tq is NULL"));
 	TQ_ASSERT_LOCKED(queue);
 	tb.tb_running = NULL;
+	LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
 
-	while (STAILQ_FIRST(&queue->tq_queue)) {
-		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
-
-		/*
-		 * Carefully remove the first task from the queue and
-		 * zero its pending count.
-		 */
-		task = STAILQ_FIRST(&queue->tq_queue);
-		KASSERT(task != NULL, ("task is NULL"));
+	while ((task = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
 		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
+		if (queue->tq_hint == task)
+			queue->tq_hint = NULL;
 		pending = task->ta_pending;
 		task->ta_pending = 0;
 		tb.tb_running = task;
+		tb.tb_seq = ++queue->tq_seq;
 		TQ_UNLOCK(queue);
 
 		KASSERT(task->ta_func != NULL, ("task->ta_func is NULL"));
 		task->ta_func(task->ta_context, pending);
 
 		TQ_LOCK(queue);
-		tb.tb_running = NULL;
 		wakeup(task);
-
-		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
-		tb_first = TAILQ_FIRST(&queue->tq_active);
-		if (tb_first != NULL &&
-		    tb_first->tb_running == TB_DRAIN_WAITER)
-			wakeup(tb_first);
 	}
+	LIST_REMOVE(&tb, tb_link);
 }
 
 void
@@ -491,7 +483,7 @@ task_is_running(struct taskqueue *queue, struct task *
 	struct taskqueue_busy *tb;
 
 	TQ_ASSERT_LOCKED(queue);
-	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
+	LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 		if (tb->tb_running == task)
 			return (1);
 	}
@@ -520,8 +512,11 @@ taskqueue_cancel_locked(struct taskqueue *queue, struc
     u_int *pendp)
 {
 
-	if (task->ta_pending > 0)
+	if (task->ta_pending > 0) {
 		STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link);
+		if (queue->tq_hint == task)
+			queue->tq_hint = NULL;
+	}
 	if (pendp != NULL)
 		*pendp = task->ta_pending;
 	task->ta_pending = 0;
@@ -570,7 +565,7 @@ taskqueue_drain(struct taskqueue *queue, struct task *
 
 	TQ_LOCK(queue);
 	while (task->ta_pending != 0 || task_is_running(queue, task))
-		TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
+		TQ_SLEEP(queue, task, "tq_drain");
 	TQ_UNLOCK(queue);
 }
 
@@ -776,7 +771,7 @@ taskqueue_thread_loop(void *arg)
 		 */
 		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 			break;
-		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
+		TQ_SLEEP(tq, tq, "-");
 	}
 	taskqueue_run_locked(tq);
 	/*


More information about the svn-src-all mailing list