ule+smp: small optimization for turnstile priority lending

Andriy Gapon avg at FreeBSD.org
Tue Sep 18 14:05:10 UTC 2012


Here is a snippet that demonstrates the issue on a supposedly fully loaded
2-processor system:

136794   0 3670427870244462 KTRGRAPH group:"thread", id:"Xorg tid 102818",
state:"running", attributes: prio:122

136793   0 3670427870241000 KTRGRAPH group:"thread", id:"cc1plus tid 111916",
state:"yielding", attributes: prio:183, wmesg:"(null)", lockname:"(null)"

136792   1 3670427870240829 KTRGRAPH group:"thread", id:"idle: cpu1 tid 100004",
state:"running", attributes: prio:255

136791   1 3670427870239520 KTRGRAPH group:"load", id:"CPU 1 load", counter:0,
attributes: none

136790   1 3670427870239248 KTRGRAPH group:"thread", id:"firefox tid 113473",
state:"blocked", attributes: prio:122, wmesg:"(null)", lockname:"unp_mtx"

136789   1 3670427870237697 KTRGRAPH group:"load", id:"CPU 0 load", counter:2,
attributes: none

136788   1 3670427870236394 KTRGRAPH group:"thread", id:"firefox tid 113473",
point:"wokeup", attributes: linkedto:"Xorg tid 102818"

136787   1 3670427870236145 KTRGRAPH group:"thread", id:"Xorg tid 102818",
state:"runq add", attributes: prio:122, linkedto:"firefox tid 113473"

136786   1 3670427870235981 KTRGRAPH group:"load", id:"CPU 1 load", counter:1,
attributes: none

136785   1 3670427870235707 KTRGRAPH group:"thread", id:"Xorg tid 102818",
state:"runq rem", attributes: prio:176

136784   1 3670427870235423 KTRGRAPH group:"thread", id:"Xorg tid 102818",
point:"prio", attributes: prio:176, new prio:122, linkedto:"firefox tid 113473"

136783   1 3670427870202392 KTRGRAPH group:"thread", id:"firefox tid 113473",
state:"running", attributes: prio:104

See how how the Xorg thread was forced from CPU 1 to CPU 0 where it preempted
cc1plus thread (I do have preemption enabled) only to leave CPU 1 with zero load.

Here is a proposed solution:

    turnstile_wait: optimize priority lending to a thread on a runqueue

    As the current thread is definitely going into mi_switch, it now removes
    its load before doing priority propagation which can potentially result
    in sched_add.  In the SMP && ULE case the latter searches for the
    least loaded CPU to place a boosted thread, which is supposedly about
    to run.

diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 8e466cd..3299cae 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -1878,7 +1878,10 @@ sched_switch(struct thread *td, struct thread *newtd, int
flags)
 		/* This thread must be going to sleep. */
 		TDQ_LOCK(tdq);
 		mtx = thread_lock_block(td);
-		tdq_load_rem(tdq, td);
+#if defined(SMP)
+		if ((flags & SW_TYPE_MASK) != SWT_TURNSTILE)
+#endif
+			tdq_load_rem(tdq, td);
 	}
 	/*
 	 * We enter here with the thread blocked and assigned to the
@@ -2412,6 +2415,21 @@ sched_rem(struct thread *td)
 		tdq_setlowpri(tdq, NULL);
 }

+void
+sched_load_rem(struct thread *td)
+{
+	struct tdq *tdq;
+
+	KASSERT(td == curthread,
+	    ("sched_rem_load: only curthread is supported"));
+	KASSERT(td->td_oncpu == td->td_sched->ts_cpu,
+	    ("thread running on cpu different from ts_cpu"));
+	tdq = TDQ_CPU(td->td_sched->ts_cpu);
+	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+	MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
+	tdq_load_rem(tdq, td);
+}
+
 /*
  * Fetch cpu utilization information.  Updates on demand.
  */
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index 31d16fe..d1d68e9 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -731,6 +731,13 @@ turnstile_wait(struct turnstile *ts, struct thread *owner,
int queue)
 		LIST_INSERT_HEAD(&ts->ts_free, td->td_turnstile, ts_hash);
 	}
 	thread_lock(td);
+#if defined(SCHED_ULE) && defined(SMP)
+	/*
+	 * Remove load earlier so that it does not affect cpu selection
+	 * for a thread waken up due to priority lending, if any.
+	 */
+	sched_load_rem(td);
+#endif
 	thread_lock_set(td, &ts->ts_lock);
 	td->td_turnstile = NULL;

diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index 4b8387c..b1ead1b 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -110,6 +110,9 @@ void	sched_preempt(struct thread *td);
 void	sched_add(struct thread *td, int flags);
 void	sched_clock(struct thread *td);
 void	sched_rem(struct thread *td);
+#if defined(SCHED_ULE) && defined(SMP)
+void	sched_load_rem(struct thread *td);
+#endif
 void	sched_tick(int cnt);
 void	sched_relinquish(struct thread *td);
 struct thread *sched_choose(void);

-- 
Andriy Gapon


More information about the freebsd-hackers mailing list