PERFORCE change 67928 for review

John Baldwin jhb at FreeBSD.org
Thu Dec 30 12:59:53 PST 2004


http://perforce.freebsd.org/chv.cgi?CH=67928

Change 67928 by jhb at jhb_slimer on 2004/12/30 20:59:50

	IFC @67927.  Loop back priority inversion fixes.
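
	The common thread in these changes: code that used to write
	td_base_pri (and sometimes td_priority) directly now calls
	sched_prio() with sched_lock held, and sched_prio() records
	the base priority itself.  A minimal sketch of the before and
	after idiom, distilled from the md, GEOM, ndis, and ithread
	hunks below:

		/* Before: scheduler bypassed; a priority lent through
		 * a turnstile could be silently clobbered. */
		curthread->td_base_pri = PRIBIO;

		/* After: let the scheduler do it.  sched_prio() updates
		 * td_base_pri and declines to lower the active priority
		 * while the thread is flagged TDF_BORROWING. */
		mtx_lock_spin(&sched_lock);
		sched_prio(curthread, PRIBIO);
		mtx_unlock_spin(&sched_lock);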

Affected files ...

.. //depot/projects/smpng/sys/compat/ndis/subr_hal.c#9 integrate
.. //depot/projects/smpng/sys/dev/md/md.c#61 integrate
.. //depot/projects/smpng/sys/geom/geom_kern.c#19 integrate
.. //depot/projects/smpng/sys/geom/mirror/g_mirror.c#15 integrate
.. //depot/projects/smpng/sys/geom/raid3/g_raid3.c#12 integrate
.. //depot/projects/smpng/sys/kern/kern_intr.c#64 integrate
.. //depot/projects/smpng/sys/kern/kern_resource.c#53 integrate
.. //depot/projects/smpng/sys/kern/kern_synch.c#88 integrate
.. //depot/projects/smpng/sys/kern/sched_4bsd.c#45 integrate
.. //depot/projects/smpng/sys/kern/sched_ule.c#49 integrate
.. //depot/projects/smpng/sys/kern/subr_trap.c#75 integrate
.. //depot/projects/smpng/sys/kern/subr_turnstile.c#18 integrate
.. //depot/projects/smpng/sys/sys/proc.h#137 integrate
.. //depot/projects/smpng/sys/sys/sched.h#18 integrate
.. //depot/projects/smpng/sys/sys/turnstile.h#5 integrate

Differences ...

==== //depot/projects/smpng/sys/compat/ndis/subr_hal.c#9 (text+ko) ====

@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/compat/ndis/subr_hal.c,v 1.13 2004/08/01 20:04:30 wpaul Exp $");
+__FBSDID("$FreeBSD: src/sys/compat/ndis/subr_hal.c,v 1.14 2004/12/30 20:29:58 jhb Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -308,7 +308,6 @@
 	mtx_lock_spin(&sched_lock);
 	oldirql = curthread->td_base_pri;
 	sched_prio(curthread, PI_REALTIME);
-	curthread->td_base_pri = PI_REALTIME;
 	mtx_unlock_spin(&sched_lock);
 
 	return(oldirql);
@@ -324,7 +323,6 @@
 		panic("IRQL_NOT_GREATER_THAN");
 
 	mtx_lock_spin(&sched_lock);
-	curthread->td_base_pri = oldirql;
 	sched_prio(curthread, oldirql);
 	mtx_unlock_spin(&sched_lock);
 

==== //depot/projects/smpng/sys/dev/md/md.c#61 (text+ko) ====

@@ -6,7 +6,7 @@
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  *
- * $FreeBSD: src/sys/dev/md/md.c,v 1.146 2004/12/27 17:20:06 pjd Exp $
+ * $FreeBSD: src/sys/dev/md/md.c,v 1.147 2004/12/30 20:29:58 jhb Exp $
  *
  */
 
@@ -71,6 +71,7 @@
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
+#include <sys/sched.h>
 #include <sys/sf_buf.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
@@ -618,7 +619,9 @@
 	int error, hasgiant;
 
 	sc = arg;
-	curthread->td_base_pri = PRIBIO;
+	mtx_lock_spin(&sched_lock);
+	sched_prio(curthread, PRIBIO);
+	mtx_unlock_spin(&sched_lock);
 
 	switch (sc->type) {
 	case MD_VNODE:

==== //depot/projects/smpng/sys/geom/geom_kern.c#19 (text+ko) ====

@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/geom_kern.c,v 1.35 2004/09/13 14:58:27 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/geom_kern.c,v 1.36 2004/12/30 20:29:58 jhb Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -47,8 +47,9 @@
 #include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/sbuf.h>
+#include <sys/sched.h>
 #include <sys/sx.h>
-#include <sys/sbuf.h>
 #include <geom/geom.h>
 #include <geom/geom_int.h>
 
@@ -87,7 +88,9 @@
 	struct thread *tp = FIRST_THREAD_IN_PROC(p);
 
 	mtx_assert(&Giant, MA_NOTOWNED);
-	tp->td_base_pri = PRIBIO;
+	mtx_lock_spin(&sched_lock);
+	sched_prio(tp, PRIBIO);
+	mtx_unlock_spin(&sched_lock);
 	for(;;) {
 		g_io_schedule_up(tp);
 	}
@@ -108,7 +111,9 @@
 	struct thread *tp = FIRST_THREAD_IN_PROC(p);
 
 	mtx_assert(&Giant, MA_NOTOWNED);
-	tp->td_base_pri = PRIBIO;
+	mtx_lock_spin(&sched_lock);
+	sched_prio(tp, PRIBIO);
+	mtx_unlock_spin(&sched_lock);
 	for(;;) {
 		g_io_schedule_down(tp);
 	}
@@ -129,7 +134,9 @@
 	struct thread *tp = FIRST_THREAD_IN_PROC(p);
 
 	mtx_assert(&Giant, MA_NOTOWNED);
-	tp->td_base_pri = PRIBIO;
+	mtx_lock_spin(&sched_lock);
+	sched_prio(tp, PRIBIO);
+	mtx_unlock_spin(&sched_lock);
 	for(;;) {
 		g_run_events();
 		tsleep(&g_wait_event, PRIBIO, "-", hz/10);

==== //depot/projects/smpng/sys/geom/mirror/g_mirror.c#15 (text+ko) ====

@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror.c,v 1.54 2004/12/23 21:15:15 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror.c,v 1.55 2004/12/30 20:29:58 jhb Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -42,6 +42,7 @@
 #include <geom/geom.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
+#include <sys/sched.h>
 #include <geom/mirror/g_mirror.h>
 
 
@@ -1478,7 +1479,9 @@
 	u_int nreqs;
 
 	sc = arg;
-	curthread->td_base_pri = PRIBIO;
+	mtx_lock_spin(&sched_lock);
+	sched_prio(curthread, PRIBIO);
+	mtx_unlock_spin(&sched_lock);
 
 	nreqs = 0;
 	for (;;) {

==== //depot/projects/smpng/sys/geom/raid3/g_raid3.c#12 (text+ko) ====

@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3.c,v 1.33 2004/12/28 21:52:45 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3.c,v 1.34 2004/12/30 20:29:58 jhb Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -43,6 +43,7 @@
 #include <geom/geom.h>
 #include <sys/proc.h>
 #include <sys/kthread.h>
+#include <sys/sched.h>
 #include <geom/raid3/g_raid3.h>
 
 
@@ -1729,7 +1730,9 @@
 	u_int nreqs;
 
 	sc = arg;
-	curthread->td_base_pri = PRIBIO;
+	mtx_lock_spin(&sched_lock);
+	sched_prio(curthread, PRIBIO);
+	mtx_unlock_spin(&sched_lock);
 
 	nreqs = 0;
 	for (;;) {

==== //depot/projects/smpng/sys/kern/kern_intr.c#64 (text+ko) ====

@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/kern_intr.c,v 1.120 2004/11/17 14:39:41 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/kern_intr.c,v 1.121 2004/12/30 20:29:58 jhb Exp $");
 
 #include "opt_ddb.h"
 
@@ -45,6 +45,7 @@
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/resourcevar.h>
+#include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vmmeter.h>
@@ -143,14 +144,12 @@
 	ih = TAILQ_FIRST(&ithd->it_handlers);
 	if (ih == NULL) {
 		mtx_lock_spin(&sched_lock);
-		td->td_priority = PRI_MAX_ITHD;
-		td->td_base_pri = PRI_MAX_ITHD;
+		sched_prio(td, PRI_MAX_ITHD);
 		mtx_unlock_spin(&sched_lock);
 		return;
 	}
 	mtx_lock_spin(&sched_lock);
-	td->td_priority = ih->ih_pri;
-	td->td_base_pri = ih->ih_pri;
+	sched_prio(td, ih->ih_pri);
 	mtx_unlock_spin(&sched_lock);
 	missed = 0;
 	TAILQ_FOREACH(ih, &ithd->it_handlers, ih_next) {

==== //depot/projects/smpng/sys/kern/kern_resource.c#53 (text+ko) ====

@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/kern_resource.c,v 1.146 2004/10/05 18:51:11 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/kern_resource.c,v 1.147 2004/12/30 20:29:58 jhb Exp $");
 
 #include "opt_compat.h"
 
@@ -423,7 +423,6 @@
 	}
 	sched_class(kg, rtp->type);
 	if (curthread->td_ksegrp == kg) {
-		curthread->td_base_pri = kg->kg_user_pri;
 		sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */
 	}
 	return (0);

==== //depot/projects/smpng/sys/kern/kern_synch.c#88 (text+ko) ====

@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/kern_synch.c,v 1.266 2004/12/26 00:14:21 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/kern_synch.c,v 1.267 2004/12/30 20:29:58 jhb Exp $");
 
 #include "opt_ktrace.h"
 
@@ -212,8 +212,6 @@
 
 	/*
 	 * Adjust this thread's priority.
-	 *
-	 * XXX: do we need to save priority in td_base_pri?
 	 */
 	mtx_lock_spin(&sched_lock);
 	sched_prio(td, priority & PRIMASK);

==== //depot/projects/smpng/sys/kern/sched_4bsd.c#45 (text+ko) ====

@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.70 2004/12/26 00:16:24 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.71 2004/12/30 20:52:44 jhb Exp $");
 
 #define kse td_sched
 
@@ -50,6 +50,7 @@
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
+#include <sys/turnstile.h>
 #include <machine/smp.h>
 
 /*
@@ -159,10 +160,12 @@
 static void	roundrobin(void *arg);
 static void	schedcpu(void);
 static void	schedcpu_thread(void);
+static void	sched_priority(struct thread *td, u_char prio);
 static void	sched_setup(void *dummy);
 static void	maybe_resched(struct thread *td);
 static void	updatepri(struct ksegrp *kg);
 static void	resetpriority(struct ksegrp *kg);
+static void	resetpriority_thread(struct thread *td, struct ksegrp *kg);
 #ifdef SMP
 static int	forward_wakeup(int  cpunum);
 #endif
@@ -516,9 +519,7 @@
 			kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
 		      	resetpriority(kg);
 			FOREACH_THREAD_IN_GROUP(kg, td) {
-				if (td->td_priority >= PUSER) {
-					sched_prio(td, kg->kg_user_pri);
-				}
+				resetpriority_thread(td, kg);
 			}
 		} /* end of ksegrp loop */
 		mtx_unlock_spin(&sched_lock);
@@ -561,7 +562,6 @@
 			newcpu = decay_cpu(loadfac, newcpu);
 		kg->kg_estcpu = newcpu;
 	}
-	resetpriority(kg);
 }
 
 /*
@@ -573,7 +573,6 @@
 resetpriority(struct ksegrp *kg)
 {
 	register unsigned int newpriority;
-	struct thread *td;
 
 	if (kg->kg_pri_class == PRI_TIMESHARE) {
 		newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT +
@@ -582,9 +581,25 @@
 		    PRI_MAX_TIMESHARE);
 		kg->kg_user_pri = newpriority;
 	}
-	FOREACH_THREAD_IN_GROUP(kg, td) {
-		maybe_resched(td);			/* XXXKSE silly */
-	}
+}
+
+/*
+ * Update the thread's priority when the associated ksegroup's user
+ * priority changes.
+ */
+static void
+resetpriority_thread(struct thread *td, struct ksegrp *kg)
+{
+
+	/* Only change threads with a time sharing user priority. */
+	if (td->td_priority < PRI_MIN_TIMESHARE ||
+	    td->td_priority > PRI_MAX_TIMESHARE)
+		return;
+
+	/* XXX the whole needresched thing is broken, but not silly. */
+	maybe_resched(td);
+
+	sched_prio(td, kg->kg_user_pri);
 }
 
 /* ARGSUSED */
@@ -674,8 +689,7 @@
 	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
 	if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
 		resetpriority(kg);
-		if (td->td_priority >= PUSER)
-			td->td_priority = kg->kg_user_pri;
+		resetpriority_thread(td, kg);
 	}
 }
 
@@ -735,12 +749,16 @@
 sched_nice(struct proc *p, int nice)
 {
 	struct ksegrp *kg;
+	struct thread *td;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	mtx_assert(&sched_lock, MA_OWNED);
 	p->p_nice = nice;
 	FOREACH_KSEGRP_IN_PROC(p, kg) {
 		resetpriority(kg);
+		FOREACH_THREAD_IN_GROUP(kg, td) {
+			resetpriority_thread(td, kg);
+		}
 	}
 }
 
@@ -757,14 +775,16 @@
  * changing the assignment of a kse to the thread,
  * and moving a KSE in the system run queue.
  */
-void
-sched_prio(struct thread *td, u_char prio)
+static void
+sched_priority(struct thread *td, u_char prio)
 {
 	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
 	    td, td->td_proc->p_comm, td->td_priority, prio, curthread, 
 	    curthread->td_proc->p_comm);
 
 	mtx_assert(&sched_lock, MA_OWNED);
+	if (td->td_priority == prio)
+		return;
 	if (TD_ON_RUNQ(td)) {
 		adjustrunqueue(td, prio);
 	} else {
@@ -772,13 +792,76 @@
 	}
 }
 
+/*
+ * Update a thread's priority when it is lent another thread's
+ * priority.
+ */
+void
+sched_lend_prio(struct thread *td, u_char prio)
+{
+
+	td->td_flags |= TDF_BORROWING;
+	sched_priority(td, prio);
+}
+
+/*
+ * Restore a thread's priority when priority propagation is
+ * over.  The prio argument is the minimum priority the thread
+ * needs to have to satisfy other possible priority lending
+ * requests.  If the thread's regular priority is less
+ * important than prio, the thread will keep a priority boost
+ * of prio.
+ */
+void
+sched_unlend_prio(struct thread *td, u_char prio)
+{
+	u_char base_pri;
+
+	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
+	    td->td_base_pri <= PRI_MAX_TIMESHARE)
+		base_pri = td->td_ksegrp->kg_user_pri;
+	else
+		base_pri = td->td_base_pri;
+	if (prio >= base_pri) {
+		td->td_flags &= ~TDF_BORROWING;
+		sched_prio(td, base_pri);
+	} else
+		sched_lend_prio(td, prio);
+}
+
 void
+sched_prio(struct thread *td, u_char prio)
+{
+	u_char oldprio;
+
+	/* First, update the base priority. */
+	td->td_base_pri = prio;
+
+	/*
+	 * If the thread is borrowing another thread's priority, don't ever
+	 * lower the priority.
+	 */
+	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
+		return;
+
+	/* Change the real priority. */
+	oldprio = td->td_priority;
+	sched_priority(td, prio);
+
+	/*
+	 * If the thread is on a turnstile, then let the turnstile update
+	 * its state.
+	 */
+	if (TD_ON_LOCK(td) && oldprio != prio)
+		turnstile_adjust(td, oldprio);
+}
+
+void
 sched_sleep(struct thread *td)
 {
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	td->td_ksegrp->kg_slptime = 0;
-	td->td_base_pri = td->td_priority;
 }
 
 static void remrunqueue(struct thread *td);
@@ -889,8 +972,10 @@
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	kg = td->td_ksegrp;
-	if (kg->kg_slptime > 1)
+	if (kg->kg_slptime > 1) {
 		updatepri(kg);
+		resetpriority(kg);
+	}
 	kg->kg_slptime = 0;
 	setrunqueue(td, SRQ_BORING);
 }
@@ -1157,10 +1242,13 @@
 	 * it here and returning to user mode, so don't waste time setting
 	 * it perfectly here.
 	 */
+	KASSERT((td->td_flags & TDF_BORROWING) == 0,
+	    ("thread with borrowed priority returning to userland"));
 	kg = td->td_ksegrp;
 	if (td->td_priority != kg->kg_user_pri) {
 		mtx_lock_spin(&sched_lock);
 		td->td_priority = kg->kg_user_pri;
+		td->td_base_pri = kg->kg_user_pri;
 		mtx_unlock_spin(&sched_lock);
 	}
 }

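The 4BSD side now splits priority changes three ways: sched_priority()
does the mechanics, sched_prio() handles voluntary base-priority
changes (and calls turnstile_adjust() when the thread is blocked on a
lock), and sched_lend_prio()/sched_unlend_prio() implement priority
inheritance, with TDF_BORROWING marking a thread running on a waiter's
priority.  A sketch of the intended lifecycle from a lock owner's
point of view; the unlend call site is in turnstile_unpend(), which is
truncated from this mail, so that half is an assumption:

	mtx_lock_spin(&sched_lock);

	/* A more important waiter blocks on a lock we hold:
	 * borrow its priority (this sets TDF_BORROWING). */
	if (td->td_priority < owner->td_priority)
		sched_lend_prio(owner, td->td_priority);

	/* On release, pri is the most important priority among any
	 * remaining waiters; sched_unlend_prio() either reverts to
	 * the base priority and clears TDF_BORROWING, or keeps
	 * lending at pri. */
	sched_unlend_prio(owner, pri);

	mtx_unlock_spin(&sched_lock);
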
==== //depot/projects/smpng/sys/kern/sched_ule.c#49 (text+ko) ====

@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/sched_ule.c,v 1.143 2004/12/26 23:21:48 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/sched_ule.c,v 1.144 2004/12/30 20:52:44 jhb Exp $");
 
 #include <opt_sched.h>
 
@@ -46,6 +46,7 @@
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
+#include <sys/turnstile.h>
 #include <sys/vmmeter.h>
 #ifdef KTRACE
 #include <sys/uio.h>
@@ -133,8 +134,7 @@
 #define	KEF_XFERABLE	0x0004		/* Thread was added as transferable. */
 #define	KEF_HOLD	0x0008		/* Thread is temporarily bound. */
 #define	KEF_REMOVED	0x0010		/* Thread was removed while ASSIGNED */
-#define	KEF_PRIOELEV	0x0020		/* Thread has had its prio elevated. */
-#define	KEF_INTERNAL	0x0040
+#define	KEF_INTERNAL	0x0020
 
 struct kg_sched {
 	struct thread	*skg_last_assigned; /* (j) Last thread assigned to */
@@ -234,7 +234,7 @@
 #define	SCHED_INTERACTIVE(kg)						\
     (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
 #define	SCHED_CURR(kg, ke)						\
-    ((ke->ke_flags & KEF_PRIOELEV) || SCHED_INTERACTIVE(kg))
+    ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg))
 
 /*
  * Cpu percentage computation macros and defines.
@@ -315,6 +315,7 @@
 static struct kse *sched_choose(void);		/* XXX Should be thread * */
 static void sched_slice(struct kse *ke);
 static void sched_priority(struct ksegrp *kg);
+static void sched_thread_priority(struct thread *td, u_char prio);
 static int sched_interact_score(struct ksegrp *kg);
 static void sched_interact_update(struct ksegrp *kg);
 static void sched_interact_fork(struct ksegrp *kg);
@@ -1066,7 +1067,7 @@
 	kg = ke->ke_ksegrp;
 	kseq = KSEQ_CPU(ke->ke_cpu);
 
-	if (ke->ke_flags & KEF_PRIOELEV) {
+	if (ke->ke_thread->td_flags & TDF_BORROWING) {
 		ke->ke_slice = SCHED_SLICE_MIN;
 		return;
 	}
@@ -1230,7 +1231,7 @@
 }
 
 void
-sched_prio(struct thread *td, u_char prio)
+sched_thread_priority(struct thread *td, u_char prio)
 {
 	struct kse *ke;
 
@@ -1239,6 +1240,8 @@
 	    curthread->td_proc->p_comm);
 	ke = td->td_kse;
 	mtx_assert(&sched_lock, MA_OWNED);
+	if (td->td_priority == prio)
+		return;
 	if (TD_ON_RUNQ(td)) {
 		/*
 		 * If the priority has been elevated due to priority
@@ -1253,8 +1256,6 @@
 			ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
 			runq_add(ke->ke_runq, ke, 0);
 		}
-		if (prio < td->td_priority)
-			ke->ke_flags |= KEF_PRIOELEV;
 		/*
 		 * Hold this kse on this cpu so that sched_prio() doesn't
 		 * cause excessive migration.  We only want migration to
@@ -1267,7 +1268,71 @@
 		td->td_priority = prio;
 }
 
+/*
+ * Update a thread's priority when it is lent another thread's
+ * priority.
+ */
+void
+sched_lend_prio(struct thread *td, u_char prio)
+{
+
+	td->td_flags |= TDF_BORROWING;
+	sched_thread_priority(td, prio);
+}
+
+/*
+ * Restore a thread's priority when priority propagation is
+ * over.  The prio argument is the minimum priority the thread
+ * needs to have to satisfy other possible priority lending
+ * requests.  If the thread's regular priority is less
+ * important than prio, the thread will keep a priority boost
+ * of prio.
+ */
 void
+sched_unlend_prio(struct thread *td, u_char prio)
+{
+	u_char base_pri;
+
+	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
+	    td->td_base_pri <= PRI_MAX_TIMESHARE)
+		base_pri = td->td_ksegrp->kg_user_pri;
+	else
+		base_pri = td->td_base_pri;
+	if (prio >= base_pri) {
+		td->td_flags &= ~TDF_BORROWING;
+		sched_thread_priority(td, base_pri);
+	} else
+		sched_lend_prio(td, prio);
+}
+
+void
+sched_prio(struct thread *td, u_char prio)
+{
+	u_char oldprio;
+
+	/* First, update the base priority. */
+	td->td_base_pri = prio;
+
+	/*
+	 * If the thread is borrowing another thread's priority, don't
+	 * ever lower the priority.
+	 */
+	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
+		return;
+
+	/* Change the real priority. */
+	oldprio = td->td_priority;
+	sched_thread_priority(td, prio);
+
+	/*
+	 * If the thread is on a turnstile, then let the turnstile update
+	 * its state.
+	 */
+	if (TD_ON_LOCK(td) && oldprio != prio)
+		turnstile_adjust(td, oldprio);
+}
+	
+void
 sched_switch(struct thread *td, struct thread *newtd, int flags)
 {
 	struct kseq *ksq;
@@ -1374,7 +1439,6 @@
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	td->td_slptime = ticks;
-	td->td_base_pri = td->td_priority;
 }
 
 void
@@ -1644,21 +1708,14 @@
 sched_userret(struct thread *td)
 {
 	struct ksegrp *kg;
-	struct kse *ke;
 
-	kg = td->td_ksegrp;
-	ke = td->td_kse;
-	
-	if (td->td_priority != kg->kg_user_pri ||
-	    ke->ke_flags & KEF_PRIOELEV) {
+	KASSERT((td->td_flags & TDF_BORROWING) == 0,
+	    ("thread with borrowed priority returning to userland"));
+	kg = td->td_ksegrp;	
+	if (td->td_priority != kg->kg_user_pri) {
 		mtx_lock_spin(&sched_lock);
 		td->td_priority = kg->kg_user_pri;
-		if (ke->ke_flags & KEF_PRIOELEV) {
-			ke->ke_flags &= ~KEF_PRIOELEV;
-			sched_slice(ke);
-			if (ke->ke_slice == 0)
-				mi_switch(SW_INVOL, NULL);
-		}
+		td->td_base_pri = kg->kg_user_pri;
 		mtx_unlock_spin(&sched_lock);
 	}
 }

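sched_ule.c mirrors the same split: KEF_PRIOELEV gives way to the
shared TDF_BORROWING thread flag, the old sched_prio() body becomes
the internal sched_thread_priority(), and identical sched_lend_prio(),
sched_unlend_prio(), and sched_prio() entry points sit on top.  Both
schedulers also now enforce the same invariant on the way back to user
mode, taken straight from the sched_userret() hunks:

	/* All lent priority must have been handed back by now. */
	KASSERT((td->td_flags & TDF_BORROWING) == 0,
	    ("thread with borrowed priority returning to userland"));
	kg = td->td_ksegrp;
	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		td->td_base_pri = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
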
==== //depot/projects/smpng/sys/kern/subr_trap.c#75 (text+ko) ====

@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/subr_trap.c,v 1.277 2004/12/26 07:30:35 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/subr_trap.c,v 1.278 2004/12/30 20:30:58 jhb Exp $");
 
 #include "opt_ktrace.h"
 #include "opt_mac.h"
@@ -128,6 +128,7 @@
 		ticks = td->td_sticks - oticks;
 		addupc_task(td, TRAPF_PC(frame), (u_int)ticks * psratio);
 	}
+
 	/*
 	 * Let the scheduler adjust our priority etc.
 	 */

==== //depot/projects/smpng/sys/kern/subr_turnstile.c#18 (text+ko) ====

@@ -59,7 +59,7 @@
 #include "opt_turnstile_profiling.h"
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/subr_turnstile.c,v 1.150 2004/10/12 18:36:20 jhb Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/subr_turnstile.c,v 1.151 2004/12/30 20:52:44 jhb Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -145,7 +145,9 @@
 #ifdef TURNSTILE_PROFILING
 static void	init_turnstile_profiling(void *arg);
 #endif
-static void	propagate_priority(struct thread *);
+static void	propagate_priority(struct thread *td);
+static int	turnstile_adjust_thread(struct turnstile *ts,
+		    struct thread *td);
 static void	turnstile_setowner(struct turnstile *ts, struct thread *owner);
 
 /*
@@ -158,7 +160,6 @@
 {
 	struct turnstile_chain *tc;
 	struct turnstile *ts;
-	struct thread *td1;
 	int pri;
 
 	mtx_assert(&sched_lock, MA_OWNED);
@@ -187,8 +188,8 @@
 		 * isn't SRUN or SLOCK.
 		 */
 		KASSERT(!TD_IS_SLEEPING(td),
-		    ("sleeping thread (pid %d) owns a non-sleepable lock",
-		    td->td_proc->p_pid));
+		    ("sleeping thread (tid %d) owns a non-sleepable lock",
+		    td->td_tid));
 
 		/*
 		 * If this thread already has higher priority than the
@@ -198,10 +199,16 @@
 			return;
 
 		/*
-		 * If lock holder is actually running, just bump priority.
+		 * Bump this thread's priority.
+		 */
+		sched_lend_prio(td, pri);
+
+		/*
+		 * If lock holder is actually running or on the run queue
+		 * then we are done.
 		 */
-		if (TD_IS_RUNNING(td)) {
-			td->td_priority = pri;
+		if (TD_IS_RUNNING(td) || TD_ON_RUNQ(td)) {
+			MPASS(td->td_blocked == NULL);
 			return;
 		}
 
@@ -214,27 +221,11 @@
 #endif
 
 		/*
-		 * If on run queue move to new run queue, and quit.
-		 * XXXKSE this gets a lot more complicated under threads
-		 * but try anyhow.
-		 */
-		if (TD_ON_RUNQ(td)) {
-			MPASS(td->td_blocked == NULL);
-			sched_prio(td, pri);
-			return;
-		}
-
-		/*
-		 * Bump this thread's priority.
-		 */
-		td->td_priority = pri;
-
-		/*
 		 * If we aren't blocked on a lock, we should be.
 		 */
 		KASSERT(TD_ON_LOCK(td), (
-		    "process %d(%s):%d holds %s but isn't blocked on a lock\n",
-		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
+		    "thread %d(%s):%d holds %s but isn't blocked on a lock\n",
+		    td->td_tid, td->td_proc->p_comm, td->td_state,
 		    ts->ts_lockobj->lo_name));
 
 		/*
@@ -245,61 +236,81 @@
 		tc = TC_LOOKUP(ts->ts_lockobj);
 		mtx_lock_spin(&tc->tc_lock);
 
-		/*
-		 * This thread may not be blocked on this turnstile anymore
-		 * but instead might already be woken up on another CPU
-		 * that is waiting on sched_lock in turnstile_unpend() to
-		 * finish waking this thread up.  We can detect this case
-		 * by checking to see if this thread has been given a
-		 * turnstile by either turnstile_signal() or
-		 * turnstile_broadcast().  In this case, treat the thread as
-		 * if it was already running.
-		 */
-		if (td->td_turnstile != NULL) {
+		/* Resort td on the list if needed. */
+		if (!turnstile_adjust_thread(ts, td)) {
 			mtx_unlock_spin(&tc->tc_lock);
 			return;
 		}
+		mtx_unlock_spin(&tc->tc_lock);
+	}
+}
+
+/*
+ * Adjust the thread's position on a turnstile after its priority has been
+ * changed.
+ */
+static int
+turnstile_adjust_thread(struct turnstile *ts, struct thread *td)
+{
+	struct turnstile_chain *tc;
+	struct thread *td1, *td2;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	MPASS(TD_ON_LOCK(td));
 
-		/*
-		 * Check if the thread needs to be moved up on
-		 * the blocked chain.  It doesn't need to be moved
-		 * if it is already at the head of the list or if
-		 * the item in front of it still has a higher priority.
-		 */
-		if (td == TAILQ_FIRST(&ts->ts_blocked)) {
-			mtx_unlock_spin(&tc->tc_lock);
-			continue;
-		}
+	/*
+	 * This thread may not be blocked on this turnstile anymore
+	 * but instead might already be woken up on another CPU
+	 * that is waiting on sched_lock in turnstile_unpend() to
+	 * finish waking this thread up.  We can detect this case
+	 * by checking to see if this thread has been given a
+	 * turnstile by either turnstile_signal() or
+	 * turnstile_broadcast().  In this case, treat the thread as
+	 * if it was already running.
+	 */
+	if (td->td_turnstile != NULL)
+		return (0);
 
-		td1 = TAILQ_PREV(td, threadqueue, td_lockq);
-		if (td1->td_priority <= pri) {
-			mtx_unlock_spin(&tc->tc_lock);
-			continue;
-		}
+	/*
+	 * Check if the thread needs to be moved on the blocked chain.
+	 * It needs to be moved if either its priority is lower than
+	 * the previous thread or higher than the next thread.
+	 */
+	tc = TC_LOOKUP(ts->ts_lockobj);
+	mtx_assert(&tc->tc_lock, MA_OWNED);
+	td1 = TAILQ_PREV(td, threadqueue, td_lockq);
+	td2 = TAILQ_NEXT(td, td_lockq);
+	if ((td1 != NULL && td->td_priority < td1->td_priority) ||
+	    (td2 != NULL && td->td_priority > td2->td_priority)) {
 
 		/*
 		 * Remove thread from blocked chain and determine where
-		 * it should be moved up to.  Since we know that td1 has
-		 * a lower priority than td, we know that at least one
-		 * thread in the chain has a lower priority and that
-		 * td1 will thus not be NULL after the loop.
+		 * it should be moved to.
 		 */
 		mtx_lock_spin(&td_contested_lock);
 		TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq);
 		TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq) {
 			MPASS(td1->td_proc->p_magic == P_MAGIC);
-			if (td1->td_priority > pri)
+			if (td1->td_priority > td->td_priority)
 				break;
 		}
 
-		MPASS(td1 != NULL);
-		TAILQ_INSERT_BEFORE(td1, td, td_lockq);
+		if (td1 == NULL)
+			TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq);
+		else
+			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
 		mtx_unlock_spin(&td_contested_lock);
-		CTR4(KTR_LOCK,
-		    "propagate_priority: td %p moved before %p on [%p] %s",
-		    td, td1, ts->ts_lockobj, ts->ts_lockobj->lo_name);
-		mtx_unlock_spin(&tc->tc_lock);
+		if (td1 == NULL)
+			CTR3(KTR_LOCK,
+		    "turnstile_adjust_thread: td %d put at tail on [%p] %s",
+			    td->td_tid, ts->ts_lockobj, ts->ts_lockobj->lo_name);
+		else
+			CTR4(KTR_LOCK,
+		    "turnstile_adjust_thread: td %d moved before %d on [%p] %s",
+			    td->td_tid, td1->td_tid, ts->ts_lockobj,
+			    ts->ts_lockobj->lo_name);
 	}
+	return (1);
 }
 
 /*
@@ -355,6 +366,46 @@
 SYSINIT(turnstile0, SI_SUB_LOCK, SI_ORDER_ANY, init_turnstile0, NULL);
 
 /*
+ * Update a thread on the turnstile list after its priority has been changed.
+ * The old priority is passed in as an argument.
+ */
+void
+turnstile_adjust(struct thread *td, u_char oldpri)
+{
+	struct turnstile_chain *tc;
+	struct turnstile *ts;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	MPASS(TD_ON_LOCK(td));
+
+	/*
+	 * Pick up the lock that td is blocked on.
+	 */
+	ts = td->td_blocked;
+	MPASS(ts != NULL);
+	tc = TC_LOOKUP(ts->ts_lockobj);
+	mtx_lock_spin(&tc->tc_lock);
+
+	/* Resort the turnstile on the list. */
+	if (!turnstile_adjust_thread(ts, td)) {
+		mtx_unlock_spin(&tc->tc_lock);
+		return;
+	}
+
+	/*
+	 * If our priority was lowered and we are at the head of the
+	 * turnstile, then propagate our new priority up the chain.
+	 * Note that we currently don't try to revoke lent priorities
+	 * when our priority goes up.
+	 */
+	if (td == TAILQ_FIRST(&ts->ts_blocked) && td->td_priority < oldpri) {
+		mtx_unlock_spin(&tc->tc_lock);
+		propagate_priority(td);
+	} else
+		mtx_unlock_spin(&tc->tc_lock);
+}
+
+/*
  * Set the owner of the lock this turnstile is attached to.
  */
 static void
@@ -470,7 +521,7 @@
 	 */
 	mtx_lock_spin(&sched_lock);
 	if (td->td_priority < owner->td_priority)
-		owner->td_priority = td->td_priority; 
+		sched_lend_prio(owner, td->td_priority);
 	mtx_unlock_spin(&sched_lock);
 }
 
@@ -578,14 +629,14 @@
 	propagate_priority(td);
 
 	if (LOCK_LOG_TEST(lock, 0))
-		CTR4(KTR_LOCK, "%s: td %p blocked on [%p] %s", __func__, td,
-		    lock, lock->lo_name);

>>> TRUNCATED FOR MAIL (1000 lines) <<<

