PERFORCE change 98698 for review

Kip Macy kmacy at FreeBSD.org
Tue Jun 6 21:59:46 UTC 2006


http://perforce.freebsd.org/chv.cgi?CH=98698

Change 98698 by kmacy at kmacy_storage:sun4v_work_test on 2006/06/06 21:34:38

	reduce idle-thread contention by moving choosethread() (largely)
	out from under sched_lock
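
	A note on the approach: each word of the runq status bitmap gains a
	shadow word of "lock bits" (rq_lockbits).  A priority's queue is
	claimed by setting its bit with a compare-and-set loop that has
	acquire semantics, and released with a release-semantics clear,
	which is what lets runq_choose() run without sched_lock.  A minimal
	sketch of the pattern using atomic(9) (the helper names here are
	invented for illustration):

		static void
		bitlock_acquire(volatile u_long *word, u_long bit)
		{
			u_long old;

			do {
				/* expect the bit clear; spin while held */
				old = *word & ~bit;
			} while (!atomic_cmpset_acq_long(word, old, old | bit));
		}

		static void
		bitlock_release(volatile u_long *word, u_long bit)
		{
			atomic_clear_rel_long(word, bit);
		}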

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_witness.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/runq.h#2 edit
.. //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#3 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#5 (text+ko) ====

@@ -104,9 +104,7 @@
 	p = td->td_proc;
 #ifdef SMP
 	mycpu = PCPU_GET(cpumask);
-	mtx_lock_spin(&sched_lock);
-	idle_cpus_mask |= mycpu;
-	mtx_unlock_spin(&sched_lock);
+	atomic_set_int(&idle_cpus_mask, mycpu);
 #endif
 	for (;;) {
 		mtx_assert(&Giant, MA_NOTOWNED);
@@ -114,15 +112,19 @@
 		while (sched_runnable() == 0)
 			cpu_idle();
 
-		mtx_lock_spin(&sched_lock);
 #ifdef SMP
-		idle_cpus_mask &= ~mycpu;
+		atomic_clear_int(&idle_cpus_mask, mycpu);
 #endif
-		if ((td = choosethread()) != curthread)
-			sched_switch(curthread, td, SW_VOL);
+		spinlock_enter(); /* avoid preemption after choosethread */
+		if ((td = choosethread()) != curthread) {
+			mtx_lock_spin(&sched_lock);
+			spinlock_exit();
+			sched_switch(curthread, td, SW_VOL);
+			mtx_unlock_spin(&sched_lock);
+		} else
+			spinlock_exit(); 
 #ifdef SMP
-		idle_cpus_mask |= mycpu;
+		atomic_set_int(&idle_cpus_mask, mycpu);
 #endif
-		mtx_unlock_spin(&sched_lock);
 	}
 }
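
A note on the spinlock_enter()/spinlock_exit() bracket above: choosethread()
now dequeues the selected thread without sched_lock held, so the idle thread
must not be preempted between choosing and switching.  The hazardous ordering
it prevents, sketched (hypothetical ordering, not code from this change):

	td = choosethread();	/* td is now off the runq, invisible to
				   other CPUs' schedulers */
	/* an interrupt here could preempt us and strand td */
	mtx_lock_spin(&sched_lock);
	sched_switch(curthread, td, SW_VOL);

With spinlock_enter() first, interrupts stay off across the choice, and the
nested mtx_lock_spin()/spinlock_exit() pair hands the critical section over
to sched_lock before the switch.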

==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#5 (text+ko) ====

@@ -47,6 +47,11 @@
 #if defined(SMP) && defined(SCHED_4BSD)
 #include <sys/sysctl.h>
 #endif
+#ifndef SMP
+#error "this WIP currently requires SMP; the stubs below are placeholders for !SMP"
+#define runq_lock(a, b)
+#define runq_unlock(a, b)
+#endif
 
 /* Uncomment this to enable logging of critical_enter/exit. */
 #if 0
@@ -330,9 +335,18 @@
 	    rqb->rqb_bits[RQB_WORD(pri)],
 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
 	    RQB_BIT(pri), RQB_WORD(pri));
-	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
+	atomic_clear_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
 }
 
+static __inline int
+runq_isset(struct runq *rq, int pri)
+{
+	struct rqbits *rqb;
+
+	rqb = &rq->rq_status;
+	return ((rqb->rqb_bits[RQB_WORD(pri)] & RQB_BIT(pri)) ? 1 : 0);
+}
+
 /*
  * Find the index of the first non-empty run queue.  This is done by
  * scanning the status bits, a set bit indicates a non-empty queue.
@@ -343,11 +356,30 @@
 	struct rqbits *rqb;
 	int pri;
 	int i;
-
+#ifdef SMP
+	u_long lockbits;
+#endif
 	rqb = &rq->rq_status;
 	for (i = 0; i < RQB_LEN; i++)
 		if (rqb->rqb_bits[i]) {
 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
+#ifdef SMP
+			lockbits = rq->rq_lockbits[i];
+			if (!atomic_cmpset_acq_long(&rq->rq_lockbits[i],
+			    (lockbits & ~RQB_BIT(pri)),
+			    (lockbits | RQB_BIT(pri)))) {
+				/* lost the race for this bit; rescan from word 0 */
+				i = -1;
+				continue;
+			}
+			if (!runq_isset(rq, pri)) {
+				atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)],
+				    RQB_BIT(pri));
+				i = -1;	/* stale status bit; rescan from word 0 */
+				continue;
+			}
+			runq_clrbit(rq, pri);
+#endif
 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
 			    rqb->rqb_bits[i], i, pri);
 			return (pri);
@@ -370,7 +402,8 @@
 	    rqb->rqb_bits[RQB_WORD(pri)],
 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
 	    RQB_BIT(pri), RQB_WORD(pri));
-	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
+	/* XXX only works on 64-bit - 32 bit will need a mutex */
+	atomic_set_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
 }
 
 /*
@@ -385,14 +418,18 @@
 
 	pri = ke->ke_thread->td_priority / RQ_PPQ;
 	ke->ke_rqindex = pri;
-	runq_setbit(rq, pri);
 	rqh = &rq->rq_queues[pri];
 	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
+	runq_lock(ke->ke_runq, ke);
 	if (flags & SRQ_PREEMPTED)
 		TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
 	else
 		TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
+	runq_unlock(ke->ke_runq, ke);
+#ifndef SMP
+	runq_setbit(rq, pri);
+#endif
 }
 
 /*
@@ -423,6 +460,30 @@
 SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
 #endif
 
+static struct kse *
+runq_check_lastcpu(struct rqhead *rqh, int count)
+{
+	/*
+	 * In the first couple of entries, check if
+	 * there is one for our CPU as a preference.
+	 */
+	int cpu = PCPU_GET(cpuid);
+	struct kse *ke, *ke2;
+
+	ke2 = ke = TAILQ_FIRST(rqh);
+	while (count-- && ke2) {
+		if (ke2->ke_thread->td_lastcpu == cpu) {
+			ke = ke2;
+			break;
+		}
+		ke2 = TAILQ_NEXT(ke2, ke_procq);
+	}
+	KASSERT(ke != NULL, ("runq_check_lastcpu: no proc on busy queue"));
+	CTR2(KTR_RUNQ,
+	     "runq_check_lastcpu: kse=%p rqh=%p", ke, rqh);
+	return (ke);
+}
+
 /*
  * Find the highest priority process on the run queue.
  */
@@ -433,31 +494,21 @@
 	struct kse *ke;
 	int pri;
 
-	mtx_assert(&sched_lock, MA_OWNED);
 	while ((pri = runq_findbit(rq)) != -1) {
 		rqh = &rq->rq_queues[pri];
 #if defined(SMP) && defined(SCHED_4BSD)
 		/* fuzz == 1 is normal.. 0 or less are ignored */
-		if (runq_fuzz > 1) {
-			/*
-			 * In the first couple of entries, check if
-			 * there is one for our CPU as a preference.
-			 */
-			int count = runq_fuzz;
-			int cpu = PCPU_GET(cpuid);
-			struct kse *ke2;
-			ke2 = ke = TAILQ_FIRST(rqh);
-
-			while (count-- && ke2) {
-				if (ke->ke_thread->td_lastcpu == cpu) {
-					ke = ke2;
-					break;
-				}
-				ke2 = TAILQ_NEXT(ke2, ke_procq);
-			}
-		} else
+		if (runq_fuzz > 1)
+			ke = runq_check_lastcpu(rqh, runq_fuzz);
+		else
 #endif
 			ke = TAILQ_FIRST(rqh);
+		if (ke) {
+			runq_remove_unlocked(rq, ke);
+			runq_unlock(rq, ke);
+		} else
+			panic("bit set but runq empty for bit %d - lockbits=0x%lx availbits=0x%lx", 
+			      pri, rq->rq_lockbits[0], rq->rq_status.rqb_bits[0]);
 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
 		CTR3(KTR_RUNQ,
 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
@@ -473,8 +524,8 @@
  * corresponding status bit if the queue becomes empty.
  * Caller must set ke->ke_state afterwards.
  */
-void
-runq_remove(struct runq *rq, struct kse *ke)
+static __inline void
+_runq_remove(struct runq *rq, struct kse *ke)
 {
 	struct rqhead *rqh;
 	int pri;
@@ -487,12 +538,56 @@
 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
 	TAILQ_REMOVE(rqh, ke, ke_procq);
+#ifndef SMP
 	if (TAILQ_EMPTY(rqh)) {
 		CTR0(KTR_RUNQ, "runq_remove: empty");
 		runq_clrbit(rq, pri);
 	}
+#endif
 }
 
+void
+runq_remove(struct runq *rq, struct kse *ke)
+{
+	runq_lock(rq, ke);
+	_runq_remove(rq, ke);
+	runq_unlock(rq, ke);
+}
+
+void
+runq_remove_unlocked(struct runq *rq, struct kse *ke)
+{
+	_runq_remove(rq, ke);
+}
+
+#ifdef SMP
+void
+runq_lock(struct runq *rq, struct kse *ke)
+{
+	int pri;
+	u_long lockbits;
+
+	pri = ke->ke_rqindex;
+	do {
+		lockbits = (rq->rq_lockbits[RQB_WORD(pri)] & ~RQB_BIT(pri));
+	} while (!atomic_cmpset_acq_long(&rq->rq_lockbits[RQB_WORD(pri)], lockbits,
+					 (lockbits | RQB_BIT(pri)))); 
+	runq_clrbit(rq, pri);
+}
+
+void
+runq_unlock(struct runq *rq, struct kse *ke)
+{
+	struct rqhead *rqh;
+	int pri;
+	
+	pri = ke->ke_rqindex;
+	rqh = &rq->rq_queues[pri];
+	if (!TAILQ_EMPTY(rqh)) 
+		runq_setbit(rq, pri);
+	atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)], RQB_BIT(pri));
+}
+#endif
 /****** functions that are temporarily here ***********/
 #include <vm/uma.h>
 extern struct mtx kse_zombie_lock;
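
For reference while reading runq_findbit() and runq_lock() above: priorities
map onto status and lock words through the RQB macros already in
sys/sys/runq.h, roughly:

	#define	RQB_WORD(pri)	((pri) >> RQB_L2BPW)	/* word index */
	#define	RQB_BIT(pri)	((rqb_word_t)1 << ((pri) & (RQB_BPW - 1)))

rq_lockbits[] mirrors rq_status word for word.  The runq_isset() recheck
after the cmpset is what makes the unlocked scan safe: another CPU can win
the same bit between the scan and our acquire, drain the queue, and clear
the status bit, in which case we must drop the lock bit and rescan.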

==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#5 (text+ko) ====

@@ -824,7 +824,16 @@
 		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
 			sched_load_add();
 	} else {
+#if 0
+		spinlock_enter();
+		mtx_unlock_spin(&sched_lock);
+#endif
 		newtd = choosethread();
+#if 0
+		mtx_lock_spin(&sched_lock);
+		spinlock_exit();
+#endif
+
 	}
 
 	if (td != newtd) {
@@ -1110,14 +1119,15 @@
 struct thread *
 sched_choose(void)
 {
-	struct kse *ke;
+	struct kse *ke, *kesel;
 	struct runq *rq;
+	struct thread *td = NULL;
 
 #ifdef SMP
 	struct kse *kecpu;
 
 	rq = &runq;
-	ke = runq_choose(&runq);
+	kesel = ke = runq_choose(&runq);
 	kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);
 
 	if (ke == NULL || 
@@ -1125,26 +1135,27 @@
 	     kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) {
 		CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu,
 		     PCPU_GET(cpuid));
-		ke = kecpu;
+		kesel = kecpu;
 		rq = &runq_pcpu[PCPU_GET(cpuid)];
+		if (ke)
+			runq_add(&runq, ke, SRQ_PREEMPTED);
 	} else { 
+		if (kecpu)
+			runq_add(&runq_pcpu[PCPU_GET(cpuid)], kecpu, SRQ_PREEMPTED);
 		CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke);
 	}
-
 #else
 	rq = &runq;
-	ke = runq_choose(&runq);
+	kesel = ke = runq_choose(&runq);
 #endif
+	if (kesel) {
+		kesel->ke_state = KES_THREAD;
 
-	if (ke) {
-		runq_remove(rq, ke);
-		ke->ke_state = KES_THREAD;
-
-	KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
-	    ("sched_choose: process swapped out"));
+		KASSERT(kesel->ke_thread->td_proc->p_sflag & PS_INMEM,
+		    ("sched_choose: process swapped out"));
-		return (ke->ke_thread);
+		td = kesel->ke_thread;
 	}
-	return (NULL);
+	return (td);
 }
 
 void

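A note on the sched_choose() rework: runq_choose() now dequeues the thread
it returns, so whichever of the two candidates loses the priority comparison
must be pushed back, and SRQ_PREEMPTED keeps it at the head of its queue.
In outline (prefer() is shorthand invented here, not a real function):

	ke    = runq_choose(&runq);				/* global, dequeued */
	kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);	/* per-CPU, dequeued */
	if (prefer(kecpu, ke)) {
		kesel = kecpu;
		if (ke != NULL)			/* re-queue the loser */
			runq_add(&runq, ke, SRQ_PREEMPTED);
	} else {
		kesel = ke;
		if (kecpu != NULL)
			runq_add(&runq_pcpu[PCPU_GET(cpuid)], kecpu,
			    SRQ_PREEMPTED);
	}

The #if 0 blocks in sched_switch() look like the same lock-shuffling
experiment as kern_idle.c, left disabled for now.
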
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_witness.c#5 (text+ko) ====

@@ -400,6 +400,7 @@
 	{ "vm page queue free mutex", &lock_class_mtx_spin },
 	{ "icu", &lock_class_mtx_spin },
 #ifdef SMP
+	{ "runq lock", &lock_class_mtx_spin },
 	{ "smp rendezvous", &lock_class_mtx_spin },
 #if defined(__i386__) || defined(__amd64__)
 	{ "tlb", &lock_class_mtx_spin },

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/runq.h#2 (text+ko) ====

@@ -42,17 +42,21 @@
  * Type of run queue status word.
  */
 typedef	u_int64_t	rqb_word_t;
+static const int ffslut64[64] = {
+	64,  1, 48,  2, 57, 49, 28,  3,
+	61, 58, 50, 42, 38, 29, 17,  4,
+	62, 55, 59, 36, 53, 51, 43, 22,
+	45, 39, 33, 30, 24, 18, 12,  5,
+	63, 47, 56, 27, 60, 41, 37, 16,
+	54, 35, 52, 21, 44, 32, 23, 11,
+	46, 26, 40, 15, 34, 20, 31, 10,
+	25, 14, 19,  9, 13,  8,  7,  6
+};
 
 static __inline u_long
-ffs64(u_long mask)
+ffs64(uint64_t mask)
 {
-	u_long bit;
-
-	if (mask == 0)
-		return (0);
-	for (bit = 1; (mask & 1UL) == 0; bit++)
-		mask >>= 1UL;
-	return (bit);
+	return (mask == 0 ? 0 :
+	    ffslut64[((mask & (~mask + 1)) * 0x07EF3AE369961512ULL) >> 58]);
 }
-
 #endif
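
The new ffs64() is a branch-free De Bruijn-style lookup: mask & -mask
isolates the lowest set bit, the 64-bit multiply maps each of the 64
possible isolated bits to a distinct value in the top six bits, and the
table decodes that to a 1-based bit index.  A quick host-side check of the
table/constant pair from this diff (standalone C; it should print nothing
if the pair is correct):

	#include <stdint.h>
	#include <stdio.h>

	static const int ffslut64[64] = {
		64,  1, 48,  2, 57, 49, 28,  3,
		61, 58, 50, 42, 38, 29, 17,  4,
		62, 55, 59, 36, 53, 51, 43, 22,
		45, 39, 33, 30, 24, 18, 12,  5,
		63, 47, 56, 27, 60, 41, 37, 16,
		54, 35, 52, 21, 44, 32, 23, 11,
		46, 26, 40, 15, 34, 20, 31, 10,
		25, 14, 19,  9, 13,  8,  7,  6
	};

	static unsigned
	ffs64(uint64_t mask)
	{
		return (mask == 0 ? 0 :
		    ffslut64[((mask & (~mask + 1)) * 0x07EF3AE369961512ULL) >> 58]);
	}

	int
	main(void)
	{
		for (int bit = 0; bit < 64; bit++)
			if (ffs64((uint64_t)1 << bit) != (unsigned)(bit + 1))
				printf("bit %d decodes to %u\n", bit,
				    ffs64((uint64_t)1 << bit));
		return (0);
	}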

==== //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#3 (text+ko) ====

@@ -59,6 +59,7 @@
  */
 struct runq {
 	struct	rqbits rq_status;
+	rqb_word_t     rq_lockbits[RQB_LEN];
 	struct	rqhead rq_queues[RQ_NQS];
 };
 
@@ -67,5 +68,8 @@
 struct	kse *runq_choose(struct runq *);
 void	runq_init(struct runq *);
 void	runq_remove(struct runq *, struct kse *);
+void	runq_remove_unlocked(struct runq *, struct kse *);
+void	runq_lock(struct runq *, struct kse *);
+void	runq_unlock(struct runq *, struct kse *);
 
 #endif
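
Finally, the locking discipline the new sys/runq.h interface expects of
callers, sketched (kernel context; runq_remove_unlocked() is only safe
while the caller holds the priority's lock bit):

	runq_lock(rq, ke);		/* claim ke->ke_rqindex's lock bit */
	runq_remove_unlocked(rq, ke);	/* queue surgery, lock bit held */
	runq_unlock(rq, ke);		/* re-set status bit if non-empty,
					   then release the lock bit */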

