PERFORCE change 98698 for review
Kip Macy
kmacy at FreeBSD.org
Tue Jun 6 21:59:46 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=98698
Change 98698 by kmacy at kmacy_storage:sun4v_work_test on 2006/06/06 21:34:38
Reduce idle thread contention by moving choosethread (largely) out from under
sched_lock.
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/subr_witness.c#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/runq.h#2 edit
.. //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#3 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#5 (text+ko) ====
@@ -104,9 +104,7 @@
p = td->td_proc;
#ifdef SMP
mycpu = PCPU_GET(cpumask);
- mtx_lock_spin(&sched_lock);
- idle_cpus_mask |= mycpu;
- mtx_unlock_spin(&sched_lock);
+ atomic_set_int(&idle_cpus_mask, mycpu);
#endif
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
@@ -114,15 +112,21 @@
while (sched_runnable() == 0)
cpu_idle();
- mtx_lock_spin(&sched_lock);
+
#ifdef SMP
- idle_cpus_mask &= ~mycpu;
+ atomic_clear_int(&idle_cpus_mask, mycpu);
#endif
- if ((td = choosethread()) != curthread)
- sched_switch(curthread, td, SW_VOL);
+ spinlock_enter(); /* avoid preemption after choosethread */
+ if ((td = choosethread()) != curthread) {
+ mtx_lock_spin(&sched_lock);
+ spinlock_exit();
+ sched_switch(curthread, td, SW_VOL);
+ mtx_unlock_spin(&sched_lock);
+ } else
+ spinlock_exit();
#ifdef SMP
- idle_cpus_mask |= mycpu;
+ atomic_set_int(&idle_cpus_mask, mycpu);
#endif
- mtx_unlock_spin(&sched_lock);
+
}
}
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#5 (text+ko) ====
@@ -47,6 +47,11 @@
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif
+#ifndef SMP
+#error "use SMP!"
+#define runq_lock(a, b)
+#define runq_unlock(a, b)
+#endif
/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
@@ -330,9 +335,17 @@
rqb->rqb_bits[RQB_WORD(pri)],
rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
RQB_BIT(pri), RQB_WORD(pri));
- rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
+ atomic_clear_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
}
+static __inline int
+runq_isset(struct runq *rq, int pri)
+{
+ struct rqbits *rqb;
+
+ rqb = &rq->rq_status;
+ return ((rqb->rqb_bits[RQB_WORD(pri)] & RQB_BIT(pri)) ? 1 : 0);
+}
/*
* Find the index of the first non-empty run queue. This is done by
* scanning the status bits, a set bit indicates a non-empty queue.
@@ -343,11 +356,30 @@
struct rqbits *rqb;
int pri;
int i;
-
+#ifdef SMP
+ u_long lockbits;
+#endif
rqb = &rq->rq_status;
for (i = 0; i < RQB_LEN; i++)
if (rqb->rqb_bits[i]) {
pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
+#ifdef SMP
+ lockbits = rq->rq_lockbits[i];
+ if (!atomic_cmpset_acq_long(&rq->rq_lockbits[i],
+ (lockbits & ~RQB_BIT(pri)),
+ (lockbits | RQB_BIT(pri))))
+ {
+ i = 0;
+ continue;
+ }
+ if (!runq_isset(rq, pri)) {
+ atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)],
+ RQB_BIT(pri));
+ i = 0;
+ continue;
+ }
+ runq_clrbit(rq, pri);
+#endif
CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
rqb->rqb_bits[i], i, pri);
return (pri);
@@ -370,7 +402,8 @@
rqb->rqb_bits[RQB_WORD(pri)],
rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
RQB_BIT(pri), RQB_WORD(pri));
- rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
+ /* XXX only works on 64-bit - 32 bit will need a mutex */
+ atomic_set_long(&rqb->rqb_bits[RQB_WORD(pri)], RQB_BIT(pri));
}
/*
@@ -385,14 +418,18 @@
pri = ke->ke_thread->td_priority / RQ_PPQ;
ke->ke_rqindex = pri;
- runq_setbit(rq, pri);
rqh = &rq->rq_queues[pri];
CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
+ runq_lock(ke->ke_runq, ke);
if (flags & SRQ_PREEMPTED)
TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
else
TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
+ runq_unlock(ke->ke_runq, ke);
+#ifndef SMP
+ runq_setbit(rq, pri);
+#endif
}
/*
@@ -423,6 +460,30 @@
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif
+static struct kse *
+runq_check_lastcpu(struct rqhead *rqh, int count)
+{
+ /*
+ * In the first couple of entries, check if
+ * there is one for our CPU as a preference.
+ */
+ int cpu = PCPU_GET(cpuid);
+ struct kse *ke, *ke2;
+ ke2 = ke = TAILQ_FIRST(rqh);
+
+ while (count-- && ke2) {
+ if (ke->ke_thread->td_lastcpu == cpu) {
+ ke = ke2;
+ break;
+ }
+ ke2 = TAILQ_NEXT(ke2, ke_procq);
+ }
+ KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
+ CTR2(KTR_RUNQ,
+ "runq_choose: kse=%p rqh=%p", ke, rqh);
+ return (ke);
+}
+
/*
* Find the highest priority process on the run queue.
*/
@@ -433,31 +494,21 @@
struct kse *ke;
int pri;
- mtx_assert(&sched_lock, MA_OWNED);
while ((pri = runq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
/* fuzz == 1 is normal.. 0 or less are ignored */
- if (runq_fuzz > 1) {
- /*
- * In the first couple of entries, check if
- * there is one for our CPU as a preference.
- */
- int count = runq_fuzz;
- int cpu = PCPU_GET(cpuid);
- struct kse *ke2;
- ke2 = ke = TAILQ_FIRST(rqh);
-
- while (count-- && ke2) {
- if (ke->ke_thread->td_lastcpu == cpu) {
- ke = ke2;
- break;
- }
- ke2 = TAILQ_NEXT(ke2, ke_procq);
- }
- } else
+ if (runq_fuzz > 1)
+ ke = runq_check_lastcpu(rqh, runq_fuzz);
+ else
#endif
ke = TAILQ_FIRST(rqh);
+ if (ke) {
+ runq_remove_unlocked(rq, ke);
+ runq_unlock(rq, ke);
+ } else
+ panic("bit set but runq empty for bit %d - lockbits=0x%lx availbits=0x%lx",
+ pri, rq->rq_lockbits[0], rq->rq_status.rqb_bits[0]);
KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
CTR3(KTR_RUNQ,
"runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
@@ -465,7 +516,7 @@
}
CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
- return (NULL);
+ return (NULL);
}
/*
@@ -473,8 +524,8 @@
* corresponding status bit if the queue becomes empty.
* Caller must set ke->ke_state afterwards.
*/
-void
-runq_remove(struct runq *rq, struct kse *ke)
+static __inline void
+_runq_remove(struct runq *rq, struct kse *ke)
{
struct rqhead *rqh;
int pri;
@@ -487,12 +538,56 @@
ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
TAILQ_REMOVE(rqh, ke, ke_procq);
+#ifndef SMP
if (TAILQ_EMPTY(rqh)) {
CTR0(KTR_RUNQ, "runq_remove: empty");
runq_clrbit(rq, pri);
}
+#endif
}
+void
+runq_remove(struct runq *rq, struct kse *ke)
+{
+ runq_lock(rq, ke);
+ _runq_remove(rq, ke);
+ runq_unlock(rq, ke);
+}
+
+void
+runq_remove_unlocked(struct runq *rq, struct kse *ke)
+{
+ _runq_remove(rq, ke);
+}
+
+#ifdef SMP
+void
+runq_lock(struct runq *rq, struct kse *ke)
+{
+ int pri;
+ u_long lockbits;
+
+ pri = ke->ke_rqindex;
+ do {
+ lockbits = (rq->rq_lockbits[RQB_WORD(pri)] & ~RQB_BIT(pri));
+ } while (!atomic_cmpset_acq_long(&rq->rq_lockbits[RQB_WORD(pri)], lockbits,
+ (lockbits | RQB_BIT(pri))));
+ runq_clrbit(rq, pri);
+}
+
+void
+runq_unlock(struct runq *rq, struct kse *ke)
+{
+ struct rqhead *rqh;
+ int pri;
+
+ pri = ke->ke_rqindex;
+ rqh = &rq->rq_queues[pri];
+ if (!TAILQ_EMPTY(rqh))
+ runq_setbit(rq, pri);
+ atomic_clear_rel_long(&rq->rq_lockbits[RQB_WORD(pri)], RQB_BIT(pri));
+}
+#endif
/****** functions that are temporarily here ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;
==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#5 (text+ko) ====
@@ -824,7 +824,16 @@
if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
sched_load_add();
} else {
+#if 0
+ spinlock_enter();
+ mtx_unlock_spin(&sched_lock);
+#endif
newtd = choosethread();
+#if 0
+ mtx_lock_spin(&sched_lock);
+ spinlock_exit();
+#endif
+
}
if (td != newtd) {
@@ -1110,14 +1119,15 @@
struct thread *
sched_choose(void)
{
- struct kse *ke;
+ struct kse *ke, *kesel;
struct runq *rq;
+ struct thread *td = NULL;
#ifdef SMP
struct kse *kecpu;
rq = &runq;
- ke = runq_choose(&runq);
+ kesel = ke = runq_choose(&runq);
kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);
if (ke == NULL ||
@@ -1125,26 +1135,27 @@
kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) {
CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu,
PCPU_GET(cpuid));
- ke = kecpu;
+ kesel = kecpu;
rq = &runq_pcpu[PCPU_GET(cpuid)];
+ if (ke)
+ runq_add(rq, ke, SRQ_PREEMPTED);
} else {
+ if (kecpu)
+ runq_add(rq, kecpu, SRQ_PREEMPTED);
CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke);
}
-
#else
rq = &runq;
- ke = runq_choose(&runq);
+ kesel = ke = runq_choose(&runq);
#endif
+ if (kesel) {
+ kesel->ke_state = KES_THREAD;
- if (ke) {
- runq_remove(rq, ke);
- ke->ke_state = KES_THREAD;
-
KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
("sched_choose: process swapped out"));
- return (ke->ke_thread);
+ td = kesel->ke_thread;
}
- return (NULL);
+ return (td);
}
void
==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_witness.c#5 (text+ko) ====
@@ -400,6 +400,7 @@
{ "vm page queue free mutex", &lock_class_mtx_spin },
{ "icu", &lock_class_mtx_spin },
#ifdef SMP
+ { "runq lock", &lock_class_mtx_spin },
{ "smp rendezvous", &lock_class_mtx_spin },
#if defined(__i386__) || defined(__amd64__)
{ "tlb", &lock_class_mtx_spin },
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/runq.h#2 (text+ko) ====
@@ -42,17 +42,19 @@
* Type of run queue status word.
*/
typedef u_int64_t rqb_word_t;
+static int ffslut64[] = {
+ 64, 1, 48, 2, 57, 49, 28, 3,
+ 61, 58, 50, 42, 38, 29, 17, 4,
+ 62, 55, 59, 36, 53, 51, 43, 22,
+ 45, 39, 33, 30, 24, 18, 12, 5,
+ 63, 47, 56, 27, 60, 41, 37, 16,
+ 54, 35, 52, 21, 44, 32, 23, 11,
+ 46, 26, 40, 15, 34, 20, 31, 10,
+ 25, 14, 19, 9, 13, 8, 7, 6
+};
-static __inline u_long
-ffs64(u_long mask)
+static inline u_long ffs64(uint64_t mask)
{
- u_long bit;
-
- if (mask == 0)
- return (0);
- for (bit = 1; (mask & 1UL) == 0; bit++)
- mask >>= 1UL;
- return (bit);
+ return mask ? ffslut64[((mask & (-mask)) * 0x07EF3AE369961512) >> 58] : 0;
}
-
#endif
==== //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#3 (text+ko) ====
@@ -59,6 +59,7 @@
*/
struct runq {
struct rqbits rq_status;
+ rqb_word_t rq_lockbits[RQB_LEN];
struct rqhead rq_queues[RQ_NQS];
};
@@ -67,5 +68,8 @@
struct kse *runq_choose(struct runq *);
void runq_init(struct runq *);
void runq_remove(struct runq *, struct kse *);
+void runq_remove_unlocked(struct runq *, struct kse *);
+void runq_lock(struct runq *, struct kse *);
+void runq_unlock(struct runq *, struct kse *);
#endif
More information about the p4-projects
mailing list