PERFORCE change 103359 for review
John Birrell
jb at FreeBSD.org
Sun Aug 6 20:33:33 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=103359
Change 103359 by jb at jb_freebsd2 on 2006/08/06 20:32:38
Merge in KSE again, but only if the KSE kernel option is defined.
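Every divergence below is keyed on the single KSE kernel option, so a kernel
built without it compiles only the 1:1 threading paths. A minimal sketch of
the recurring pattern (the option wiring is an assumption, not part of this
change; it would presumably have to live in opt_global.h, since it changes
the layout of struct proc and struct thread):

	/* kernel config:  options KSE  (assumed global option) */

	#ifdef KSE
		proc_linkup(&proc0, &ksegrp0, &thread0);  /* M:N: link the ksegrp too */
	#else
		proc_linkup(&proc0, &thread0);            /* 1:1: no ksegrp layer */
	#endif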
Affected files ...
.. //depot/projects/dtrace/src/sys/i386/i386/machdep.c#7 edit
.. //depot/projects/dtrace/src/sys/kern/init_main.c#4 edit
.. //depot/projects/dtrace/src/sys/kern/kern_resource.c#4 edit
.. //depot/projects/dtrace/src/sys/kern/kern_switch.c#7 edit
.. //depot/projects/dtrace/src/sys/kern/kern_thr.c#8 edit
.. //depot/projects/dtrace/src/sys/kern/kern_thread.c#4 edit
.. //depot/projects/dtrace/src/sys/kern/sched_4bsd.c#11 edit
.. //depot/projects/dtrace/src/sys/kern/tty.c#3 edit
.. //depot/projects/dtrace/src/sys/posix4/ksched.c#7 edit
Differences ...
==== //depot/projects/dtrace/src/sys/i386/i386/machdep.c#7 (text+ko) ====
@@ -2079,7 +2079,11 @@
* This may be done better later if it gets more high level
* components in it. If so just link td->td_proc here.
*/
+#ifdef KSE
+ proc_linkup(&proc0, &ksegrp0, &thread0);
+#else
proc_linkup(&proc0, &thread0);
+#endif
metadata_missing = 0;
if (bootinfo.bi_modulep) {
==== //depot/projects/dtrace/src/sys/kern/init_main.c#4 (text+ko) ====
@@ -95,6 +95,9 @@
static struct pgrp pgrp0;
struct proc proc0;
struct thread thread0 __aligned(8);
+#ifdef KSE
+struct ksegrp ksegrp0;
+#endif
struct vmspace vmspace0;
struct proc *initproc;
@@ -221,9 +224,6 @@
if ((*sipp)->subsystem == SI_SUB_DONE)
continue;
-#if 0
- printf("\t*%p(%p)\n", (*sipp)->func, (*sipp)->udata);
-#endif
#if defined(VERBOSE_SYSINIT)
if ((*sipp)->subsystem > last) {
verbose = 1;
@@ -274,10 +274,6 @@
/* Call function */
(*((*sipp)->func))((*sipp)->udata);
-#if 0
- printf("mi_startup: pstate=0x%lx pil=0x%lx\n",
- rdpr(pstate), rdpr(pil));
-#endif
#if defined(VERBOSE_SYSINIT)
if (verbose)
@@ -392,17 +388,35 @@
struct proc *p;
unsigned i;
struct thread *td;
+#ifdef KSE
+ struct ksegrp *kg;
+#endif
GIANT_REQUIRED;
p = &proc0;
td = &thread0;
+#ifdef KSE
+ kg = &ksegrp0;
+#endif
/*
* Initialize magic number.
*/
p->p_magic = P_MAGIC;
+#ifdef KSE
/*
+ * Initialize thread, process and ksegrp structures.
+ */
+ procinit(); /* set up proc zone */
+ threadinit(); /* set up thread, upcall and KSEGRP zones */
+
+ /*
+ * Initialise scheduler resources.
+ * Add scheduler specific parts to proc, ksegrp, thread as needed.
+ */
+#else
+ /*
* Initialize thread and process structures.
*/
procinit(); /* set up proc zone */
@@ -412,6 +426,7 @@
* Initialise scheduler resources.
* Add scheduler specific parts to proc, thread as needed.
*/
+#endif
schedinit(); /* scheduler gets its house in order */
/*
* Initialize sleep queue hash table
@@ -447,8 +462,13 @@
STAILQ_INIT(&p->p_ktr);
p->p_nice = NZERO;
td->td_state = TDS_RUNNING;
+#ifdef KSE
+ kg->kg_pri_class = PRI_TIMESHARE;
+ kg->kg_user_pri = PUSER;
+#else
td->td_pri_class = PRI_TIMESHARE;
td->td_user_pri = PUSER;
+#endif
td->td_priority = PVM;
td->td_base_pri = PUSER;
td->td_oncpu = 0;
@@ -611,7 +631,7 @@
p = td->td_proc;
vfs_mountroot();
-
+
/*
* Need just enough stack to hold the faked-up "execve()" arguments.
*/
@@ -687,7 +707,6 @@
args.fname = arg0;
args.argv = uap;
args.envv = NULL;
-
/*
* Now try to exec the program. If can't for any reason
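The init_main.c hunks above re-introduce ksegrp0 as the third leg of the
boot-time bootstrap: under KSE the user priority and class move from the
thread (td_user_pri, td_pri_class) to the ksegrp (kg_user_pri, kg_pri_class).
A rough sketch of the object graph the KSE paths assume, with field names as
used later in this diff; the field subset and layout are illustrative only,
not the real struct definitions:

	#include <sys/types.h>
	#include <sys/queue.h>

	struct thread;

	struct ksegrp {                          /* one per scheduling group */
		u_char  kg_pri_class;            /* e.g. PRI_TIMESHARE */
		u_char  kg_user_pri;             /* user priority of the group */
		TAILQ_HEAD(, thread) kg_runq;    /* runnable threads, priority order */
		struct thread *kg_last_assigned; /* last queued thread holding a slot */
		int     kg_avail_opennings;      /* free concurrency slots */
	};

	struct thread {
		struct ksegrp *td_ksegrp;        /* group this thread runs in */
		TAILQ_ENTRY(thread) td_runq;     /* linkage on kg_runq */
	};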
==== //depot/projects/dtrace/src/sys/kern/kern_resource.c#4 (text+ko) ====
@@ -292,7 +292,11 @@
{
struct proc *curp;
struct proc *p;
+#ifdef KSE
+ struct ksegrp *kg;
+#else
struct thread *tdp;
+#endif
struct rtprio rtp;
int cierror, error;
@@ -328,14 +332,23 @@
* as leaving it zero.
*/
if (uap->pid == 0) {
+#ifdef KSE
+ pri_to_rtp(td->td_ksegrp, &rtp);
+#else
pri_to_rtp(td, &rtp);
+#endif
} else {
struct rtprio rtp2;
rtp.type = RTP_PRIO_IDLE;
rtp.prio = RTP_PRIO_MAX;
+#ifdef KSE
+ FOREACH_KSEGRP_IN_PROC(p, kg) {
+ pri_to_rtp(kg, &rtp2);
+#else
FOREACH_THREAD_IN_PROC(p, tdp) {
pri_to_rtp(tdp, &rtp2);
+#endif
if (rtp2.type < rtp.type ||
(rtp2.type == rtp.type &&
rtp2.prio < rtp.prio)) {
@@ -376,19 +389,39 @@
}
}
+#ifdef KSE
+ /*
+ * If we are setting our own priority, set just our
+ * KSEGRP but if we are doing another process,
+ * do all the groups on that process. If we
+ * specify our own pid we do the latter.
+ */
+#else
/*
* If we are setting our own priority, set just our
* thread but if we are doing another process,
* do all the threads on that process. If we
* specify our own pid we do the latter.
*/
+#endif
mtx_lock_spin(&sched_lock);
if (uap->pid == 0) {
+#ifdef KSE
+ error = rtp_to_pri(&rtp, td->td_ksegrp);
+#else
error = rtp_to_pri(&rtp, td);
+#endif
} else {
+#ifdef KSE
+ FOREACH_KSEGRP_IN_PROC(p, kg) {
+ if ((error = rtp_to_pri(&rtp, kg)) != 0) {
+ break;
+ }
+#else
FOREACH_THREAD_IN_PROC(p, td) {
if ((error = rtp_to_pri(&rtp, td)) != 0)
break;
+#endif
}
}
mtx_unlock_spin(&sched_lock);
@@ -402,7 +435,11 @@
}
int
+#ifdef KSE
+rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
+#else
rtp_to_pri(struct rtprio *rtp, struct thread *td)
+#endif
{
mtx_assert(&sched_lock, MA_OWNED);
@@ -410,42 +447,85 @@
return (EINVAL);
switch (RTP_PRIO_BASE(rtp->type)) {
case RTP_PRIO_REALTIME:
+#ifdef KSE
+ kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
+#else
td->td_user_pri = PRI_MIN_REALTIME + rtp->prio;
+#endif
break;
case RTP_PRIO_NORMAL:
+#ifdef KSE
+ kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
+#else
td->td_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
+#endif
break;
case RTP_PRIO_IDLE:
+#ifdef KSE
+ kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
+#else
td->td_user_pri = PRI_MIN_IDLE + rtp->prio;
+#endif
break;
default:
return (EINVAL);
}
+#ifdef KSE
+ sched_class(kg, rtp->type);
+ if (curthread->td_ksegrp == kg) {
+ sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */
+ }
+#else
sched_class(td, rtp->type); /* XXX fix */
if (curthread == td)
sched_prio(curthread, td->td_user_pri); /* XXX dubious */
+#endif
return (0);
}
void
+#ifdef KSE
+pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
+#else
pri_to_rtp(struct thread *td, struct rtprio *rtp)
+#endif
{
mtx_assert(&sched_lock, MA_OWNED);
+#ifdef KSE
+ switch (PRI_BASE(kg->kg_pri_class)) {
+#else
switch (PRI_BASE(td->td_pri_class)) {
+#endif
case PRI_REALTIME:
+#ifdef KSE
+ rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
+#else
rtp->prio = td->td_user_pri - PRI_MIN_REALTIME;
+#endif
break;
case PRI_TIMESHARE:
+#ifdef KSE
+ rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
+#else
rtp->prio = td->td_user_pri - PRI_MIN_TIMESHARE;
+#endif
break;
case PRI_IDLE:
+#ifdef KSE
+ rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
+#else
rtp->prio = td->td_user_pri - PRI_MIN_IDLE;
+#endif
break;
default:
break;
}
+#ifdef KSE
+ rtp->type = kg->kg_pri_class;
+#else
rtp->type = td->td_pri_class;
+#endif
}
#if defined(COMPAT_43)
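The rtp_to_pri()/pri_to_rtp() pair above translates between the rtprio(2)
classes and the kernel priority ranges, acting on the ksegrp under KSE and on
the thread otherwise. For illustration, a hypothetical userland snippet (not
part of this change) exercising the RTP_PRIO_REALTIME case: after a
successful call, kg_user_pri (or td_user_pri) is PRI_MIN_REALTIME + 5. Note
that setting a realtime class normally requires root privileges.

	#include <sys/types.h>
	#include <sys/rtprio.h>
	#include <err.h>

	int
	main(void)
	{
		struct rtprio rtp = { .type = RTP_PRIO_REALTIME, .prio = 5 };

		if (rtprio(RTP_SET, 0, &rtp) == -1)	/* pid 0 == this process */
			err(1, "rtprio");
		return (0);
	}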
==== //depot/projects/dtrace/src/sys/kern/kern_switch.c#7 (text+ko) ====
@@ -24,6 +24,69 @@
* SUCH DAMAGE.
*/
+#ifdef KSE
+/***
+Here is the logic:
+
+If there are N processors, then there are at most N KSEs (kernel
+schedulable entities) working to process threads that belong to a
+KSEGROUP (kg). If there are X of these KSEs actually running at the
+moment in question, then there are at most M (N-X) of these KSEs on
+the run queue, as running KSEs are not on the queue.
+
+Runnable threads are queued off the KSEGROUP in priority order.
+If there are M or more threads runnable, the top M threads
+(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
+their priority from those threads and are put on the run queue.
+
+The last thread that had a priority high enough to have a KSE associated
+with it, AND IS ON THE RUN QUEUE, is pointed to by
+kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
+assigned, either because all the available KSEs are actively running or
+because there are no threads queued, that pointer is NULL.
+
+When a KSE is removed from the run queue to become runnable, we know
+it was associated with the highest priority thread in the queue (at the head
+of the queue). If it is also the last assigned we know M was 1 and must
+now be 0. Since the thread is no longer queued that pointer must be
+removed from it. Since we know there were no more KSEs available
+(M was 1 and is now 0), and since we are not FREEING our KSE
+but using it, we know there are STILL no more KSEs available, so we
+can prove that the next thread in the ksegrp list will not have a KSE
+to assign to it; the pointer must therefore be made 'invalid' (NULL).
+
+The pointer exists so that when a new thread is made runnable, it can
+have its priority compared with the last assigned thread to see if
+it should 'steal' its KSE or not, i.e. whether it is 'earlier'
+on the list than that thread or later. If it's earlier, then the KSE is
+removed from the last assigned (which is now not assigned a KSE)
+and reassigned to the new thread, which is placed earlier in the list.
+The pointer is then backed up to the previous thread (which may or may not
+be the new thread).
+
+When a thread sleeps or is removed, the KSE becomes available and if there
+are queued threads that are not assigned KSEs, the highest priority one of
+them is assigned the KSE, which is then placed back on the run queue at
+the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
+to point to it.
+
+The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads)
+ \ \____
+ \ \
+ KSEGROUP---thread--thread--thread (queued in priority order)
+ \ /
+ \_______________/
+ (last_assigned)
+
+The result of this scheme is that the M available KSEs are always
+queued at the priorities they have inherited from the M highest priority
+threads for that KSEGROUP. If this situation changes, the KSEs are
+reassigned to keep this true.
+***/
+#endif
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/sys/kern/kern_switch.c,v 1.122 2006/06/13 13:12:56 davidxu Exp $");
@@ -48,7 +111,6 @@
#include <sys/sysctl.h>
#endif
-
/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL KTR_SCHED
@@ -82,36 +144,79 @@
/************************************************************************
* Functions that manipulate runnability from a thread perspective. *
************************************************************************/
+#ifdef KSE
/*
+ * Select the KSE that will be run next. From that find the thread, and
+ * remove it from the KSEGRP's run queue. If there is thread clustering,
+ * this will be what does it.
+ */
+#else
+/*
* Select the thread that will be run next.
*/
+#endif
struct thread *
choosethread(void)
{
+#ifdef KSE
+ struct kse *ke;
+#endif
struct thread *td;
+#ifdef KSE
+ struct ksegrp *kg;
+#endif
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
/* Shutting down, run idlethread on AP's */
td = PCPU_GET(idlethread);
+#ifdef KSE
+ ke = td->td_kse;
+#endif
CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+#ifdef KSE
+ ke->ke_flags |= KEF_DIDRUN;
+#else
td->td_kse->ke_flags |= KEF_DIDRUN;
+#endif
TD_SET_RUNNING(td);
return (td);
}
#endif
retry:
+#ifdef KSE
+ ke = sched_choose();
+ if (ke) {
+ td = ke->ke_thread;
+ KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
+ kg = ke->ke_ksegrp;
+ if (td->td_proc->p_flag & P_HADTHREADS) {
+ if (kg->kg_last_assigned == td) {
+ kg->kg_last_assigned = TAILQ_PREV(td,
+ threadqueue, td_runq);
+ }
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ }
+#else
td = sched_choose();
if (td) {
+#endif
CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
td, td->td_priority);
} else {
/* Simulate runq_choose() having returned the idle thread */
td = PCPU_GET(idlethread);
+#ifdef KSE
+ ke = td->td_kse;
+#endif
CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
}
+#ifdef KSE
+ ke->ke_flags |= KEF_DIDRUN;
+#else
td->td_kse->ke_flags |= KEF_DIDRUN;
+#endif
/*
* If we are in panic, only allow system threads,
@@ -128,12 +233,105 @@
return (td);
}
+#ifdef KSE
+/*
+ * Given a surplus system slot, try to assign a new runnable thread to it.
+ * Called from:
+ * sched_thread_exit() (local)
+ * sched_switch() (local)
+ * remrunqueue() (local) (not at the moment)
+ */
+static void
+slot_fill(struct ksegrp *kg)
+{
+ struct thread *td;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ while (kg->kg_avail_opennings > 0) {
+ /*
+ * Find the first unassigned thread
+ */
+ if ((td = kg->kg_last_assigned) != NULL)
+ td = TAILQ_NEXT(td, td_runq);
+ else
+ td = TAILQ_FIRST(&kg->kg_runq);
+
+ /*
+ * If we found one, send it to the system scheduler.
+ */
+ if (td) {
+ kg->kg_last_assigned = td;
+ sched_add(td, SRQ_YIELDING);
+ CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
+ } else {
+ /* no threads to use up the slots. quit now */
+ break;
+ }
+ }
+}
+
+#ifdef SCHED_4BSD
+/*
+ * Remove a thread from its KSEGRP's run queue.
+ * This in turn may remove it from a KSE if it was already assigned
+ * to one, possibly causing a new thread to be assigned to the KSE
+ * and the KSE getting a new priority.
+ */
+static void
+remrunqueue(struct thread *td)
+{
+ struct thread *td2, *td3;
+ struct ksegrp *kg;
+ struct kse *ke;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
+ kg = td->td_ksegrp;
+ ke = td->td_kse;
+ CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
+ TD_SET_CAN_RUN(td);
+ /*
+ * If it is not a threaded process, take the shortcut.
+ */
+ if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
+ /* remove from the system run queue and free up a slot */
+ sched_rem(td);
+ return;
+ }
+ td3 = TAILQ_PREV(td, threadqueue, td_runq);
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ if (ke->ke_state == KES_ONRUNQ) {
+ /*
+ * This thread has been assigned to the system run queue.
+ * We need to dissociate it and try to assign the
+ * KSE to the next available thread. Then, we should
+ * see if we need to move the KSE in the run queues.
+ */
+ sched_rem(td);
+ td2 = kg->kg_last_assigned;
+ KASSERT((td2 != NULL), ("last assigned has wrong value"));
+ if (td2 == td)
+ kg->kg_last_assigned = td3;
+ /* slot_fill(kg); */ /* will replace it with another */
+ }
+}
+#endif
+#endif
+
/*
* Change the priority of a thread that is on the run queue.
*/
void
+#ifdef KSE
+adjustrunqueue( struct thread *td, int newpri)
+#else
adjustrunqueue(struct thread *td, int newpri)
+#endif
{
+#ifdef KSE
+ struct ksegrp *kg;
+#endif
struct kse *ke;
mtx_assert(&sched_lock, MA_OWNED);
@@ -141,6 +339,44 @@
ke = td->td_kse;
CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
+#ifdef KSE
+ /*
+ * If it is not a threaded process, take the shortcut.
+ */
+ if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
+ /* We only care about the kse in the run queue. */
+ td->td_priority = newpri;
+#ifndef SCHED_CORE
+ if (ke->ke_rqindex != (newpri / RQ_PPQ))
+#else
+ if (ke->ke_rqindex != newpri)
+#endif
+ {
+ sched_rem(td);
+ sched_add(td, SRQ_BORING);
+ }
+ return;
+ }
+
+ /* It is a threaded process */
+ kg = td->td_ksegrp;
+ if (ke->ke_state == KES_ONRUNQ
+#ifdef SCHED_ULE
+ || ((ke->ke_flags & KEF_ASSIGNED) != 0 &&
+ (ke->ke_flags & KEF_REMOVED) == 0)
+#endif
+ ) {
+ if (kg->kg_last_assigned == td) {
+ kg->kg_last_assigned =
+ TAILQ_PREV(td, threadqueue, td_runq);
+ }
+ sched_rem(td);
+ }
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ TD_SET_CAN_RUN(td);
+ td->td_priority = newpri;
+ setrunqueue(td, SRQ_BORING);
+#else
/* We only care about the kse in the run queue. */
td->td_priority = newpri;
#ifndef SCHED_CORE
@@ -152,14 +388,170 @@
sched_rem(td);
sched_add(td, SRQ_BORING);
}
+#endif
+}
+
+#ifdef KSE
+/*
+ * This function is called when a thread is about to be put on a
+ * ksegrp run queue because it has been made runnable or its
+ * priority has been adjusted and the ksegrp does not have a
+ * free kse slot. It determines if a thread from the same ksegrp
+ * should be preempted. If so, it tries to switch threads
+ * if the thread is on the same cpu or notifies another cpu that
+ * it should switch threads.
+ */
+
+static void
+maybe_preempt_in_ksegrp(struct thread *td)
+#if !defined(SMP)
+{
+ struct thread *running_thread;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ running_thread = curthread;
+
+ if (running_thread->td_ksegrp != td->td_ksegrp)
+ return;
+
+ if (td->td_priority >= running_thread->td_priority)
+ return;
+#ifdef PREEMPTION
+#ifndef FULL_PREEMPTION
+ if (td->td_priority > PRI_MAX_ITHD) {
+ running_thread->td_flags |= TDF_NEEDRESCHED;
+ return;
+ }
+#endif /* FULL_PREEMPTION */
+
+ if (running_thread->td_critnest > 1)
+ running_thread->td_owepreempt = 1;
+ else
+ mi_switch(SW_INVOL, NULL);
+
+#else /* PREEMPTION */
+ running_thread->td_flags |= TDF_NEEDRESCHED;
+#endif /* PREEMPTION */
+ return;
+}
+
+#else /* SMP */
+{
+ struct thread *running_thread;
+ int worst_pri;
+ struct ksegrp *kg;
+ cpumask_t cpumask, dontuse;
+ struct pcpu *pc;
+ struct pcpu *best_pcpu;
+ struct thread *cputhread;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+
+ running_thread = curthread;
+
+#if !defined(KSEG_PEEMPT_BEST_CPU)
+ if (running_thread->td_ksegrp != td->td_ksegrp) {
+#endif
+ kg = td->td_ksegrp;
+
+ /* if someone is ahead of this thread, wait our turn */
+ if (td != TAILQ_FIRST(&kg->kg_runq))
+ return;
+
+ worst_pri = td->td_priority;
+ best_pcpu = NULL;
+ dontuse = stopped_cpus | idle_cpus_mask;
+
+ /*
+ * Find the cpu running the worst-priority thread from the same
+ * ksegrp; if several qualify, prefer first the cpu this thread
+ * last ran on, then the current cpu.
+ */
+
+ SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+ cpumask = pc->pc_cpumask;
+ cputhread = pc->pc_curthread;
+
+ if ((cpumask & dontuse) ||
+ cputhread->td_ksegrp != kg)
+ continue;
+
+ if (cputhread->td_priority > worst_pri) {
+ worst_pri = cputhread->td_priority;
+ best_pcpu = pc;
+ continue;
+ }
+
+ if (cputhread->td_priority == worst_pri &&
+ best_pcpu != NULL &&
+ (td->td_lastcpu == pc->pc_cpuid ||
+ (PCPU_GET(cpumask) == cpumask &&
+ td->td_lastcpu != best_pcpu->pc_cpuid)))
+ best_pcpu = pc;
+ }
+
+ /* Check if we need to preempt someone */
+ if (best_pcpu == NULL)
+ return;
+
+#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
+#if !defined(FULL_PREEMPTION)
+ if (td->td_priority <= PRI_MAX_ITHD)
+#endif /* ! FULL_PREEMPTION */
+ {
+ ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT);
+ return;
+ }
+#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
+
+ if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
+ best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
+ ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
+ return;
+ }
+#if !defined(KSEG_PEEMPT_BEST_CPU)
+ }
+#endif
+
+ if (td->td_priority >= running_thread->td_priority)
+ return;
+#ifdef PREEMPTION
+
+#if !defined(FULL_PREEMPTION)
+ if (td->td_priority > PRI_MAX_ITHD) {
+ running_thread->td_flags |= TDF_NEEDRESCHED;
+ }
+#endif /* ! FULL_PREEMPTION */
+
+ if (running_thread->td_critnest > 1)
+ running_thread->td_owepreempt = 1;
+ else
+ mi_switch(SW_INVOL, NULL);
+
+#else /* PREEMPTION */
+ running_thread->td_flags |= TDF_NEEDRESCHED;
+#endif /* PREEMPTION */
+ return;
}
+#endif /* !SMP */
+
+int limitcount;
+#endif
void
setrunqueue(struct thread *td, int flags)
{
+#ifdef KSE
+ struct ksegrp *kg;
+ struct thread *td2;
+ struct thread *tda;
+ CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
+ td, td->td_ksegrp, td->td_proc->p_pid);
+#else
CTR2(KTR_RUNQ, "setrunqueue: td:%p pid:%d",
td, td->td_proc->p_pid);
+#endif
CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
td, td->td_proc->p_comm, td->td_priority, curthread,
curthread->td_proc->p_comm);
@@ -169,7 +561,101 @@
KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
("setrunqueue: bad thread state"));
TD_SET_RUNQ(td);
+#ifdef KSE
+ kg = td->td_ksegrp;
+ if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
+ /*
+ * Common path optimisation: Only one of everything
+ * and the KSE is always already attached.
+ * Totally ignore the ksegrp run queue.
+ */
+ if (kg->kg_avail_opennings != 1) {
+ if (limitcount < 1) {
+ limitcount++;
+ printf("pid %d: corrected slot count (%d->1)\n",
+ td->td_proc->p_pid, kg->kg_avail_opennings);
+
+ }
+ kg->kg_avail_opennings = 1;
+ }
+ sched_add(td, flags);
+ return;
+ }
+
+ /*
+ * If the concurrency has reduced, and we would go in the
+ * assigned section, then keep removing entries from the
+ * system run queue, until we are not in that section
+ * or there is room for us to be put in that section.
+ * What we MUST avoid is the case where threads of lower
+ * priority than the new one are scheduled, while the new one
+ * cannot be scheduled itself. That would lead to a non-contiguous
+ * set of scheduled threads, and everything would break.
+ */
+ tda = kg->kg_last_assigned;
+ while ((kg->kg_avail_opennings <= 0) &&
+ (tda && (tda->td_priority > td->td_priority))) {
+ /*
+ * None free, but there is one we can commandeer.
+ */
+ CTR2(KTR_RUNQ,
+ "setrunqueue: kg:%p: take slot from td: %p", kg, tda);
+ sched_rem(tda);
+ tda = kg->kg_last_assigned =
+ TAILQ_PREV(tda, threadqueue, td_runq);
+ }
+
+ /*
+ * Add the thread to the ksegrp's run queue at
+ * the appropriate place.
+ */
+ TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+ if (td2->td_priority > td->td_priority) {
+ TAILQ_INSERT_BEFORE(td2, td, td_runq);
+ break;
+ }
+ }
+ if (td2 == NULL) {
+ /* We ran off the end of the TAILQ or it was empty. */
+ TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
+ }
+
+ /*
+ * If we have a slot to use, then put the thread on the system
+ * run queue and if needed, readjust the last_assigned pointer.
+ * It may be that we need to schedule something anyhow,
+ * even if the available slots are negative, so that
+ * all the items < last_assigned are scheduled.
+ */
+ if (kg->kg_avail_opennings > 0) {
+ if (tda == NULL) {
+ /*
+ * No pre-existing last assigned so whoever is first
+ * gets the slot (maybe us).
+ */
+ td2 = TAILQ_FIRST(&kg->kg_runq);
+ kg->kg_last_assigned = td2;
+ } else if (tda->td_priority > td->td_priority) {
+ td2 = td;
+ } else {
+ /*
+ * We are past last_assigned, so
+ * give the next slot to whatever is next,
+ * which may or may not be us.
+ */
+ td2 = TAILQ_NEXT(tda, td_runq);
+ kg->kg_last_assigned = td2;
+ }
+ sched_add(td2, flags);
+ } else {
+ CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
+ td, td->td_ksegrp, td->td_proc->p_pid);
+ if ((flags & SRQ_YIELDING) == 0)
+ maybe_preempt_in_ksegrp(td);
+ }
+#else
sched_add(td, flags);
+#endif
}
/*
@@ -281,6 +767,24 @@
*/
MPASS(TD_ON_RUNQ(td));
MPASS(td->td_sched->ke_state != KES_ONRUNQ);
+#ifdef KSE
+ if (td->td_proc->p_flag & P_HADTHREADS) {
+ /*
+ * If this is a threaded process we actually ARE on the
+ * ksegrp run queue so take it off that first.
+ * Also undo any damage done to the last_assigned pointer.
+ * XXX Fix setrunqueue so this isn't needed
+ */
+ struct ksegrp *kg;
+
+ kg = td->td_ksegrp;
+ if (kg->kg_last_assigned == td)
+ kg->kg_last_assigned =
+ TAILQ_PREV(td, threadqueue, td_runq);
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ }
+
+#endif
TD_SET_RUNNING(td);
CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
td->td_proc->p_pid, td->td_proc->p_comm);
@@ -395,10 +899,11 @@
rqh = &rq->rq_queues[pri];
CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
- if (flags & SRQ_PREEMPTED)
+ if (flags & SRQ_PREEMPTED) {
TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
- else
+ } else {
TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
+ }
}
/*
@@ -485,7 +990,11 @@
struct rqhead *rqh;
int pri;
+#ifdef KSE
+ KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
+#else
KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
+#endif
("runq_remove: process swapped out"));
pri = ke->ke_rqindex;
rqh = &rq->rq_queues[pri];
@@ -503,6 +1012,24 @@
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;
+#ifdef KSE
+/*
+ * Allocate scheduler specific per-process resources.
+ * The thread and ksegrp have already been linked in.
+ * In this case just set the default concurrency value.
+ *
+ * Called from:
+ * proc_init() (UMA init method)
+ */
+void
+sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
+{
+
+ /* This can go in sched_fork */
+ sched_init_concurrency(kg);
+}
+#endif
+
/*
* thread is being either created or recycled.
* Fix up the per-scheduler resources associated with it.
@@ -523,4 +1050,63 @@
ke->ke_state = KES_THREAD;
}
+#ifdef KSE
+/*
+ * Set up an initial concurrency of 1
+ * and set the given thread (if given) to be using that
+ * concurrency slot.
+ * May be used "offline", before the ksegrp is attached to the world
+ * and thus wouldn't need schedlock in that case.
+ * Called from:
+ * thr_create()
+ * proc_init() (UMA) via sched_newproc()
+ */
+void
+sched_init_concurrency(struct ksegrp *kg)
+{
+
+ CTR1(KTR_RUNQ,"kg %p init slots and concurrency to 1", kg);
+ kg->kg_concurrency = 1;
>>> TRUNCATED FOR MAIL (1000 lines) <<<