PERFORCE change 50807 for review
Julian Elischer
julian at FreeBSD.org
Sat Apr 10 18:34:46 PDT 2004
http://perforce.freebsd.org/chv.cgi?CH=50807
Change 50807 by julian at julian_jules1 on 2004/04/10 18:34:01
Having thought about it, put everything into sched_4bsd.c
This will still need cleaning, but it tidies things up.
Now to make it not crash.
Affected files ...
.. //depot/projects/nsched/sys/conf/files#5 edit
.. //depot/projects/nsched/sys/kern/sched_4bsd.c#3 edit
Differences ...
==== //depot/projects/nsched/sys/conf/files#5 (text+ko) ====
@@ -1080,9 +1080,7 @@
kern/link_elf.c standard
kern/md4c.c optional netsmb
kern/md5c.c standard
-kern/scheduler/4bsd/sched_4bsd.c optional sched_4bsd
-kern/scheduler/4bsd/sched_4bsd_kse.c optional sched_4bsd
-kern/scheduler/4bsd/sched_4bsd_runq.c optional sched_4bsd
+kern/sched_4bsd.c optional sched_4bsd
kern/scheduler/ule/sched_ule.c optional sched_ule
kern/scheduler/ule/sched_ule_kse.c optional sched_ule
kern/scheduler/ule/sched_ule_runq.c optional sched_ule
==== //depot/projects/nsched/sys/kern/sched_4bsd.c#3 (text+ko) ====
@@ -15,6 +15,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
+ * 3. [Condition rescinded by the Regents of the University of California.]
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.37 2004/04/05 21:03:35 imp Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.35 2004/03/05 19:27:04 rwatson Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -48,7 +49,122 @@
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
+#include <sys/queue.h>
+#include <machine/critical.h>
+#include <sys/thr.h> /* XXXKSE */
+#if 0
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#endif
+#include <vm/uma.h>
+/*
+ * This is a scheduler-private structure that the scheduler uses (for now)
+ * to implement the thread fairness algorithm.
+ * The threads are made runnable by the rest of the system, but
+ * only NKSE of them are actually put on the run queues to compete with
+ * threads from other processes. For normal processes there is only one KSE,
+ * but for threaded processes we allocate only up to NKSE KSEs per ksegrp.
+ * KSEs are what is put on the system run queue, so multithreaded
+ * processes must multiplex their threads onto their KSEs, making them compete
+ * on a similar basis to non-threaded processes.
+ * In pictures:
+ * With a single run queue used by all processors:
+ *
+ * RUNQ: --->KSE---KSE--...
+ * | /
+ * KSEG---THREAD--THREAD--THREAD
+ *
+ * (processors run THREADs from the KSEG until they are exhausted or
+ * the KSEG exhausts its quantum)
+ *
+ * XXX Strictly speaking, this structure is not needed any more
+ * and its fields will move to the thread structure. The first 'N'
+ * threads can be kept track of with a simple count. Do this soon.
+ */
+
+struct kse {
+ struct proc *ke_proc; /* (*) Associated process. */
+ struct ksegrp *ke_ksegrp; /* (*) Associated KSEG. */
+ TAILQ_ENTRY(kse) ke_kglist; /* (*) Queue of KSEs in ke_ksegrp. */
+ TAILQ_ENTRY(kse) ke_kgrlist; /* (*) Queue of KSEs in this state. */
+ TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */
+
+ struct thread *ke_thread; /* (*) Active associated thread. */
+#define ke_startzero ke_flags
+ int ke_flags; /* (j) KEF_* flags. */
+ fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */
+ u_char ke_oncpu; /* (j) Which cpu we are on. */
+ char ke_rqindex; /* (j) Run queue index. */
+ enum {
+ KES_UNUSED = 0x0,
+ KES_IDLE,
+ KES_ONRUNQ,
+ KES_UNQUEUED, /* in transit */
+ KES_THREAD /* slaved to thread state */
+ } ke_state; /* (j) KSE status. */
+#define ke_endzero ke_cpticks
+ int ke_cpticks; /* (j) Ticks of cpu time. */
+ struct runq *ke_runq; /* runq the kse is currently on */
+};
+
+/* flags kept in ke_flags */
+#define KEF_BOUND 0x00001 /* Stuck on a thread (as per normal procs). */
+#define KEF_EXIT 0x00002 /* KSE is being killed. */
+#define KEF_DIDRUN 0x00004 /* KSE actually ran. */
+
+/*
+ * Scheduler private extensions to thread, ksegrp and proc structures.
+ * These are invisible outside the scheduler.
+ * They are usually allocated beyond the end of the proc, thread or ksegrp
+ * structure and always accessed via an indirection.
+ * This allows proc0 and friends to be allocated statically
+ * without knowing the size of the extension. This allows different
+ * schedulers to have different extensions.
+ * Proc0 case: thread0:[... ptr] [td_sched0]
+ * | ^
+ * \------/
+ *
+ * Usual case: td-> [... ptr|td_sched] ( allocated together )
+ * | ^
+ * \---/
+ */
+
+struct td_sched {
+ struct kse *std_last_kse; /* (j) Previous value of td_kse. */
+ struct kse *std_kse; /* (j) Current KSE if running. */
+};
+#define td_last_kse td_sched->std_last_kse
+#define td_kse td_sched->std_kse
+
+struct kg_sched {
+ TAILQ_HEAD(, kse) skg_kseq; /* (ke_kglist) All KSEs. */
+ TAILQ_HEAD(, kse) skg_iq; /* (ke_kgrlist) All idle KSEs. */
+ struct thread *skg_last_assigned; /* (j) Last thread assigned */
+ /* ( to a KSE). */
+ int skg_runq_kses; /* (j) Num KSEs on runq. */
+ int skg_idle_kses; /* (j) Num KSEs on iq. */
+ int skg_kses; /* (j) Num KSEs in group. */
+};
+#define kg_kseq kg_sched->skg_kseq
+#define kg_iq kg_sched->skg_iq
+#define kg_last_assigned kg_sched->skg_last_assigned
+#define kg_runq_kses kg_sched->skg_runq_kses
+#define kg_idle_kses kg_sched->skg_idle_kses
+#define kg_kses kg_sched->skg_kses
+
+#define FIRST_KSE_IN_KSEGRP(kg) TAILQ_FIRST(&(kg)->kg_kseq)
+#define FIRST_KSE_IN_PROC(p) FIRST_KSE_IN_KSEGRP(FIRST_KSEGRP_IN_PROC(p))
+
+void kse_free(struct kse *ke);
+void kse_stash(struct kse *ke);
+void kse_reassign(struct kse *ke);
+void sched_fork_kse(struct thread *parenttd, struct kse *newke);
+void sched_exit_kse(struct proc *parent, struct thread *childtd);
+
#define KTR_4BSD 0x0
/*
@@ -65,14 +181,7 @@
#endif
#define NICE_WEIGHT 1 /* Priorities per nice level. */
-struct ke_sched {
- int ske_cpticks; /* (j) Ticks of cpu time. */
- struct runq *ske_runq; /* runq the kse is currently on */
-};
-#define ke_runq ke_sched->ske_runq
-#define KEF_BOUND KEF_SCHED1
-
-#define SKE_RUNQ_PCPU(ke) \
+#define KE_RUNQ_PCPU(ke) \
((ke)->ke_runq != 0 && (ke)->ke_runq != &runq)
/*
@@ -81,13 +190,7 @@
*/
#define KSE_CAN_MIGRATE(ke) \
((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
-static struct ke_sched ke_sched;
-struct ke_sched *kse0_sched = &ke_sched;
-struct kg_sched *ksegrp0_sched = NULL;
-struct p_sched *proc0_sched = NULL;
-struct td_sched *thread0_sched = NULL;
-
static int sched_tdcnt; /* Total runnable threads in the system. */
static int sched_quantum; /* Roundrobin scheduling quantum in ticks. */
#define SCHED_QUANTUM (hz / 10) /* Default sched quantum */
@@ -169,6 +272,10 @@
curthread->td_flags |= TDF_NEEDRESCHED;
}
+
+/****************************************************************
+ * 4BSD based scheduling policy engine. *
+ ***************************************************************/
/*
* Force switch among equal priority processes every 100ms.
* We don't actually need to force a context switch of the current process.
@@ -338,20 +445,20 @@
* stop recalculating its priority until
* it wakes up.
*/
- if (ke->ke_sched->ske_cpticks == 0)
+ if (ke->ke_cpticks == 0)
continue;
#if (FSHIFT >= CCPU_SHIFT)
ke->ke_pctcpu += (realstathz == 100)
- ? ((fixpt_t) ke->ke_sched->ske_cpticks) <<
+ ? ((fixpt_t) ke->ke_cpticks) <<
(FSHIFT - CCPU_SHIFT) :
- 100 * (((fixpt_t) ke->ke_sched->ske_cpticks)
+ 100 * (((fixpt_t) ke->ke_cpticks)
<< (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
ke->ke_pctcpu += ((FSCALE - ccpu) *
- (ke->ke_sched->ske_cpticks *
+ (ke->ke_cpticks *
FSCALE / realstathz)) >> FSHIFT;
#endif
- ke->ke_sched->ske_cpticks = 0;
+ ke->ke_cpticks = 0;
} /* end of kse loop */
/*
* If there are ANY running threads in this KSEGRP,
@@ -468,7 +575,18 @@
sched_tdcnt++;
}
-/* External interfaces start here */
+
+
+
+
+/*******************************************************************
+ * These functions represent entry points that a scheduler needs to *
+ * supply in order to give the rest of the system opportunities to *
+ * notify it of important events and to let it do housekeeping *
+ * in a timely and efficient manner. Events include the creation, *
+ * allocation and death of various entities, timer events, and *
+ * direct calls for scheduler services. *
+ ******************************************************************/
int
sched_runnable(void)
{
@@ -511,7 +629,7 @@
kg = td->td_ksegrp;
ke = td->td_kse;
- ke->ke_sched->ske_cpticks++;
+ ke->ke_cpticks++;
kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
resetpriority(kg);
@@ -529,56 +647,62 @@
* aggregated all the estcpu into the 'built-in' ksegrp.
*/
void
-sched_exit(struct proc *p, struct proc *p1)
+sched_exit(struct proc *parent, struct thread *childtd)
{
- sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
- sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
- sched_exit_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
+ sched_exit_ksegrp(parent, childtd);
+ sched_exit_thread(parent, childtd);
}
+/* This PROBABLY gives the estcpu to the wrong kseg */
void
-sched_exit_kse(struct kse *ke, struct kse *child)
+sched_exit_ksegrp(struct proc *parent, struct thread *child)
{
-}
+ struct ksegrp *kg;
-void
-sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
-{
-
mtx_assert(&sched_lock, MA_OWNED);
- kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + child->kg_estcpu);
+ kg = FIRST_KSEGRP_IN_PROC(parent); /* XXXKSE */
+ child->td_kse->ke_flags |= KEF_EXIT;
+ kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + child->td_ksegrp->kg_estcpu);
}
void
-sched_exit_thread(struct thread *td, struct thread *child)
+sched_exit_thread(struct proc *parent, struct thread *childtd)
{
- if ((child->td_proc->p_flag & P_NOLOAD) == 0)
+ if ((childtd->td_proc->p_flag & P_NOLOAD) == 0)
sched_tdcnt--;
+ if (childtd->td_kse)
+ sched_exit_kse(parent, childtd);
}
void
-sched_fork(struct proc *p, struct proc *p1)
+sched_fork(struct thread *parenttd, struct proc *child)
{
- sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
- sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
- sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
+ struct thread *td2;
+ struct kse *ke2;
+
+ td2 = FIRST_THREAD_IN_PROC(child);
+ ke2 = FIRST_KSE_IN_PROC(child);
+ sched_fork_kse(parenttd, ke2);
+ sched_fork_ksegrp(parenttd, FIRST_KSEGRP_IN_PROC(child));
+ ke2->ke_thread = td2;
+ td2->td_kse = ke2;
+ /* sched_fork_thread(parenttd, td2);*/
}
void
-sched_fork_kse(struct kse *ke, struct kse *child)
+sched_fork_ksegrp(struct thread *parent, struct ksegrp *child)
{
- child->ke_sched->ske_cpticks = 0;
-}
+ struct ksegrp *kg;
-void
-sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
-{
mtx_assert(&sched_lock, MA_OWNED);
+ kg = parent->td_ksegrp;
child->kg_estcpu = kg->kg_estcpu;
}
+
+
void
-sched_fork_thread(struct thread *td, struct thread *child)
+sched_fork_thread(struct thread *parent, struct thread *child)
{
}
@@ -706,7 +830,7 @@
ke->ke_runq = &runq;
} else {
CTR1(KTR_4BSD, "adding kse:%p to pcpu runq", ke);
- if (!SKE_RUNQ_PCPU(ke))
+ if (!KE_RUNQ_PCPU(ke))
ke->ke_runq = &runq_pcpu[PCPU_GET(cpuid)];
}
#else
@@ -731,7 +855,7 @@
if ((td->td_proc->p_flag & P_NOLOAD) == 0)
sched_tdcnt--;
- runq_remove(ke->ke_sched->ske_runq, ke);
+ runq_remove(ke->ke_runq, ke);
ke->ke_state = KES_THREAD;
ke->ke_ksegrp->kg_runq_kses--;
@@ -838,24 +962,21 @@
}
int
-sched_sizeof_kse(void)
-{
- return (sizeof(struct kse) + sizeof(struct ke_sched));
-}
-int
sched_sizeof_ksegrp(void)
{
- return (sizeof(struct ksegrp));
+ return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}
+
int
sched_sizeof_proc(void)
{
return (sizeof(struct proc));
}
+
int
sched_sizeof_thread(void)
{
- return (sizeof(struct thread));
+ return (sizeof(struct thread) + sizeof(struct td_sched));
}
fixpt_t
@@ -871,3 +992,926 @@
return (0);
}
+
+
+
+static uma_zone_t kse_zone;
+
+struct kse kse0;
+static struct kg_sched kg_sched0;
+static struct td_sched td_sched0;
+
+static struct kse * kse_alloc(void);
+static void kse_link(struct proc *p, struct kse *ke, struct ksegrp *kg);
+static void kse_unlink(struct kse *ke);
+
+extern struct mtx kse_zombie_lock;
+TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
+
+void
+sched_GC(void)
+{
+ struct kse *ke_first, *ke_next;
+
+ if (!TAILQ_EMPTY(&zombie_kses)) {
+ mtx_lock_spin(&kse_zombie_lock);
+ ke_first = TAILQ_FIRST(&zombie_kses);
+ if (ke_first)
+ TAILQ_INIT(&zombie_kses);
+ while (ke_first) {
+ ke_next = TAILQ_NEXT(ke_first, ke_procq);
+ kse_free(ke_first);
+ ke_first = ke_next;
+ }
+ mtx_unlock_spin(&kse_zombie_lock);
+ }
+}
+
+void
+schedinit(void)
+{
+ /*
+ * Set up the scheduler specific parts of proc0.
+ */
+ ksegrp0.kg_sched = &kg_sched0;
+ proc0.p_sched = NULL; /* XXX */
+ thread0.td_sched = &td_sched0;
+
+ proc_linkup(&proc0, &ksegrp0, &thread0);
+ kse_link(&proc0, &kse0, &ksegrp0);
+ kse0.ke_thread = &thread0;
+ thread0.td_kse = &kse0; /* we are running */
+ kse0.ke_state = KES_THREAD;
+
+ kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
+}
+
+/*
+ * For now, have special thr code;
+ * later on, clean these up into common code.
+ */
+#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
+int
+sched_thr_newthread(struct thread *td, struct thread *td0, int flags)
+{
+ struct kse *ke0;
+ /* Initialize our kse structure. */
+ ke0 = kse_alloc();
+ bzero(&ke0->ke_startzero,
+ RANGEOF(struct kse, ke_startzero, ke_endzero));
+
+ /* Link the thread and kse into the ksegrp and make it runnable. */
+ mtx_lock_spin(&sched_lock);
+
+ thread_link(td0, td->td_ksegrp);
+ kse_link(td->td_proc, ke0, td->td_ksegrp);
+
+ /* Bind this thread and kse together. */
+ td0->td_kse = ke0;
+ ke0->ke_thread = td0;
+
+ sched_fork_kse(td, td->td_kse);
+ sched_fork_thread(td, td0);
+
+ TD_SET_CAN_RUN(td0);
+ if ((flags & THR_SUSPENDED) == 0)
+ setrunqueue(td0);
+
+ mtx_unlock_spin(&sched_lock);
+ return (0); /* the API could fail but not in this case */
+}
+#undef RANGEOF
+
+void
+sched_thr_exit(struct thread *td)
+{
+ struct kse *ke;
+
+ ke = td->td_kse;
+
+ ke->ke_state = KES_UNQUEUED;
+ ke->ke_thread = NULL;
+ kse_unlink(ke);
+ sched_exit_kse(td->td_proc->p_pptr, td);
+ td->td_kse = NULL;
+ td->td_last_kse = NULL;
+}
+
+/*
+ * Allocate a kse.
+ */
+static struct kse *
+kse_alloc(void)
+{
+ return (uma_zalloc(kse_zone, M_WAITOK));
+}
+
+/*
+ * Deallocate a kse.
+ */
+void
+kse_free(struct kse *ke)
+{
+ uma_zfree(kse_zone, ke);
+}
+
+/*
+ * Stash an embarrassing extra KSE into the zombie KSE queue.
+ */
+void
+kse_stash(struct kse *ke)
+{
+ mtx_lock_spin(&kse_zombie_lock);
+ TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
+ mtx_unlock_spin(&kse_zombie_lock);
+}
+
+/*
+ * KSE is linked into kse group.
+ */
+void
+kse_link(struct proc *p, struct kse *ke, struct ksegrp *kg)
+{
+ TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
+ kg->kg_kses++;
+ ke->ke_state = KES_UNQUEUED;
+ ke->ke_proc = p;
+ ke->ke_ksegrp = kg;
+ ke->ke_thread = NULL;
+ ke->ke_oncpu = NOCPU;
+ ke->ke_flags = 0;
+}
+
+int
+sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
+{
+ struct kse *ke;
+
+ ke = kse_alloc();
+ if (ke) {
+ kse_link(p, ke, kg);
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ return (0);
+ }
+ return (ENOMEM);
+}
+
+/* Assumes kg->kg_sched is already set up */
+void
+sched_newkseg(struct ksegrp *kg)
+{
+
+ TAILQ_INIT(&kg->kg_kseq); /* all kses in ksegrp */
+ TAILQ_INIT(&kg->kg_iq); /* all idle kses in ksegrp */
+ kg->kg_kses = 0;
+ kg->kg_runq_kses = 0; /* XXXKSE change name */
+ kg->kg_idle_kses = 0;
+}
+
+/* Assumes td->td_sched is already set up */
+void
+sched_newthread(struct thread *td)
+{
+ td->td_last_kse = NULL;
+ td->td_kse = NULL;
+}
+
+void
+kse_unlink(struct kse *ke)
+{
+ struct ksegrp *kg;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ kg = ke->ke_ksegrp;
+ TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
+ if (ke->ke_state == KES_IDLE) {
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses--;
+ }
+ if (--kg->kg_kses == 0)
+ ksegrp_unlink(kg);
+ /*
+ * Aggregate stats from the KSE
+ */
+ kse_stash(ke);
+}
+
+/* We need the thread argument to know whether we are on that ksegrp or not. */
+void
+sched_clean_ksegrp(struct ksegrp *kg, struct thread *td)
+{
+ struct kse *ke;
+
+ while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
+ KASSERT(ke->ke_state == KES_IDLE,
+ ("%s: wrong idle KSE state", __func__));
+ kse_unlink(ke);
+ }
+ KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
+ ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
+ ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
+}
+
+
+#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
+
+/* New version of sched_fork(). */
+void
+sched_fork_kse(struct thread *parenttd, struct kse *ke2)
+{
+ struct ksegrp *kg2;
+
+ kg2 = ke2->ke_ksegrp;
+ bzero(&ke2->ke_startzero,
+ (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
+ ke2->ke_state = KES_THREAD;
+ ke2->ke_cpticks = 0;
+ kg2->kg_estcpu = parenttd->td_ksegrp->kg_estcpu;
+}
+
+/*
+ * Try to handle the generic case where there may be > 1 kseg even
+ * though that should never happen. It should be cheap to do so.
+ */
+void
+sched_destroyproc(struct proc *p)
+{
+ struct ksegrp *kg;
+ struct kse *ke;
+
+ /* remove all the kses we can find and free them */
+ FOREACH_KSEGRP_IN_PROC(p, kg) {
+ while (!TAILQ_EMPTY(&kg->kg_kseq)) {
+ ke = TAILQ_FIRST(&kg->kg_kseq);
+ TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
+ if (ke->ke_thread)
+ ke->ke_thread->td_kse = NULL;
+ kse_free(ke);
+ }
+ }
+}
+
+void
+sched_exit_kse(struct proc *parent, struct thread *childtd)
+{
+ struct ksegrp *kg;
+ struct kse *ke;
+
+ kg = childtd->td_ksegrp;
+ ke = childtd->td_kse;
+ KASSERT((ke), ("unexpected null KSE ptr in sched_exit_kse()"));
+ ke->ke_state = KES_UNQUEUED;
+ ke->ke_thread = NULL;
+ /*
+ * Decide what to do with the KSE attached to this thread.
+ */
+ if (ke->ke_flags & KEF_EXIT) {
+ kse_unlink(ke);
+ if (kg->kg_kses == 0) {
+ sched_exit_ksegrp(parent, childtd); /* XXXKSE */
+ ksegrp_unlink(kg);
+ }
+ } else {
+ kse_reassign(ke);
+ }
+ childtd->td_kse = NULL;
+ childtd->td_last_kse = NULL;
+}
+
+void
+sched_set_concurrancy(struct ksegrp *kg, int concurrancy)
+{
+ struct kse *newke;
+
+ while (kg->kg_kses < concurrancy) {
+ newke = kse_alloc();
+ bzero(&newke->ke_startzero, RANGEOF(struct kse,
+ ke_startzero, ke_endzero));
+#if 0
+ mtx_lock_spin(&sched_lock);
+ bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
+ RANGEOF(struct kse, ke_startcopy, ke_endcopy));
+ mtx_unlock_spin(&sched_lock);
+#endif
+ mtx_lock_spin(&sched_lock);
+ kse_link(kg->kg_proc, newke, kg);
+ sched_fork_kse(curthread, newke);
+ /* Add engine */
+ kse_reassign(newke);
+ mtx_unlock_spin(&sched_lock);
+ }
+}
+
+/* From kern_switch.c
+ * Parts Copyright (c) 2001 Jake Burkholder <jake at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/************************************************************************
+ * Functions that manipulate runnability from a thread perspective. *
+ * These are a 'threading fairness' front-end to the BSD scheduler *
+ ************************************************************************/
+/***
+Here is the logic:
+
+If there are N processors, then there are at most N KSEs (kernel
+schedulable entities) working to process threads that belong to a
+KSEGROUP (kg). If there are X of these KSEs actually running at the
+moment in question, then at most M = (N - X) of these KSEs are on
+the run queue, as running KSEs are not on the queue.
+
+Runnable threads are queued off the KSEGROUP in priority order.
+If there are M or more threads runnable, the top M threads
+(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
+their priority from those threads and are put on the system run queue.
+
+The last thread that had a priority high enough to have a KSE associated
+with it, AND IS ON THE RUN QUEUE is pointed to by
+kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
+assigned, either because all the available KSEs are actively running or
+because there are no threads queued, that pointer is NULL.
+
+When a KSE is removed from the run queue to become runnable, we know
+it was associated with the highest priority thread in the queue (at the head
+of the queue). If it is also the last assigned, we know M was 1 and must
+now be 0. Since the thread is no longer queued, the pointer must be
+removed from it. Since we know there were no more KSEs available
+(M was 1 and is now 0), and since we are not FREEING our KSE
+but using it, we know there are STILL no more KSEs available. We can prove
+that the next thread in the ksegrp list will not have a KSE to assign to
+it, so we can show that the pointer must be made 'invalid' (NULL).
+
+The pointer exists so that when a new thread is made runnable, it can
+have its priority compared with the last assigned thread to see if
+it should 'steal' its KSE or not; i.e., is it 'earlier'
+on the list than that thread or later. If it's earlier, then the KSE is
+removed from the last assigned (which is now not assigned a KSE)
+and reassigned to the new thread, which is placed earlier in the list.
+The pointer is then backed up to the previous thread (which may or may not
+be the new thread).
+
+When a thread sleeps or is removed, the KSE becomes available and if there
+are queued threads that are not assigned KSEs, the highest priority one of
+them is assigned the KSE, which is then placed back on the run queue at
+the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
+to point to it.
+
+The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads)
+ \ \____
+ \ \
+ KSEGROUP---thread--thread--thread (queued in priority order)
+ \ /
+ \_______________/
+ (last_assigned)
+
+The result of this scheme is that the M available KSEs are always
+queued at the priorities they have inherited from the M highest priority
+threads for that KSEGROUP. If this situation changes, the KSEs are
+reassigned to keep this true.
+***/
+
+
+CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
+
+#if 0
+static void runq_readjust(struct runq *rq, struct kse *ke);
+#endif
+/*
+ * Select the KSE that will be run next. From that find the thread, and
+ * remove it from the KSEGRP's run queue. If there is thread clustering,
+ * this will be what does it.
+ * XXX Change to take an argument indicating
+ * if the switch is voluntary or involuntary.
+ */
+struct thread *
+choosethread(void)
+{
+ struct kse *ke;
+ struct thread *td;
+ struct ksegrp *kg;
+
+#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
+ if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
+ /* Shutting down, run idlethread on APs */
+ td = PCPU_GET(idlethread);
+ ke = td->td_kse;
+ CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+ ke->ke_flags |= KEF_DIDRUN;
+ TD_SET_RUNNING(td);
+ return (td);
+ }
+#endif
+
+retry:
+ ke = sched_choose();
+ if (ke) {
+ td = ke->ke_thread;
+ KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
+ kg = ke->ke_ksegrp;
+ if (td->td_proc->p_flag & P_SA) {
+ if (kg->kg_last_assigned == td) {
+ kg->kg_last_assigned = TAILQ_PREV(td,
+ threadqueue, td_runq);
+ }
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ }
+ kg->kg_runnable--;
+ CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
+ td, td->td_priority);
+ } else {
+ /* Simulate runq_choose() having returned the idle thread */
+ td = PCPU_GET(idlethread);
+ ke = td->td_kse;
+ CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+ }
+ ke->ke_flags |= KEF_DIDRUN;
+
+ /*
+ * If we are in panic, only allow system threads,
+ * plus the one we are running in, to be run.
+ */
+ if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
+ (td->td_flags & TDF_INPANIC) == 0)) {
+ /* note that it is no longer on the run queue */
+ TD_SET_CAN_RUN(td);
+ goto retry;
+ }
+
+ TD_SET_RUNNING(td);
+ return (td);
+}
+
+/*
+ * Given a surplus KSE, either assign a new runnable thread to it
+ * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
+ * Assumes that the original thread is not runnable.
+ */
+void
+kse_reassign(struct kse *ke)
+{
+ struct ksegrp *kg;
+ struct thread *td;
+ struct thread *original;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ original = ke->ke_thread;
+ KASSERT(original == NULL || TD_IS_INHIBITED(original),
+ ("reassigning KSE with runnable thread"));
+ kg = ke->ke_ksegrp;
+ if (original)
+ original->td_kse = NULL;
+
+ /*
+ * Find the first unassigned thread
+ */
+ if ((td = kg->kg_last_assigned) != NULL)
+ td = TAILQ_NEXT(td, td_runq);
+ else
+ td = TAILQ_FIRST(&kg->kg_runq);
+
+ /*
+ * If we found one, assign it the kse, otherwise idle the kse.
+ */
+ if (td) {
+ kg->kg_last_assigned = td;
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ sched_add(td);
+ CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
+ return;
+ }
+
+ ke->ke_state = KES_IDLE;
+ ke->ke_thread = NULL;
+ TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses++;
+ CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
+ return;
+}
+
+#if 0
+/*
+ * Remove a thread from its KSEGRP's run queue.
+ * This in turn may remove it from a KSE if it was already assigned
+ * to one, possibly causing a new thread to be assigned to the KSE
+ * and the KSE getting a new priority.
+ */
+static void
+remrunqueue(struct thread *td)
+{
+ struct thread *td2, *td3;
+ struct ksegrp *kg;
+ struct kse *ke;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
+ kg = td->td_ksegrp;
+ ke = td->td_kse;
+ CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
+ kg->kg_runnable--;
+ TD_SET_CAN_RUN(td);
+ /*
+ * If it is not a threaded process, take the shortcut.
+ */
+ if ((td->td_proc->p_flag & P_SA) == 0) {
+ /* Bring its kse with it, leave the thread attached */
+ sched_rem(td);
+ ke->ke_state = KES_THREAD;
+ return;
+ }
+ td3 = TAILQ_PREV(td, threadqueue, td_runq);
+ TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+ if (ke) {
+ /*
+ * This thread has been assigned to a KSE.
+ * We need to dissociate it and try assign the
+ * KSE to the next available thread. Then, we should
+ * see if we need to move the KSE in the run queues.
+ */
+ sched_rem(td);
+ ke->ke_state = KES_THREAD;
+ td2 = kg->kg_last_assigned;
+ KASSERT((td2 != NULL), ("last assigned has wrong value"));
+ if (td2 == td)
+ kg->kg_last_assigned = td3;
+ kse_reassign(ke);
+ }
+}
+#endif
+
+/*
+ * Change the priority of a thread that is on the run queue.
+ */
+void
+adjustrunqueue(struct thread *td, int newpri)
+{
+ struct ksegrp *kg;
+ struct kse *ke;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
+
+ ke = td->td_kse;
+ CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
+ /*
+ * If it is not a threaded process, take the shortcut.
+ */
+ if ((td->td_proc->p_flag & P_SA) == 0) {
+ /* We only care about the kse in the run queue. */
+ td->td_priority = newpri;
+ if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
+ sched_rem(td);
+ sched_add(td);
>>> TRUNCATED FOR MAIL (1000 lines) <<<