PERFORCE change 50807 for review

Julian Elischer julian at FreeBSD.org
Sat Apr 10 18:34:46 PDT 2004


http://perforce.freebsd.org/chv.cgi?CH=50807

Change 50807 by julian at julian_jules1 on 2004/04/10 18:34:01

	Having thought about it, put everything into sched_4bsd.c.
	This will still need further cleanup, but it already tidies things up.
	Next: make it not crash.

Affected files ...

.. //depot/projects/nsched/sys/conf/files#5 edit
.. //depot/projects/nsched/sys/kern/sched_4bsd.c#3 edit

Differences ...

==== //depot/projects/nsched/sys/conf/files#5 (text+ko) ====

@@ -1080,9 +1080,7 @@
 kern/link_elf.c		standard
 kern/md4c.c		optional netsmb
 kern/md5c.c		standard
-kern/scheduler/4bsd/sched_4bsd.c	optional sched_4bsd
-kern/scheduler/4bsd/sched_4bsd_kse.c	optional sched_4bsd
-kern/scheduler/4bsd/sched_4bsd_runq.c	optional sched_4bsd
+kern/sched_4bsd.c	optional sched_4bsd
 kern/scheduler/ule/sched_ule.c		optional sched_ule
 kern/scheduler/ule/sched_ule_kse.c	optional sched_ule
 kern/scheduler/ule/sched_ule_runq.c	optional sched_ule

==== //depot/projects/nsched/sys/kern/sched_4bsd.c#3 (text+ko) ====

@@ -15,6 +15,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
+ * 3. [Condition rescinded by the Regents of the University of California.]
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.37 2004/04/05 21:03:35 imp Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.35 2004/03/05 19:27:04 rwatson Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -48,7 +49,122 @@
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sx.h>
+#include <sys/queue.h>
+#include <machine/critical.h>
+#include <sys/thr.h> /* XXXKSE */
+#if 0
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#endif
+#include <vm/uma.h>
 
+/*
+ * This is a scheduler-private structure that it uses (for now)
+ * to implement the thread fairness algorithm.
+ * The threads are made runnable by the rest of the system, but 
+ * only NKSE of them are actually put on the run queues to compete with
+ * threads from other processes. For normal processes there is only one KSE
+ * but for threaded processes we allocate only up to NKSE KSEs per ksegrp.
+ * KSEs are what is put on the system run queue, so multithreaded
+ * processes must multiplex their threads onto their KSEs, making them compete
+ * on a similar basis to nonthreaded processes.
+ * In pictures:
+ * With a single run queue used by all processors:
+ *
+ * RUNQ: --->KSE---KSE--...   
+ *	     |   / 
+ *	     KSEG---THREAD--THREAD--THREAD
+ *
+ *  (processors run THREADs from the KSEG until they are exhausted or
+ *  the KSEG exhausts its quantum)
+ *
+ * XXX Strictly speaking, this structure is not needed any more
+ * and its fields will move to the thread structure. The first 'N'
+ * threads can be kept track of with a simple count. Do this soon.
+ */
+
+struct kse {
+	struct proc	*ke_proc;	/* (*) Associated process. */
+	struct ksegrp	*ke_ksegrp;	/* (*) Associated KSEG. */
+	TAILQ_ENTRY(kse) ke_kglist;	/* (*) Queue of KSEs in ke_ksegrp. */
+	TAILQ_ENTRY(kse) ke_kgrlist;	/* (*) Queue of KSEs in this state. */
+	TAILQ_ENTRY(kse) ke_procq;	/* (j/z) Run queue. */
+
+	struct thread	*ke_thread;	/* (*) Active associated thread. */
+#define	ke_startzero ke_flags
+	int		ke_flags;	/* (j) KEF_* flags. */
+	fixpt_t		ke_pctcpu;	/* (j) %cpu during p_swtime. */
+	u_char		ke_oncpu;	/* (j) Which cpu we are on. */
+	char		ke_rqindex;	/* (j) Run queue index. */
+	enum {
+		KES_UNUSED = 0x0,
+		KES_IDLE,
+		KES_ONRUNQ,
+		KES_UNQUEUED,		/* in transit */
+		KES_THREAD		/* slaved to thread state */
+	} ke_state;			/* (j) KSE status. */
+#define	ke_endzero ke_cpticks
+	int		ke_cpticks;	/* (j) Ticks of cpu time. */
+	struct runq	*ke_runq;	/* runq the kse is currently on */
+};
+
+/* flags kept in ke_flags */
+#define KEF_BOUND	0x00001	/* Stuck on a thread.. (as per normal procs) */
+#define	KEF_EXIT	0x00002	/* KSE is being killed. */
+#define	KEF_DIDRUN	0x00004	/* KSE actually ran. */
+
+/*
+ * Scheduler private extensions to thread, ksegrp and proc structures.
+ * These are invisible outside the scheduler.
+ * They are usually allocated beyond the end of the proc, thread or ksegrp
+ * structure and always accessed via an indirection. 
+ * This allows proc0 and friends to be allocated statically
+ * without knowing the size of the extension. This allows different
+ * schedulers to have different extensions.
+ * Proc0 case:    thread0:[...  ptr]   [td_sched0]
+ *                               |      ^
+ *                               \------/
+ *
+ * Usual case:     td->   [...  ptr|td_sched]  ( allocated together )
+ *                               |   ^
+ *                               \---/
+ */
+
+struct td_sched {
+	struct kse	*std_last_kse;	/* (j) Previous value of td_kse. */
+	struct kse	*std_kse;	/* (j) Current KSE if running. */
+};
+#define td_last_kse 	td_sched->std_last_kse
+#define td_kse		td_sched->std_kse
+
+struct kg_sched {
+	TAILQ_HEAD(, kse) skg_kseq;	/* (ke_kglist) All KSEs. */
+	TAILQ_HEAD(, kse) skg_iq;	/* (ke_kgrlist) All idle KSEs. */
+	struct thread	*skg_last_assigned; /* (j) Last thread assigned */
+					/* ( to a KSE). */
+	int		skg_runq_kses;	/* (j) Num KSEs on runq. */
+	int		skg_idle_kses;	/* (j) Num KSEs on iq. */
+	int		skg_kses;	/* (j) Num KSEs in group. */
+}; 
+#define	kg_kseq			kg_sched->skg_kseq
+#define	kg_iq			kg_sched->skg_iq
+#define	kg_last_assigned 	kg_sched->skg_last_assigned
+#define	kg_runq_kses		kg_sched->skg_runq_kses
+#define	kg_idle_kses		kg_sched->skg_idle_kses
+#define	kg_kses			kg_sched->skg_kses
+
+#define	FIRST_KSE_IN_KSEGRP(kg)	TAILQ_FIRST(&(kg)->kg_kseq)
+#define	FIRST_KSE_IN_PROC(p)	FIRST_KSE_IN_KSEGRP(FIRST_KSEGRP_IN_PROC(p))
+
+void	kse_free(struct kse *ke);
+void	kse_stash(struct kse *ke);
+void	kse_reassign(struct kse *ke);
+void	sched_fork_kse(struct thread *parenttd, struct kse *newke);
+void	sched_exit_kse(struct proc *parent, struct thread *childtd);
+
 #define KTR_4BSD	0x0
 
 /*
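
(Illustration, not part of the change: a minimal user-space sketch of the
"private extension" layout described by the td_sched/kg_sched comment in the
hunk above -- the base structure and its scheduler-private extension are
allocated in one block and always reached through a pointer, so statically
allocated objects such as thread0 can point at a separate static extension
instead. The toy_thread/toy_td_sched names are hypothetical.)

#include <stdio.h>
#include <stdlib.h>

struct toy_td_sched {
	int	std_dummy;		/* stand-in for per-thread scheduler state */
};

struct toy_thread {
	int			td_tid;
	struct toy_td_sched	*td_sched;	/* always used via this pointer */
};

/* Usual case: one allocation sized for both, pointer aimed at the tail. */
static struct toy_thread *
toy_thread_alloc(void)
{
	struct toy_thread *td;

	td = malloc(sizeof(*td) + sizeof(struct toy_td_sched));
	if (td == NULL)
		return (NULL);
	td->td_sched = (struct toy_td_sched *)(td + 1);
	return (td);
}

/* thread0 case: both objects are static, but access looks the same. */
static struct toy_td_sched td_sched0;
static struct toy_thread thread0 = { 0, &td_sched0 };

int
main(void)
{
	struct toy_thread *td = toy_thread_alloc();

	if (td == NULL)
		return (1);
	td->td_sched->std_dummy = 1;
	thread0.td_sched->std_dummy = 2;
	printf("%d %d\n", td->td_sched->std_dummy, thread0.td_sched->std_dummy);
	free(td);
	return (0);
}
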
@@ -65,14 +181,7 @@
 #endif
 #define	NICE_WEIGHT		1	/* Priorities per nice level. */
 
-struct ke_sched {
-	int		ske_cpticks;	/* (j) Ticks of cpu time. */
-	struct runq	*ske_runq;	/* runq the kse is currently on */
-};
-#define ke_runq 	ke_sched->ske_runq
-#define KEF_BOUND	KEF_SCHED1
-
-#define SKE_RUNQ_PCPU(ke)						\
+#define KE_RUNQ_PCPU(ke)						\
     ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq)
 
 /*
@@ -81,13 +190,7 @@
  */
 #define KSE_CAN_MIGRATE(ke)						\
     ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
-static struct ke_sched ke_sched;
 
-struct ke_sched *kse0_sched = &ke_sched;
-struct kg_sched *ksegrp0_sched = NULL;
-struct p_sched *proc0_sched = NULL;
-struct td_sched *thread0_sched = NULL;
-
 static int	sched_tdcnt;	/* Total runnable threads in the system. */
 static int	sched_quantum;	/* Roundrobin scheduling quantum in ticks. */
 #define	SCHED_QUANTUM	(hz / 10)	/* Default sched quantum */
@@ -169,6 +272,10 @@
 		curthread->td_flags |= TDF_NEEDRESCHED;
 }
 
+
+/****************************************************************
+ * BSD 4 based scheduling policy engine.			*
+ ***************************************************************/
 /*
  * Force switch among equal priority processes every 100ms.
  * We don't actually need to force a context switch of the current process.
@@ -338,20 +445,20 @@
 				 * stop recalculating its priority until
 				 * it wakes up.
 				 */
-				if (ke->ke_sched->ske_cpticks == 0)
+				if (ke->ke_cpticks == 0)
 					continue;
 #if	(FSHIFT >= CCPU_SHIFT)
 				ke->ke_pctcpu += (realstathz == 100)
-				    ? ((fixpt_t) ke->ke_sched->ske_cpticks) <<
+				    ? ((fixpt_t) ke->ke_cpticks) <<
 				    (FSHIFT - CCPU_SHIFT) :
-				    100 * (((fixpt_t) ke->ke_sched->ske_cpticks)
+				    100 * (((fixpt_t) ke->ke_cpticks)
 				    << (FSHIFT - CCPU_SHIFT)) / realstathz;
 #else
 				ke->ke_pctcpu += ((FSCALE - ccpu) *
-				    (ke->ke_sched->ske_cpticks *
+				    (ke->ke_cpticks *
 				    FSCALE / realstathz)) >> FSHIFT;
 #endif
-				ke->ke_sched->ske_cpticks = 0;
+				ke->ke_cpticks = 0;
 			} /* end of kse loop */
 			/* 
 			 * If there are ANY running threads in this KSEGRP,
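
(Illustration, not part of the change: the hunk above only renames
ske_cpticks to ke_cpticks, but the surrounding code is the once-per-second
%cpu estimate, where the stat-clock ticks accumulated in ke_cpticks are folded
into a decayed fixed-point average and then cleared. A toy user-space version
of the same shape, using illustrative constants rather than the kernel's
ccpu/FSCALE/CCPU_SHIFT values:)

#include <stdio.h>

#define	TOY_FSHIFT	11			/* fixed point: 1.0 == 1 << 11 */
#define	TOY_FSCALE	(1 << TOY_FSHIFT)

int
main(void)
{
	unsigned int pct = 0;			/* decayed estimate, TOY_FSCALE == 100% cpu */
	unsigned int keep = TOY_FSCALE / 2;	/* weight given to the old estimate */
	int stathz = 128;			/* statistics clock ticks per second */
	int cpticks[] = { 128, 128, 0, 0 };	/* ticks charged in each second */
	unsigned int sample;
	int i;

	for (i = 0; i < 4; i++) {
		/* Scale this second's ticks to a fixed-point fraction of one cpu. */
		sample = (unsigned int)cpticks[i] * TOY_FSCALE / stathz;
		/* Blend it with the aged previous estimate; the tick count then resets. */
		pct = (pct * keep + sample * (TOY_FSCALE - keep)) >> TOY_FSHIFT;
		printf("second %d: ~%u%% cpu\n", i, pct * 100 / TOY_FSCALE);
	}
	return (0);
}
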
@@ -468,7 +575,18 @@
 	sched_tdcnt++;
 }
 
-/* External interfaces start here */
+
+
+
+
+/*******************************************************************
+ * These functions represent entrypoints that a scheduler needs to *
+ * supply in order to give the rest of the system opportunities to *
+ * notify it of important events and to let it do housekeeping     *
+ * in a timely and efficient manner. Events include the creation,  *
+ * allocation and death of various entities, timer events, and     *
+ * direct calls for scheduler services.                            *
+ ******************************************************************/
 int
 sched_runnable(void)
 {
@@ -511,7 +629,7 @@
 	kg = td->td_ksegrp;
 	ke = td->td_kse;
 
-	ke->ke_sched->ske_cpticks++;
+	ke->ke_cpticks++;
 	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
 	if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
 		resetpriority(kg);
@@ -529,56 +647,62 @@
  * aggregated all the estcpu into the 'built-in' ksegrp.
  */
 void
-sched_exit(struct proc *p, struct proc *p1)
+sched_exit(struct proc *parent, struct thread *childtd)
 {
-	sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
-	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
-	sched_exit_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
+	sched_exit_ksegrp(parent, childtd);
+	sched_exit_thread(parent, childtd);
 }
 
+/* This PROBABLY gives the estcpu to the wrong kseg */
 void
-sched_exit_kse(struct kse *ke, struct kse *child)
+sched_exit_ksegrp(struct proc *parent, struct thread *child)
 {
-}
+	struct ksegrp *kg;
 
-void
-sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
-{
-
 	mtx_assert(&sched_lock, MA_OWNED);
-	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + child->kg_estcpu);
+	kg = FIRST_KSEGRP_IN_PROC(parent);  /* XXXKSE */ 
+	child->td_kse->ke_flags |= KEF_EXIT;
+	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + child->td_ksegrp->kg_estcpu);
 }
 
 void
-sched_exit_thread(struct thread *td, struct thread *child)
+sched_exit_thread(struct proc *parent, struct thread *childtd)
 {
-	if ((child->td_proc->p_flag & P_NOLOAD) == 0)
+	if ((childtd->td_proc->p_flag & P_NOLOAD) == 0)
 		sched_tdcnt--;
+	if (childtd->td_kse)
+		sched_exit_kse(parent, childtd);
 }
 
 void
-sched_fork(struct proc *p, struct proc *p1)
+sched_fork(struct thread *parenttd, struct proc *child)
 {
-	sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
-	sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
-	sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
+	struct thread *td2;
+	struct kse *ke2;
+
+	td2 = FIRST_THREAD_IN_PROC(child);
+	ke2 = FIRST_KSE_IN_PROC(child);
+	sched_fork_kse(parenttd, ke2);
+	sched_fork_ksegrp(parenttd, FIRST_KSEGRP_IN_PROC(child));
+	ke2->ke_thread = td2;
+	td2->td_kse = ke2;
+	/* sched_fork_thread(parenttd, td2);*/
 }
 
 void
-sched_fork_kse(struct kse *ke, struct kse *child)
+sched_fork_ksegrp(struct thread *parent, struct ksegrp *child)
 {
-	child->ke_sched->ske_cpticks = 0;
-}
+	struct ksegrp *kg;
 
-void
-sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
-{
 	mtx_assert(&sched_lock, MA_OWNED);
+	kg = parent->td_ksegrp;
 	child->kg_estcpu = kg->kg_estcpu;
 }
 
+
+
 void
-sched_fork_thread(struct thread *td, struct thread *child)
+sched_fork_thread(struct thread *parent, struct thread *child)
 {
 }
 
@@ -706,7 +830,7 @@
 		ke->ke_runq = &runq;
 	} else {
 		CTR1(KTR_4BSD, "adding kse:%p to pcpu runq", ke);
-		if (!SKE_RUNQ_PCPU(ke))
+		if (!KE_RUNQ_PCPU(ke))
 			ke->ke_runq = &runq_pcpu[PCPU_GET(cpuid)];
 	}
 #else
@@ -731,7 +855,7 @@
 
 	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
 		sched_tdcnt--;
-	runq_remove(ke->ke_sched->ske_runq, ke);
+	runq_remove(ke->ke_runq, ke);
 
 	ke->ke_state = KES_THREAD;
 	ke->ke_ksegrp->kg_runq_kses--;
@@ -838,24 +962,21 @@
 }
 
 int
-sched_sizeof_kse(void)
-{
-	return (sizeof(struct kse) + sizeof(struct ke_sched));
-}
-int
 sched_sizeof_ksegrp(void)
 {
-	return (sizeof(struct ksegrp));
+	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
 }
+
 int
 sched_sizeof_proc(void)
 {
 	return (sizeof(struct proc));
 }
+
 int
 sched_sizeof_thread(void)
 {
-	return (sizeof(struct thread));
+	return (sizeof(struct thread) + sizeof(struct td_sched));
 }
 
 fixpt_t
@@ -871,3 +992,926 @@
 
 	return (0);
 }
+
+
+
+static uma_zone_t kse_zone;
+
+struct  kse kse0;
+static struct kg_sched kg_sched0;
+static struct td_sched td_sched0;
+
+static struct kse * kse_alloc(void);
+static void	kse_link(struct proc *p, struct kse *ke, struct ksegrp *kg);
+static void	kse_unlink(struct kse *ke);
+
+extern struct mtx kse_zombie_lock;
+TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
+
+void
+sched_GC(void)
+{
+	struct kse *ke_first, *ke_next;
+
+	if (!TAILQ_EMPTY(&zombie_kses)) {
+		mtx_lock_spin(&kse_zombie_lock);
+		ke_first = TAILQ_FIRST(&zombie_kses);
+		if (ke_first)
+			TAILQ_INIT(&zombie_kses);
+		while (ke_first) {
+			ke_next = TAILQ_NEXT(ke_first, ke_procq);
+			kse_free(ke_first);
+			ke_first = ke_next;
+		}
+		mtx_unlock_spin(&kse_zombie_lock);
+	}
+}
+
+void
+schedinit(void)
+{
+	/*
+	 * Set up the scheduler specific parts of proc0.
+	 */
+	ksegrp0.kg_sched = &kg_sched0;
+	proc0.p_sched = NULL; /* XXX */
+	thread0.td_sched = &td_sched0;
+
+	proc_linkup(&proc0, &ksegrp0, &thread0);
+	kse_link(&proc0, &kse0, &ksegrp0);
+	kse0.ke_thread = &thread0;
+	thread0.td_kse = &kse0; /* we are running */
+	kse0.ke_state = KES_THREAD;
+
+	kse_zone = uma_zcreate("KSE", sizeof(struct kse),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
+}
+
+/* 
+ * for now have special thr code
+ * later on, clean these up into common code.
+ */
+#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
+int
+sched_thr_newthread(struct thread *td, struct thread *td0, int flags)
+{
+	struct kse *ke0;
+	/* Initialize our kse structure. */
+	ke0 = kse_alloc();
+	bzero(&ke0->ke_startzero,
+	    RANGEOF(struct kse, ke_startzero, ke_endzero));
+
+	/* Link the thread and kse into the ksegrp and make it runnable. */
+	mtx_lock_spin(&sched_lock);
+
+	thread_link(td0, td->td_ksegrp);
+	kse_link(td->td_proc, ke0, td->td_ksegrp);
+
+	/* Bind this thread and kse together. */
+	td0->td_kse = ke0;
+	ke0->ke_thread = td0;
+
+	sched_fork_kse(td, ke0);
+	sched_fork_thread(td, td0);
+
+	TD_SET_CAN_RUN(td0);
+	if ((flags & THR_SUSPENDED) == 0)
+		setrunqueue(td0);
+
+	mtx_unlock_spin(&sched_lock);
+	return (0);	/* the API could fail but not in this case */
+}
+#undef RANGEOF
+
+void
+sched_thr_exit(struct thread *td)
+{
+	struct kse *ke;
+
+	ke = td->td_kse;
+
+	ke->ke_state = KES_UNQUEUED;
+	ke->ke_thread = NULL;
+	kse_unlink(ke);
+	sched_exit_kse(td->td_proc->p_pptr, td);
+	td->td_kse = NULL;
+	td->td_last_kse = NULL;
+}
+
+/*
+ * Allocate a kse.
+ */
+static struct kse *
+kse_alloc(void)
+{
+	return (uma_zalloc(kse_zone, M_WAITOK));
+}
+
+/*
+ * Deallocate a kse.
+ */
+void
+kse_free(struct kse *ke)
+{
+	uma_zfree(kse_zone, ke);
+}
+
+/*
+ * Stash an embarrassingly extra kse into the zombie kse queue.
+ */
+void
+kse_stash(struct kse *ke)
+{
+	mtx_lock_spin(&kse_zombie_lock);
+	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
+	mtx_unlock_spin(&kse_zombie_lock);
+}
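
(Illustration, not part of the change: kse_stash() and sched_GC() above
implement a deferred-free "zombie" list -- a KSE that cannot be freed in its
current context is parked on zombie_kses under kse_zombie_lock and reaped
later in one batch. A user-space sketch of the same pattern follows, with a
pthread mutex standing in for the kernel spin lock and the list detached
before the frees; the toy_* names are hypothetical.)

#include <sys/queue.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_kse {
	int			id;
	TAILQ_ENTRY(toy_kse)	link;
};

static TAILQ_HEAD(, toy_kse) zombies = TAILQ_HEAD_INITIALIZER(zombies);
static pthread_mutex_t zombie_lock = PTHREAD_MUTEX_INITIALIZER;

/* Analogue of kse_stash(): park the object instead of freeing it here. */
static void
toy_stash(struct toy_kse *ke)
{
	pthread_mutex_lock(&zombie_lock);
	TAILQ_INSERT_HEAD(&zombies, ke, link);
	pthread_mutex_unlock(&zombie_lock);
}

/* Analogue of sched_GC(): detach the whole list, then free at leisure. */
static void
toy_gc(void)
{
	struct toy_kse *ke, *next;

	pthread_mutex_lock(&zombie_lock);
	ke = TAILQ_FIRST(&zombies);
	TAILQ_INIT(&zombies);
	pthread_mutex_unlock(&zombie_lock);

	while (ke != NULL) {
		next = TAILQ_NEXT(ke, link);
		printf("reaping zombie kse %d\n", ke->id);
		free(ke);
		ke = next;
	}
}

int
main(void)
{
	struct toy_kse *ke;
	int i;

	for (i = 0; i < 3; i++) {
		ke = malloc(sizeof(*ke));
		if (ke == NULL)
			return (1);
		ke->id = i;
		toy_stash(ke);
	}
	toy_gc();
	return (0);
}
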
+
+/*
+ * KSE is linked into kse group.   
+ */
+void
+kse_link(struct proc *p, struct kse *ke, struct ksegrp *kg)
+{
+	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
+	kg->kg_kses++;
+	ke->ke_state    = KES_UNQUEUED;
+	ke->ke_proc     = p;
+	ke->ke_ksegrp   = kg;
+	ke->ke_thread   = NULL;
+	ke->ke_oncpu    = NOCPU;
+	ke->ke_flags    = 0;
+}
+
+int
+sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
+{
+	struct kse *ke;
+
+	ke = kse_alloc();
+	if (ke) {
+		kse_link(p, ke, kg);
+		td->td_kse = ke;
+		ke->ke_thread = td;
+		return (0);
+	}
+	return (ENOMEM);
+}
+
+/* Assumes kg->kg_sched is already set up */
+void
+sched_newkseg(struct ksegrp *kg)
+{
+
+	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
+	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
+	kg->kg_kses       = 0;
+	kg->kg_runq_kses  = 0; /* XXXKSE change name */
+	kg->kg_idle_kses  = 0;
+}
+
+/* Assumes td->td_sched is already set up */
+void
+sched_newthread(struct thread *td)
+{
+	td->td_last_kse = NULL;
+	td->td_kse      = NULL;
+}
+
+void
+kse_unlink(struct kse *ke)
+{
+	struct ksegrp *kg;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	kg = ke->ke_ksegrp;
+	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
+	if (ke->ke_state == KES_IDLE) {
+		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+		kg->kg_idle_kses--;
+	}
+	if (--kg->kg_kses == 0)
+		ksegrp_unlink(kg);
+	/*
+	 * Aggregate stats from the KSE
+	 */
+	kse_stash(ke);
+}
+
+/* need the thread to know if we are on that ksegrp or not. */
+void
+sched_clean_ksegrp(struct ksegrp *kg, struct thread *td)
+{
+	struct kse *ke;
+
+	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
+		KASSERT(ke->ke_state == KES_IDLE,
+			("%s: wrong idle KSE state", __func__));
+		kse_unlink(ke);
+	}
+	KASSERT((kg->kg_kses == 1),
+		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
+	KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
+	        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
+	        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
+}
+
+
+#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
+
+/* new version of sched-fork() */
+void
+sched_fork_kse(struct thread *parenttd, struct kse *ke2)
+{
+	struct ksegrp *kg2;
+
+	kg2 = ke2->ke_ksegrp;
+	bzero(&ke2->ke_startzero,
+	    (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
+	ke2->ke_state = KES_THREAD;
+	ke2->ke_cpticks = 0;
+	kg2->kg_estcpu = parenttd->td_ksegrp->kg_estcpu;
+}
+
+/*
+ * Try to handle the generic case where there may be > 1 kseg even
+ * though that should never happen. It should be cheap to do so.
+ */
+void
+sched_destroyproc(struct proc *p)
+{
+	struct ksegrp *kg;
+	struct kse *ke;
+
+	/* remove all the kses we can find and free them */
+	FOREACH_KSEGRP_IN_PROC(p, kg) {
+		while (!TAILQ_EMPTY(&kg->kg_kseq)) {
+			ke = TAILQ_FIRST(&kg->kg_kseq);
+			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
+			if (ke->ke_thread) 
+				ke->ke_thread->td_kse = NULL;
+			kse_free(ke);
+		}
+	}
+}
+
+void
+sched_exit_kse(struct proc *parent, struct thread *childtd)
+{
+	struct ksegrp *kg;
+	struct kse *ke;
+
+	kg = childtd->td_ksegrp;
+	ke = childtd->td_kse;
+	KASSERT((ke), ("unexpected null KSE ptr in sched_exit_kse()"));
+	ke->ke_state = KES_UNQUEUED;
+	ke->ke_thread = NULL;
+	/*
+	 * Decide what to do with the KSE attached to this thread.
+	 */
+	if (ke->ke_flags & KEF_EXIT) {
+		kse_unlink(ke);
+		if (kg->kg_kses == 0) {
+			sched_exit_ksegrp(parent, childtd); /* XXXKSE */
+			ksegrp_unlink(kg);
+		}
+	} else {
+		kse_reassign(ke);
+	}
+	childtd->td_kse = NULL;
+	childtd->td_last_kse = NULL;
+}
+
+void
+sched_set_concurrancy(struct ksegrp *kg, int concurrancy)
+{
+	struct kse *newke;
+
+	while (kg->kg_kses < concurrancy) {
+		newke = kse_alloc();
+		bzero(&newke->ke_startzero, RANGEOF(struct kse,
+		      ke_startzero, ke_endzero));
+#if 0
+		mtx_lock_spin(&sched_lock);
+		bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
+		      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
+		mtx_unlock_spin(&sched_lock);
+#endif
+		mtx_lock_spin(&sched_lock);
+		kse_link(kg->kg_proc, newke, kg);
+		sched_fork_kse(curthread, newke);
+		/* Add engine */
+		kse_reassign(newke);
+		mtx_unlock_spin(&sched_lock);
+	}
+}
+
+/* From kern_switch.c
+ * Parts Copyright (c) 2001 Jake Burkholder <jake at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/************************************************************************
+ * Functions that manipulate runnability from a thread perspective.	*
+ * These are a 'threading fairness' front-end to the BSD scheduler	*
+ ************************************************************************/
+/***
+Here is the logic..
+
+If there are N processors, then there are at most N KSEs (kernel
+schedulable entities) working to process threads that belong to a
+KSEGROUP (kg). If there are X of these KSEs actually running at the
+moment in question, then there are at most M (N-X) of these KSEs on
+the run queue, as running KSEs are not on the queue.
+
+Runnable threads are queued off the KSEGROUP in priority order.
+If there are M or more threads runnable, the top M threads
+(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
+their priority from those threads and are put on the system run queue.
+
+The last thread that had a priority high enough to have a KSE associated
+with it, AND IS ON THE RUN QUEUE is pointed to by
+kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
+assigned, either because all the available KSEs are actively running or
+because there are no threads queued, that pointer is NULL.
+
+When a KSE is removed from the run queue to become runnable, we know
+it was associated with the highest priority thread in the queue (at the head
+of the queue). If it is also the last assigned we know M was 1 and must
+now be 0. Since the thread is no longer queued that pointer must be
+removed from it. Since we know there were no more KSEs available,
+(M was 1 and is now 0) and since we are not FREEING our KSE
+but using it, we know there are STILL no more KSEs available, we can prove
+that the next thread in the ksegrp list will not have a KSE to assign to
+it, so we can show that the pointer must be made 'invalid' (NULL).
+
+The pointer exists so that when a new thread is made runnable, it can
+have its priority compared with the last assigned thread to see if
+it should 'steal' its KSE or not, i.e. is it 'earlier'
+on the list than that thread or later. If it's earlier, then the KSE is
+removed from the last assigned (which is now not assigned a KSE)
+and reassigned to the new thread, which is placed earlier in the list.
+The pointer is then backed up to the previous thread (which may or may not
+be the new thread).
+
+When a thread sleeps or is removed, the KSE becomes available and if there 
+are queued threads that are not assigned KSEs, the highest priority one of
+them is assigned the KSE, which is then placed back on the run queue at
+the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
+to point to it.
+
+The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
+              \    \____   
+               \        \
+    KSEGROUP---thread--thread--thread    (queued in priority order)
+        \                 / 
+         \_______________/
+          (last_assigned)
+
+The result of this scheme is that the M available KSEs are always
+queued at the priorities they have inherited from the M highest priority
+threads for that KSEGROUP. If this situation changes, the KSEs are 
+reassigned to keep this true.
+***/
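
(Illustration, not part of the change: a user-space toy of the invariant
described above. Runnable threads sit in priority order, the first M of them
hold the M spare KSEs, and kg_last_assigned names the last queued thread that
holds one; when a higher-priority thread is made runnable it displaces the old
last-assigned thread, which goes back to waiting for a KSE. Running it shows
last_assigned moving from the priority-20 thread to the new priority-15 one.
The names used here (pri[], show(), NTHREADS) are illustrative, not kernel
code.)

#include <stdio.h>

#define	NTHREADS	8

static void
show(int *pri, int n, int nkse)
{
	int i, assigned;

	assigned = (n < nkse) ? n : nkse;
	for (i = 0; i < n; i++)
		printf("  pri %3d %s\n", pri[i], (i < assigned) ?
		    "(holds a KSE, queued on the system runq)" :
		    "(runnable, but no KSE yet)");
	if (assigned > 0)
		printf("  last_assigned -> pri %d\n", pri[assigned - 1]);
	else
		printf("  last_assigned -> NULL\n");
}

int
main(void)
{
	int pri[NTHREADS] = { 10, 20, 30 };	/* queued in priority order, lower is better */
	int n = 3, nkse = 2;			/* M == 2 KSEs not currently running */
	int newpri = 15, i, j;

	printf("before:\n");
	show(pri, n, nkse);

	/* A new thread becomes runnable: insert it in priority order. */
	for (i = 0; i < n && pri[i] <= newpri; i++)
		;
	for (j = n; j > i; j--)
		pri[j] = pri[j - 1];
	pri[i] = newpri;
	n++;

	/*
	 * The KSE holders are always the first M queued threads, so the
	 * newcomer takes over the KSE of the old last_assigned thread.
	 */
	printf("after a priority %d thread is made runnable:\n", newpri);
	show(pri, n, nkse);
	return (0);
}
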
+
+
+CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
+
+#if 0
+static void runq_readjust(struct runq *rq, struct kse *ke);
+#endif
+/*
+ * Select the KSE that will be run next.  From that find the thread, and
+ * remove it from the KSEGRP's run queue.  If there is thread clustering,
+ * this will be what does it.
+ * XXX Change to take an argument indicating 
+ * if the switch is voluntary or involuntary.
+ */
+struct thread *
+choosethread(void)
+{
+	struct kse *ke;
+	struct thread *td;
+	struct ksegrp *kg;
+
+#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
+	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
+		/* Shutting down, run idlethread on AP's */
+		td = PCPU_GET(idlethread);
+		ke = td->td_kse;
+		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+		ke->ke_flags |= KEF_DIDRUN;
+		TD_SET_RUNNING(td);
+		return (td);
+	}
+#endif
+
+retry:
+	ke = sched_choose();
+	if (ke) {
+		td = ke->ke_thread;
+		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
+		kg = ke->ke_ksegrp;
+		if (td->td_proc->p_flag & P_SA) {
+			if (kg->kg_last_assigned == td) {
+				kg->kg_last_assigned = TAILQ_PREV(td,
+				    threadqueue, td_runq);
+			}
+			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+		}
+		kg->kg_runnable--;
+		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
+		    td, td->td_priority);
+	} else {
+		/* Simulate runq_choose() having returned the idle thread */
+		td = PCPU_GET(idlethread);
+		ke = td->td_kse;
+		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+	}
+	ke->ke_flags |= KEF_DIDRUN;
+
+	/*
+	 * If we are in panic, only allow system threads,
+	 * plus the one we are running in, to be run.
+	 */
+	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
+	    (td->td_flags & TDF_INPANIC) == 0)) {
+		/* note that it is no longer on the run queue */
+		TD_SET_CAN_RUN(td);
+		goto retry;
+	}
+
+	TD_SET_RUNNING(td);
+	return (td);
+}
+
+/*
+ * Given a surplus KSE, either assign a new runnable thread to it
+ * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
+ * Assumes that the original thread is not runnable.
+ */
+void
+kse_reassign(struct kse *ke)
+{
+	struct ksegrp *kg;
+	struct thread *td;
+	struct thread *original;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	original = ke->ke_thread;
+	KASSERT(original == NULL || TD_IS_INHIBITED(original),
+    	    ("reassigning KSE with runnable thread"));
+	kg = ke->ke_ksegrp;
+	if (original)
+		original->td_kse = NULL;
+
+	/*
+	 * Find the first unassigned thread
+	 */
+	if ((td = kg->kg_last_assigned) != NULL)
+		td = TAILQ_NEXT(td, td_runq);
+	else 
+		td = TAILQ_FIRST(&kg->kg_runq);
+
+	/*
+	 * If we found one, assign it the kse, otherwise idle the kse.
+	 */
+	if (td) {
+		kg->kg_last_assigned = td;
+		td->td_kse = ke;
+		ke->ke_thread = td;
+		sched_add(td);
+		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
+		return;
+	}
+
+	ke->ke_state = KES_IDLE;
+	ke->ke_thread = NULL;
+	TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
+	kg->kg_idle_kses++;
+	CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
+	return;
+}
+
+#if 0
+/*
+ * Remove a thread from its KSEGRP's run queue.
+ * This in turn may remove it from a KSE if it was already assigned
+ * to one, possibly causing a new thread to be assigned to the KSE
+ * and the KSE getting a new priority.
+ */
+static void
+remrunqueue(struct thread *td)
+{
+	struct thread *td2, *td3;
+	struct ksegrp *kg;
+	struct kse *ke;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
+	kg = td->td_ksegrp;
+	ke = td->td_kse;
+	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
+	kg->kg_runnable--;
+	TD_SET_CAN_RUN(td);
+	/*
+	 * If it is not a threaded process, take the shortcut.
+	 */
+	if ((td->td_proc->p_flag & P_SA) == 0) {
+		/* Bring its kse with it, leave the thread attached */
+		sched_rem(td);
+		ke->ke_state = KES_THREAD; 
+		return;
+	}
+   	td3 = TAILQ_PREV(td, threadqueue, td_runq);
+	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+	if (ke) {
+		/*
+		 * This thread has been assigned to a KSE.
+		 * We need to dissociate it and try assign the
+		 * KSE to the next available thread. Then, we should
+		 * see if we need to move the KSE in the run queues.
+		 */
+		sched_rem(td);
+		ke->ke_state = KES_THREAD; 
+		td2 = kg->kg_last_assigned;
+		KASSERT((td2 != NULL), ("last assigned has wrong value"));
+		if (td2 == td) 
+			kg->kg_last_assigned = td3;
+		kse_reassign(ke);
+	}
+}
+#endif
+
+/*
+ * Change the priority of a thread that is on the run queue.
+ */
+void
+adjustrunqueue(struct thread *td, int newpri)
+{
+	struct ksegrp *kg;
+	struct kse *ke;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
+
+	ke = td->td_kse;
+	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
+	/*
+	 * If it is not a threaded process, take the shortcut.
+	 */
+	if ((td->td_proc->p_flag & P_SA) == 0) {
+		/* We only care about the kse in the run queue. */
+		td->td_priority = newpri;
+		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
+			sched_rem(td);
+			sched_add(td);

>>> TRUNCATED FOR MAIL (1000 lines) <<<

