PERFORCE change 55771 for review
Julian Elischer
julian at FreeBSD.org
Fri Jun 25 07:33:09 GMT 2004
http://perforce.freebsd.org/chv.cgi?CH=55771
Change 55771 by julian at julian_jules1 on 2004/06/25 07:32:03
safety save before churn
nowhere near finished (it doesn't even compile)
Affected files ...
.. //depot/projects/nsched/sys/kern/sched_4bsd.c#18 edit
.. //depot/projects/nsched/sys/kern/sched_ule.c#7 edit
Differences ...
==== //depot/projects/nsched/sys/kern/sched_4bsd.c#18 (text+ko) ====
@@ -52,12 +52,6 @@
#include <sys/queue.h>
#include <machine/critical.h>
#include <sys/thr.h> /* XXXKSE */
-#if 0
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#endif
#include <vm/uma.h>
#include <machine/critical.h>
==== //depot/projects/nsched/sys/kern/sched_ule.c#7 (text+ko) ====
@@ -1,3 +1,4 @@
+
/*-
* Copyright (c) 2002-2003, Jeffrey Roberson <jeff at freebsd.org>
* All rights reserved.
@@ -34,6 +35,7 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/queue.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
@@ -55,6 +57,10 @@
#define KTR_ULE KTR_NFS
+#include <vm/uma.h>
+#include <machine/critical.h>
+
+
/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
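(As an arithmetic check on the comment above: ccpu = exp(-1/20) ~= 0.9512, and schedcpu() applies it once per second, so after 60 seconds a contribution is scaled by exp(-60/20) = exp(-3) ~= 0.05, i.e. about 95% decayed.)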
@@ -78,45 +84,221 @@
* These datastructures are allocated within their parent datastructure but
* are scheduler specific.
*/
+/*-
+ * Description of a process.
+ *
+ * Below is a key of locks used to protect each member of struct proc. The
+ * lock is indicated by a reference to a specific character in parens in the
+ * associated comment.
+ * * - not yet protected
+ * a - only touched by curproc or parent during fork/wait
+ * b - created at fork, never changes
+ * (exception aiods switch vmspaces, but they are also
+ * marked 'P_SYSTEM' so hopefully it will be left alone)
+ * c - locked by proc mtx
+ * d - locked by allproc_lock lock
+ * e - locked by proctree_lock lock
+ * f - session mtx
+ * g - process group mtx
+ * h - callout_lock mtx
+ * i - by curproc or the master session mtx
+ * j - locked by sched_lock mtx
+ * k - only accessed by curthread
+ * l - the attaching proc or attaching proc parent
+ * m - Giant
+ * n - not locked, lazy
+ * o - ktrace lock
+ * p - select lock (sellock)
+ * q - td_contested lock
+ * r - p_peers lock
+ * x - created at fork, only changes during single threading in exec
+ * z - zombie threads/kse/ksegroup lock
+ *
+ */
+/***************
+ * In pictures:
+ With a single run queue used by all processors:
+
+ RUNQ: --->KSE---KSE--... SLEEPQ:[]---THREAD---THREAD---THREAD
+ | / []---THREAD
+ KSEG---THREAD--THREAD--THREAD []
+ []---THREAD---THREAD
+
+ (processors run THREADs from the KSEG until they are exhausted or
+ the KSEG exhausts its quantum)
+
+With PER-CPU run queues:
KSEs would be placed on the separate run queues directly.
They would be given priorities calculated from the KSEG.
+
+ *
+ *****************/
+/************************************************************************
+ * Definitions of the run queues we use here.
+ */
+
+/*
+ * Copyright (c) 2001 Jake Burkholder <jake at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/sys/runq.h,v 1.4 2002/05/25 01:12:23 jake Exp $
+ */
+
+#ifndef _RUNQ_H_
+#define _RUNQ_H_
+
+#include <machine/runq.h>
+
+struct kse;
+
+/*
+ * Run queue parameters.
+ */
+
+#define RQ_NQS (64) /* Number of run queues. */
+#define RQ_PPQ (4) /* Priorities per queue. */
-struct ke_sched {
- int ske_slice;
- struct runq *ske_runq;
+/*
+ * Head of run queues.
+ */
+TAILQ_HEAD(rqhead, kse);
+
+/*
+ * Bit array which maintains the status of a run queue. When a queue is
+ * non-empty the bit corresponding to the queue number will be set.
+ */
+struct rqbits {
+ rqb_word_t rqb_bits[RQB_LEN];
+};
+
+/*
+ * Run queue structure. Contains an array of run queues on which processes
+ * are placed, and a structure to maintain the status of each queue.
+ */
+struct runq {
+ struct rqbits rq_status;
+ struct rqhead rq_queues[RQ_NQS];
+};
+
+#endif /* end of Jake copyright file */
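
As an aside, a minimal sketch of how the rq_status bits track queue occupancy, assuming the RQB_WORD()/RQB_BIT() macros from <machine/runq.h>; this mirrors the stock runq helpers and is not part of this change:

static __inline void
runq_setbit(struct runq *rq, int pri)
{
	/* Mark queue `pri' non-empty in the status bit array. */
	rq->rq_status.rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	/* Clear the bit again once queue `pri' drains. */
	rq->rq_status.rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}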
+
+
+/*
+ * The schedulable entity that can be given a context to run.
+ * A process may have several of these. Probably one per processor
+ * but possibly a few more. In this universe they are grouped
+ * with a KSEG that contains the priority and niceness
+ * for the group.
+ */
+struct kse {
+ struct proc *ke_proc; /* (*) Associated process. */
+ struct ksegrp *ke_ksegrp; /* (*) Associated KSEG. */
+ TAILQ_ENTRY(kse) ke_kglist; /* (*) Queue of KSEs in ke_ksegrp. */
+ TAILQ_ENTRY(kse) ke_kgrlist; /* (*) Queue of KSEs in this state. */
+ TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */
+
+#define ke_startzero ke_flags
+ int ke_flags; /* (j) KEF_* flags. */
+ struct thread *ke_thread; /* (*) Active associated thread. */
+ fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */
+ u_char ke_oncpu; /* (j) Which cpu we are on. */
+ char ke_rqindex; /* (j) Run queue index. */
+ enum {
+ KES_UNUSED = 0x0,
+ KES_IDLE,
+ KES_ONRUNQ,
+ KES_UNQUEUED, /* in transit */
+ KES_THREAD /* slaved to thread state */
+ } ke_state; /* (j) KSE status. */
+	int ke_cpticks; /* (j) Ticks of cpu time. */
+#define ke_endzero ke_dummy
+ u_char ke_dummy;
+ int ke_slice;
+ struct runq *ke_runq;
/* The following variables are only used for pctcpu calculation */
- int ske_ltick; /* Last tick that we were running on */
- int ske_ftick; /* First tick that we were running on */
- int ske_ticks; /* Tick count */
+ int ke_ltick; /* Last tick that we were running on */
+ int ke_ftick; /* First tick that we were running on */
+ int ke_ticks; /* Tick count */
/* CPU that we have affinity for. */
- u_char ske_cpu;
+ u_char ke_cpu;
};
-#define ke_slice ke_sched->ske_slice
-#define ke_runq ke_sched->ske_runq
-#define ke_ltick ke_sched->ske_ltick
-#define ke_ftick ke_sched->ske_ftick
-#define ke_ticks ke_sched->ske_ticks
-#define ke_cpu ke_sched->ske_cpu
-#define ke_assign ke_procq.tqe_next
+
+/* flags kept in ke_flags */
+#define KEF_ASSIGNED 0x00001 /* KSE is being migrated. */
+#define KEF_BOUND 0x00002 /* KSE can not migrate. */
+#define KEF_DIDRUN 0x02000 /* KSE actually ran. */
+#define KEF_EXIT 0x04000 /* KSE is being killed. */
+
+#define FIRST_KSE_IN_KSEGRP(kg) TAILQ_FIRST(&(kg)->kg_kseq)
+#define FIRST_KSE_IN_PROC(p) FIRST_KSE_IN_KSEGRP(FIRST_KSEGRP_IN_PROC(p))
+
+static struct kse kse0; /* Primary kse in proc0. */
+static struct kse *kse_alloc(void);
+static void kse_free(struct kse *ke);
+static void kse_stash(struct kse *ke);
+static void kse_unlink(struct kse *ke);
+static void kse_reassign(struct kse *ke);
+static void kse_link(struct kse *ke, struct ksegrp *kg);
-#define KEF_ASSIGNED KEF_SCHED0 /* KSE is being migrated. */
-#define KEF_BOUND KEF_SCHED1 /* KSE can not migrate. */
+/*
+ * Scheduler specific extensions to various structures.
+ */
struct kg_sched {
int skg_slptime; /* Number of ticks we vol. slept */
int skg_runtime; /* Number of ticks we were running */
+ TAILQ_HEAD(, kse) skg_kseq; /* (ke_kglist) All KSEs. */
+ TAILQ_HEAD(, kse) skg_iq; /* (ke_kgrlist) All idle KSEs. */
+	struct thread *skg_last_assigned; /* (j) Last thread assigned to a KSE. */
+ int skg_runq_kses; /* (j) Num KSEs on runq. */
+ int skg_idle_kses; /* (j) Num KSEs on iq. */
+ int skg_kses; /* (j) Num KSEs in group. */
+	int skg_concurrancy; /* (j) Desired concurrency. */
};
#define kg_slptime kg_sched->skg_slptime
#define kg_runtime kg_sched->skg_runtime
+#define kg_kseq kg_sched->skg_kseq
+#define kg_iq kg_sched->skg_iq
+#define kg_last_assigned kg_sched->skg_last_assigned
+#define kg_runq_kses kg_sched->skg_runq_kses
+#define kg_idle_kses kg_sched->skg_idle_kses
+#define kg_kses kg_sched->skg_kses
+
struct td_sched {
int std_slptime;
+ struct kse *std_last_kse; /* (j) Previous value of td_kse. */
+ struct kse *std_kse; /* (j) Current KSE if running. */
};
#define td_slptime td_sched->std_slptime
+#define td_last_kse td_sched->std_last_kse
+#define td_kse td_sched->std_kse
struct td_sched td_sched;
-struct ke_sched ke_sched;
struct kg_sched kg_sched;
-struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;
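
The statics above serve proc0/thread0 only; for ordinary processes the sched_sizeof_*() functions later in this file size a single allocation to hold both the parent structure and its scheduler-private extension. A sketch of the assumed attach step (illustration only; the actual code is not shown in this diff):

/*
 * Sketch, assuming the allocation is sized by sched_sizeof_ksegrp():
 * the private part sits immediately behind the ksegrp, so linking
 * the two is just pointer arithmetic.
 */
static void
ksegrp_sched_attach(struct ksegrp *kg)
{
	kg->kg_sched = (struct kg_sched *)(kg + 1);
	bzero(kg->kg_sched, sizeof(struct kg_sched));
}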
@@ -286,6 +468,31 @@
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
+#endif
+
+static void runq_add(struct runq *, struct kse *);
+static int runq_check(struct runq *);
+static struct kse *runq_choose(struct runq *);
+static void runq_init(struct runq *);
+static void runq_remove(struct runq *, struct kse *);
+
+
+static void setup_runqs(void);
+static void roundrobin(void *arg);
+static void schedcpu(void);
+static void schedcpu_thread(void);
+static void maybe_resched(struct thread *td);
+static void updatepri(struct ksegrp *kg);
+static void resetpriority(struct ksegrp *kg);
+static void sched_add(struct thread *td);
+static void sched_rem(struct thread *td);
+static struct kse * sched_choose(void);
+static void adjustrunqueue(struct thread *td, int newpri);
+
+static void sched_fork_kse(struct thread *td, struct kse *child);
+static void sched_exit_kse(struct kse *ke, struct thread *td);
+
+#ifdef SMP
/*
* On P4 Xeons the round-robin interrupt delivery is broken. As a result of
* this, we can't pin interrupts to the cpu that they were delivered to,
@@ -301,6 +508,7 @@
#endif /* !__i386__ */
#endif
+#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
void
kseq_print(int cpu)
{
@@ -1123,7 +1331,7 @@
}
void
-sched_switch(struct thread *td)
+sched_switch(struct thread *td, int flags)
{
struct thread *newtd;
struct kse *ke;
@@ -1158,7 +1366,7 @@
kse_reassign(ke);
}
}
- newtd = choosethread();
+ newtd = choosethread(flags);
if (td != newtd)
cpu_switch(td, newtd);
sched_lock.mtx_lock = (uintptr_t)td;
@@ -1247,20 +1455,25 @@
* priority.
*/
void
-sched_fork(struct proc *p, struct proc *p1)
+sched_fork(struct thread *td, struct proc *p1)
{
+ struct proc *p;
+
+ p = td->td_proc;
+
mtx_assert(&sched_lock, MA_OWNED);
p1->p_nice = p->p_nice;
- sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
- sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
- sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
+ sched_fork_ksegrp(td, FIRST_KSEGRP_IN_PROC(p1));
+ sched_fork_kse(td, FIRST_KSE_IN_PROC(p1));
+ sched_fork_thread(td, FIRST_THREAD_IN_PROC(p1));
}
-void
-sched_fork_kse(struct kse *ke, struct kse *child)
+static void
+sched_fork_kse(struct thread *td, struct kse *child)
{
+ struct kse *ke = td->td_kse;
child->ke_slice = 1; /* Attempt to quickly learn interactivity. */
child->ke_cpu = ke->ke_cpu;
@@ -1273,8 +1486,10 @@
}
void
-sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
+sched_fork_ksegrp(struct thread *td, struct ksegrp *child)
{
+ struct ksegrp *kg = td->td_ksegrp;
+
PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);
child->kg_slptime = kg->kg_slptime;
@@ -1290,11 +1505,6 @@
}
void
-sched_fork_thread(struct thread *td, struct thread *child)
-{
-}
-
-void
sched_class(struct ksegrp *kg, int class)
{
struct kseq *kseq;
@@ -1348,29 +1558,31 @@
* Return some of the child's priority and interactivity to the parent.
*/
void
-sched_exit(struct proc *p, struct proc *child)
+sched_exit(struct proc *p, struct thread *td)
{
mtx_assert(&sched_lock, MA_OWNED);
- sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child));
- sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child));
+ sched_exit_kse(FIRST_KSE_IN_PROC(p), td);
+ sched_exit_ksegrp(p, td);
}
void
-sched_exit_kse(struct kse *ke, struct kse *child)
+sched_exit_kse(struct kse *ke, struct thread *td)
{
- kseq_load_rem(KSEQ_CPU(child->ke_cpu), child);
+	kseq_load_rem(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
}
void
-sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
+sched_exit_ksegrp(struct proc *p, struct thread *td)
{
- /* kg->kg_slptime += child->kg_slptime; */
- kg->kg_runtime += child->kg_runtime;
+ struct ksegrp *kg = FIRST_KSEGRP_IN_PROC(p);
+
+ /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */
+ kg->kg_runtime += td->td_ksegrp->kg_runtime;
sched_interact_update(kg);
}
void
-sched_exit_thread(struct thread *td, struct thread *child)
+sched_exit_thread(struct proc *p, struct thread *child)
{
}
@@ -1726,12 +1938,6 @@
}
int
-sched_sizeof_kse(void)
-{
- return (sizeof(struct kse) + sizeof(struct ke_sched));
-}
-
-int
sched_sizeof_ksegrp(void)
{
return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
@@ -1748,3 +1954,1062 @@
{
return (sizeof(struct thread) + sizeof(struct td_sched));
}
+
+/*
+ * Copyright (c) 2001 Jake Burkholder <jake at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/***
+Here is the logic..
+
+If there are N processors, then there are at most N KSEs (kernel
+schedulable entities) working to process threads that belong to a
+KSEGROUP (kg). If there are X of these KSEs actually running at the
+moment in question, then there are at most M = (N - X) of these KSEs on
+the run queue, as running KSEs are not on the queue.
+
+Runnable threads are queued off the KSEGROUP in priority order.
+If there are M or more threads runnable, the top M threads
+(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
+their priority from those threads and are put on the run queue.
+
+The last thread that had a priority high enough to have a KSE associated
+with it, AND IS ON THE RUN QUEUE is pointed to by
+kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
+assigned, either because all the available KSEs are actively running or
+because there are no threads queued, that pointer is NULL.
+
+When a KSE is removed from the run queue to become runnable, we know
+it was associated with the highest priority thread in the queue (at the head
+of the queue). If it is also the last assigned we know M was 1 and must
+now be 0. Since the thread is no longer queued, that pointer must be
+removed from it. Since we know there were no more KSEs available
+(M was 1 and is now 0), and since we are not FREEING our KSE
+but using it, we know there are STILL no more KSEs available; we can prove
+that the next thread in the ksegrp list will not have a KSE to assign to
+it, so we can show that the pointer must be made 'invalid' (NULL).
+
+The pointer exists so that when a new thread is made runnable, it can
+have its priority compared with the last assigned thread to see if
+it should 'steal' its KSE or not, i.e. whether it is 'earlier'
+on the list than that thread or later. If it is earlier, then the KSE is
+removed from the last assigned (which is now not assigned a KSE)
+and reassigned to the new thread, which is placed earlier in the list.
+The pointer is then backed up to the previous thread (which may or may not
+be the new thread).
+
+When a thread sleeps or is removed, the KSE becomes available and if there
+are queued threads that are not assigned KSEs, the highest priority one of
+them is assigned the KSE, which is then placed back on the run queue at
+the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
+to point to it.
+
+The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads)
+ \ \____
+ \ \
+ KSEGROUP---thread--thread--thread (queued in priority order)
+ \ /
+ \_______________/
+ (last_assigned)
+
+The result of this scheme is that the M available KSEs are always
+queued at the priorities they have inherited from the M highest priority
+threads for that KSEGROUP. If this situation changes, the KSEs are
+reassigned to keep this true.
+***/
+
+
+CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
+
+
+/* END */
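
To make the invariants above concrete, here is an approximate sketch of kse_reassign() as the text describes it; the real body falls in the portion truncated below, so treat the details as assumptions:

/*
 * Approximate sketch of kse_reassign(), per the description above
 * (the real body is in the truncated part of this diff).
 */
static void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg = ke->ke_ksegrp;
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Find the next thread, in priority order, that has no KSE. */
	if (kg->kg_last_assigned != NULL)
		td = TAILQ_NEXT(kg->kg_last_assigned, td_runq);
	else
		td = TAILQ_FIRST(&kg->kg_runq);
	if (td != NULL) {
		/* Lend the KSE its priority and put it on the run queue. */
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		sched_add(td);
	} else {
		/* No unassigned runnable thread: park it on the idle queue. */
		ke->ke_state = KES_IDLE;
		ke->ke_thread = NULL;
		TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses++;
	}
}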
+
+void
+sched_thread_exit(struct thread *td)
+{
+ struct kse *ke;
+
+ ke = td->td_kse;
+
+ if ((td->td_proc->p_flag & P_SA) && ke != NULL) {
+ ke->ke_thread = NULL;
+ td->td_kse = NULL;
+ kse_reassign(ke);
+ }
+ if ((td->td_proc->p_flag & P_NOLOAD) == 0)
+ sched_tdcnt--;
+
+}
+
+/*
+ * Special version of the above for a thr library thread;
+ * work towards merging them.
+ * Called from:
+ * thr_exit1()
+ */
+void
+sched_thr_exit(struct thread *td)
+{
+ struct kse *ke;
+
+ ke = td->td_kse;
+
+ if ((td->td_proc->p_flag & P_NOLOAD) == 0)
+ sched_tdcnt--;
+
+ /* td is about to be freed, but keep it clean */
+ td->td_kse = NULL;
+ td->td_last_kse = NULL;
+ kse_unlink(ke); /* also frees it */
+}
+
+
+/*
+ * Allocate or set up any resources the scheduler needs
+ * for a new process at fork() time.
+ * Called from:
+ * fork1()
+ */
+void
+sched_fork(struct thread *td, struct proc *child)
+{
+ struct thread *newtd;
+ struct kse *newke;
+
+ newtd = FIRST_THREAD_IN_PROC(child);
+ newke = FIRST_KSE_IN_PROC(child);
+ bzero(&newke->ke_startzero,
+ (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
+ newke->ke_state = KES_THREAD;
+ newke->ke_cpticks = 0;
+ sched_fork_ksegrp(td, FIRST_KSEGRP_IN_PROC(child));
+ newke->ke_thread = newtd;
+ newtd->td_kse = newke;
+}
+
+static uma_zone_t kse_zone;
+
+static struct kg_sched kg_sched0;
+static struct td_sched td_sched0;
+
+
+extern struct mtx kse_zombie_lock;
+TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
+
+/*
+ * Occasionally the scheduler may need to do some GC..
+ * Called from:
+ * thread_reap()
+ */
+void
+sched_GC(void)
+{
+ struct kse *ke_first, *ke_next;
+
+ if (!TAILQ_EMPTY(&zombie_kses)) {
+ mtx_lock_spin(&kse_zombie_lock);
+ ke_first = TAILQ_FIRST(&zombie_kses);
+ if (ke_first)
+ TAILQ_INIT(&zombie_kses);
+ mtx_unlock_spin(&kse_zombie_lock);
+ while (ke_first) {
+ ke_next = TAILQ_NEXT(ke_first, ke_procq);
+ kse_free(ke_first);
+ ke_first = ke_next;
+ }
+ }
+}
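
(The unlocked TAILQ_EMPTY() check above is only an optimistic fast path: the list is detached under kse_zombie_lock before it is walked, so a kse_stash() racing with the unlocked check is simply collected on a later pass.)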
+
+/*
+ * Very early in boot, some scheduler-specific parts of proc0
+ * and some scheduler resources need to be set up.
+ * Called from:
+ * proc0_init()
+ */
+void
+schedinit(void)
+{
+ /*
+ * Set up the scheduler specific parts of proc0.
+ */
+ ksegrp0.kg_sched = &kg_sched0;
+ proc0.p_sched = NULL; /* XXX */
+ thread0.td_sched = &td_sched0;
+
+ /*
+ * and link in our own per scheduler struct
+ */
+ kse_link(&kse0, &ksegrp0);
+ /*
+ * and set it up as if BOUND and running
+ */
+ kse0.ke_thread = &thread0;
+ thread0.td_kse = &kse0; /* we are running */
+ kse0.ke_state = KES_THREAD;
+
+ kse_zone = uma_zcreate("KSE", sizeof (struct kse),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
+}
+
+/*
+ * For now, have special thr code;
+ * later on, clean these up into common code.
+ * Called from:
+ * thr_create()
+ */
+int
+sched_thr_newthread(struct thread *td, struct thread *newtd, int flags)
+{
+ struct kse *newke;
+ /* Initialize our kse structure. */
+ newke = kse_alloc();
+ bzero(&newke->ke_startzero,
+ RANGEOF(struct kse, ke_startzero, ke_endzero));
+
+ /* Link the thread and kse into the ksegrp and make it runnable. */
+ mtx_lock_spin(&sched_lock);
+
+ thread_link(newtd, td->td_ksegrp);
+ kse_link(newke, td->td_ksegrp);
+
+ /* Bind this thread and kse together. */
+ newtd->td_kse = newke;
+ newke->ke_thread = newtd;
+	newke->ke_state = KES_THREAD;
+	newke->ke_cpticks = 0;
+	sched_fork_kse(td, newke);
+
+ TD_SET_CAN_RUN(newtd);
+ if ((flags & THR_SUSPENDED) == 0)
+ setrunqueue(newtd);
+
+ mtx_unlock_spin(&sched_lock);
+ return (0); /* the API could fail but not in this case */
+}
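
(Assumed usage, for orientation only: thr_create(), after building newtd from the parent thread td, would call sched_thr_newthread(td, newtd, flags); the THR_SUSPENDED test above keeps a thread created in the suspended state off the run queue.)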
+
+/*****************************
+ * KSE zone/allocation methods.
+ */
+/*
+ * Allocate a kse.
+ */
+static struct kse *
+kse_alloc(void)
+{
+ return (uma_zalloc(kse_zone, M_WAITOK));
+}
+
+/*
+ * Deallocate a kse.
+ */
+static void
+kse_free(struct kse *ke)
+{
+	uma_zfree(kse_zone, ke);
+}
+
+/*
+ * Stash an embarrassingly extra kse into the zombie kse queue.
+ * Called from:
+ * kse_unlink() (local)
+ */
+static void
+kse_stash(struct kse *ke)
+{
+ mtx_lock_spin(&kse_zombie_lock);
+ TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
+ mtx_unlock_spin(&kse_zombie_lock);
+}
+
+/*
+ * KSE is linked into kse group.
+ * Called from:
+ * sched_newproc() (local)
+ * sched_thr_newthread() (local)
+ * schedinit() (local)
+ * sched_set_concurrancy() (local)
+ *
+ */
+static void
+kse_link(struct kse *ke, struct ksegrp *kg)
+{
+ TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
+ kg->kg_kses++;
+ ke->ke_state = KES_UNQUEUED;
+ ke->ke_proc = kg->kg_proc; /* really just a shortcut */
+ ke->ke_ksegrp = kg;
+ ke->ke_thread = NULL;
+ ke->ke_oncpu = NOCPU;
+ ke->ke_flags = 0;
+}
+
+/*
+ * Allocate scheduler specific per-process resources.
+ * The thread and ksegrp have already been linked in.
+ * Called from:
+ * proc_init() (UMA init method)
+ */
+int
+sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
+{
+ struct kse *ke;
+
+ /*
+ * For a new process, allocate a single KSE to the ksegrp.
+ */
+ ke = kse_alloc();
+ if (ke) {
+ kse_link(ke, kg);
+ td->td_kse = ke;
+ ke->ke_thread = td;
+ return (0);
+ }
+	return (ENOMEM);
+}
+
+/*
+ * Ksegrp is being either created or recycled.
+ * Fix up the per-scheduler resources associated with it.
+ * Called from:
+ * ksegrp_dtor()
+ *	ksegrp_init()
+ */
+void
+sched_init_ksegrp(struct ksegrp *kg)
+{
+
+ TAILQ_INIT(&kg->kg_kseq); /* all kses in ksegrp */
+ TAILQ_INIT(&kg->kg_iq); /* all idle kses in ksegrp */
+ kg->kg_kses = 0;
+ kg->kg_runq_kses = 0; /* XXXKSE change name */
+ kg->kg_idle_kses = 0;
+}
+
+/*
+ * Thread is being either created or recycled.
+ * Fix up the per-scheduler resources associated with it.
+ * Called from:
+ *	thread_dtor()
+ *	thread_init()
+ */
+/* Assumes td->td_sched is already set up */
+void
+sched_init_thread(struct thread *td)
+{
+ td->td_last_kse = NULL;
+ td->td_kse = NULL;
+}
+
+
+/*
+ * Code to take the per-scheduler KSE structure
+ * off the ksegrp it is hanging from, and free it.
+ * Called from:
+ * sched_destroyproc()
+ * sched_thr_exit()
+ * sched_set_concurrancy() via REDUCE_KSES()
+ * kse_reassign() via REDUCE_KSES()
+ */
+static void
+kse_unlink(struct kse *ke)
+{
+ struct ksegrp *kg;
+
+ mtx_assert(&sched_lock, MA_OWNED);
+ kg = ke->ke_ksegrp;
+	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
+	kg->kg_kses--;
+ if (ke->ke_state == KES_IDLE) {
+ TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
+ kg->kg_idle_kses--;
+ }
+ /*
+ * Aggregate stats from the KSE
+ * ## none yet ##
+ */
+
+ kse_stash(ke);
+}
+
+/*
+ * Whenever we have idle KSEs and there are too many for the concurrency,
+ * then free as many as we can. Don't free too many if we have threads
+ * to run/kill.
+ */
+#define REDUCE_KSES(kg, skg) \
+do { \
+ while ((skg->skg_concurrancy < skg->skg_kses) && \
+ (skg->skg_idle_kses > 0) && \
+ (skg->skg_kses > kg->kg_numthreads)) { \
+ kse_unlink(TAILQ_FIRST(&skg->skg_iq)); \
+ } \
+} while (0)
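
For example, with skg_concurrancy = 2, skg_kses = 4, skg_idle_kses = 3 and kg_numthreads = 1, the loop unlinks two idle KSEs (skg_kses going 4 -> 3 -> 2) and stops, since skg_kses no longer exceeds the desired concurrency.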
+
+/*
+ * Called by the uma process fini routine..
+ * undo anything we may have done in the uma_init method.
+ * Panic if it's not all 1:1:1:1
+ * Called from:
+ * proc_fini() (UMA method)
+ */
+void
+sched_destroyproc(struct proc *p)
+{
+ struct ksegrp *kg;
+ struct kg_sched *skg;
+
+ KASSERT((p->p_numthreads == 1), ("Cached proc with > 1 thread "));
+ KASSERT((p->p_numksegrps == 1), ("Cached proc with > 1 ksegrp "));
+
+ kg = FIRST_KSEGRP_IN_PROC(p);
+
+ KASSERT((kg->kg_kses == 1), ("Cached proc with > 1 kse "));
+
+ skg = kg->kg_sched;
+	kse_unlink(TAILQ_FIRST(&skg->skg_iq));
+}
+
+/*
+ * (Re)assign resources to allow the ksegrp to implement
+ * the requested concurrency. At this time it means allocating
+ * or freeing KSE structures.
+ * Called from:
+ * kern_execve() (reverting to non threaded)
+ * kern_exit() (reverting to non threaded)
+ * thread_exit() (during removal of ksegrp)
+ * sched_exit_ksegrp() (local)
+ * kse_exit() (decreasing)
+ * kse_create() (increasing)
+ */
+void
+sched_set_concurrancy(struct ksegrp *kg, int concurrancy)
+{
+ struct kse *newke;
+ struct kg_sched *skg;
+
+ skg = kg->kg_sched;
+ skg->skg_concurrancy = concurrancy;
+ REDUCE_KSES(kg, skg);
+ while (skg->skg_kses < skg->skg_concurrancy) {
+ newke = kse_alloc();
+ bzero(&newke->ke_startzero, RANGEOF(struct kse,
+ ke_startzero, ke_endzero));
+#if 0
+ mtx_lock_spin(&sched_lock);
+ bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
+ RANGEOF(struct kse, ke_startcopy, ke_endcopy));
+ mtx_unlock_spin(&sched_lock);
+#endif
+ mtx_lock_spin(&sched_lock);
+ kse_link(newke, kg);
+ newke->ke_state = KES_THREAD;
+ newke->ke_cpticks = 0;
+ /* Add engine */
+ kse_reassign(newke);
+ mtx_unlock_spin(&sched_lock);
+ }
+}
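
As a worked trace: a group with skg_kses = 1 raising its concurrency to 3 allocates two new KSEs; each is zeroed, linked into the group, marked KES_THREAD and handed to kse_reassign(), which either lends it to a queued thread that lacks a KSE or parks it on the group's idle queue.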
+
+#if 0
+static void runq_readjust(struct runq *rq, struct kse *ke);
+#endif
+/*
+ * Select the KSE that will be run next. From that find the thread, and
+ * remove it from the KSEGRP's run queue. If there is thread clustering,
+ * this will be what does it.
+ * XXX Change to take an argument indicating
+ * if the switch is voluntary or involuntary.
+ * Called from:
+ * thr_exit1()
+ * thread_exit()
+ * sched_switch() (local)
+ * init_secondary() (start up 2ndary processors)
+ */
+struct thread *
+choosethread(int flags)
+{
+ struct kse *ke;
+ struct thread *td;
+ struct ksegrp *kg;
+
+#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
+ if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
+ /* Shutting down, run idlethread on AP's */
+ td = PCPU_GET(idlethread);
+ ke = td->td_kse;
+ CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+ ke->ke_flags |= KEF_DIDRUN;
+ TD_SET_RUNNING(td);
+ return (td);
+ }
+#endif
+
+retry:
+ kg = curthread->td_ksegrp;
+#if 0
+ if (flags & SW_VOL) {
+ if (kg->kg_runnable) {
+ td = TAILQ_FIRST(&kg->kg_runq);
+ }
+ }
+ if (ke == NULL)
+#endif
+ ke = sched_choose();
+ if (ke) {
>>> TRUNCATED FOR MAIL (1000 lines) <<<