PERFORCE change 103283 for review
John Birrell
jb at FreeBSD.org
Sat Aug 5 22:07:43 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=103283
Change 103283 by jb at jb_freebsd2 on 2006/08/05 22:06:59
Merge KSE back in, but only if the kernel option KSE is defined.
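The KSE code paths in the files below are compiled in only when the KSE macro is defined. As a sketch, assuming the standard kernel configuration mechanism handled by config(8) (the options entry itself is not shown in this change), the option would be enabled with a line like:

    # hypothetical kernel configuration entry enabling the M:N code paths
    options         KSE

Without that line the #else branches, i.e. the 1:1 threading code, are built instead.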
Affected files ...
.. //depot/projects/dtrace/src/sys/kern/kern_idle.c#3 edit
.. //depot/projects/dtrace/src/sys/kern/kern_intr.c#6 edit
.. //depot/projects/dtrace/src/sys/kern/kern_mib.c#3 edit
.. //depot/projects/dtrace/src/sys/sys/proc.h#11 edit
.. //depot/projects/dtrace/src/sys/sys/rtprio.h#3 edit
.. //depot/projects/dtrace/src/sys/sys/sched.h#4 edit
Differences ...
==== //depot/projects/dtrace/src/sys/kern/kern_idle.c#3 (text+ko) ====
@@ -78,8 +78,13 @@
mtx_lock_spin(&sched_lock);
td = FIRST_THREAD_IN_PROC(p);
TD_SET_CAN_RUN(td);
+#ifdef KSE
+ td->td_flags |= TDF_IDLETD;
+ sched_class(td->td_ksegrp, PRI_IDLE);
+#else
atomic_set_int(&td->td_flags, TDF_IDLETD);
sched_class(td, PRI_IDLE);
+#endif
sched_prio(td, PRI_MAX_IDLE);
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
@@ -118,8 +123,12 @@
#ifdef SMP
idle_cpus_mask &= ~mycpu;
#endif
+#ifdef KSE
+ mi_switch(SW_VOL, NULL);
+#else
if ((td = choosethread()) != curthread)
sched_switch(curthread, td, SW_VOL);
+#endif
#ifdef SMP
idle_cpus_mask |= mycpu;
#endif
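For reference, a sketch of how the idle loop body reads with both paths merged in (assuming the surrounding loop and locking of kern_idle.c at this revision; not a verbatim copy of the file):

    #ifndef KSE
        struct thread *td;
    #endif

        for (;;) {
            while (sched_runnable() == 0)
                cpu_idle();

            mtx_lock_spin(&sched_lock);
    #ifdef SMP
            idle_cpus_mask &= ~mycpu;
    #endif
    #ifdef KSE
            /* KSE: mi_switch() selects the next thread itself. */
            mi_switch(SW_VOL, NULL);
    #else
            /* 1:1: choose a thread explicitly and switch to it. */
            if ((td = choosethread()) != curthread)
                sched_switch(curthread, td, SW_VOL);
    #endif
    #ifdef SMP
            idle_cpus_mask |= mycpu;
    #endif
            mtx_unlock_spin(&sched_lock);
        }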
==== //depot/projects/dtrace/src/sys/kern/kern_intr.c#6 (text+ko) ====
@@ -296,7 +296,11 @@
panic("kthread_create() failed with %d", error);
td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */
mtx_lock_spin(&sched_lock);
+#ifdef KSE
+ td->td_ksegrp->kg_pri_class = PRI_ITHD;
+#else
td->td_pri_class = PRI_ITHD;
+#endif
TD_SET_IWAIT(td);
mtx_unlock_spin(&sched_lock);
td->td_pflags |= TDP_ITHREAD;
@@ -531,7 +535,11 @@
CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
p->p_comm);
TD_CLR_IWAIT(td);
+#ifdef KSE
+ setrunqueue(td, SRQ_INTR);
+#else
sched_run_ithread(td);
+#endif
} else {
CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
__func__, p->p_pid, p->p_comm, it->it_need, td->td_state);
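The kern_intr.c hunks show the central data-model difference: with KSE the scheduling class is a property of the thread's ksegrp, while the 1:1 kernel keeps it on the thread itself. A hypothetical helper (not part of this change) illustrating how a caller could hide that difference:

    /* Illustrative wrapper only; callers must hold sched_lock. */
    static __inline void
    thread_set_pri_class(struct thread *td, int class)
    {
            mtx_assert(&sched_lock, MA_OWNED);
    #ifdef KSE
            td->td_ksegrp->kg_pri_class = class;    /* class is per-ksegrp */
    #else
            td->td_pri_class = class;               /* class is per-thread */
    #endif
    }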
==== //depot/projects/dtrace/src/sys/kern/kern_mib.c#3 (text+ko) ====
@@ -146,7 +146,7 @@
SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD,
0, BYTE_ORDER, "System byte order");
-SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD | CTLFLAG_MPSAFE,
+SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD,
0, PAGE_SIZE, "System memory page size");
static int
==== //depot/projects/dtrace/src/sys/sys/proc.h#11 (text+ko) ====
@@ -152,23 +152,120 @@
*/
struct auditinfo;
struct kaudit_record;
+#ifdef KSE
+struct kg_sched;
+#else
struct td_sched;
+#endif
struct nlminfo;
struct kaioinfo;
struct p_sched;
struct proc;
struct sleepqueue;
+#ifdef KSE
+struct td_sched;
+#else
struct thread;
+#endif
struct trapframe;
struct turnstile;
struct mqueue_notifier;
+#ifdef KSE
+/*
+ * Here we define the three structures used for process information.
+ *
+ * The first is the thread. It might be thought of as a "Kernel
+ * Schedulable Entity Context".
+ * This structure contains all the information as to where a thread of
+ * execution is now, or was when it was suspended, why it was suspended,
+ * and anything else that will be needed to restart it when it is
+ * rescheduled. Always associated with a KSE when running, but can be
+ * reassigned to an equivalent KSE when being restarted for
+ * load balancing. Each of these is associated with a kernel stack
+ * and a pcb.
+ *
+ * It is important to remember that a particular thread structure may only
+ * exist as long as the system call or kernel entrance (e.g. by pagefault)
+ * which it is currently executing. It should therefore NEVER be referenced
+ * by pointers in long lived structures that live longer than a single
+ * request. If several threads complete their work at the same time,
+ * they will all rewind their stacks to the user boundary, report their
+ * completion state, and all but one will be freed. That last one will
+ * be kept to provide a kernel stack and pcb for the NEXT syscall or kernel
+ * entrance (basically to save freeing and then re-allocating it). The existing
+ * thread keeps a cached spare thread available to allow it to quickly
+ * get one when it needs a new one. There is also a system
+ * cache of free threads. Threads have priority and partake in priority
+ * inheritance schemes.
+ */
+struct thread;
+
+/*
+ * The KSEGRP is allocated resources across a number of CPUs.
+ * (Including a number of CPUxQUANTA. It parcels these QUANTA up among
+ * its threads, each of which should be running in a different CPU.
+ * BASE priority and total available quanta are properties of a KSEGRP.
+ * Multiple KSEGRPs in a single process compete against each other
+ * for total quanta in the same way that a forked child competes against
+ * it's parent process.
+ */
+struct ksegrp;
+
/*
+ * A process is the owner of all system resources allocated to a task
+ * except CPU quanta.
+ * All KSEGs under one process see, and have the same access to, these
+ * resources (e.g. files, memory, sockets, credential, kqueues).
+ * A process may compete for CPU cycles on the same basis as a
+ * forked process cluster by spawning several KSEGRPs.
+ */
+struct proc;
+
+/***************
+ * In pictures:
+ With a single run queue used by all processors:
+
+ RUNQ: --->KSE---KSE--...             SLEEPQ:[]---THREAD---THREAD---THREAD
+              \     \                        []---THREAD
+               KSEG---THREAD--THREAD--THREAD []
+                                              []---THREAD---THREAD
+
+ (processors run THREADs from the KSEG until they are exhausted or
+ the KSEG exhausts its quantum)
+
+With PER-CPU run queues:
+KSEs on the separate run queues directly
+They would be given priorities calculated from the KSEG.
+
+ *
+ *****************/
+#endif
+
+#ifdef KSE
+/*
+ * Kernel runnable context (thread).
+ * This is what is put to sleep and reactivated.
+ * The first KSE available in the correct group will run this thread.
+ * If several are available, use the one on the same CPU as last time.
+ * When waiting to be run, threads are hung off the KSEGRP in priority order.
+ * With N runnable and queued KSEs in the KSEGRP, the first N threads
+ * are linked to them. Other threads are not yet assigned.
+ */
+#else
+/*
* Thread context. Processes may have multiple threads.
*/
+#endif
struct thread {
struct proc *td_proc; /* (*) Associated process. */
+#ifdef KSE
+ struct ksegrp *td_ksegrp; /* (*) Associated KSEG. */
+#endif
TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */
+#ifdef KSE
+ TAILQ_ENTRY(thread) td_kglist; /* (*) All threads in this ksegrp. */
+#endif
/* The two queues below should someday be merged. */
TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. */
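Given td_proc, td_ksegrp and td_kglist above, a fully linked thread belongs to exactly one ksegrp of its own process. A minimal sketch of that invariant as an assertion (thread_link_assert() is a hypothetical helper, not part of this change):

    #ifdef KSE
    static __inline void
    thread_link_assert(struct thread *td)
    {
            KASSERT(td->td_ksegrp != NULL &&
                td->td_ksegrp->kg_proc == td->td_proc,
                ("thread linked to a ksegrp of a different process"));
    }
    #endif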
@@ -202,9 +299,17 @@
struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
int td_intr_nesting_level; /* (k) Interrupt recursion. */
int td_pinned; /* (k) Temporary cpu pin count. */
+#ifdef KSE
+ struct kse_thr_mailbox *td_mailbox; /* (*) Userland mailbox address. */
+#endif
struct ucred *td_ucred; /* (k) Reference to credentials. */
+#ifdef KSE
+ struct thread *td_standin; /* (k + a) Use this for an upcall. */
+ struct kse_upcall *td_upcall; /* (k + j) Upcall structure. */
+#else
u_int td_estcpu; /* (j) Sum of the same field in KSEs. */
u_int td_slptime; /* (j) How long completely blocked. */
+#endif
u_int td_pticks; /* (k) Statclock hits for profiling */
u_int td_sticks; /* (k) Statclock hits in system mode. */
u_int td_iticks; /* (k) Statclock hits in intr mode. */
@@ -216,6 +321,9 @@
sigset_t td_sigmask; /* (c) Current signal mask. */
volatile u_int td_generation; /* (k) For detection of preemption */
stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */
+#ifdef KSE
+ int td_kflags; /* (c) Flags for KSE threading. */
+#endif
int td_xsig; /* (c) Signal for ptrace */
int td_xsig_why; /* (c) reason for ptrace signal PL_EVENT_* */
u_long td_profil_addr; /* (k) Temporary addr until AST. */
@@ -334,15 +442,27 @@
#define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */
#define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */
#define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */
+#ifdef KSE
+#define TDP_UPCALLING 0x00000008 /* This thread is doing an upcall. */
+#else
/* 0x00000008 */
+#endif
#define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */
#define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */
#define TDP_DEADLKTREAT 0x00000040 /* Lock aquisition - deadlock treatment. */
+#ifdef KSE
+#define TDP_SA 0x00000080 /* A scheduler activation based thread. */
+#else
/* 0x00000080 */
+#endif
#define TDP_NOSLEEPING 0x00000100 /* Thread is not allowed to sleep on a sq. */
#define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */
#define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */
+#ifdef KSE
+#define TDP_CAN_UNBIND 0x00000800 /* Only temporarily bound. */
+#else
/* 0x00000800 */
+#endif
#define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */
#define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */
#define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */
@@ -363,6 +483,19 @@
#define TDI_LOCK 0x0008 /* Stopped on a lock. */
#define TDI_IWAIT 0x0010 /* Awaiting interrupt. */
+#ifdef KSE
+/*
+ * flags (in kflags) related to M:N threading.
+ */
+#define TDK_KSEREL 0x0001 /* Blocked in msleep on kg->kg_completed. */
+#define TDK_KSERELSIG 0x0002 /* Blocked in msleep on p->p_siglist. */
+#define TDK_WAKEUP 0x0004 /* Thread has been woken by kse_wakeup. */
+
+#define TD_CAN_UNBIND(td) \
+ (((td)->td_pflags & TDP_CAN_UNBIND) && \
+ ((td)->td_upcall != NULL))
+#endif
+
#define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING)
#define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL)
#define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED)
@@ -408,7 +541,57 @@
#define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ
#define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN
+#ifdef KSE
+/*
+ * An upcall is used when returning to userland. If a thread does not have
+ * an upcall on return to userland the thread exports its context and exits.
+ */
+struct kse_upcall {
+ TAILQ_ENTRY(kse_upcall) ku_link; /* List of upcalls in KSEG. */
+ struct ksegrp *ku_ksegrp; /* Associated KSEG. */
+ struct thread *ku_owner; /* Owning thread. */
+ int ku_flags; /* KUF_* flags. */
+ struct kse_mailbox *ku_mailbox; /* Userland mailbox address. */
+ stack_t ku_stack; /* Userland upcall stack. */
+ void *ku_func; /* Userland upcall function. */
+ unsigned int ku_mflags; /* Cached upcall mbox flags. */
+};
+
+#define KUF_DOUPCALL 0x00001 /* Do upcall now; don't wait. */
+#define KUF_EXITING 0x00002 /* Upcall structure is exiting. */
+
/*
+ * Kernel-scheduled entity group (KSEG). The scheduler considers each KSEG to
+ * be an indivisible unit from a time-sharing perspective, though each KSEG may
+ * contain multiple KSEs.
+ */
+struct ksegrp {
+ struct proc *kg_proc; /* (*) Proc that contains this KSEG. */
+ TAILQ_ENTRY(ksegrp) kg_ksegrp; /* (*) Queue of KSEGs in kg_proc. */
+ TAILQ_HEAD(, thread) kg_threads;/* (td_kglist) All threads. */
+ TAILQ_HEAD(, thread) kg_runq; /* (td_runq) waiting RUNNABLE threads */
+ TAILQ_HEAD(, kse_upcall) kg_upcalls; /* All upcalls in the group. */
+
+#define kg_startzero kg_estcpu
+ u_int kg_estcpu; /* (j) Sum of the same field in KSEs. */
+ u_int kg_slptime; /* (j) How long completely blocked. */
+ int kg_numupcalls; /* (j) Num upcalls. */
+ int kg_upsleeps; /* (c) Num threads in kse_release(). */
+ struct kse_thr_mailbox *kg_completed; /* (c) Completed thread mboxes. */
+ int kg_nextupcall; /* (n) Next upcall time. */
+ int kg_upquantum; /* (n) Quantum to schedule an upcall. */
+#define kg_endzero kg_pri_class
+
+#define kg_startcopy kg_endzero
+ u_char kg_pri_class; /* (j) Scheduling class. */
+ u_char kg_user_pri; /* (j) User pri from estcpu and nice. */
+#define kg_endcopy kg_numthreads
+ int kg_numthreads; /* (j) Num threads in total. */
+ struct kg_sched *kg_sched; /* (*) Scheduler-specific data. */
+};
+#endif
+
+/*
* XXX: Does this belong in resource.h or resourcevar.h instead?
* Resource usage extension. The times in rusage structs in the kernel are
* never up to date. The actual times are kept as runtimes and tick counts
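In struct ksegrp above, the kg_startzero/kg_endzero and kg_startcopy/kg_endcopy markers bound the regions that are cleared or copied when a new group is set up. A sketch of that idiom, assuming the usual bzero()/bcopy() pattern and the __rangeof() macro from <sys/systm.h> (parent_kg is a stand-in name; the actual setup code is not part of this diff):

            /* Clear the per-incarnation statistics. */
            bzero(&kg->kg_startzero,
                __rangeof(struct ksegrp, kg_startzero, kg_endzero));
            /* Inherit scheduling class and user priority from the parent. */
            bcopy(&parent_kg->kg_startcopy, &kg->kg_startcopy,
                __rangeof(struct ksegrp, kg_startcopy, kg_endcopy));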
@@ -434,6 +617,9 @@
*/
struct proc {
LIST_ENTRY(proc) p_list; /* (d) List of all processes. */
+#ifdef KSE
+ TAILQ_HEAD(, ksegrp) p_ksegrps; /* (c)(kg_ksegrp) All KSEGs. */
+#endif
TAILQ_HEAD(, thread) p_threads; /* (j)(td_plist) Threads. (shortcut) */
TAILQ_HEAD(, thread) p_suspended; /* (td_runq) Suspended threads. */
struct ucred *p_ucred; /* (c) Process owner's identity. */
@@ -496,6 +682,9 @@
int p_suspcount; /* (c) Num threads in suspended mode. */
struct thread *p_xthread; /* (c) Trap thread */
int p_boundary_count;/* (c) Num threads at user boundary */
+#ifdef KSE
+ struct ksegrp *p_procscopegrp;
+#endif
int p_pendingcnt; /* how many signals are pending */
struct itimers *p_itimers; /* (c) POSIX interval timers. */
/* End area that is zeroed on creation. */
@@ -516,6 +705,9 @@
u_short p_xstat; /* (c) Exit status; also stop sig. */
struct knlist p_klist; /* (c) Knotes attached to this proc. */
int p_numthreads; /* (j) Number of threads. */
+#ifdef KSE
+ int p_numksegrps; /* (c) Number of ksegrps. */
+#endif
struct mdproc p_md; /* Any machine-dependent fields. */
struct callout p_itcallout; /* (h + c) Interval timer callout. */
u_short p_acflag; /* (c) Accounting flags. */
@@ -627,11 +819,22 @@
#define FOREACH_PROC_IN_SYSTEM(p) \
LIST_FOREACH((p), &allproc, p_list)
+#ifdef KSE
+#define FOREACH_KSEGRP_IN_PROC(p, kg) \
+ TAILQ_FOREACH((kg), &(p)->p_ksegrps, kg_ksegrp)
+#define FOREACH_THREAD_IN_GROUP(kg, td) \
+ TAILQ_FOREACH((td), &(kg)->kg_threads, td_kglist)
+#define FOREACH_UPCALL_IN_GROUP(kg, ku) \
+ TAILQ_FOREACH((ku), &(kg)->kg_upcalls, ku_link)
+#endif
#define FOREACH_THREAD_IN_PROC(p, td) \
TAILQ_FOREACH((td), &(p)->p_threads, td_plist)
/* XXXKSE the following lines should probably only be used in 1:1 code: */
#define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads)
+#ifdef KSE
+#define FIRST_KSEGRP_IN_PROC(p) TAILQ_FIRST(&(p)->p_ksegrps)
+#endif
/*
* We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t,
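A minimal usage sketch for the iteration macros above: with KSE a walk over all threads of a process goes through its ksegrps, without KSE it goes straight over p_threads (locking per the field annotations is assumed; for_each_thread() and handle_thread() are stand-in names):

    static void
    for_each_thread(struct proc *p)
    {
            struct thread *td;
    #ifdef KSE
            struct ksegrp *kg;

            FOREACH_KSEGRP_IN_PROC(p, kg)
                    FOREACH_THREAD_IN_GROUP(kg, td)
                            handle_thread(td);
    #else
            FOREACH_THREAD_IN_PROC(p, td)
                    handle_thread(td);
    #endif
    }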
@@ -742,6 +945,9 @@
extern struct sx allproc_lock;
extern struct sx proctree_lock;
extern struct mtx ppeers_lock;
+#ifdef KSE
+extern struct ksegrp ksegrp0; /* Primary ksegrp in proc0. */
+#endif
extern struct proc proc0; /* Process slot for swapper. */
extern struct thread thread0; /* Primary thread in proc0. */
extern struct vmspace vmspace0; /* VM space for proc0. */
@@ -792,7 +998,11 @@
void pargs_free(struct pargs *pa);
void pargs_hold(struct pargs *pa);
void procinit(void);
+#ifdef KSE
+void proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td);
+#else
void proc_linkup(struct proc *p, struct thread *td);
+#endif
void proc_reparent(struct proc *child, struct proc *newparent);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
@@ -820,6 +1030,11 @@
void cpu_set_fork_handler(struct thread *, void (*)(void *), void *);
/* New in KSE. */
+#ifdef KSE
+struct ksegrp *ksegrp_alloc(void);
+void ksegrp_free(struct ksegrp *kg);
+void ksegrp_stash(struct ksegrp *kg);
+#endif
void kse_GC(void);
void kseinit(void);
void cpu_set_upcall(struct thread *td, struct thread *td0);
@@ -830,13 +1045,24 @@
void cpu_thread_setup(struct thread *td);
void cpu_thread_swapin(struct thread *);
void cpu_thread_swapout(struct thread *);
+#ifdef KSE
+void ksegrp_link(struct ksegrp *kg, struct proc *p);
+void ksegrp_unlink(struct ksegrp *kg);
+#endif
struct thread *thread_alloc(void);
void thread_continued(struct proc *p);
void thread_exit(void) __dead2;
int thread_export_context(struct thread *td, int willexit);
void thread_free(struct thread *td);
+#ifdef KSE
+void thread_link(struct thread *td, struct ksegrp *kg);
+#else
void thread_link(struct thread *td, struct proc *p);
+#endif
void thread_reap(void);
+#ifdef KSE
+struct thread *thread_schedule_upcall(struct thread *td, struct kse_upcall *ku);
+#endif
void thread_signal_add(struct thread *td, ksiginfo_t *);
int thread_single(int how);
void thread_single_end(void);
@@ -854,9 +1080,21 @@
void thread_unsuspend(struct proc *p);
void thread_unsuspend_one(struct thread *td);
void thread_unthread(struct thread *td);
+#ifdef KSE
+int thread_userret(struct thread *td, struct trapframe *frame);
+void thread_user_enter(struct thread *td);
+#endif
void thread_wait(struct proc *p);
struct thread *thread_find(struct proc *p, lwpid_t tid);
void thr_exit1(void);
+#ifdef KSE
+struct kse_upcall *upcall_alloc(void);
+void upcall_free(struct kse_upcall *ku);
+void upcall_link(struct kse_upcall *ku, struct ksegrp *kg);
+void upcall_unlink(struct kse_upcall *ku);
+void upcall_remove(struct thread *td);
+void upcall_stash(struct kse_upcall *ke);
+#endif
#endif /* _KERNEL */
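Because proc_linkup() gains a ksegrp argument under KSE, the early-boot code that wires up the statically allocated proc0 objects has to pass ksegrp0 as well. A sketch of the call, assuming it mirrors the usual proc0 setup (the caller itself is outside this change):

    #ifdef KSE
            proc_linkup(&proc0, &ksegrp0, &thread0);
    #else
            proc_linkup(&proc0, &thread0);
    #endif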
==== //depot/projects/dtrace/src/sys/sys/rtprio.h#3 (text+ko) ====
@@ -75,11 +75,17 @@
};
#ifdef _KERNEL
+#ifdef KSE
+struct ksegrp;
+int rtp_to_pri(struct rtprio *, struct ksegrp *);
+void pri_to_rtp(struct ksegrp *, struct rtprio *);
+#else
struct thread;
int rtp_to_pri(struct rtprio *, struct thread *);
void pri_to_rtp(struct thread *, struct rtprio *);
#endif
#endif
+#endif
#ifndef _KERNEL
#include <sys/cdefs.h>
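The rtprio.h change swaps the conversion helpers between operating on a ksegrp and on a thread. A sketch of how a caller, for example the rtprio(2) handler, would adapt (td is the target thread, rtp the requested values; error handling is elided):

            struct rtprio rtp;
            int error;

            /* ... rtp filled in from the syscall arguments ... */
    #ifdef KSE
            error = rtp_to_pri(&rtp, td->td_ksegrp);
    #else
            error = rtp_to_pri(&rtp, td);
    #endif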
==== //depot/projects/dtrace/src/sys/sys/sched.h#4 (text+ko) ====
@@ -52,19 +52,32 @@
* KSE Groups contain scheduling priority information. They record the
* behavior of groups of KSEs and threads.
*/
+#ifdef KSE
+void sched_class(struct ksegrp *kg, int class);
+void sched_exit_ksegrp(struct ksegrp *kg, struct thread *childtd);
+void sched_fork_ksegrp(struct thread *td, struct ksegrp *child);
+#else
void sched_class(struct thread *td, int class);
+#endif
void sched_nice(struct proc *p, int nice);
/*
* Threads are switched in and out, block on resources, have temporary
* priorities inherited from their ksegs, and use up cpu time.
*/
+#ifdef KSE
+void sched_exit_thread(struct thread *td, struct thread *child);
+void sched_fork_thread(struct thread *td, struct thread *child);
+#endif
fixpt_t sched_pctcpu(struct thread *td);
void sched_prio(struct thread *td, u_char prio);
void sched_lend_prio(struct thread *td, u_char prio);
void sched_sleep(struct thread *td);
void sched_switch(struct thread *td, struct thread *newtd, int flags);
void sched_unlend_prio(struct thread *td, u_char prio);
+#ifdef KSE
+void sched_userret(struct thread *td);
+#endif
void sched_wakeup(struct thread *td);
/*
@@ -75,7 +88,9 @@
void sched_rem(struct thread *td);
void sched_tick(void);
void sched_relinquish(struct thread *td);
+#ifndef KSE
void sched_run_ithread(struct thread *td);
+#endif
/*
* Binding makes cpu affinity permanent while pinning is used to temporarily
@@ -91,6 +106,9 @@
* These procedures tell the process data structure allocation code how
* many bytes to actually allocate.
*/
+#ifdef KSE
+int sched_sizeof_ksegrp(void);
+#endif
int sched_sizeof_proc(void);
int sched_sizeof_thread(void);
@@ -108,7 +126,15 @@
/* temporarily here */
void schedinit(void);
+#ifdef KSE
+void sched_init_concurrency(struct ksegrp *kg);
+void sched_set_concurrency(struct ksegrp *kg, int cuncurrency);
+#endif
void sched_schedinit(void);
+#ifdef KSE
+void sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td);
+void sched_thread_exit(struct thread *td);
+#endif
void sched_newthread(struct thread *td);
#endif /* !_SYS_SCHED_H_ */
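The sched_sizeof_*() hooks let each scheduler append its private data (kg_sched, td_sched) to the corresponding allocations. A sketch of how the ksegrp sizing hook would be consumed when creating a UMA zone, assuming the usual uma_zcreate() setup (the zone variable and its placement are illustrative):

    #include <vm/uma.h>

    static uma_zone_t ksegrp_zone;          /* illustrative placement */

    static void
    example_zone_init(void)                 /* hypothetical function */
    {
    #ifdef KSE
            /* Each allocation covers struct ksegrp plus scheduler data. */
            ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
                NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
    #endif
    }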