PERFORCE change 104659 for review
Chris Jones
cdjones at FreeBSD.org
Mon Aug 21 07:30:52 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=104659
Change 104659 by cdjones at cdjones-impulse on 2006/08/21 07:30:17
Rename sched_hier.c to sched_4bsd.c
Affected files ...
.. //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 integrate
Differences ...
==== //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 (text+ko) ====
@@ -41,6 +41,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -176,6 +177,11 @@
static int forward_wakeup(int cpunum);
#endif
+static uint32_t total_cpu_sched_shares;
+static u_int total_est_cpu;
+extern struct mtx allprison_mtx;
+extern int prisoncount;
+
static struct kproc_desc sched_kp = {
"schedcpu",
schedcpu_thread,
@@ -289,6 +295,18 @@
&sched_kgfollowons, 0,
"number of followons done in a ksegrp");
+static int sched_limitjailcpu = 0;
+SYSCTL_INT(_kern_sched, OID_AUTO, limit_jail_cpu,
+ CTLFLAG_RW,
+ &sched_limitjailcpu, 0,
+ "limit jailed process cpu usage");
+
+static int sched_unjailedProcessShares = 0;
+SYSCTL_INT(_kern_sched, OID_AUTO, system_cpu_shares,
+ CTLTYPE_INT | CTLFLAG_RW,
+ &sched_unjailedProcessShares, 0,
+ "number of shares to allocate to unjailed processes");
+
static __inline void
sched_load_add(void)
{
@@ -435,10 +453,23 @@
struct proc *p;
struct kse *ke;
struct ksegrp *kg;
+ struct prison *pr;
int awake, realstathz;
realstathz = stathz ? stathz : hz;
+ /*
+ * Need to acquire each jail's mutex and hold throughout to keep
+ * everything out while we recalculate per-jail CPU usage.
+ * TODO: this is excessively icky.
+ */
sx_slock(&allproc_lock);
+ mtx_lock(&allprison_mtx);
+ if (prisoncount) {
+ LIST_FOREACH(pr, &allprison, pr_list) {
+ pr->pr_estcpu = 0;
+ }
+ }
+ total_est_cpu = 0;
FOREACH_PROC_IN_SYSTEM(p) {
/*
* Prevent state changes and protect run queue.
@@ -523,6 +554,12 @@
if (kg->kg_slptime > 1)
continue;
kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
+ total_est_cpu += kg->kg_estcpu;
+ if (sched_limitjailcpu &&
+ NULL != kg->kg_proc->p_ucred &&
+ NULL != kg->kg_proc->p_ucred->cr_prison)
+ kg->kg_proc->p_ucred->cr_prison->pr_estcpu +=
+ kg->kg_estcpu;
resetpriority(kg);
FOREACH_THREAD_IN_GROUP(kg, td) {
resetpriority_thread(td, kg);
@@ -530,6 +567,7 @@
} /* end of ksegrp loop */
mtx_unlock_spin(&sched_lock);
} /* end of process loop */
+ mtx_unlock(&allprison_mtx);
sx_sunlock(&allproc_lock);
}
@@ -540,8 +578,29 @@
schedcpu_thread(void)
{
int nowake;
+ struct prison *pr;
+ u_int32_t shares = 0;
for (;;) {
+ if (sched_limitjailcpu) {
+ /*
+ * Update total jail CPU shares in case they've changed.
+ * Safe to read pr_sched_shares without mutex because
+ * in worst case, we get a bogus value which will be
+ * corrected on the next pass.
+ *
+ * TODO: this should be done by forcing a recalculation
+ * when jail CPU shares are added / changed, rather than
+	 * doing it every second.
+ */
+
+ shares = sched_unjailedProcessShares;
+ LIST_FOREACH(pr, &allprison, pr_list) {
+ shares += pr->pr_sched_shares;
+ }
+ total_cpu_sched_shares = shares;
+ }
+
schedcpu();
tsleep(&nowake, 0, "-", hz);
}
@@ -579,12 +638,43 @@
resetpriority(struct ksegrp *kg)
{
register unsigned int newpriority;
+ struct prison *pr = NULL;
+ if (NULL != kg->kg_proc->p_ucred)
+ pr = kg->kg_proc->p_ucred->cr_prison;
if (kg->kg_pri_class == PRI_TIMESHARE) {
newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT +
- NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN);
- newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
- PRI_MAX_TIMESHARE);
+ NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN);
+ if (sched_limitjailcpu && NULL != pr) {
+		/*
+		 * Skew the priority by the jail's share of CPU resources.
+		 * Unjailed processes are allocated sched_unjailedProcessShares
+		 * (kern.sched.system_cpu_shares) of the total.
+		 *
+		 * TODO: this is a hard limit. We should really also have
+		 * soft limits available.
+		 */
+ register unsigned int np = newpriority;
+ register unsigned int skew;
+ skew = pr->pr_estcpu * total_cpu_sched_shares;
+ skew /= max(total_est_cpu, 1) * max(pr->pr_sched_shares, 1);
+ if (skew > 0) {
+			/* wait your turn until your CPU usage is proportionate */
+ newpriority = PRI_MAX_IDLE;
+ } else {
+ newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
+ PRI_MAX_TIMESHARE);
+ }
+ printf("skew KSE %p (%d / %d cpu, %d / %d shares) from %d to %d\n",
+ &kg, pr->pr_estcpu, total_est_cpu,
+ pr->pr_sched_shares,
+ total_cpu_sched_shares,
+ np, newpriority);
+ } else {
+ newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
+ PRI_MAX_TIMESHARE);
+ }
+
kg->kg_user_pri = newpriority;
}
}
More information about the p4-projects
mailing list