PERFORCE change 187209 for review
Edward Tomasz Napierala
trasz at FreeBSD.org
Mon Dec 27 10:01:19 UTC 2010
http://p4web.freebsd.org/@@187209?ac=10
Change 187209 by trasz at trasz_victim on 2010/12/27 10:01:12
Christmas fixes.
Affected files ...
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#45 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#28 edit
.. //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#20 edit
Differences ...
==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#45 (text+ko) ====
@@ -34,7 +34,8 @@
#include "opt_kdtrace.h"
+#include <sys/param.h>
#include <sys/container.h>
+#include <sys/eventhandler.h>
-#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
@@ -619,35 +621,37 @@
#endif
}
-/*
- * Stuff below runs from a "containerd" kernel process.
- */
static void
-rusage_throttle(struct thread *td, int throttle)
+rusage_throttle(struct proc *p, int throttle)
{
+ struct thread *td;
u_char oldpri;
u_char newpri;
int type;
if (throttle) {
- td->td_flags |= TDF_THROTTLED;
+ p->p_flag |= P_THROTTLED;
newpri = PRI_MIN_IDLE;
type = RTP_PRIO_IDLE;
- } else if (td->td_flags & TDF_THROTTLED) {
- td->td_flags &= ~TDF_THROTTLED;
+ } else if (p->p_flag & P_THROTTLED) {
+ p->p_flag &= ~P_THROTTLED;
newpri = PRI_MIN_TIMESHARE;
type = RTP_PRIO_NORMAL;
} else
return;
- /* Mostly copied from rtp_to_pri(). */
- sched_class(td, type); /* XXX fix */
- oldpri = td->td_user_pri;
- sched_user_prio(td, newpri);
- if (TD_IS_RUNNING(td) || TD_CAN_RUN(td))
- sched_prio(td, td->td_user_pri); /* XXX dubious */
- if (TD_ON_UPILOCK(td) && oldpri != newpri)
- umtx_pi_adjust(td, oldpri);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ /* Mostly copied from rtp_to_pri(). */
+ sched_class(td, type); /* XXX fix */
+ oldpri = td->td_user_pri;
+ sched_user_prio(td, newpri);
+ if (TD_IS_RUNNING(td) || TD_CAN_RUN(td))
+ sched_prio(td, td->td_user_pri); /* XXX dubious */
+ if (TD_ON_UPILOCK(td) && oldpri != newpri)
+ umtx_pi_adjust(td, oldpri);
+ thread_unlock(td);
+ }
}
static void
@@ -663,29 +667,24 @@
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
pctcpu_limit = rusage_get_limit(p, RUSAGE_PCTCPU);
+ pctcpu = 0;
PROC_SLOCK(p);
- pctcpu = 0;
FOREACH_THREAD_IN_PROC(p, td) {
ruxagg(p, td);
thread_lock(td);
pctcpu += sched_pctcpu(td);
- /*
- * We are making this decision based on data from
- * the previous run. The assumption is that this runs
- * so often it doesn't matter.
- */
- if (pctcpu > pctcpu_limit)
- rusage_throttle(td, 1);
- else
- rusage_throttle(td, 0);
thread_unlock(td);
}
+ pctcpu = ((pctcpu * 10000 + FSCALE / 2) >> FSHIFT) / 100;
+ if (pctcpu > pctcpu_limit)
+ rusage_throttle(p, 1);
+ else
+ rusage_throttle(p, 0);
PROC_SUNLOCK(p);
rusage_set(p, RUSAGE_CPU, cputick2usec(p->p_rux.rux_runtime));
microuptime(&wallclock);
timevalsub(&wallclock, &p->p_stats->p_start);
rusage_set(p, RUSAGE_WALLCLOCK, wallclock.tv_sec * 1000000 + wallclock.tv_usec);
- pctcpu = ((pctcpu * 10000 + FSCALE / 2) >> FSHIFT) / 100;
rusage_set(p, RUSAGE_PCTCPU, pctcpu);
}
sx_sunlock(&allproc_lock);
@@ -700,6 +699,30 @@
};
SYSINIT(containerd, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &containerd_kp);
+static void
+container_proc_fork_sched(void *arg __unused, struct proc *p1,
+ struct proc *newproc, int flags)
+{
+ uint64_t pctcpu_limit;
+
+ /*
+ * Newly created process may already be over the %CPU limit. Throttle
+ * it immediately after fork instead of waiting for containerd.
+ */
+ pctcpu_limit = rusage_get_limit(newproc, RUSAGE_PCTCPU);
+ if (pctcpu_limit == 0)
+ rusage_throttle(newproc, 1);
+}
+
+static void
+container_init(void)
+{
+
+ EVENTHANDLER_REGISTER(process_fork, container_proc_fork_sched, NULL,
+ EVENTHANDLER_PRI_ANY);
+}
+SYSINIT(container, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, container_init, NULL);
+
#else /* !CONTAINERS */
int
==== //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#28 (text+ko) ====
@@ -355,7 +355,7 @@
#define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */
#define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */
#define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */
-#define TDF_THROTTLED 0x00080000 /* Throttled due to %cpu usage */
+#define TDF_UNUSED19 0x00080000 /* --available-- */
#define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */
#define TDF_UNUSED21 0x00200000 /* --available-- */
#define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */
@@ -603,6 +603,7 @@
#define P_INMEM 0x10000000 /* Loaded into memory. */
#define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */
#define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */
+#define P_THROTTLED 0x80000000 /* Throttled due to %cpu usage */
#define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
#define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED)
==== //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#20 (text+ko) ====
@@ -1711,6 +1711,17 @@
rusage_set(p, RUSAGE_RSS, IDX_TO_OFF(size));
maxsize = OFF_TO_IDX(rusage_get_limit(p, RUSAGE_RSS));
if (size > maxsize) {
+ /*
+ * Don't be overly aggressive; this might be
+ * an innocent process, and the limit could've
+ * been exceeded by some memory hog. Don't
+ * try to deactivate more than half of process'
+ * resident set size.
+ *
+ * XXX: Reconsider.
+ */
+ if (maxsize < size / 2)
+ maxsize = size / 2;
vm_pageout_map_deactivate_pages(
&vm->vm_map, maxsize);
/* Update RSS usage after paging out. */
More information about the p4-projects
mailing list