PERFORCE change 187209 for review

Edward Tomasz Napierala trasz at FreeBSD.org
Mon Dec 27 10:01:19 UTC 2010


http://p4web.freebsd.org/@@187209?ac=10

Change 187209 by trasz at trasz_victim on 2010/12/27 10:01:12

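	Christmas fixes: track %CPU throttling per process (P_THROTTLED)
	instead of per thread (TDF_THROTTLED), add a process_fork hook that
	can throttle a new process immediately, and never target more than
	half of a process's resident set when deactivating pages over the
	RSS limit.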

Affected files ...

.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#45 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#28 edit
.. //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#20 edit

Differences ...

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#45 (text+ko) ====

@@ -34,7 +34,9 @@
 
 #include "opt_kdtrace.h"
 
+#include <sys/param.h>
 #include <sys/container.h>
+#include <sys/eventhandler.h>
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
@@ -619,35 +621,37 @@
 #endif
 }
 
-/*
- * Stuff below runs from a "containerd" kernel process.
- */
 static void
-rusage_throttle(struct thread *td, int throttle)
+rusage_throttle(struct proc *p, int throttle)
 {
+	struct thread *td;
 	u_char oldpri;
 	u_char newpri;
 	int type;
 
 	if (throttle) {
-		td->td_flags |= TDF_THROTTLED;
+		p->p_flag |= P_THROTTLED;
 		newpri = PRI_MIN_IDLE;
 		type = RTP_PRIO_IDLE;
-	} else if (td->td_flags & TDF_THROTTLED) {
-		td->td_flags &= ~TDF_THROTTLED;
+	} else if (p->p_flag & P_THROTTLED) {
+		p->p_flag &= ~P_THROTTLED;
 		newpri = PRI_MIN_TIMESHARE;
 		type = RTP_PRIO_NORMAL;
 	} else
 		return;
 
-	/* Mostly copied from rtp_to_pri(). */
-	sched_class(td, type);	/* XXX fix */
-	oldpri = td->td_user_pri;
-	sched_user_prio(td, newpri);
-	if (TD_IS_RUNNING(td) || TD_CAN_RUN(td))
-		sched_prio(td, td->td_user_pri); /* XXX dubious */
-	if (TD_ON_UPILOCK(td) && oldpri != newpri)
-		umtx_pi_adjust(td, oldpri);
+	FOREACH_THREAD_IN_PROC(p, td) {
+		thread_lock(td);
+		/* Mostly copied from rtp_to_pri(). */
+		sched_class(td, type);	/* XXX fix */
+		oldpri = td->td_user_pri;
+		sched_user_prio(td, newpri);
+		if (TD_IS_RUNNING(td) || TD_CAN_RUN(td))
+			sched_prio(td, td->td_user_pri); /* XXX dubious */
+		if (TD_ON_UPILOCK(td) && oldpri != newpri)
+			umtx_pi_adjust(td, oldpri);
+		thread_unlock(td);
+	}
 }
 
 static void
@@ -663,29 +667,24 @@
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			pctcpu_limit = rusage_get_limit(p, RUSAGE_PCTCPU);
+			pctcpu = 0;
 			PROC_SLOCK(p);
-			pctcpu = 0;
 			FOREACH_THREAD_IN_PROC(p, td) {
 				ruxagg(p, td);
 				thread_lock(td);
 				pctcpu += sched_pctcpu(td);
-				/*
-				 * We are making this decision based on data from
-				 * the previous run.  The assumption is that this runs
-				 * so often it doesn't matter.
-				 */
-				if (pctcpu > pctcpu_limit)
-					rusage_throttle(td, 1);
-				else
-					rusage_throttle(td, 0);
 				thread_unlock(td);
 			}
+			pctcpu = ((pctcpu * 10000 + FSCALE / 2) >> FSHIFT) / 100;
+			if (pctcpu > pctcpu_limit)
+				rusage_throttle(p, 1);
+			else
+				rusage_throttle(p, 0);
 			PROC_SUNLOCK(p);
 			rusage_set(p, RUSAGE_CPU, cputick2usec(p->p_rux.rux_runtime));
 			microuptime(&wallclock);
 			timevalsub(&wallclock, &p->p_stats->p_start);
 			rusage_set(p, RUSAGE_WALLCLOCK, wallclock.tv_sec * 1000000 + wallclock.tv_usec);
-			pctcpu = ((pctcpu * 10000 + FSCALE / 2) >> FSHIFT) / 100;
 			rusage_set(p, RUSAGE_PCTCPU, pctcpu);
 		}
 		sx_sunlock(&allproc_lock);
@@ -700,6 +699,30 @@
 };
 SYSINIT(containerd, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &containerd_kp);
 
+static void
+container_proc_fork_sched(void *arg __unused, struct proc *p1,
+    struct proc *newproc, int flags)
+{
+	uint64_t pctcpu_limit;
+
+	/*
+	 * Newly created process may already be over the %CPU limit.  Throttle
+	 * it immediately after fork instead of waiting for containerd.
+	 */
+	pctcpu_limit = rusage_get_limit(newproc, RUSAGE_PCTCPU);
+	if (pctcpu_limit <= 0)
+		rusage_throttle(newproc, 1);
+}
+
+static void
+container_init(void)
+{
+
+	EVENTHANDLER_REGISTER(process_fork, container_proc_fork_sched, NULL,
+	    EVENTHANDLER_PRI_ANY);
+}
+SYSINIT(container, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, container_init, NULL);
+
 #else /* !CONTAINERS */
 
 int

==== //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#28 (text+ko) ====

@@ -355,7 +355,7 @@
 #define	TDF_NEEDRESCHED	0x00010000 /* Thread needs to yield. */
 #define	TDF_NEEDSIGCHK	0x00020000 /* Thread may need signal delivery. */
 #define	TDF_NOLOAD	0x00040000 /* Ignore during load avg calculations. */
-#define	TDF_THROTTLED	0x00080000 /* Throttled due to %cpu usage */
+#define	TDF_UNUSED19	0x00080000 /* --available-- */
 #define	TDF_THRWAKEUP	0x00100000 /* Libthr thread must not suspend itself. */
 #define	TDF_UNUSED21	0x00200000 /* --available-- */
 #define	TDF_SWAPINREQ	0x00400000 /* Swapin request due to wakeup. */
@@ -603,6 +603,7 @@
 #define	P_INMEM		0x10000000 /* Loaded into memory. */
 #define	P_SWAPPINGOUT	0x20000000 /* Process is being swapped out. */
 #define	P_SWAPPINGIN	0x40000000 /* Process is being swapped in. */
+#define	P_THROTTLED	0x80000000 /* Throttled due to %cpu usage */
 
 #define	P_STOPPED	(P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
 #define	P_SHOULDSTOP(p)	((p)->p_flag & P_STOPPED)

==== //depot/projects/soc2009/trasz_limits/sys/vm/vm_pageout.c#20 (text+ko) ====

@@ -1711,6 +1711,17 @@
 			rusage_set(p, RUSAGE_RSS, IDX_TO_OFF(size));
 			maxsize = OFF_TO_IDX(rusage_get_limit(p, RUSAGE_RSS));
 			if (size > maxsize) {
+				/*
+				 * Don't be overly aggressive; this might be
+				 * an innocent process, and the limit could've
+				 * been exceeded by some memory hog.  Don't
+				 * try to deactivate more than half of the
+				 * process's resident set size.
+				 *
+				 * XXX: Reconsider.
+				 */
+				if (maxsize < size / 2)
+					maxsize = size / 2;
 				vm_pageout_map_deactivate_pages(
 				    &vm->vm_map, maxsize);
 				/* Update RSS usage after paging out. */


More information about the p4-projects mailing list