PERFORCE change 102540 for review
Chris Jones
cdjones at FreeBSD.org
Thu Jul 27 09:24:33 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=102540
Change 102540 by cdjones at cdjones-impulse on 2006/07/27 09:24:09
Under memory pressure, remove a page from each process in the jail until the pressure goes away. This doesn't prevent a jail from exceeding its memory limit, but rather tends to return it to the limit; there may be merit in pushing it further below the limit.
Affected files ...
.. //depot/projects/soc2006/cdjones_jail/src/sys/kern/kern_jail.c#14 edit
.. //depot/projects/soc2006/cdjones_jail/src/sys/sys/jail.h#14 edit
.. //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.c#3 edit
.. //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.h#2 edit
Differences ...
==== //depot/projects/soc2006/cdjones_jail/src/sys/kern/kern_jail.c#14 (text+ko) ====
@@ -27,6 +27,7 @@
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
#include <sys/taskqueue.h>
#include <sys/jail.h>
#include <sys/lock.h>
@@ -119,22 +120,88 @@
static void
jpager_td(void *arg)
{
+ struct proc *p;
struct prison *pr;
+ struct thread *td;
+ vm_pindex_t limit, size, usage;
+ int breakout;
+
pr = arg;
for (;;) {
if (pr->pr_pager_flags & J_PAGER_TD_DIE)
break;
- if (prison_memory(pr) > prison_memory_limit(pr)) {
- /* the logic from vm_daemon() really needs to go here.
- TODO: refactor vm_daemon to optionally act on specific jails. */
+ /* TODO: consider whether it might be better to start
+ * pushing back when we approach the limit, rather than
+ * when we hit it.
+ */
+ limit = prison_memory_limit(pr);
+ usage = prison_memory(pr);
+
+ if ((usage - limit) > 0)
+ continue;
+
+ /* The logic from vm_daemon() really needs to go here.
+ * Problem: we want to push things below their rlimits.
+ *
+ * TODO: refactor vm_daemon to optionally act on specific jails?
+ */
+
+ sx_slock(&allproc_lock);
+ LIST_FOREACH(p, &allproc, p_list) {
+
+ if (pr != p->p_ucred->cr_prison)
+ continue;
+
+ PROC_LOCK(p);
+ if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+
+ mtx_lock_spin(&sched_lock);
+ breakout = 0;
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (!TD_ON_RUNQ(td) &&
+ !TD_IS_RUNNING(td) &&
+ !TD_IS_SLEEPING(td)) {
+ breakout = 1;
+ break;
+ }
+ }
+ mtx_unlock_spin(&sched_lock);
+ if (breakout) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+
+ /* NOTE: we differ here from vm_daemon b/c we don't
+ * care about the rlimit; things that are exceeding that will
+ * get caught in due course. We need, however, to decrease
+ * the pressure on our permitted memory allocation. Fortunately,
+ * we only care about eventually hitting the limit, so if we
+ * don't get there right away, it's okay.
+ */
+
+ /* TODO: this arbitrarily reduces each process's space by
+ * one page (until it's completely swapped out) while
+ * we're under memory pressure. A better way would be
+ * to either hit large processes first, or to hit the
+ * least-active processes first, or ....
+ */
+ size = vmspace_resident_count(p->p_vmspace) - 1;
+ if (size < 0)
+ size = 0;
+ vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, size);
+
+ sx_sunlock(&allproc_lock);
}
-
+
/* TODO --- make interval into a sysctl? */
tsleep(pr, 0, "-", hz);
}
-
+
pr->pr_pager_flags = J_PAGER_TD_DEAD;
kthread_exit(0);
}
@@ -462,7 +529,7 @@
}
/* Given credential, return memory usage in bytes. */
-int
+vm_pindex_t
prison_memory(struct prison *pr)
{
struct proc *p;
@@ -491,7 +558,7 @@
}
/* Given credential, return permitted memory usage in bytes. */
-int
+vm_pindex_t
prison_memory_limit(struct prison *pr)
{
return pr->pr_mem_limit;
==== //depot/projects/soc2006/cdjones_jail/src/sys/sys/jail.h#14 (text+ko) ====
@@ -143,8 +143,8 @@
void prison_hold(struct prison *pr);
int prison_if(struct ucred *cred, struct sockaddr *sa);
int prison_ip(struct ucred *cred, int flag, u_int32_t *ip);
-int prison_memory(struct prison *pr);
-int prison_memory_limit(struct prison *pr);
+vm_pindex_t prison_memory(struct prison *pr);
+vm_pindex_t prison_memory_limit(struct prison *pr);
void prison_remote_ip(struct ucred *cred, int flags, u_int32_t *ip);
#endif /* _KERNEL */
==== //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.c#3 (text+ko) ====
@@ -205,7 +205,7 @@
int vm_page_max_wired; /* XXX max # of wired pages system-wide */
#if !defined(NO_SWAPPING)
-static void vm_pageout_map_deactivate_pages(vm_map_t, long);
+/* static void vm_pageout_map_deactivate_pages(vm_map_t, long); */
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
static void vm_req_vmdaemon(void);
#endif
@@ -592,7 +592,7 @@
* deactivate some number of pages in a map, try to do it fairly, but
* that is really hard to do.
*/
-static void
+void
vm_pageout_map_deactivate_pages(map, desired)
vm_map_t map;
long desired;
==== //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.h#2 (text+ko) ====
@@ -87,6 +87,8 @@
* Exported routines.
*/
+void vm_pageout_map_deactivate_pages(vm_map_t map, long desired);
+
/*
* Signal pageout-daemon and wait for it.
*/
More information about the p4-projects
mailing list