PERFORCE change 188243 for review

Edward Tomasz Napierala trasz at FreeBSD.org
Thu Jan 27 21:11:16 UTC 2011


http://p4web.freebsd.org/@@188243?ac=10

Change 188243 by trasz at trasz_victim on 2011/01/27 21:10:21

	Don't embed "struct container" inside "struct proc" et al.; it's
	somewhat big, and since we don't want #ifdefs in structures, embedding
	it would be a pessimisation for people who don't want to use containers.
	While here, clean up includes somewhat.

Affected files ...

.. //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#37 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#66 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_descrip.c#20 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_jail.c#33 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_loginclass.c#28 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_rctl.c#18 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#60 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/vfs_vnops.c#22 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/container.h#25 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#20 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/kernel.h#7 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/loginclass.h#13 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#30 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#25 edit

Differences ...

==== //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#37 (text+ko) ====

@@ -526,6 +526,9 @@
 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
 	p->p_cpulimit = RLIM_INFINITY;
 
+	/* Initialize resource accounting structures. */
+	container_create(&p->p_container);
+
 	p->p_stats = pstats_alloc();
 
 	/* Allocate a prototype map so we have something to fork. */

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#66 (text+ko) ====

@@ -67,6 +67,8 @@
 static struct mtx container_lock;
 MTX_SYSINIT(container_lock, &container_lock, "container lock", MTX_DEF);
 
+static uma_zone_t container_zone;
+
 static void container_sub(struct container *dest, const struct container *src);
 static void rusage_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount);
 static void rusage_add_cred_locked(struct ucred *cred, int resource, uint64_t amount);
@@ -261,26 +263,29 @@
 }
 
 void
-container_create(struct container *container)
+container_create(struct container **containerp)
 {
-	int i;
+
+	SDT_PROBE(container, kernel, container, create, containerp, 0, 0, 0, 0);
 
-	SDT_PROBE(container, kernel, container, create, container, 0, 0, 0, 0);
+	KASSERT(*containerp == NULL, ("container already allocated"));
 
-	for (i = 0; i <= RUSAGE_MAX; i++)
-		KASSERT(container->c_resources[i] == 0,
-		    ("container->c_resources[%d] != 0", i));
+	*containerp = uma_zalloc(container_zone, M_WAITOK | M_ZERO);
 }
 
 static void
-container_destroy_locked(struct container *container)
+container_destroy_locked(struct container **containerp)
 {
 	int i;
+	struct container *container;
 
-	SDT_PROBE(container, kernel, container, destroy, container, 0, 0, 0, 0);
+	SDT_PROBE(container, kernel, container, destroy, containerp, 0, 0, 0, 0);
 
 	mtx_assert(&container_lock, MA_OWNED);
-	KASSERT(container != NULL, ("NULL container"));
+	KASSERT(containerp != NULL, ("NULL containerp"));
+	KASSERT(*containerp != NULL, ("NULL container"));
+
+	container = *containerp;
 
 	for (i = 0; i <= RUSAGE_MAX; i++) {
 		if (rusage_is_sloppy(i))
@@ -294,10 +299,12 @@
 		    "%ju allocated for resource %d\n",
 		    container->c_resources[i], i));
 	}
+	uma_zfree(container_zone, container);
+	*containerp = NULL;
 }
 
 void
-container_destroy(struct container *container)
+container_destroy(struct container **container)
 {
 
 	mtx_lock(&container_lock);
@@ -349,6 +356,7 @@
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(amount >= 0, ("rusage_add: invalid amount for resource %d: %ju",
 	    resource, amount));
+	KASSERT(p->p_container != NULL, ("rusage_add: NULL container for proc %p", p));
 
 	mtx_lock(&container_lock);
 #ifdef RCTL
@@ -359,7 +367,7 @@
 		return (error);
 	}
 #endif
-	container_alloc_resource(&p->p_container, resource, amount);
+	container_alloc_resource(p->p_container, resource, amount);
 	rusage_add_cred_locked(p->p_ucred, resource, amount);
 	mtx_unlock(&container_lock);
 
@@ -376,10 +384,10 @@
 	KASSERT(amount >= 0, ("rusage_add_cred: invalid amount for resource %d: %ju",
 	    resource, amount));
 
-	container_alloc_resource(&cred->cr_ruidinfo->ui_container, resource, amount);
+	container_alloc_resource(cred->cr_ruidinfo->ui_container, resource, amount);
 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
-		container_alloc_resource(&pr->pr_container, resource, amount);
-	container_alloc_resource(&cred->cr_loginclass->lc_container, resource, amount);
+		container_alloc_resource(pr->pr_container, resource, amount);
+	container_alloc_resource(cred->cr_loginclass->lc_container, resource, amount);
 }
 
 /*
@@ -416,9 +424,10 @@
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(amount >= 0, ("rusage_add_force: invalid amount for resource %d: %ju",
 	    resource, amount));
+	KASSERT(p->p_container != NULL, ("rusage_add_force: NULL container for proc %p", p));
 
 	mtx_lock(&container_lock);
-	container_alloc_resource(&p->p_container, resource, amount);
+	container_alloc_resource(p->p_container, resource, amount);
 	mtx_unlock(&container_lock);
 	rusage_add_cred(p->p_ucred, resource, amount);
 }
@@ -442,8 +451,9 @@
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(amount >= 0, ("rusage_set: invalid amount for resource %d: %ju",
 	    resource, amount));
+	KASSERT(p->p_container != NULL, ("rusage_set_locked: NULL container for proc %p", p));
 
-	diff = amount - p->p_container.c_resources[resource];
+	diff = amount - p->p_container->c_resources[resource];
 #ifdef notyet
 	KASSERT(diff >= 0 || rusage_is_reclaimable(resource),
 	    ("rusage_set: usage of non-reclaimable resource %d dropping",
@@ -458,7 +468,7 @@
 		}
 	}
 #endif
-	container_alloc_resource(&p->p_container, resource, diff);
+	container_alloc_resource(p->p_container, resource, diff);
 	if (diff > 0)
 		rusage_add_cred_locked(p->p_ucred, resource, diff);
 	else if (diff < 0)
@@ -499,12 +509,13 @@
 	 * We need proc lock to dereference p->p_ucred.
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	KASSERT(amount >= 0, ("rusage_set: invalid amount for resource %d: %ju",
+	KASSERT(amount >= 0, ("rusage_set_force: invalid amount for resource %d: %ju",
 	    resource, amount));
+	KASSERT(p->p_container != NULL, ("rusage_set_force: NULL container for proc %p", p));
 
 	mtx_lock(&container_lock);
-	diff = amount - p->p_container.c_resources[resource];
-	container_alloc_resource(&p->p_container, resource, diff);
+	diff = amount - p->p_container->c_resources[resource];
+	container_alloc_resource(p->p_container, resource, diff);
 	if (diff > 0)
 		rusage_add_cred_locked(p->p_ucred, resource, diff);
 	else if (diff < 0)
@@ -564,16 +575,17 @@
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(amount >= 0, ("rusage_sub: invalid amount for resource %d: %ju",
 	    resource, amount));
+	KASSERT(p->p_container != NULL, ("rusage_sub: NULL container for proc %p", p));
 	KASSERT(rusage_is_reclaimable(resource),
 	    ("rusage_sub: called for non-reclaimable resource %d", resource));
 
 	mtx_lock(&container_lock);
-	KASSERT(amount <= p->p_container.c_resources[resource],
+	KASSERT(amount <= p->p_container->c_resources[resource],
 	    ("rusage_sub: freeing %ju of resource %d, which is more than allocated "
 	    "%ld for %s (pid %d)", amount, resource,
-	    p->p_container.c_resources[resource], p->p_comm, p->p_pid));
+	    p->p_container->c_resources[resource], p->p_comm, p->p_pid));
 
-	container_alloc_resource(&p->p_container, resource, -amount);
+	container_alloc_resource(p->p_container, resource, -amount);
 	rusage_sub_cred_locked(p->p_ucred, resource, amount);
 	mtx_unlock(&container_lock);
 }
@@ -592,10 +604,10 @@
 	    ("rusage_sub_cred: called for non-reclaimable resource %d", resource));
 #endif
 
-	container_alloc_resource(&cred->cr_ruidinfo->ui_container, resource, -amount);
+	container_alloc_resource(cred->cr_ruidinfo->ui_container, resource, -amount);
 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
-		container_alloc_resource(&pr->pr_container, resource, -amount);
-	container_alloc_resource(&cred->cr_loginclass->lc_container, resource, -amount);
+		container_alloc_resource(pr->pr_container, resource, -amount);
+	container_alloc_resource(cred->cr_loginclass->lc_container, resource, -amount);
 }
 
 /*
@@ -620,6 +632,11 @@
 	int i, error = 0;
 
 	/*
+	 * Create container for the child process.
+	 */
+	container_create(&child->p_container);
+
+	/*
 	 * No resource accounting for kernel processes.
 	 */
 	if (child->p_flag & P_SYSTEM)
@@ -629,49 +646,46 @@
 	PROC_LOCK(child);
 	mtx_lock(&container_lock);
 
-	/*
-	 * Create container for the child process.
-	 */
-	bzero(&child->p_container, sizeof(child->p_container));
-	container_create(&child->p_container);
+	if (parent->p_container != NULL) {
+		/*
+		 * Inherit resource usage.
+		 */
+		for (i = 0; i <= RUSAGE_MAX; i++) {
+			if (parent->p_container->c_resources[i] == 0 ||
+			    !rusage_is_inheritable(i))
+				continue;
 
-	/*
-	 * Inherit resource usage.
-	 */
-	for (i = 0; i <= RUSAGE_MAX; i++) {
-		if (parent->p_container.c_resources[i] == 0 ||
-		    !rusage_is_inheritable(i))
-			continue;
-
-		error = rusage_set_locked(child, i, parent->p_container.c_resources[i]);
-		if (error != 0) {
-			/*
-			 * XXX: The only purpose of these two lines is to prevent from
-			 * tripping checks in container_destroy().
-			 */
-			for (i = 0; i <= RUSAGE_MAX; i++)
-				rusage_set_locked(child, i, 0);
-			container_destroy_locked(&child->p_container);
-			goto out;
+			error = rusage_set_locked(child, i, parent->p_container->c_resources[i]);
+			if (error != 0) {
+				/*
+				 * XXX: The only purpose of these two lines is to prevent from
+				 * tripping checks in container_destroy().
+				 */
+				for (i = 0; i <= RUSAGE_MAX; i++)
+					rusage_set_locked(child, i, 0);
+				goto out;
+			}
 		}
+	} else {
+		KASSERT(parent->p_flag & P_SYSTEM,
+		    ("non-system process without container; p = %p", parent));
 	}
 
-out:
 #ifdef RCTL
-	if (error == 0) {
-		error = rctl_proc_fork(parent, child);
-		if (error != 0) {
-			/*
-			 * XXX: The only purpose of these two lines is to prevent from
-			 * tripping checks in container_destroy().
-			 */
-			for (i = 0; i <= RUSAGE_MAX; i++)
-				rusage_set_locked(child, i, 0);
-			container_destroy_locked(&child->p_container);
-		}
+	error = rctl_proc_fork(parent, child);
+	if (error != 0) {
+		/*
+		 * XXX: The only purpose of these two lines is to prevent from
+		 * tripping checks in container_destroy().
+		 */
+		for (i = 0; i <= RUSAGE_MAX; i++)
+			rusage_set_locked(child, i, 0);
 	}
 #endif
 
+out:
+	if (error != 0)
+		container_destroy_locked(&child->p_container);
 	mtx_unlock(&container_lock);
 	PROC_UNLOCK(child);
 	PROC_UNLOCK(parent);
@@ -737,18 +751,18 @@
 
 	mtx_lock(&container_lock);
 	if (newuip != olduip) {
-		container_sub(&olduip->ui_container, &p->p_container);
-		container_add(&newuip->ui_container, &p->p_container);
+		container_sub(olduip->ui_container, p->p_container);
+		container_add(newuip->ui_container, p->p_container);
 	}
 	if (newlc != oldlc) {
-		container_sub(&oldlc->lc_container, &p->p_container);
-		container_add(&newlc->lc_container, &p->p_container);
+		container_sub(oldlc->lc_container, p->p_container);
+		container_add(newlc->lc_container, p->p_container);
 	}
 	if (newpr != oldpr) {
 		for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
-			container_sub(&pr->pr_container, &p->p_container);
+			container_sub(pr->pr_container, p->p_container);
 		for (pr = newpr; pr != NULL; pr = pr->pr_parent)
-			container_add(&pr->pr_container, &p->p_container);
+			container_add(pr->pr_container, p->p_container);
 	}
 	mtx_unlock(&container_lock);
 
@@ -833,6 +847,11 @@
 			timevalsub(&wallclock, &p->p_stats->p_start);
 			pctcpu_limit = rusage_get_available(p, RUSAGE_PCTCPU);
 			PROC_LOCK(p);
+			if (p->p_flag & P_SYSTEM) {
+				PROC_UNLOCK(p);
+				continue;
+			}
+
 			PROC_SLOCK(p);
 			FOREACH_THREAD_IN_PROC(p, td) {
 				ruxagg(p, td);
@@ -856,7 +875,7 @@
 				rusage_throttle(p, 0);
 			mtx_lock(&container_lock);
 			rusage_set_locked(p, RUSAGE_CPU, runtime);
-			p->p_container.c_resources[RUSAGE_PCTCPU] = 0;
+			p->p_container->c_resources[RUSAGE_PCTCPU] = 0;
 			rusage_set_locked(p, RUSAGE_PCTCPU, pctcpu);
 			rusage_set_locked(p, RUSAGE_WALLCLOCK,
 			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
@@ -873,7 +892,7 @@
 	containerd,
 	NULL
 };
-SYSINIT(containerd, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &containerd_kp);
+SYSINIT(containerd, SI_SUB_CONTAINERD, SI_ORDER_FIRST, kproc_start, &containerd_kp);
 
 static void
 container_proc_fork_sched(void *arg __unused, struct proc *p1,
@@ -894,10 +913,16 @@
 container_init(void)
 {
 
+	container_zone = uma_zcreate("container", sizeof(struct container),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	EVENTHANDLER_REGISTER(process_fork, container_proc_fork_sched, NULL,
 	    EVENTHANDLER_PRI_ANY);
+	/*
+	 * XXX: Move this somewhere.
+	 */
+	container_create(&prison0.pr_container);
 }
-SYSINIT(container, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, container_init, NULL);
+SYSINIT(container, SI_SUB_CONTAINER, SI_ORDER_FIRST, container_init, NULL);
 
 #else /* !CONTAINERS */
 
@@ -945,12 +970,12 @@
 }
 
 void
-container_create(struct container *container)
+container_create(struct container **containerp)
 {
 }
 
 void
-container_destroy(struct container *container)
+container_destroy(struct container **containerp)
 {
 }
 

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_descrip.c#20 (text+ko) ====

@@ -45,6 +45,7 @@
 #include <sys/systm.h>
 
 #include <sys/conf.h>
+#include <sys/container.h>
 #include <sys/domain.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_jail.c#33 (text+ko) ====

@@ -38,6 +38,7 @@
 #include <sys/types.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
+#include <sys/container.h>
 #include <sys/errno.h>
 #include <sys/sysproto.h>
 #include <sys/malloc.h>
@@ -4270,7 +4271,7 @@
 
 	sx_slock(&allprison_lock);
 	TAILQ_FOREACH(pr, &allprison, pr_list)
-		(callback)(&pr->pr_container, arg2, arg3);
+		(callback)(pr->pr_container, arg2, arg3);
 	sx_sunlock(&allprison_lock);
 }
 

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_loginclass.c#28 (text+ko) ====

@@ -41,6 +41,7 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/container.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
@@ -116,6 +117,7 @@
 	    ("loginclass_find: got too long name"));
 
 	newlc = malloc(sizeof(*newlc), M_LOGINCLASS, M_ZERO | M_WAITOK);
+	container_create(&newlc->lc_container);
 
 	mtx_lock(&loginclasses_lock);
 	LIST_FOREACH(lc, &loginclasses, lc_next) {
@@ -125,12 +127,12 @@
 		/* Found loginclass with a matching name? */
 		loginclass_acquire(lc);
 		mtx_unlock(&loginclasses_lock);
+		container_destroy(&newlc->lc_container);
 		free(newlc, M_LOGINCLASS);
 		return (lc);
 	}
 
 	/* Add new loginclass. */
-	container_create(&newlc->lc_container);
 	strcpy(newlc->lc_name, name);
 	refcount_init(&newlc->lc_refcount, 1);
 	LIST_INSERT_HEAD(&loginclasses, newlc, lc_next);
@@ -222,7 +224,7 @@
 
 	mtx_lock(&loginclasses_lock);
 	LIST_FOREACH(lc, &loginclasses, lc_next)
-		(callback)(&lc->lc_container, arg2, arg3);
+		(callback)(lc->lc_container, arg2, arg3);
 	mtx_unlock(&loginclasses_lock);
 }
 

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_rctl.c#18 (text+ko) ====

@@ -135,7 +135,7 @@
 	{ NULL, -1 }};
 
 static void rctl_init(void);
-SYSINIT(rctl, SI_SUB_CPU, SI_ORDER_FIRST, rctl_init, NULL);
+SYSINIT(rctl, SI_SUB_CONTAINER, SI_ORDER_FIRST, rctl_init, NULL);
 
 static uma_zone_t rctl_rule_link_zone;
 static uma_zone_t rctl_rule_zone;
@@ -203,19 +203,19 @@
 	switch (rule->rr_per) {
 	case RCTL_SUBJECT_TYPE_PROCESS:
 		available = rule->rr_amount -
-		    p->p_container.c_resources[resource];
+		    p->p_container->c_resources[resource];
 		break;
 	case RCTL_SUBJECT_TYPE_USER:
 		available = rule->rr_amount -
-		    cred->cr_ruidinfo->ui_container.c_resources[resource];
+		    cred->cr_ruidinfo->ui_container->c_resources[resource];
 		break;
 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
 		available = rule->rr_amount -
-		    cred->cr_loginclass->lc_container.c_resources[resource];
+		    cred->cr_loginclass->lc_container->c_resources[resource];
 		break;
 	case RCTL_SUBJECT_TYPE_JAIL:
 		available = rule->rr_amount -
-		    cred->cr_prison->pr_container.c_resources[resource];
+		    cred->cr_prison->pr_container->c_resources[resource];
 		break;
 	default:
 		panic("rctl_compute_available: unknown per %d",
@@ -278,7 +278,7 @@
 	 * There may be more than one matching rule; go through all of them.
 	 * Denial should be done last, after logging and sending signals.
 	 */
-	LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) {
 		rule = link->rrl_rule;
 		if (rule->rr_resource != resource)
 			continue;
@@ -357,7 +357,7 @@
 	 * There may be more than one matching rule; go through all of them.
 	 * Denial should be done last, after logging and sending signals.
 	 */
-	LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) {
 		rule = link->rrl_rule;
 		if (rule->rr_resource != resource)
 			continue;
@@ -387,7 +387,7 @@
 	 * There may be more than one matching rule; go through all of them.
 	 * Denial should be done last, after logging and sending signals.
 	 */
-	LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) {
 		rule = link->rrl_rule;
 		if (rule->rr_resource != resource)
 			continue;
@@ -403,7 +403,7 @@
 	/*
 	 * XXX: Think about this _hard_.
 	 */
-	allocated = p->p_container.c_resources[resource];
+	allocated = p->p_container->c_resources[resource];
 	if (minavailable < INT64_MAX - allocated)
 		minavailable += allocated;
 	if (minavailable < 0)
@@ -923,7 +923,7 @@
 	case RCTL_SUBJECT_TYPE_PROCESS:
 		p = rule->rr_subject.rs_proc;
 		KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
-		rctl_container_add_rule(&p->p_container, rule);
+		rctl_container_add_rule(p->p_container, rule);
 		/*
 		 * In case of per-process rule, we don't have anything more
 		 * to do.
@@ -933,19 +933,19 @@
 	case RCTL_SUBJECT_TYPE_USER:
 		uip = rule->rr_subject.rs_uip;
 		KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
-		rctl_container_add_rule(&uip->ui_container, rule);
+		rctl_container_add_rule(uip->ui_container, rule);
 		break;
 
 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
 		lc = rule->rr_subject.hr_loginclass;
 		KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
-		rctl_container_add_rule(&lc->lc_container, rule);
+		rctl_container_add_rule(lc->lc_container, rule);
 		break;
 
 	case RCTL_SUBJECT_TYPE_JAIL:
 		pr = rule->rr_subject.rs_prison;
 		KASSERT(pr != NULL, ("rctl_rule_add: NULL pr"));
-		rctl_container_add_rule(&pr->pr_container, rule);
+		rctl_container_add_rule(pr->pr_container, rule);
 		break;
 
 	default:
@@ -986,7 +986,7 @@
 			    rule->rr_subject_type);
 		}
 
-		rctl_container_add_rule(&p->p_container, rule);
+		rctl_container_add_rule(p->p_container, rule);
 	}
 
 	return (0);
@@ -1018,7 +1018,7 @@
 	    filter->rr_subject.rs_proc != NULL) {
 		p = filter->rr_subject.rs_proc;
 		rw_wlock(&rctl_lock);
-		found = rctl_container_remove_rules(&p->p_container, filter);
+		found = rctl_container_remove_rules(p->p_container, filter);
 		rw_wunlock(&rctl_lock);
 		if (found)
 			return (0);
@@ -1035,7 +1035,7 @@
 	sx_assert(&allproc_lock, SA_LOCKED);
 	rw_wlock(&rctl_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
-		found += rctl_container_remove_rules(&p->p_container, filter);
+		found += rctl_container_remove_rules(p->p_container, filter);
 	}
 	rw_wunlock(&rctl_lock);
 
@@ -1195,7 +1195,7 @@
 			error = EINVAL;
 			goto out;
 		}
-		outputsbuf = rctl_container_to_sbuf(&p->p_container, 0);
+		outputsbuf = rctl_container_to_sbuf(p->p_container, 0);
 		break;
 	case RCTL_SUBJECT_TYPE_USER:
 		uip = filter->rr_subject.rs_uip;
@@ -1203,7 +1203,7 @@
 			error = EINVAL;
 			goto out;
 		}
-		outputsbuf = rctl_container_to_sbuf(&uip->ui_container, 1);
+		outputsbuf = rctl_container_to_sbuf(uip->ui_container, 1);
 		break;
 	case RCTL_SUBJECT_TYPE_LOGINCLASS:
 		lc = filter->rr_subject.hr_loginclass;
@@ -1211,7 +1211,7 @@
 			error = EINVAL;
 			goto out;
 		}
-		outputsbuf = rctl_container_to_sbuf(&lc->lc_container, 1);
+		outputsbuf = rctl_container_to_sbuf(lc->lc_container, 1);
 		break;
 	case RCTL_SUBJECT_TYPE_JAIL:
 		pr = filter->rr_subject.rs_prison;
@@ -1219,7 +1219,7 @@
 			error = EINVAL;
 			goto out;
 		}
-		outputsbuf = rctl_container_to_sbuf(&pr->pr_container, 1);
+		outputsbuf = rctl_container_to_sbuf(pr->pr_container, 1);
 		break;
 	default:
 		error = EINVAL;
@@ -1283,7 +1283,7 @@
 	sx_assert(&allproc_lock, SA_LOCKED);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		rw_rlock(&rctl_lock);
-		LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) {
+		LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) {
 			/*
 			 * Non-process rules will be added to the buffer later.
 			 * Adding them here would result in duplicated output.
@@ -1366,7 +1366,7 @@
 	KASSERT(sb != NULL, ("sbuf_new failed"));
 
 	rw_rlock(&rctl_lock);
-	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_container->c_rule_links, rrl_next) {
 		rctl_rule_to_sbuf(sb, link->rrl_rule);
 		sbuf_printf(sb, ",");
 	}
@@ -1498,16 +1498,16 @@
 	 */
 	rulecnt = 0;
 	rw_rlock(&rctl_lock);
-	LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) {
 		if (link->rrl_rule->rr_subject_type ==
 		    RCTL_SUBJECT_TYPE_PROCESS)
 			rulecnt++;
 	}
-	LIST_FOREACH(link, &newuip->ui_container.c_rule_links, rrl_next)
+	LIST_FOREACH(link, &newuip->ui_container->c_rule_links, rrl_next)
 		rulecnt++;
-	LIST_FOREACH(link, &newlc->lc_container.c_rule_links, rrl_next)
+	LIST_FOREACH(link, &newlc->lc_container->c_rule_links, rrl_next)
 		rulecnt++;
-	LIST_FOREACH(link, &newpr->pr_container.c_rule_links, rrl_next)
+	LIST_FOREACH(link, &newpr->pr_container->c_rule_links, rrl_next)
 		rulecnt++;
 	rw_runlock(&rctl_lock);
 
@@ -1527,7 +1527,7 @@
 	 * Assign rules to the newly allocated list entries.
 	 */
 	rw_wlock(&rctl_lock);
-	LIST_FOREACH(link, &p->p_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &p->p_container->c_rule_links, rrl_next) {
 		if (link->rrl_rule->rr_subject_type ==
 		    RCTL_SUBJECT_TYPE_PROCESS) {
 			if (newlink == NULL)
@@ -1539,7 +1539,7 @@
 		}
 	}
 	
-	LIST_FOREACH(link, &newuip->ui_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &newuip->ui_container->c_rule_links, rrl_next) {
 		if (newlink == NULL)
 			goto goaround;
 		rctl_rule_acquire(link->rrl_rule);
@@ -1548,7 +1548,7 @@
 		rulecnt--;
 	}
 
-	LIST_FOREACH(link, &newlc->lc_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &newlc->lc_container->c_rule_links, rrl_next) {
 		if (newlink == NULL)
 			goto goaround;
 		rctl_rule_acquire(link->rrl_rule);
@@ -1557,7 +1557,7 @@
 		rulecnt--;
 	}
 
-	LIST_FOREACH(link, &newpr->pr_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &newpr->pr_container->c_rule_links, rrl_next) {
 		if (newlink == NULL)
 			goto goaround;
 		rctl_rule_acquire(link->rrl_rule);
@@ -1570,8 +1570,8 @@
 		/*
 		 * Free the old rule list.
 		 */
-		while (!LIST_EMPTY(&p->p_container.c_rule_links)) {
-			link = LIST_FIRST(&p->p_container.c_rule_links);
+		while (!LIST_EMPTY(&p->p_container->c_rule_links)) {
+			link = LIST_FIRST(&p->p_container->c_rule_links);
 			LIST_REMOVE(link, rrl_next);
 			rctl_rule_release(link->rrl_rule);
 			uma_zfree(rctl_rule_link_zone, link);
@@ -1586,7 +1586,7 @@
 		while (!LIST_EMPTY(&newrules)) {
 			newlink = LIST_FIRST(&newrules);
 			LIST_REMOVE(newlink, rrl_next);
-			LIST_INSERT_HEAD(&p->p_container.c_rule_links,
+			LIST_INSERT_HEAD(&p->p_container->c_rule_links,
 			    newlink, rrl_next);
 		}
 
@@ -1623,12 +1623,23 @@
 	struct rctl_rule_link *link;
 	struct rctl_rule *rule;
 
+	LIST_INIT(&child->p_container->c_rule_links);
+
 	/*
 	 * No limits for kernel processes.
 	 */
 	if (child->p_flag & P_SYSTEM)
 		return (0);
 
+	/*
+	 * Nothing to inherit from P_SYSTEM parents.
+	 */
+	if (parent->p_container == NULL) {
+		KASSERT(parent->p_flag & P_SYSTEM,
+		    ("non-system process without container; p = %p", parent));
+		return (0);
+	}
+
 	rw_wlock(&rctl_lock);
 
 	/*
@@ -1636,7 +1647,7 @@
 	 * Rules with 'process' subject have to be duplicated in order to make their
 	 * rr_subject point to the new process.
 	 */
-	LIST_FOREACH(link, &parent->p_container.c_rule_links, rrl_next) {
+	LIST_FOREACH(link, &parent->p_container->c_rule_links, rrl_next) {
 		if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) {
 			rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
 			if (rule == NULL)
@@ -1644,12 +1655,12 @@
 			KASSERT(rule->rr_subject.rs_proc == parent,
 			    ("rule->rr_subject.rs_proc == parent"));
 			rule->rr_subject.rs_proc = child;
-			error = rctl_container_add_rule_locked(&child->p_container, rule);
+			error = rctl_container_add_rule_locked(child->p_container, rule);
 			rctl_rule_release(rule);
 			if (error != 0)
 				goto fail;
 		} else {
-			error = rctl_container_add_rule_locked(&child->p_container, link->rrl_rule);
+			error = rctl_container_add_rule_locked(child->p_container, link->rrl_rule);
 			if (error != 0)
 				goto fail;
 		}
@@ -1659,8 +1670,8 @@
 	return (0);
 
 fail:
-	while (!LIST_EMPTY(&child->p_container.c_rule_links)) {
-		link = LIST_FIRST(&child->p_container.c_rule_links);
+	while (!LIST_EMPTY(&child->p_container->c_rule_links)) {
+		link = LIST_FIRST(&child->p_container->c_rule_links);
 		LIST_REMOVE(link, rrl_next);
 		rctl_rule_release(link->rrl_rule);
 		uma_zfree(rctl_rule_link_zone, link);
@@ -1678,8 +1689,8 @@
 	struct rctl_rule_link *link;
 
 	rw_wlock(&rctl_lock);
-	while (!LIST_EMPTY(&p->p_container.c_rule_links)) {
-		link = LIST_FIRST(&p->p_container.c_rule_links);
+	while (!LIST_EMPTY(&p->p_container->c_rule_links)) {
+		link = LIST_FIRST(&p->p_container->c_rule_links);
 		LIST_REMOVE(link, rrl_next);
 		rctl_rule_release(link->rrl_rule);
 		uma_zfree(rctl_rule_link_zone, link);

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#60 (text+ko) ====

@@ -1205,6 +1205,7 @@
 	if (uip == NULL) {
 		rw_runlock(&uihashtbl_lock);
 		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
+		container_create(&uip->ui_container);
 		rw_wlock(&uihashtbl_lock);
 		/*
 		 * There's a chance someone created our uidinfo while we
@@ -1213,6 +1214,7 @@
 		 */
 		if ((old_uip = uilookup(uid)) != NULL) {
 			/* Someone else beat us to it. */
+			container_destroy(&uip->ui_container);
 			free(uip, M_UIDINFO);
 			uip = old_uip;
 		} else {
@@ -1221,7 +1223,6 @@
 			mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL,
 			    MTX_DEF);
 			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
-			container_create(&uip->ui_container);
 		}
 	}
 	uihold(uip);
@@ -1302,7 +1303,7 @@
 	rw_rlock(&uihashtbl_lock);
 	for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
 		LIST_FOREACH(uip, uih, ui_hash) {
-			(callback)(&uip->ui_container, arg2, arg3);
+			(callback)(uip->ui_container, arg2, arg3);
 		}
 	}
 	rw_runlock(&uihashtbl_lock);

==== //depot/projects/soc2009/trasz_limits/sys/kern/vfs_vnops.c#22 (text+ko) ====

@@ -39,6 +39,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/container.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/kdb.h>

==== //depot/projects/soc2009/trasz_limits/sys/sys/container.h#25 (text+ko) ====

@@ -104,8 +104,8 @@
 uint64_t	rusage_get_limit(struct proc *p, int resource);
 uint64_t	rusage_get_available(struct proc *p, int resource);
 
-void	container_create(struct container *container);
-void	container_destroy(struct container *container);
+void	container_create(struct container **containerp);
+void	container_destroy(struct container **containerp);
 
 int	container_proc_fork(struct proc *parent, struct proc *child);
 void	container_proc_exit(struct proc *p);

==== //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#20 (text+ko) ====

@@ -30,8 +30,6 @@
 #ifndef _SYS_JAIL_H_
 #define _SYS_JAIL_H_
 
-#include <sys/container.h>
-
 #ifdef _KERNEL
 struct jail_v0 {
 	u_int32_t	version;
@@ -137,6 +135,8 @@
 
 #define	HOSTUUIDLEN	64
 
+struct container;
+
 /*
  * This structure describes a prison.  It is pointed to by all struct
  * ucreds's of the inmates.  pr_ref keeps track of them and is used to
@@ -181,7 +181,7 @@
 	char		 pr_hostname[MAXHOSTNAMELEN];	/* (p) jail hostname */
 	char		 pr_domainname[MAXHOSTNAMELEN];	/* (p) jail domainname */
 	char		 pr_hostuuid[HOSTUUIDLEN];	/* (p) jail hostuuid */
-	struct container pr_container;			/* (*) resource accounting */
+	struct container *pr_container;			/* (c) resource accounting */
 };
 #endif /* _KERNEL || _WANT_PRISON */
 

==== //depot/projects/soc2009/trasz_limits/sys/sys/kernel.h#7 (text+ko) ====

@@ -109,6 +109,7 @@
 	SI_SUB_VNET_PRELINK	= 0x1E00000,	/* vnet init before modules */
 	SI_SUB_KLD		= 0x2000000,	/* KLD and module setup */
 	SI_SUB_CPU		= 0x2100000,	/* CPU resource(s)*/
+	SI_SUB_CONTAINER	= 0x2110000,	/* resource accounting */
 	SI_SUB_RANDOM		= 0x2120000,	/* random number generator */
 	SI_SUB_KDTRACE		= 0x2140000,	/* Kernel dtrace hooks */
 	SI_SUB_MAC		= 0x2180000,	/* TrustedBSD MAC subsystem */
@@ -169,6 +170,7 @@
 	SI_SUB_KTHREAD_UPDATE	= 0xec00000,	/* update daemon*/
 	SI_SUB_KTHREAD_IDLE	= 0xee00000,	/* idle procs*/
 	SI_SUB_SMP		= 0xf000000,	/* start the APs*/
+	SI_SUB_CONTAINERD	= 0xf100000,	/* start containerd*/
 	SI_SUB_RUN_SCHEDULER	= 0xfffffff	/* scheduler*/
 };
 

==== //depot/projects/soc2009/trasz_limits/sys/sys/loginclass.h#13 (text+ko) ====

@@ -27,7 +27,7 @@
 #ifndef _SYS_LOGINCLASS_H_
 #define	_SYS_LOGINCLASS_H_
 
-#include <sys/container.h>
+struct container;
 
 /*
  * Exactly one of these structures exists per login class.
@@ -36,7 +36,7 @@
 	LIST_ENTRY(loginclass)	lc_next;
 	char			lc_name[MAXLOGNAME];
 	u_int			lc_refcount;
-	struct container	lc_container;
+	struct container	*lc_container;
 };
 
 void	loginclass_acquire(struct loginclass *lc);

==== //depot/projects/soc2009/trasz_limits/sys/sys/proc.h#30 (text+ko) ====

@@ -44,7 +44,6 @@
 #ifndef _KERNEL
 #include <sys/filedesc.h>
 #endif
-#include <sys/container.h>
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/lock_profile.h>
@@ -158,6 +157,7 @@
  * either lock is sufficient for read access, but both locks must be held
  * for write access.
  */
+struct container;
 struct kaudit_record;
 struct td_sched;
 struct nlminfo;
@@ -526,7 +526,6 @@
 	int		p_boundary_count;/* (c) Num threads at user boundary */
 	int		p_pendingcnt;	/* how many signals are pending */
 	struct itimers	*p_itimers;	/* (c) POSIX interval timers. */
-	uint64_t	p_prev_runtime;	/* (c) Resource usage accounting. */
 /* End area that is zeroed on creation. */
 #define	p_endzero	p_magic
 
@@ -560,7 +559,8 @@
 	LIST_HEAD(, mqueue_notifier)	p_mqnotifier; /* (c) mqueue notifiers.*/
 	struct kdtrace_proc	*p_dtrace; /* (*) DTrace-specific data. */
 	struct cv	p_pwait;	/* (*) wait cv for exit/exec */
-	struct container p_container;	/* (*) Resource usage accounting. */
+	uint64_t	p_prev_runtime;	/* (c) Resource usage accounting. */
+	struct container *p_container;	/* (b) Resource usage accounting. */
 };
 
 #define	p_session	p_pgrp->pg_session

==== //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#25 (text+ko) ====

@@ -38,7 +38,6 @@
 #ifdef _KERNEL
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
-#include <sys/container.h>
 #endif
 
 /*
@@ -80,6 +79,8 @@
 	int	pl_refcnt;		/* number of references */
 };
 
+struct container;
+
 /*-
  * Per uid resource consumption.  This structure is used to track
  * the total resource consumption (process count, socket buffer size,
@@ -100,7 +101,7 @@
 	long	ui_ptscnt;		/* (b) number of pseudo-terminals */
 	uid_t	ui_uid;			/* (a) uid */
 	u_int	ui_ref;			/* (b) reference count */
-	struct container ui_container;	/* (*) resource usage accounting */
+	struct container *ui_container;	/* (a) resource usage accounting */
 };

>>> TRUNCATED FOR MAIL (1000 lines) <<<


More information about the p4-projects mailing list