PERFORCE change 186657 for review

Edward Tomasz Napierala trasz at FreeBSD.org
Sun Dec 5 20:31:19 UTC 2010


http://p4web.freebsd.org/@@186657?ac=10

Change 186657 by trasz at trasz_victim on 2010/12/05 20:30:37

	Get rid of the containers hierarchy.  Experience has shown that
	it always mirrors ucred hierarchy (where ucred points to uidinfo,
	prison and loginclass), and since some of the resources might
	be accounted only per-ucred, the containers hierarchy can't possibly
	be different from it.

Affected files ...

.. //depot/projects/soc2009/trasz_limits/TODO#33 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#32 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#41 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_fork.c#27 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#99 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#30 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/container.h#18 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#49 edit

Differences ...

==== //depot/projects/soc2009/trasz_limits/TODO#33 (text+ko) ====

@@ -30,10 +30,9 @@
    allow the HRL code to send a signal to the offending thread instead of the
    offending process.
 
- - Do we need separate container hierarchy, or should we just drop it and use
-   ucred?
+ - Add support for hierarchical jails.
 
- - Remove CONTAINERS #ifdefs.
+ - Make sure we enforce limits whenever it's needed.
 
 Issues:
 

==== //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#32 (text+ko) ====

@@ -494,11 +494,6 @@
 #endif
 	td->td_ucred = crhold(p->p_ucred);
 
-#ifdef HRL
-	/* Let the HRL know about the new process. */
-	hrl_proc_init(p);
-#endif
-
 	/* Create sigacts. */
 	p->p_sigacts = sigacts_alloc();
 

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_container.c#41 (text+ko) ====

@@ -61,7 +61,7 @@
 #ifdef CONTAINERS
 
 static struct mtx container_lock;
-MTX_SYSINIT(container_lock, &container_lock, "container lock", MTX_DEF);
+MTX_SYSINIT(container_lock, &container_lock, "container lock", MTX_RECURSE);
 
 static void container_sub(struct container *dest, const struct container *src);
 
@@ -153,28 +153,14 @@
 	}
 }
 
-static int
+static void
 container_add(struct container *dest, const struct container *src)
 {
-	int i, error;
+	int i;
 
 	mtx_assert(&container_lock, MA_OWNED);
 
 	/*
-	 * Update resource usage in dest's parents.
-	 */
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++) {
-		if (dest->c_parents[i] == NULL)
-			continue;
-		error = container_add(dest->c_parents[i], src);
-		if (error != 0) {
-			for (i--; i >= 0; i--)
-				container_sub(dest->c_parents[i], src);
-			return (error);
-		}
-	}
-
-	/*
 	 * Update resource usage in dest.
 	 */
 	for (i = 0; i <= RUSAGE_MAX; i++) {
@@ -182,14 +168,8 @@
 		    ("resource usage propagation meltdown: dest < 0"));
 		KASSERT(src->c_resources[i] >= 0,
 		    ("resource usage propagation meltdown: src < 0"));
-		/*
-		 * XXX: Enforce limit here; if exceeded, undo everything
-		 *      and return error.
-		 */
 		dest->c_resources[i] += src->c_resources[i];
 	}
-
-	return (0);
 }
 
 static void
@@ -217,87 +197,9 @@
 				dest->c_resources[i] = 0;
 		}
 	}
-
-	/*
-	 * Update resource usage in dest's parents.
-	 */
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++) {
-		if (dest->c_parents[i] == NULL)
-			continue;
-		container_sub(dest->c_parents[i], src);
-	}
-}
-
-static int
-container_join_locked(struct container *child, struct container *parent)
-{
-	int i, error;
-
-	SDT_PROBE(container, kernel, container, join, child, parent, 0, 0, 0);
-
-	mtx_assert(&container_lock, MA_OWNED);
-	KASSERT(child != NULL, ("child != NULL"));
-	KASSERT(parent != NULL, ("parent != NULL"));
-
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++) {
-		KASSERT(child->c_parents[i] != parent,
-		    ("container already joined"));
-		if (child->c_parents[i] == NULL) {
-			error = container_add(parent, child);
-			if (error != 0) {
-				SDT_PROBE(container, kernel, container, join_failure, child, parent, 0, 0, 0);
-				return (error);
-			}
-			child->c_parents[i] = parent;
-			return (0);
-		}
-	}
-	panic("container has too many parents");
 }
 
-int
-container_join(struct container *child, struct container *parent)
-{
-	int error;
-
-	mtx_lock(&container_lock);
-	error = container_join_locked(child, parent);
-	mtx_unlock(&container_lock);
-
-	return (error);
-}
-
-static void
-container_leave_locked(struct container *child, struct container *parent)
-{
-	int i;
-
-	SDT_PROBE(container, kernel, container, leave, child, parent, 0, 0, 0);
-
-	mtx_assert(&container_lock, MA_OWNED);
-	KASSERT(child != NULL, ("child != NULL"));
-	KASSERT(parent != NULL, ("parent != NULL"));
-
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++) {
-		if (child->c_parents[i] == parent) {
-			container_sub(parent, child);
-			child->c_parents[i] = NULL;
-			return;
-		}
-	}
-	panic("container not joined");
-}
-
 void
-container_leave(struct container *child, struct container *parent)
-{
-
-	mtx_lock(&container_lock);
-	container_leave_locked(child, parent);
-	mtx_unlock(&container_lock);
-}
-
-void
 container_create(struct container *container)
 {
 	int i;
@@ -307,9 +209,6 @@
 	for (i = 0; i <= RUSAGE_MAX; i++)
 		KASSERT(container->c_resources[i] == 0,
 		    ("container->c_resources[%d] != 0", i));
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++)
-		KASSERT(container->c_parents[i] == NULL,
-		    ("container->c_parents[%d] != NULL", i));
 }
 
 static void
@@ -331,9 +230,6 @@
 		    "%ju allocated for resource %d\n",
 		    container->c_resources[i], i));
 	}
-
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++)
-		container->c_parents[i] = NULL;
 }
 
 void
@@ -345,40 +241,6 @@
 	mtx_unlock(&container_lock);
 }
 
-#ifdef DIAGNOSTIC
-/*
- * Go through the resource consumption information and make sure it makes sense.
- */
-static void
-container_assert(const struct container *container)
-{
-	int i, resource;
-	struct container *parent;
-
-	mtx_assert(&container_lock, MA_OWNED);
-	KASSERT(container != NULL, ("NULL container"));
-
-	for (resource = 0; resource <= RUSAGE_MAX; resource++) {
-		KASSERT(container->c_resources[resource] >= 0,
-		    ("resource usage propagation meltdown: resource < 0"));
-	}
-
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++) {
-		parent = container->c_parents[i];
-		if (parent == NULL);
-			continue;
-		container_assert(parent);
-		for (resource = 0; resource <= RUSAGE_MAX; resource++) {
-			if (container_resource_sloppy(resource))
-				continue;
-			KASSERT(parent->c_resources[resource] >=
-			    container->c_resources[resource],
-			    ("resource usage propagation meltdown: child > parent"));
-		}
-	}
-}
-#endif /* DIAGNOSTIC */
-
 /*
  * Increase consumption of 'resource' by 'amount' for 'container'
  * and all its parents.  Differently from other cases, 'amount' here
@@ -388,7 +250,6 @@
 container_alloc_resource(struct container *container, int resource,
     uint64_t amount)
 {
-	int i;
 
 	mtx_assert(&container_lock, MA_OWNED);
 	KASSERT(container != NULL, ("NULL container"));
@@ -396,15 +257,6 @@
 	container->c_resources[resource] += amount;
 	if (container_resource_sloppy(resource) && container->c_resources[resource] < 0)
 		container->c_resources[resource] = 0;
-
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++) {
-		if (container->c_parents[i] == NULL)
-			continue;
-		container_alloc_resource(container->c_parents[i], resource, amount);
-	}
-#ifdef DIAGNOSTIC
-	container_assert(container);
-#endif
 }
 
 /*
@@ -437,6 +289,7 @@
 #endif
 	container_alloc_resource(&p->p_container, resource, amount);
 	mtx_unlock(&container_lock);
+	rusage_add_cred(p->p_ucred, resource, amount);
 
 	return (0);
 }
@@ -444,6 +297,8 @@
 /*
  * Increase allocation of 'resource' by 'amount' for credential 'cred'.  Doesn't
  * check for limits and never fails.
+ *
+ * XXX: Shouldn't this ever return an error?
  */
 void
 rusage_add_cred(struct ucred *cred, int resource, uint64_t amount)
@@ -453,8 +308,6 @@
 
 	KASSERT(amount >= 0, ("rusage_add_cred: invalid amount for resource %d: %ju",
 	    resource, amount));
-	KASSERT(container_resource_sloppy(resource),
-	    ("rusage_add_cred: called for non-sloppy resource %d", resource));
 
 	mtx_lock(&container_lock);
 	container_alloc_resource(&cred->cr_ruidinfo->ui_container, resource, amount);
@@ -482,6 +335,7 @@
 	mtx_lock(&container_lock);
 	container_alloc_resource(&p->p_container, resource, amount);
 	mtx_unlock(&container_lock);
+	rusage_add_cred(p->p_ucred, resource, amount);
 }
 
 static int
@@ -516,6 +370,13 @@
 	}
 #endif
 	container_alloc_resource(&p->p_container, resource, diff);
+	/*
+	 * XXX: Mutex recursion.
+	 */
+	if (diff > 0)
+		rusage_add_cred(p->p_ucred, resource, diff);
+	else if (diff < 0)
+		rusage_sub_cred(p->p_ucred, resource, -diff);
 
 	return (0);
 }
@@ -535,7 +396,6 @@
 	mtx_lock(&container_lock);
 	error = rusage_set_locked(p, resource, amount);
 	mtx_unlock(&container_lock);
-
 	return (error);
 }
 
@@ -580,6 +440,7 @@
 
 	container_alloc_resource(&p->p_container, resource, -amount);
 	mtx_unlock(&container_lock);
+	rusage_sub_cred(p->p_ucred, resource, amount);
 }
 
 /*
@@ -593,10 +454,10 @@
 
 	KASSERT(amount >= 0, ("rusage_sub_cred: invalid amount for resource %d: %ju",
 	    resource, amount));
+#ifdef notyet
 	KASSERT(container_resource_reclaimable(resource),
 	    ("rusage_sub_cred: called for non-reclaimable resource %d", resource));
-	KASSERT(container_resource_sloppy(resource),
-	    ("rusage_sub_cred: called for non-sloppy resource %d", resource));
+#endif
 
 	mtx_lock(&container_lock);
 	container_alloc_resource(&cred->cr_ruidinfo->ui_container, resource, -amount);
@@ -613,7 +474,6 @@
 container_proc_fork(struct proc *parent, struct proc *child)
 {
 	int i, error = 0;
-	struct container *container;
 
 	/*
 	 * No resource accounting for kernel processes.
@@ -652,26 +512,6 @@
 		}
 	}
 
-	/*
-	 * Inherit containing containers from the parent.
-	 */
-	for (i = 0; i <= CONTAINER_PARENTS_MAX; i++) {
-		container = parent->p_container.c_parents[i];
-		if (container == NULL)
-			continue;
-		error = container_join_locked(&child->p_container, container);
-		if (error != 0) {
-			/*
-			 * XXX: The only purpose of these two lines is to prevent from
-			 * tripping checks in container_destroy().
-			 */
-			for (i = 0; i <= RUSAGE_MAX; i++)
-				rusage_set_locked(child, i, 0);
-			container_destroy_locked(&child->p_container);
-			goto out;
-		}
-	}
-
 out:
 	mtx_unlock(&container_lock);
 	PROC_UNLOCK(child);
@@ -709,6 +549,46 @@
 }
 
 /*
+ * Called before credentials change, to move resource utilisation
+ * between containers.
+ */
+void
+container_proc_ucred_changing(struct proc *p, struct ucred *newcred)
+{
+	struct uidinfo *olduip, *newuip;
+	struct loginclass *oldlc, *newlc;
+	struct prison *oldpr, *newpr;
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	newuip = newcred->cr_ruidinfo;
+	olduip = p->p_ucred->cr_ruidinfo;
+	newlc = newcred->cr_loginclass;
+	oldlc = p->p_ucred->cr_loginclass;
+	newpr = newcred->cr_prison;
+	oldpr = p->p_ucred->cr_prison;
+
+	mtx_lock(&container_lock);
+	if (newuip != olduip) {
+		container_sub(&olduip->ui_container, &p->p_container);
+		container_add(&newuip->ui_container, &p->p_container);
+	}
+	if (newlc != oldlc) {
+		container_sub(&oldlc->lc_container, &p->p_container);
+		container_add(&newlc->lc_container, &p->p_container);
+	}
+	if (newpr != oldpr) {
+		container_sub(&oldpr->pr_container, &p->p_container);
+		container_add(&newpr->pr_container, &p->p_container);
+	}
+	mtx_unlock(&container_lock);
+
+#ifdef HRL
+	hrl_proc_ucred_changing(p, newcred);
+#endif
+}
+
+/*
  * Stuff below runs from a "containerd" kernel process.
  */
 static void
@@ -845,18 +725,6 @@
 }
 
 int
-container_join(struct container *child, struct container *parent)
-{
-
-	return (0);
-}
-
-void
-container_leave(struct container *child, struct container *parent)
-{
-}
-
-int
 container_proc_fork(struct proc *parent, struct proc *child)
 {
 

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_fork.c#27 (text+ko) ====

@@ -351,6 +351,12 @@
 	}
 
 	/*
+	 * XXX: This is ugly; when we copy resource usage, need to bump
+	 *      per-cred resource counters.
+	 */
+	newproc->p_ucred = p1->p_ucred;
+
+	/*
 	 * Initialize resource container for the child process.
 	 */
 	error = container_proc_fork(p1, newproc);

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#99 (text+ko) ====

@@ -1379,28 +1379,6 @@
 	return (error);
 }
 
-/*
- * Called from kern/init_main.c, for proc0 and initproc.
- */
-void
-hrl_proc_init(struct proc *p)
-{
-	int error;
-	struct ucred *cred = p->p_ucred;
-
-	container_create(&p->p_container);
-	error = container_join(&p->p_container, &cred->cr_ruidinfo->ui_container);
-	KASSERT(error == 0, ("hrl_proc_init: container_join failed"));
-	error = container_join(&p->p_container, &cred->cr_loginclass->lc_container);
-	KASSERT(error == 0, ("hrl_proc_init: container_join failed"));
-	error = container_join(&p->p_container, &cred->cr_prison->pr_container);
-	KASSERT(error == 0, ("hrl_proc_init: container_join failed"));
-}
-
-/*
- * Called before credentials change, to adjust HRL data structures
- * assigned to the process.
- */
 void
 hrl_proc_ucred_changing(struct proc *p, struct ucred *newcred)
 {
@@ -1460,10 +1438,6 @@
 			KASSERT(error == 0, ("XXX: better error handling needed"));
 		}
 		mtx_unlock(&hrl_lock);
-
-		container_leave(&p->p_container, &olduip->ui_container);
-		error = container_join(&p->p_container, &newuip->ui_container);
-		KASSERT(error == 0, ("hrl_proc_ucred_changing: better error handling needed"));
 	}
 	if (newlc != oldlc) {
 		mtx_lock(&hrl_lock);
@@ -1472,10 +1446,6 @@
 			KASSERT(error == 0, ("XXX: better error handling needed"));
 		}
 		mtx_unlock(&hrl_lock);
-
-		container_leave(&p->p_container, &oldlc->lc_container);
-		error = container_join(&p->p_container, &newlc->lc_container);
-		KASSERT(error == 0, ("hrl_proc_ucred_changing: better error handling needed"));
 	}
 	if (newpr != oldpr) {
 		mtx_lock(&hrl_lock);
@@ -1484,10 +1454,6 @@
 			KASSERT(error == 0, ("XXX: better error handling needed"));
 		}
 		mtx_unlock(&hrl_lock);
-
-		container_leave(&p->p_container, &oldpr->pr_container);
-		error = container_join(&p->p_container, &newpr->pr_container);
-		KASSERT(error == 0, ("hrl_proc_ucred_changing: better error handling needed"));
 	}
 }
 

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#30 (text+ko) ====

@@ -51,7 +51,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/acct.h>
-#include <sys/hrl.h>
+#include <sys/container.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
@@ -2130,8 +2130,8 @@
 		chgproccnt(newcred->cr_ruidinfo, 1, 0);
 	}
 
-#ifdef HRL
-	hrl_proc_ucred_changing(p, newcred);
+#ifdef CONTAINERS
+	container_proc_ucred_changing(p, newcred);
 #endif
 	p->p_ucred = newcred;
 }

==== //depot/projects/soc2009/trasz_limits/sys/sys/container.h#18 (text+ko) ====

@@ -44,14 +44,6 @@
  * Resource containers.
  */
 
-/*
- * Processes may have at most three parent containers - prison, uidinfo,
- * and loginclass.  Other subjects have less - struct prison may have only
- * one parent container, loginclass and uidinfo structures have none.
- * This may change when - and if - we add per-group resource limits.
- */
-#define	CONTAINER_PARENTS_MAX	3
-
 #define	RUSAGE_UNDEFINED	-1
 #define	RUSAGE_CPU		0
 #define	RUSAGE_FSIZE		1
@@ -99,7 +91,6 @@
  */
 struct container {
 	int64_t				c_resources[RUSAGE_MAX + 1];
-	struct container		*c_parents[CONTAINER_PARENTS_MAX + 1];
 	LIST_HEAD(, hrl_rule_link)	c_rule_links;
 };
 
@@ -114,10 +105,9 @@
 void	container_create(struct container *container);
 void	container_destroy(struct container *container);
 
-int	container_join(struct container *child, struct container *parent);
-void	container_leave(struct container *child, struct container *parent);
-
 int	container_proc_fork(struct proc *parent, struct proc *child);
 void	container_proc_exit(struct proc *p);
 
+void	container_proc_ucred_changing(struct proc *p, struct ucred *newcred);
+
 #endif /* !_CONTAINER_H_ */

==== //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#49 (text+ko) ====

@@ -107,7 +107,6 @@
 
 #ifdef _KERNEL
 
-void	hrl_proc_init(struct proc *p);
 void	hrl_proc_ucred_changing(struct proc *p, struct ucred *newcred);
 
 struct hrl_rule	*hrl_rule_alloc(int flags);


More information about the p4-projects mailing list