PERFORCE change 164465 for review

Edward Tomasz Napierala trasz at FreeBSD.org
Mon Jun 15 20:55:58 UTC 2009


http://perforce.freebsd.org/chv.cgi?CH=164465

Change 164465 by trasz at trasz_victim on 2009/06/15 20:55:11

	Code for per-jail and per-group resource accounting.
	Not really tested.
	
	XXX: What about hierarchical jails?
	XXX2: This is going to be really slow, unless I invent
	      something clever.  ;-/

Affected files ...

.. //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#4 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#9 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_proc.c#4 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#6 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#6 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#8 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#3 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#4 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/ucred.h#3 edit
.. //depot/projects/soc2009/trasz_limits/usr.sbin/hrl/hrl.c#8 edit

Differences ...

==== //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#4 (text+ko) ====

@@ -453,6 +453,7 @@
 	/* Create credentials. */
 	p->p_ucred = crget();
 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
+	p->p_ucred->cr_gidinfos[0] = gifind(0);
 	p->p_ucred->cr_uidinfo = uifind(0);
 	p->p_ucred->cr_ruidinfo = uifind(0);
 	p->p_ucred->cr_prison = &prison0;

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#9 (text+ko) ====

@@ -31,10 +31,12 @@
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
+#include <sys/sx.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
@@ -110,6 +112,7 @@
 	 */
 	p->p_accounting.ha_resources[resource] += amount;
 	p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] += amount;
+	p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] += amount;
 
 	/*
 	 * XXX: When denying, return proper errno - EFSIZ, ENOMEM etc.
@@ -126,6 +129,7 @@
 	diff = amount - p->p_accounting.ha_resources[resource];
 	p->p_accounting.ha_resources[resource] += diff;
 	p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] += diff;
+	p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] += diff;
 
 	/*
 	 * XXX: Make sure process can lower its resource consumption,
@@ -143,6 +147,7 @@
 
 	p->p_accounting.ha_resources[resource] -= amount;
 	p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] -= amount;
+	p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] -= amount;
 }
 
 int
@@ -292,12 +297,12 @@
 }
 
 static int
-hrl_get_acc_uid(struct thread *td, id_t pid, void *bufp, size_t buflen)
+hrl_get_acc_uid(struct thread *td, id_t uid, void *bufp, size_t buflen)
 {
 	int error;
 	struct uidinfo *uip;
 
-	uip = uifind(pid);
+	uip = uifind(uid);
 	if (uip == NULL)
 		return (ESRCH);
 	error = copyout(&uip->ui_accounting, bufp, sizeof(uip->ui_accounting));
@@ -306,35 +311,73 @@
 	return (error);
 }
 
-int
-hrl(struct thread *td, struct hrl_args *uap)
+static int
+hrl_get_acc_gid(struct thread *td, id_t gid, void *bufp, size_t buflen)
 {
 	int error;
-	id_t id;
+	struct gidinfo *gip;
+
+	gip = gifind(gid);	/* returns with a reference held */
+	if (gip == NULL)	/* NOTE(review): gifind() allocates on miss; looks unreachable */
+		return (ESRCH);
+	error = copyout(&gip->gi_accounting, bufp, sizeof(gip->gi_accounting));
+	gifree(gip);		/* drop the reference taken by gifind() */
+
+	return (error);
+}
 
-	if (uap->op == HRL_OP_GET_RULES)
-		return (hrl_get_rules(td, uap->outbufp, uap->outbuflen));
+static int
+hrl_get_acc_jid(struct thread *td, id_t jid, void *bufp, size_t buflen)
+{
+	int error;
+	struct prison *pr;
 
-	if (uap->inbuflen != sizeof(id_t))
-		return (EINVAL);
+	sx_xlock(&allprison_lock);
+	pr = prison_find(jid);	/* NOTE(review): confirm lock/ref state of the returned prison */
+	if (pr == NULL) {
+		sx_xunlock(&allprison_lock);
+		return (ENOENT);
+	}
+	error = copyout(&pr->pr_accounting, bufp, sizeof(pr->pr_accounting));
+	prison_free(pr);	/* NOTE(review): assumes prison_find() took a reference -- confirm */
+	sx_xunlock(&allprison_lock);
 
-	error = copyin(uap->inbufp, &id, sizeof(id_t));
-	if (error)
-		return (error);
+	return (error);
+}
 
-	if (id <= 0)
-		return (EINVAL);
+int
+hrl(struct thread *td, struct hrl_args *uap)
+{
+	int error;
+	id_t id;
 
-	if (uap->outbuflen < sizeof(struct hrl_acc))
-		return (EFBIG);
+	if (uap->op != HRL_OP_GET_RULES) {	/* every other op takes an id_t in inbufp */
+		if (uap->inbuflen != sizeof(id_t))
+			return (EINVAL);
+		error = copyin(uap->inbufp, &id, sizeof(id_t));
+		if (error)
+			return (error);
+		if (id <= 0)	/* NOTE(review): also rejects id 0 (uid 0, gid 0) */
+			return (EINVAL);
+		if (uap->outbuflen < sizeof(struct hrl_acc))
+			return (EFBIG);
+	}
 
-	if (uap->op == HRL_OP_GET_ACC_PID)
+	switch (uap->op) {
+	case HRL_OP_GET_RULES:
+		return (hrl_get_rules(td, uap->outbufp, uap->outbuflen));
+	case HRL_OP_GET_ACC_PID:
 		return (hrl_get_acc_pid(td, id, uap->outbufp, uap->outbuflen));
-
-	if (uap->op == HRL_OP_GET_ACC_UID)
+	case HRL_OP_GET_ACC_UID:
 		return (hrl_get_acc_uid(td, id, uap->outbufp, uap->outbuflen));
-
-	return (EINVAL);
+	case HRL_OP_GET_ACC_GID:
+		return (hrl_get_acc_gid(td, id, uap->outbufp, uap->outbuflen));
+	case HRL_OP_GET_ACC_JAILID:
+		return (hrl_get_acc_jid(td, id, uap->outbufp, uap->outbuflen));
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
 }
 
 static void

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_proc.c#4 (text+ko) ====

@@ -165,6 +165,7 @@
 	    proc_ctor, proc_dtor, proc_init, proc_fini,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uihashinit();
+	gihashinit();
 }
 
 /*

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#6 (text+ko) ====

@@ -807,7 +807,7 @@
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
-	int error;
+	int i, error;
 
 	if (ngrp > NGROUPS)
 		return (EINVAL);
@@ -839,9 +839,15 @@
 		 * when running non-BSD software if we do not do the same.
 		 */
+		for (i = 1; i < newcred->cr_ngroups; i++)
+			gifree(newcred->cr_gidinfos[i]); /* before reset */
 		newcred->cr_ngroups = 1;
 	} else {
+		for (i = 0; i < newcred->cr_ngroups; i++)
+			gifree(newcred->cr_gidinfos[i]);
 		bcopy(groups, newcred->cr_groups, ngrp * sizeof(gid_t));
 		newcred->cr_ngroups = ngrp;
+		for (i = 0; i < newcred->cr_ngroups; i++)
+			newcred->cr_gidinfos[i] = gifind(newcred->cr_groups[i]);
 	}
 	setsugid(p);
 	p->p_ucred = newcred;
@@ -1802,6 +1808,7 @@
 void
 crfree(struct ucred *cr)
 {
+	int i;
 
 	KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
 	KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
@@ -1815,6 +1822,8 @@
 			uifree(cr->cr_uidinfo);
 		if (cr->cr_ruidinfo != NULL)
 			uifree(cr->cr_ruidinfo);
+		for (i = 0; i < cr->cr_ngroups; i++)
+			gifree(cr->cr_gidinfos[i]);
 		/*
 		 * Free a prison, if any.
 		 */
@@ -1851,6 +1860,7 @@
 void
 crcopy(struct ucred *dest, struct ucred *src)
 {
+	int i;
 
 	KASSERT(crshared(dest) == 0, ("crcopy of shared ucred"));
 	bcopy(&src->cr_startcopy, &dest->cr_startcopy,
@@ -1858,6 +1868,8 @@
 		(caddr_t)&src->cr_startcopy));
 	uihold(dest->cr_uidinfo);
 	uihold(dest->cr_ruidinfo);
+	for (i = 0; i < dest->cr_ngroups; i++)
+		gihold(dest->cr_gidinfos[i]);
 	prison_hold(dest->cr_prison);
 #ifdef VIMAGE
 	KASSERT(src->cr_vimage != NULL, ("cr_vimage == NULL"));
@@ -2014,7 +2026,9 @@
 change_egid(struct ucred *newcred, gid_t egid)
 {
 
+	gifree(newcred->cr_gidinfos[0]);
 	newcred->cr_groups[0] = egid;
+	newcred->cr_gidinfos[0] = gifind(egid);
 }
 
 /*-

==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#6 (text+ko) ====

@@ -72,11 +72,17 @@
 static struct rwlock uihashtbl_lock;
 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
 static u_long uihash;		/* size of hash table - 1 */
+static MALLOC_DEFINE(M_GIDINFO, "gidinfo", "gidinfo structures");
+#define	GIHASH(gid)	(&gihashtbl[(gid) & gihash])
+static struct rwlock gihashtbl_lock;
+static LIST_HEAD(gihashhead, gidinfo) *gihashtbl;
+static u_long gihash;		/* size of hash table - 1 */
 
 static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
 		    struct timeval *up, struct timeval *sp);
 static int	donice(struct thread *td, struct proc *chgp, int n);
 static struct uidinfo *uilookup(uid_t uid);
+static struct gidinfo *gilookup(gid_t gid);
 
 /*
  * Resource controls and accounting.
@@ -1358,6 +1364,128 @@
 }
 
 /*
+ * Set up the gidinfo hash table and the rwlock that protects it.
+ * Each gidinfo structure carries the HRL resource accounting for a
+ * single gid.
+ */
+void
+gihashinit()
+{
+
+	gihashtbl = hashinit(maxproc / 16, M_GIDINFO, &gihash);
+	rw_init(&gihashtbl_lock, "gidinfo hash");
+}
+
+/*
+ * Return the gidinfo hashed under gid, or NULL if none is present.
+ * The caller must hold gihashtbl_lock (read or write).
+ */
+static struct gidinfo *
+gilookup(gid)
+	gid_t gid;
+{
+	struct gihashhead *gipp;
+	struct gidinfo *gip;
+
+	rw_assert(&gihashtbl_lock, RA_LOCKED);
+	gipp = GIHASH(gid);
+	LIST_FOREACH(gip, gipp, gi_hash)
+		if (gip->gi_gid == gid)
+			break;
+
+	return (gip);
+}
+
+/*
+ * Return the gidinfo for gid, allocating and hashing a new one
+ * (sleeping malloc, M_WAITOK) when none exists yet.  The result is
+ * returned with a reference held; drop it with gifree().
+ */
+struct gidinfo *
+gifind(gid)
+	gid_t gid;
+{
+	struct gidinfo *old_gip, *gip;
+
+	rw_rlock(&gihashtbl_lock);
+	gip = gilookup(gid);
+	if (gip == NULL) {
+		rw_runlock(&gihashtbl_lock);
+		gip = malloc(sizeof(*gip), M_GIDINFO, M_WAITOK | M_ZERO);
+		rw_wlock(&gihashtbl_lock);
+		/*
+		 * The lock was dropped around malloc, so another thread
+		 * may have inserted a gidinfo for this gid meanwhile;
+		 * look again under the write lock to avoid a duplicate.
+		 */
+		if ((old_gip = gilookup(gid)) != NULL) {
+			/* Someone else beat us to it. */
+			free(gip, M_GIDINFO);
+			gip = old_gip;
+		} else {
+			refcount_init(&gip->gi_ref, 0);	/* first ref: gihold() below */
+			gip->gi_gid = gid;
+			LIST_INSERT_HEAD(GIHASH(gid), gip, gi_hash);
+		}
+	}
+	gihold(gip);
+	rw_unlock(&gihashtbl_lock);	/* read or write, whichever is held */
+	return (gip);
+}
+
+/*
+ * Take an additional reference on a gidinfo struct.
+ */
+void
+gihold(gip)
+	struct gidinfo *gip;
+{
+
+	refcount_acquire(&gip->gi_ref);
+}
+
+/*-
+ * Since gidinfo structs have a long lifetime, we use an
+ * opportunistic refcounting scheme to avoid locking the lookup hash
+ * for each release.
+ *
+ * Dropping the last reference frees the structure, which requires
+ * removing it from the hash and therefore locking the hash.
+ * Optimal case:
+ *   The count is above 1, so this cannot be the last reference;
+ *   lower it with a single lockless compare-and-set and return.
+ * Suboptimal case:
+ *   The count is 1, or the compare-and-set lost a race: take the
+ *   hash write lock, release the reference, and if it was the last
+ *   one unlink the structure from the hash and free it.
+ */
+void
+gifree(gip)
+	struct gidinfo *gip;
+{
+	int old;
+
+	/* Optimal case: not the last reference, drop it locklessly. */
+	old = gip->gi_ref;
+	if (old > 1 && atomic_cmpset_int(&gip->gi_ref, old, old - 1))
+		return;
+
+	/* Suboptimal case: release under the hash write lock. */
+	rw_wlock(&gihashtbl_lock);
+	if (refcount_release(&gip->gi_ref)) {
+		LIST_REMOVE(gip, gi_hash);
+		rw_wunlock(&gihashtbl_lock);
+		free(gip, M_GIDINFO);
+		return;
+	}
+	/*
+	 * Not the last reference after all (one was added, or the
+	 * compare-and-set above merely lost a race); nothing to free.
+	 */
+	rw_wunlock(&gihashtbl_lock);
+}
+
+/*
  * Change the count associated with number of processes
  * a given user is using.  When 'max' is 0, don't enforce a limit
  */

==== //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#8 (text+ko) ====

@@ -96,6 +96,7 @@
 #define	HRL_OP_GET_ACC_PID	2
 #define	HRL_OP_GET_ACC_UID	3
 #define	HRL_OP_GET_ACC_GID	4
+#define	HRL_OP_GET_ACC_JAILID	5
 
 /*
  * 'hrl_acc' defines resource consumption for a particular

==== //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#3 (text+ko) ====

@@ -30,6 +30,8 @@
 #ifndef _SYS_JAIL_H_
 #define _SYS_JAIL_H_
 
+#include <sys/hrl.h>
+
 #ifdef _KERNEL
 struct jail_v0 {
 	u_int32_t	version;
@@ -171,6 +173,7 @@
 	char		 pr_domain[MAXHOSTNAMELEN];	/* (p) jail domainname */
 	char		 pr_uuid[HOSTUUIDLEN];		/* (p) jail hostuuid */
 	unsigned long	 pr_hostid;			/* (p) jail hostid */
+	struct hrl_acc	 pr_accounting;			/* (*) HRL resource accounting */
 };
 #endif /* _KERNEL || _WANT_PRISON */
 

==== //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#4 (text+ko) ====

@@ -98,6 +98,21 @@
 	struct hrl_acc	ui_accounting;	/* (*) HRL resource accounting */
 };
 
+/*
+ * Per gid resource consumption
+ *
+ * Locking guide:
+ * (a) Constant from inception
+ * (b) Lockless, updated using atomics
+ * (c) Locked by global gihashtbl_lock
+ */
+struct gidinfo {
+	LIST_ENTRY(gidinfo) gi_hash;	/* (c) hash chain of gidinfos */
+	gid_t	gi_gid;			/* (a) gid */
+	u_int	gi_ref;			/* (b) reference count */
+	struct hrl_acc	gi_accounting;	/* (*) HRL resource accounting */
+};
+
 struct proc;
 struct rusage_ext;
 struct thread;
@@ -134,6 +149,11 @@
 void	 uifree(struct uidinfo *uip);
 void	 uihashinit(void);
 void	 uihold(struct uidinfo *uip);
+struct gidinfo
+	*gifind(gid_t gid);
+void	 gifree(struct gidinfo *gip);
+void	 gihashinit(void);
+void	 gihold(struct gidinfo *gip);
 
 #endif /* _KERNEL */
 #endif /* !_SYS_RESOURCEVAR_H_ */

==== //depot/projects/soc2009/trasz_limits/sys/sys/ucred.h#3 (text+ko) ====

@@ -50,6 +50,7 @@
 	uid_t	cr_svuid;		/* saved user id */
 	short	cr_ngroups;		/* number of groups */
 	gid_t	cr_groups[NGROUPS];	/* groups */
+	struct gidinfo	*cr_gidinfos[NGROUPS]; /* group resource consumption */
 	gid_t	cr_rgid;		/* real group id */
 	gid_t	cr_svgid;		/* saved group id */
 	struct uidinfo	*cr_uidinfo;	/* per euid resource consumption */

==== //depot/projects/soc2009/trasz_limits/usr.sbin/hrl/hrl.c#8 (text+ko) ====

@@ -302,18 +302,19 @@
 usage(void)
 {
 
-	fprintf(stderr, "usage: hrl [-u user | -g group | -p pid]\n");
+	fprintf(stderr, "usage: hrl [-u user | -g group | -p pid | -j jailid]\n");
+	exit(1);
 }
 
 int
 main(int argc __unused, char **argv __unused)
 {
-	int ch, op, pflag = 0, uflag = 0, gflag = 0;
+	int ch, op, pflag = 0, uflag = 0, gflag = 0, jflag = 0;
 	id_t id = 0;
 
 	op = HRL_OP_GET_RULES;
 
-	while ((ch = getopt(argc, argv, "p:u:g:")) != -1) {
+	while ((ch = getopt(argc, argv, "p:u:g:j:")) != -1) {
 		switch (ch) {
 		case 'p':
 			pflag = 1;
@@ -330,14 +331,19 @@
 			op = HRL_OP_GET_ACC_GID;
 			id = parse_group(optarg);
 			break;
+		case 'j':
+			jflag = 1;
+			op = HRL_OP_GET_ACC_JAILID;
+			id = parse_group(optarg);
+			break;
 		case '?':
 		default:
 			usage();
 		}
 	}
 
-	if (pflag + uflag + gflag > 1)
-		errx(1, "only one of the -p, -u and -g may be specified "
+	if (pflag + uflag + gflag + jflag > 1)
+		errx(1, "only one of the -p, -u, -g and -j may be specified "
 		    "at the same time");
 
 	switch (op) {
@@ -348,6 +354,7 @@
 	case HRL_OP_GET_ACC_PID:
 	case HRL_OP_GET_ACC_UID:
 	case HRL_OP_GET_ACC_GID:
+	case HRL_OP_GET_ACC_JAILID:
 		print_accounting(op, id);
 		break;
 	}


More information about the p4-projects mailing list