PERFORCE change 164465 for review
Edward Tomasz Napierala
trasz at FreeBSD.org
Mon Jun 15 20:55:58 UTC 2009
http://perforce.freebsd.org/chv.cgi?CH=164465
Change 164465 by trasz at trasz_victim on 2009/06/15 20:55:11
Code for per-jail and per-group resource accounting.
Not really tested.
XXX: What about hierarchical jails?
XXX2: This is going to be really slow, unless I invent
something clever. ;-/
Affected files ...
.. //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#4 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#9 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_proc.c#4 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#6 edit
.. //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#6 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#8 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#3 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#4 edit
.. //depot/projects/soc2009/trasz_limits/sys/sys/ucred.h#3 edit
.. //depot/projects/soc2009/trasz_limits/usr.sbin/hrl/hrl.c#8 edit
Differences ...
==== //depot/projects/soc2009/trasz_limits/sys/kern/init_main.c#4 (text+ko) ====
@@ -453,6 +453,7 @@
/* Create credentials. */
p->p_ucred = crget();
p->p_ucred->cr_ngroups = 1; /* group 0 */
+ p->p_ucred->cr_gidinfos[0] = gifind(0);
p->p_ucred->cr_uidinfo = uifind(0);
p->p_ucred->cr_ruidinfo = uifind(0);
p->p_ucred->cr_prison = &prison0;
==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_hrl.c#9 (text+ko) ====
@@ -31,10 +31,12 @@
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/queue.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
@@ -110,6 +112,7 @@
*/
p->p_accounting.ha_resources[resource] += amount;
p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] += amount;
+ p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] += amount;
/*
* XXX: When denying, return proper errno - EFSIZ, ENOMEM etc.
@@ -126,6 +129,7 @@
diff = amount - p->p_accounting.ha_resources[resource];
p->p_accounting.ha_resources[resource] += diff;
p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] += diff;
+ p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] += diff;
/*
* XXX: Make sure process can lower its resource consumption,
@@ -143,6 +147,7 @@
p->p_accounting.ha_resources[resource] -= amount;
p->p_ucred->cr_ruidinfo->ui_accounting.ha_resources[resource] -= amount;
+ p->p_ucred->cr_prison->pr_accounting.ha_resources[resource] -= amount;
}
int
@@ -292,12 +297,12 @@
}
static int
-hrl_get_acc_uid(struct thread *td, id_t pid, void *bufp, size_t buflen)
+hrl_get_acc_uid(struct thread *td, id_t uid, void *bufp, size_t buflen)
{
int error;
struct uidinfo *uip;
- uip = uifind(pid);
+ uip = uifind(uid);
if (uip == NULL)
return (ESRCH);
error = copyout(&uip->ui_accounting, bufp, sizeof(uip->ui_accounting));
@@ -306,35 +311,73 @@
return (error);
}
-int
-hrl(struct thread *td, struct hrl_args *uap)
+static int
+hrl_get_acc_gid(struct thread *td, id_t gid, void *bufp, size_t buflen)
{
int error;
- id_t id;
+ struct gidinfo *gip;
+
+ gip = gifind(gid);
+ if (gip == NULL)
+ return (ESRCH);
+ error = copyout(&gip->gi_accounting, bufp, sizeof(gip->gi_accounting));
+ gifree(gip);
+
+ return (error);
+}
- if (uap->op == HRL_OP_GET_RULES)
- return (hrl_get_rules(td, uap->outbufp, uap->outbuflen));
+static int
+hrl_get_acc_jid(struct thread *td, id_t jid, void *bufp, size_t buflen)
+{
+ int error;
+ struct prison *pr;
- if (uap->inbuflen != sizeof(id_t))
- return (EINVAL);
+ sx_xlock(&allprison_lock);
+ pr = prison_find(jid);
+ if (pr == NULL) {
+ sx_xunlock(&allprison_lock);
+ return (ENOENT);
+ }
+ error = copyout(&pr->pr_accounting, bufp, sizeof(pr->pr_accounting));
+ prison_free(pr);
+ sx_xunlock(&allprison_lock);
- error = copyin(uap->inbufp, &id, sizeof(id_t));
- if (error)
- return (error);
+ return (error);
+}
- if (id <= 0)
- return (EINVAL);
+int
+hrl(struct thread *td, struct hrl_args *uap)
+{
+ int error;
+ id_t id;
- if (uap->outbuflen < sizeof(struct hrl_acc))
- return (EFBIG);
+ if (uap->op != HRL_OP_GET_RULES) {
+ if (uap->inbuflen != sizeof(id_t))
+ return (EINVAL);
+ error = copyin(uap->inbufp, &id, sizeof(id_t));
+ if (error)
+ return (error);
+ if (id <= 0)
+ return (EINVAL);
+ if (uap->outbuflen < sizeof(struct hrl_acc))
+ return (EFBIG);
+ }
- if (uap->op == HRL_OP_GET_ACC_PID)
+ switch (uap->op) {
+ case HRL_OP_GET_RULES:
+ return (hrl_get_rules(td, uap->outbufp, uap->outbuflen));
+ case HRL_OP_GET_ACC_PID:
return (hrl_get_acc_pid(td, id, uap->outbufp, uap->outbuflen));
-
- if (uap->op == HRL_OP_GET_ACC_UID)
+ case HRL_OP_GET_ACC_UID:
return (hrl_get_acc_uid(td, id, uap->outbufp, uap->outbuflen));
-
- return (EINVAL);
+ case HRL_OP_GET_ACC_GID:
+ return (hrl_get_acc_gid(td, id, uap->outbufp, uap->outbuflen));
+ case HRL_OP_GET_ACC_JAILID:
+ return (hrl_get_acc_jid(td, id, uap->outbufp, uap->outbuflen));
+ default:
+ return (EINVAL);
+ }
+ /* NOTREACHED */
}
static void
==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_proc.c#4 (text+ko) ====
@@ -165,6 +165,7 @@
proc_ctor, proc_dtor, proc_init, proc_fini,
UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uihashinit();
+ gihashinit();
}
/*
==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_prot.c#6 (text+ko) ====
@@ -807,7 +807,7 @@
{
struct proc *p = td->td_proc;
struct ucred *newcred, *oldcred;
- int error;
+ int i, error;
if (ngrp > NGROUPS)
return (EINVAL);
@@ -839,9 +839,15 @@
* when running non-BSD software if we do not do the same.
*/
newcred->cr_ngroups = 1;
+ for (i = 1; i < newcred->cr_ngroups; i++)
+ gifree(newcred->cr_gidinfos[i]);
} else {
+ for (i = 0; i < newcred->cr_ngroups; i++)
+ gifree(newcred->cr_gidinfos[i]);
bcopy(groups, newcred->cr_groups, ngrp * sizeof(gid_t));
newcred->cr_ngroups = ngrp;
+ for (i = 0; i < newcred->cr_ngroups; i++)
+ newcred->cr_gidinfos[i] = gifind(newcred->cr_groups[i]);
}
setsugid(p);
p->p_ucred = newcred;
@@ -1802,6 +1808,7 @@
void
crfree(struct ucred *cr)
{
+ int i;
KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
@@ -1815,6 +1822,8 @@
uifree(cr->cr_uidinfo);
if (cr->cr_ruidinfo != NULL)
uifree(cr->cr_ruidinfo);
+ for (i = 0; i < cr->cr_ngroups; i++)
+ gifree(cr->cr_gidinfos[i]);
/*
* Free a prison, if any.
*/
@@ -1851,6 +1860,7 @@
void
crcopy(struct ucred *dest, struct ucred *src)
{
+ int i;
KASSERT(crshared(dest) == 0, ("crcopy of shared ucred"));
bcopy(&src->cr_startcopy, &dest->cr_startcopy,
@@ -1858,6 +1868,8 @@
(caddr_t)&src->cr_startcopy));
uihold(dest->cr_uidinfo);
uihold(dest->cr_ruidinfo);
+ for (i = 0; i < dest->cr_ngroups; i++)
+ gihold(dest->cr_gidinfos[i]);
prison_hold(dest->cr_prison);
#ifdef VIMAGE
KASSERT(src->cr_vimage != NULL, ("cr_vimage == NULL"));
@@ -2014,7 +2026,9 @@
change_egid(struct ucred *newcred, gid_t egid)
{
+ gifree(newcred->cr_gidinfos[0]);
newcred->cr_groups[0] = egid;
+ newcred->cr_gidinfos[0] = gifind(egid);
}
/*-
==== //depot/projects/soc2009/trasz_limits/sys/kern/kern_resource.c#6 (text+ko) ====
@@ -72,11 +72,17 @@
static struct rwlock uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash; /* size of hash table - 1 */
+static MALLOC_DEFINE(M_GIDINFO, "gidinfo", "gidinfo structures");
+#define GIHASH(gid) (&gihashtbl[(gid) & gihash])
+static struct rwlock gihashtbl_lock;
+static LIST_HEAD(gihashhead, gidinfo) *gihashtbl;
+static u_long gihash; /* size of hash table - 1 */
static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
+static struct gidinfo *gilookup(gid_t gid);
/*
* Resource controls and accounting.
@@ -1358,6 +1364,128 @@
}
/*
+ * Initialize the gidinfo hash table and its lock. A gidinfo structure
+ * tracks the total resource consumption (process count, socket buffer
+ * size, etc.) for a gid and is used to impose limits.
+ */
+void
+gihashinit()
+{
+
+ gihashtbl = hashinit(maxproc / 16, M_GIDINFO, &gihash);
+ rw_init(&gihashtbl_lock, "gidinfo hash");
+}
+
+/*
+ * Look up a gidinfo struct for the parameter gid without taking a
+ * reference on it. gihashtbl_lock must be locked (asserted below).
+ */
+static struct gidinfo *
+gilookup(gid)
+ gid_t gid;
+{
+ struct gihashhead *gipp;
+ struct gidinfo *gip;
+
+ rw_assert(&gihashtbl_lock, RA_LOCKED);
+ gipp = GIHASH(gid);
+ LIST_FOREACH(gip, gipp, gi_hash)
+ if (gip->gi_gid == gid)
+ break;
+
+ return (gip);
+}
+
+/*
+ * Find the struct gidinfo for a particular gid, allocating and hashing
+ * a new one if none exists. The refcount of the returned struct is
+ * increased; gifree() should be called on it when released.
+ */
+struct gidinfo *
+gifind(gid)
+ gid_t gid;
+{
+ struct gidinfo *old_gip, *gip;
+
+ rw_rlock(&gihashtbl_lock);
+ gip = gilookup(gid);
+ if (gip == NULL) {
+ rw_runlock(&gihashtbl_lock);
+ gip = malloc(sizeof(*gip), M_GIDINFO, M_WAITOK | M_ZERO);
+ rw_wlock(&gihashtbl_lock);
+ /*
+ * There's a chance someone created our gidinfo while we
+ * were in malloc and not holding the lock, so we have to
+ * look again and make sure we don't insert a duplicate.
+ */
+ if ((old_gip = gilookup(gid)) != NULL) {
+ /* Someone else beat us to it; discard our copy. */
+ free(gip, M_GIDINFO);
+ gip = old_gip;
+ } else {
+ refcount_init(&gip->gi_ref, 0); /* gihold() below takes the first ref */
+ gip->gi_gid = gid;
+ LIST_INSERT_HEAD(GIHASH(gid), gip, gi_hash);
+ }
+ }
+ gihold(gip);
+ rw_unlock(&gihashtbl_lock);
+ return (gip);
+}
+
+/*
+ * Take an additional reference on a gidinfo struct.
+ */
+void
+gihold(gip)
+ struct gidinfo *gip;
+{
+
+ refcount_acquire(&gip->gi_ref);
+}
+
+/*-
+ * Since gidinfo structs have a long lifetime, we use an
+ * opportunistic refcounting scheme to avoid locking the lookup hash
+ * for each release.
+ *
+ * If the refcount hits 0, we need to free the structure,
+ * which means we need to lock the hash.
+ * Optimal case:
+ * If more than one reference is outstanding, drop ours with a
+ * single atomic compare-and-set and return without locking.
+ * Suboptimal case:
+ * Otherwise (last reference, or the compare-and-set failed), take
+ * the hash write lock, release the reference and, if it was the
+ * last one, unhash and free the structure.
+ */
+void
+gifree(gip)
+ struct gidinfo *gip;
+{
+ int old;
+
+ /* Prepare for optimal case. */
+ old = gip->gi_ref;
+ if (old > 1 && atomic_cmpset_int(&gip->gi_ref, old, old - 1))
+ return;
+
+ /* Prepare for suboptimal case. */
+ rw_wlock(&gihashtbl_lock);
+ if (refcount_release(&gip->gi_ref)) {
+ LIST_REMOVE(gip, gi_hash);
+ rw_wunlock(&gihashtbl_lock);
+ free(gip, M_GIDINFO);
+ return;
+ }
+ /*
+ * Someone added a reference between atomic_cmpset_int() and
+ * rw_wlock(&gihashtbl_lock).
+ */
+ rw_wunlock(&gihashtbl_lock);
+}
+
+/*
* Change the count associated with number of processes
* a given user is using. When 'max' is 0, don't enforce a limit
*/
==== //depot/projects/soc2009/trasz_limits/sys/sys/hrl.h#8 (text+ko) ====
@@ -96,6 +96,7 @@
#define HRL_OP_GET_ACC_PID 2
#define HRL_OP_GET_ACC_UID 3
#define HRL_OP_GET_ACC_GID 4
+#define HRL_OP_GET_ACC_JAILID 5
/*
* 'hrl_acc' defines resource consumption for a particular
==== //depot/projects/soc2009/trasz_limits/sys/sys/jail.h#3 (text+ko) ====
@@ -30,6 +30,8 @@
#ifndef _SYS_JAIL_H_
#define _SYS_JAIL_H_
+#include <sys/hrl.h>
+
#ifdef _KERNEL
struct jail_v0 {
u_int32_t version;
@@ -171,6 +173,7 @@
char pr_domain[MAXHOSTNAMELEN]; /* (p) jail domainname */
char pr_uuid[HOSTUUIDLEN]; /* (p) jail hostuuid */
unsigned long pr_hostid; /* (p) jail hostid */
+ struct hrl_acc pr_accounting; /* (*) HRL resource accounting */
};
#endif /* _KERNEL || _WANT_PRISON */
==== //depot/projects/soc2009/trasz_limits/sys/sys/resourcevar.h#4 (text+ko) ====
@@ -98,6 +98,21 @@
struct hrl_acc ui_accounting; /* (*) HRL resource accounting */
};
+/*
+ * Per gid resource consumption
+ *
+ * Locking guide:
+ * (a) Constant from inception
+ * (b) Lockless, updated using atomics
+ * (c) Locked by global gihashtbl_lock
+ */
+struct gidinfo {
+ LIST_ENTRY(gidinfo) gi_hash; /* (c) hash chain of gidinfos */
+ gid_t gi_gid; /* (a) gid */
+ u_int gi_ref; /* (b) reference count */
+ struct hrl_acc gi_accounting; /* (*) HRL resource accounting */
+};
+
struct proc;
struct rusage_ext;
struct thread;
@@ -134,6 +149,11 @@
void uifree(struct uidinfo *uip);
void uihashinit(void);
void uihold(struct uidinfo *uip);
+struct gidinfo
+ *gifind(gid_t gid);
+void gifree(struct gidinfo *gip);
+void gihashinit(void);
+void gihold(struct gidinfo *gip);
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCEVAR_H_ */
==== //depot/projects/soc2009/trasz_limits/sys/sys/ucred.h#3 (text+ko) ====
@@ -50,6 +50,7 @@
uid_t cr_svuid; /* saved user id */
short cr_ngroups; /* number of groups */
gid_t cr_groups[NGROUPS]; /* groups */
+ struct gidinfo *cr_gidinfos[NGROUPS]; /* group resource consumption */
gid_t cr_rgid; /* real group id */
gid_t cr_svgid; /* saved group id */
struct uidinfo *cr_uidinfo; /* per euid resource consumption */
==== //depot/projects/soc2009/trasz_limits/usr.sbin/hrl/hrl.c#8 (text+ko) ====
@@ -302,18 +302,19 @@
usage(void)
{
- fprintf(stderr, "usage: hrl [-u user | -g group | -p pid]\n");
+ fprintf(stderr, "usage: hrl [-u user | -g group | -p pid | -j jailid]\n");
+ exit(1);
}
int
main(int argc __unused, char **argv __unused)
{
- int ch, op, pflag = 0, uflag = 0, gflag = 0;
+ int ch, op, pflag = 0, uflag = 0, gflag = 0, jflag = 0;
id_t id = 0;
op = HRL_OP_GET_RULES;
- while ((ch = getopt(argc, argv, "p:u:g:")) != -1) {
+ while ((ch = getopt(argc, argv, "p:u:g:j:")) != -1) {
switch (ch) {
case 'p':
pflag = 1;
@@ -330,14 +331,19 @@
op = HRL_OP_GET_ACC_GID;
id = parse_group(optarg);
break;
+ case 'j':
+ jflag = 1;
+ op = HRL_OP_GET_ACC_JAILID;
+ id = parse_group(optarg);
+ break;
case '?':
default:
usage();
}
}
- if (pflag + uflag + gflag > 1)
- errx(1, "only one of the -p, -u and -g may be specified "
+ if (pflag + uflag + gflag + jflag > 1)
+ errx(1, "only one of the -p, -u, -g and -j may be specified "
"at the same time");
switch (op) {
@@ -348,6 +354,7 @@
case HRL_OP_GET_ACC_PID:
case HRL_OP_GET_ACC_UID:
case HRL_OP_GET_ACC_GID:
+ case HRL_OP_GET_ACC_JAILID:
print_accounting(op, id);
break;
}
More information about the p4-projects
mailing list