[PATCH] RUSAGE_THREAD

Kostik Belousov kostikbel at gmail.com
Mon May 3 16:35:29 UTC 2010


On Mon, May 03, 2010 at 08:13:00PM +0000, Alexander Krizhanovsky wrote:
> Kostik,
> 
> thank you very much for the review!
> 
> Kostik Belousov wrote:
> >On Mon, Apr 19, 2010 at 12:46:48AM +0000, Alexander Krizhanovsky wrote:
> >  
> >>Hi all!
> >>
> >>This patch implements per-thread rusage statistic (like RUSAGE_THREAD in 
> >>Linux and RUSAGE_LWP in OpenSolaris).
> >>
> >>Unfortunately, we have to acquire a number of locks to read/update 
> >>system and user times for current thread rusage information because it's 
> >>also used for whole process statistic and needs to be zeroed.
> >>
> >>Any comments are very appreciated.
> >>
> >>It's tested against 8.0-RELEASE. Appropriate PR is submitted.
> >>
> >>-- 
> >>Alexander Krizhanovsky
> >>NatSys Lab. (http://natsys-lab.com)
> >>tel: +7 (916) 717-3899, +7 (499) 747-6304
> >>e-mail: ak at natsys-lab.com
> >>
> >>    
> >I think this should be somewhat modified before commit.
> >
> >First, please separate patch into two pieces. One would introduce
> >ruxagg_tlock() helper and use it in existing locations instead of
> >three-liners. Possibly, add K&R->ANSI C kern_getrusage definition
> >change.
> >
> >Second would actually add RUSAGE_THREAD. There, please follow
> >the style(9), in particular, do not initialize the local variables
> >at the declaration, instead, use explicit initialization in the code.
> >  
> Done. I have also introduced third patch for casting microseconds to 
> timeval and replaced a few appropriate places in whole kernel code.
> patch-rusage-thread-03052010.txt is incremental for 
> patch-ruxagg_tlock-03052010.txt, which is by turn incremental for 
> patch-usec2timeval-03052010.txt.
> 
> I have made following change for calcru1():
> -       if ((int64_t)tu < 0) {
> -               /* XXX: this should be an assert /phk */
> -               printf("calcru: negative runtime of %jd usec for pid %d 
> (%s)\n",
> -                   (intmax_t)tu, p->p_pid, p->p_comm);
> -               tu = ruxp->rux_tu;
> -       }
> +       KASSERT((int64_t)tu < 0, ("calcru: negative runtime of %jd usec "
> +                               "for pid %d (%s)\n",
> +                               (intmax_t)tu, p->p_pid, p->p_comm));
> 
> tu can become negative at about 300 thousand years of uptime, so this 
> condition seems quite unrealistic and indeed should be replaced by an 
> assertion. However I didn't understand why it isn't done so far and the 
> comment only was added. Did I miss something?
> 
> >Should calctru() share the code with calcru1() ? I am esp. concerned
> >with sanity check like negative runtime etc. Possibly, this code
> >from calcru1() should be separated into function used from both
> >calcru1() and calctru().
> >
> >Man page for getrusage(2) should be updated.
> >  
> It's added to patch-rusage-thread-03052010.txt.
> 
> In the end - shoud I raise a new PR (the original one is kern/145813)?
> 
> >Thanks for the submission.

It was some time, I already committed ruxagg_tlock() part, that caused
some feedback, and I reworked the rest of the patch myself. See svn-src@
for some discussion, and the latest version that I intend to commit
shortly is below. Your comments are welcome.

Lets discuss third patch after this is landed.

diff --git a/lib/libc/sys/getrusage.2 b/lib/libc/sys/getrusage.2
index bdf5d45..423503f 100644
--- a/lib/libc/sys/getrusage.2
+++ b/lib/libc/sys/getrusage.2
@@ -28,7 +28,7 @@
 .\"     @(#)getrusage.2	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd June 4, 1993
+.Dd May 1, 2010
 .Dt GETRUSAGE 2
 .Os
 .Sh NAME
@@ -42,6 +42,7 @@
 .In sys/resource.h
 .Fd "#define	RUSAGE_SELF	 0"
 .Fd "#define	RUSAGE_CHILDREN	-1"
+.Fd "#define	RUSAGE_THREAD	1"
 .Ft int
 .Fn getrusage "int who" "struct rusage *rusage"
 .Sh DESCRIPTION
@@ -49,11 +50,12 @@ The
 .Fn getrusage
 system call
 returns information describing the resources utilized by the current
-process, or all its terminated child processes.
+thread, the current process, or all its terminated child processes.
 The
 .Fa who
 argument is either
-.Dv RUSAGE_SELF
+.Dv RUSAGE_THREAD ,
+.Dv RUSAGE_SELF ,
 or
 .Dv RUSAGE_CHILDREN .
 The buffer to which
@@ -175,6 +177,10 @@ The
 .Fn getrusage
 system call appeared in
 .Bx 4.2 .
+The
+.Dv RUSAGE_THREAD
+facility first appeared in
+.Fx 8.1 .
 .Sh BUGS
 There is no way to obtain information about a child process
 that has not yet terminated.
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 49a3097..6c50f1f 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -901,7 +901,7 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
 	kp->ki_pri.pri_user = td->td_user_pri;
 
 	if (preferthread) {
-		kp->ki_runtime = cputick2usec(td->td_runtime);
+		kp->ki_runtime = td->td_rux.rux_runtime;
 		kp->ki_pctcpu = sched_pctcpu(td);
 		kp->ki_estcpu = td->td_estcpu;
 	}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index a3ed75d..0bc78d0 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -76,7 +76,7 @@ static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
 		    struct timeval *up, struct timeval *sp);
 static int	donice(struct thread *td, struct proc *chgp, int n);
 static struct uidinfo *uilookup(uid_t uid);
-static void	ruxagg_tlock(struct proc *p, struct thread *td);
+static void	ruxagg(struct proc *p, struct thread *td);
 
 /*
  * Resource controls and accounting.
@@ -630,7 +630,7 @@ lim_cb(void *arg)
 		return;
 	PROC_SLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td) {
-		ruxagg_tlock(p, td);
+		ruxagg(p, td);
 	}
 	PROC_SUNLOCK(p);
 	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
@@ -841,7 +841,7 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp)
 	FOREACH_THREAD_IN_PROC(p, td) {
 		if (td->td_incruntime == 0)
 			continue;
-		ruxagg_tlock(p, td);
+		ruxagg(p, td);
 	}
 	calcru1(p, &p->p_rux, up, sp);
 }
@@ -961,6 +961,16 @@ kern_getrusage(struct thread *td, int who, struct rusage *rup)
 		calccru(p, &rup->ru_utime, &rup->ru_stime);
 		break;
 
+	case RUSAGE_THREAD:
+		PROC_SLOCK(p);
+		ruxagg(p, td);
+		PROC_SUNLOCK(p);
+		thread_lock(td);
+		*rup = td->td_ru;
+		calcru1(p, &td->td_rux, &rup->ru_utime, &rup->ru_stime);
+		thread_unlock(td);
+		break;
+
 	default:
 		error = EINVAL;
 	}
@@ -1001,7 +1011,7 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
  * Aggregate tick counts into the proc's rusage_ext.
  */
 void
-ruxagg(struct rusage_ext *rux, struct thread *td)
+ruxagg_locked(struct rusage_ext *rux, struct thread *td)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -1010,18 +1020,19 @@ ruxagg(struct rusage_ext *rux, struct thread *td)
 	rux->rux_uticks += td->td_uticks;
 	rux->rux_sticks += td->td_sticks;
 	rux->rux_iticks += td->td_iticks;
-	td->td_incruntime = 0;
-	td->td_uticks = 0;
-	td->td_iticks = 0;
-	td->td_sticks = 0;
 }
 
 static void
-ruxagg_tlock(struct proc *p, struct thread *td)
+ruxagg(struct proc *p, struct thread *td)
 {
 
 	thread_lock(td);
-	ruxagg(&p->p_rux, td);
+	ruxagg_locked(&p->p_rux, td);
+	ruxagg_locked(&td->td_rux, td);
+	td->td_incruntime = 0;
+	td->td_uticks = 0;
+	td->td_iticks = 0;
+	td->td_sticks = 0;
 	thread_unlock(td);
 }
 
@@ -1039,7 +1050,7 @@ rufetch(struct proc *p, struct rusage *ru)
 	*ru = p->p_ru;
 	if (p->p_numthreads > 0)  {
 		FOREACH_THREAD_IN_PROC(p, td) {
-			ruxagg_tlock(p, td);
+			ruxagg(p, td);
 			rucollect(ru, &td->td_ru);
 		}
 	}
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 9be4c2f..b32c584 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -432,7 +432,7 @@ thread_exit(void)
 	PROC_UNLOCK(p);
 	thread_lock(td);
 	/* Save our tick information with both the thread and proc locked */
-	ruxagg(&p->p_rux, td);
+	ruxagg_locked(&p->p_rux, td);
 	PROC_SUNLOCK(p);
 	td->td_state = TDS_INACTIVE;
 #ifdef WITNESS
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fb31cfc..e32e494 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -173,6 +173,27 @@ struct kdtrace_thread;
 struct cpuset;
 
 /*
+ * XXX: Does this belong in resource.h or resourcevar.h instead?
+ * Resource usage extension.  The times in rusage structs in the kernel are
+ * never up to date.  The actual times are kept as runtimes and tick counts
+ * (with control info in the "previous" times), and are converted when
+ * userland asks for rusage info.  Backwards compatibility prevents putting
+ * this directly in the user-visible rusage struct.
+ *
+ * Locking for p_rux: (cj) means (j) for p_rux and (c) for p_crux.
+ * Locking for td_rux: (t) for all fields.
+ */
+struct rusage_ext {
+	u_int64_t	rux_runtime;    /* (cj) Real time. */
+	u_int64_t	rux_uticks;     /* (cj) Statclock hits in user mode. */
+	u_int64_t	rux_sticks;     /* (cj) Statclock hits in sys mode. */
+	u_int64_t	rux_iticks;     /* (cj) Statclock hits in intr mode. */
+	u_int64_t	rux_uu;         /* (c) Previous user time in usec. */
+	u_int64_t	rux_su;         /* (c) Previous sys time in usec. */
+	u_int64_t	rux_tu;         /* (c) Previous total time in usec. */
+};
+
+/*
  * Kernel runnable context (thread).
  * This is what is put to sleep and reactivated.
  * Thread context.  Processes may have multiple threads.
@@ -219,7 +240,8 @@ struct thread {
 	u_int		td_estcpu;	/* (t) estimated cpu utilization */
 	int		td_slptick;	/* (t) Time at sleep. */
 	int		td_blktick;	/* (t) Time spent blocked. */
-	struct rusage	td_ru;		/* (t) rusage information */
+	struct rusage	td_ru;		/* (t) rusage information. */
+	struct rusage_ext td_rux;	/* (t) Internal rusage information. */
 	uint64_t	td_incruntime;	/* (t) Cpu ticks to transfer to proc. */
 	uint64_t	td_runtime;	/* (t) How many cpu ticks we've run. */
 	u_int 		td_pticks;	/* (t) Statclock hits for profiling */
@@ -426,26 +448,6 @@ do {									\
 #define	TD_SET_CAN_RUN(td)	(td)->td_state = TDS_CAN_RUN
 
 /*
- * XXX: Does this belong in resource.h or resourcevar.h instead?
- * Resource usage extension.  The times in rusage structs in the kernel are
- * never up to date.  The actual times are kept as runtimes and tick counts
- * (with control info in the "previous" times), and are converted when
- * userland asks for rusage info.  Backwards compatibility prevents putting
- * this directly in the user-visible rusage struct.
- *
- * Locking: (cj) means (j) for p_rux and (c) for p_crux.
- */
-struct rusage_ext {
-	u_int64_t	rux_runtime;    /* (cj) Real time. */
-	u_int64_t	rux_uticks;     /* (cj) Statclock hits in user mode. */
-	u_int64_t	rux_sticks;     /* (cj) Statclock hits in sys mode. */
-	u_int64_t	rux_iticks;     /* (cj) Statclock hits in intr mode. */
-	u_int64_t	rux_uu;         /* (c) Previous user time in usec. */
-	u_int64_t	rux_su;         /* (c) Previous sys time in usec. */
-	u_int64_t	rux_tu;         /* (c) Previous total time in usec. */
-};
-
-/*
  * Process structure.
  */
 struct proc {
diff --git a/sys/sys/resource.h b/sys/sys/resource.h
index 9af96af..e703744 100644
--- a/sys/sys/resource.h
+++ b/sys/sys/resource.h
@@ -56,6 +56,7 @@
 
 #define	RUSAGE_SELF	0
 #define	RUSAGE_CHILDREN	-1
+#define	RUSAGE_THREAD	1
 
 struct rusage {
 	struct timeval ru_utime;	/* user time used */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index 21728aa..95a9b49 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -131,7 +131,7 @@ void	 rucollect(struct rusage *ru, struct rusage *ru2);
 void	 rufetch(struct proc *p, struct rusage *ru);
 void	 rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
 	    struct timeval *sp);
-void	 ruxagg(struct rusage_ext *rux, struct thread *td);
+void	 ruxagg_locked(struct rusage_ext *rux, struct thread *td);
 int	 suswintr(void *base, int word);
 struct uidinfo
 	*uifind(uid_t uid);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-hackers/attachments/20100503/67f56d61/attachment.pgp


More information about the freebsd-hackers mailing list