calcru: negative time ... followed by freeze
Bruce Evans
bde at zeta.org.au
Mon Jun 21 13:38:12 GMT 2004
On Mon, 21 Jun 2004, Bruce Evans wrote:
> On Mon, 21 Jun 2004, Don Lewis wrote:
> > It looks like the bug is in the exit code. I tweaked the printf() in
> > calcru() to print out p_state, p_flag, and p_sflag in addition to the
> > other info. In all cases, the processes that trigger the printf were
> > zombies, and show up as [running] in ttyinfo() on a uniprocessor box.
>
> There is a PR about this (#52490). The oops in my test fixes it for bogus
> reasons. I'm currently adding similar printfs to help figure out what is
> going wrong.
>
> > Jun 21 03:17:03 hairball kernel: calcru: negative time of 179 usec for
> > pid 4543 (sh) p_state=0x2 p_flag=0x2002 p_sflag=0x1
> >
> > load: 0.71 cmd: sh 4543 [running] 0.00u 0.00s 3% 0k
> >
> > p_runtime only gets updated in mi_switch(), and it appears that it never
> > gets updated after the calcru() call in exit1().
>
> That explains the problem. The calcru() values sets up to date (final)
> values for the components of ptu. When we look at the process after it
> has become a zombie, we use only p_runtime since the process is not
> running, but p_runtime is stale.
I happened to have fixed this already without really noticing. (My kernel
doesn't call calcru() in exit1() or in wait1(); it accumulates p_runtime
and tick counts (instead of those values converted to timevals by calcru())
in the child stats, so it has to get the final p_runtime right.)
%%%
Index: kern_exit.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.236
diff -u -2 -r1.236 kern_exit.c
--- kern_exit.c 18 Jun 2004 11:13:49 -0000 1.236
+++ kern_exit.c 21 Jun 2004 13:04:31 -0000
@@ -104,4 +104,5 @@
exit1(struct thread *td, int rv)
{
+ struct bintime new_switchtime;
struct proc *p, *nq, *q;
struct tty *tp;
@@ -518,8 +519,14 @@
mtx_lock_spin(&sched_lock);
critical_exit();
- cnt.v_swtch++;
- binuptime(PCPU_PTR(switchtime));
+
+ /* Do the same timestamp bookkeeping that mi_switch() would do. */
+ binuptime(&new_switchtime);
+ bintime_add(&p->p_runtime, &new_switchtime);
+ bintime_sub(&p->p_runtime, PCPU_PTR(switchtime));
+ PCPU_SET(switchtime, new_switchtime);
PCPU_SET(switchticks, ticks);
+ cnt.v_swtch++;
+
/*
* Allow the scheduler to adjust the priority of the
%%%
I will commit this soon.
Workaround for the main problem:
%%%
Index: kern_resource.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_resource.c,v
retrieving revision 1.139
diff -u -2 -r1.139 kern_resource.c
--- kern_resource.c 16 Jun 2004 00:26:29 -0000 1.139
+++ kern_resource.c 21 Jun 2004 12:55:24 -0000
@@ -702,8 +702,10 @@
struct timeval *ip;
{
- struct bintime bt;
+ struct bintime bt, rt;
struct timeval tv;
+ struct thread *td;
/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
+ int problemcase;
mtx_assert(&sched_lock, MA_OWNED);
@@ -719,22 +721,40 @@
tt = 1;
}
- if (p == curthread->td_proc) {
+ rt = p->p_runtime;
+ problemcase = 0;
+ FOREACH_THREAD_IN_PROC(p, td) {
/*
* Adjust for the current time slice. This is actually fairly
* important since the error here is on the order of a time
* quantum, which is much greater than the sampling error.
- * XXXKSE use a different test due to threads on other
- * processors also being 'current'.
*/
- binuptime(&bt);
- bintime_sub(&bt, PCPU_PTR(switchtime));
- bintime_add(&bt, &p->p_runtime);
- } else
- bt = p->p_runtime;
- bintime2timeval(&bt, &tv);
+ if (td == curthread) {
+ binuptime(&bt);
+ bintime_sub(&bt, PCPU_PTR(switchtime));
+ bintime_add(&rt, &bt);
+ } else if (TD_IS_RUNNING(td)) {
+ /*
+ * This case should add the current time less the
+ * switch time as above, but the switch time is
+ * inaccessible. So we might end up with rt too
+ * small and then the monotonicity check might detect
+ * the problem. Just set a flag to avoid warning
+ * about this known problem.
+ */
+ problemcase = 1;
+ }
+ }
+ bintime2timeval(&rt, &tv);
tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
ptu = p->p_uu + p->p_su + p->p_iu;
- if (tu < ptu || (int64_t)tu < 0) {
- printf("calcru: negative time of %jd usec for pid %d (%s)\n",
+ if (tu < ptu) {
+ if (!problemcase)
+ printf(
+"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
+ (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
+ tu = ptu;
+ }
+ if ((int64_t)tu < 0) {
+ printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
(intmax_t)tu, p->p_pid, p->p_comm);
tu = ptu;
%%%
Bruce
More information about the freebsd-threads
mailing list