ports/157176: Heartbeat crashes when clock_t (signed 32bit int) rolls over
Rudolf Polzer
rpolzer at mucke-novak.de
Thu May 19 11:00:27 UTC 2011
>Number: 157176
>Category: ports
>Synopsis: Heartbeat crashes when clock_t (signed 32bit int) rolls over
>Confidential: no
>Severity: critical
>Priority: medium
>Responsible: freebsd-ports-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Thu May 19 11:00:25 UTC 2011
>Closed-Date:
>Last-Modified:
>Originator: Rudolf Polzer
>Release: 7.4-RELEASE
>Organization:
Mucke-Novak
>Environment:
FreeBSD bamgmtbsd1-tst.ba.vftest.net 7.4-RELEASE FreeBSD 7.4-RELEASE #0: Fri Feb 18 01:55:22 UTC 2011 root at driscoll.cse.buffalo.edu:/usr/obj/usr/src/sys/GENERIC amd64
>Description:
On FreeBSD amd64, after about 196 days uptime, the clock_t data type rolls over
from 2147483647 (0x7FFFFFFF) to -2147483648 (0x80000000).
When this happens, cl_times()'s return value, which is unsigned long (64bit)
jumps from 0x000000007FFFFFFF to 0xFFFFFFFF80000000, wreaking all sorts of
havoc, e.g. total spam of messages like:
May 18 10:00:05 baweb01-tev heartbeat: [84855]: WARN: Gmain_timeout_dispatch:
Dispatch function for memory stats was delayed 144115187989455872 ms (> 5000
ms) before being called (GSource: 0x802531318)
May 18 10:00:05 baweb01-tev heartbeat: [84855]: WARN: Gmain_timeout_dispatch:
Dispatch function for memory stats was delayed 144115187989455879 ms (> 5000
ms) before being called (GSource: 0x802531318)
till heartbeat finally shuts down.
Furthermore, on this platform, ./configure does not detect that the rollover
workaround has to be used, as sizeof(long) is 8, which for some odd reason is
the condition for "clock_t is long enough".
Note: I placed the same bug report upstream on http://developerbugs.linux-foundation.org/show_bug.cgi?id=2596
>How-To-Repeat:
Also, for testing purposes, I provided the source code of a "fake uptime"
preload library to fake the return value of times(). That way, the problem can
be reproduced without having access to a FreeBSD amd64 system with an uptime of
196 days. Usage:
gcc -shared -o /tmp/fu.so fu.c -Os -fPIC -s -Wall -Wextra
env LD_PRELOAD=/tmp/fu.so FU_TIMES=0x7FFF0000 /usr/local/etc/rc.d/heartbeat
start
Note that the library may need changes if libc is not /lib/libc.so.7
Source code:
#include <sys/times.h>
#include <err.h>
#include <stdbool.h>
#include <stdlib.h>
#include <dlfcn.h>
typedef clock_t (times_t) (struct tms *buffer);
static times_t *orig_times = NULL;
static bool initialized = false;
static clock_t offset = 0;
static void init()
{
struct tms t;
if(initialized)
return;
initialized = true;
orig_times = (times_t *) dlfunc(RTLD_NEXT, "times");
if(!orig_times)
{
void *libc = dlopen("/lib/libc.so.7", RTLD_LAZY);
orig_times = (times_t *) dlfunc(libc, "times");
}
if(!orig_times)
errx(1, "cannot find times()");
const char *p = getenv("FU_TIMES");
if(p)
{
clock_t ret = times(&t);
offset = strtoll(p, NULL, 0) - ret;
}
else
offset = 0;
initialized = true;
}
clock_t times(struct tms *buffer)
{
init();
clock_t ret = orig_times(buffer);
return ret + offset;
}
>Fix:
I attached a patch doing the following changes:
- fix to ./configure to check size of clock_t, not long
- changed return type of cl_times() from unsigned long to clock_t
- make all callers able to cope with that change - time_longclock() is the only
caller that doesn't immediately convert cl_times()'s return value to a clock_t,
so only that one needed changes
- changed wraparound logic to no longer rely on unsigned long being "good
enough". Instead, the static variables are changed to longclock_t, and sign
extension is explicitly prevented by an AND with the maximum "unsigned" clock_t
value
It fixes the issue in our lab, and should do no harm on other platforms.
Patch attached with submission follows:
diff -ru STABLE-2.1.4.orig/include/clplumbing/longclock.h include/clplumbing/longclock.h
--- STABLE-2.1.4.orig/include/clplumbing/longclock.h 2011-05-12 16:19:17.000000000 +0200
+++ include/clplumbing/longclock.h 2011-05-17 11:02:47.000000000 +0200
@@ -37,7 +37,7 @@
*
* The functions provided here are:
*
- * unsigned long cl_times(void);
+ * clock_t cl_times(void);
* A rational wrapper for the times(2) call
* for those cases where only the return value
* is wanted.
@@ -79,7 +79,7 @@
*
* extern const longclock_t zero_longclock;
*/
-extern unsigned long cl_times(void);
+extern clock_t cl_times(void);
#ifdef CLOCK_T_IS_LONG_ENOUGH
# ifndef HAVE_LONGCLOCK_ARITHMETIC
diff -ru STABLE-2.1.4.orig/lib/clplumbing/longclock.c lib/clplumbing/longclock.c
--- STABLE-2.1.4.orig/lib/clplumbing/longclock.c 2011-05-12 16:19:17.000000000 +0200
+++ lib/clplumbing/longclock.c 2011-05-17 11:25:06.000000000 +0200
@@ -68,7 +68,7 @@
# define TIMES_PARAM &dummy_longclock_tms_struct
#endif
-unsigned long
+clock_t
cl_times(void) /* Make times(2) behave rationally on Linux */
{
clock_t ret;
@@ -108,7 +107,7 @@
}
errno = save_errno;
#endif /* DISABLE_TIMES_KLUDGE */
- return (unsigned long)ret;
+ return ret;
}
#ifdef CLOCK_T_IS_LONG_ENOUGH
@@ -124,8 +133,9 @@
#define BITSPERBYTE 8
#define WRAPSHIFT (BITSPERBYTE*sizeof(clock_t))
-#define MAXIMUMULONG ((unsigned long)~(0UL))
-#define MINJUMP ((MAXIMUMULONG/100UL)*99UL)
+#define WRAPAMOUNT (((longclock_t) 1) << WRAPSHIFT)
+#define MAXIMUMCLOCK (WRAPAMOUNT - 1)
+#define MINJUMP ((MAXIMUMCLOCK/100UL)*99UL)
longclock_t
time_longclock(void)
@@ -136,20 +146,19 @@
* because then this can recurse infinitely; that is why the
* cl_log call is where it is; found by Simon Graham. */
static gboolean calledbefore = FALSE;
- static unsigned long lasttimes = 0L;
- static unsigned long wrapcount = 0L;
+ static longclock_t lasttimes = 0L;
+ static longclock_t wrapcount = 0L;
static unsigned long callcount = 0L;
- static longclock_t lc_wrapcount = 0L;
- unsigned long timesval;
+ longclock_t timesval;
++callcount;
- timesval = (unsigned long) cl_times();
+ timesval = ((longclock_t) cl_times()) & MAXIMUMCLOCK; /* this AND prevents sign extension */
if (calledbefore && timesval < lasttimes) {
- clock_t jumpbackby = lasttimes - timesval;
+ longclock_t jumpbackby = lasttimes - timesval;
- if (jumpbackby < (clock_t)MINJUMP) {
+ if (jumpbackby < MINJUMP) {
/* Kernel weirdness */
cl_log(LOG_CRIT
, "%s: clock_t from times(2) appears to"
@@ -172,8 +181,7 @@
to double update of wrapcount! */
lasttimes = timesval;
- ++wrapcount;
- lc_wrapcount = ((longclock_t)wrapcount) << WRAPSHIFT;
+ wrapcount += WRAPAMOUNT;
cl_log(LOG_INFO
, "%s: clock_t wrapped around (uptime)."
@@ -184,7 +192,7 @@
lasttimes = timesval;
calledbefore = TRUE;
}
- return (lc_wrapcount | (longclock_t)timesval);
+ return (wrapcount | timesval);
}
#endif /* ! CLOCK_T_IS_LONG_ENOUGH */
--- STABLE-2.1.4.orig/configure.in 2011-05-17 13:49:34.000000000 +0200
+++ configure.in 2011-05-17 13:49:37.000000000 +0200
@@ -1726,6 +1726,7 @@
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(long long)
+AC_CHECK_SIZEOF(clock_t, [], [#include <sys/times.h>])
AC_ARG_ENABLE([all],
[ --enable-all Activate ALL features
@@ -2387,7 +2388,7 @@
AC_CHECK_LIB(evs, evs_initialize , , [openais_intalled="no"])
AM_CONDITIONAL(BUILD_OPENAIS_MODULE, test "x${openais_installed}" = "xyes")
AC_MSG_CHECKING(if clock_t is long enough)
-if test $ac_cv_sizeof_long -ge 8; then
+if test $ac_cv_sizeof_clock_t -ge 8; then
AC_MSG_RESULT(yes)
AC_DEFINE(CLOCK_T_IS_LONG_ENOUGH, 1, [Set if CLOCK_T is adequate by itself for the "indefinite future" (>= 100 years)])
else
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-ports-bugs
mailing list