svn commit: r195710 - in head/sys/cddl/dev/dtrace: amd64 i386

Wed Jul 15 17:07:40 UTC 2009

Author: avg
Date: Wed Jul 15 17:07:39 2009
New Revision: 195710
URL: http://svn.freebsd.org/changeset/base/195710

Log:
  dtrace_gethrtime: improve scaling of TSC ticks to nanoseconds
  
  Currently dtrace_gethrtime uses formula similar to the following for
  converting TSC ticks to nanoseconds:
  rdtsc() * 10^9 / tsc_freq
  The dividend overflows 64-bit type and wraps-around every 2^64/10^9 =
  18446744073 ticks which is just a few seconds on modern machines.
  
  Now we instead use precalculated scaling factor of
  10^9*2^N/tsc_freq < 2^32 and perform TSC value multiplication separately
  for each 32-bit half.  This allows to avoid overflow of the dividend
  described above.
  The idea is taken from OpenSolaris.
  This has an added feature of always scaling TSC with invariant value
  regardless of TSC frequency changes. Thus the timestamps will not be
  accurate if TSC actually changes, but they are always proportional to
  TSC ticks and thus monotonic. This should be much better than current
  formula which produces wildly different non-monotonic results on when
  tsc_freq changes.
  
  Also drop write-only 'cp' variable from amd64 dtrace_gethrtime_init()
  to make it identical to the i386 twin.
  
  PR:		kern/127441
  Tested by:	Thomas Backman <serenity at exscape.org>
  Reviewed by:	jhb
  Discussed with:	current@, bde, gnn
  Silence from:	jb
  Approved by:	re (gnn)
  MFC after:	1 week

Modified:
  head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
  head/sys/cddl/dev/dtrace/i386/dtrace_subr.c

Modified: head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
==============================================================================

--- head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c	Wed Jul 15 13:50:06 2009	(r195709)
+++ head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c	Wed Jul 15 17:07:39 2009	(r195710)
@@ -366,6 +366,10 @@ dtrace_safe_defer_signal(void)
 static int64_t	tgt_cpu_tsc;
 static int64_t	hst_cpu_tsc;
 static int64_t	tsc_skew[MAXCPU];
+static uint64_t	nsec_scale;
+
+/* See below for the explanation of this macro. */
+#define SCALE_SHIFT	28
 
 static void
 dtrace_gethrtime_init_sync(void *arg)
@@ -401,9 +405,36 @@ dtrace_gethrtime_init_cpu(void *arg)
 static void
 dtrace_gethrtime_init(void *arg)
 {
+	uint64_t tsc_f;
 	cpumask_t map;
 	int i;
-	struct pcpu *cp;
+
+	/*
+	 * Get TSC frequency known at this moment.
+	 * This should be constant if TSC is invariant.
+	 * Otherwise tick->time conversion will be inaccurate, but
+	 * will preserve monotonic property of TSC.
+	 */
+	tsc_f = tsc_freq;
+
+	/*
+	 * The following line checks that nsec_scale calculated below
+	 * doesn't overflow 32-bit unsigned integer, so that it can multiply
+	 * another 32-bit integer without overflowing 64-bit.
+	 * Thus minimum supported TSC frequency is 62.5MHz.
+	 */
+	KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low"));
+
+	/*
+	 * We scale up NANOSEC/tsc_f ratio to preserve as much precision
+	 * as possible.
+	 * 2^28 factor was chosen quite arbitrarily from practical
+	 * considerations:
+	 * - it supports TSC frequencies as low as 62.5MHz (see above);
+	 * - it provides quite good precision (e < 0.01%) up to THz
+	 *   (terahertz) values;
+	 */
+	nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;
 
 	/* The current CPU is the reference one. */
 	tsc_skew[curcpu] = 0;
@@ -412,7 +443,7 @@ dtrace_gethrtime_init(void *arg)
 		if (i == curcpu)
 			continue;
 
-		if ((cp = pcpu_find(i)) == NULL)
+		if (pcpu_find(i) == NULL)
 			continue;
 
 		map = 0;
@@ -439,7 +470,21 @@ SYSINIT(dtrace_gethrtime_init, SI_SUB_SM
 uint64_t
 dtrace_gethrtime()
 {
-	return ((rdtsc() + tsc_skew[curcpu]) * (int64_t) 1000000000 / tsc_freq);
+	uint64_t tsc;
+	uint32_t lo;
+	uint32_t hi;
+
+	/*
+	 * We split TSC value into lower and higher 32-bit halves and separately
+	 * scale them with nsec_scale, then we scale them down by 2^28
+	 * (see nsec_scale calculations) taking into account 32-bit shift of
+	 * the higher half and finally add.
+	 */
+	tsc = rdtsc() + tsc_skew[curcpu];
+	lo = tsc;
+	hi = tsc >> 32;
+	return (((lo * nsec_scale) >> SCALE_SHIFT) +
+	    ((hi * nsec_scale) << (32 - SCALE_SHIFT)));
 }
 
 uint64_t

Modified: head/sys/cddl/dev/dtrace/i386/dtrace_subr.c
==============================================================================
--- head/sys/cddl/dev/dtrace/i386/dtrace_subr.c	Wed Jul 15 13:50:06 2009	(r195709)
+++ head/sys/cddl/dev/dtrace/i386/dtrace_subr.c	Wed Jul 15 17:07:39 2009	(r195710)
@@ -366,6 +366,10 @@ dtrace_safe_defer_signal(void)
 static int64_t	tgt_cpu_tsc;
 static int64_t	hst_cpu_tsc;
 static int64_t	tsc_skew[MAXCPU];
+static uint64_t	nsec_scale;
+
+/* See below for the explanation of this macro. */
+#define SCALE_SHIFT	28
 
 static void
 dtrace_gethrtime_init_sync(void *arg)
@@ -401,9 +405,37 @@ dtrace_gethrtime_init_cpu(void *arg)
 static void
 dtrace_gethrtime_init(void *arg)
 {
+	uint64_t tsc_f;
 	cpumask_t map;
 	int i;
 
+	/*
+	 * Get TSC frequency known at this moment.
+	 * This should be constant if TSC is invariant.
+	 * Otherwise tick->time conversion will be inaccurate, but
+	 * will preserve monotonic property of TSC.
+	 */
+	tsc_f = tsc_freq;
+
+	/*
+	 * The following line checks that nsec_scale calculated below
+	 * doesn't overflow 32-bit unsigned integer, so that it can multiply
+	 * another 32-bit integer without overflowing 64-bit.
+	 * Thus minimum supported TSC frequency is 62.5MHz.
+	 */
+	KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low"));
+
+	/*
+	 * We scale up NANOSEC/tsc_f ratio to preserve as much precision
+	 * as possible.
+	 * 2^28 factor was chosen quite arbitrarily from practical
+	 * considerations:
+	 * - it supports TSC frequencies as low as 62.5MHz (see above);
+	 * - it provides quite good precision (e < 0.01%) up to THz
+	 *   (terahertz) values;
+	 */
+	nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;
+
 	/* The current CPU is the reference one. */
 	tsc_skew[curcpu] = 0;
 
@@ -438,7 +470,21 @@ SYSINIT(dtrace_gethrtime_init, SI_SUB_SM
 uint64_t
 dtrace_gethrtime()
 {
-	return ((rdtsc() + tsc_skew[curcpu]) * (int64_t) 1000000000 / tsc_freq);
+	uint64_t tsc;
+	uint32_t lo;
+	uint32_t hi;
+
+	/*
+	 * We split TSC value into lower and higher 32-bit halves and separately
+	 * scale them with nsec_scale, then we scale them down by 2^28
+	 * (see nsec_scale calculations) taking into account 32-bit shift of
+	 * the higher half and finally add.
+	 */
+	tsc = rdtsc() + tsc_skew[curcpu];
+	lo = tsc;
+	hi = tsc >> 32;
+	return (((lo * nsec_scale) >> SCALE_SHIFT) +
+	    ((hi * nsec_scale) << (32 - SCALE_SHIFT)));
 }
 
 uint64_t