prefer tsc timecounter when it's good

Jung-uk Kim jkim at FreeBSD.org
Tue Apr 26 17:49:59 UTC 2011


On Friday 08 April 2011 01:43 am, Andriy Gapon wrote:
> on 07/04/2011 23:00 Jung-uk Kim said the following:
> > Although it looks okay, please don't commit it just yet.  I am
> > working in this area actively.  Also, if the Intel's claim is
> > true, i.e., TSCs reset to zero when APs start, we cannot use TSC
> > as a timecounter hardware until all APs are started properly.
>
> Great!  Thank you for the info and your work.

Can you please test attached patch?  You can get it from here, too:

http://people.freebsd.org/~jkim/tsc_smp_test.diff

Currently this patch samples 3,000 times to determine if any CPU has 
out-of-order TSC but it may be too much, especially for large SMP 
machines.  If it takes too long, I'll lower the number.  Please 
report if that's the case.

Please note this patch also changes HPET, ACPI-fast and TSC qualities.  
However, TSC on SMP does not change the default quality, i.e., HPET 
or ACPI timer will be chosen by default, because we cannot be sure if 
they'll drift later.  If the user is sure that they don't drift AND 
it is absolutely constant, kern.timecounter.smp_tsc tunable can be 
used to set better quality.

Thanks,

Jung-uk Kim
-------------- next part --------------
Index: sys/dev/acpica/acpi_hpet.c
===================================================================
--- sys/dev/acpica/acpi_hpet.c	(revision 221069)
+++ sys/dev/acpica/acpi_hpet.c	(working copy)
@@ -476,7 +476,7 @@ hpet_attach(device_t dev)
 		sc->tc.tc_get_timecount = hpet_get_timecount,
 		sc->tc.tc_counter_mask = ~0u,
 		sc->tc.tc_name = "HPET",
-		sc->tc.tc_quality = 900,
+		sc->tc.tc_quality = 950,
 		sc->tc.tc_frequency = sc->freq;
 		sc->tc.tc_priv = sc;
 		tc_init(&sc->tc);
Index: sys/dev/acpica/acpi_timer.c
===================================================================
--- sys/dev/acpica/acpi_timer.c	(revision 221069)
+++ sys/dev/acpica/acpi_timer.c	(working copy)
@@ -203,7 +203,7 @@ acpi_timer_probe(device_t dev)
     if (j == 10) {
 	acpi_timer_timecounter.tc_name = "ACPI-fast";
 	acpi_timer_timecounter.tc_get_timecount = acpi_timer_get_timecount;
-	acpi_timer_timecounter.tc_quality = 1000;
+	acpi_timer_timecounter.tc_quality = 900;
     } else {
 	acpi_timer_timecounter.tc_name = "ACPI-safe";
 	acpi_timer_timecounter.tc_get_timecount = acpi_timer_get_timecount_safe;
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c	(revision 221069)
+++ sys/x86/x86/tsc.c	(working copy)
@@ -242,7 +242,55 @@ init_TSC(void)
 	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
 }
 
-void
+#ifdef SMP
+
+#define	TSC_READ(x)			\
+static void				\
+tsc_read_##x(void *arg)			\
+{					\
+	__volatile uint32_t *tsc;	\
+					\
+	tsc = arg;			\
+	tsc += PCPU_GET(cpuid) * 3 + x;	\
+	*tsc = rdtsc32();		\
+}
+TSC_READ(0)
+TSC_READ(1)
+TSC_READ(2)
+#undef TSC_READ
+
+static int
+test_smp_tc(void)
+{
+	uint32_t tsc[MAXCPU * 3];
+	int32_t d[4];
+	u_int i, j, k, m, n;
+
+	if (!tsc_is_invariant)
+		return (-100);
+	for (i = 0; i < 1000; i++) {
+		smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
+		CPU_FOREACH(j) {
+			for (k = j + 1; k <= mp_maxid; k++)
+				if (!CPU_ABSENT(k)) {
+					m = j * 3;
+					n = k * 3;
+					d[0] = tsc[m + 1] - tsc[n];
+					d[1] = tsc[m + 2] - tsc[n + 1];
+					d[2] = tsc[n + 1] - tsc[m];
+					d[3] = tsc[n + 2] - tsc[m + 1];
+					for (m = 0; m < 4; m++)
+						if (d[m] <= 0)
+							return (-100);
+				}
+		}
+	}
+	return (tsc_timecounter.tc_quality);
+}
+
+#endif /* SMP */
+
+static void
 init_TSC_tc(void)
 {
 
@@ -268,21 +316,23 @@ init_TSC_tc(void)
 #ifdef SMP
 	/*
 	 * We can not use the TSC in SMP mode unless the TSCs on all CPUs
-	 * are somehow synchronized.  Some hardware configurations do
-	 * this, but we have no way of determining whether this is the
-	 * case, so we do not use the TSC in multi-processor systems
-	 * unless the user indicated (by setting kern.timecounter.smp_tsc
-	 * to 1) that he believes that his TSCs are synchronized.
+	 * are synchronized.  If the user is sure that the system has
+	 * synchronized TSCs, the above test can be bypassed by setting
+	 * kern.timecounter.smp_tsc tunable to a non-zero value.
 	 */
-	if (mp_ncpus > 1 && !smp_tsc)
-		tsc_timecounter.tc_quality = -100;
+	if (smp_cpus > 1 && !smp_tsc)
+		tsc_timecounter.tc_quality = test_smp_tc();
+	else
 #endif
+	if (tsc_is_invariant)
+		tsc_timecounter.tc_quality = 1000;
 
 	if (tsc_freq != 0) {
 		tsc_timecounter.tc_frequency = tsc_freq;
 		tc_init(&tsc_timecounter);
 	}
 }
+SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
 
 /*
  * When cpufreq levels change, find out about the (new) max frequency.  We
Index: sys/x86/isa/clock.c
===================================================================
--- sys/x86/isa/clock.c	(revision 221069)
+++ sys/x86/isa/clock.c	(working copy)
@@ -498,7 +498,6 @@ void
 cpu_initclocks(void)
 {
 
-	init_TSC_tc();
 	cpu_initclocks_bsp();
 }
 
Index: sys/i386/include/clock.h
===================================================================
--- sys/i386/include/clock.h	(revision 221069)
+++ sys/i386/include/clock.h	(working copy)
@@ -30,7 +30,6 @@ void	i8254_init(void);
 void	startrtclock(void);
 void	timer_restore(void);
 void	init_TSC(void);
-void	init_TSC_tc(void);
 
 #define	HAS_TIMER_SPKR 1
 int	timer_spkr_acquire(void);
Index: sys/amd64/include/clock.h
===================================================================
--- sys/amd64/include/clock.h	(revision 221069)
+++ sys/amd64/include/clock.h	(working copy)
@@ -29,7 +29,6 @@ void	i8254_init(void);
 
 void	startrtclock(void);
 void	init_TSC(void);
-void	init_TSC_tc(void);
 
 #define	HAS_TIMER_SPKR 1
 int	timer_spkr_acquire(void);


More information about the freebsd-current mailing list