git: 2800e3a8ae1d - stable/13 - cxgbe: Compute timestamps via sbintime_t.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Fri, 11 Nov 2022 01:48:23 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=2800e3a8ae1daead79759efb361729447c608634

commit 2800e3a8ae1daead79759efb361729447c608634
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2022-09-26 21:57:26 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-11-11 01:26:27 +0000

    cxgbe: Compute timestamps via sbintime_t.
    
    This uses fixed-point math already used elsewhere in the kernel for
    sub-second time values.  To avoid overflows, this does require updating
    the calibration once a second rather than once every 30 seconds.  Note
    that the cxgbe driver already queries multiple registers once a second
    for the statistics timers.  This version also uses fewer instructions
    with no branches (for the math portion) in the per-packet fast path.
    
    Reviewed by:    np
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D36663
    
    (cherry picked from commit 2c74c9dac3a6e74a2c33d519cdcf1de145e7664c)
---
 sys/dev/cxgbe/adapter.h |  4 ++--
 sys/dev/cxgbe/t4_main.c | 39 ++++++---------------------------------
 sys/dev/cxgbe/t4_sge.c  | 41 +++++------------------------------------
 3 files changed, 13 insertions(+), 71 deletions(-)

diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 1fb0d9ff61c2..4d246512b868 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -865,8 +865,8 @@ struct clip_entry;
 struct clock_sync {
 	uint64_t hw_cur;
 	uint64_t hw_prev;
-	uint64_t rt_cur;
-	uint64_t rt_prev;
+	sbintime_t sbt_cur;
+	sbintime_t sbt_prev;
 	uint32_t gen;
 };
 
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 9ac790abfcf4..eff4db09aa12 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -319,18 +319,6 @@ static int t4_rsrv_noflowq = 0;
 SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
     0, "Reserve TX queue 0 of each VI for non-flowid packets");
 
-static int t4_clocksync_fast = 1;
-SYSCTL_INT(_hw_cxgbe, OID_AUTO, csfast, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_fast, 0,
-    "During initial clock sync how fast do we update in seconds");
-
-static int t4_clocksync_normal = 30;
-SYSCTL_INT(_hw_cxgbe, OID_AUTO, csnormal, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_normal, 0,
-    "During normal clock sync how fast do we update in seconds");
-
-static int t4_fast_2_normal = 30;
-SYSCTL_INT(_hw_cxgbe, OID_AUTO, cscount, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_fast_2_normal, 0,
-    "How many clock syncs do we need to do to transition to slow");
-
 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
 #define NOFLDTXQ 8
 static int t4_nofldtxq = -NOFLDTXQ;
@@ -1122,17 +1110,10 @@ t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
 	return (-1);
 }
 
-static inline uint64_t
-t4_get_ns_timestamp(struct timespec *ts)
-{
-	return ((ts->tv_sec * 1000000000) + ts->tv_nsec);
-}
-
 static void
 t4_calibration(void *arg)
 {
 	struct adapter *sc;
-	struct timespec ts;
 	struct clock_sync *cur, *nex;
 	int next_up;
 
@@ -1144,17 +1125,15 @@ t4_calibration(void *arg)
 	if (__predict_false(sc->cal_count == 0)) {
 		/* First time in, just get the values in */
 		cur->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO);
-		nanouptime(&ts);
-		cur->rt_cur = t4_get_ns_timestamp(&ts);
+		cur->sbt_cur = sbinuptime();
 		sc->cal_count++;
 		goto done;
 	}
 	nex->hw_prev = cur->hw_cur;
-	nex->rt_prev = cur->rt_cur;
-	KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibtration"));
+	nex->sbt_prev = cur->sbt_cur;
+	KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibration"));
 	nex->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO);
-	nanouptime(&ts);	
-	nex->rt_cur = t4_get_ns_timestamp(&ts);
+	nex->sbt_cur = sbinuptime();
 	if ((nex->hw_cur - nex->hw_prev) == 0) {
 		/* The clock is not advancing? */
 		sc->cal_count = 0;
@@ -1165,17 +1144,11 @@ t4_calibration(void *arg)
 	sc->cal_current = next_up;
 	sc->cal_gen++;
 	atomic_store_rel_int(&nex->gen, sc->cal_gen);
-	if (sc->cal_count < t4_fast_2_normal)
-		sc->cal_count++;
 done:
-	callout_reset_sbt_curcpu(&sc->cal_callout,
-				 ((sc->cal_count < t4_fast_2_normal)  ?
-				 t4_clocksync_fast : t4_clocksync_normal) * SBT_1S, 0,
-				 t4_calibration, sc, C_DIRECT_EXEC);
+	callout_reset_sbt_curcpu(&sc->cal_callout, SBT_1S, 0, t4_calibration,
+	    sc, C_DIRECT_EXEC);
 }
 
-
-
 static void
 t4_calibration_start(struct adapter *sc)
 {
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 48d87be4e0b6..fc8a55111a4c 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -1516,10 +1516,9 @@ static inline uint64_t
 t4_tstmp_to_ns(struct adapter *sc, uint64_t lf)
 {
 	struct clock_sync *cur, dcur;
-	uint64_t tstmp_sec, tstmp_nsec;
 	uint64_t hw_clocks;
-	uint64_t rt_cur_to_prev, res_s, res_n, res_s_modulo, res;
-	uint64_t hw_clk_div, cclk;
+	uint64_t hw_clk_div;
+	sbintime_t sbt_cur_to_prev, sbt;
 	uint64_t hw_tstmp = lf & 0xfffffffffffffffULL;	/* 60b, not 64b. */
 	uint32_t gen;
 
@@ -1541,42 +1540,12 @@ t4_tstmp_to_ns(struct adapter *sc, uint64_t lf)
 	 *
 	 * With the constraints that we cannot use float and we
 	 * don't want to overflow the uint64_t numbers we are using.
-	 *
-	 * The plan is to take the clocking value of the hw timestamps
-	 * and split them into seconds and nanosecond equivalent portions.
-	 * Then we operate on the two portions seperately making sure to
-	 * bring back the carry over from the seconds when we divide.
-	 *
-	 * First up lets get the two divided into separate entities
-	 * i.e. the seconds. We use the clock frequency for this.
-	 * Note that vpd.cclk is in khz, we need it in raw hz so
-	 * convert to hz.
 	 */
-	cclk = (uint64_t)sc->params.vpd.cclk * 1000;
 	hw_clocks = hw_tstmp - dcur.hw_prev;
-	tstmp_sec = hw_clocks / cclk;
-	tstmp_nsec = hw_clocks % cclk;
-	/* Now work with them separately */
-	rt_cur_to_prev = (dcur.rt_cur - dcur.rt_prev);
-	res_s = tstmp_sec * rt_cur_to_prev;
-	res_n = tstmp_nsec * rt_cur_to_prev;
-	/* Now lets get our divider */
+	sbt_cur_to_prev = (dcur.sbt_cur - dcur.sbt_prev);
 	hw_clk_div = dcur.hw_cur - dcur.hw_prev;
-	/* Make sure to save the remainder from the seconds divide */
-	res_s_modulo = res_s % hw_clk_div;
-	res_s /= hw_clk_div;
-	/* scale the remainder to where it should be */
-	res_s_modulo *= cclk;
-	/* Now add in the remainder */
-	res_n += res_s_modulo;
-	/* Now do the divide */
-	res_n /= hw_clk_div;
-	res_s *= cclk;
-	/* Recombine the two */
-	res = res_s + res_n;
-	/* And now add in the base time to get to the real timestamp */
-	res += dcur.rt_prev;
-	return (res);
+	sbt = hw_clocks * sbt_cur_to_prev / hw_clk_div + dcur.sbt_prev;
+	return (sbttons(sbt));
 }
 
 static inline void