git: 2800e3a8ae1d - stable/13 - cxgbe: Compute timestamps via sbintime_t.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 11 Nov 2022 01:48:23 UTC
The branch stable/13 has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=2800e3a8ae1daead79759efb361729447c608634 commit 2800e3a8ae1daead79759efb361729447c608634 Author: John Baldwin <jhb@FreeBSD.org> AuthorDate: 2022-09-26 21:57:26 +0000 Commit: John Baldwin <jhb@FreeBSD.org> CommitDate: 2022-11-11 01:26:27 +0000 cxgbe: Compute timestamps via sbintime_t. This uses fixed-point math already used elsewhere in the kernel for sub-second time values. To avoid overflows this does require updating the calibration once a second rather than once every 30 seconds. Note that the cxgbe driver already queries multiple registers once a second for the statistics timers. This version also uses fewer instructions with no branches (for the math portion) in the per-packet fast path. Reviewed by: np Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D36663 (cherry picked from commit 2c74c9dac3a6e74a2c33d519cdcf1de145e7664c) --- sys/dev/cxgbe/adapter.h | 4 ++-- sys/dev/cxgbe/t4_main.c | 39 ++++++--------------------------------- sys/dev/cxgbe/t4_sge.c | 41 +++++------------------------------------ 3 files changed, 13 insertions(+), 71 deletions(-) diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 1fb0d9ff61c2..4d246512b868 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -865,8 +865,8 @@ struct clip_entry; struct clock_sync { uint64_t hw_cur; uint64_t hw_prev; - uint64_t rt_cur; - uint64_t rt_prev; + sbintime_t sbt_cur; + sbintime_t sbt_prev; uint32_t gen; }; diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 9ac790abfcf4..eff4db09aa12 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -319,18 +319,6 @@ static int t4_rsrv_noflowq = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq, 0, "Reserve TX queue 0 of each VI for non-flowid packets"); -static int t4_clocksync_fast = 1; -SYSCTL_INT(_hw_cxgbe, OID_AUTO, csfast, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_fast, 0, - "During initial clock sync how fast do we update in seconds"); - -static int t4_clocksync_normal = 30; -SYSCTL_INT(_hw_cxgbe, OID_AUTO, csnormal, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_normal, 0, - "During normal clock sync how fast do we update in seconds"); - -static int t4_fast_2_normal = 30; -SYSCTL_INT(_hw_cxgbe, OID_AUTO, cscount, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_fast_2_normal, 0, - "How many clock syncs do we need to do to transition to slow"); - #if defined(TCP_OFFLOAD) || defined(RATELIMIT) #define NOFLDTXQ 8 static int t4_nofldtxq = -NOFLDTXQ; @@ -1122,17 +1110,10 @@ t4_ifnet_unit(struct adapter *sc, struct port_info *pi) return (-1); } -static inline uint64_t -t4_get_ns_timestamp(struct timespec *ts) -{ - return ((ts->tv_sec * 1000000000) + ts->tv_nsec); -} - static void t4_calibration(void *arg) { struct adapter *sc; - struct timespec ts; struct clock_sync *cur, *nex; int next_up; @@ -1144,17 +1125,15 @@ t4_calibration(void *arg) if (__predict_false(sc->cal_count == 0)) { /* First time in, just get the values in */ cur->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO); - nanouptime(&ts); - cur->rt_cur = t4_get_ns_timestamp(&ts); + cur->sbt_cur = sbinuptime(); sc->cal_count++; goto done; } nex->hw_prev = cur->hw_cur; - nex->rt_prev = cur->rt_cur; - KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibtration")); + nex->sbt_prev = cur->sbt_cur; + KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibration")); nex->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO); - nanouptime(&ts); - nex->rt_cur = t4_get_ns_timestamp(&ts); + nex->sbt_cur = sbinuptime(); if ((nex->hw_cur - nex->hw_prev) == 0) { /* The clock is not advancing? */ sc->cal_count = 0; @@ -1165,17 +1144,11 @@ t4_calibration(void *arg) sc->cal_current = next_up; sc->cal_gen++; atomic_store_rel_int(&nex->gen, sc->cal_gen); - if (sc->cal_count < t4_fast_2_normal) - sc->cal_count++; done: - callout_reset_sbt_curcpu(&sc->cal_callout, - ((sc->cal_count < t4_fast_2_normal) ? - t4_clocksync_fast : t4_clocksync_normal) * SBT_1S, 0, - t4_calibration, sc, C_DIRECT_EXEC); + callout_reset_sbt_curcpu(&sc->cal_callout, SBT_1S, 0, t4_calibration, + sc, C_DIRECT_EXEC); } - - static void t4_calibration_start(struct adapter *sc) { diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 48d87be4e0b6..fc8a55111a4c 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -1516,10 +1516,9 @@ static inline uint64_t t4_tstmp_to_ns(struct adapter *sc, uint64_t lf) { struct clock_sync *cur, dcur; - uint64_t tstmp_sec, tstmp_nsec; uint64_t hw_clocks; - uint64_t rt_cur_to_prev, res_s, res_n, res_s_modulo, res; - uint64_t hw_clk_div, cclk; + uint64_t hw_clk_div; + sbintime_t sbt_cur_to_prev, sbt; uint64_t hw_tstmp = lf & 0xfffffffffffffffULL; /* 60b, not 64b. */ uint32_t gen; @@ -1541,42 +1540,12 @@ t4_tstmp_to_ns(struct adapter *sc, uint64_t lf) * * With the constraints that we cannot use float and we * don't want to overflow the uint64_t numbers we are using. - * - * The plan is to take the clocking value of the hw timestamps - * and split them into seconds and nanosecond equivalent portions. - * Then we operate on the two portions seperately making sure to - * bring back the carry over from the seconds when we divide. - * - * First up lets get the two divided into separate entities - * i.e. the seconds. We use the clock frequency for this. - * Note that vpd.cclk is in khz, we need it in raw hz so - * convert to hz. */ - cclk = (uint64_t)sc->params.vpd.cclk * 1000; hw_clocks = hw_tstmp - dcur.hw_prev; - tstmp_sec = hw_clocks / cclk; - tstmp_nsec = hw_clocks % cclk; - /* Now work with them separately */ - rt_cur_to_prev = (dcur.rt_cur - dcur.rt_prev); - res_s = tstmp_sec * rt_cur_to_prev; - res_n = tstmp_nsec * rt_cur_to_prev; - /* Now lets get our divider */ + sbt_cur_to_prev = (dcur.sbt_cur - dcur.sbt_prev); hw_clk_div = dcur.hw_cur - dcur.hw_prev; - /* Make sure to save the remainder from the seconds divide */ - res_s_modulo = res_s % hw_clk_div; - res_s /= hw_clk_div; - /* scale the remainder to where it should be */ - res_s_modulo *= cclk; - /* Now add in the remainder */ - res_n += res_s_modulo; - /* Now do the divide */ - res_n /= hw_clk_div; - res_s *= cclk; - /* Recombine the two */ - res = res_s + res_n; - /* And now add in the base time to get to the real timestamp */ - res += dcur.rt_prev; - return (res); + sbt = hw_clocks * sbt_cur_to_prev / hw_clk_div + dcur.sbt_prev; + return (sbttons(sbt)); } static inline void