git: 2c74c9dac3a6 - main - cxgbe: Compute timestamps via sbintime_t.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 26 Sep 2022 22:10:41 UTC
The branch main has been updated by jhb:
URL: https://cgit.FreeBSD.org/src/commit/?id=2c74c9dac3a6e74a2c33d519cdcf1de145e7664c
commit 2c74c9dac3a6e74a2c33d519cdcf1de145e7664c
Author: John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2022-09-26 21:57:26 +0000
Commit: John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-09-26 21:58:30 +0000
cxgbe: Compute timestamps via sbintime_t.
This uses fixed-point math already used elsewhere in the kernel for
sub-second time values. To avoid overflows, this does require updating
the calibration once a second rather than once every 30 seconds. Note
that the cxgbe driver already queries multiple registers once a second
for the statistics timers. This version also uses fewer instructions
with no branches (for the math portion) in the per-packet fast path.
Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D36663
---
sys/dev/cxgbe/adapter.h | 4 ++--
sys/dev/cxgbe/t4_main.c | 39 ++++++---------------------------------
sys/dev/cxgbe/t4_sge.c | 41 +++++------------------------------------
3 files changed, 13 insertions(+), 71 deletions(-)
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 4080f04246c2..9312549cf7ba 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -869,8 +869,8 @@ struct clip_entry;
struct clock_sync {
uint64_t hw_cur;
uint64_t hw_prev;
- uint64_t rt_cur;
- uint64_t rt_prev;
+ sbintime_t sbt_cur;
+ sbintime_t sbt_prev;
uint32_t gen;
};
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 6547d199ca02..9f982ca32097 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -314,18 +314,6 @@ static int t4_rsrv_noflowq = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
0, "Reserve TX queue 0 of each VI for non-flowid packets");
-static int t4_clocksync_fast = 1;
-SYSCTL_INT(_hw_cxgbe, OID_AUTO, csfast, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_fast, 0,
- "During initial clock sync how fast do we update in seconds");
-
-static int t4_clocksync_normal = 30;
-SYSCTL_INT(_hw_cxgbe, OID_AUTO, csnormal, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_normal, 0,
- "During normal clock sync how fast do we update in seconds");
-
-static int t4_fast_2_normal = 30;
-SYSCTL_INT(_hw_cxgbe, OID_AUTO, cscount, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_fast_2_normal, 0,
- "How many clock syncs do we need to do to transition to slow");
-
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
#define NOFLDTXQ 8
static int t4_nofldtxq = -NOFLDTXQ;
@@ -1121,17 +1109,10 @@ t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
return (-1);
}
-static inline uint64_t
-t4_get_ns_timestamp(struct timespec *ts)
-{
- return ((ts->tv_sec * 1000000000) + ts->tv_nsec);
-}
-
static void
t4_calibration(void *arg)
{
struct adapter *sc;
- struct timespec ts;
struct clock_sync *cur, *nex;
int next_up;
@@ -1143,17 +1124,15 @@ t4_calibration(void *arg)
if (__predict_false(sc->cal_count == 0)) {
/* First time in, just get the values in */
cur->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO);
- nanouptime(&ts);
- cur->rt_cur = t4_get_ns_timestamp(&ts);
+ cur->sbt_cur = sbinuptime();
sc->cal_count++;
goto done;
}
nex->hw_prev = cur->hw_cur;
- nex->rt_prev = cur->rt_cur;
- KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibtration"));
+ nex->sbt_prev = cur->sbt_cur;
+ KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibration"));
nex->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO);
- nanouptime(&ts);
- nex->rt_cur = t4_get_ns_timestamp(&ts);
+ nex->sbt_cur = sbinuptime();
if ((nex->hw_cur - nex->hw_prev) == 0) {
/* The clock is not advancing? */
sc->cal_count = 0;
@@ -1164,17 +1143,11 @@ t4_calibration(void *arg)
sc->cal_current = next_up;
sc->cal_gen++;
atomic_store_rel_int(&nex->gen, sc->cal_gen);
- if (sc->cal_count < t4_fast_2_normal)
- sc->cal_count++;
done:
- callout_reset_sbt_curcpu(&sc->cal_callout,
- ((sc->cal_count < t4_fast_2_normal) ?
- t4_clocksync_fast : t4_clocksync_normal) * SBT_1S, 0,
- t4_calibration, sc, C_DIRECT_EXEC);
+ callout_reset_sbt_curcpu(&sc->cal_callout, SBT_1S, 0, t4_calibration,
+ sc, C_DIRECT_EXEC);
}
-
-
static void
t4_calibration_start(struct adapter *sc)
{
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index e0b73ccd8b51..161a753cc4ee 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -1526,10 +1526,9 @@ static inline uint64_t
t4_tstmp_to_ns(struct adapter *sc, uint64_t lf)
{
struct clock_sync *cur, dcur;
- uint64_t tstmp_sec, tstmp_nsec;
uint64_t hw_clocks;
- uint64_t rt_cur_to_prev, res_s, res_n, res_s_modulo, res;
- uint64_t hw_clk_div, cclk;
+ uint64_t hw_clk_div;
+ sbintime_t sbt_cur_to_prev, sbt;
uint64_t hw_tstmp = lf & 0xfffffffffffffffULL; /* 60b, not 64b. */
uint32_t gen;
@@ -1551,42 +1550,12 @@ t4_tstmp_to_ns(struct adapter *sc, uint64_t lf)
*
* With the constraints that we cannot use float and we
* don't want to overflow the uint64_t numbers we are using.
- *
- * The plan is to take the clocking value of the hw timestamps
- * and split them into seconds and nanosecond equivalent portions.
- * Then we operate on the two portions seperately making sure to
- * bring back the carry over from the seconds when we divide.
- *
- * First up lets get the two divided into separate entities
- * i.e. the seconds. We use the clock frequency for this.
- * Note that vpd.cclk is in khz, we need it in raw hz so
- * convert to hz.
*/
- cclk = (uint64_t)sc->params.vpd.cclk * 1000;
hw_clocks = hw_tstmp - dcur.hw_prev;
- tstmp_sec = hw_clocks / cclk;
- tstmp_nsec = hw_clocks % cclk;
- /* Now work with them separately */
- rt_cur_to_prev = (dcur.rt_cur - dcur.rt_prev);
- res_s = tstmp_sec * rt_cur_to_prev;
- res_n = tstmp_nsec * rt_cur_to_prev;
- /* Now lets get our divider */
+ sbt_cur_to_prev = (dcur.sbt_cur - dcur.sbt_prev);
hw_clk_div = dcur.hw_cur - dcur.hw_prev;
- /* Make sure to save the remainder from the seconds divide */
- res_s_modulo = res_s % hw_clk_div;
- res_s /= hw_clk_div;
- /* scale the remainder to where it should be */
- res_s_modulo *= cclk;
- /* Now add in the remainder */
- res_n += res_s_modulo;
- /* Now do the divide */
- res_n /= hw_clk_div;
- res_s *= cclk;
- /* Recombine the two */
- res = res_s + res_n;
- /* And now add in the base time to get to the real timestamp */
- res += dcur.rt_prev;
- return (res);
+ sbt = hw_clocks * sbt_cur_to_prev / hw_clk_div + dcur.sbt_prev;
+ return (sbttons(sbt));
}
static inline void