git: ce33f96fcf2f - main - mlx5e: Ensure rx timestamps are monotonically increasing
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 16 Apr 2026 16:30:50 UTC
The branch main has been updated by gallatin:
URL: https://cgit.FreeBSD.org/src/commit/?id=ce33f96fcf2f2d0d49c406274bcc64df72fe530e
commit ce33f96fcf2f2d0d49c406274bcc64df72fe530e
Author: Andrew Gallatin <gallatin@FreeBSD.org>
AuthorDate: 2026-04-16 16:26:07 +0000
Commit: Andrew Gallatin <gallatin@FreeBSD.org>
CommitDate: 2026-04-16 16:27:27 +0000
mlx5e: Ensure rx timestamps are monotonically increasing
The clock calibration routine currently can result in rx timestamps
jumping backwards, which can confuse the TCP stack.
Ensure they are monotonically increasing by estimating what
we'd calculate as the next timestamp and clamp the calibration
so new timestamps are no earlier in time.
Reviewed by: kib, nickbanks_netflix.com
Tested by: nickbanks_netflix.com
Differential Revision: https://reviews.freebsd.org/D56427
Sponsored by: Netflix
---
sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 50 +++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index fb8b79c8f787..9bcb0dcf8e16 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1134,6 +1134,25 @@ mlx5e_hw_clock(struct mlx5e_priv *priv)
return (((uint64_t)hw_h << 32) | hw_l);
}
+/*
+ * Seed the first calibration point so that base_prev and clbr_hw_prev
+ * are always valid. Called once during attach before the first
+ * calibration callout fires.
+ */
+static void
+mlx5e_seed_calibration(struct mlx5e_priv *priv)
+{
+ struct mlx5e_clbr_point *cp;
+ struct timespec ts;
+
+ cp = &priv->clbr_points[0]; /* only slot 0 is seeded */
+ cp->clbr_hw_curr = mlx5e_hw_clock(priv); /* hardware clock sample */
+ nanouptime(&ts); /* host uptime sample paired with it */
+ cp->base_curr = mlx5e_timespec2usec(&ts);
+ cp->clbr_hw_prev = cp->clbr_hw_curr - 1; /* keeps hw delta nonzero (a divisor) */
+ cp->base_prev = cp->base_curr - 1; /* likewise a one-unit time delta */
+}
+
/*
* The calibration callout, it runs either in the context of the
* thread which enables calibration, or in callout. It takes the
@@ -1147,6 +1166,9 @@ mlx5e_calibration_callout(void *arg)
struct mlx5e_priv *priv;
struct mlx5e_clbr_point *next, *curr;
struct timespec ts;
+ uint64_t hw_delta_new, hw_delta_old;
+ uint64_t old_nsec, old_projected, old_sec;
+ uint64_t res_n, res_s, res_s_mod, rt_delta_old;
int clbr_curr_next;
priv = arg;
@@ -1175,6 +1197,33 @@ mlx5e_calibration_callout(void *arg)
nanouptime(&ts);
next->base_curr = mlx5e_timespec2usec(&ts);
+ /*
+ * Ensure monotonicity across calibration transitions. Compute
+ * what the old calibration would extrapolate to at the new
+ * hw_curr. If the new base_curr is less, clamp it so the new
+ * slope is at least as steep as the old one. This prevents
+ * packets from seeing time go backwards when the slope drops.
+ *
+ * Use the same split-seconds technique as mlx5e_mbuf_tstmp()
+ * to avoid overflowing uint64_t in the multiplication.
+ */
+ hw_delta_new = next->clbr_hw_curr - curr->clbr_hw_curr;
+ rt_delta_old = curr->base_curr - curr->base_prev;
+ hw_delta_old = curr->clbr_hw_curr - curr->clbr_hw_prev;
+ old_sec = hw_delta_new / priv->cclk;
+ old_nsec = hw_delta_new % priv->cclk;
+ res_s = old_sec * rt_delta_old;
+ res_n = old_nsec * rt_delta_old;
+ res_s_mod = res_s % hw_delta_old;
+ res_s /= hw_delta_old;
+ res_s_mod *= priv->cclk;
+ res_n += res_s_mod;
+ res_n /= hw_delta_old;
+ res_s *= priv->cclk;
+ old_projected = curr->base_curr + res_s + res_n;
+ if (next->base_curr < old_projected)
+ next->base_curr = old_projected;
+
curr->clbr_gen = 0;
atomic_thread_fence_rel();
priv->clbr_curr = clbr_curr_next;
@@ -4887,6 +4936,7 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
callout_init(&priv->tstmp_clbr, 1);
/* Pull out the frequency of the clock in hz */
priv->cclk = (uint64_t)MLX5_CAP_GEN(mdev, device_frequency_khz) * 1000ULL;
+ mlx5e_seed_calibration(priv);
mlx5e_reset_calibration_callout(priv);
pa.pa_version = PFIL_VERSION;