git: 9e4d9e4c4d79 - main - tcp: Preparation for allowing hardware TLS to be able to kick a tcp connection that is retransmitting too much out of hardware and back to software.

Randall Stewart rrs at FreeBSD.org
Fri Jun 25 13:32:28 UTC 2021


The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=9e4d9e4c4d79db58740a05d67645351e640aa32c

commit 9e4d9e4c4d79db58740a05d67645351e640aa32c
Author:     Randall Stewart <rrs at FreeBSD.org>
AuthorDate: 2021-06-25 13:30:54 +0000
Commit:     Randall Stewart <rrs at FreeBSD.org>
CommitDate: 2021-06-25 13:30:54 +0000

    tcp: Preparation for allowing hardware TLS to be able to kick a tcp connection that is retransmitting too much out of hardware and back to software.
    
    Hardware TLS is now supported in some interface cards and it works well. Except that
    when we have connections that retransmit a lot we get into trouble with all the retransmits.
    This prep step makes way for change that Drew will be making so that we can "kick out" a
    session from hardware TLS.
    
    Reviewed by: mtuexen, gallatin
    Sponsored by: Netflix Inc
    Differential Revision: https://reviews.freebsd.org/D30895
---
 sys/netinet/tcp_output.c          |  2 +-
 sys/netinet/tcp_stacks/bbr.c      |  2 +-
 sys/netinet/tcp_stacks/rack.c     | 68 +++++++++++++++++++++++++++++++--------
 sys/netinet/tcp_stacks/tcp_rack.h |  6 ++--
 sys/netinet/tcp_var.h             |  6 ++--
 5 files changed, 63 insertions(+), 21 deletions(-)

diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 8d78e639636b..d0b56072e9af 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1560,7 +1560,7 @@ send:
 
 out:
 	if (error == 0)
-		tcp_account_for_send(tp, len, (tp->snd_nxt != tp->snd_max), 0);
+		tcp_account_for_send(tp, len, (tp->snd_nxt != tp->snd_max), 0, hw_tls);
 	/*
 	 * In transmit state, time the transmission and arrange for
 	 * the retransmit.  In persist state, just set snd_max.
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 57abbbf694b6..f8cd0bec1c95 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -13750,7 +13750,7 @@ out:
 	 * retransmit.  In persist state, just set snd_max.
 	 */
 	if (error == 0) {
-		tcp_account_for_send(tp, len, (rsm != NULL), doing_tlp);
+		tcp_account_for_send(tp, len, (rsm != NULL), doing_tlp, hw_tls);
 		if (TCPS_HAVEESTABLISHED(tp->t_state) &&
 		    (tp->t_flags & TF_SACK_PERMIT) &&
 		    tp->rcv_numsacks > 0)
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 521fd6c625f5..6678ca92738b 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 #include "opt_ratelimit.h"
+#include "opt_kern_tls.h"
 #include <sys/param.h>
 #include <sys/arb.h>
 #include <sys/module.h>
@@ -483,7 +484,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to,
 static void
 rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
     uint32_t seq_out, uint8_t th_flags, int32_t err, uint64_t ts,
-    struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff);
+    struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls);
 
 static void
 rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack,
@@ -6061,7 +6062,8 @@ rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm,
 	/* Push bit must go to the right edge as well */
 	if (rsm->r_flags & RACK_HAD_PUSH)
 		rsm->r_flags &= ~RACK_HAD_PUSH;
-
+	/* Clone over the state of the hw_tls flag */
+	nrsm->r_hw_tls = rsm->r_hw_tls;
 	/*
 	 * Now we need to find nrsm's new location in the mbuf chain
 	 * we basically calculate a new offset, which is soff +
@@ -7191,7 +7193,7 @@ rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack,
 static void
 rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
 		uint32_t seq_out, uint8_t th_flags, int32_t err, uint64_t cts,
-		struct rack_sendmap *hintrsm, uint16_t add_flag, struct mbuf *s_mb, uint32_t s_moff)
+		struct rack_sendmap *hintrsm, uint16_t add_flag, struct mbuf *s_mb, uint32_t s_moff, int hw_tls)
 {
 	struct tcp_rack *rack;
 	struct rack_sendmap *rsm, *nrsm, *insret, fe;
@@ -7295,6 +7297,8 @@ again:
 		} else {
 			rsm->r_flags = add_flag;
 		}
+		if (hw_tls)
+			rsm->r_hw_tls = 1;
 		rsm->r_tim_lastsent[0] = cts;
 		rsm->r_rtr_cnt = 1;
 		rsm->r_rtr_bytes = 0;
@@ -14875,7 +14879,7 @@ rack_log_fsb(struct tcp_rack *rack, struct tcpcb *tp, struct socket *so, uint32_
 static struct mbuf *
 rack_fo_base_copym(struct mbuf *the_m, uint32_t the_off, int32_t *plen,
 		   struct rack_fast_send_blk *fsb,
-		   int32_t seglimit, int32_t segsize)
+		   int32_t seglimit, int32_t segsize, int hw_tls)
 {
 #ifdef KERN_TLS
 	struct ktls_session *tls, *ntls;
@@ -15059,7 +15063,7 @@ rack_fo_m_copym(struct tcp_rack *rack, int32_t *plen,
 	*s_mb = rack->r_ctl.fsb.m;
 	n = rack_fo_base_copym(m, soff, plen,
 			       &rack->r_ctl.fsb,
-			       seglimit, segsize);
+			       seglimit, segsize, rack->r_ctl.fsb.hw_tls);
 	return (n);
 }
 
@@ -15243,7 +15247,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 		/* Fix up the orig_m_len and possibly the mbuf offset */
 		rack_adjust_orig_mlen(rsm);
 	}
-	m->m_next = rack_fo_base_copym(rsm->m, rsm->soff, &len, NULL, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize);
+	m->m_next = rack_fo_base_copym(rsm->m, rsm->soff, &len, NULL, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, rsm->r_hw_tls);
 	if (len <= segsiz) {
 		/*
 		 * Must have ran out of mbufs for the copy
@@ -15405,13 +15409,13 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 		goto failed;
 	}
 	rack_log_output(tp, &to, len, rsm->r_start, flags, error, rack_to_usec_ts(tv),
-			rsm, RACK_SENT_FP, rsm->m, rsm->soff);
+			rsm, RACK_SENT_FP, rsm->m, rsm->soff, rsm->r_hw_tls);
 	if (doing_tlp && (rack->fast_rsm_hack == 0)) {
 		rack->rc_tlp_in_progress = 1;
 		rack->r_ctl.rc_tlp_cnt_out++;
 	}
 	if (error == 0)
-		tcp_account_for_send(tp, len, 1, doing_tlp);
+		tcp_account_for_send(tp, len, 1, doing_tlp, rsm->r_hw_tls);
 	tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
 	rack->forced_ack = 0;	/* If we send something zap the FA flag */
 	if (IN_FASTRECOVERY(tp->t_flags) && rsm)
@@ -15688,7 +15692,8 @@ again:
 		goto failed;
 
 	/* s_mb and s_soff are saved for rack_log_output */
-	m->m_next = rack_fo_m_copym(rack, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, &s_mb, &s_soff);
+	m->m_next = rack_fo_m_copym(rack, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize,
+				    &s_mb, &s_soff);
 	if (len <= segsiz) {
 		/*
 		 * Must have ran out of mbufs for the copy
@@ -15883,7 +15888,7 @@ again:
 		goto failed;
 	}
 	rack_log_output(tp, &to, len, tp->snd_max, flags, error, rack_to_usec_ts(tv),
-			NULL, add_flag, s_mb, s_soff);
+			NULL, add_flag, s_mb, s_soff, rack->r_ctl.fsb.hw_tls);
 	m = NULL;
 	if (tp->snd_una == tp->snd_max) {
 		rack->r_ctl.rc_tlp_rxt_last_time = cts;
@@ -15891,7 +15896,7 @@ again:
 		tp->t_acktime = ticks;
 	}
 	if (error == 0)
-		tcp_account_for_send(tp, len, 0, 0);
+		tcp_account_for_send(tp, len, 0, 0, rack->r_ctl.fsb.hw_tls);
 
 	rack->forced_ack = 0;	/* If we send something zap the FA flag */
 	tot_len += len;
@@ -17030,6 +17035,10 @@ just_return_nolock:
 				rack->r_ctl.fsb.o_m_len = rack->r_ctl.fsb.m->m_len;
 				rack->r_ctl.fsb.tcp_flags = flags;
 				rack->r_ctl.fsb.left_to_send = orig_len - len;
+				if (hw_tls)
+					rack->r_ctl.fsb.hw_tls = 1;
+				else
+					rack->r_ctl.fsb.hw_tls = 0;
 				KASSERT((rack->r_ctl.fsb.left_to_send <= (sbavail(sb) - (tp->snd_max - tp->snd_una))),
 					("rack:%p left_to_send:%u sbavail:%u out:%u",
 					rack, rack->r_ctl.fsb.left_to_send, sbavail(sb),
@@ -18118,7 +18127,7 @@ out:
 	 * retransmit.  In persist state, just set snd_max.
 	 */
 	if (error == 0) {
-		tcp_account_for_send(tp, len, (rsm != NULL), doing_tlp);
+		tcp_account_for_send(tp, len, (rsm != NULL), doing_tlp, hw_tls);
 		rack->forced_ack = 0;	/* If we send something zap the FA flag */
 		if (rsm && (doing_tlp == 0)) {
 			/* Set we retransmitted */
@@ -18166,7 +18175,7 @@ out:
 	}
 	rack_log_output(tp, &to, len, rack_seq, (uint8_t) flags, error,
 			rack_to_usec_ts(&tv),
-			rsm, add_flag, s_mb, s_moff);
+			rsm, add_flag, s_mb, s_moff, hw_tls);
 
 
 	if ((error == 0) &&
@@ -18489,6 +18498,10 @@ enobufs:
 			rack->r_ctl.fsb.o_m_len = rack->r_ctl.fsb.m->m_len;
 			rack->r_ctl.fsb.tcp_flags = flags;
 			rack->r_ctl.fsb.left_to_send = orig_len - len;
+			if (hw_tls)
+				rack->r_ctl.fsb.hw_tls = 1;
+			else
+				rack->r_ctl.fsb.hw_tls = 0;
 			KASSERT((rack->r_ctl.fsb.left_to_send <= (sbavail(sb) - (tp->snd_max - tp->snd_una))),
 				("rack:%p left_to_send:%u sbavail:%u out:%u",
 				 rack, rack->r_ctl.fsb.left_to_send, sbavail(sb),
@@ -18535,6 +18548,10 @@ enobufs:
 			rack->r_ctl.fsb.o_m_len = rack->r_ctl.fsb.m->m_len;
 			rack->r_ctl.fsb.tcp_flags = flags;
 			rack->r_ctl.fsb.left_to_send = orig_len - len;
+			if (hw_tls)
+				rack->r_ctl.fsb.hw_tls = 1;
+			else
+				rack->r_ctl.fsb.hw_tls = 0;
 			KASSERT((rack->r_ctl.fsb.left_to_send <= (sbavail(sb) - (tp->snd_max - tp->snd_una))),
 				("rack:%p left_to_send:%u sbavail:%u out:%u",
 				 rack, rack->r_ctl.fsb.left_to_send, sbavail(sb),
@@ -19458,6 +19475,29 @@ rack_apply_deferred_options(struct tcp_rack *rack)
 	}
 }
 
+static void
+rack_hw_tls_change(struct tcpcb *tp, int chg)
+{
+	/*
+	 * HW tls state has changed.. fix all
+	 * rsm's in flight.
+	 */
+	struct tcp_rack *rack;
+	struct rack_sendmap *rsm;
+
+	rack = (struct tcp_rack *)tp->t_fb_ptr;
+	RB_FOREACH(rsm, rack_rb_tree_head, &rack->r_ctl.rc_mtree) {
+		if (chg)
+			rsm->r_hw_tls = 1;
+		else
+			rsm->r_hw_tls = 0;
+	}
+	if (chg)
+		rack->r_ctl.fsb.hw_tls = 1;
+	else
+		rack->r_ctl.fsb.hw_tls = 0;
+}
+
 static int
 rack_pru_options(struct tcpcb *tp, int flags)
 {
@@ -19483,7 +19523,7 @@ static struct tcp_function_block __tcp_rack = {
 	.tfb_tcp_handoff_ok = rack_handoff_ok,
 	.tfb_tcp_mtu_chg = rack_mtu_change,
 	.tfb_pru_options = rack_pru_options,
-
+	.tfb_hwtls_change = rack_hw_tls_change,
 };
 
 /*
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index 349f6daec2f4..54582d4fd9c5 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -68,7 +68,8 @@ struct rack_sendmap {
 	uint8_t r_just_ret : 1, /* After sending, the next pkt was just returned, i.e. limited  */
 		r_one_out_nr : 1,	/* Special case 1 outstanding and not in recovery */
 		r_no_rtt_allowed : 1, /* No rtt measurement allowed */
-		r_avail : 5;
+		r_hw_tls : 1,
+		r_avail : 4;
 	uint64_t r_tim_lastsent[RACK_NUM_OF_RETRANS];
 	uint64_t r_ack_arrival;	/* This is the time of ack-arrival (if SACK'd) */
 	RB_ENTRY(rack_sendmap) r_next;		/* RB Tree next */
@@ -330,7 +331,8 @@ struct rack_fast_send_blk {
 	struct mbuf *m;
 	uint32_t o_m_len;
 	uint32_t rfo_apply_push : 1,
-		unused : 31;
+		hw_tls : 1,
+		unused : 30;
 };
 
 struct rack_control {
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 553abaf6b334..dd30f89896d2 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -367,6 +367,7 @@ struct tcp_function_block {
 	int	(*tfb_tcp_handoff_ok)(struct tcpcb *);
 	void	(*tfb_tcp_mtu_chg)(struct tcpcb *);
 	int	(*tfb_pru_options)(struct tcpcb *, int);
+	void	(*tfb_hwtls_change)(struct tcpcb *, int);
 	volatile uint32_t tfb_refcnt;
 	uint32_t  tfb_flags;
 	uint8_t	tfb_id;
@@ -1137,7 +1138,8 @@ tcp_fields_to_net(struct tcphdr *th)
 }
 
 static inline void
-tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt, uint8_t is_tlp)
+tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt,
+    uint8_t is_tlp, int hw_tls __unused)
 {
 	if (is_tlp) {
 		tp->t_sndtlppack++;
@@ -1148,9 +1150,7 @@ tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt, uint8_t is_
 		tp->t_snd_rxt_bytes += len;
 	else
 		tp->t_sndbytes += len;
-
 }
-
 #endif /* _KERNEL */
 
 #endif /* _NETINET_TCP_VAR_H_ */


More information about the dev-commits-src-all mailing list