git: a540cdca3183 - main - tcp_hpts: use queue(9) STAILQ for the input queue

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Mon, 17 Apr 2023 16:09:29 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=a540cdca3183da08b2a865d6af30794a79c4a8c2

commit a540cdca3183da08b2a865d6af30794a79c4a8c2
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2023-04-17 16:07:23 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2023-04-17 16:07:23 +0000

    tcp_hpts: use queue(9) STAILQ for the input queue
    
    Reviewed by:            rrs
    Differential Revision:  https://reviews.freebsd.org/D39574
---
 sys/netinet/tcp_hpts.c                   |  8 +++---
 sys/netinet/tcp_lro.c                    | 22 +++++++---------
 sys/netinet/tcp_stacks/bbr.c             |  2 +-
 sys/netinet/tcp_stacks/rack.c            |  2 +-
 sys/netinet/tcp_stacks/rack_bbr_common.c |  6 ++---
 sys/netinet/tcp_subr.c                   | 43 +++++++++++---------------------
 sys/netinet/tcp_var.h                    |  3 +--
 7 files changed, 33 insertions(+), 53 deletions(-)

diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index 644811b44a19..e61547104775 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -92,9 +92,8 @@ __FBSDID("$FreeBSD$");
  *
  * There is a common functions within the rack_bbr_common code
  * version i.e. ctf_do_queued_segments(). This function
- * knows how to take the input queue of packets from
- * tp->t_in_pkts and process them digging out
- * all the arguments, calling any bpf tap and
+ * knows how to take the input queue of packets from tp->t_inqueue
+ * and process them digging out all the arguments, calling any bpf tap and
  * calling into tfb_do_segment_nounlock(). The common
  * function (ctf_do_queued_segments())  requires that
  * you have defined the tfb_do_segment_nounlock() as
@@ -1331,7 +1330,8 @@ again:
 				kern_prefetch(tp->t_fb_ptr, &did_prefetch);
 				did_prefetch = 1;
 			}
-			if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) && tp->t_in_pkt) {
+			if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) &&
+			    !STAILQ_EMPTY(&tp->t_inqueue)) {
 				error = (*tp->t_fb->tfb_do_queued_segments)(tp, 0);
 				if (error) {
 					/* The input killed the connection */
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 3ce49171a65c..7cbf535a9263 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -1179,18 +1179,14 @@ again:
 
 #ifdef TCPHPTS
 static void
-tcp_queue_pkts(struct inpcb *inp, struct tcpcb *tp, struct lro_entry *le)
+tcp_queue_pkts(struct tcpcb *tp, struct lro_entry *le)
 {
-	INP_WLOCK_ASSERT(inp);
-	if (tp->t_in_pkt == NULL) {
-		/* Nothing yet there */
-		tp->t_in_pkt = le->m_head;
-		tp->t_tail_pkt = le->m_last_mbuf;
-	} else {
-		/* Already some there */
-		tp->t_tail_pkt->m_nextpkt = le->m_head;
-		tp->t_tail_pkt = le->m_last_mbuf;
-	}
+
+	INP_WLOCK_ASSERT(tptoinpcb(tp));
+
+	STAILQ_HEAD(, mbuf) q = { le->m_head,
+	    &STAILQ_NEXT(le->m_last_mbuf, m_stailqpkt) };
+	STAILQ_CONCAT(&tp->t_inqueue, &q);
 	le->m_head = NULL;
 	le->m_last_mbuf = NULL;
 }
@@ -1221,7 +1217,7 @@ tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le,
 
 	/* Look at the last mbuf if any in queue */
  	if (can_append_old_cmp) {
-		m = tp->t_tail_pkt;
+		m = STAILQ_LAST(&tp->t_inqueue, mbuf, m_stailqpkt);
 		if (m != NULL && (m->m_flags & M_ACKCMP) != 0) {
 			if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) {
 				tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0);
@@ -1451,7 +1447,7 @@ tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
 	if (le->m_head != NULL) {
 		counter_u64_add(tcp_inp_lro_direct_queue, 1);
 		tcp_lro_log(tp, lc, le, NULL, 22, 1, inp->inp_flags2, 0, 1);
-		tcp_queue_pkts(inp, tp, le);
+		tcp_queue_pkts(tp, le);
 	}
 	if (should_wake) {
 		/* Wakeup */
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index bce17b57205c..fab85e88a382 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -11600,7 +11600,7 @@ bbr_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 	int retval;
 
 	/* First lets see if we have old packets */
-	if (tp->t_in_pkt) {
+	if (!STAILQ_EMPTY(&tp->t_inqueue)) {
 		if (ctf_do_queued_segments(tp, 1)) {
 			m_freem(m);
 			return;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 7b97a8e9c5d9..c134c059ec89 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -17069,7 +17069,7 @@ rack_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 	struct timeval tv;
 
 	/* First lets see if we have old packets */
-	if (tp->t_in_pkt) {
+	if (!STAILQ_EMPTY(&tp->t_inqueue)) {
 		if (ctf_do_queued_segments(tp, 1)) {
 			m_freem(m);
 			return;
diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c
index 7f5f8817466a..91bf32159004 100644
--- a/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ b/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -493,10 +493,8 @@ ctf_do_queued_segments(struct tcpcb *tp, int have_pkt)
 	struct mbuf *m;
 
 	/* First lets see if we have old packets */
-	if (tp->t_in_pkt) {
-		m = tp->t_in_pkt;
-		tp->t_in_pkt = NULL;
-		tp->t_tail_pkt = NULL;
+	if ((m = STAILQ_FIRST(&tp->t_inqueue)) != NULL) {
+		STAILQ_INIT(&tp->t_inqueue);
 		if (ctf_process_inbound_raw(tp, m, have_pkt)) {
 			/* We lost the tcpcb (maybe a RST came in)? */
 			return(1);
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 3a6a902c35be..cc879999fe26 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2262,6 +2262,7 @@ tcp_newtcpcb(struct inpcb *inp)
 #endif
 
 	TAILQ_INIT(&tp->t_segq);
+	STAILQ_INIT(&tp->t_inqueue);
 	tp->t_maxseg =
 #ifdef INET6
 		isipv6 ? V_tcp_v6mssdflt :
@@ -2437,8 +2438,10 @@ tcp_discardcb(struct tcpcb *tp)
 		}
 	}
 	TCPSTATES_DEC(tp->t_state);
+
 	if (tp->t_fb->tfb_tcp_fb_fini)
 		(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+	MPASS(STAILQ_EMPTY(&tp->t_inqueue));
 
 	/*
 	 * If we got enough samples through the srtt filter,
@@ -4242,7 +4245,8 @@ tcp_handle_orphaned_packets(struct tcpcb *tp)
 
 	if (tptoinpcb(tp)->inp_flags2 & INP_MBUF_L_ACKS)
 		return;
-	if ((tptoinpcb(tp)->inp_flags2 & INP_SUPPORTS_MBUFQ) == 0) {
+	if ((tptoinpcb(tp)->inp_flags2 & INP_SUPPORTS_MBUFQ) == 0 &&
+	    !STAILQ_EMPTY(&tp->t_inqueue)) {
 		/*
 		 * It is unsafe to process the packets since a
 		 * reset may be lurking in them (its rare but it
@@ -4253,44 +4257,27 @@ tcp_handle_orphaned_packets(struct tcpcb *tp)
 		 * This new stack does not do any fancy LRO features
 		 * so all we can do is toss the packets.
 		 */
-		m = tp->t_in_pkt;
-		tp->t_in_pkt = NULL;
-		tp->t_tail_pkt = NULL;
-		while (m) {
-			save = m->m_nextpkt;
-			m->m_nextpkt = NULL;
+		m = STAILQ_FIRST(&tp->t_inqueue);
+		STAILQ_INIT(&tp->t_inqueue);
+		STAILQ_FOREACH_FROM_SAFE(m, &tp->t_inqueue, m_stailqpkt, save)
 			m_freem(m);
-			m = save;
-		}
 	} else {
 		/*
 		 * Here we have a stack that does mbuf queuing but
 		 * does not support compressed ack's. We must
 		 * walk all the mbufs and discard any compressed acks.
 		 */
-		m = tp->t_in_pkt;
-		prev = NULL;
-		while (m) {
+		STAILQ_FOREACH_SAFE(m, &tp->t_inqueue, m_stailqpkt, save) {
 			if (m->m_flags & M_ACKCMP) {
-				/* We must toss this packet */
-				if (tp->t_tail_pkt == m)
-					tp->t_tail_pkt = prev;
-				if (prev)
-					prev->m_nextpkt = m->m_nextpkt;
+				if (m == STAILQ_FIRST(&tp->t_inqueue))
+					STAILQ_REMOVE_HEAD(&tp->t_inqueue,
+					    m_stailqpkt);
 				else
-					tp->t_in_pkt =  m->m_nextpkt;
-				m->m_nextpkt = NULL;
+					STAILQ_REMOVE_AFTER(&tp->t_inqueue,
+					    prev, m_stailqpkt);
 				m_freem(m);
-				/* move forward */
-				if (prev)
-					m = prev->m_nextpkt;
-				else
-					m = tp->t_in_pkt;
-			} else {
-				/* this one is ok */
+			} else
 				prev = m;
-				m = m->m_nextpkt;
-			}
 		}
 	}
 }
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 9e58c4c3576b..5632e5d8bb41 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -355,8 +355,7 @@ struct tcpcb {
 	int	t_segqlen;		/* segment reassembly queue length */
 	uint32_t t_segqmbuflen;		/* total reassembly queue byte length */
 	struct	tsegqe_head t_segq;	/* segment reassembly queue */
-	struct mbuf *t_in_pkt;
-	struct mbuf *t_tail_pkt;
+	STAILQ_HEAD(, mbuf) t_inqueue;	/* HPTS input queue */
 	uint32_t snd_ssthresh;		/* snd_cwnd size threshold for
 					 * for slow start exponential to
 					 * linear switch