git: ac56d68b1b71 - stable/14 - tcp: simplify endpoint creation at the passive side

From: Michael Tuexen <tuexen_at_FreeBSD.org>
Date: Sat, 03 Aug 2024 23:07:33 UTC
The branch stable/14 has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=ac56d68b1b71031370e94577adc5fbc12a0353d2

commit ac56d68b1b71031370e94577adc5fbc12a0353d2
Author:     Michael Tuexen <tuexen@FreeBSD.org>
AuthorDate: 2024-07-21 09:49:37 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2024-08-03 23:06:50 +0000

    tcp: simplify endpoint creation at the passive side
    
    Use the intended TCP stack when creating a TCP endpoint instead of
    first creating the endpoint with the default stack and then
    switching it to use the intended TCP stack.
    
    Reviewed by:            Peter Lei, rrs and jtl (older version)
    Sponsored by:           Netflix, Inc.
    Differential Revision:  https://reviews.freebsd.org/D45411
    
    (cherry picked from commit baee801c92d26f4b7d1b2a9edd755c1b07b84167)
---
 sys/netinet/tcp_subr.c     | 19 +++++++++++++++++--
 sys/netinet/tcp_syncache.c | 39 ++++++---------------------------------
 sys/netinet/tcp_usrreq.c   |  2 +-
 sys/netinet/tcp_var.h      |  2 +-
 4 files changed, 25 insertions(+), 37 deletions(-)

diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 6feb1916bb35..5d87d0327148 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2221,9 +2221,11 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
  * Create a new TCP control block, making an empty reassembly queue and hooking
  * it to the argument protocol control block.  The `inp' parameter must have
  * come from the zone allocator set up by tcpcbstor declaration.
+ * The caller can provide a pointer to a tcpcb of the listener to inherit the
+ * TCP function block from the listener.
  */
 struct tcpcb *
-tcp_newtcpcb(struct inpcb *inp)
+tcp_newtcpcb(struct inpcb *inp, struct tcpcb *listening_tcb)
 {
 	struct tcpcb *tp = intotcpcb(inp);
 #ifdef INET6
@@ -2241,8 +2243,21 @@ tcp_newtcpcb(struct inpcb *inp)
 	tp->t_ccv.type = IPPROTO_TCP;
 	tp->t_ccv.ccvc.tcp = tp;
 	rw_rlock(&tcp_function_lock);
-	tp->t_fb = V_tcp_func_set_ptr;
+	if (listening_tcb != NULL) {
+		INP_LOCK_ASSERT(tptoinpcb(listening_tcb));
+		KASSERT(listening_tcb->t_fb != NULL,
+		    ("tcp_newtcpcb: listening_tcb->t_fb is NULL"));
+		if (listening_tcb->t_fb->tfb_flags & TCP_FUNC_BEING_REMOVED) {
+			rw_runlock(&tcp_function_lock);
+			return (NULL);
+		}
+		tp->t_fb = listening_tcb->t_fb;
+	} else {
+		tp->t_fb = V_tcp_func_set_ptr;
+	}
 	refcount_acquire(&tp->t_fb->tfb_refcnt);
+	KASSERT((tp->t_fb->tfb_flags & TCP_FUNC_BEING_REMOVED) == 0,
+	    ("tcp_newtcpcb: using TFB being removed"));
 	rw_runlock(&tcp_function_lock);
 	/*
 	 * Use the current system default CC algorithm.
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 21dfe8dd123e..052e257a3cb5 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -777,7 +777,7 @@ done:
 static struct socket *
 syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 {
-	struct tcp_function_block *blk;
+	struct tcpcb *listening_tcb;
 	struct inpcb *inp = NULL;
 	struct socket *so;
 	struct tcpcb *tp;
@@ -802,7 +802,11 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 		goto allocfail;
 	}
 	inp = sotoinpcb(so);
-	if ((tp = tcp_newtcpcb(inp)) == NULL) {
+	if (V_functions_inherit_listen_socket_stack)
+		listening_tcb = sototcpcb(lso);
+	else
+		listening_tcb = NULL;
+	if ((tp = tcp_newtcpcb(inp, listening_tcb)) == NULL) {
 		in_pcbfree(inp);
 		sodealloc(so);
 		goto allocfail;
@@ -912,37 +916,6 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 	tp->t_port = sc->sc_port;
 	tcp_rcvseqinit(tp);
 	tcp_sendseqinit(tp);
-	blk = sototcpcb(lso)->t_fb;
-	if (V_functions_inherit_listen_socket_stack && blk != tp->t_fb) {
-		/*
-		 * Our parents t_fb was not the default,
-		 * we need to release our ref on tp->t_fb and
-		 * pickup one on the new entry.
-		 */
-		struct tcp_function_block *rblk;
-		void *ptr = NULL;
-
-		rblk = find_and_ref_tcp_fb(blk);
-		KASSERT(rblk != NULL,
-		    ("cannot find blk %p out of syncache?", blk));
-
-		if (rblk->tfb_tcp_fb_init == NULL ||
-		    (*rblk->tfb_tcp_fb_init)(tp, &ptr) == 0) {
-			/* Release the old stack */
-			if (tp->t_fb->tfb_tcp_fb_fini != NULL)
-				(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
-			refcount_release(&tp->t_fb->tfb_refcnt);
-			/* Now set in all the pointers */
-			tp->t_fb = rblk;
-			tp->t_fb_ptr = ptr;
-		} else {
-			/*
-			 * Initialization failed. Release the reference count on
-			 * the looked up default stack.
-			 */
-			refcount_release(&rblk->tfb_refcnt);
-		}
-	}
 	tp->snd_wl1 = sc->sc_irs;
 	tp->snd_max = tp->iss + 1;
 	tp->snd_nxt = tp->iss + 1;
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 4e754965487b..384c27b3a6cd 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -174,7 +174,7 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td)
 	if (error)
 		goto out;
 	inp = sotoinpcb(so);
-	tp = tcp_newtcpcb(inp);
+	tp = tcp_newtcpcb(inp, NULL);
 	if (tp == NULL) {
 		error = ENOBUFS;
 		in_pcbfree(inp);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 010ad748260a..4632474c1d11 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1463,7 +1463,7 @@ void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
 void	 tcp_mss(struct tcpcb *, int);
 int	 tcp_mssopt(struct in_conninfo *);
 struct tcpcb *
-	 tcp_newtcpcb(struct inpcb *);
+	 tcp_newtcpcb(struct inpcb *, struct tcpcb *);
 int	 tcp_default_output(struct tcpcb *);
 void	 tcp_state_change(struct tcpcb *, int);
 void	 tcp_respond(struct tcpcb *, void *,