git: 86c9325d341f - main - tcp: simplify stack switching protocol

From: Michael Tuexen <tuexen_at_FreeBSD.org>
Date: Thu, 06 Jun 2024 06:33:00 UTC
The branch main has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=86c9325d341fc3f39543bcfaf7c3bb3ceeacbe5d

commit 86c9325d341fc3f39543bcfaf7c3bb3ceeacbe5d
Author:     Michael Tuexen <tuexen@FreeBSD.org>
AuthorDate: 2024-06-06 06:29:05 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2024-06-06 06:29:05 +0000

    tcp: simplify stack switching protocol
    
    Before this patch, a stack (tfb) accepts a tcpcb (tp), if the
    tp->t_state is TCPS_CLOSED or tfb->tfb_tcp_handoff_ok is not NULL
    and tfb->tfb_tcp_handoff_ok(tp) returns 0.
    After this patch, the only check is tfb->tfb_tcp_handoff_ok(tp)
    returns 0. tfb->tfb_tcp_handoff_ok must always be provided.
    For existing TCP stacks (FreeBSD, RACK and BBR) there is no
    functional change. However, the logic is simpler.
    
    Reviewed by:            lstewart, peter_lei_ieee_.org, rrs
    MFC after:              1 week
    Sponsored by:           Netflix, Inc.
    Differential Revision:  https://reviews.freebsd.org/D45253
---
 share/man/man9/tcp_functions.9 | 45 ++++++++++++++----------------------------
 sys/netinet/tcp_subr.c         | 12 +++++------
 sys/netinet/tcp_usrreq.c       | 27 ++++++-------------------
 sys/netinet/tcp_var.h          | 11 ++++-------
 4 files changed, 30 insertions(+), 65 deletions(-)

diff --git a/share/man/man9/tcp_functions.9 b/share/man/man9/tcp_functions.9
index eb9b299eae9e..1e0616e03a9f 100644
--- a/share/man/man9/tcp_functions.9
+++ b/share/man/man9/tcp_functions.9
@@ -23,7 +23,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd March 10, 2017
+.Dd June 6, 2024
 .Dt TCP_FUNCTIONS 9
 .Os
 .Sh NAME
@@ -176,9 +176,10 @@ struct tcp_function_block {
 			    uint32_t, u_int);
 	int	(*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
 	void	(*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
-	/* Optional functions */
+	/* Optional function */
 	void	(*tfb_tcp_rexmit_tmr)(struct tcpcb *);
-	void	(*tfb_tcp_handoff_ok)(struct tcpcb *);
+	/* Mandatory function */
+	int	(*tfb_tcp_handoff_ok)(struct tcpcb *);
 	/* System use */
 	volatile uint32_t tfb_refcnt;
 	uint32_t  tfb_flags;
@@ -261,37 +262,21 @@ However, care must be taken to ensure the retransmit timer leaves the
 TCP control block in a valid state for the remainder of the retransmit
 timer logic.
 .Pp
-A user may select a new TCP stack before calling
-.Xr connect 2
-or
-.Xr listen 2 .
-Optionally, a TCP stack may also allow a user to begin using the TCP stack for
-a connection that is in a later state by setting a non-NULL function pointer in
-the
+A user may select a new TCP stack before calling at any time.
+Therefore, the function pointer
 .Va tfb_tcp_handoff_ok
-field.
-If this field is non-NULL and a user attempts to select that TCP stack after
-calling
-.Xr connect 2
-or
-.Xr listen 2
-for that socket, the kernel will call the function pointed to by the
+field must be non-NULL.
+If a user attempts to select that TCP stack, the kernel will call the function
+pointed to by the
 .Va tfb_tcp_handoff_ok
 field.
 The function should return 0 if the user is allowed to switch the socket to use
-the TCP stack.
-Otherwise, the function should return an error code, which will be returned to
-the user.
-If the
-.Va tfb_tcp_handoff_ok
-field is
-.Dv NULL
-and a user attempts to select the TCP stack after calling
-.Xr connect 2
-or
-.Xr listen 2
-for that socket, the operation will fail and the kernel will return
-.Er EINVAL .
+the TCP stack. In this case, the kernel will call the function pointed to by
+.Va tfb_tcp_fb_init
+if this function pointer is non-NULL and finally perform the stack switch.
+If the user is not allowed to switch the socket, the function should undo any
+changes it made to the connection state configuration and return an error code,
+which will be returned to the user.
 .Pp
 The
 .Va tfb_refcnt
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 7259d3607869..b871d8416b19 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -516,8 +516,7 @@ tcp_switch_back_to_default(struct tcpcb *tp)
 		tfb = NULL;
 	}
 	/* Does the stack accept this connection? */
-	if (tfb != NULL && tfb->tfb_tcp_handoff_ok != NULL &&
-	    (*tfb->tfb_tcp_handoff_ok)(tp)) {
+	if (tfb != NULL && (*tfb->tfb_tcp_handoff_ok)(tp)) {
 		refcount_release(&tfb->tfb_refcnt);
 		tfb = NULL;
 	}
@@ -551,11 +550,9 @@ tcp_switch_back_to_default(struct tcpcb *tp)
 		/* there always should be a default */
 		panic("Can't refer to tcp_def_funcblk");
 	}
-	if (tfb->tfb_tcp_handoff_ok != NULL) {
-		if ((*tfb->tfb_tcp_handoff_ok) (tp)) {
-			/* The default stack cannot say no */
-			panic("Default stack rejects a new session?");
-		}
+	if ((*tfb->tfb_tcp_handoff_ok)(tp)) {
+		/* The default stack cannot say no */
+		panic("Default stack rejects a new session?");
 	}
 	if (tfb->tfb_tcp_fb_init != NULL &&
 	    (*tfb->tfb_tcp_fb_init)(tp, &ptr)) {
@@ -1186,6 +1183,7 @@ register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
 	if ((blk->tfb_tcp_output == NULL) ||
 	    (blk->tfb_tcp_do_segment == NULL) ||
 	    (blk->tfb_tcp_ctloutput == NULL) ||
+	    (blk->tfb_tcp_handoff_ok == NULL) ||
 	    (strlen(blk->tfb_tcp_block_name) == 0)) {
 		/*
 		 * These functions are required and you
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index f768f42114d4..3bc283c5a9db 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1731,32 +1731,17 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
 			INP_WUNLOCK(inp);
 			return (0);
 		}
-		if (tp->t_state != TCPS_CLOSED) {
-			/*
-			 * The user has advanced the state
-			 * past the initial point, we may not
-			 * be able to switch.
-			 */
-			if (blk->tfb_tcp_handoff_ok != NULL) {
-				/*
-				 * Does the stack provide a
-				 * query mechanism, if so it may
-				 * still be possible?
-				 */
-				error = (*blk->tfb_tcp_handoff_ok)(tp);
-			} else
-				error = EINVAL;
-			if (error) {
-				refcount_release(&blk->tfb_refcnt);
-				INP_WUNLOCK(inp);
-				return(error);
-			}
-		}
 		if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
 			refcount_release(&blk->tfb_refcnt);
 			INP_WUNLOCK(inp);
 			return (ENOENT);
 		}
+		error = (*blk->tfb_tcp_handoff_ok)(tp);
+		if (error) {
+			refcount_release(&blk->tfb_refcnt);
+			INP_WUNLOCK(inp);
+			return (error);
+		}
 		/*
 		 * Ensure the new stack takes ownership with a
 		 * clean slate on peak rate threshold.
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 3fdc1f4a9d74..e81ebf301c8e 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -542,13 +542,10 @@ typedef enum {
 #define	TCP_FUNC_OUTPUT_CANDROP	0x02   	/* tfb_tcp_output may ask tcp_drop */
 
 /**
- * Adding a tfb_tcp_handoff_ok function allows the socket
- * option to change stacks to query you even if the
- * connection is in a later stage. You return 0 to
- * say you can take over and run your stack, you return
- * non-zero (an error number) to say no you can't.
- * If the function is undefined you can only change
- * in the early states (before connect or listen).
+ * tfb_tcp_handoff_ok is a mandatory function allowing
+ * to query a stack, if it can take over a tcpcb.
+ * You return 0 to say you can take over and run your stack,
+ * you return non-zero (an error number) to say no you can't.
  *
  * tfb_tcp_fb_init is used to allow the new stack to
  * setup its control block. Among the things it must