git: 493105c2a8f9 - main - tcp: fix simultaneous open and refine e80062a2d43

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Wed, 21 Sep 2022 21:04:51 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=493105c2a8f95be99b5299af650fcf8b59b91f55

commit 493105c2a8f95be99b5299af650fcf8b59b91f55
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-09-21 21:02:49 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-09-21 21:02:49 +0000

    tcp: fix simultaneous open and refine e80062a2d43
    
    - The soisconnected() call on transition from SYN_RCVD to ESTABLISHED
      is also necessary for a half-synchronized connection.  Fix that
      just setting the flag, when we transfer SYN-SENT -> SYN-RECEIVED.
    - Provide a comment that explains at what conditions the call to
      soisconnected() is necessary.
    - Hence mechanically rename the TF_INCQUEUE flag to TF_SONOTCONN.
    - Extend the change to the BBR and RACK stacks.
    
    Note: the interaction between the accept_filter(9) and the socket layer
    is not fully consistent, yet.  For most accept filters this call to
    soisconnected() will not move the connection from the incomplete queue
    to the complete.  The move would happen only when the filter has received
    the desired data, and soisconnected() would be called once again from
    sorwakeup().  Ideally, we should mark socket as connected only there,
    and leave the soisconnected() from SYN_RCVD->ESTABLISHED only for the
    simultaneous open case.  However, this doesn't yet work.
    
    Reviewed by:            rscheff, tuexen, rrs
    Differential revision:  https://reviews.freebsd.org/D36641
---
 sys/netinet/tcp_input.c       | 15 ++++++++++++---
 sys/netinet/tcp_stacks/bbr.c  |  7 +++++--
 sys/netinet/tcp_stacks/rack.c |  7 +++++--
 sys/netinet/tcp_syncache.c    |  2 +-
 sys/netinet/tcp_usrreq.c      |  4 ++--
 sys/netinet/tcp_var.h         |  2 +-
 6 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 86071141b174..e79cf48b9e55 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2105,7 +2105,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			 *        SYN-SENT -> SYN-RECEIVED
 			 *        SYN-SENT* -> SYN-RECEIVED*
 			 */
-			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN | TF_SONOTCONN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
@@ -2433,8 +2433,17 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	case TCPS_SYN_RECEIVED:
 
 		TCPSTAT_INC(tcps_connects);
-		if (tp->t_flags & TF_INCQUEUE) {
-			tp->t_flags &= ~TF_INCQUEUE;
+		if (tp->t_flags & TF_SONOTCONN) {
+			/*
+			 * Usually SYN_RECEIVED had been created from a LISTEN,
+			 * and solisten_enqueue() has already marked the socket
+			 * layer as connected.  If it didn't, which can happen
+			 * only with an accept_filter(9), then the tp is marked
+			 * with TF_SONOTCONN.  The other reason for this mark
+			 * to be set is a simultaneous open, a SYN_RECEIVED
+			 * that had been created from SYN_SENT.
+			 */
+			tp->t_flags &= ~TF_SONOTCONN;
 			soisconnected(so);
 		}
 		/* Do window scaling? */
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 47c7f1ee0eee..dd6b823c6142 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -8902,7 +8902,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		 * SYN-SENT -> SYN-RECEIVED SYN-SENT* -> SYN-RECEIVED* If
 		 * there was no CC option, clear cached CC value.
 		 */
-		tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+		tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN | TF_SONOTCONN);
 		tcp_state_change(tp, TCPS_SYN_RECEIVED);
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -9088,7 +9088,10 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
 					 tiwin, thflags, nxt_pkt));
 	}
 	KMOD_TCPSTAT_INC(tcps_connects);
-	soisconnected(so);
+	if (tp->t_flags & TF_SONOTCONN) {
+		tp->t_flags &= ~TF_SONOTCONN;
+		soisconnected(so);
+	}
 	/* Do window scaling? */
 	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
 	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 3ab4494865a5..6e323350100a 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -11318,7 +11318,7 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		 * SYN-SENT -> SYN-RECEIVED SYN-SENT* -> SYN-RECEIVED* If
 		 * there was no CC option, clear cached CC value.
 		 */
-		tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+		tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN | TF_SONOTCONN);
 		tcp_state_change(tp, TCPS_SYN_RECEIVED);
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -11507,7 +11507,10 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		    tiwin, thflags, nxt_pkt));
 	}
 	KMOD_TCPSTAT_INC(tcps_connects);
-	soisconnected(so);
+	if (tp->t_flags & TF_SONOTCONN) {
+		tp->t_flags &= ~TF_SONOTCONN;
+		soisconnected(so);
+	}
 	/* Do window scaling? */
 	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
 	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index f0ee539fa1ec..4fe5422b96ed 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1040,7 +1040,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, TCPS_LISTEN);
 
 	if (!solisten_enqueue(so, SS_ISCONNECTED))
-		tp->t_flags |= TF_INCQUEUE;
+		tp->t_flags |= TF_SONOTCONN;
 
 	return (so);
 
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 474cdb4d7787..ba7ed388c6b9 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2978,8 +2978,8 @@ db_print_tflags(u_int t_flags)
 		db_printf("%sTF_MORETOCOME", comma ? ", " : "");
 		comma = 1;
 	}
-	if (t_flags & TF_INCQUEUE) {
-		db_printf("%sTF_INCQUEUE", comma ? ", " : "");
+	if (t_flags & TF_SONOTCONN) {
+		db_printf("%sTF_SONOTCONN", comma ? ", " : "");
 		comma = 1;
 	}
 	if (t_flags & TF_LASTIDLE) {
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index d6146841a1ab..1b7e25099ced 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -515,7 +515,7 @@ tcp_unlock_or_drop(struct tcpcb *tp, int tcp_output_retval)
 #define	TF_WAKESOR	0x00004000	/* wake up receive socket */
 #define	TF_GPUTINPROG	0x00008000	/* Goodput measurement in progress */
 #define	TF_MORETOCOME	0x00010000	/* More data to be appended to sock */
-#define	TF_INCQUEUE	0x00020000	/* on incomplete queue of listener */
+#define	TF_SONOTCONN	0x00020000	/* needs soisconnected() on ESTAB */
 #define	TF_LASTIDLE	0x00040000	/* connection was previously idle */
 #define	TF_RXWIN0SENT	0x00080000	/* sent a receiver win 0 in response */
 #define	TF_FASTRECOVERY	0x00100000	/* in NewReno Fast Recovery */