svn commit: r367492 - in head/sys/netinet: . tcp_stacks

Richard Scheffenegger rscheff at FreeBSD.org
Sun Nov 8 18:47:07 UTC 2020


Author: rscheff
Date: Sun Nov  8 18:47:05 2020
New Revision: 367492
URL: https://svnweb.freebsd.org/changeset/base/367492

Log:
  Prevent premature SACK block transmission during loss recovery
  
  Under specific conditions, a window update can be sent with
  outdated SACK information. Some clients react to this by
  delaying loss recovery, making TCP perform very poorly.
  
  Reported by:	chengc_netapp.com
  Reviewed by:	rrs, jtl
  MFC after:	2 weeks
  Sponsored by:	NetApp, Inc.
  Differential Revision:	https://reviews.freebsd.org/D24237
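
  [Editor's note] The change replaces the immediate sorwakeup()/
  sowwakeup() upcalls at each point where a socket buffer changes
  with the new TF_WAKESOR/TF_WAKESOW flags, which are consumed once
  by tcp_handle_wakeup() after the whole segment (including SACK
  processing) has been handled. The stand-alone sketch below models
  only that deferral pattern in userspace; the struct, function, and
  flag names other than the TF_* ones are invented for illustration
  and are not kernel APIs.

  /*
   * Minimal userspace model of the deferred-wakeup pattern.  In the
   * real code the flags live on the tcpcb and tcp_handle_wakeup()
   * performs sorwakeup()/sowwakeup() once segment processing is done,
   * so any window update triggered by the upcall carries current
   * SACK information.
   */
  #include <stdbool.h>
  #include <stdio.h>

  #define WAKE_RCV 0x1    /* models TF_WAKESOR */
  #define WAKE_SND 0x2    /* models TF_WAKESOW */

  struct conn {
  	int	wake_flags;	/* pending wakeups, consumed once */
  	bool	sack_updated;	/* stands in for the SACK scoreboard */
  };

  /* Models sorwakeup(): the application may answer from this upcall,
   * e.g. with a window update that echoes the current SACK blocks. */
  static void
  wakeup_receiver(struct conn *c)
  {
  	printf("receiver woken, SACK state %s\n",
  	    c->sack_updated ? "current" : "STALE");
  }

  /* Models sowwakeup(). */
  static void
  wakeup_sender(struct conn *c)
  {
  	printf("sender woken\n");
  }

  /* Models tcp_handle_wakeup(): run once, after segment processing. */
  static void
  handle_wakeup(struct conn *c)
  {
  	if (c->wake_flags & WAKE_RCV) {
  		c->wake_flags &= ~WAKE_RCV;
  		wakeup_receiver(c);
  	}
  	if (c->wake_flags & WAKE_SND) {
  		c->wake_flags &= ~WAKE_SND;
  		wakeup_sender(c);
  	}
  }

  static void
  process_segment(struct conn *c)
  {
  	/* Data reached the receive buffer: record the wakeup instead
  	 * of performing it immediately. */
  	c->wake_flags |= WAKE_RCV;

  	/* ...later in the same segment, SACK processing runs... */
  	c->sack_updated = true;

  	/* The ACK advanced snd_una: record the send-side wakeup too. */
  	c->wake_flags |= WAKE_SND;

  	/* Only now, with per-segment state final, do the upcalls. */
  	handle_wakeup(c);
  }

  int
  main(void)
  {
  	struct conn c = { 0, false };

  	process_segment(&c);	/* prints "current", never "STALE" */
  	return (0);
  }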

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_reass.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Sun Nov  8 18:27:49 2020	(r367491)
+++ head/sys/netinet/tcp_input.c	Sun Nov  8 18:47:05 2020	(r367492)
@@ -1462,6 +1462,29 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru
 }
 
 void
+tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
+{
+	/*
+	 * Since tp might be gone if the session entered
+	 * the TIME_WAIT state before coming here, we need
+	 * to check if the socket is still connected.
+	 */
+	if ((so->so_state & SS_ISCONNECTED) == 0)
+		return;
+	INP_LOCK_ASSERT(tp->t_inpcb);
+	if (tp->t_flags & TF_WAKESOR) {
+		tp->t_flags &= ~TF_WAKESOR;
+		SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+		sorwakeup(so);
+	}
+	if (tp->t_flags & TF_WAKESOW) {
+		tp->t_flags &= ~TF_WAKESOW;
+		SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+		sowwakeup(so);
+	}
+}
+
+void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
@@ -1811,7 +1834,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 				else if (!tcp_timer_active(tp, TT_PERSIST))
 					tcp_timer_activate(tp, TT_REXMT,
 						      tp->t_rxtcur);
-				sowwakeup(so);
+				tp->t_flags |= TF_WAKESOW;
 				if (sbavail(&so->so_snd))
 					(void) tp->t_fb->tfb_tcp_output(tp);
 				goto check_delack;
@@ -1876,8 +1899,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			}
-			/* NB: sorwakeup_locked() does an implicit unlock. */
-			sorwakeup_locked(so);
+			SOCKBUF_UNLOCK(&so->so_rcv);
+			tp->t_flags |= TF_WAKESOR;
 			if (DELAY_ACK(tp, tlen)) {
 				tp->t_flags |= TF_DELACK;
 			} else {
@@ -2811,8 +2834,8 @@ process_ACK:
 				tp->snd_wnd = 0;
 			ourfinisacked = 0;
 		}
-		/* NB: sowwakeup_locked() does an implicit unlock. */
-		sowwakeup_locked(so);
+		SOCKBUF_UNLOCK(&so->so_snd);
+		tp->t_flags |= TF_WAKESOW;
 		m_freem(mfree);
 		/* Detect una wraparound. */
 		if (!IN_RECOVERY(tp->t_flags) &&
@@ -3033,8 +3056,8 @@ dodata:							/* XXX */
 				m_freem(m);
 			else
 				sbappendstream_locked(&so->so_rcv, m, 0);
-			/* NB: sorwakeup_locked() does an implicit unlock. */
-			sorwakeup_locked(so);
+			SOCKBUF_UNLOCK(&so->so_rcv);
+			tp->t_flags |= TF_WAKESOR;
 		} else {
 			/*
 			 * XXX: Due to the header drop above "th" is
@@ -3101,6 +3124,8 @@ dodata:							/* XXX */
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			socantrcvmore(so);
+			/* The socket upcall is handled by socantrcvmore. */
+			tp->t_flags &= ~TF_WAKESOR;
 			/*
 			 * If connection is half-synchronized
 			 * (ie NEEDSYN flag on) then delay ACK,
@@ -3164,6 +3189,7 @@ check_delack:
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
+	tcp_handle_wakeup(tp, so);
 	INP_WUNLOCK(tp->t_inpcb);
 	return;
 
@@ -3197,6 +3223,7 @@ dropafterack:
 	TCP_PROBE3(debug__input, tp, th, m);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
+	tcp_handle_wakeup(tp, so);
 	INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 	return;
@@ -3204,6 +3231,7 @@ dropafterack:
 dropwithreset:
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
+		tcp_handle_wakeup(tp, so);
 		INP_WUNLOCK(tp->t_inpcb);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
@@ -3219,8 +3247,10 @@ drop:
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, m);
-	if (tp != NULL)
+	if (tp != NULL) {
+		tcp_handle_wakeup(tp, so);
 		INP_WUNLOCK(tp->t_inpcb);
+	}
 	m_freem(m);
 }
 

Modified: head/sys/netinet/tcp_reass.c
==============================================================================
--- head/sys/netinet/tcp_reass.c	Sun Nov  8 18:27:49 2020	(r367491)
+++ head/sys/netinet/tcp_reass.c	Sun Nov  8 18:47:05 2020	(r367492)
@@ -959,7 +959,8 @@ new_entry:
 		} else {
 			sbappendstream_locked(&so->so_rcv, m, 0);
 		}
-		sorwakeup_locked(so);
+		SOCKBUF_UNLOCK(&so->so_rcv);
+		tp->t_flags |= TF_WAKESOR;
 		return (flags);
 	}
 	if (tcp_new_limits) {
@@ -1107,6 +1108,7 @@ present:
 #ifdef TCP_REASS_LOGGING
 	tcp_reass_log_dump(tp);
 #endif
-	sorwakeup_locked(so);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	tp->t_flags |= TF_WAKESOR;
 	return (flags);
 }

Modified: head/sys/netinet/tcp_stacks/bbr.c
==============================================================================
--- head/sys/netinet/tcp_stacks/bbr.c	Sun Nov  8 18:27:49 2020	(r367491)
+++ head/sys/netinet/tcp_stacks/bbr.c	Sun Nov  8 18:47:05 2020	(r367492)
@@ -7876,8 +7876,8 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, str
 	acked_amount = min(acked, (int)sbavail(&so->so_snd));
 	tp->snd_wnd -= acked_amount;
 	mfree = sbcut_locked(&so->so_snd, acked_amount);
-	/* NB: sowwakeup_locked() does an implicit unlock. */
-	sowwakeup_locked(so);
+	SOCKBUF_UNLOCK(&so->so_snd);
+	tp->t_flags |= TF_WAKESOW;
 	m_freem(mfree);
 	if (SEQ_GT(th->th_ack, tp->snd_una)) {
 		bbr_collapse_rtt(tp, bbr, TCP_REXMTVAL(tp));
@@ -8353,8 +8353,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, st
 				appended =
 #endif
 					sbappendstream_locked(&so->so_rcv, m, 0);
-			/* NB: sorwakeup_locked() does an implicit unlock. */
-			sorwakeup_locked(so);
+			SOCKBUF_UNLOCK(&so->so_rcv);
+			tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
 			if (so->so_rcv.sb_shlim && appended != mcnt)
 				counter_fo_release(so->so_rcv.sb_shlim,
@@ -8414,6 +8414,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, st
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			socantrcvmore(so);
+			/* The socket upcall is handled by socantrcvmore. */
+			tp->t_flags &= ~TF_WAKESOR;
 			/*
 			 * If connection is half-synchronized (ie NEEDSYN
 			 * flag on) then delay ACK, so it may be piggybacked
@@ -8604,8 +8606,8 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
 			sbappendstream_locked(&so->so_rcv, m, 0);
 		ctf_calc_rwin(so, tp);
 	}
-	/* NB: sorwakeup_locked() does an implicit unlock. */
-	sorwakeup_locked(so);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
 	if (so->so_rcv.sb_shlim && mcnt != appended)
 		counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
@@ -8796,7 +8798,7 @@ bbr_fastack(struct mbuf *m, struct tcphdr *th, struct 
 		    &tcp_savetcp, 0);
 #endif
 	/* Wake up the socket if we have room to write more */
-	sowwakeup(so);
+	tp->t_flags |= TF_WAKESOW;
 	if (tp->snd_una == tp->snd_max) {
 		/* Nothing left outstanding */
 		bbr_log_progress_event(bbr, tp, ticks, PROGRESS_CLEAR, __LINE__);
@@ -11740,8 +11742,10 @@ bbr_do_segment(struct mbuf *m, struct tcphdr *th, stru
 	}
 	retval = bbr_do_segment_nounlock(m, th, so, tp,
 					 drop_hdrlen, tlen, iptos, 0, &tv);
-	if (retval == 0)
+	if (retval == 0) {
+		tcp_handle_wakeup(tp, so);
 		INP_WUNLOCK(tp->t_inpcb);
+	}
 }
 
 /*

Modified: head/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack.c	Sun Nov  8 18:27:49 2020	(r367491)
+++ head/sys/netinet/tcp_stacks/rack.c	Sun Nov  8 18:47:05 2020	(r367492)
@@ -8344,8 +8344,8 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, st
 		 */
 		ourfinisacked = 1;
 	}
-	/* NB: sowwakeup_locked() does an implicit unlock. */
-	sowwakeup_locked(so);
+	SOCKBUF_UNLOCK(&so->so_snd);
+	tp->t_flags |= TF_WAKESOW;
 	m_freem(mfree);
 	if (rack->r_ctl.rc_early_recovery == 0) {
 		if (IN_RECOVERY(tp->t_flags)) {
@@ -8665,8 +8665,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
 				appended =
 #endif
 					sbappendstream_locked(&so->so_rcv, m, 0);
-			/* NB: sorwakeup_locked() does an implicit unlock. */
-			sorwakeup_locked(so);
+			SOCKBUF_UNLOCK(&so->so_rcv);
+			tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
 			if (so->so_rcv.sb_shlim && appended != mcnt)
 				counter_fo_release(so->so_rcv.sb_shlim,
@@ -8731,6 +8731,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			socantrcvmore(so);
+			/* The socket upcall is handled by socantrcvmore. */
+			tp->t_flags &= ~TF_WAKESOR;
 			/*
 			 * If connection is half-synchronized (ie NEEDSYN
 			 * flag on) then delay ACK, so it may be piggybacked
@@ -8922,8 +8924,8 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th,
 			sbappendstream_locked(&so->so_rcv, m, 0);
 		ctf_calc_rwin(so, tp);
 	}
-	/* NB: sorwakeup_locked() does an implicit unlock. */
-	sorwakeup_locked(so);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
 	if (so->so_rcv.sb_shlim && mcnt != appended)
 		counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
@@ -9140,7 +9142,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct
 		rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
 	}
 	/* Wake up the socket if we have room to write more */
-	sowwakeup(so);
+	tp->t_flags |= TF_WAKESOW;
 	if (sbavail(&so->so_snd)) {
 		rack->r_wanted_output = 1;
 	}
@@ -11188,8 +11190,10 @@ rack_do_segment(struct mbuf *m, struct tcphdr *th, str
 		tcp_get_usecs(&tv);
 	}
 	if(rack_do_segment_nounlock(m, th, so, tp,
-				    drop_hdrlen, tlen, iptos, 0, &tv) == 0)
+				    drop_hdrlen, tlen, iptos, 0, &tv) == 0) {
+		tcp_handle_wakeup(tp, so);
 		INP_WUNLOCK(tp->t_inpcb);
+	}
 }
 
 struct rack_sendmap *

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c	Sun Nov  8 18:27:49 2020	(r367491)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c	Sun Nov  8 18:47:05 2020	(r367492)
@@ -458,6 +458,7 @@ ctf_do_queued_segments(struct socket *so, struct tcpcb
 			/* We lost the tcpcb (maybe a RST came in)? */
 			return(1);
 		}
+		tcp_handle_wakeup(tp, so);
 	}
 	return (0);
 }

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Sun Nov  8 18:27:49 2020	(r367491)
+++ head/sys/netinet/tcp_var.h	Sun Nov  8 18:47:05 2020	(r367492)
@@ -381,7 +381,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
 #define	TF_NEEDFIN	0x00000800	/* send FIN (implicit state) */
 #define	TF_NOPUSH	0x00001000	/* don't push */
 #define	TF_PREVVALID	0x00002000	/* saved values for bad rxmit valid */
-#define	TF_UNUSED1	0x00004000	/* unused */
+#define	TF_WAKESOR	0x00004000	/* wake up receive socket */
 #define	TF_GPUTINPROG	0x00008000	/* Goodput measurement in progress */
 #define	TF_MORETOCOME	0x00010000	/* More data to be appended to sock */
 #define	TF_LQ_OVERFLOW	0x00020000	/* listen queue overflow */
@@ -393,9 +393,9 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
 #define	TF_FORCEDATA	0x00800000	/* force out a byte */
 #define	TF_TSO		0x01000000	/* TSO enabled on this connection */
 #define	TF_TOE		0x02000000	/* this connection is offloaded */
-#define	TF_UNUSED3	0x04000000	/* unused */
-#define	TF_UNUSED4	0x08000000	/* unused */
-#define	TF_UNUSED5	0x10000000	/* unused */
+#define	TF_WAKESOW	0x04000000	/* wake up send socket */
+#define	TF_UNUSED1	0x08000000	/* unused */
+#define	TF_UNUSED2	0x10000000	/* unused */
 #define	TF_CONGRECOVERY	0x20000000	/* congestion recovery mode */
 #define	TF_WASCRECOVERY	0x40000000	/* was in congestion recovery */
 #define	TF_FASTOPEN	0x80000000	/* TCP Fast Open indication */
@@ -931,7 +931,8 @@ char	*tcp_log_addrs(struct in_conninfo *, struct tcphd
 	    const void *);
 char	*tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
 	    const void *);
-int	 tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *);
+int	 tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *,
+	    struct mbuf *);
 void	 tcp_reass_global_init(void);
 void	 tcp_reass_flush(struct tcpcb *);
 void	 tcp_dooptions(struct tcpopt *, u_char *, int, int);
@@ -955,6 +956,7 @@ void	hhook_run_tcp_est_in(struct tcpcb *tp,
 int	 tcp_input(struct mbuf **, int *, int);
 int	 tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
 	    struct tcpcb *, int);
+void	 tcp_handle_wakeup(struct tcpcb *, struct socket *);
 void	 tcp_do_segment(struct mbuf *, struct tcphdr *,
 			struct socket *, struct tcpcb *, int, int, uint8_t);
 
