svn commit: r351725 - in head/sys/netinet: . tcp_stacks

Michael Tuexen tuexen at FreeBSD.org
Mon Sep 2 19:04:04 UTC 2019


Author: tuexen
Date: Mon Sep  2 19:04:02 2019
New Revision: 351725
URL: https://svnweb.freebsd.org/changeset/base/351725

Log:
  This patch improves the DSACK handling to conform with RFC 2883.
  The lowest SACK block is used when multiple blocks would be eligible as
  DSACK blocks. SACK blocks get reordered, while the ordering of SACK
  blocks not relevant in the DSACK context is maintained.
  
  Reviewed by:		rrs@, tuexen@
  Obtained from:		Richard Scheffenegger
  MFC after:		1 week
  Differential Revision:	https://reviews.freebsd.org/D21038

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_sack.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Mon Sep  2 18:32:08 2019	(r351724)
+++ head/sys/netinet/tcp_input.c	Mon Sep  2 19:04:02 2019	(r351725)
@@ -1486,7 +1486,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru
 	} else {
 		tp->rfbuf_cnt += tlen;	/* add up */
 	}
-
 	return (newsize);
 }
 

Modified: head/sys/netinet/tcp_sack.c
==============================================================================
--- head/sys/netinet/tcp_sack.c	Mon Sep  2 18:32:08 2019	(r351724)
+++ head/sys/netinet/tcp_sack.c	Mon Sep  2 19:04:02 2019	(r351725)
@@ -149,7 +149,109 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, 
     &VNET_NAME(tcp_sack_globalholes), 0,
     "Global number of TCP SACK holes currently allocated");
 
+
 /*
+ * This function finds overlaps between the received range
+ * rcv_start..rcv_end and the currently stored SACK blocks, and rebuilds
+ * the SACK list with any overlap placed up front as the DSACK block.
+ */
+void
+tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
+{
+	struct sackblk head_blk, mid_blk, saved_blks[MAX_SACK_BLKS];
+	int i, j, n, identical;
+	tcp_seq start, end;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
+
+	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+		log(LOG_DEBUG, "\nDSACK update: %u..%u, rcv_nxt: %u\n",
+		    rcv_start, rcv_end, tp->rcv_nxt);
+	}
+
+	if (SEQ_LT(rcv_end, tp->rcv_nxt) ||
+	    ((rcv_end == tp->rcv_nxt) && (tp->rcv_numsacks > 0) &&
+	     (tp->sackblks[0].end == tp->rcv_nxt))) {
+		saved_blks[0].start = rcv_start;
+		saved_blks[0].end = rcv_end;
+	} else {
+		saved_blks[0].start = saved_blks[0].end = 0;
+	}
+
+	head_blk.start = head_blk.end = 0;
+	mid_blk.start = rcv_start;
+	mid_blk.end = rcv_end;
+	identical = 0;
+
+	for (i = 0; i < tp->rcv_numsacks; i++) {
+		start = tp->sackblks[i].start;
+		end = tp->sackblks[i].end;
+		if (SEQ_LT(rcv_end, start)) {
+			/* segment lies entirely left of this SACK block */
+			continue;
+		}
+		if (SEQ_GT(rcv_start, end)) {
+			/* segment lies entirely right of this SACK block */
+			continue;
+		}
+		if (SEQ_GT(tp->rcv_nxt, end)) {
+			if ((SEQ_MAX(rcv_start, start) != SEQ_MIN(rcv_end, end)) &&
+			    (SEQ_GT(head_blk.start, SEQ_MAX(rcv_start, start)) ||
+			    (head_blk.start == head_blk.end))) {
+				head_blk.start = SEQ_MAX(rcv_start, start);
+				head_blk.end = SEQ_MIN(rcv_end, end);
+			}
+			continue;
+		}
+		if (((head_blk.start == head_blk.end) ||
+		     SEQ_LT(start, head_blk.start)) &&
+		     (SEQ_GT(end, rcv_start) &&
+		      SEQ_LEQ(start, rcv_end))) {
+			head_blk.start = start;
+			head_blk.end = end;
+		}
+		mid_blk.start = SEQ_MIN(mid_blk.start, start);
+		mid_blk.end = SEQ_MAX(mid_blk.end, end);
+		if ((mid_blk.start == start) && (mid_blk.end == end))
+			identical = 1;
+	}
+	if (SEQ_LT(head_blk.start, head_blk.end)) {
+		/* store the part of the segment overlapping head_blk */
+		saved_blks[0].start = SEQ_MAX(rcv_start, head_blk.start);
+		saved_blks[0].end = SEQ_MIN(rcv_end, head_blk.end);
+	}
+	n = 1;
+	/*
+	 * Second, if not ACKed, store the SACK block that
+	 * overlaps with the DSACK block unless it is identical
+	 */
+	if ((SEQ_LT(tp->rcv_nxt, mid_blk.end) &&
+	    !((mid_blk.start == saved_blks[0].start) &&
+	    (mid_blk.end == saved_blks[0].end))) ||
+	    identical == 1) {
+		saved_blks[n].start = mid_blk.start;
+		saved_blks[n++].end = mid_blk.end;
+	}
+	/* Keep unrelated SACK blocks; never write past saved_blks[]. */
+	for (j = 0; (j < tp->rcv_numsacks) && (n < MAX_SACK_BLKS); j++) {
+		if (((SEQ_LT(tp->sackblks[j].end, mid_blk.start) ||
+		      SEQ_GT(tp->sackblks[j].start, mid_blk.end)) &&
+		    (SEQ_GT(tp->sackblks[j].start, tp->rcv_nxt))))
+			saved_blks[n++] = tp->sackblks[j];
+	}
+	j = 0;
+	for (i = 0; i < n; i++) {
+		/* we can end up with a stale initial entry */
+		if (SEQ_LT(saved_blks[i].start, saved_blks[i].end)) {
+			tp->sackblks[j++] = saved_blks[i];
+		}
+	}
+	tp->rcv_numsacks = j;
+}
+
+/*
  * This function is called upon receipt of new valid data (while not in
  * header prediction mode), and it updates the ordered list of sacks.
  */
@@ -170,9 +272,16 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
 	/* Check arguments. */
 	KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));
 
-	/* SACK block for the received segment. */
-	head_blk.start = rcv_start;
-	head_blk.end = rcv_end;
+	if ((rcv_start == rcv_end) &&
+	    (tp->rcv_numsacks >= 1) &&
+	    (rcv_end == tp->sackblks[0].end)) {
+		/* retaining DSACK block below rcv_nxt (todrop) */
+		head_blk = tp->sackblks[0];
+	} else {
+		/* SACK block for the received segment. */
+		head_blk.start = rcv_start;
+		head_blk.end = rcv_end;
+	}
 
 	/*
 	 * Merge updated SACK blocks into head_blk, and save unchanged SACK
@@ -266,6 +375,10 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
 		 */
 		if (num_saved >= MAX_SACK_BLKS)
 			num_saved--;
+	}
+	if ((rcv_start == rcv_end) &&
+	    (rcv_start == tp->sackblks[0].end)) {
+		num_head = 1;
 	}
 	if (num_saved > 0) {
 		/*

Modified: head/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack.c	Mon Sep  2 18:32:08 2019	(r351724)
+++ head/sys/netinet/tcp_stacks/rack.c	Mon Sep  2 19:04:02 2019	(r351725)
@@ -1783,6 +1783,14 @@ rack_drop_checks(struct tcpopt *to, struct mbuf *m, st
 			TCPSTAT_INC(tcps_rcvpartduppack);
 			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
 		}
+		if (tp->t_flags & TF_SACK_PERMIT) {
+			/*
+			 * record the left, to-be-dropped edge of data
+			 * here, for use as dsack block further down
+			 */
+			tcp_update_sack_list(tp, th->th_seq,
+			    th->th_seq + todrop);
+		}
 		*drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
@@ -4900,7 +4908,8 @@ dodata:				/* XXX */
 		    (TCPS_HAVEESTABLISHED(tp->t_state) ||
 		    tfo_syn)) {
 			if (DELAY_ACK(tp, tlen) || tfo_syn) {
-				rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+				rack_timer_cancel(tp, rack,
+				    rack->r_ctl.rc_rcvtime, __LINE__);
 				tp->t_flags |= TF_DELACK;
 			} else {
 				rack->r_wanted_output++;
@@ -4934,18 +4943,29 @@ dodata:				/* XXX */
 			 * DSACK actually handled in the fastpath
 			 * above.
 			 */
-			tcp_update_sack_list(tp, save_start, save_start + save_tlen);
+			tcp_update_sack_list(tp, save_start,
+			    save_start + save_tlen);
 		} else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
 			/*
 			 * Cleaning sackblks by using zero length
 			 * update.
 			 */
-			tcp_update_sack_list(tp, save_start, save_start);
+			if ((tp->rcv_numsacks >= 1) &&
+			    (tp->sackblks[0].end == save_start)) {
+				/* partial overlap, recorded at todrop above */
+				tcp_update_sack_list(tp, tp->sackblks[0].start,
+				    tp->sackblks[0].end);
+			} else {
+				tcp_update_dsack_list(tp, save_start,
+				    save_start + save_tlen);
+			}
 		} else if ((tlen > 0) && (tlen >= save_tlen)) {
 			/* Update of sackblks. */
-			tcp_update_sack_list(tp, save_start, save_start + save_tlen);
+			tcp_update_dsack_list(tp, save_start,
+			    save_start + save_tlen);
 		} else if (tlen > 0) {
-			tcp_update_sack_list(tp, save_start, save_start+tlen);
+			tcp_update_dsack_list(tp, save_start,
+			    save_start + tlen);
 		}
 	} else {
 		m_freem(m);
@@ -4967,7 +4987,8 @@ dodata:				/* XXX */
 			 * now.
 			 */
 			if (tp->t_flags & TF_NEEDSYN) {
-				rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+				rack_timer_cancel(tp, rack,
+				    rack->r_ctl.rc_rcvtime, __LINE__);
 				tp->t_flags |= TF_DELACK;
 			} else {
 				tp->t_flags |= TF_ACKNOW;
@@ -4984,7 +5005,8 @@ dodata:				/* XXX */
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
-			rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+			rack_timer_cancel(tp, rack,
+			    rack->r_ctl.rc_rcvtime, __LINE__);
 			tcp_state_change(tp, TCPS_CLOSE_WAIT);
 			break;
 
@@ -4993,7 +5015,8 @@ dodata:				/* XXX */
 			 * acked so enter the CLOSING state.
 			 */
 		case TCPS_FIN_WAIT_1:
-			rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+			rack_timer_cancel(tp, rack,
+			    rack->r_ctl.rc_rcvtime, __LINE__);
 			tcp_state_change(tp, TCPS_CLOSING);
 			break;
 
@@ -5003,7 +5026,8 @@ dodata:				/* XXX */
 			 * other standard timers.
 			 */
 		case TCPS_FIN_WAIT_2:
-			rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+			rack_timer_cancel(tp, rack,
+			    rack->r_ctl.rc_rcvtime, __LINE__);
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			tcp_twstart(tp);
 			return (1);
@@ -5012,7 +5036,8 @@ dodata:				/* XXX */
 	/*
 	 * Return any desired output.
 	 */
-	if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
+	if ((tp->t_flags & TF_ACKNOW) ||
+	    (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
 		rack->r_wanted_output++;
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c	Mon Sep  2 18:32:08 2019	(r351724)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c	Mon Sep  2 19:04:02 2019	(r351725)
@@ -495,7 +495,8 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, str
 		 * DSACK - add SACK block for dropped range
 		 */
 		if (tp->t_flags & TF_SACK_PERMIT) {
-			tcp_update_sack_list(tp, th->th_seq, th->th_seq + tlen);
+			tcp_update_sack_list(tp, th->th_seq,
+			    th->th_seq + todrop);
 			/*
 			 * ACK now, as the next in-sequence segment
 			 * will clear the DSACK block again

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Mon Sep  2 18:32:08 2019	(r351724)
+++ head/sys/netinet/tcp_var.h	Mon Sep  2 19:04:02 2019	(r351725)
@@ -939,6 +939,7 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *);
 tcp_seq	 tcp_new_isn(struct in_conninfo *);
 
 int	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+void	 tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
 void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
 void	 tcp_clean_dsack_blocks(struct tcpcb *tp);
 void	 tcp_clean_sackreport(struct tcpcb *tp);


More information about the svn-src-all mailing list