kern/75122: [PATCH] Incorrect inflight bandwidth calculation on first packet

Dan Nelson dnelson at allantgroup.com
Thu Dec 16 11:06:15 PST 2004


Updated patch including Matt's recommended fix:

Index: tcp_hostcache.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_hostcache.c,v
retrieving revision 1.7
diff -u -p -r1.7 tcp_hostcache.c
--- tcp_hostcache.c	16 Aug 2004 18:32:07 -0000	1.7
+++ tcp_hostcache.c	15 Dec 2004 17:42:02 -0000
@@ -175,7 +175,7 @@ SYSCTL_INT(_net_inet_tcp_hostcache, OID_
      &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
 
 SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
-	CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
+	CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
 	sysctl_tcp_hc_list, "A", "List of all hostcache entries");
 
 
@@ -676,7 +676,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
 				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
 			    msec(hc_entry->rmx_rttvar *
 				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
-			    hc_entry->rmx_bandwidth * 8,
+			    hc_entry->rmx_bandwidth,
 			    hc_entry->rmx_cwnd,
 			    hc_entry->rmx_sendpipe,
 			    hc_entry->rmx_recvpipe,
Index: tcp_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.201.2.4
diff -u -p -r1.201.2.4 tcp_subr.c
--- tcp_subr.c	1 Dec 2004 05:37:28 -0000	1.201.2.4
+++ tcp_subr.c	16 Dec 2004 06:38:48 -0000
@@ -631,7 +631,6 @@ tcp_newtcpcb(inp)
 	tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->t_rcvtime = ticks;
-	tp->t_bw_rtttime = ticks;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
@@ -1907,9 +1906,10 @@ tcp_twrespond(struct tcptw *tw, int flag
 void
 tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
 {
-	u_long bw;
+	u_long bw, avgbw;
 	u_long bwnd;
 	int save_ticks;
+	int delta_ticks;
 
 	/*
 	 * If inflight_enable is disabled in the middle of a tcp connection,
@@ -1922,29 +1922,42 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t
 	}
 
 	/*
+	  * Validate the delta time.  If a connection is new or has been idle
+	  * a long time we have to reset the bandwidth calculator.
+	  */
+	save_ticks = ticks;
+	delta_ticks = save_ticks - tp->t_bw_rtttime;
+	if (tp->t_bw_rtttime == 0 || delta_ticks < 0 || delta_ticks > hz * 10) {
+		tp->t_bw_rtttime = ticks;
+		tp->t_bw_rtseq = ack_seq;
+		if (tp->snd_bandwidth == 0)
+			tp->snd_bandwidth = tcp_inflight_min;
+		return;
+	}
+	if (delta_ticks == 0)
+		return;
+	
+	/*
+	 * Sanity check, plus ignore pure window update acks.
+	 */
+	if ((int)(ack_seq - tp->t_bw_rtseq) <= 0)
+		return;
+	
+	/*
 	 * Figure out the bandwidth.  Due to the tick granularity this
 	 * is a very rough number and it MUST be averaged over a fairly
 	 * long period of time.  XXX we need to take into account a link
 	 * that is not using all available bandwidth, but for now our
 	 * slop will ramp us up if this case occurs and the bandwidth later
 	 * increases.
-	 *
-	 * Note: if ticks rollover 'bw' may wind up negative.  We must
-	 * effectively reset t_bw_rtttime for this case.
 	 */
-	save_ticks = ticks;
-	if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
-		return;
 
-	bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
-	    (save_ticks - tp->t_bw_rtttime);
+	bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz / delta_ticks;
+
 	tp->t_bw_rtttime = save_ticks;
 	tp->t_bw_rtseq = ack_seq;
-	if (tp->t_bw_rtttime == 0 || (int)bw < 0)
-		return;
-	bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
 
-	tp->snd_bandwidth = bw;
+	avgbw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
 
 	/*
 	 * Calculate the semi-static bandwidth delay product, plus two maximal
@@ -1975,22 +1988,25 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t
 	 *	    no other choice.
 	 */
 #define USERTT	((tp->t_srtt + tp->t_rttbest) / 2)
-	bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10;
+	bwnd = (int64_t)avgbw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10;
 #undef USERTT
 
 	if (tcp_inflight_debug > 0) {
 		static int ltime;
 		if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
 			ltime = ticks;
-			printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
+			printf("%p bw %lu(%lu,%lu) rttbest %d srtt %d bwnd %ld\n",
 			    tp,
-			    bw,
+			    avgbw, tp->snd_bandwidth, bw,
 			    tp->t_rttbest,
 			    tp->t_srtt,
 			    bwnd
 			);
 		}
 	}
+	
+	tp->snd_bandwidth = avgbw;
+	
 	if ((long)bwnd < tcp_inflight_min)
 		bwnd = tcp_inflight_min;
 	if (bwnd > tcp_inflight_max)
Index: tcp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.107
diff -u -p -r1.107 tcp_usrreq.c
--- tcp_usrreq.c	16 Aug 2004 18:32:07 -0000	1.107
+++ tcp_usrreq.c	16 Dec 2004 06:38:59 -0000
@@ -864,7 +864,6 @@ tcp_connect(tp, nam, td)
 	tp->t_state = TCPS_SYN_SENT;
 	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
 	tp->iss = tcp_new_isn(tp);
-	tp->t_bw_rtseq = tp->iss;
 	tcp_sendseqinit(tp);
 
 	/*
@@ -959,7 +958,6 @@ tcp6_connect(tp, nam, td)
 	tp->t_state = TCPS_SYN_SENT;
 	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
 	tp->iss = tcp_new_isn(tp);
-	tp->t_bw_rtseq = tp->iss;
 	tcp_sendseqinit(tp);
 
 	/*


-- 
	Dan Nelson
	dnelson at allantgroup.com


More information about the freebsd-bugs mailing list