kern/75122: [PATCH] Incorrect inflight bandwidth calculation on
first packet
Dan Nelson
dnelson at allantgroup.com
Mon Feb 7 23:20:17 PST 2005
The following reply was made to PR kern/75122; it has been noted by GNATS.
From: Dan Nelson <dnelson at allantgroup.com>
To: Uwe Doering <gemini at geminix.org>
Cc: FreeBSD-gnats-submit at FreeBSD.org, freebsd-bugs at FreeBSD.org
Subject: Re: kern/75122: [PATCH] Incorrect inflight bandwidth calculation on first packet
Date: Tue, 8 Feb 2005 01:15:22 -0600
In the last episode (Dec 22), Dan Nelson said:
> I am pretty sure only useful acks get to this point. I've changed that
> bit of code to just be
>
> + /*
> + * Ignore pure window update acks.
> + */
> + if (ack_seq == tp->t_bw_rtseq)
> + return;
>
> and added another check for negative sequences that increments a sysctl
> counter. So far netstat -s has counted 567 out-of-order packets but my
> counter is still at 0.
And after 40 days of uptime on my mail/squid servers, the counter is
still at zero, so the code works perfectly for me. It'd be nice if
this was committed before 5.4. Final patch follows:
Index: tcp_hostcache.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_hostcache.c,v
retrieving revision 1.7.2.1
diff -u -p -r1.7.2.1 tcp_hostcache.c
--- tcp_hostcache.c 31 Jan 2005 23:26:36 -0000 1.7.2.1
+++ tcp_hostcache.c 8 Feb 2005 07:09:31 -0000
@@ -175,7 +175,7 @@ SYSCTL_INT(_net_inet_tcp_hostcache, OID_
&tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
- CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
+ CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
sysctl_tcp_hc_list, "A", "List of all hostcache entries");
@@ -676,7 +676,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
msec(hc_entry->rmx_rttvar *
(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
- hc_entry->rmx_bandwidth * 8,
+ hc_entry->rmx_bandwidth,
hc_entry->rmx_cwnd,
hc_entry->rmx_sendpipe,
hc_entry->rmx_recvpipe,
Index: tcp_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.201.2.12
diff -u -p -r1.201.2.12 tcp_subr.c
--- tcp_subr.c 31 Jan 2005 23:26:36 -0000 1.201.2.12
+++ tcp_subr.c 8 Feb 2005 07:09:33 -0000
@@ -628,7 +632,6 @@ tcp_newtcpcb(inp)
tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->t_rcvtime = ticks;
- tp->t_bw_rtttime = ticks;
/*
* IPv4 TTL initialization is necessary for an IPv6 socket as well,
* because the socket may be bound to an IPv6 wildcard address,
@@ -1934,9 +1937,10 @@ tcp_twrespond(struct tcptw *tw, int flag
void
tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
{
- u_long bw;
+ u_long bw, avgbw;
u_long bwnd;
int save_ticks;
+ int delta_ticks;
INP_LOCK_ASSERT(tp->t_inpcb);
@@ -1951,29 +1955,50 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t
}
/*
+ * Validate the delta time. If a connection is new or has been idle
+ * a long time we have to reset the bandwidth calculator.
+ */
+ save_ticks = ticks;
+ delta_ticks = save_ticks - tp->t_bw_rtttime;
+ if (tp->t_bw_rtttime == 0 || delta_ticks < 0 || delta_ticks > hz * 10) {
+ tp->t_bw_rtttime = ticks;
+ tp->t_bw_rtseq = ack_seq;
+ if (tp->snd_bandwidth == 0)
+ tp->snd_bandwidth = tcp_inflight_min;
+ return;
+ }
+ if (delta_ticks == 0)
+ return;
+
+ /*
+ * Ignore pure window update acks.
+ */
+ if (ack_seq == tp->t_bw_rtseq)
+ return;
+
+ /* Sanity check */
+ if (SEQ_LT(ack_seq, tp->t_bw_rtseq)) {
+ if (tcp_inflight_debug > 0) {
+ printf ("%p invalid sequence: %u vs %u",
+ tp, ack_seq, tp->t_bw_rtseq);
+ }
+ }
+
+ /*
* Figure out the bandwidth. Due to the tick granularity this
* is a very rough number and it MUST be averaged over a fairly
* long period of time. XXX we need to take into account a link
* that is not using all available bandwidth, but for now our
* slop will ramp us up if this case occurs and the bandwidth later
* increases.
- *
- * Note: if ticks rollover 'bw' may wind up negative. We must
- * effectively reset t_bw_rtttime for this case.
*/
- save_ticks = ticks;
- if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
- return;
- bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
- (save_ticks - tp->t_bw_rtttime);
+ bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz / delta_ticks;
+
tp->t_bw_rtttime = save_ticks;
tp->t_bw_rtseq = ack_seq;
- if (tp->t_bw_rtttime == 0 || (int)bw < 0)
- return;
- bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
- tp->snd_bandwidth = bw;
+ avgbw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
/*
* Calculate the semi-static bandwidth delay product, plus two maximal
@@ -2004,22 +2030,25 @@ tcp_xmit_bandwidth_limit(struct tcpcb *t
* no other choice.
*/
#define USERTT ((tp->t_srtt + tp->t_rttbest) / 2)
- bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10;
+ bwnd = (int64_t)avgbw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10;
#undef USERTT
if (tcp_inflight_debug > 0) {
static int ltime;
if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
ltime = ticks;
- printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
+ printf("%p bw %lu(%lu,%lu) rttbest %d srtt %d bwnd %ld\n",
tp,
- bw,
+ avgbw, tp->snd_bandwidth, bw,
tp->t_rttbest,
tp->t_srtt,
bwnd
);
}
}
+
+ tp->snd_bandwidth = avgbw;
+
if ((long)bwnd < tcp_inflight_min)
bwnd = tcp_inflight_min;
if (bwnd > tcp_inflight_max)
Index: tcp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.107.2.1
diff -u -p -r1.107.2.1 tcp_usrreq.c
--- tcp_usrreq.c 31 Jan 2005 23:26:37 -0000 1.107.2.1
+++ tcp_usrreq.c 8 Feb 2005 07:09:34 -0000
@@ -864,7 +864,6 @@ tcp_connect(tp, nam, td)
tp->t_state = TCPS_SYN_SENT;
callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
tp->iss = tcp_new_isn(tp);
- tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
/*
@@ -959,7 +958,6 @@ tcp6_connect(tp, nam, td)
tp->t_state = TCPS_SYN_SENT;
callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
tp->iss = tcp_new_isn(tp);
- tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
/*
--
Dan Nelson
dnelson at allantgroup.com
More information about the freebsd-bugs
mailing list