FreeBSD 7 TCP syncache fix: request for testers
Mike Silbersack
silby at silby.com
Tue Jul 10 05:47:42 UTC 2007
I've found one of the causes of the network instability of FreeBSD 7: the
TCP syncache fails to retransmit SYN-ACK packets. This causes interesting
problems when packet loss is experienced during connection setup. The
symptoms that I have witnessed are twofold:
1. If the third segment of the three-way handshake (the client's final ACK)
is lost, the client will believe that the connection is in the ESTABLISHED
state, while the server will still have the connection in the syncache.
2. Subsequently, the above syncache entry will stay stuck in the syncache
forever. If you attempt to re-use that same 4-tuple, the syncache will
ack the new SYN with the old sequence number.
Anyway, the attached patch simplifies the syncache structure a bit and
makes it retransmit properly. I'd appreciate testing from anyone who has
experienced TCP problems with FreeBSD 7, as well as anyone who is pushing
significant traffic through FreeBSD 7.
I'm not interested in FreeBSD 6 testers, since the FreeBSD 6 syncache has
a different structure and is not affected by this bug.
FWIW, here's how to prove the existence of the bug. Install nemesis from
ports, then use it to send SYN packets at your FreeBSD 7 machine. Without
the patch, you should see only one SYN-ACK reply, and you should also notice
that the sysctl net.inet.tcp.syncache.count goes up, but does not come
back down.
Once you have applied the patch, you should see the behavior demonstrated
below:
From your client machine (nemesis will pick an IP to spoof; change that
if you wish):
nemesis tcp -y 80 -D 10.1.1.6
TCP Packet Injected
On your FreeBSD 7 machine:
patrocles# tcpdump -n port 80
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on nve0, link-type EN10MB (Ethernet), capture size 96 bytes
23:49:02.075118 IP 133.120.85.92.48922 > 10.1.1.6.80: S 1519649939:1519649939(0) win 4096
23:49:02.075165 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>
23:49:05.164195 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>
23:49:11.264245 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>
23:49:23.364342 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>
Thanks,
Mike "Silby" Silbersack
-------------- next part --------------
--- /usr/src/sys.old/netinet/tcp_syncache.c 2007-06-24 20:17:31.000000000 -0500
+++ /usr/src/sys/netinet/tcp_syncache.c 2007-07-09 00:46:18.000000000 -0500
@@ -149,7 +150,6 @@
struct mtx sch_mtx;
TAILQ_HEAD(sch_head, syncache) sch_bucket;
struct callout sch_timer;
- int sch_nextc;
u_int sch_length;
u_int sch_oddeven;
u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
@@ -240,16 +240,10 @@
#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0)
-#define SYNCACHE_TIMEOUT(sc, sch, co) do { \
+#define SYNCACHE_TIMEOUT(sc) do { \
(sc)->sc_rxmits++; \
(sc)->sc_rxttime = ticks + \
TCPTV_RTOBASE * tcp_backoff[(sc)->sc_rxmits - 1]; \
- if ((sch)->sch_nextc > (sc)->sc_rxttime) \
- (sch)->sch_nextc = (sc)->sc_rxttime; \
- if (!TAILQ_EMPTY(&(sch)->sch_bucket) && !(co)) \
- callout_reset(&(sch)->sch_timer, \
- (sch)->sch_nextc - ticks, \
- syncache_timer, (void *)(sch)); \
} while (0)
#define SCH_LOCK(sch) mtx_lock(&(sch)->sch_mtx)
@@ -275,6 +269,7 @@
syncache_init(void)
{
int i;
+ struct syncache_head *sch;
tcp_syncache.cache_count = 0;
tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
@@ -317,6 +312,17 @@
tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
+
+ /*
+ * Start the syncache head timers running. They each run ten times
+ * a second, and are spread out so that they are not all running on
+ * the same clock tick.
+ */
+ for (i = 0; i < tcp_syncache.hashsize; i++) {
+ sch = &tcp_syncache.hashbase[i];
+ callout_reset(&(sch)->sch_timer, i * (hz / 10),
+ syncache_timer, (void *)(sch));
+ }
}
/*
@@ -346,8 +352,8 @@
TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
- /* Reinitialize the bucket row's timer. */
- SYNCACHE_TIMEOUT(sc, sch, 1);
+ /* Set the retransmit timer for this socket. */
+ SYNCACHE_TIMEOUT(sc);
SCH_UNLOCK(sch);
@@ -398,8 +404,6 @@
* host does the SYN/ACK->ACK.
*/
if (sc->sc_rxttime >= tick) {
- if (sc->sc_rxttime < sch->sch_nextc)
- sch->sch_nextc = sc->sc_rxttime;
continue;
}
@@ -416,11 +420,10 @@
(void) syncache_respond(sc);
tcpstat.tcps_sc_retransmitted++;
- SYNCACHE_TIMEOUT(sc, sch, 0);
+ SYNCACHE_TIMEOUT(sc);
}
- if (!TAILQ_EMPTY(&(sch)->sch_bucket))
- callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
- syncache_timer, (void *)(sch));
+ callout_reset(&(sch)->sch_timer, hz / 10,
+ syncache_timer, (void *)(sch));
}
/*
@@ -1007,7 +1010,7 @@
("%s: label not initialized", __func__));
#endif
if (syncache_respond(sc) == 0) {
- SYNCACHE_TIMEOUT(sc, sch, 1);
+ SYNCACHE_TIMEOUT(sc);
tcpstat.tcps_sndacks++;
tcpstat.tcps_sndtotal++;
}
More information about the freebsd-current
mailing list