PERFORCE change 111257 for review
Marko Zec
zec at FreeBSD.org
Thu Dec 7 12:20:26 PST 2006
http://perforce.freebsd.org/chv.cgi?CH=111257
Change 111257 by zec at zec_tca51 on 2006/12/07 20:19:28
Virtualize tcp_syncache.
Affected files ...
... //depot/projects/vimage/src/sys/netinet/tcp_subr.c#6 edit
... //depot/projects/vimage/src/sys/netinet/tcp_syncache.c#4 edit
... //depot/projects/vimage/src/sys/netinet/tcp_syncache.h#1 add
... //depot/projects/vimage/src/sys/netinet/vinet.h#4 edit
Differences ...
==== //depot/projects/vimage/src/sys/netinet/tcp_subr.c#6 (text+ko) ====
@@ -387,6 +387,7 @@
#undef TCP_MINPROTOHDR
tcp_timer_init();
+ syncache_init();
tcp_hc_init();
#ifdef VIMAGE
@@ -394,7 +395,6 @@
return;
#endif
- syncache_init();
tcp_reass_init();
ISN_LOCK_INIT();
callout_init(&isn_callout, CALLOUT_MPSAFE);
==== //depot/projects/vimage/src/sys/netinet/tcp_syncache.c#4 (text+ko) ====
@@ -79,6 +79,7 @@
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -112,51 +113,6 @@
&tcp_syncookiesonly, 0,
"Use only TCP SYN cookies");
-#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */
-#define SYNCOOKIE_LIFETIME 16 /* seconds */
-
-struct syncache {
- TAILQ_ENTRY(syncache) sc_hash;
- struct in_conninfo sc_inc; /* addresses */
- u_long sc_rxttime; /* retransmit time */
- u_int16_t sc_rxmits; /* retransmit counter */
-
- u_int32_t sc_tsreflect; /* timestamp to reflect */
- u_int32_t sc_ts; /* our timestamp to send */
- u_int32_t sc_tsoff; /* ts offset w/ syncookies */
- u_int32_t sc_flowlabel; /* IPv6 flowlabel */
- tcp_seq sc_irs; /* seq from peer */
- tcp_seq sc_iss; /* our ISS */
- struct mbuf *sc_ipopts; /* source route */
-
- u_int16_t sc_peer_mss; /* peer's MSS */
- u_int16_t sc_wnd; /* advertised window */
- u_int8_t sc_ip_ttl; /* IPv4 TTL */
- u_int8_t sc_ip_tos; /* IPv4 TOS */
- u_int8_t sc_requested_s_scale:4,
- sc_requested_r_scale:4;
- u_int8_t sc_flags;
-#define SCF_NOOPT 0x01 /* no TCP options */
-#define SCF_WINSCALE 0x02 /* negotiated window scaling */
-#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
- /* MSS is implicit */
-#define SCF_UNREACH 0x10 /* icmp unreachable received */
-#define SCF_SIGNATURE 0x20 /* send MD5 digests */
-#define SCF_SACK 0x80 /* send SACK option */
-};
-
-struct syncache_head {
- struct mtx sch_mtx;
- TAILQ_HEAD(sch_head, syncache) sch_bucket;
- struct callout sch_timer;
- int sch_nextc;
- u_int sch_length;
- u_int sch_oddeven;
- u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
- u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
- u_int sch_reseed; /* time_uptime, seconds */
-};
-
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
@@ -183,46 +139,42 @@
#define TCP_SYNCACHE_HASHSIZE 512
#define TCP_SYNCACHE_BUCKETLIMIT 30
-struct tcp_syncache {
- struct syncache_head *hashbase;
- uma_zone_t zone;
- u_int hashsize;
- u_int hashmask;
- u_int bucket_limit;
- u_int cache_count; /* XXX: unprotected */
- u_int cache_limit;
- u_int rexmt_limit;
- u_int hash_secret;
-};
+#ifndef VIMAGE
static struct tcp_syncache tcp_syncache;
+#endif
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
- &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+ bucketlimit, CTLFLAG_RDTUN,
+ tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache");
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
- &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+ cachelimit, CTLFLAG_RDTUN,
+ tcp_syncache.cache_limit, 0, "Overall entry limit for syncache");
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
- &tcp_syncache.cache_count, 0, "Current number of entries in syncache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+ count, CTLFLAG_RD,
+ tcp_syncache.cache_count, 0, "Current number of entries in syncache");
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
- &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+ hashsize, CTLFLAG_RDTUN,
+ tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable");
-SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
- &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO,
+ rexmtlimit, CTLFLAG_RW,
+ tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
#define SYNCACHE_HASH(inc, mask) \
- ((tcp_syncache.hash_secret ^ \
+ ((V_tcp_syncache.hash_secret ^ \
(inc)->inc_faddr.s_addr ^ \
((inc)->inc_faddr.s_addr >> 16) ^ \
(inc)->inc_fport ^ (inc)->inc_lport) & mask)
#define SYNCACHE_HASH6(inc, mask) \
- ((tcp_syncache.hash_secret ^ \
+ ((V_tcp_syncache.hash_secret ^ \
(inc)->inc6_faddr.s6_addr32[0] ^ \
(inc)->inc6_faddr.s6_addr32[3] ^ \
(inc)->inc_fport ^ (inc)->inc_lport) & mask)
@@ -258,58 +210,66 @@
static void
syncache_free(struct syncache *sc)
{
+ INIT_VNET_INET(curvnetb);
+
if (sc->sc_ipopts)
(void) m_free(sc->sc_ipopts);
- uma_zfree(tcp_syncache.zone, sc);
+ uma_zfree(V_tcp_syncache.zone, sc);
}
void
syncache_init(void)
{
+ INIT_VNET_INET(curvnetb);
int i;
- tcp_syncache.cache_count = 0;
- tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
- tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
- tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
- tcp_syncache.hash_secret = arc4random();
+ V_tcp_syncache.cache_count = 0;
+ V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
+ V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
+ V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
+ V_tcp_syncache.hash_secret = arc4random();
TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
- &tcp_syncache.hashsize);
+ &V_tcp_syncache.hashsize);
TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
- &tcp_syncache.bucket_limit);
- if (!powerof2(tcp_syncache.hashsize) || tcp_syncache.hashsize == 0) {
+ &V_tcp_syncache.bucket_limit);
+ if (!powerof2(V_tcp_syncache.hashsize) ||
+ V_tcp_syncache.hashsize == 0) {
printf("WARNING: syncache hash size is not a power of 2.\n");
- tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
+ V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
}
- tcp_syncache.hashmask = tcp_syncache.hashsize - 1;
+ V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1;
/* Set limits. */
- tcp_syncache.cache_limit =
- tcp_syncache.hashsize * tcp_syncache.bucket_limit;
+ V_tcp_syncache.cache_limit =
+ V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit;
TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
- &tcp_syncache.cache_limit);
+ &V_tcp_syncache.cache_limit);
/* Allocate the hash table. */
- MALLOC(tcp_syncache.hashbase, struct syncache_head *,
- tcp_syncache.hashsize * sizeof(struct syncache_head),
+ MALLOC(V_tcp_syncache.hashbase, struct syncache_head *,
+ V_tcp_syncache.hashsize * sizeof(struct syncache_head),
M_SYNCACHE, M_WAITOK | M_ZERO);
/* Initialize the hash buckets. */
- for (i = 0; i < tcp_syncache.hashsize; i++) {
- TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket);
- mtx_init(&tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
+ for (i = 0; i < V_tcp_syncache.hashsize; i++) {
+#ifdef VIMAGE
+ V_tcp_syncache.hashbase[i].sch_vnetb = curvnetb;
+#endif
+ TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
+ mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
NULL, MTX_DEF);
- callout_init_mtx(&tcp_syncache.hashbase[i].sch_timer,
- &tcp_syncache.hashbase[i].sch_mtx, 0);
- tcp_syncache.hashbase[i].sch_length = 0;
+ callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
+ &V_tcp_syncache.hashbase[i].sch_mtx, 0);
+ V_tcp_syncache.hashbase[i].sch_length = 0;
}
/* Create the syncache entry zone. */
- tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
+ /* XXX one zone for all vnets should do fine - revisit!!! */
+ V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
+ uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit);
}
/*
@@ -319,7 +279,7 @@
static void
syncache_insert(struct syncache *sc, struct syncache_head *sch)
{
- INIT_VNET_INET(curvnetb);
+ INIT_VNET_INET(sch->sch_vnetb);
struct syncache *sc2;
SCH_LOCK(sch);
@@ -328,7 +288,7 @@
* Make sure that we don't overflow the per-bucket limit.
* If the bucket is full, toss the oldest element.
*/
- if (sch->sch_length >= tcp_syncache.bucket_limit) {
+ if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
("sch->sch_length incorrect"));
sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
@@ -345,7 +305,7 @@
SCH_UNLOCK(sch);
- tcp_syncache.cache_count++;
+ V_tcp_syncache.cache_count++;
V_tcpstat.tcps_sc_added++;
}
@@ -356,6 +316,7 @@
static void
syncache_drop(struct syncache *sc, struct syncache_head *sch)
{
+ INIT_VNET_INET(sch->sch_vnetb);
SCH_LOCK_ASSERT(sch);
@@ -363,7 +324,7 @@
sch->sch_length--;
syncache_free(sc);
- tcp_syncache.cache_count--;
+ V_tcp_syncache.cache_count--;
}
/*
@@ -374,10 +335,10 @@
static void
syncache_timer(void *xsch)
{
- INIT_VNET_INET(curvnetb); /* XXX this can't work !!! */
struct syncache_head *sch = (struct syncache_head *)xsch;
struct syncache *sc, *nsc;
int tick = ticks;
+ INIT_VNET_INET(sch->sch_vnetb);
/* NB: syncache_head has already been locked by the callout. */
SCH_LOCK_ASSERT(sch);
@@ -397,7 +358,7 @@
continue;
}
- if (sc->sc_rxmits > tcp_syncache.rexmt_limit) {
+ if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) {
syncache_drop(sc, sch);
V_tcpstat.tcps_sc_stale++;
continue;
@@ -419,13 +380,14 @@
struct syncache *
syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
{
+ INIT_VNET_INET(curvnetb);
struct syncache *sc;
struct syncache_head *sch;
#ifdef INET6
if (inc->inc_isipv6) {
- sch = &tcp_syncache.hashbase[
- SYNCACHE_HASH6(inc, tcp_syncache.hashmask)];
+ sch = &V_tcp_syncache.hashbase[
+ SYNCACHE_HASH6(inc, V_tcp_syncache.hashmask)];
*schp = sch;
SCH_LOCK(sch);
@@ -438,8 +400,8 @@
} else
#endif
{
- sch = &tcp_syncache.hashbase[
- SYNCACHE_HASH(inc, tcp_syncache.hashmask)];
+ sch = &V_tcp_syncache.hashbase[
+ SYNCACHE_HASH(inc, V_tcp_syncache.hashmask)];
*schp = sch;
SCH_LOCK(sch);
@@ -795,7 +757,7 @@
/* Pull out the entry to unlock the bucket row. */
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
- tcp_syncache.cache_count--;
+ V_tcp_syncache.cache_count--;
SCH_UNLOCK(sch);
}
@@ -933,7 +895,7 @@
goto done;
}
- sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
+ sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
/*
* The zone allocator couldn't provide more entries.
@@ -943,7 +905,7 @@
V_tcpstat.tcps_sc_zonefail++;
sc = TAILQ_LAST(&sch->sch_bucket, sch_head);
syncache_drop(sc, sch);
- sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
+ sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
if (tcp_syncookies) {
bzero(&scs, sizeof(scs));
==== //depot/projects/vimage/src/sys/netinet/vinet.h#4 (text+ko) ====
@@ -48,6 +48,7 @@
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_hostcache.h>
+#include <netinet/tcp_syncache.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
@@ -68,6 +69,7 @@
struct tcpstat _tcpstat; /* tcp statistics */
TAILQ_HEAD(, tcptw) _twq_2msl;
struct tcp_hostcache _tcp_hostcache;
+ struct tcp_syncache _tcp_syncache;
struct inpcbhead _udb;
struct inpcbinfo _udbinfo;
@@ -113,6 +115,7 @@
#define V_tcpstat VNET_INET(tcpstat)
#define V_twq_2msl VNET_INET(twq_2msl)
#define V_tcp_hostcache VNET_INET(tcp_hostcache)
+#define V_tcp_syncache VNET_INET(tcp_syncache)
#define V_udb VNET_INET(udb)
#define V_udbinfo VNET_INET(udbinfo)
More information about the p4-projects mailing list