PERFORCE change 111893 for review
Marko Zec
zec at FreeBSD.org
Mon Dec 18 05:55:30 PST 2006
http://perforce.freebsd.org/chv.cgi?CH=111893
Change 111893 by zec at zec_tpx32 on 2006/12/18 13:54:41
First pass on virtualizing the IP datagram reassembly process.
Affected files ...
... //depot/projects/vimage/src/sys/netinet/in_var.h#3 edit
... //depot/projects/vimage/src/sys/netinet/ip_input.c#5 edit
... //depot/projects/vimage/src/sys/netinet/vinet.h#6 edit
Differences ...
==== //depot/projects/vimage/src/sys/netinet/in_var.h#3 (text+ko) ====
@@ -92,16 +92,25 @@
extern u_long in_ifaddrhmask; /* mask for hash table */
#endif
+/*
+ * IP datagram reassembly.
+ */
+#define IPREASS_NHASH_LOG2 6
+#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
+#define IPREASS_HMASK (IPREASS_NHASH - 1)
+#define IPREASS_HASH(x,y) \
+ (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr)
+ * corresponding to one of our IP addresses (in_addr).
+ */
#define INADDR_NHASH_LOG2 9
#define INADDR_NHASH (1 << INADDR_NHASH_LOG2)
#define INADDR_HASHVAL(x) fnv_32_buf((&(x)), sizeof(x), FNV1_32_INIT)
#define INADDR_HASH(x) \
(&V_in_ifaddrhashtbl[INADDR_HASHVAL(x) & V_in_ifaddrhmask])
-/*
- * Macro for finding the internet address structure (in_ifaddr)
- * corresponding to one of our IP addresses (in_addr).
- */
#define INADDR_TO_IFADDR(addr, ia) \
/* struct in_addr addr; */ \
/* struct in_ifaddr *ia; */ \
==== //depot/projects/vimage/src/sys/netinet/ip_input.c#5 (text+ko) ====
@@ -160,18 +160,14 @@
SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
-/*
- * IP datagram reassembly.
- */
-#define IPREASS_NHASH_LOG2 6
-#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
-#define IPREASS_HMASK (IPREASS_NHASH - 1)
-#define IPREASS_HASH(x,y) \
- (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
-
+static struct mtx ipqlock;
+#ifndef VIMAGE
+static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
static uma_zone_t ipq_zone;
-static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
-static struct mtx ipqlock;
+static int nipq = 0; /* Total # of reass queues */
+static int maxnipq; /* Administrative limit on # reass queues. */
+static int maxfragsperpacket;
+#endif
#define IPQ_LOCK() mtx_lock(&ipqlock)
#define IPQ_UNLOCK() mtx_unlock(&ipqlock)
@@ -181,14 +177,12 @@
static void maxnipq_update(void);
static void ipq_zone_change(void *);
-static int maxnipq; /* Administrative limit on # reass queues. */
-static int nipq = 0; /* Total # of reass queues */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &nipq, 0,
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, fragpackets,
+ CTLFLAG_RD, nipq, 0,
"Current number of IPv4 fragment reassembly queue entries");
-static int maxfragsperpacket;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
- &maxfragsperpacket, 0,
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, maxfragsperpacket,
+ CTLFLAG_RW, maxfragsperpacket, 0,
"Maximum number of IPv4 fragments allowed per packet");
struct callout ipport_tick_callout;
@@ -262,6 +256,16 @@
V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR,
&V_in_ifaddrhmask);
+ /* Initialize IP reassembly queue. */
+ IPQ_LOCK_INIT();
+ for (i = 0; i < IPREASS_NHASH; i++)
+ TAILQ_INIT(&V_ipq[i]);
+ V_maxnipq = nmbclusters / 32;
+ V_maxfragsperpacket = 16;
+ V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
+ NULL, UMA_ALIGN_PTR, 0);
+ maxnipq_update();
+
#ifdef VIMAGE
/*
* Skip global initialization stuff
@@ -298,16 +302,6 @@
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
- /* Initialize IP reassembly queue. */
- IPQ_LOCK_INIT();
- for (i = 0; i < IPREASS_NHASH; i++)
- TAILQ_INIT(&ipq[i]);
- maxnipq = nmbclusters / 32;
- maxfragsperpacket = 16;
- ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
- NULL, UMA_ALIGN_PTR, 0);
- maxnipq_update();
-
/* Start ipport_tick. */
callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
ipport_tick(NULL);
@@ -723,32 +717,34 @@
static void
maxnipq_update(void)
{
+ INIT_VNET_INET(curvnetb);
/*
* -1 for unlimited allocation.
*/
- if (maxnipq < 0)
- uma_zone_set_max(ipq_zone, 0);
+ if (V_maxnipq < 0)
+ uma_zone_set_max(V_ipq_zone, 0);
/*
* Positive number for specific bound.
*/
- if (maxnipq > 0)
- uma_zone_set_max(ipq_zone, maxnipq);
+ if (V_maxnipq > 0)
+ uma_zone_set_max(V_ipq_zone, V_maxnipq);
/*
* Zero specifies no further fragment queue allocation -- set the
* bound very low, but rely on implementation elsewhere to actually
* prevent allocation and reclaim current queues.
*/
- if (maxnipq == 0)
- uma_zone_set_max(ipq_zone, 1);
+ if (V_maxnipq == 0)
+ uma_zone_set_max(V_ipq_zone, 1);
}
static void
ipq_zone_change(void *tag)
{
+ INIT_VNET_INET(curvnetb);
- if (maxnipq > 0 && maxnipq < (nmbclusters / 32)) {
- maxnipq = nmbclusters / 32;
+ if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) {
+ V_maxnipq = nmbclusters / 32;
maxnipq_update();
}
}
@@ -756,9 +752,10 @@
static int
sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
{
+ INIT_VNET_INET(curvnetb);
int error, i;
- i = maxnipq;
+ i = V_maxnipq;
error = sysctl_handle_int(oidp, &i, 0, req);
if (error || !req->newptr)
return (error);
@@ -769,7 +766,7 @@
*/
if (i < -1)
return (EINVAL);
- maxnipq = i;
+ V_maxnipq = i;
maxnipq_update();
return (0);
}
@@ -802,7 +799,7 @@
u_short hash;
/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
- if (maxnipq == 0 || maxfragsperpacket == 0) {
+ if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
V_ipstat.ips_fragments++;
V_ipstat.ips_fragdropped++;
m_freem(m);
@@ -813,7 +810,7 @@
hlen = ip->ip_hl << 2;
hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
- head = &ipq[hash];
+ head = &V_ipq[hash];
IPQ_LOCK();
/*
@@ -836,7 +833,7 @@
* Attempt to trim the number of allocated fragment queues if it
* exceeds the administrative limit.
*/
- if ((nipq > maxnipq) && (maxnipq > 0)) {
+ if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
/*
* drop something from the tail of the current queue
* before proceeding further
@@ -844,11 +841,11 @@
struct ipq *q = TAILQ_LAST(head, ipqhead);
if (q == NULL) { /* gak */
for (i = 0; i < IPREASS_NHASH; i++) {
- struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
+ struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
if (r) {
V_ipstat.ips_fragtimeout +=
r->ipq_nfrags;
- ip_freef(&ipq[i], r);
+ ip_freef(&V_ipq[i], r);
break;
}
}
@@ -898,19 +895,19 @@
* If first fragment to arrive, create a reassembly queue.
*/
if (fp == NULL) {
- fp = uma_zalloc(ipq_zone, M_NOWAIT);
+ fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
if (fp == NULL)
goto dropfrag;
#ifdef MAC
if (mac_init_ipq(fp, M_NOWAIT) != 0) {
- uma_zfree(ipq_zone, fp);
+ uma_zfree(V_ipq_zone, fp);
fp = NULL;
goto dropfrag;
}
mac_create_ipq(m, fp);
#endif
TAILQ_INSERT_HEAD(head, fp, ipq_list);
- nipq++;
+ V_nipq++;
fp->ipq_nfrags = 1;
fp->ipq_ttl = IPFRAGTTL;
fp->ipq_p = ip->ip_p;
@@ -1012,7 +1009,7 @@
next = 0;
for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
if (GETIP(q)->ip_off != next) {
- if (fp->ipq_nfrags > maxfragsperpacket) {
+ if (fp->ipq_nfrags > V_maxfragsperpacket) {
V_ipstat.ips_fragdropped += fp->ipq_nfrags;
ip_freef(head, fp);
}
@@ -1022,7 +1019,7 @@
}
/* Make sure the last packet didn't have the IP_MF flag */
if (p->m_flags & M_FRAG) {
- if (fp->ipq_nfrags > maxfragsperpacket) {
+ if (fp->ipq_nfrags > V_maxfragsperpacket) {
V_ipstat.ips_fragdropped += fp->ipq_nfrags;
ip_freef(head, fp);
}
@@ -1078,8 +1075,8 @@
ip->ip_src = fp->ipq_src;
ip->ip_dst = fp->ipq_dst;
TAILQ_REMOVE(head, fp, ipq_list);
- nipq--;
- uma_zfree(ipq_zone, fp);
+ V_nipq--;
+ uma_zfree(V_ipq_zone, fp);
m->m_len += (ip->ip_hl << 2);
m->m_data -= (ip->ip_hl << 2);
/* some debugging cruft by sklower, below, will go away soon */
@@ -1110,6 +1107,7 @@
struct ipqhead *fhp;
struct ipq *fp;
{
+ INIT_VNET_INET(curvnetb);
register struct mbuf *q;
IPQ_LOCK_ASSERT();
@@ -1120,8 +1118,8 @@
m_freem(q);
}
TAILQ_REMOVE(fhp, fp, ipq_list);
- uma_zfree(ipq_zone, fp);
- nipq--;
+ uma_zfree(V_ipq_zone, fp);
+ V_nipq--;
}
/*
@@ -1139,14 +1137,14 @@
VNETB_ITERLOOP_BEGIN();
INIT_VNET_INET(vnetb_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
- for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
+ for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) {
struct ipq *fpp;
fpp = fp;
fp = TAILQ_NEXT(fp, ipq_list);
if(--fpp->ipq_ttl == 0) {
V_ipstat.ips_fragtimeout += fpp->ipq_nfrags;
- ip_freef(&ipq[i], fpp);
+ ip_freef(&V_ipq[i], fpp);
}
}
}
@@ -1155,12 +1153,12 @@
* (due to the limit being lowered), drain off
* enough to get down to the new limit.
*/
- if (maxnipq >= 0 && nipq > maxnipq) {
+ if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
for (i = 0; i < IPREASS_NHASH; i++) {
- while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
+ while (V_nipq > V_maxnipq && !TAILQ_EMPTY(&V_ipq[i])) {
V_ipstat.ips_fragdropped +=
- TAILQ_FIRST(&ipq[i])->ipq_nfrags;
- ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
+ TAILQ_FIRST(&V_ipq[i])->ipq_nfrags;
+ ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
}
}
}
@@ -1180,10 +1178,10 @@
VNETB_ITERLOOP_BEGIN();
INIT_VNET_INET(vnetb_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
- while(!TAILQ_EMPTY(&ipq[i])) {
+ while(!TAILQ_EMPTY(&V_ipq[i])) {
V_ipstat.ips_fragdropped +=
- TAILQ_FIRST(&ipq[i])->ipq_nfrags;
- ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
+ TAILQ_FIRST(&V_ipq[i])->ipq_nfrags;
+ ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
}
}
VNETB_ITERLOOP_END();
==== //depot/projects/vimage/src/sys/netinet/vinet.h#6 (text+ko) ====
@@ -65,6 +65,12 @@
int _ipforwarding;
+ TAILQ_HEAD(ipqhead, ipq) _ipq[IPREASS_NHASH];
+ uma_zone_t _ipq_zone;
+ int _nipq; /* Total # of reass queues */
+ int _maxnipq; /* Admin. limit on # reass queues. */
+ int _maxfragsperpacket;
+
struct inpcbhead _tcb; /* head of queue of active tcpcb's */
struct inpcbinfo _tcbinfo;
struct tcpstat _tcpstat; /* tcp statistics */
@@ -112,6 +118,12 @@
#define V_ipforwarding VNET_INET(ipforwarding)
+#define V_ipq VNET_INET(ipq)
+#define V_ipq_zone VNET_INET(ipq_zone)
+#define V_nipq VNET_INET(nipq)
+#define V_maxnipq VNET_INET(maxnipq)
+#define V_maxfragsperpacket VNET_INET(maxfragsperpacket)
+
#define V_tcb VNET_INET(tcb)
#define V_tcbinfo VNET_INET(tcbinfo)
#define V_tcpstat VNET_INET(tcpstat)
More information about the p4-projects mailing list