PERFORCE change 111893 for review

Marko Zec zec at FreeBSD.org
Mon Dec 18 05:55:30 PST 2006


http://perforce.freebsd.org/chv.cgi?CH=111893

Change 111893 by zec at zec_tpx32 on 2006/12/18 13:54:41

	First pass on virtualizing the IP datagram reassembly process.

Affected files ...

.. //depot/projects/vimage/src/sys/netinet/in_var.h#3 edit
.. //depot/projects/vimage/src/sys/netinet/ip_input.c#5 edit
.. //depot/projects/vimage/src/sys/netinet/vinet.h#6 edit

Differences ...

==== //depot/projects/vimage/src/sys/netinet/in_var.h#3 (text+ko) ====

@@ -92,16 +92,25 @@
 extern	u_long in_ifaddrhmask;			/* mask for hash table */
 #endif
 
+/*
+ * IP datagram reassembly.
+ */
+#define IPREASS_NHASH_LOG2      6
+#define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
+#define IPREASS_HMASK           (IPREASS_NHASH - 1)
+#define IPREASS_HASH(x,y) \
+        (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr)
+ * corresponding to one of our IP addresses (in_addr).
+ */
 #define INADDR_NHASH_LOG2       9
 #define INADDR_NHASH		(1 << INADDR_NHASH_LOG2)
 #define INADDR_HASHVAL(x)	fnv_32_buf((&(x)), sizeof(x), FNV1_32_INIT)
 #define INADDR_HASH(x) \
 	(&V_in_ifaddrhashtbl[INADDR_HASHVAL(x) & V_in_ifaddrhmask])
 
-/*
- * Macro for finding the internet address structure (in_ifaddr)
- * corresponding to one of our IP addresses (in_addr).
- */
 #define INADDR_TO_IFADDR(addr, ia) \
 	/* struct in_addr addr; */ \
 	/* struct in_ifaddr *ia; */ \

==== //depot/projects/vimage/src/sys/netinet/ip_input.c#5 (text+ko) ====

@@ -160,18 +160,14 @@
 SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
     ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
 
-/*
- * IP datagram reassembly.
- */
-#define IPREASS_NHASH_LOG2      6
-#define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
-#define IPREASS_HMASK           (IPREASS_NHASH - 1)
-#define IPREASS_HASH(x,y) \
-	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
-
+static struct mtx ipqlock;
+#ifndef VIMAGE
+static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
 static uma_zone_t ipq_zone;
-static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
-static struct mtx ipqlock;
+static int	nipq = 0;	/* Total # of reass queues */
+static int	maxnipq;	/* Administrative limit on # reass queues. */
+static int	maxfragsperpacket;
+#endif
 
 #define	IPQ_LOCK()	mtx_lock(&ipqlock)
 #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
@@ -181,14 +177,12 @@
 static void	maxnipq_update(void);
 static void	ipq_zone_change(void *);
 
-static int	maxnipq;	/* Administrative limit on # reass queues. */
-static int	nipq = 0;	/* Total # of reass queues */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &nipq, 0,
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, fragpackets,
+	CTLFLAG_RD, nipq, 0,
 	"Current number of IPv4 fragment reassembly queue entries");
 
-static int	maxfragsperpacket;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
-	&maxfragsperpacket, 0,
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, maxfragsperpacket,
+	CTLFLAG_RW, maxfragsperpacket, 0,
 	"Maximum number of IPv4 fragments allowed per packet");
 
 struct callout	ipport_tick_callout;
@@ -262,6 +256,16 @@
 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR,
 				      &V_in_ifaddrhmask);
 
+	/* Initialize IP reassembly queue. */
+	IPQ_LOCK_INIT();
+	for (i = 0; i < IPREASS_NHASH; i++)
+	    TAILQ_INIT(&V_ipq[i]);
+	V_maxnipq = nmbclusters / 32;
+	V_maxfragsperpacket = 16;
+	V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
+	    NULL, UMA_ALIGN_PTR, 0);
+	maxnipq_update();
+
 #ifdef VIMAGE
 	/*
 	 * Skip global initialization stuff
@@ -298,16 +302,6 @@
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
-	/* Initialize IP reassembly queue. */
-	IPQ_LOCK_INIT();
-	for (i = 0; i < IPREASS_NHASH; i++)
-	    TAILQ_INIT(&ipq[i]);
-	maxnipq = nmbclusters / 32;
-	maxfragsperpacket = 16;
-	ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
-	    NULL, UMA_ALIGN_PTR, 0);
-	maxnipq_update();
-
 	/* Start ipport_tick. */
 	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
 	ipport_tick(NULL);
@@ -723,32 +717,34 @@
 static void
 maxnipq_update(void)
 {
+	INIT_VNET_INET(curvnetb);
 
 	/*
 	 * -1 for unlimited allocation.
 	 */
-	if (maxnipq < 0)
-		uma_zone_set_max(ipq_zone, 0);
+	if (V_maxnipq < 0)
+		uma_zone_set_max(V_ipq_zone, 0);
 	/*
 	 * Positive number for specific bound.
 	 */
-	if (maxnipq > 0)
-		uma_zone_set_max(ipq_zone, maxnipq);
+	if (V_maxnipq > 0)
+		uma_zone_set_max(V_ipq_zone, V_maxnipq);
 	/*
 	 * Zero specifies no further fragment queue allocation -- set the
 	 * bound very low, but rely on implementation elsewhere to actually
 	 * prevent allocation and reclaim current queues.
 	 */
-	if (maxnipq == 0)
-		uma_zone_set_max(ipq_zone, 1);
+	if (V_maxnipq == 0)
+		uma_zone_set_max(V_ipq_zone, 1);
 }
 
 static void
 ipq_zone_change(void *tag)
 {
+	INIT_VNET_INET(curvnetb);
 
-	if (maxnipq > 0 && maxnipq < (nmbclusters / 32)) {
-		maxnipq = nmbclusters / 32;
+	if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) {
+		V_maxnipq = nmbclusters / 32;
 		maxnipq_update();
 	}
 }
@@ -756,9 +752,10 @@
 static int
 sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
 {
+	INIT_VNET_INET(curvnetb);
 	int error, i;
 
-	i = maxnipq;
+	i = V_maxnipq;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || !req->newptr)
 		return (error);
@@ -769,7 +766,7 @@
 	 */
 	if (i < -1)
 		return (EINVAL);
-	maxnipq = i;
+	V_maxnipq = i;
 	maxnipq_update();
 	return (0);
 }
@@ -802,7 +799,7 @@
 	u_short hash;
 
 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
-	if (maxnipq == 0 || maxfragsperpacket == 0) {
+	if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
 		V_ipstat.ips_fragments++;
 		V_ipstat.ips_fragdropped++;
 		m_freem(m);
@@ -813,7 +810,7 @@
 	hlen = ip->ip_hl << 2;
 
 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
-	head = &ipq[hash];
+	head = &V_ipq[hash];
 	IPQ_LOCK();
 
 	/*
@@ -836,7 +833,7 @@
 	 * Attempt to trim the number of allocated fragment queues if it
 	 * exceeds the administrative limit.
 	 */
-	if ((nipq > maxnipq) && (maxnipq > 0)) {
+	if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
 		/*
 		 * drop something from the tail of the current queue
 		 * before proceeding further
@@ -844,11 +841,11 @@
 		struct ipq *q = TAILQ_LAST(head, ipqhead);
 		if (q == NULL) {   /* gak */
 			for (i = 0; i < IPREASS_NHASH; i++) {
-				struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
+				struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
 				if (r) {
 					V_ipstat.ips_fragtimeout +=
 								r->ipq_nfrags;
-					ip_freef(&ipq[i], r);
+					ip_freef(&V_ipq[i], r);
 					break;
 				}
 			}
@@ -898,19 +895,19 @@
 	 * If first fragment to arrive, create a reassembly queue.
 	 */
 	if (fp == NULL) {
-		fp = uma_zalloc(ipq_zone, M_NOWAIT);
+		fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
 		if (fp == NULL)
 			goto dropfrag;
 #ifdef MAC
 		if (mac_init_ipq(fp, M_NOWAIT) != 0) {
-			uma_zfree(ipq_zone, fp);
+			uma_zfree(V_ipq_zone, fp);
 			fp = NULL;
 			goto dropfrag;
 		}
 		mac_create_ipq(m, fp);
 #endif
 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
-		nipq++;
+		V_nipq++;
 		fp->ipq_nfrags = 1;
 		fp->ipq_ttl = IPFRAGTTL;
 		fp->ipq_p = ip->ip_p;
@@ -1012,7 +1009,7 @@
 	next = 0;
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
 		if (GETIP(q)->ip_off != next) {
-			if (fp->ipq_nfrags > maxfragsperpacket) {
+			if (fp->ipq_nfrags > V_maxfragsperpacket) {
 				V_ipstat.ips_fragdropped += fp->ipq_nfrags;
 				ip_freef(head, fp);
 			}
@@ -1022,7 +1019,7 @@
 	}
 	/* Make sure the last packet didn't have the IP_MF flag */
 	if (p->m_flags & M_FRAG) {
-		if (fp->ipq_nfrags > maxfragsperpacket) {
+		if (fp->ipq_nfrags > V_maxfragsperpacket) {
 			V_ipstat.ips_fragdropped += fp->ipq_nfrags;
 			ip_freef(head, fp);
 		}
@@ -1078,8 +1075,8 @@
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 	TAILQ_REMOVE(head, fp, ipq_list);
-	nipq--;
-	uma_zfree(ipq_zone, fp);
+	V_nipq--;
+	uma_zfree(V_ipq_zone, fp);
 	m->m_len += (ip->ip_hl << 2);
 	m->m_data -= (ip->ip_hl << 2);
 	/* some debugging cruft by sklower, below, will go away soon */
@@ -1110,6 +1107,7 @@
 	struct ipqhead *fhp;
 	struct ipq *fp;
 {
+	INIT_VNET_INET(curvnetb);
 	register struct mbuf *q;
 
 	IPQ_LOCK_ASSERT();
@@ -1120,8 +1118,8 @@
 		m_freem(q);
 	}
 	TAILQ_REMOVE(fhp, fp, ipq_list);
-	uma_zfree(ipq_zone, fp);
-	nipq--;
+	uma_zfree(V_ipq_zone, fp);
+	V_nipq--;
 }
 
 /*
@@ -1139,14 +1137,14 @@
 	VNETB_ITERLOOP_BEGIN();
 	INIT_VNET_INET(vnetb_iter);
 	for (i = 0; i < IPREASS_NHASH; i++) {
-		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
+		for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) {
 			struct ipq *fpp;
 
 			fpp = fp;
 			fp = TAILQ_NEXT(fp, ipq_list);
 			if(--fpp->ipq_ttl == 0) {
 				V_ipstat.ips_fragtimeout += fpp->ipq_nfrags;
-				ip_freef(&ipq[i], fpp);
+				ip_freef(&V_ipq[i], fpp);
 			}
 		}
 	}
@@ -1155,12 +1153,12 @@
 	 * (due to the limit being lowered), drain off
 	 * enough to get down to the new limit.
 	 */
-	if (maxnipq >= 0 && nipq > maxnipq) {
+	if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
 		for (i = 0; i < IPREASS_NHASH; i++) {
-			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
+			while (V_nipq > V_maxnipq && !TAILQ_EMPTY(&V_ipq[i])) {
 				V_ipstat.ips_fragdropped +=
-				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
-				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
+				    TAILQ_FIRST(&V_ipq[i])->ipq_nfrags;
+				ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
 			}
 		}
 	}
@@ -1180,10 +1178,10 @@
 	VNETB_ITERLOOP_BEGIN();
 	INIT_VNET_INET(vnetb_iter);
 	for (i = 0; i < IPREASS_NHASH; i++) {
-		while(!TAILQ_EMPTY(&ipq[i])) {
+		while(!TAILQ_EMPTY(&V_ipq[i])) {
 			V_ipstat.ips_fragdropped +=
-			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
-			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
+			    TAILQ_FIRST(&V_ipq[i])->ipq_nfrags;
+			ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
 		}
 	}
 	VNETB_ITERLOOP_END();

==== //depot/projects/vimage/src/sys/netinet/vinet.h#6 (text+ko) ====

@@ -65,6 +65,12 @@
 
 	int	_ipforwarding;
 
+	TAILQ_HEAD(ipqhead, ipq) _ipq[IPREASS_NHASH];
+	uma_zone_t _ipq_zone;
+	int	_nipq;			/* Total # of reass queues */
+	int	_maxnipq;		/* Admin. limit on # reass queues. */
+	int	_maxfragsperpacket;
+
 	struct	inpcbhead _tcb;		/* head of queue of active tcpcb's */
 	struct	inpcbinfo _tcbinfo;
 	struct	tcpstat _tcpstat;	/* tcp statistics */
@@ -112,6 +118,12 @@
 
 #define V_ipforwarding		VNET_INET(ipforwarding)
 
+#define V_ipq			VNET_INET(ipq)
+#define V_ipq_zone		VNET_INET(ipq_zone)
+#define V_nipq			VNET_INET(nipq)
+#define V_maxnipq		VNET_INET(maxnipq)
+#define V_maxfragsperpacket	VNET_INET(maxfragsperpacket)
+
 #define V_tcb			VNET_INET(tcb)
 #define V_tcbinfo		VNET_INET(tcbinfo)
 #define V_tcpstat		VNET_INET(tcpstat)


More information about the p4-projects mailing list