svn commit: r215207 - in head: sys/net sys/netinet tools/regression/netinet/arphold

George V. Neville-Neil gnn at FreeBSD.org
Fri Nov 12 22:03:02 UTC 2010


Author: gnn
Date: Fri Nov 12 22:03:02 2010
New Revision: 215207
URL: http://svn.freebsd.org/changeset/base/215207

Log:
  Add a queue to hold packets while we await an ARP reply.
  
  When a fast machine first brings up some non TCP networking program
  it is quite possible that we will drop packets due to the fact that
  only one packet can be held per ARP entry.  This leads to packets
  being missed when a program starts or restarts if the ARP data is
  not currently in the ARP cache.
  
  This code adds a new sysctl, net.link.ether.inet.maxhold, which defines
  a system wide maximum number of packets to be held in each ARP entry.
  Up to maxhold packets are queued until an ARP reply is received or
  the ARP times out.  The default setting is the old value of 1
  which has been part of the BSD networking code since time
  immemorial.
  
  Expose the time we hold an incomplete ARP entry by adding
  the sysctl net.link.ether.inet.wait, which defaults to 20
  seconds, the value used when the new ARP code was added..
  
  Reviewed by:	bz, rpaulo
  MFC after: 3 weeks

Added:
  head/tools/regression/netinet/arphold/
  head/tools/regression/netinet/arphold/Makefile   (contents, props changed)
  head/tools/regression/netinet/arphold/arphold.c   (contents, props changed)
  head/tools/regression/netinet/arphold/arphold.t   (contents, props changed)
Modified:
  head/sys/net/if_llatbl.c
  head/sys/net/if_llatbl.h
  head/sys/netinet/if_ether.c
  head/sys/netinet/in.c

Modified: head/sys/net/if_llatbl.c
==============================================================================
--- head/sys/net/if_llatbl.c	Fri Nov 12 21:47:36 2010	(r215206)
+++ head/sys/net/if_llatbl.c	Fri Nov 12 22:03:02 2010	(r215207)
@@ -100,18 +100,34 @@ done:
  * This function is called by the timer functions
  * such as arptimer() and nd6_llinfo_timer(), and
  * the caller does the locking.
+ *
+ * Returns the number of held packets, if any, that were dropped.
  */
-void
+size_t
 llentry_free(struct llentry *lle)
 {
-	
+	size_t pkts_dropped;
+	struct mbuf *next;
+
+	pkts_dropped = 0;
 	LLE_WLOCK_ASSERT(lle);
 	LIST_REMOVE(lle, lle_next);
 
-	if (lle->la_hold != NULL)
+	while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
+		next = lle->la_hold->m_nextpkt;
 		m_freem(lle->la_hold);
+		lle->la_hold = next;
+		lle->la_numheld--;
+		pkts_dropped++;
+	}
+
+	KASSERT(lle->la_numheld == 0, 
+		("%s: la_numheld %d > 0, pkts_droped %ld", __func__, 
+		 lle->la_numheld, pkts_dropped));
 
 	LLE_FREE_LOCKED(lle);
+
+	return (pkts_dropped);
 }
 
 /*
@@ -412,6 +428,7 @@ llatbl_lle_show(struct llentry_sa *la)
 	db_printf(" lle_tbl=%p\n", lle->lle_tbl);
 	db_printf(" lle_head=%p\n", lle->lle_head);
 	db_printf(" la_hold=%p\n", lle->la_hold);
+	db_printf(" la_numheld=%d\n", lle->la_numheld);
 	db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
 	db_printf(" la_flags=0x%04x\n", lle->la_flags);
 	db_printf(" la_asked=%u\n", lle->la_asked);

Modified: head/sys/net/if_llatbl.h
==============================================================================
--- head/sys/net/if_llatbl.h	Fri Nov 12 21:47:36 2010	(r215206)
+++ head/sys/net/if_llatbl.h	Fri Nov 12 22:03:02 2010	(r215207)
@@ -58,6 +58,7 @@ struct llentry {
 	struct lltable		 *lle_tbl;
 	struct llentries	 *lle_head;
 	struct mbuf		 *la_hold;
+	int     		 la_numheld;  /* # of packets currently held */
 	time_t			 la_expire;
 	uint16_t		 la_flags;    
 	uint16_t		 la_asked;
@@ -191,7 +192,7 @@ void		lltable_drain(int);
 #endif
 int		lltable_sysctl_dumparp(int, struct sysctl_req *);
 
-void		llentry_free(struct llentry *);
+size_t		llentry_free(struct llentry *);
 int		llentry_update(struct llentry **, struct lltable *,
                        struct sockaddr_storage *, struct ifnet *);
 

Modified: head/sys/netinet/if_ether.c
==============================================================================
--- head/sys/netinet/if_ether.c	Fri Nov 12 21:47:36 2010	(r215206)
+++ head/sys/netinet/if_ether.c	Fri Nov 12 22:03:02 2010	(r215207)
@@ -89,13 +89,16 @@ VNET_DEFINE(int, useloopback) = 1;	/* us
 static VNET_DEFINE(int, arp_proxyall) = 0;
 static VNET_DEFINE(int, arpt_down) = 20;      /* keep incomplete entries for
 					       * 20 seconds */
-static VNET_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+VNET_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+
+static VNET_DEFINE(int, arp_maxhold) = 1;
 
 #define	V_arpt_keep		VNET(arpt_keep)
 #define	V_arpt_down		VNET(arpt_down)
 #define	V_arp_maxtries		VNET(arp_maxtries)
 #define	V_arp_proxyall		VNET(arp_proxyall)
 #define	V_arpstat		VNET(arpstat)
+#define	V_arp_maxhold		VNET(arp_maxhold)
 
 SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
 	&VNET_NAME(arpt_keep), 0,
@@ -109,9 +112,15 @@ SYSCTL_VNET_INT(_net_link_ether_inet, OI
 SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
 	&VNET_NAME(arp_proxyall), 0,
 	"Enable proxy ARP for all suitable requests");
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW,
+	&VNET_NAME(arpt_down), 0,
+	"Incomplete ARP entry lifetime in seconds");
 SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW,
 	&VNET_NAME(arpstat), arpstat,
 	"ARP statistics (struct arpstat, net/if_arp.h)");
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW,
+	&VNET_NAME(arp_maxhold), 0, 
+	"Number of packets to hold per ARP entry");
 
 static void	arp_init(void);
 void		arprequest(struct ifnet *,
@@ -162,6 +171,7 @@ arptimer(void *arg)
 {
 	struct ifnet *ifp;
 	struct llentry   *lle;
+	int pkts_dropped;
 
 	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
 	lle = (struct llentry *)arg;
@@ -176,7 +186,8 @@ arptimer(void *arg)
 		    callout_active(&lle->la_timer)) {
 			callout_stop(&lle->la_timer);
 			LLE_REMREF(lle);
-			(void) llentry_free(lle);
+			pkts_dropped = llentry_free(lle);
+			ARPSTAT_ADD(dropped, pkts_dropped);
 			ARPSTAT_INC(timeouts);
 		} else {
 #ifdef DIAGNOSTIC
@@ -275,6 +286,8 @@ arpresolve(struct ifnet *ifp, struct rte
 {
 	struct llentry *la = 0;
 	u_int flags = 0;
+	struct mbuf *curr = NULL;
+	struct mbuf *next = NULL;
 	int error, renew;
 
 	*lle = NULL;
@@ -348,15 +361,28 @@ retry:
 	}
 	/*
 	 * There is an arptab entry, but no ethernet address
-	 * response yet.  Replace the held mbuf with this
-	 * latest one.
+	 * response yet.  Add the mbuf to the list, dropping
+	 * the oldest packet if we have exceeded the system
+	 * setting.
 	 */
 	if (m != NULL) {
+		if (la->la_numheld >= V_arp_maxhold) {
+			if (la->la_hold != NULL) {
+				next = la->la_hold->m_nextpkt;
+				m_freem(la->la_hold);
+				la->la_hold = next;
+				la->la_numheld--;
+				ARPSTAT_INC(dropped);
+			}
+		} 
 		if (la->la_hold != NULL) {
-			m_freem(la->la_hold);
-			ARPSTAT_INC(dropped);
-		}
-		la->la_hold = m;
+			curr = la->la_hold;
+			while (curr->m_nextpkt != NULL)
+				curr = curr->m_nextpkt;
+			curr->m_nextpkt = m;
+		} else 
+			la->la_hold = m;
+		la->la_numheld++;
 		if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
 			flags &= ~LLE_EXCLUSIVE;
 			LLE_DOWNGRADE(la);
@@ -483,7 +509,6 @@ in_arpinput(struct mbuf *m)
 	struct rtentry *rt;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
-	struct mbuf *hold;
 	struct sockaddr sa;
 	struct in_addr isaddr, itaddr, myaddr;
 	u_int8_t *enaddr = NULL;
@@ -698,15 +723,29 @@ match:
 		}
 		la->la_asked = 0;
 		la->la_preempt = V_arp_maxtries;
-		hold = la->la_hold;
-		if (hold != NULL) {
-			la->la_hold = NULL;
+		/* 
+		 * The packets are all freed within the call to the output
+		 * routine.
+		 *
+		 * NB: The lock MUST be released before the call to the
+		 * output routine.
+		 */
+		if (la->la_hold != NULL) {
+			struct mbuf *m_hold, *m_hold_next;
+
 			memcpy(&sa, L3_ADDR(la), sizeof(sa));
-		}
-		LLE_WUNLOCK(la);
-		if (hold != NULL)
-			(*ifp->if_output)(ifp, hold, &sa, NULL);
-	}
+			LLE_WUNLOCK(la);
+			for (m_hold = la->la_hold, la->la_hold = NULL;
+			     m_hold != NULL; m_hold = m_hold_next) {
+				m_hold_next = m_hold->m_nextpkt;
+				m_hold->m_nextpkt = NULL;
+				(*ifp->if_output)(ifp, m_hold, &sa, NULL);
+			}
+		} else
+			LLE_WUNLOCK(la);
+		la->la_hold = NULL;
+		la->la_numheld = 0;
+	} /* end of FIB loop */
 reply:
 	if (op != ARPOP_REQUEST)
 		goto drop;

Modified: head/sys/netinet/in.c
==============================================================================
--- head/sys/netinet/in.c	Fri Nov 12 21:47:36 2010	(r215206)
+++ head/sys/netinet/in.c	Fri Nov 12 22:03:02 2010	(r215207)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 
 #include <net/if.h>
 #include <net/if_var.h>
+#include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
@@ -89,6 +90,9 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo			VNET(ripcbinfo)
 
+VNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+#define	V_arpstat		VNET(arpstat)
+
 /*
  * Return 1 if an internet address is for a ``local'' host
  * (one to which we have a connection).  If subnetsarelocal
@@ -1363,6 +1367,7 @@ in_lltable_prefix_free(struct lltable *l
 	const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
 	struct llentry *lle, *next;
 	register int i;
+	size_t pkts_dropped;
 
 	for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
 		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
@@ -1375,7 +1380,8 @@ in_lltable_prefix_free(struct lltable *l
 				LLE_WLOCK(lle);
 				if (canceled)
 					LLE_REMREF(lle);
-				llentry_free(lle);
+				pkts_dropped = llentry_free(lle);
+				ARPSTAT_ADD(dropped, pkts_dropped);
 			}
 		}
 	}

Added: head/tools/regression/netinet/arphold/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/netinet/arphold/Makefile	Fri Nov 12 22:03:02 2010	(r215207)
@@ -0,0 +1,7 @@
+# $FreeBSD$
+
+PROG=	arphold
+NO_MAN=
+CFLAGS+=	-Wall
+
+.include <bsd.prog.mk>

Added: head/tools/regression/netinet/arphold/arphold.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/netinet/arphold/arphold.c	Fri Nov 12 22:03:02 2010	(r215207)
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2010 Advanced Computing Technologies LLC
+ * Written by George Neville-Neil gnn at freebsd.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Description: The following is a test of the arp entry packet queues
+ * which replaced the single packet hold entry that existed in the BSDs
+ * since time immemorial.  The test process is:
+ *
+ * 1) Find out the current system limit (maxhold)
+ * 2) Using an IP address for which we do not yet have an entry
+ *    load up an ARP entry packet queue with exactly that many packets.
+ * 3) Check the arp dropped stat to make sure that we have not dropped
+ *    any packets as yet.
+ * 4) Add one more packet to the queue.
+ * 5) Make sure that only one packet was dropped.
+ *
+ * CAVEAT: The ARP timer will flush the queue after 1 second so it is
+ * important not to run this code in a fast loop or the test will
+ * fail.
+ *
+ * $FreeBSD$
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <net/if_arp.h>
+
+#define MSG_SIZE 1024
+#define PORT 6969
+
+int
+main(int argc, char **argv)
+{
+
+	int sock;
+	int maxhold;
+	int wait;
+	size_t size = sizeof(maxhold);
+	struct sockaddr_in dest;
+	char message[MSG_SIZE];
+	struct arpstat arpstat;
+	size_t len = sizeof(arpstat);
+	unsigned long dropped = 0;
+
+	memset(&message, 1, sizeof(message));
+
+	if (sysctlbyname("net.link.ether.inet.maxhold", &maxhold, &size,
+			 NULL, 0) < 0) {
+		perror("not ok 1 - sysctlbyname failed");
+		exit(1);
+	}
+	    
+#ifdef DEBUG
+	printf("maxhold is %d\n", maxhold);
+#endif /* DEBUG */
+
+	if ((sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0) {
+		perror("not ok 1 - could not open socket");
+		exit(1);
+	}
+
+	bzero(&dest, sizeof(dest));
+	if (inet_pton(AF_INET, argv[1], &dest.sin_addr.s_addr) != 1) {
+		perror("not ok 1 - could not parse address");
+		exit(1);
+	}
+	dest.sin_len = sizeof(dest);
+	dest.sin_family = AF_INET;
+	dest.sin_port = htons(PORT);
+
+	if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len,
+			 NULL, 0) < 0) {
+		perror("not ok 1 - could not get initial arp stats");
+		exit(1);
+	}
+
+	dropped = arpstat.dropped;
+#ifdef DEBUG
+	printf("dropped before %ld\n", dropped);
+#endif /* DEBUG */
+
+	/* 
+	 * Load up the queue in the ARP entry to the maximum.
+	 * We should not drop any packets at this point. 
+	 */
+
+	while (maxhold > 0) {
+		if (sendto(sock, message, sizeof(message), 0,
+			   (struct sockaddr *)&dest, sizeof(dest)) < 0) {
+			perror("not ok 1 - could not send packet");
+			exit(1);
+		}
+		maxhold--;
+	}
+
+	if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len,
+			 NULL, 0) < 0) {
+		perror("not ok 1 - could not get new arp stats");
+		exit(1);
+	}
+
+#ifdef DEBUG
+	printf("dropped after %ld\n", arpstat.dropped);
+#endif /* DEBUG */
+
+	if (arpstat.dropped != dropped) {
+		printf("not ok 1 - Failed, drops changed:"
+		       "before %ld after %ld\n", dropped, arpstat.dropped);
+		exit(1);
+	}
+	
+	dropped = arpstat.dropped;
+
+	/* Now add one extra and make sure it is dropped. */
+	if (sendto(sock, message, sizeof(message), 0,
+		   (struct sockaddr *)&dest, sizeof(dest)) < 0) {
+		perror("not ok 1 - could not send packet");
+		exit(1);
+	}
+
+	if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len,
+			 NULL, 0) < 0) {
+		perror("not ok 1 - could not get new arp stats");
+		exit(1);
+	}
+
+	if (arpstat.dropped != (dropped + 1)) {
+		printf("not ok 1 - Failed to drop one packet: before"
+		       " %ld after %ld\n", dropped, arpstat.dropped);
+		exit(1);
+	}
+
+	printf("ok\n");
+	return (0);
+}

Added: head/tools/regression/netinet/arphold/arphold.t
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/netinet/arphold/arphold.t	Fri Nov 12 22:03:02 2010	(r215207)
@@ -0,0 +1,7 @@
+#!/bin/sh
+#
+# $FreeBSD$
+
+make arphold 2>&1 > /dev/null
+
+./arphold 192.168.1.222


More information about the svn-src-head mailing list