PERFORCE change 40358 for review

Sam Leffler sam at FreeBSD.org
Thu Oct 23 19:52:19 PDT 2003


http://perforce.freebsd.org/chv.cgi?CH=40358

Change 40358 by sam at sam_ebb on 2003/10/23 19:51:36

	Revert the TCP hostcache changes so they can be incorporated
	on a separate branch.

Affected files ...

.. //depot/projects/netperf/sys/conf/files#17 edit
.. //depot/projects/netperf/sys/net/if_arcsubr.c#3 edit
.. //depot/projects/netperf/sys/net/if_ef.c#3 edit
.. //depot/projects/netperf/sys/net/if_ethersubr.c#9 edit
.. //depot/projects/netperf/sys/net/if_faith.c#6 edit
.. //depot/projects/netperf/sys/net/if_fddisubr.c#4 edit
.. //depot/projects/netperf/sys/net/if_iso88025subr.c#4 edit
.. //depot/projects/netperf/sys/net/if_loop.c#9 edit
.. //depot/projects/netperf/sys/net/if_ppp.c#4 edit
.. //depot/projects/netperf/sys/net/route.c#18 edit
.. //depot/projects/netperf/sys/net/route.h#8 edit
.. //depot/projects/netperf/sys/net/rtsock.c#7 edit
.. //depot/projects/netperf/sys/netatalk/ddp_output.c#4 edit
.. //depot/projects/netperf/sys/netinet/icmp_var.h#3 edit
.. //depot/projects/netperf/sys/netinet/in_pcb.c#7 edit
.. //depot/projects/netperf/sys/netinet/in_pcb.h#8 edit
.. //depot/projects/netperf/sys/netinet/in_rmx.c#9 edit
.. //depot/projects/netperf/sys/netinet/in_var.h#4 edit
.. //depot/projects/netperf/sys/netinet/ip_divert.c#8 edit
.. //depot/projects/netperf/sys/netinet/ip_flow.c#6 add
.. //depot/projects/netperf/sys/netinet/ip_flow.h#5 add
.. //depot/projects/netperf/sys/netinet/ip_fw.h#3 edit
.. //depot/projects/netperf/sys/netinet/ip_fw2.c#12 edit
.. //depot/projects/netperf/sys/netinet/ip_icmp.c#7 edit
.. //depot/projects/netperf/sys/netinet/ip_input.c#15 edit
.. //depot/projects/netperf/sys/netinet/ip_output.c#11 edit
.. //depot/projects/netperf/sys/netinet/ip_var.h#9 edit
.. //depot/projects/netperf/sys/netinet/raw_ip.c#8 edit
.. //depot/projects/netperf/sys/netinet/tcp.h#3 edit
.. //depot/projects/netperf/sys/netinet/tcp_input.c#7 edit
.. //depot/projects/netperf/sys/netinet/tcp_output.c#4 edit
.. //depot/projects/netperf/sys/netinet/tcp_subr.c#6 edit
.. //depot/projects/netperf/sys/netinet/tcp_syncache.c#7 edit
.. //depot/projects/netperf/sys/netinet/tcp_timer.c#3 edit
.. //depot/projects/netperf/sys/netinet/tcp_usrreq.c#4 edit
.. //depot/projects/netperf/sys/netinet/tcp_var.h#3 edit
.. //depot/projects/netperf/sys/netinet/udp_usrreq.c#7 edit
.. //depot/projects/netperf/sys/netinet6/icmp6.c#11 edit
.. //depot/projects/netperf/sys/netinet6/in6_pcb.c#10 edit
.. //depot/projects/netperf/sys/netinet6/in6_rmx.c#10 edit
.. //depot/projects/netperf/sys/netinet6/in6_src.c#13 edit
.. //depot/projects/netperf/sys/netinet6/ip6_forward.c#11 edit
.. //depot/projects/netperf/sys/netinet6/ip6_input.c#13 edit
.. //depot/projects/netperf/sys/netinet6/ip6_output.c#18 edit
.. //depot/projects/netperf/sys/netinet6/raw_ip6.c#5 edit
.. //depot/projects/netperf/sys/netinet6/udp6_output.c#4 edit
.. //depot/projects/netperf/sys/netipx/ipx_input.c#5 edit
.. //depot/projects/netperf/sys/netipx/ipx_outputfl.c#3 edit

Differences ...

==== //depot/projects/netperf/sys/conf/files#17 (text+ko) ====

@@ -1424,7 +1424,7 @@
 netinet/ip_ecn.c	optional inet6
 netinet/ip_encap.c	optional inet
 netinet/ip_encap.c	optional inet6
-netinet/ip_fastforward.c	optional inet
+netinet/ip_flow.c	optional inet
 netinet/ip_fw2.c	optional ipfirewall
 netinet/ip_icmp.c	optional inet
 netinet/ip_input.c	optional inet
@@ -1432,7 +1432,6 @@
 netinet/ip_output.c	optional inet
 netinet/raw_ip.c	optional inet
 netinet/tcp_debug.c	optional tcpdebug
-netinet/tcp_hostcache.c	optional inet
 netinet/tcp_input.c	optional inet
 netinet/tcp_output.c	optional inet
 netinet/tcp_subr.c	optional inet

==== //depot/projects/netperf/sys/net/if_arcsubr.c#3 (text+ko) ====

@@ -543,14 +543,14 @@
 #ifdef INET
 	case ARCTYPE_IP:
 		m_adj(m, ARC_HDRNEWLEN);
-		if (ip_fastforward(m))
+		if (ipflow_fastforward(m))
 			return;
 		isr = NETISR_IP;
 		break;
 
 	case ARCTYPE_IP_OLD:
 		m_adj(m, ARC_HDRLEN);
-		if (ip_fastforward(m))
+		if (ipflow_fastforward(m))
 			return;
 		isr = NETISR_IP;
 		break;

==== //depot/projects/netperf/sys/net/if_ef.c#3 (text+ko) ====

@@ -252,8 +252,8 @@
 #endif
 #ifdef INET
 	case ETHERTYPE_IP:
-		if (ip_fastforward(m))
-			return;
+		if (ipflow_fastforward(m))
+			return (0);
 		isr = NETISR_IP;
 		break;
 

==== //depot/projects/netperf/sys/net/if_ethersubr.c#9 (text+ko) ====

@@ -717,7 +717,7 @@
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
-		if (ip_fastforward(m))
+		if (ipflow_fastforward(m))
 			return;
 		isr = NETISR_IP;
 		break;

==== //depot/projects/netperf/sys/net/if_faith.c#6 (text+ko) ====

@@ -271,8 +271,17 @@
 	struct rt_addrinfo *info;
 {
 	RT_LOCK_ASSERT(rt);
-	if (rt)
-		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+
+	if (rt) {
+		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
+		/*
+		 * For optimal performance, the send and receive buffers
+		 * should be at least twice the MTU plus a little more for
+		 * overhead.
+		 */
+		rt->rt_rmx.rmx_recvpipe =
+			rt->rt_rmx.rmx_sendpipe = 3 * FAITHMTU;
+	}
 }
 
 /*

==== //depot/projects/netperf/sys/net/if_fddisubr.c#4 (text+ko) ====

@@ -471,7 +471,7 @@
 		switch (type) {
 #ifdef INET
 		case ETHERTYPE_IP:
-			if (ip_fastforward(m))
+			if (ipflow_fastforward(m))
 				return;
 			isr = NETISR_IP;
 			break;

==== //depot/projects/netperf/sys/net/if_iso88025subr.c#4 (text+ko) ====

@@ -556,7 +556,7 @@
 #ifdef INET
 		case ETHERTYPE_IP:
 			th->iso88025_shost[0] &= ~(TR_RII); 
-			if (ip_fastforward(m))
+			if (ipflow_fastforward(m))
 				return;
 			isr = NETISR_IP;
 			break;

==== //depot/projects/netperf/sys/net/if_loop.c#9 (text+ko) ====

@@ -357,8 +357,17 @@
 	struct rt_addrinfo *info;
 {
 	RT_LOCK_ASSERT(rt);
-	if (rt)
-		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+
+	if (rt) {
+		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
+		/*
+		 * For optimal performance, the send and receive buffers
+		 * should be at least twice the MTU plus a little more for
+		 * overhead.
+		 */
+		rt->rt_rmx.rmx_recvpipe =
+			rt->rt_rmx.rmx_sendpipe = 3 * LOMTU;
+	}
 }
 
 /*

==== //depot/projects/netperf/sys/net/if_ppp.c#4 (text+ko) ====

@@ -1538,8 +1538,8 @@
 	m->m_pkthdr.len -= PPP_HDRLEN;
 	m->m_data += PPP_HDRLEN;
 	m->m_len -= PPP_HDRLEN;
-	if (ip_fastforward(m))
-		return;
+	if (ipflow_fastforward(m))
+	    return;
 	isr = NETISR_IP;
 	break;
 #endif

==== //depot/projects/netperf/sys/net/route.c#18 (text+ko) ====

@@ -139,7 +139,7 @@
 		 */
 		newrt = rt = (struct rtentry *)rn;
 		nflags = rt->rt_flags & ~ignflags;
-		if (report && (nflags & RTF_CLONING)) {
+		if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
 			/*
 			 * We are apparently adding (report = 0 in delete).
 			 * If it requires that it be cloned, do so.
@@ -548,7 +548,7 @@
 	 */
 	if (flags & RTF_HOST) {
 		netmask = 0;
-		flags &= ~RTF_CLONING;
+		flags &= ~(RTF_CLONING | RTF_PRCLONING);
 	}
 	switch (req) {
 	case RTM_DELETE:
@@ -570,7 +570,7 @@
 		 * Now search what's left of the subtree for any cloned
 		 * routes which might have been formed from this node.
 		 */
-		if ((rt->rt_flags & RTF_CLONING) &&
+		if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
 		    rt_mask(rt)) {
 			rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
 					       rt_fixdelete, rt);
@@ -617,7 +617,7 @@
 		ifa = rt->rt_ifa;
 		/* XXX locking? */
 		flags = rt->rt_flags &
-		    ~(RTF_CLONING | RTF_STATIC);
+		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
 		flags |= RTF_WASCLONED;
 		gateway = rt->rt_gateway;
 		if ((netmask = rt->rt_genmask) == 0)
@@ -678,11 +678,11 @@
 			/*
 			 * Uh-oh, we already have one of these in the tree.
 			 * We do a special hack: if the route that's already
-			 * there was generated by the cloning mechanism
-			 * then we just blow it away and retry the insertion
-			 * of the new one.
+			 * there was generated by the protocol-cloning
+			 * mechanism, then we just blow it away and retry
+			 * the insertion of the new one.
 			 */
-			rt2 = rtalloc1(dst, 0, 0);
+			rt2 = rtalloc1(dst, 0, RTF_PRCLONING);
 			if (rt2 && rt2->rt_parent) {
 				rtrequest(RTM_DELETE,
 					  rt_key(rt2),
@@ -724,7 +724,7 @@
 				("no route to clone from"));
 			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
 			rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
-			if ((*ret_nrt)->rt_flags & RTF_CLONING) {
+			if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
 				/*
 				 * NB: We do not bump the refcnt on the parent
 				 * entry under the assumption that it will
@@ -800,7 +800,7 @@
 	struct rtentry *rt0 = vp;
 
 	if (rt->rt_parent == rt0 &&
-	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
+	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
 		return rtrequest(RTM_DELETE, rt_key(rt),
 				 (struct sockaddr *)0, rt_mask(rt),
 				 rt->rt_flags, (struct rtentry **)0);
@@ -841,7 +841,7 @@
 #endif
 
 	if (!rt->rt_parent ||
-	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
+	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
 #ifdef DEBUG
 		if(rtfcdebug) printf("no parent, pinned or cloning\n");
 #endif
@@ -992,10 +992,9 @@
 	 * correct choice anyway), and avoid the resulting reference loops
 	 * by disallowing any route to run through itself as a gateway.
 	 * This is obviously mandatory when we get rt->rt_output().
-	 * XXX: After removal of PRCLONING this probably not needed anymore.
 	 */
 	if (rt->rt_flags & RTF_GATEWAY) {
-		rt->rt_gwroute = rtalloc1(gate, 1, 0);
+		rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING);
 		if (rt->rt_gwroute == rt) {
 			RTFREE_LOCKED(rt->rt_gwroute);
 			rt->rt_gwroute = 0;

==== //depot/projects/netperf/sys/net/route.h#8 (text+ko) ====

@@ -58,12 +58,6 @@
  * These numbers are used by reliable protocols for determining
  * retransmission behavior and are included in the routing structure.
  */
-struct rt_metrics_lite {
-	u_long	rmx_mtu;	/* MTU for this path */
-	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
-	u_long	rmx_pksent;	/* packets sent using this route */
-};
-
 struct rt_metrics {
 	u_long	rmx_locks;	/* Kernel must leave these values alone */
 	u_long	rmx_mtu;	/* MTU for this path */
@@ -110,10 +104,10 @@
 	long	rt_refcnt;		/* # held references */
 	u_long	rt_flags;		/* up/down?, host/net */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
-	struct	ifaddr *rt_ifa;		/* the answer: interface address to use */
+	struct	ifaddr *rt_ifa;		/* the answer: interface to use */
 	struct	sockaddr *rt_genmask;	/* for generation of cloned routes */
 	caddr_t	rt_llinfo;		/* pointer to link level info cache */
-	struct	rt_metrics_lite rt_rmx;	/* metrics used by rx'ing protocols */
+	struct	rt_metrics rt_rmx;	/* metrics used by rx'ing protocols */
 	struct	rtentry *rt_gwroute;	/* implied entry for gatewayed routes */
 	int	(*rt_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *);
@@ -157,7 +151,7 @@
 #define RTF_PROTO2	0x4000		/* protocol specific routing flag */
 #define RTF_PROTO1	0x8000		/* protocol specific routing flag */
 
-/*			0x10000		   unused */
+#define RTF_PRCLONING	0x10000		/* protocol requires cloning */
 #define RTF_WASCLONED	0x20000		/* route generated through cloning */
 #define RTF_PROTO3	0x40000		/* protocol specific routing flag */
 /*			0x80000		   unused */

==== //depot/projects/netperf/sys/net/rtsock.c#7 (text+ko) ====

@@ -86,8 +86,7 @@
 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	route_output(struct mbuf *, struct socket *);
-static void	rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics_lite *);
-static void	rt_getmetrics(struct rt_metrics_lite *, struct rt_metrics *);
+static void	rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *);
 static void	rt_dispatch(struct mbuf *, struct sockaddr *);
 
 /*
@@ -355,6 +354,9 @@
 			RT_LOCK(saved_nrt);
 			rt_setmetrics(rtm->rtm_inits,
 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
+			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
+			saved_nrt->rt_rmx.rmx_locks |=
+				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
 			saved_nrt->rt_refcnt--;
 			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
 			RT_UNLOCK(saved_nrt);
@@ -425,7 +427,7 @@
 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
 				(struct walkarg *)0);
 			rtm->rtm_flags = rt->rt_flags;
-			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+			rtm->rtm_rmx = rt->rt_rmx;
 			rtm->rtm_addrs = info.rti_addrs;
 			break;
 
@@ -475,7 +477,9 @@
 				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
 			/* FALLTHROUGH */
 		case RTM_LOCK:
-			/* We don't support locks anymore */
+			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
+			rt->rt_rmx.rmx_locks |=
+				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
 			break;
 		}
 		RT_UNLOCK(rt);
@@ -537,28 +541,20 @@
 }
 
 static void
-rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics_lite *out)
+rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
 {
 #define metric(f, e) if (which & (f)) out->e = in->e;
-	/*
-	 * Only these are stored in the routing entry since introduction
-	 * of tcp hostcache. The rest is ignored.
-	 */
+	metric(RTV_RPIPE, rmx_recvpipe);
+	metric(RTV_SPIPE, rmx_sendpipe);
+	metric(RTV_SSTHRESH, rmx_ssthresh);
+	metric(RTV_RTT, rmx_rtt);
+	metric(RTV_RTTVAR, rmx_rttvar);
+	metric(RTV_HOPCOUNT, rmx_hopcount);
 	metric(RTV_MTU, rmx_mtu);
 	metric(RTV_EXPIRE, rmx_expire);
 #undef metric
 }
 
-static void
-rt_getmetrics(struct rt_metrics_lite *in, struct rt_metrics *out)
-{
-#define metric(e) out->e = in->e;
-	bzero(out, sizeof(*out));
-	metric(rmx_mtu);
-	metric(rmx_expire);
-#undef metric
-}
-
 #define ROUNDUP(a) \
 	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
 
@@ -948,8 +944,8 @@
 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
 		rtm->rtm_flags = rt->rt_flags;
-		rtm->rtm_use = rt->rt_rmx.rmx_pksent;
-		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+		rtm->rtm_use = rt->rt_use;
+		rtm->rtm_rmx = rt->rt_rmx;
 		rtm->rtm_index = rt->rt_ifp->if_index;
 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
 		rtm->rtm_addrs = info.rti_addrs;

==== //depot/projects/netperf/sys/netatalk/ddp_output.c#4 (text+ko) ====

@@ -217,7 +217,7 @@
 	elh->el_type = ELAP_DDPEXTEND;
 	elh->el_dnode = gate.sat_addr.s_node;
     }
-    ro->ro_rt->rt_rmx.rmx_pksent++;
+    ro->ro_rt->rt_use++;
 
 #ifdef NETATALK_DEBUG
     printf ("ddp_route: from %d.%d to %d.%d, via %d.%d (%s%d)\n",

==== //depot/projects/netperf/sys/netinet/icmp_var.h#3 (text+ko) ====

@@ -81,12 +81,11 @@
 extern int badport_bandlim(int);
 #define BANDLIM_UNLIMITED -1
 #define BANDLIM_ICMP_UNREACH 0
-#define BANDLIM_ICMP_UNREACH_HOST 1
-#define BANDLIM_ICMP_ECHO 2
-#define BANDLIM_ICMP_TSTAMP 3
-#define BANDLIM_RST_CLOSEDPORT 4	/* No connection, and no listeners */
-#define BANDLIM_RST_OPENPORT 5		/* No connection, listener */
-#define BANDLIM_MAX 5
+#define BANDLIM_ICMP_ECHO 1
+#define BANDLIM_ICMP_TSTAMP 2
+#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
+#define BANDLIM_RST_OPENPORT 4   /* No connection, listener */
+#define BANDLIM_MAX 4
 #endif
 
 #endif

==== //depot/projects/netperf/sys/netinet/in_pcb.c#7 (text+ko) ====

@@ -536,6 +536,7 @@
 		if (error)
 			return (error);
 	}
+
 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
 		/*
 		 * If the destination address is INADDR_ANY,
@@ -553,8 +554,7 @@
 			    &in_ifaddrhead)->ia_broadaddr)->sin_addr;
 	}
 	if (laddr.s_addr == INADDR_ANY) {
-		struct route *ro;
-		struct route sro;
+		register struct route *ro;
 
 		ia = (struct in_ifaddr *)0;
 		/*
@@ -563,10 +563,19 @@
 		 * Note that we should check the address family of the cached
 		 * destination, in case of sharing the cache with IPv6.
 		 */
-		ro = &sro;
-		bzero(ro, sizeof(*ro));
-		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) {
-			/* Find out route to destination */
+		ro = &inp->inp_route;
+		if (ro->ro_rt &&
+		    (ro->ro_dst.sa_family != AF_INET ||
+		     satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr ||
+		     inp->inp_socket->so_options & SO_DONTROUTE)) {
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = (struct rtentry *)0;
+		}
+		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+		    (ro->ro_rt == (struct rtentry *)0 ||
+		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+			/* No route yet, so try to acquire one */
+			bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
 			ro->ro_dst.sa_family = AF_INET;
 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
 			((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr;
@@ -580,8 +589,6 @@
 		 */
 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
 			ia = ifatoia(ro->ro_rt->rt_ifa);
-		if (ro->ro_rt)
-			RTFREE(ro->ro_rt);
 		if (ia == 0) {
 			bzero(&sa, sizeof(sa));
 			sa.sin_addr = faddr;
@@ -668,6 +675,8 @@
 	}
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
+	if (inp->inp_route.ro_rt)
+		RTFREE(inp->inp_route.ro_rt);
 	ip_freemoptions(inp->inp_moptions);
 	inp->inp_vflag = 0;
 	INP_LOCK_DESTROY(inp);
@@ -841,6 +850,60 @@
 }
 
 /*
+ * Check for alternatives when higher level complains
+ * about service problems.  For now, invalidate cached
+ * routing information.  If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ */
+void
+in_losing(inp)
+	struct inpcb *inp;
+{
+	register struct rtentry *rt;
+	struct rt_addrinfo info;
+
+	if ((rt = inp->inp_route.ro_rt)) {
+		RT_LOCK(rt);
+		inp->inp_route.ro_rt = NULL;
+		bzero((caddr_t)&info, sizeof(info));
+		info.rti_flags = rt->rt_flags;
+		info.rti_info[RTAX_DST] = rt_key(rt);
+		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
+		if (rt->rt_flags & RTF_DYNAMIC) {
+			RT_UNLOCK(rt);		/* XXX refcnt? */
+			(void) rtrequest1(RTM_DELETE, &info, NULL);
+		} else
+			rtfree(rt);
+		/*
+		 * A new route can be allocated
+		 * the next time output is attempted.
+		 */
+	}
+}
+
+/*
+ * After a routing change, flush old routing
+ * and allocate a (hopefully) better one.
+ */
+struct inpcb *
+in_rtchange(inp, errno)
+	register struct inpcb *inp;
+	int errno;
+{
+	if (inp->inp_route.ro_rt) {
+		RTFREE(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = 0;
+		/*
+		 * A new route can be allocated the next time
+		 * output is attempted.
+		 */
+	}
+	return inp;
+}
+
+/*
  * Lookup a PCB based on the local address and port.
  */
 struct inpcb *

==== //depot/projects/netperf/sys/netinet/in_pcb.h#8 (text+ko) ====

@@ -94,22 +94,31 @@
 
 /*
  * XXX
- * the defines for inc_* are hacks and should be changed to direct references
+ * At some point struct route should possibly change to:
+ *   struct rtentry *rt
+ *   struct in_endpoints *ie; 
  */
 struct in_conninfo {
 	u_int8_t	inc_flags;
 	u_int8_t	inc_len;
 	u_int16_t	inc_pad;	/* XXX alignment for in_endpoints */
-	/* protocol dependent part */
+	/* protocol dependent part; cached route */
 	struct	in_endpoints inc_ie;
+	union {
+		/* placeholder for routing entry */
+		struct	route inc4_route;
+		struct	route_in6 inc6_route;
+	} inc_dependroute;
 };
 #define inc_isipv6	inc_flags	/* temp compatability */
 #define	inc_fport	inc_ie.ie_fport
 #define	inc_lport	inc_ie.ie_lport
 #define	inc_faddr	inc_ie.ie_faddr
 #define	inc_laddr	inc_ie.ie_laddr
+#define	inc_route	inc_dependroute.inc4_route
 #define	inc6_faddr	inc_ie.ie6_faddr
 #define	inc6_laddr	inc_ie.ie6_laddr
+#define	inc6_route	inc_dependroute.inc6_route
 
 struct	icmp6_filter;
 
@@ -147,6 +156,7 @@
 #define inp_lport	inp_inc.inc_lport
 #define	inp_faddr	inp_inc.inc_faddr
 #define	inp_laddr	inp_inc.inc_laddr
+#define	inp_route	inp_inc.inc_route
 #define	inp_ip_tos	inp_depend4.inp4_ip_tos
 #define	inp_options	inp_depend4.inp4_options
 #define	inp_moptions	inp_depend4.inp4_moptions
@@ -172,6 +182,7 @@
 
 #define	in6p_faddr	inp_inc.inc6_faddr
 #define	in6p_laddr	inp_inc.inc6_laddr
+#define	in6p_route	inp_inc.inc6_route
 #define	in6p_ip6_hlim	inp_depend6.inp6_hlim
 #define	in6p_hops	inp_depend6.inp6_hops	/* default hop limit */
 #define	in6p_ip6_nxt	inp_ip_p
@@ -316,6 +327,9 @@
 extern int	ipport_hilastauto;
 
 void	in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
+void	in_losing(struct inpcb *);
+struct inpcb *
+	in_rtchange(struct inpcb *, int);
 int	in_pcballoc(struct socket *, struct inpcbinfo *, struct thread *);
 int	in_pcbbind(struct inpcb *, struct sockaddr *, struct thread *);
 int	in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,

==== //depot/projects/netperf/sys/netinet/in_rmx.c#9 (text+ko) ====

@@ -73,6 +73,15 @@
 	struct radix_node *ret;
 
 	/*
+	 * For IP, all unicast non-host routes are automatically cloning.
+	 */
+	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+		rt->rt_flags |= RTF_MULTICAST;
+
+	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST)))
+		rt->rt_flags |= RTF_PRCLONING;
+
+	/*
 	 * A little bit of help for both IP output and input:
 	 *   For host routes, we make sure that RTF_BROADCAST
 	 *   is set for anything that looks like a broadcast address.
@@ -85,7 +94,8 @@
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
-	 * dubious since it's so easy to inspect the address).
+	 * dubious since it's so easy to inspect the address).  (This
+	 * is done above.)
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
@@ -95,10 +105,9 @@
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
-	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
-		rt->rt_flags |= RTF_MULTICAST;
 
-	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
+	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
+	    rt->rt_ifp)
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
@@ -109,7 +118,8 @@
 		 * Find out if it is because of an
 		 * ARP entry and delete it if so.
 		 */
-		rt2 = rtalloc1((struct sockaddr *)sin, 0, RTF_CLONING);
+		rt2 = rtalloc1((struct sockaddr *)sin, 0,
+				RTF_CLONING | RTF_PRCLONING);
 		if (rt2) {
 			if (rt2->rt_flags & RTF_LLINFO &&
 			    rt2->rt_flags & RTF_HOST &&
@@ -128,6 +138,14 @@
 			RTFREE_LOCKED(rt2);
 		}
 	}
+
+	/*
+	 * If the new route created successfully, and we are forwarding,
+	 * flush any cached routes to avoid using a stale value.
+	 */
+	if (ret != NULL && ipforwarding)
+		ip_forward_cacheinval();
+
 	return ret;
 }
 
@@ -380,7 +398,7 @@
 		 * so that behavior is not needed there.
 		 */
 		RT_LOCK(rt);
-		rt->rt_flags &= ~RTF_CLONING;
+		rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
 		RT_UNLOCK(rt);
 		err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
 				rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);

==== //depot/projects/netperf/sys/netinet/in_var.h#4 (text+ko) ====

@@ -230,7 +230,9 @@
 void	ip_input(struct mbuf *);
 int	in_ifadown(struct ifaddr *ifa, int);
 void	in_ifscrub(struct ifnet *, struct in_ifaddr *);
-int	ip_fastforward(struct mbuf *);
+int	ipflow_fastforward(struct mbuf *);
+void	ipflow_create(const struct route *, struct mbuf *);
+void	ipflow_slowtimo(void);
 
 #endif /* _KERNEL */
 

==== //depot/projects/netperf/sys/netinet/ip_divert.c#8 (text+ko) ====

@@ -333,7 +333,7 @@
 		/* Send packet to output processing */
 		ipstat.ips_rawout++;			/* XXX */
 		error = ip_output((struct mbuf *)&divert_tag,
-			    inp->inp_options, NULL,
+			    inp->inp_options, &inp->inp_route,
 			    (so->so_options & SO_DONTROUTE) |
 			    IP_ALLOWBROADCAST | IP_RAWOUTPUT,
 			    inp->inp_moptions, NULL);

==== //depot/projects/netperf/sys/netinet/ip_fw.h#3 (text+ko) ====

@@ -28,7 +28,6 @@
 #ifndef _IPFW2_H
 #define _IPFW2_H
 #define IPFW2  1
-
 /*
  * The kernel representation of ipfw rules is made of a list of
  * 'instructions' (for all practical purposes equivalent to BPF

==== //depot/projects/netperf/sys/netinet/ip_fw2.c#12 (text+ko) ====

@@ -461,16 +461,13 @@
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = src;
 
-		rtalloc_ign(&ro, RTF_CLONING);
+		rtalloc_ign(&ro, RTF_CLONING|RTF_PRCLONING);
 	}
 
-	if (ro.ro_rt == NULL)
+	if ((ro.ro_rt == NULL) || (ifp == NULL) ||
+	    (ro.ro_rt->rt_ifp->if_index != ifp->if_index))
 		return 0;
-	if ((ifp == NULL) || (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) {
-		RTFREE(ro.ro_rt);
-		return 0;
-	}
-	RTFREE(ro.ro_rt);
+
 	return 1;
 }
 
@@ -1162,6 +1159,7 @@
 	struct mbuf *m;
 	struct ip *ip;
 	struct tcphdr *tcp;
+	struct route sro;	/* fake route */
 
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 	if (m == 0)
@@ -1227,8 +1225,12 @@
 	 */
 	ip->ip_ttl = ip_defttl;
 	ip->ip_len = m->m_pkthdr.len;
+	bzero (&sro, sizeof (sro));
+	ip_rtaddr(ip->ip_dst, &sro);
 	m->m_flags |= M_SKIP_FIREWALL;
-	ip_output(m, NULL, NULL, 0, NULL, NULL);
+	ip_output(m, NULL, &sro, 0, NULL, NULL);
+	if (sro.ro_rt)
+		RTFREE(sro.ro_rt);
 }
 
 /*

==== //depot/projects/netperf/sys/netinet/ip_icmp.c#7 (text+ko) ====

@@ -52,15 +52,11 @@
 #include <net/route.h>
 
 #include <netinet/in.h>
-#include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcpip.h>
 #include <netinet/icmp_var.h>
 
 #ifdef IPSEC
@@ -124,7 +120,7 @@
 #endif
 
 static void	icmp_reflect(struct mbuf *);
-static void	icmp_send(struct mbuf *, struct mbuf *);
+static void	icmp_send(struct mbuf *, struct mbuf *, struct route *);
 static int	ip_next_mtu(int, int);
 
 extern	struct protosw inetsw[];
@@ -169,18 +165,6 @@
 	if (n->m_flags & (M_BCAST|M_MCAST))
 		goto freeit;
 	/*
-	 * Limit sending of ICMP host unreachable messages.
-	 * If we are acting as a router and someone is doing a sweep
-	 * scan (eg. nmap and/or numerous windows worms) for destinations
-	 * we are the gateway for but are not reachable (ie. a /24 on a
-	 * interface and only a couple of hosts on the ethernet) we would
-	 * generate a storm of ICMP host unreachable messages.
-	 */
-	if (type == ICMP_UNREACH && code == ICMP_UNREACH_HOST) {
-		if (badport_bandlim(BANDLIM_ICMP_UNREACH_HOST) < 0)
-			goto freeit;
-	}
-	/*
 	 * First, formulate icmp message
 	 */
 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
@@ -249,34 +233,27 @@
 	m_freem(n);
 }
 
+static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
+
 /*
  * Process a received ICMP message.
  */
 void
 icmp_input(m, off)
-	struct mbuf *m;
+	register struct mbuf *m;
 	int off;
 {
 	int hlen = off;
-	struct icmp *icp;
+	register struct icmp *icp;
+	register struct ip *ip = mtod(m, struct ip *);
+	int icmplen = ip->ip_len;
+	register int i;
 	struct in_ifaddr *ia;
-	struct ip *ip = mtod(m, struct ip *);
-	int icmplen = ip->ip_len;
-	int i, code;
 	void (*ctlfunc)(int, struct sockaddr *, void *);
-	struct sockaddr_in icmpsrc, icmpdst, icmpgw;
+	int code;
 
-	/* Initialize */
-	bzero(&icmpsrc, sizeof(icmpsrc));
-	icmpsrc.sin_len = sizeof(struct sockaddr_in);
-	icmpsrc.sin_family = AF_INET;
-	bzero(&icmpdst, sizeof(icmpdst));
-	icmpdst.sin_len = sizeof(struct sockaddr_in);
-	icmpdst.sin_family = AF_INET;
-	bzero(&icmpgw, sizeof(icmpgw));
-	icmpgw.sin_len = sizeof(struct sockaddr_in);
-	icmpgw.sin_family = AF_INET;
-	
 	/*
 	 * Locate icmp structure in mbuf, and check
 	 * that not corrupted and of at least minimum length.
@@ -410,7 +387,7 @@
 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
 #endif
 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
-
+#if 1
 		/*
 		 * MTU discovery:
 		 * If we got a needfrag and there is a host route to the
@@ -420,38 +397,40 @@
 		 * notice that the MTU has changed and adapt accordingly.
 		 * If no new MTU was suggested, then we guess a new one
 		 * less than the current value.  If the new MTU is 
-		 * unreasonably small (defined by sysctl tcp_minmss), then
-		 * we don't update the MTU value.
-		 *
-		 * XXX: All this should be done in tcp_mtudisc() because
-		 * the way we do it now, everyone can send us bogus ICMP
-		 * MSGSIZE packets for any destination. By doing this far
-		 * higher in the chain we have a matching tcp connection.
-		 * Thus spoofing is much harder. However there is no easy
-		 * non-hackish way to pass the new MTU up to tcp_mtudisc().
-		 * Also see next XXX regarding IPv4 AH TCP.
+		 * unreasonably small (arbitrarily set at 296), then
+		 * we reset the MTU to the interface value and enable the
+		 * lock bit, indicating that we are no longer doing MTU
+		 * discovery.
 		 */
 		if (code == PRC_MSGSIZE) {
+			struct rtentry *rt;
 			int mtu;
-			struct in_conninfo inc;
 
-			bzero(&inc, sizeof(inc));
-			inc.inc_flags = 0; /* IPv4 */
-			inc.inc_faddr = icmpsrc.sin_addr;
-
-			mtu = ntohs(icp->icmp_nextmtu);
-			if (!mtu)
-				mtu = ip_next_mtu(mtu, 1);
-
-			if (mtu >= max(296, (tcp_minmss + sizeof(struct tcpiphdr))))
-				tcp_hc_updatemtu(&inc, mtu);
-
+			rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
+				      RTF_CLONING | RTF_PRCLONING);
+			if (rt && (rt->rt_flags & RTF_HOST)
+			    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
+				mtu = ntohs(icp->icmp_nextmtu);
+				if (!mtu)
+					mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
+							  1);
 #ifdef DEBUG_MTUDISC
-			printf("MTU for %s reduced to %d\n",
-				inet_ntoa(icmpsrc.sin_addr), mtu);
+				printf("MTU for %s reduced to %d\n",
+					inet_ntoa(icmpsrc.sin_addr), mtu);
 #endif
+				if (mtu < 296) {
+					/* rt->rt_rmx.rmx_mtu =
+						rt->rt_ifp->if_mtu; */
+					rt->rt_rmx.rmx_locks |= RTV_MTU;
+				} else if (rt->rt_rmx.rmx_mtu > mtu) {
+					rt->rt_rmx.rmx_mtu = mtu;
+				}
+			}
+			if (rt)
+				rtfree(rt);
 		}
 
+#endif
 		/*
 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
 		 * notification to TCP layer.
@@ -628,6 +607,7 @@
 	struct in_addr t;
 	struct mbuf *opts = 0;
 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
+	struct route *ro = NULL, rt;
 
 	if (!in_canforward(ip->ip_src) &&
 	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
@@ -638,6 +618,8 @@
 	}
 	t = ip->ip_dst;
 	ip->ip_dst = ip->ip_src;
+	ro = &rt;
+	bzero(ro, sizeof(*ro));
 	/*
 	 * If the incoming packet was addressed directly to us,
 	 * use dst as the src for the reply.  Otherwise (broadcast
@@ -658,7 +640,7 @@
 				goto match;
 		}
 	}
-	ia = ip_rtaddr(ip->ip_dst);
+	ia = ip_rtaddr(ip->ip_dst, ro);
 	/* We need a route to do anything useful. */
 	if (ia == NULL) {
 		m_freem(m);

>>> TRUNCATED FOR MAIL (1000 lines) <<<


More information about the p4-projects mailing list