PERFORCE change 40358 for review
Sam Leffler
sam at FreeBSD.org
Thu Oct 23 19:52:19 PDT 2003
http://perforce.freebsd.org/chv.cgi?CH=40358
Change 40358 by sam at sam_ebb on 2003/10/23 19:51:36
Revert the TCP hostcache changes so that they can be incorporated
on a separate branch.
Affected files ...
.. //depot/projects/netperf/sys/conf/files#17 edit
.. //depot/projects/netperf/sys/net/if_arcsubr.c#3 edit
.. //depot/projects/netperf/sys/net/if_ef.c#3 edit
.. //depot/projects/netperf/sys/net/if_ethersubr.c#9 edit
.. //depot/projects/netperf/sys/net/if_faith.c#6 edit
.. //depot/projects/netperf/sys/net/if_fddisubr.c#4 edit
.. //depot/projects/netperf/sys/net/if_iso88025subr.c#4 edit
.. //depot/projects/netperf/sys/net/if_loop.c#9 edit
.. //depot/projects/netperf/sys/net/if_ppp.c#4 edit
.. //depot/projects/netperf/sys/net/route.c#18 edit
.. //depot/projects/netperf/sys/net/route.h#8 edit
.. //depot/projects/netperf/sys/net/rtsock.c#7 edit
.. //depot/projects/netperf/sys/netatalk/ddp_output.c#4 edit
.. //depot/projects/netperf/sys/netinet/icmp_var.h#3 edit
.. //depot/projects/netperf/sys/netinet/in_pcb.c#7 edit
.. //depot/projects/netperf/sys/netinet/in_pcb.h#8 edit
.. //depot/projects/netperf/sys/netinet/in_rmx.c#9 edit
.. //depot/projects/netperf/sys/netinet/in_var.h#4 edit
.. //depot/projects/netperf/sys/netinet/ip_divert.c#8 edit
.. //depot/projects/netperf/sys/netinet/ip_flow.c#6 add
.. //depot/projects/netperf/sys/netinet/ip_flow.h#5 add
.. //depot/projects/netperf/sys/netinet/ip_fw.h#3 edit
.. //depot/projects/netperf/sys/netinet/ip_fw2.c#12 edit
.. //depot/projects/netperf/sys/netinet/ip_icmp.c#7 edit
.. //depot/projects/netperf/sys/netinet/ip_input.c#15 edit
.. //depot/projects/netperf/sys/netinet/ip_output.c#11 edit
.. //depot/projects/netperf/sys/netinet/ip_var.h#9 edit
.. //depot/projects/netperf/sys/netinet/raw_ip.c#8 edit
.. //depot/projects/netperf/sys/netinet/tcp.h#3 edit
.. //depot/projects/netperf/sys/netinet/tcp_input.c#7 edit
.. //depot/projects/netperf/sys/netinet/tcp_output.c#4 edit
.. //depot/projects/netperf/sys/netinet/tcp_subr.c#6 edit
.. //depot/projects/netperf/sys/netinet/tcp_syncache.c#7 edit
.. //depot/projects/netperf/sys/netinet/tcp_timer.c#3 edit
.. //depot/projects/netperf/sys/netinet/tcp_usrreq.c#4 edit
.. //depot/projects/netperf/sys/netinet/tcp_var.h#3 edit
.. //depot/projects/netperf/sys/netinet/udp_usrreq.c#7 edit
.. //depot/projects/netperf/sys/netinet6/icmp6.c#11 edit
.. //depot/projects/netperf/sys/netinet6/in6_pcb.c#10 edit
.. //depot/projects/netperf/sys/netinet6/in6_rmx.c#10 edit
.. //depot/projects/netperf/sys/netinet6/in6_src.c#13 edit
.. //depot/projects/netperf/sys/netinet6/ip6_forward.c#11 edit
.. //depot/projects/netperf/sys/netinet6/ip6_input.c#13 edit
.. //depot/projects/netperf/sys/netinet6/ip6_output.c#18 edit
.. //depot/projects/netperf/sys/netinet6/raw_ip6.c#5 edit
.. //depot/projects/netperf/sys/netinet6/udp6_output.c#4 edit
.. //depot/projects/netperf/sys/netipx/ipx_input.c#5 edit
.. //depot/projects/netperf/sys/netipx/ipx_outputfl.c#3 edit
Differences ...
==== //depot/projects/netperf/sys/conf/files#17 (text+ko) ====
@@ -1424,7 +1424,7 @@
netinet/ip_ecn.c optional inet6
netinet/ip_encap.c optional inet
netinet/ip_encap.c optional inet6
-netinet/ip_fastforward.c optional inet
+netinet/ip_flow.c optional inet
netinet/ip_fw2.c optional ipfirewall
netinet/ip_icmp.c optional inet
netinet/ip_input.c optional inet
@@ -1432,7 +1432,6 @@
netinet/ip_output.c optional inet
netinet/raw_ip.c optional inet
netinet/tcp_debug.c optional tcpdebug
-netinet/tcp_hostcache.c optional inet
netinet/tcp_input.c optional inet
netinet/tcp_output.c optional inet
netinet/tcp_subr.c optional inet
==== //depot/projects/netperf/sys/net/if_arcsubr.c#3 (text+ko) ====
@@ -543,14 +543,14 @@
#ifdef INET
case ARCTYPE_IP:
m_adj(m, ARC_HDRNEWLEN);
- if (ip_fastforward(m))
+ if (ipflow_fastforward(m))
return;
isr = NETISR_IP;
break;
case ARCTYPE_IP_OLD:
m_adj(m, ARC_HDRLEN);
- if (ip_fastforward(m))
+ if (ipflow_fastforward(m))
return;
isr = NETISR_IP;
break;
==== //depot/projects/netperf/sys/net/if_ef.c#3 (text+ko) ====
@@ -252,8 +252,8 @@
#endif
#ifdef INET
case ETHERTYPE_IP:
- if (ip_fastforward(m))
- return;
+ if (ipflow_fastforward(m))
+ return (0);
isr = NETISR_IP;
break;
==== //depot/projects/netperf/sys/net/if_ethersubr.c#9 (text+ko) ====
@@ -717,7 +717,7 @@
switch (ether_type) {
#ifdef INET
case ETHERTYPE_IP:
- if (ip_fastforward(m))
+ if (ipflow_fastforward(m))
return;
isr = NETISR_IP;
break;
==== //depot/projects/netperf/sys/net/if_faith.c#6 (text+ko) ====
@@ -271,8 +271,17 @@
struct rt_addrinfo *info;
{
RT_LOCK_ASSERT(rt);
- if (rt)
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+
+ if (rt) {
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
+ /*
+ * For optimal performance, the send and receive buffers
+ * should be at least twice the MTU plus a little more for
+ * overhead.
+ */
+ rt->rt_rmx.rmx_recvpipe =
+ rt->rt_rmx.rmx_sendpipe = 3 * FAITHMTU;
+ }
}
/*
==== //depot/projects/netperf/sys/net/if_fddisubr.c#4 (text+ko) ====
@@ -471,7 +471,7 @@
switch (type) {
#ifdef INET
case ETHERTYPE_IP:
- if (ip_fastforward(m))
+ if (ipflow_fastforward(m))
return;
isr = NETISR_IP;
break;
==== //depot/projects/netperf/sys/net/if_iso88025subr.c#4 (text+ko) ====
@@ -556,7 +556,7 @@
#ifdef INET
case ETHERTYPE_IP:
th->iso88025_shost[0] &= ~(TR_RII);
- if (ip_fastforward(m))
+ if (ipflow_fastforward(m))
return;
isr = NETISR_IP;
break;
==== //depot/projects/netperf/sys/net/if_loop.c#9 (text+ko) ====
@@ -357,8 +357,17 @@
struct rt_addrinfo *info;
{
RT_LOCK_ASSERT(rt);
- if (rt)
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+
+ if (rt) {
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
+ /*
+ * For optimal performance, the send and receive buffers
+ * should be at least twice the MTU plus a little more for
+ * overhead.
+ */
+ rt->rt_rmx.rmx_recvpipe =
+ rt->rt_rmx.rmx_sendpipe = 3 * LOMTU;
+ }
}
/*
==== //depot/projects/netperf/sys/net/if_ppp.c#4 (text+ko) ====
@@ -1538,8 +1538,8 @@
m->m_pkthdr.len -= PPP_HDRLEN;
m->m_data += PPP_HDRLEN;
m->m_len -= PPP_HDRLEN;
- if (ip_fastforward(m))
- return;
+ if (ipflow_fastforward(m))
+ return;
isr = NETISR_IP;
break;
#endif
==== //depot/projects/netperf/sys/net/route.c#18 (text+ko) ====
@@ -139,7 +139,7 @@
*/
newrt = rt = (struct rtentry *)rn;
nflags = rt->rt_flags & ~ignflags;
- if (report && (nflags & RTF_CLONING)) {
+ if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
/*
* We are apparently adding (report = 0 in delete).
* If it requires that it be cloned, do so.
@@ -548,7 +548,7 @@
*/
if (flags & RTF_HOST) {
netmask = 0;
- flags &= ~RTF_CLONING;
+ flags &= ~(RTF_CLONING | RTF_PRCLONING);
}
switch (req) {
case RTM_DELETE:
@@ -570,7 +570,7 @@
* Now search what's left of the subtree for any cloned
* routes which might have been formed from this node.
*/
- if ((rt->rt_flags & RTF_CLONING) &&
+ if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
rt_mask(rt)) {
rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
rt_fixdelete, rt);
@@ -617,7 +617,7 @@
ifa = rt->rt_ifa;
/* XXX locking? */
flags = rt->rt_flags &
- ~(RTF_CLONING | RTF_STATIC);
+ ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
flags |= RTF_WASCLONED;
gateway = rt->rt_gateway;
if ((netmask = rt->rt_genmask) == 0)
@@ -678,11 +678,11 @@
/*
* Uh-oh, we already have one of these in the tree.
* We do a special hack: if the route that's already
- * there was generated by the cloning mechanism
- * then we just blow it away and retry the insertion
- * of the new one.
+ * there was generated by the protocol-cloning
+ * mechanism, then we just blow it away and retry
+ * the insertion of the new one.
*/
- rt2 = rtalloc1(dst, 0, 0);
+ rt2 = rtalloc1(dst, 0, RTF_PRCLONING);
if (rt2 && rt2->rt_parent) {
rtrequest(RTM_DELETE,
rt_key(rt2),
@@ -724,7 +724,7 @@
("no route to clone from"));
rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
- if ((*ret_nrt)->rt_flags & RTF_CLONING) {
+ if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
/*
* NB: We do not bump the refcnt on the parent
* entry under the assumption that it will
@@ -800,7 +800,7 @@
struct rtentry *rt0 = vp;
if (rt->rt_parent == rt0 &&
- !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
+ !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
return rtrequest(RTM_DELETE, rt_key(rt),
(struct sockaddr *)0, rt_mask(rt),
rt->rt_flags, (struct rtentry **)0);
@@ -841,7 +841,7 @@
#endif
if (!rt->rt_parent ||
- (rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
+ (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
#ifdef DEBUG
if(rtfcdebug) printf("no parent, pinned or cloning\n");
#endif
@@ -992,10 +992,9 @@
* correct choice anyway), and avoid the resulting reference loops
* by disallowing any route to run through itself as a gateway.
* This is obviously mandatory when we get rt->rt_output().
- * XXX: After removal of PRCLONING this probably not needed anymore.
*/
if (rt->rt_flags & RTF_GATEWAY) {
- rt->rt_gwroute = rtalloc1(gate, 1, 0);
+ rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING);
if (rt->rt_gwroute == rt) {
RTFREE_LOCKED(rt->rt_gwroute);
rt->rt_gwroute = 0;
==== //depot/projects/netperf/sys/net/route.h#8 (text+ko) ====
@@ -58,12 +58,6 @@
* These numbers are used by reliable protocols for determining
* retransmission behavior and are included in the routing structure.
*/
-struct rt_metrics_lite {
- u_long rmx_mtu; /* MTU for this path */
- u_long rmx_expire; /* lifetime for route, e.g. redirect */
- u_long rmx_pksent; /* packets sent using this route */
-};
-
struct rt_metrics {
u_long rmx_locks; /* Kernel must leave these values alone */
u_long rmx_mtu; /* MTU for this path */
@@ -110,10 +104,10 @@
long rt_refcnt; /* # held references */
u_long rt_flags; /* up/down?, host/net */
struct ifnet *rt_ifp; /* the answer: interface to use */
- struct ifaddr *rt_ifa; /* the answer: interface address to use */
+ struct ifaddr *rt_ifa; /* the answer: interface to use */
struct sockaddr *rt_genmask; /* for generation of cloned routes */
caddr_t rt_llinfo; /* pointer to link level info cache */
- struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */
+ struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */
struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */
int (*rt_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
@@ -157,7 +151,7 @@
#define RTF_PROTO2 0x4000 /* protocol specific routing flag */
#define RTF_PROTO1 0x8000 /* protocol specific routing flag */
-/* 0x10000 unused */
+#define RTF_PRCLONING 0x10000 /* protocol requires cloning */
#define RTF_WASCLONED 0x20000 /* route generated through cloning */
#define RTF_PROTO3 0x40000 /* protocol specific routing flag */
/* 0x80000 unused */
==== //depot/projects/netperf/sys/net/rtsock.c#7 (text+ko) ====
@@ -86,8 +86,7 @@
static int sysctl_dumpentry(struct radix_node *rn, void *vw);
static int sysctl_iflist(int af, struct walkarg *w);
static int route_output(struct mbuf *, struct socket *);
-static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics_lite *);
-static void rt_getmetrics(struct rt_metrics_lite *, struct rt_metrics *);
+static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *);
static void rt_dispatch(struct mbuf *, struct sockaddr *);
/*
@@ -355,6 +354,9 @@
RT_LOCK(saved_nrt);
rt_setmetrics(rtm->rtm_inits,
&rtm->rtm_rmx, &saved_nrt->rt_rmx);
+ saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
+ saved_nrt->rt_rmx.rmx_locks |=
+ (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
saved_nrt->rt_refcnt--;
saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
RT_UNLOCK(saved_nrt);
@@ -425,7 +427,7 @@
(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
(struct walkarg *)0);
rtm->rtm_flags = rt->rt_flags;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ rtm->rtm_rmx = rt->rt_rmx;
rtm->rtm_addrs = info.rti_addrs;
break;
@@ -475,7 +477,9 @@
rt->rt_genmask = info.rti_info[RTAX_GENMASK];
/* FALLTHROUGH */
case RTM_LOCK:
- /* We don't support locks anymore */
+ rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
+ rt->rt_rmx.rmx_locks |=
+ (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
break;
}
RT_UNLOCK(rt);
@@ -537,28 +541,20 @@
}
static void
-rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics_lite *out)
+rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
{
#define metric(f, e) if (which & (f)) out->e = in->e;
- /*
- * Only these are stored in the routing entry since introduction
- * of tcp hostcache. The rest is ignored.
- */
+ metric(RTV_RPIPE, rmx_recvpipe);
+ metric(RTV_SPIPE, rmx_sendpipe);
+ metric(RTV_SSTHRESH, rmx_ssthresh);
+ metric(RTV_RTT, rmx_rtt);
+ metric(RTV_RTTVAR, rmx_rttvar);
+ metric(RTV_HOPCOUNT, rmx_hopcount);
metric(RTV_MTU, rmx_mtu);
metric(RTV_EXPIRE, rmx_expire);
#undef metric
}
-static void
-rt_getmetrics(struct rt_metrics_lite *in, struct rt_metrics *out)
-{
-#define metric(e) out->e = in->e;
- bzero(out, sizeof(*out));
- metric(rmx_mtu);
- metric(rmx_expire);
-#undef metric
-}
-
#define ROUNDUP(a) \
((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
@@ -948,8 +944,8 @@
struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
rtm->rtm_flags = rt->rt_flags;
- rtm->rtm_use = rt->rt_rmx.rmx_pksent;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ rtm->rtm_use = rt->rt_use;
+ rtm->rtm_rmx = rt->rt_rmx;
rtm->rtm_index = rt->rt_ifp->if_index;
rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
rtm->rtm_addrs = info.rti_addrs;
==== //depot/projects/netperf/sys/netatalk/ddp_output.c#4 (text+ko) ====
@@ -217,7 +217,7 @@
elh->el_type = ELAP_DDPEXTEND;
elh->el_dnode = gate.sat_addr.s_node;
}
- ro->ro_rt->rt_rmx.rmx_pksent++;
+ ro->ro_rt->rt_use++;
#ifdef NETATALK_DEBUG
printf ("ddp_route: from %d.%d to %d.%d, via %d.%d (%s%d)\n",
==== //depot/projects/netperf/sys/netinet/icmp_var.h#3 (text+ko) ====
@@ -81,12 +81,11 @@
extern int badport_bandlim(int);
#define BANDLIM_UNLIMITED -1
#define BANDLIM_ICMP_UNREACH 0
-#define BANDLIM_ICMP_UNREACH_HOST 1
-#define BANDLIM_ICMP_ECHO 2
-#define BANDLIM_ICMP_TSTAMP 3
-#define BANDLIM_RST_CLOSEDPORT 4 /* No connection, and no listeners */
-#define BANDLIM_RST_OPENPORT 5 /* No connection, listener */
-#define BANDLIM_MAX 5
+#define BANDLIM_ICMP_ECHO 1
+#define BANDLIM_ICMP_TSTAMP 2
+#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
+#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */
+#define BANDLIM_MAX 4
#endif
#endif
==== //depot/projects/netperf/sys/netinet/in_pcb.c#7 (text+ko) ====
@@ -536,6 +536,7 @@
if (error)
return (error);
}
+
if (!TAILQ_EMPTY(&in_ifaddrhead)) {
/*
* If the destination address is INADDR_ANY,
@@ -553,8 +554,7 @@
&in_ifaddrhead)->ia_broadaddr)->sin_addr;
}
if (laddr.s_addr == INADDR_ANY) {
- struct route *ro;
- struct route sro;
+ register struct route *ro;
ia = (struct in_ifaddr *)0;
/*
@@ -563,10 +563,19 @@
* Note that we should check the address family of the cached
* destination, in case of sharing the cache with IPv6.
*/
- ro = &sro;
- bzero(ro, sizeof(*ro));
- if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) {
- /* Find out route to destination */
+ ro = &inp->inp_route;
+ if (ro->ro_rt &&
+ (ro->ro_dst.sa_family != AF_INET ||
+ satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr ||
+ inp->inp_socket->so_options & SO_DONTROUTE)) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = (struct rtentry *)0;
+ }
+ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+ (ro->ro_rt == (struct rtentry *)0 ||
+ ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+ /* No route yet, so try to acquire one */
+ bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
ro->ro_dst.sa_family = AF_INET;
ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr;
@@ -580,8 +589,6 @@
*/
if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
ia = ifatoia(ro->ro_rt->rt_ifa);
- if (ro->ro_rt)
- RTFREE(ro->ro_rt);
if (ia == 0) {
bzero(&sa, sizeof(sa));
sa.sin_addr = faddr;
@@ -668,6 +675,8 @@
}
if (inp->inp_options)
(void)m_free(inp->inp_options);
+ if (inp->inp_route.ro_rt)
+ RTFREE(inp->inp_route.ro_rt);
ip_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
INP_LOCK_DESTROY(inp);
@@ -841,6 +850,60 @@
}
/*
+ * Check for alternatives when higher level complains
+ * about service problems. For now, invalidate cached
+ * routing information. If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ */
+void
+in_losing(inp)
+ struct inpcb *inp;
+{
+ register struct rtentry *rt;
+ struct rt_addrinfo info;
+
+ if ((rt = inp->inp_route.ro_rt)) {
+ RT_LOCK(rt);
+ inp->inp_route.ro_rt = NULL;
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_flags = rt->rt_flags;
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
+ if (rt->rt_flags & RTF_DYNAMIC) {
+ RT_UNLOCK(rt); /* XXX refcnt? */
+ (void) rtrequest1(RTM_DELETE, &info, NULL);
+ } else
+ rtfree(rt);
+ /*
+ * A new route can be allocated
+ * the next time output is attempted.
+ */
+ }
+}
+
+/*
+ * After a routing change, flush old routing
+ * and allocate a (hopefully) better one.
+ */
+struct inpcb *
+in_rtchange(inp, errno)
+ register struct inpcb *inp;
+ int errno;
+{
+ if (inp->inp_route.ro_rt) {
+ RTFREE(inp->inp_route.ro_rt);
+ inp->inp_route.ro_rt = 0;
+ /*
+ * A new route can be allocated the next time
+ * output is attempted.
+ */
+ }
+ return inp;
+}
+
+/*
* Lookup a PCB based on the local address and port.
*/
struct inpcb *
==== //depot/projects/netperf/sys/netinet/in_pcb.h#8 (text+ko) ====
@@ -94,22 +94,31 @@
/*
* XXX
- * the defines for inc_* are hacks and should be changed to direct references
+ * At some point struct route should possibly change to:
+ * struct rtentry *rt
+ * struct in_endpoints *ie;
*/
struct in_conninfo {
u_int8_t inc_flags;
u_int8_t inc_len;
u_int16_t inc_pad; /* XXX alignment for in_endpoints */
- /* protocol dependent part */
+ /* protocol dependent part; cached route */
struct in_endpoints inc_ie;
+ union {
+ /* placeholder for routing entry */
+ struct route inc4_route;
+ struct route_in6 inc6_route;
+ } inc_dependroute;
};
#define inc_isipv6 inc_flags /* temp compatability */
#define inc_fport inc_ie.ie_fport
#define inc_lport inc_ie.ie_lport
#define inc_faddr inc_ie.ie_faddr
#define inc_laddr inc_ie.ie_laddr
+#define inc_route inc_dependroute.inc4_route
#define inc6_faddr inc_ie.ie6_faddr
#define inc6_laddr inc_ie.ie6_laddr
+#define inc6_route inc_dependroute.inc6_route
struct icmp6_filter;
@@ -147,6 +156,7 @@
#define inp_lport inp_inc.inc_lport
#define inp_faddr inp_inc.inc_faddr
#define inp_laddr inp_inc.inc_laddr
+#define inp_route inp_inc.inc_route
#define inp_ip_tos inp_depend4.inp4_ip_tos
#define inp_options inp_depend4.inp4_options
#define inp_moptions inp_depend4.inp4_moptions
@@ -172,6 +182,7 @@
#define in6p_faddr inp_inc.inc6_faddr
#define in6p_laddr inp_inc.inc6_laddr
+#define in6p_route inp_inc.inc6_route
#define in6p_ip6_hlim inp_depend6.inp6_hlim
#define in6p_hops inp_depend6.inp6_hops /* default hop limit */
#define in6p_ip6_nxt inp_ip_p
@@ -316,6 +327,9 @@
extern int ipport_hilastauto;
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
+void in_losing(struct inpcb *);
+struct inpcb *
+ in_rtchange(struct inpcb *, int);
int in_pcballoc(struct socket *, struct inpcbinfo *, struct thread *);
int in_pcbbind(struct inpcb *, struct sockaddr *, struct thread *);
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
==== //depot/projects/netperf/sys/netinet/in_rmx.c#9 (text+ko) ====
@@ -73,6 +73,15 @@
struct radix_node *ret;
/*
+ * For IP, all unicast non-host routes are automatically cloning.
+ */
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ rt->rt_flags |= RTF_MULTICAST;
+
+ if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST)))
+ rt->rt_flags |= RTF_PRCLONING;
+
+ /*
* A little bit of help for both IP output and input:
* For host routes, we make sure that RTF_BROADCAST
* is set for anything that looks like a broadcast address.
@@ -85,7 +94,8 @@
*
* We also mark routes to multicast addresses as such, because
* it's easy to do and might be useful (but this is much more
- * dubious since it's so easy to inspect the address).
+ * dubious since it's so easy to inspect the address). (This
+ * is done above.)
*/
if (rt->rt_flags & RTF_HOST) {
if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
@@ -95,10 +105,9 @@
rt->rt_flags |= RTF_LOCAL;
}
}
- if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
- rt->rt_flags |= RTF_MULTICAST;
- if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
+ if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
+ rt->rt_ifp)
rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
ret = rn_addroute(v_arg, n_arg, head, treenodes);
@@ -109,7 +118,8 @@
* Find out if it is because of an
* ARP entry and delete it if so.
*/
- rt2 = rtalloc1((struct sockaddr *)sin, 0, RTF_CLONING);
+ rt2 = rtalloc1((struct sockaddr *)sin, 0,
+ RTF_CLONING | RTF_PRCLONING);
if (rt2) {
if (rt2->rt_flags & RTF_LLINFO &&
rt2->rt_flags & RTF_HOST &&
@@ -128,6 +138,14 @@
RTFREE_LOCKED(rt2);
}
}
+
+ /*
+ * If the new route created successfully, and we are forwarding,
+ * flush any cached routes to avoid using a stale value.
+ */
+ if (ret != NULL && ipforwarding)
+ ip_forward_cacheinval();
+
return ret;
}
@@ -380,7 +398,7 @@
* so that behavior is not needed there.
*/
RT_LOCK(rt);
- rt->rt_flags &= ~RTF_CLONING;
+ rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
RT_UNLOCK(rt);
err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
==== //depot/projects/netperf/sys/netinet/in_var.h#4 (text+ko) ====
@@ -230,7 +230,9 @@
void ip_input(struct mbuf *);
int in_ifadown(struct ifaddr *ifa, int);
void in_ifscrub(struct ifnet *, struct in_ifaddr *);
-int ip_fastforward(struct mbuf *);
+int ipflow_fastforward(struct mbuf *);
+void ipflow_create(const struct route *, struct mbuf *);
+void ipflow_slowtimo(void);
#endif /* _KERNEL */
==== //depot/projects/netperf/sys/netinet/ip_divert.c#8 (text+ko) ====
@@ -333,7 +333,7 @@
/* Send packet to output processing */
ipstat.ips_rawout++; /* XXX */
error = ip_output((struct mbuf *)&divert_tag,
- inp->inp_options, NULL,
+ inp->inp_options, &inp->inp_route,
(so->so_options & SO_DONTROUTE) |
IP_ALLOWBROADCAST | IP_RAWOUTPUT,
inp->inp_moptions, NULL);
==== //depot/projects/netperf/sys/netinet/ip_fw.h#3 (text+ko) ====
@@ -28,7 +28,6 @@
#ifndef _IPFW2_H
#define _IPFW2_H
#define IPFW2 1
-
/*
* The kernel representation of ipfw rules is made of a list of
* 'instructions' (for all practical purposes equivalent to BPF
==== //depot/projects/netperf/sys/netinet/ip_fw2.c#12 (text+ko) ====
@@ -461,16 +461,13 @@
dst->sin_len = sizeof(*dst);
dst->sin_addr = src;
- rtalloc_ign(&ro, RTF_CLONING);
+ rtalloc_ign(&ro, RTF_CLONING|RTF_PRCLONING);
}
- if (ro.ro_rt == NULL)
+ if ((ro.ro_rt == NULL) || (ifp == NULL) ||
+ (ro.ro_rt->rt_ifp->if_index != ifp->if_index))
return 0;
- if ((ifp == NULL) || (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) {
- RTFREE(ro.ro_rt);
- return 0;
- }
- RTFREE(ro.ro_rt);
+
return 1;
}
@@ -1162,6 +1159,7 @@
struct mbuf *m;
struct ip *ip;
struct tcphdr *tcp;
+ struct route sro; /* fake route */
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == 0)
@@ -1227,8 +1225,12 @@
*/
ip->ip_ttl = ip_defttl;
ip->ip_len = m->m_pkthdr.len;
+ bzero (&sro, sizeof (sro));
+ ip_rtaddr(ip->ip_dst, &sro);
m->m_flags |= M_SKIP_FIREWALL;
- ip_output(m, NULL, NULL, 0, NULL, NULL);
+ ip_output(m, NULL, &sro, 0, NULL, NULL);
+ if (sro.ro_rt)
+ RTFREE(sro.ro_rt);
}
/*
==== //depot/projects/netperf/sys/netinet/ip_icmp.c#7 (text+ko) ====
@@ -52,15 +52,11 @@
#include <net/route.h>
#include <netinet/in.h>
-#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/ip_var.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcpip.h>
#include <netinet/icmp_var.h>
#ifdef IPSEC
@@ -124,7 +120,7 @@
#endif
static void icmp_reflect(struct mbuf *);
-static void icmp_send(struct mbuf *, struct mbuf *);
+static void icmp_send(struct mbuf *, struct mbuf *, struct route *);
static int ip_next_mtu(int, int);
extern struct protosw inetsw[];
@@ -169,18 +165,6 @@
if (n->m_flags & (M_BCAST|M_MCAST))
goto freeit;
/*
- * Limit sending of ICMP host unreachable messages.
- * If we are acting as a router and someone is doing a sweep
- * scan (eg. nmap and/or numerous windows worms) for destinations
- * we are the gateway for but are not reachable (ie. a /24 on a
- * interface and only a couple of hosts on the ethernet) we would
- * generate a storm of ICMP host unreachable messages.
- */
- if (type == ICMP_UNREACH && code == ICMP_UNREACH_HOST) {
- if (badport_bandlim(BANDLIM_ICMP_UNREACH_HOST) < 0)
- goto freeit;
- }
- /*
* First, formulate icmp message
*/
m = m_gethdr(M_DONTWAIT, MT_HEADER);
@@ -249,34 +233,27 @@
m_freem(n);
}
+static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
+
/*
* Process a received ICMP message.
*/
void
icmp_input(m, off)
- struct mbuf *m;
+ register struct mbuf *m;
int off;
{
int hlen = off;
- struct icmp *icp;
+ register struct icmp *icp;
+ register struct ip *ip = mtod(m, struct ip *);
+ int icmplen = ip->ip_len;
+ register int i;
struct in_ifaddr *ia;
- struct ip *ip = mtod(m, struct ip *);
- int icmplen = ip->ip_len;
- int i, code;
void (*ctlfunc)(int, struct sockaddr *, void *);
- struct sockaddr_in icmpsrc, icmpdst, icmpgw;
+ int code;
- /* Initialize */
- bzero(&icmpsrc, sizeof(icmpsrc));
- icmpsrc.sin_len = sizeof(struct sockaddr_in);
- icmpsrc.sin_family = AF_INET;
- bzero(&icmpdst, sizeof(icmpdst));
- icmpdst.sin_len = sizeof(struct sockaddr_in);
- icmpdst.sin_family = AF_INET;
- bzero(&icmpgw, sizeof(icmpgw));
- icmpgw.sin_len = sizeof(struct sockaddr_in);
- icmpgw.sin_family = AF_INET;
-
/*
* Locate icmp structure in mbuf, and check
* that not corrupted and of at least minimum length.
@@ -410,7 +387,7 @@
printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
#endif
icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
-
+#if 1
/*
* MTU discovery:
* If we got a needfrag and there is a host route to the
@@ -420,38 +397,40 @@
* notice that the MTU has changed and adapt accordingly.
* If no new MTU was suggested, then we guess a new one
* less than the current value. If the new MTU is
- * unreasonably small (defined by sysctl tcp_minmss), then
- * we don't update the MTU value.
- *
- * XXX: All this should be done in tcp_mtudisc() because
- * the way we do it now, everyone can send us bogus ICMP
- * MSGSIZE packets for any destination. By doing this far
- * higher in the chain we have a matching tcp connection.
- * Thus spoofing is much harder. However there is no easy
- * non-hackish way to pass the new MTU up to tcp_mtudisc().
- * Also see next XXX regarding IPv4 AH TCP.
+ * unreasonably small (arbitrarily set at 296), then
+ * we reset the MTU to the interface value and enable the
+ * lock bit, indicating that we are no longer doing MTU
+ * discovery.
*/
if (code == PRC_MSGSIZE) {
+ struct rtentry *rt;
int mtu;
- struct in_conninfo inc;
- bzero(&inc, sizeof(inc));
- inc.inc_flags = 0; /* IPv4 */
- inc.inc_faddr = icmpsrc.sin_addr;
-
- mtu = ntohs(icp->icmp_nextmtu);
- if (!mtu)
- mtu = ip_next_mtu(mtu, 1);
-
- if (mtu >= max(296, (tcp_minmss + sizeof(struct tcpiphdr))))
- tcp_hc_updatemtu(&inc, mtu);
-
+ rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
+ RTF_CLONING | RTF_PRCLONING);
+ if (rt && (rt->rt_flags & RTF_HOST)
+ && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
+ mtu = ntohs(icp->icmp_nextmtu);
+ if (!mtu)
+ mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
+ 1);
#ifdef DEBUG_MTUDISC
- printf("MTU for %s reduced to %d\n",
- inet_ntoa(icmpsrc.sin_addr), mtu);
+ printf("MTU for %s reduced to %d\n",
+ inet_ntoa(icmpsrc.sin_addr), mtu);
#endif
+ if (mtu < 296) {
+ /* rt->rt_rmx.rmx_mtu =
+ rt->rt_ifp->if_mtu; */
+ rt->rt_rmx.rmx_locks |= RTV_MTU;
+ } else if (rt->rt_rmx.rmx_mtu > mtu) {
+ rt->rt_rmx.rmx_mtu = mtu;
+ }
+ }
+ if (rt)
+ rtfree(rt);
}
+#endif
/*
* XXX if the packet contains [IPv4 AH TCP], we can't make a
* notification to TCP layer.
@@ -628,6 +607,7 @@
struct in_addr t;
struct mbuf *opts = 0;
int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
+ struct route *ro = NULL, rt;
if (!in_canforward(ip->ip_src) &&
((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
@@ -638,6 +618,8 @@
}
t = ip->ip_dst;
ip->ip_dst = ip->ip_src;
+ ro = &rt;
+ bzero(ro, sizeof(*ro));
/*
* If the incoming packet was addressed directly to us,
* use dst as the src for the reply. Otherwise (broadcast
@@ -658,7 +640,7 @@
goto match;
}
}
- ia = ip_rtaddr(ip->ip_dst);
+ ia = ip_rtaddr(ip->ip_dst, ro);
/* We need a route to do anything useful. */
if (ia == NULL) {
m_freem(m);
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects mailing list