svn commit: r234650 - in user/andre/routelocking: net netinet
Andre Oppermann
andre at FreeBSD.org
Tue Apr 24 12:54:05 UTC 2012
Author: andre
Date: Tue Apr 24 12:54:04 2012
New Revision: 234650
URL: http://svn.freebsd.org/changeset/base/234650
Log:
Add a function rtlookup_fib() that copies out the relevant information
from an rtentry instead of returning the rtentry. This avoids the
need to lock the rtentry and to increase the refcount on it.
Convert ip_output() to use rtlookup_fib() in a simplistic way. Certain
seldom used functionality may not work anymore and the flowtable
isn't available at the moment.
Convert ip_fastforward() to use rtlookup_fib().
This code is meant to be used for profiling and to be experimented
with further to determine which locking strategy returns the best
results.
Modified:
user/andre/routelocking/net/route.c
user/andre/routelocking/net/route.h
user/andre/routelocking/netinet/ip_fastfwd.c
user/andre/routelocking/netinet/ip_output.c
user/andre/routelocking/netinet/tcp_output.c
Modified: user/andre/routelocking/net/route.c
==============================================================================
--- user/andre/routelocking/net/route.c Tue Apr 24 12:43:29 2012 (r234649)
+++ user/andre/routelocking/net/route.c Tue Apr 24 12:54:04 2012 (r234650)
@@ -425,6 +425,55 @@ done:
return (newrt);
}
+/*
+ * Look up the route for rtl->rtl_dst in FIB 'fibnum' and copy the
+ * result into 'rtl' rather than handing out the rtentry itself.
+ *
+ * On success rtl_ifp, rtl_ifa, rtl_mtu and rtl_flags are filled in from
+ * the matching rtentry.  If the caller supplied rtl_gw and the route has
+ * RTF_GATEWAY set, the gateway address is copied into rtl_gw; that
+ * buffer must have room for SA_SIZE(rt->rt_gateway) bytes.  With
+ * RTL_PKSENT set in 'flags' the route's packets-sent counter is bumped.
+ *
+ * Returns 0 on success, or ENETUNREACH when there is no table for the
+ * address family, no non-root match, or the match is a reject or
+ * blackhole route.  'rtl' is not written to on failure.
+ */
+int
+rtlookup_fib(struct rtlookup *rtl, u_int fibnum, int flags)
+{
+	struct radix_node_head *rnh;
+	struct radix_node *rn;
+	struct rtentry *rt;
+	struct rm_priotracker tracker;
+	int error;
+
+	KASSERT((fibnum < rt_numfibs),
+	    ("%s: bad fibnum", __func__));
+
+	/* Address families other than IPv4/IPv6 always use FIB 0. */
+	switch (rtl->rtl_dst->sa_family) {
+	case AF_INET:
+	case AF_INET6:
+		break;
+	default:
+		fibnum = 0;
+	}
+
+	rnh = rt_tables_get_rnh(fibnum, rtl->rtl_dst->sa_family);
+	if (rnh == NULL) {
+		V_rtstat.rts_unreach++;
+		return (ENETUNREACH);
+	}
+
+	/*
+	 * Look up the address in the table for that Address Family.
+	 * Every exit below this point has to go through "done" so the
+	 * read lock taken here is always released again.
+	 */
+	error = ENETUNREACH;
+	RADIX_NODE_HEAD_RLOCK(rnh, &tracker);
+	rn = rnh->rnh_matchaddr(rtl->rtl_dst, rnh);
+	if (rn == NULL || (rn->rn_flags & RNF_ROOT))
+		goto done;
+
+	rt = RNTORT(rn);
+	if (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))
+		goto done;
+
+	/* Only copy DST when a gateway, otherwise route to interface. */
+	if (rtl->rtl_gw != NULL && (rt->rt_flags & RTF_GATEWAY) != 0)
+		bcopy(rt->rt_gateway, rtl->rtl_gw, SA_SIZE(rt->rt_gateway));
+	rtl->rtl_ifp = rt->rt_ifp;
+	rtl->rtl_ifa = rt->rt_ifa;
+	rtl->rtl_mtu = rt->rt_rmx.rmx_mtu;
+	rtl->rtl_flags = rt->rt_flags;
+	if (flags & RTL_PKSENT)
+		rt->rt_rmx.rmx_pksent++; /* racy but ok - XXX WHY?*/
+	error = 0;
+done:
+	RADIX_NODE_HEAD_RUNLOCK(rnh, &tracker);
+
+	return (error);
+}
+
/*
* Remove a reference count from an rtentry.
* If the count gets low enough, take it out of the routing table
Modified: user/andre/routelocking/net/route.h
==============================================================================
--- user/andre/routelocking/net/route.h Tue Apr 24 12:43:29 2012 (r234649)
+++ user/andre/routelocking/net/route.h Tue Apr 24 12:54:04 2012 (r234650)
@@ -83,6 +83,20 @@ struct rt_metrics {
};
/*
+ * Pointers to structures on the stack for pure routing
+ * table lookups.
+ */
+struct rtlookup {
+ struct sockaddr *rtl_dst; /* Request: destination address to look up */
+ struct sockaddr *rtl_gw; /* Answer: optional buffer; receives the gateway of RTF_GATEWAY routes */
+ struct ifnet *rtl_ifp; /* Answer: copy of the route's rt_ifp */
+ struct ifaddr *rtl_ifa; /* Answer: copy of the route's rt_ifa */
+ u_long rtl_mtu; /* Answer: copy of the route's rt_rmx.rmx_mtu */
+ int rtl_flags; /* Answer: copy of the route's rt_flags */
+};
+#define RTL_PKSENT 0x0001 /* rtlookup_fib(): increment the route's rmx_pksent counter */
+
+/*
* rmx_rtt and rmx_rttvar are stored as microseconds;
* RTTTOPRHZ(rtt) converts to a value suitable for use
* by a protocol slowtimo counter.
@@ -120,13 +134,13 @@ struct rtentry {
*/
#define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
#define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
- struct sockaddr *rt_gateway; /* value */
int rt_flags; /* up/down?, host/net */
- int rt_refcnt; /* # held references */
+ struct sockaddr *rt_gateway; /* the answer: nexthop to use */
struct ifnet *rt_ifp; /* the answer: interface to use */
struct ifaddr *rt_ifa; /* the answer: interface address to use */
struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */
u_int rt_fibnum; /* which FIB */
+ int rt_refcnt; /* # held references */
#ifdef _KERNEL
/* XXX ugly, user apps use this definition but don't have a mtx def */
struct mtx rt_mtx; /* mutex for routing entry */
@@ -399,6 +413,7 @@ void rtalloc_ign_fib(struct route *ro,
void rtalloc_fib(struct route *ro, u_int fibnum);
struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
int rtioctl_fib(u_long, caddr_t, u_int);
+int rtlookup_fib(struct rtlookup *, u_int, int);
void rtredirect_fib(struct sockaddr *, struct sockaddr *,
struct sockaddr *, int, struct sockaddr *, u_int);
int rtrequest_fib(int, struct sockaddr *,
Modified: user/andre/routelocking/netinet/ip_fastfwd.c
==============================================================================
--- user/andre/routelocking/netinet/ip_fastfwd.c Tue Apr 24 12:43:29 2012 (r234649)
+++ user/andre/routelocking/netinet/ip_fastfwd.c Tue Apr 24 12:54:04 2012 (r234650)
@@ -112,40 +112,22 @@ static VNET_DEFINE(int, ipfastforward_ac
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
&VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding");
-static struct sockaddr_in *
-ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
+static int
+ip_findroute(struct rtlookup *rtl, struct mbuf *m)
{
- struct sockaddr_in *dst;
- struct rtentry *rt;
/*
* Find route to destination.
- */
- bzero(ro, sizeof(*ro));
- dst = (struct sockaddr_in *)&ro->ro_dst;
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(*dst);
- dst->sin_addr.s_addr = dest.s_addr;
- in_rtalloc_ign(ro, 0, M_GETFIB(m));
-
- /*
* Route there and interface still up?
*/
- rt = ro->ro_rt;
- if (rt && (rt->rt_flags & RTF_UP) &&
- (rt->rt_ifp->if_flags & IFF_UP) &&
- (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- if (rt->rt_flags & RTF_GATEWAY)
- dst = (struct sockaddr_in *)rt->rt_gateway;
- } else {
+ if (rtlookup_fib(rtl, M_GETFIB(m), 0) ||
+ !(rtl->rtl_flags & RTF_GATEWAY)) {
IPSTAT_INC(ips_noroute);
IPSTAT_INC(ips_cantforward);
- if (rt)
- RTFREE(rt);
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
- return NULL;
+ return (0);
}
- return dst;
+ return (1);
}
/*
@@ -160,13 +142,13 @@ ip_fastforward(struct mbuf *m)
{
struct ip *ip;
struct mbuf *m0 = NULL;
- struct route ro;
- struct sockaddr_in *dst = NULL;
+ struct sockaddr_in dst;
struct ifnet *ifp;
struct in_addr odest, dest;
u_short sum, ip_len;
int error = 0;
int hlen, mtu;
+ struct rtlookup rtl;
#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag;
#endif
@@ -180,7 +162,7 @@ ip_fastforward(struct mbuf *m)
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
- bzero(&ro, sizeof(ro));
+ bzero(&dst, sizeof(dst));
/*
* Step 1: check for packet drop conditions (and sanity checks)
@@ -420,16 +402,23 @@ passin:
/*
* Find route to destination.
*/
- if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+ bzero(&dst, sizeof(dst));
+ dst.sin_family = AF_INET;
+ dst.sin_len = sizeof(dst);
+ dst.sin_addr.s_addr = dest.s_addr;
+
+ bzero(&rtl, sizeof(rtl));
+ rtl.rtl_dst = (struct sockaddr *)&dst;
+ if (ip_findroute(&rtl, m) != 1)
return NULL; /* icmp unreach already sent */
- ifp = ro.ro_rt->rt_ifp;
+ ifp = rtl.rtl_ifp;
/*
* Immediately drop blackholed traffic, and directed broadcasts
* for either the all-ones or all-zero subnet addresses on
* locally attached networks.
*/
- if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0)
+ if ((rtl.rtl_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0)
goto drop;
/*
@@ -476,8 +465,6 @@ forwardlocal:
* "ours"-label.
*/
m->m_flags |= M_FASTFWD_OURS;
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
return m;
}
/*
@@ -490,10 +477,10 @@ forwardlocal:
m_tag_delete(m, fwd_tag);
}
#endif /* IPFIREWALL_FORWARD */
- RTFREE(ro.ro_rt);
- if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+ dst.sin_addr.s_addr = dest.s_addr;
+ if (ip_findroute(&rtl, m) != 1)
return NULL; /* icmp unreach already sent */
- ifp = ro.ro_rt->rt_ifp;
+ ifp = rtl.rtl_ifp;
}
passout:
@@ -504,9 +491,7 @@ passout:
/*
* Check if route is dampned (when ARP is unable to resolve)
*/
- if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
- (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
- time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
+ if (rtl.rtl_flags & RTF_REJECT) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
goto consumed;
}
@@ -514,6 +499,7 @@ passout:
#ifndef ALTQ
/*
* Check if there is enough space in the interface queue
+ * XXXAO: ifq access not locked and could be outdated!
*/
if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
ifp->if_snd.ifq_maxlen) {
@@ -534,8 +520,8 @@ passout:
/*
* Check if packet fits MTU or if hardware will fragment for us
*/
- if (ro.ro_rt->rt_rmx.rmx_mtu)
- mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+ if (rtl.rtl_mtu > 0)
+ mtu = min(rtl.rtl_mtu, ifp->if_mtu);
else
mtu = ifp->if_mtu;
@@ -550,7 +536,7 @@ passout:
* Send off the packet via outgoing interface
*/
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, &ro);
+ (struct sockaddr *)&dst, NULL);
} else {
/*
* Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
@@ -583,7 +569,7 @@ passout:
m->m_nextpkt = NULL;
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, &ro);
+ (struct sockaddr *)&dst, NULL);
if (error)
break;
} while ((m = m0) != NULL);
@@ -601,17 +587,13 @@ passout:
if (error != 0)
IPSTAT_INC(ips_odropped);
else {
- ro.ro_rt->rt_rmx.rmx_pksent++;
IPSTAT_INC(ips_forward);
IPSTAT_INC(ips_fastforward);
}
consumed:
- RTFREE(ro.ro_rt);
return NULL;
drop:
if (m)
m_freem(m);
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
return NULL;
}
Modified: user/andre/routelocking/netinet/ip_output.c
==============================================================================
--- user/andre/routelocking/netinet/ip_output.c Tue Apr 24 12:43:29 2012 (r234649)
+++ user/andre/routelocking/netinet/ip_output.c Tue Apr 24 12:54:04 2012 (r234650)
@@ -119,12 +119,11 @@ ip_output(struct mbuf *m, struct mbuf *o
int mtu;
int n; /* scratchpad */
int error = 0;
- int nortfree = 0;
struct sockaddr_in *dst;
+ struct sockaddr_in dstn;
struct in_ifaddr *ia = NULL;
int isbroadcast, sw_csum;
- struct route iproute;
- struct rtentry *rte; /* cache for ro->ro_rt */
+ struct rtlookup rtl;
struct in_addr odst;
#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag = NULL;
@@ -143,28 +142,9 @@ ip_output(struct mbuf *m, struct mbuf *o
}
}
- if (ro == NULL) {
- ro = &iproute;
- bzero(ro, sizeof (*ro));
-
-#ifdef FLOWTABLE
- {
- struct flentry *fle;
-
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) {
- flow_to_route(fle, ro);
- nortfree = 1;
- }
- }
-#endif
- }
-
+ /*
+ * Insert IP options.
+ */
if (opt) {
int len = 0;
m = ip_insertoptions(m, opt, &len);
@@ -194,36 +174,20 @@ ip_output(struct mbuf *m, struct mbuf *o
hlen = ip->ip_hl << 2;
}
- dst = (struct sockaddr_in *)&ro->ro_dst;
-again:
- /*
- * If there is a cached route,
- * check that it is to the same destination
- * and is still up. If not, free it and try again.
- * The address family should also be checked in case of sharing the
- * cache with IPv6.
- */
- rte = ro->ro_rt;
- if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
- rte->rt_ifp == NULL ||
- !RT_LINK_IS_UP(rte->rt_ifp) ||
- dst->sin_family != AF_INET ||
- dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
- if (!nortfree)
- RTFREE(rte);
- rte = ro->ro_rt = (struct rtentry *)NULL;
- ro->ro_lle = (struct llentry *)NULL;
- }
-#ifdef IPFIREWALL_FORWARD
- if (rte == NULL && fwd_tag == NULL) {
-#else
- if (rte == NULL) {
-#endif
- bzero(dst, sizeof(*dst));
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(*dst);
- dst->sin_addr = ip->ip_dst;
+ if (ro != NULL) {
+ /* XXXAO: May use dst for nexthop. */
+ if (ro->ro_rt != NULL)
+ RTFREE(ro->ro_rt);
+ ro = NULL;
}
+ bzero(&rtl, sizeof(rtl));
+again:
+ dst = &dstn;
+ bzero(dst, sizeof(*dst));
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = ip->ip_dst;
+
/*
* If routing to interface only, short circuit routing lookup.
* The use of an all-ones broadcast address implies this; an
@@ -263,24 +227,13 @@ again:
isbroadcast = 0; /* fool gcc */
} else {
/*
- * We want to do any cloning requested by the link layer,
- * as this is probably required in all cases for correct
- * operation (as it is for ARP).
+ * Look up the route to the destination.
*/
- if (rte == NULL) {
-#ifdef RADIX_MPATH
- rtalloc_mpath_fib(ro,
- ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
- inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
-#else
- in_rtalloc_ign(ro, 0,
- inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
-#endif
- rte = ro->ro_rt;
- }
- if (rte == NULL ||
- rte->rt_ifp == NULL ||
- !RT_LINK_IS_UP(rte->rt_ifp)) {
+ rtl.rtl_dst = (struct sockaddr *)dst;
+ rtl.rtl_gw = (struct sockaddr *)dst;
+ rtl.rtl_ifp = ifp;
+
+ if (rtlookup_fib(&rtl, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m), 0)) {
#ifdef IPSEC
/*
* There is no route for this packet, but it is
@@ -294,38 +247,29 @@ again:
error = EHOSTUNREACH;
goto bad;
}
- ia = ifatoia(rte->rt_ifa);
+ ifp = rtl.rtl_ifp;
+ ia = (struct in_ifaddr *)rtl.rtl_ifa;
ifa_ref(&ia->ia_ifa);
- ifp = rte->rt_ifp;
- rte->rt_rmx.rmx_pksent++;
- if (rte->rt_flags & RTF_GATEWAY)
- dst = (struct sockaddr_in *)rte->rt_gateway;
- if (rte->rt_flags & RTF_HOST)
- isbroadcast = (rte->rt_flags & RTF_BROADCAST);
+ if (rtl.rtl_flags & RTF_HOST)
+ isbroadcast = (rtl.rtl_flags & RTF_BROADCAST);
else
isbroadcast = in_broadcast(dst->sin_addr, ifp);
}
+ KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));
+
/*
* Calculate MTU. If we have a route that is up, use that,
* otherwise use the interface's MTU.
*/
- if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) {
- /*
- * This case can happen if the user changed the MTU
- * of an interface after enabling IP on it. Because
- * most netifs don't keep track of routes pointing to
- * them, there is no way for one to update all its
- * routes when the MTU is changed.
- */
- if (rte->rt_rmx.rmx_mtu > ifp->if_mtu)
- rte->rt_rmx.rmx_mtu = ifp->if_mtu;
- mtu = rte->rt_rmx.rmx_mtu;
- } else {
+ if (rtl.rtl_mtu > 0)
+ mtu = min(rtl.rtl_mtu, ifp->if_mtu);
+ else
mtu = ifp->if_mtu;
- }
+
/* Catch a possible divide by zero later. */
- KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
- __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
+ KASSERT(mtu > 0, ("%s: mtu %d <= 0, rtl=%p (rtl_flags=0x%08x) ifp=%p",
+ __func__, mtu, &rtl, rtl.rtl_flags, ifp));
+
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
/*
@@ -333,7 +277,7 @@ again:
* still points to the address in "ro". (It may have been
* changed to point to a gateway address, above.)
*/
- dst = (struct sockaddr_in *)&ro->ro_dst;
+ //dst = (struct sockaddr_in *)&ro->ro_dst;
/*
* See if the caller provided any multicast options
*/
@@ -546,12 +490,12 @@ sendit:
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
+ m->m_pkthdr.csum_flags |=
+ CSUM_IP_CHECKED | CSUM_IP_VALID;
#ifdef SCTP
if (m->m_pkthdr.csum_flags & CSUM_SCTP)
m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
#endif
- m->m_pkthdr.csum_flags |=
- CSUM_IP_CHECKED | CSUM_IP_VALID;
error = netisr_queue(NETISR_IP, m);
goto done;
@@ -559,7 +503,6 @@ sendit:
/* Or forward to some other address? */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (fwd_tag) {
- dst = (struct sockaddr_in *)&ro->ro_dst;
bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
m->m_flags |= M_SKIP_FIREWALL;
m_tag_delete(m, fwd_tag);
@@ -629,7 +572,7 @@ passout:
*/
m->m_flags &= ~(M_PROTOFLAGS);
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, ro);
+ (struct sockaddr *)dst, NULL);
goto done;
}
@@ -649,7 +592,7 @@ passout:
goto bad;
for (; m; m = m0) {
m0 = m->m_nextpkt;
- m->m_nextpkt = 0;
+ m->m_nextpkt = NULL;
if (error == 0) {
/* Record statistics for this interface address. */
if (ia != NULL) {
@@ -663,7 +606,7 @@ passout:
m->m_flags &= ~(M_PROTOFLAGS);
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, ro);
+ (struct sockaddr *)dst, NULL);
} else
m_freem(m);
}
@@ -672,9 +615,6 @@ passout:
IPSTAT_INC(ips_fragmented);
done:
- if (ro == &iproute && ro->ro_rt && !nortfree) {
- RTFREE(ro->ro_rt);
- }
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (error);
Modified: user/andre/routelocking/netinet/tcp_output.c
==============================================================================
--- user/andre/routelocking/netinet/tcp_output.c Tue Apr 24 12:43:29 2012 (r234649)
+++ user/andre/routelocking/netinet/tcp_output.c Tue Apr 24 12:54:04 2012 (r234650)
@@ -1232,7 +1232,7 @@ timer:
ip->ip_off |= IP_DF;
error = ip_output(m, tp->t_inpcb->inp_options, NULL,
- ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
+ ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL,
tp->t_inpcb);
}
#endif /* INET */
More information about the svn-src-user
mailing list