svn commit: r189342 - in user/kmacy/HEAD_fast_net_merge: sbin/route
sys/net sys/netinet usr.sbin/route6d
Kip Macy
kmacy at FreeBSD.org
Tue Mar 3 18:38:39 PST 2009
Author: kmacy
Date: Wed Mar 4 02:38:38 2009
New Revision: 189342
URL: http://svn.freebsd.org/changeset/base/189342
Log:
add route weighting and generalize affinity to be per source IP
instead of per-flow
186625:
- import kernel support for route shutdown
186626:
- import user support for route shutdown
186628:
- don't lookup laddr or lport if they're already set
186630:
- fix route shutdown merge
186923:
- Add kernel support for weighting routes
186924:
- remove RTA_GENMASK
186925:
- remove genmask
- add -weight option to route for adding / changing
route weight
186994:
- add kernel support for "sticky" routes
(all connections from a given source ip will
be routed to the same dst ip)
186995:
- add support to the route command for making
routes sticky
187003:
- add new flags to route output
- remove hopcount
187004:
- update route flags and metricnames in route command
187005:
- fetch weight when getting metrics
187006:
- try to improve formatting slightly in route
187007:
- more output futzing
- add show as alias for get
187008:
- update show handling
187009:
- remove shutdown
- update route flags
187010:
- fix flag setting in RTM_CHANGE
187011:
- add debug cruft to route selection
187012:
- fix rn_mpath_count and reduce frequency of printing
187013:
- update loop condition; print hash earlier
187040:
- reduce default timeouts in the flowtable
- remove references to shutdown (redundant with
zero weight route)
- simplify weight checking
187041:
- fix radix_mpath comment
- remove shutdown flag and message
187206:
- include opt_mpath.h so that RADIX_MPATH will be
pulled in
- remove locking overhead to forwarding workloads
by making forwarding table pcpu
Modified:
user/kmacy/HEAD_fast_net_merge/sbin/route/keywords
user/kmacy/HEAD_fast_net_merge/sbin/route/route.c
user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c
user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c
user/kmacy/HEAD_fast_net_merge/sys/net/route.c
user/kmacy/HEAD_fast_net_merge/sys/net/route.h
user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c
user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c
user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c
Modified: user/kmacy/HEAD_fast_net_merge/sbin/route/keywords
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sbin/route/keywords Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sbin/route/keywords Wed Mar 4 02:38:38 2009 (r189342)
@@ -33,6 +33,7 @@ mtu
net
netmask
nostatic
+nostick
osi
prefixlen
proto1
@@ -44,8 +45,11 @@ rtt
rttvar
sa
sendpipe
+show
ssthresh
static
+sticky
+weight
x25
xns
xresolve
Modified: user/kmacy/HEAD_fast_net_merge/sbin/route/route.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sbin/route/route.c Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sbin/route/route.c Wed Mar 4 02:38:38 2009 (r189342)
@@ -169,6 +169,7 @@ main(argc, argv)
if (*argv)
switch (keyword(*argv)) {
case K_GET:
+ case K_SHOW:
uid = 0;
/* FALLTHROUGH */
@@ -548,6 +549,7 @@ set_metric(value, key)
caseof(K_SSTHRESH, RTV_SSTHRESH, rmx_ssthresh);
caseof(K_RTT, RTV_RTT, rmx_rtt);
caseof(K_RTTVAR, RTV_RTTVAR, rmx_rttvar);
+ caseof(K_WEIGHT, RTV_WEIGHT, rmx_weight);
}
rtm_inits |= flag;
if (lockrest || locking)
@@ -571,8 +573,9 @@ newroute(argc, argv)
errx(EX_NOPERM, "must be root to alter routing table");
}
cmd = argv[0];
- if (*cmd != 'g')
+ if (*cmd != 'g' && *cmd != 's')
shutdown(s, SHUT_RD); /* Don't want to read back our messages */
+
while (--argc > 0) {
if (**(++argv)== '-') {
switch (key = keyword(1 + *argv)) {
@@ -635,6 +638,12 @@ newroute(argc, argv)
case K_STATIC:
flags |= RTF_STATIC;
break;
+ case K_STICKY:
+ flags |= RTF_STICKY;
+ break;
+ case K_NOSTICK:
+ flags &= ~RTF_STICKY;
+ break;
case K_IFA:
if (!--argc)
usage((char *)NULL);
@@ -645,11 +654,6 @@ newroute(argc, argv)
usage((char *)NULL);
(void) getaddr(RTA_IFP, *++argv, 0);
break;
- case K_GENMASK:
- if (!--argc)
- usage((char *)NULL);
- (void) getaddr(RTA_GENMASK, *++argv, 0);
- break;
case K_GATEWAY:
if (!--argc)
usage((char *)NULL);
@@ -688,6 +692,7 @@ newroute(argc, argv)
case K_SSTHRESH:
case K_RTT:
case K_RTTVAR:
+ case K_WEIGHT:
if (!--argc)
usage((char *)NULL);
set_metric(*++argv, key);
@@ -741,7 +746,7 @@ newroute(argc, argv)
} else
break;
}
- if (*cmd == 'g')
+ if (*cmd == 'g' || *cmd == 's')
exit(ret != 0);
if (!qflag) {
oerrno = errno;
@@ -925,9 +930,6 @@ getaddr(which, s, hpp)
case RTA_NETMASK:
su = &so_mask;
break;
- case RTA_GENMASK:
- su = &so_genmask;
- break;
case RTA_IFP:
su = &so_ifp;
afamily = AF_LINK;
@@ -1191,7 +1193,7 @@ rtmsg(cmd, flags)
cmd = RTM_ADD;
else if (cmd == 'c')
cmd = RTM_CHANGE;
- else if (cmd == 'g') {
+ else if (cmd == 'g' || cmd == 's') {
cmd = RTM_GET;
if (so_ifp.sa.sa_family == 0) {
so_ifp.sa.sa_family = AF_LINK;
@@ -1208,13 +1210,11 @@ rtmsg(cmd, flags)
rtm.rtm_addrs = rtm_addrs;
rtm.rtm_rmx = rt_metrics;
rtm.rtm_inits = rtm_inits;
-
if (rtm_addrs & RTA_NETMASK)
mask_addr();
NEXTADDR(RTA_DST, so_dst);
NEXTADDR(RTA_GATEWAY, so_gate);
NEXTADDR(RTA_NETMASK, so_mask);
- NEXTADDR(RTA_GENMASK, so_genmask);
NEXTADDR(RTA_IFP, so_ifp);
NEXTADDR(RTA_IFA, so_ifa);
rtm.rtm_msglen = l = cp - (char *)&m_rtmsg;
@@ -1295,13 +1295,13 @@ char *msgtypes[] = {
};
char metricnames[] =
-"\011pksent\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire\2hopcount"
+"\011weight\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire"
"\1mtu";
char routeflags[] =
-"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE\010MASK_PRESENT"
-"\011CLONING\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE\016b016"
-"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3\024CHAINDELETE"
-"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST";
+"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE"
+"\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE"
+"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3"
+"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST\035STICKY";
char ifnetflags[] =
"\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5PTP\6b6\7RUNNING\010NOARP"
"\011PPROMISC\012ALLMULTI\013OACTIVE\014SIMPLEX\015LINK0\016LINK1"
@@ -1464,14 +1464,13 @@ print_getmsg(rtm, msglen)
#define msec(u) (((u) + 500) / 1000) /* usec to msec */
(void) printf("\n%s\n", "\
- recvpipe sendpipe ssthresh rtt,msec rttvar hopcount mtu expire");
+ recvpipe sendpipe ssthresh rtt,msec mtu weight expire");
printf("%8ld%c ", rtm->rtm_rmx.rmx_recvpipe, lock(RPIPE));
printf("%8ld%c ", rtm->rtm_rmx.rmx_sendpipe, lock(SPIPE));
printf("%8ld%c ", rtm->rtm_rmx.rmx_ssthresh, lock(SSTHRESH));
printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rtt), lock(RTT));
- printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rttvar), lock(RTTVAR));
- printf("%8ld%c ", rtm->rtm_rmx.rmx_hopcount, lock(HOPCOUNT));
printf("%8ld%c ", rtm->rtm_rmx.rmx_mtu, lock(MTU));
+ printf("%8ld%c ", rtm->rtm_rmx.rmx_weight, lock(WEIGHT));
if (rtm->rtm_rmx.rmx_expire)
rtm->rtm_rmx.rmx_expire -= time(0);
printf("%8ld%c\n", rtm->rtm_rmx.rmx_expire, lock(EXPIRE));
Modified: user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c Wed Mar 4 02:38:38 2009 (r189342)
@@ -232,13 +232,10 @@ struct flentry_v6 {
#define fl_rt fl_entry.fl_rt
#define fl_lle fl_entry.fl_lle
-#define SECS_PER_HOUR 3600
-#define SECS_PER_DAY (24*SECS_PER_HOUR)
-
-#define SYN_IDLE 300
-#define UDP_IDLE 300
-#define FIN_WAIT_IDLE 600
-#define TCP_IDLE SECS_PER_DAY
+#define SYN_IDLE 120
+#define UDP_IDLE 60
+#define FIN_WAIT_IDLE 300
+#define TCP_IDLE 1200
typedef void fl_lock_t(struct flowtable *, uint32_t);
@@ -331,13 +328,14 @@ flowtable_pcpu_unlock(struct flowtable *
static uint32_t
ipv4_flow_lookup_hash_internal(struct mbuf *m, struct route *ro,
- uint32_t *key, uint16_t *flags, uint8_t *protop)
+ uint32_t *key, uint16_t *flags, uint8_t *protop, uint32_t *hash,
+ uint32_t *hash_noports)
{
uint16_t sport = 0, dport = 0;
struct ip *ip;
uint8_t proto = 0;
int iphlen;
- uint32_t hash;
+ uint32_t rh;
struct sockaddr_in *sin;
struct tcphdr *th;
struct udphdr *uh;
@@ -353,14 +351,16 @@ ipv4_flow_lookup_hash_internal(struct mb
key[1] = 0;
key[2] = sin->sin_addr.s_addr;
- if (m == NULL || (*flags & FL_HASH_PORTS) == 0)
+ if (m == NULL)
goto skipports;
-
ip = mtod(m, struct ip *);
proto = ip->ip_p;
iphlen = ip->ip_hl << 2; /* XXX options? */
key[1] = ip->ip_src.s_addr;
-
+
+ if ((*flags & FL_HASH_PORTS) == 0)
+ goto skipports;
+
switch (proto) {
case IPPROTO_TCP:
th = (struct tcphdr *)((caddr_t)ip + iphlen);
@@ -387,30 +387,27 @@ ipv4_flow_lookup_hash_internal(struct mb
break;;
}
- *protop = proto;
-
- /*
- * If this is a transmit route cache then
- * hash all flows to a given destination to
- * the same bucket
- */
- if ((*flags & FL_HASH_PORTS) == 0)
- proto = sport = dport = 0;
-
- ((uint16_t *)key)[0] = sport;
- ((uint16_t *)key)[1] = dport;
skipports:
- hash = hashword(key, 3, hashjitter + proto);
+ rh = hashword(key, 3, hashjitter + proto);
+ *hash_noports = rh;
+ *hash = 0;
+ if ((*flags & FL_HASH_PORTS) && sport) {
+ ((uint16_t *)key)[0] = sport;
+ ((uint16_t *)key)[1] = dport;
+ rh = hashword(key, 3, hashjitter + proto);
+ *hash = rh;
+ }
if (m != NULL && (m->m_flags & M_FLOWID) == 0)
- m->m_pkthdr.flowid = hash;
-
- CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n", proto, hash, key[0], sport, dport);
-
- return (hash);
+ m->m_pkthdr.flowid = rh;
+
+ CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n",
+ proto, *hash, key[0], sport, dport);
+
+ return (0);
noop:
*protop = proto;
- return (0);
+ return (ENOENT);
}
static bitstr_t *
@@ -567,7 +564,7 @@ flowtable_key_equal(struct flentry *fle,
int
flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
{
- uint32_t key[9], hash;
+ uint32_t key[9], hash, hash_noports;
struct flentry *fle;
uint16_t flags;
uint8_t proto = 0;
@@ -578,13 +575,14 @@ flowtable_lookup(struct flowtable *ft, s
flags = ft ? ft->ft_flags : 0;
ro->ro_rt = NULL;
ro->ro_lle = NULL;
-
+ hash = hash_noports = 0;
+
/*
* The internal hash lookup is the only IPv4 specific bit
* remaining
*/
- hash = ipv4_flow_lookup_hash_internal(m, ro, key,
- &flags, &proto);
+ error = ipv4_flow_lookup_hash_internal(m, ro, key,
+ &flags, &proto, &hash, &hash_noports);
/*
* Ports are zero and this isn't a transmit cache
@@ -592,10 +590,13 @@ flowtable_lookup(struct flowtable *ft, s
* statex
* FL_HASH_PORTS => key[0] != 0 for TCP || UDP || SCTP
*/
- if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) {
+ if (error == ENOENT || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) {
cache = 0;
goto uncached;
}
+ if ((ft->ft_flags & FL_HASH_PORTS) == 0)
+ goto skipports;
+
FL_ENTRY_LOCK(ft, hash);
fle = FL_ENTRY(ft, hash);
rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
@@ -615,6 +616,27 @@ flowtable_lookup(struct flowtable *ft, s
}
FL_ENTRY_UNLOCK(ft, hash);
+skipports:
+ key[0] = 0;
+ FL_ENTRY_LOCK(ft, hash_noports);
+ fle = FL_ENTRY(ft, hash_noports);
+ rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
+ lle = __DEVOLATILE(struct llentry *, fle->f_lle);
+ if ((rt != NULL)
+ && fle->f_fhash == hash_noports
+ && flowtable_key_equal(fle, key, flags)
+ && (proto == fle->f_proto)
+ && (rt->rt_flags & RTF_UP)
+ && (rt->rt_ifp != NULL)) {
+ fle->f_uptime = time_uptime;
+ fle->f_flags |= flags;
+ ro->ro_rt = rt;
+ ro->ro_lle = lle;
+ FL_ENTRY_UNLOCK(ft, hash_noports);
+ return (0);
+ }
+ FL_ENTRY_UNLOCK(ft, hash_noports);
+
uncached:
/*
* This bit of code ends up locking the
@@ -640,6 +662,18 @@ uncached:
struct rtentry *rt = ro->ro_rt;
struct ifnet *ifp = rt->rt_ifp;
+ if (rt->rt_flags & RTF_STICKY) {
+ RTFREE(rt);
+ hash = hash_noports;
+ ft->ft_rtalloc(ro, hash, fib);
+ if (ro->ro_rt == NULL) {
+ error = ENETUNREACH;
+ goto done;
+ }
+ rt = ro->ro_rt;
+ ifp = rt->rt_ifp;
+ }
+
if (rt->rt_flags & RTF_GATEWAY)
l3addr = rt->rt_gateway;
else
@@ -671,7 +705,7 @@ uncached:
}
error = 0;
}
-
+done:
return (error);
}
@@ -720,7 +754,7 @@ flowtable_alloc(int nentry, int flags)
ft->ft_masks[i] = bit_alloc(nentry);
}
} else {
- ft->ft_lock_count = 2*(powerof2(mp_ncpus) ? mp_ncpus :
+ ft->ft_lock_count = 8*(powerof2(mp_ncpus) ? mp_ncpus :
(fls(mp_ncpus) << 1));
ft->ft_lock = flowtable_global_lock;
Modified: user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c Wed Mar 4 02:38:38 2009 (r189342)
@@ -77,15 +77,18 @@ rn_mpath_next(struct radix_node *rn)
return NULL;
}
-u_int32_t
+uint32_t
rn_mpath_count(struct radix_node *rn)
{
- u_int32_t i;
-
- i = 1;
- while ((rn = rn_mpath_next(rn)) != NULL)
- i++;
- return i;
+ uint32_t i = 0;
+ struct rtentry *rt;
+
+ while (rn != NULL) {
+ rt = (struct rtentry *)rn;
+ i += rt->rt_rmx.rmx_weight;
+ rn = rn_mpath_next(rn);
+ }
+ return (i);
}
struct rtentry *
@@ -256,10 +259,12 @@ different:
}
void
-rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
{
struct radix_node *rn0, *rn;
u_int32_t n;
+ struct rtentry *rt;
+ int64_t weight;
/*
* XXX we don't attempt to lookup cached route again; what should
@@ -284,25 +289,31 @@ rtalloc_mpath_fib(struct route *ro, u_in
/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
hash += hashjitter;
hash %= n;
- while (hash-- > 0 && rn) {
+ for (weight = abs((int32_t)hash), rt = ro->ro_rt;
+ weight >= rt->rt_rmx.rmx_weight && rn;
+ weight -= rt->rt_rmx.rmx_weight) {
+
/* stay within the multipath routes */
if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
break;
rn = rn->rn_dupedkey;
+ rt = (struct rtentry *)rn;
}
-
/* XXX try filling rt_gwroute and avoid unreachable gw */
- /* if gw selection fails, use the first match (default) */
+ /* gw selection has failed - there must be only zero weight routes */
if (!rn) {
RT_UNLOCK(ro->ro_rt);
+ ro->ro_rt = NULL;
return;
}
-
- RTFREE_LOCKED(ro->ro_rt);
- ro->ro_rt = (struct rtentry *)rn;
- RT_LOCK(ro->ro_rt);
- RT_ADDREF(ro->ro_rt);
+ if (ro->ro_rt != rt) {
+ RTFREE_LOCKED(ro->ro_rt);
+ ro->ro_rt = (struct rtentry *)rn;
+ RT_LOCK(ro->ro_rt);
+ RT_ADDREF(ro->ro_rt);
+
+ }
RT_UNLOCK(ro->ro_rt);
}
Modified: user/kmacy/HEAD_fast_net_merge/sys/net/route.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/route.c Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/route.c Wed Mar 4 02:38:38 2009 (r189342)
@@ -803,6 +803,103 @@ bad:
return (error);
}
+#ifdef RADIX_MPATH
+static int
+rn_mpath_update(int req, struct rt_addrinfo *info,
+ struct radix_node_head *rnh, struct rtentry **ret_nrt)
+{
+ /*
+ * if we got multipath routes, we require users to specify
+ * a matching RTAX_GATEWAY.
+ */
+ struct rtentry *rt, *rto = NULL;
+ register struct radix_node *rn;
+ int error = 0;
+
+ rn = rnh->rnh_matchaddr(dst, rnh);
+ if (rn == NULL)
+ return (ESRCH);
+ rto = rt = RNTORT(rn);
+ rt = rt_mpath_matchgate(rt, gateway);
+ if (rt == NULL)
+ return (ESRCH);
+ /*
+ * this is the first entry in the chain
+ */
+ if (rto == rt) {
+ rn = rn_mpath_next((struct radix_node *)rt);
+ /*
+ * there is another entry, now it's active
+ */
+ if (rn) {
+ rto = RNTORT(rn);
+ RT_LOCK(rto);
+ rto->rt_flags |= RTF_UP;
+ RT_UNLOCK(rto);
+ } else if (rt->rt_flags & RTF_GATEWAY) {
+ /*
+ * For gateway routes, we need to
+ * make sure that we we are deleting
+ * the correct gateway.
+ * rt_mpath_matchgate() does not
+ * check the case when there is only
+ * one route in the chain.
+ */
+ if (gateway &&
+ (rt->rt_gateway->sa_len != gateway->sa_len ||
+ memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
+ error = ESRCH;
+ goto done;
+ }
+ /*
+ * use the normal delete code to remove
+ * the first entry
+ */
+ if (req != RTM_DELETE)
+ goto nondelete;
+
+ error = ENOENT;
+ goto done;
+ }
+
+ /*
+ * if the entry is 2nd and on up
+ */
+ if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+ panic ("rtrequest1: rt_mpath_deldup");
+ RT_LOCK(rt);
+ RT_ADDREF(rt);
+ if (req == RTM_DELETE) {
+ rt->rt_flags &= ~RTF_UP;
+ /*
+ * One more rtentry floating around that is not
+ * linked to the routing table. rttrash will be decremented
+ * when RTFREE(rt) is eventually called.
+ */
+ V_rttrash++;
+
+ }
+
+nondelete:
+ if (req != RTM_DELETE)
+ panic("unrecognized request %d", req);
+
+
+ /*
+ * If the caller wants it, then it can have it,
+ * but it's up to it to free the rtentry as we won't be
+ * doing it.
+ */
+ if (ret_nrt) {
+ *ret_nrt = rt;
+ RT_UNLOCK(rt);
+ } else
+ RTFREE_LOCKED(rt);
+done:
+ return (error);
+}
+#endif
+
int
rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
u_int fibnum)
@@ -841,65 +938,15 @@ rtrequest1_fib(int req, struct rt_addrin
switch (req) {
case RTM_DELETE:
#ifdef RADIX_MPATH
- /*
- * if we got multipath routes, we require users to specify
- * a matching RTAX_GATEWAY.
- */
if (rn_mpath_capable(rnh)) {
- struct rtentry *rto = NULL;
-
- rn = rnh->rnh_matchaddr(dst, rnh);
- if (rn == NULL)
- senderr(ESRCH);
- rto = rt = RNTORT(rn);
- rt = rt_mpath_matchgate(rt, gateway);
- if (!rt)
- senderr(ESRCH);
- /*
- * this is the first entry in the chain
- */
- if (rto == rt) {
- rn = rn_mpath_next((struct radix_node *)rt);
- /*
- * there is another entry, now it's active
- */
- if (rn) {
- rto = RNTORT(rn);
- RT_LOCK(rto);
- rto->rt_flags |= RTF_UP;
- RT_UNLOCK(rto);
- } else if (rt->rt_flags & RTF_GATEWAY) {
- /*
- * For gateway routes, we need to
- * make sure that we we are deleting
- * the correct gateway.
- * rt_mpath_matchgate() does not
- * check the case when there is only
- * one route in the chain.
- */
- if (gateway &&
- (rt->rt_gateway->sa_len != gateway->sa_len ||
- memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
- senderr(ESRCH);
- }
- /*
- * use the normal delete code to remove
- * the first entry
- */
- goto normal_rtdel;
- }
+ error = rn_mpath_update(req, info, rnh, ret_nrt);
/*
- * if the entry is 2nd and on up
+ * "bad" holds true for the success case
+ * as well
*/
- if (!rt_mpath_deldup(rto, rt))
- panic ("rtrequest1: rt_mpath_deldup");
- RT_LOCK(rt);
- RT_ADDREF(rt);
- rt->rt_flags &= ~RTF_UP;
- goto deldone; /* done with the RTM_DELETE command */
+ if (error != ENOENT)
+ goto bad;
}
-
-normal_rtdel:
#endif
/*
* Remove the item from the tree and return it.
@@ -921,9 +968,6 @@ normal_rtdel:
if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
ifa->ifa_rtrequest(RTM_DELETE, rt, info);
-#ifdef RADIX_MPATH
-deldone:
-#endif
/*
* One more rtentry floating around that is not
* linked to the routing table. rttrash will be decremented
@@ -951,11 +995,13 @@ deldone:
case RTM_ADD:
if ((flags & RTF_GATEWAY) && !gateway)
senderr(EINVAL);
- if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
- (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
+ if (dst && gateway && (dst->sa_family != gateway->sa_family)
+ && (gateway->sa_family != AF_UNSPEC)
+ && (gateway->sa_family != AF_LINK))
senderr(EINVAL);
- if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum)))
+ if (info->rti_ifa == NULL &&
+ (error = rt_getifa_fib(info, fibnum)))
senderr(error);
ifa = info->rti_ifa;
rt = uma_zalloc(rtzone, M_NOWAIT | M_ZERO);
@@ -996,6 +1042,7 @@ deldone:
IFAREF(ifa);
rt->rt_ifa = ifa;
rt->rt_ifp = ifa->ifa_ifp;
+ rt->rt_rmx.rmx_weight = 1;
#ifdef RADIX_MPATH
/* do not permit exactly the same dst/mask/gw pair */
Modified: user/kmacy/HEAD_fast_net_merge/sys/net/route.h
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/route.h Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/route.h Wed Mar 4 02:38:38 2009 (r189342)
@@ -59,6 +59,7 @@ struct rt_metrics_lite {
u_long rmx_mtu; /* MTU for this path */
u_long rmx_expire; /* lifetime for route, e.g. redirect */
u_long rmx_pksent; /* packets sent using this route */
+ u_long rmx_weight; /* absolute weight */
};
struct rt_metrics {
@@ -72,7 +73,8 @@ struct rt_metrics {
u_long rmx_rtt; /* estimated round trip time */
u_long rmx_rttvar; /* estimated rtt variance */
u_long rmx_pksent; /* packets sent using this route */
- u_long rmx_filler[4]; /* will be used for T/TCP later */
+ u_long rmx_weight; /* route weight */
+ u_long rmx_filler[3]; /* will be used for T/TCP later */
};
/*
@@ -194,13 +196,15 @@ struct ortentry {
#define RTF_LOCAL 0x200000 /* route represents a local address */
#define RTF_BROADCAST 0x400000 /* route represents a bcast address */
#define RTF_MULTICAST 0x800000 /* route represents a mcast address */
- /* 0x1000000 and up unassigned */
-#define RTF_RNH_LOCKED 0x40000000 /* radix node head locked by caller */
+ /* 0x8000000 and up unassigned */
+#define RTF_STICKY 0x10000000 /* always route dst->src */
+
+#define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */
/* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
#define RTF_FMASK \
(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
- RTF_REJECT | RTF_STATIC)
+ RTF_REJECT | RTF_STATIC | RTF_STICKY)
/*
* Routing statistics.
@@ -226,12 +230,11 @@ struct rt_msghdr {
int rtm_seq; /* for sender to identify action */
int rtm_errno; /* why failed */
int rtm_fmask; /* bitmask used in RTM_CHANGE message */
-#define rtm_use rtm_fmask /* deprecated, use rtm_rmx->rmx_pksent */
u_long rtm_inits; /* which metrics we are initializing */
struct rt_metrics rtm_rmx; /* metrics themselves */
};
-#define RTM_VERSION 5 /* Up the ante and ignore older versions */
+#define RTM_VERSION 6 /* Up the ante and ignore older versions */
/*
* Message types.
@@ -266,6 +269,7 @@ struct rt_msghdr {
#define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */
#define RTV_RTT 0x40 /* init or lock _rtt */
#define RTV_RTTVAR 0x80 /* init or lock _rttvar */
+#define RTV_WEIGHT 0x100 /* init or lock _weight */
/*
* Bitmask values for rtm_addrs.
@@ -273,7 +277,7 @@ struct rt_msghdr {
#define RTA_DST 0x1 /* destination sockaddr present */
#define RTA_GATEWAY 0x2 /* gateway sockaddr present */
#define RTA_NETMASK 0x4 /* netmask sockaddr present */
-#define RTA_GENMASK 0x8 /* cloning mask sockaddr present */
+#define RTA_SPARE 0x8 /* unused */
#define RTA_IFP 0x10 /* interface name sockaddr present */
#define RTA_IFA 0x20 /* interface addr sockaddr present */
#define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */
@@ -285,7 +289,7 @@ struct rt_msghdr {
#define RTAX_DST 0 /* destination sockaddr present */
#define RTAX_GATEWAY 1 /* gateway sockaddr present */
#define RTAX_NETMASK 2 /* netmask sockaddr present */
-#define RTAX_GENMASK 3 /* cloning mask sockaddr present */
+#define RTAX_SPARE 3 /* spare field */
#define RTAX_IFP 4 /* interface name sockaddr present */
#define RTAX_IFA 5 /* interface addr sockaddr present */
#define RTAX_AUTHOR 6 /* sockaddr for author of redirect */
@@ -293,11 +297,11 @@ struct rt_msghdr {
#define RTAX_MAX 8 /* size of array to allocate */
struct rt_addrinfo {
- int rti_addrs;
- struct sockaddr *rti_info[RTAX_MAX];
- int rti_flags;
- struct ifaddr *rti_ifa;
- struct ifnet *rti_ifp;
+ int rti_addrs;
+ struct sockaddr *rti_info[RTAX_MAX];
+ int rti_flags;
+ struct ifaddr *rti_ifa;
+ struct ifnet *rti_ifp;
};
/*
Modified: user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c Wed Mar 4 02:38:38 2009 (r189342)
@@ -601,7 +601,6 @@ route_output(struct mbuf *m, struct sock
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info.rti_info[RTAX_NETMASK] = rt_mask(rt);
- info.rti_info[RTAX_GENMASK] = 0;
if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
ifp = rt->rt_ifp;
if (ifp) {
@@ -637,7 +636,6 @@ route_output(struct mbuf *m, struct sock
}
(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
rtm->rtm_flags = rt->rt_flags;
- rtm->rtm_use = 0;
rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
rtm->rtm_addrs = info.rti_addrs;
break;
@@ -691,10 +689,8 @@ route_output(struct mbuf *m, struct sock
rt->rt_ifp = info.rti_ifp;
}
/* Allow some flags to be toggled on change. */
- if (rtm->rtm_fmask & RTF_FMASK)
- rt->rt_flags = (rt->rt_flags &
- ~rtm->rtm_fmask) |
- (rtm->rtm_flags & rtm->rtm_fmask);
+ rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
+ (rtm->rtm_flags & RTF_FMASK);
rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
&rt->rt_rmx);
rtm->rtm_index = rt->rt_ifp->if_index;
@@ -767,12 +763,14 @@ static void
rt_setmetrics(u_long which, const struct rt_metrics *in,
struct rt_metrics_lite *out)
{
-#define metric(f, e) if (which & (f)) out->e = in->e;
+#define metric(f, e) if (which & (f)) { printf("setting 0x%x", f); out->e = in->e; }
+
/*
* Only these are stored in the routing entry since introduction
* of tcp hostcache. The rest is ignored.
*/
metric(RTV_MTU, rmx_mtu);
+ metric(RTV_WEIGHT, rmx_weight);
/* Userland -> kernel timebase conversion. */
if (which & RTV_EXPIRE)
out->rmx_expire = in->rmx_expire ?
@@ -786,6 +784,7 @@ rt_getmetrics(const struct rt_metrics_li
#define metric(e) out->e = in->e;
bzero(out, sizeof(*out));
metric(rmx_mtu);
+ metric(rmx_weight);
/* Kernel -> userland timebase conversion. */
out->rmx_expire = in->rmx_expire ?
in->rmx_expire - time_uptime + time_second : 0;
@@ -1245,7 +1244,6 @@ sysctl_dumpentry(struct radix_node *rn,
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info.rti_info[RTAX_NETMASK] = rt_mask(rt);
- info.rti_info[RTAX_GENMASK] = 0;
if (rt->rt_ifp) {
info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
@@ -1257,7 +1255,10 @@ sysctl_dumpentry(struct radix_node *rn,
struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
rtm->rtm_flags = rt->rt_flags;
- rtm->rtm_use = rt->rt_rmx.rmx_pksent;
+ /*
+ * let's be honest about this being a retarded hack
+ */
+ rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
rtm->rtm_index = rt->rt_ifp->if_index;
rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
Modified: user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c Wed Mar 4 02:38:38 2009 (r189342)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include "opt_route.h"
#include "opt_mac.h"
#include "opt_carp.h"
+#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -340,7 +341,7 @@ ip_init(void)
netisr_register(NETISR_IP, ip_input, &ipintrq, 0);
ipv4_ft = flowtable_alloc(ip_pcpu_flowtable_size, FL_PCPU);
- ipv4_forward_ft = flowtable_alloc(ip_global_flowtable_size, FL_HASH_PORTS);
+ ipv4_forward_ft = flowtable_alloc(ip_global_flowtable_size, FL_HASH_PORTS|FL_PCPU);
}
void
Modified: user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c Wed Mar 4 02:12:29 2009 (r189341)
+++ user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c Wed Mar 4 02:38:38 2009 (r189342)
@@ -2661,10 +2661,6 @@ rt_entry(rtm, again)
sin6_mask = (struct sockaddr_in6 *)rtmp;
rtmp += ROUNDUP(sin6_mask->sin6_len);
}
- if (rtm->rtm_addrs & RTA_GENMASK) {
- sin6_genmask = (struct sockaddr_in6 *)rtmp;
- rtmp += ROUNDUP(sin6_genmask->sin6_len);
- }
if (rtm->rtm_addrs & RTA_IFP) {
sin6_ifp = (struct sockaddr_in6 *)rtmp;
rtmp += ROUNDUP(sin6_ifp->sin6_len);
More information about the svn-src-user
mailing list