svn commit: r190012 - in head: . sys/modules/ip6_mroute_mod
sys/modules/ip_mroute_mod sys/netinet sys/netinet6 usr.bin/netstat
Bruce M Simpson
bms at FreeBSD.org
Wed Mar 18 18:43:05 PDT 2009
Author: bms
Date: Thu Mar 19 01:43:03 2009
New Revision: 190012
URL: http://svn.freebsd.org/changeset/base/190012
Log:
Introduce a number of changes to the MROUTING code.
This is purely a forwarding plane cleanup; no control plane
code is involved.
Summary:
* Split IPv4 and IPv6 MROUTING support. The static compile-time
kernel option remains the same; however, the modules may now
be built for IPv4 and IPv6 separately as ip_mroute_mod and
ip6_mroute_mod.
* Clean up the IPv4 multicast forwarding code to use BSD queue
and hash table constructs. Don't build our own timer abstractions
when ratecheck(), timevalclear(), etc. will do.
* Expose the multicast forwarding cache (MFC) and virtual interface
table (VIF) as sysctls, to reduce netstat's dependence on libkvm
for this information for running kernels.
* Bandwidth meters, however, still require libkvm.
* Make the MFC hash table size a boot/load-time tunable ULONG,
net.inet.ip.mfchashsize (defaults to 256).
* Remove unused members from struct vif and struct mfc.
* Kill RSVP support, as no current RSVP implementation uses it.
The remaining RSVP stubs could be moved to raw_ip.c.
* Don't share locks or initialization between IPv4 and IPv6.
* Don't use a static struct route_in6 in ip6_mroute.c.
The v6 code still uses a cached struct route_in6; it has been
moved to mif6 for the time being.
* More cleanup remains to be merged from ip_mroute.c to ip6_mroute.c.
v4 path tested using ports/net/mcast-tools.
v6 changes are mostly mechanical locking and *have not* been tested.
As these changes partially break some kernel ABIs, they will not
be MFCed. There is a lot more work to be done here.
Reviewed by: Pavlin Radoslavov
Added:
head/sys/modules/ip6_mroute_mod/
head/sys/modules/ip6_mroute_mod/Makefile (contents, props changed)
Modified:
head/UPDATING
head/sys/modules/ip_mroute_mod/Makefile
head/sys/netinet/ip_mroute.c
head/sys/netinet/ip_mroute.h
head/sys/netinet6/ip6_mroute.c
head/sys/netinet6/ip6_mroute.h
head/usr.bin/netstat/main.c
head/usr.bin/netstat/mroute.c
head/usr.bin/netstat/netstat.h
Modified: head/UPDATING
==============================================================================
--- head/UPDATING Thu Mar 19 01:15:26 2009 (r190011)
+++ head/UPDATING Thu Mar 19 01:43:03 2009 (r190012)
@@ -22,6 +22,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
+20090319:
+ The multicast forwarding code has been cleaned up. netstat(1)
+ only relies on KVM now for printing bandwidth upcall meters.
+ The IPv4 and IPv6 modules are split into ip_mroute_mod and
+ ip6_mroute_mod respectively. The config(5) options for statically
+ compiling this code remain the same, i.e. 'options MROUTING'.
+
20090315:
Support for the IFF_NEEDSGIANT network interface flag has been
removed, which means that non-MPSAFE network device drivers are no
Added: head/sys/modules/ip6_mroute_mod/Makefile
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sys/modules/ip6_mroute_mod/Makefile Thu Mar 19 01:43:03 2009 (r190012)
@@ -0,0 +1,19 @@
+# $FreeBSD$
+
+.include <bsd.own.mk>
+
+.PATH: ${.CURDIR}/../../netinet6
+
+KMOD= ip6_mroute
+
+SRCS= ip6_mroute.c
+SRCS+= opt_inet6.h opt_mac.h opt_mrouting.h
+
+.if !defined(KERNBUILDDIR)
+opt_inet6.h:
+ echo "#define INET6 1" > ${.TARGET}
+opt_mrouting.h:
+ echo "#define MROUTING 1" > ${.TARGET}
+.endif
+
+.include <bsd.kmod.mk>
Modified: head/sys/modules/ip_mroute_mod/Makefile
==============================================================================
--- head/sys/modules/ip_mroute_mod/Makefile Thu Mar 19 01:15:26 2009 (r190011)
+++ head/sys/modules/ip_mroute_mod/Makefile Thu Mar 19 01:43:03 2009 (r190012)
@@ -8,21 +8,12 @@ KMOD= ip_mroute
SRCS= ip_mroute.c
SRCS+= opt_inet.h opt_mac.h opt_mrouting.h opt_route.h
-SRCS+= opt_inet6.h
-
-.if ${MK_INET6_SUPPORT} != "no"
-SRCS+= ip6_mroute.c
-.endif
.if !defined(KERNBUILDDIR)
opt_inet.h:
echo "#define INET 1" > ${.TARGET}
opt_mrouting.h:
echo "#define MROUTING 1" > ${.TARGET}
-.if ${MK_INET6_SUPPORT} != "no"
-opt_inet6.h:
- echo "#define INET6 1" > ${.TARGET}
-.endif
.endif
.include <bsd.kmod.mk>
Modified: head/sys/netinet/ip_mroute.c
==============================================================================
--- head/sys/netinet/ip_mroute.c Thu Mar 19 01:15:26 2009 (r190011)
+++ head/sys/netinet/ip_mroute.c Thu Mar 19 01:43:03 2009 (r190012)
@@ -53,11 +53,24 @@
* bandwidth metering and signaling
*/
+/*
+ * TODO: Prefix functions with ipmf_.
+ * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol
+ * domain attachment (if_afdata) so we can track consumers of that service.
+ * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT,
+ * move it to socket options.
+ * TODO: Rototile log_debug to use KTR.
+ * TODO: Cleanup LSRR removal further.
+ * TODO: Push RSVP stubs into raw_ip.c.
+ * TODO: Use bitstring.h for vif set.
+ * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded.
+ * TODO: Sync ip6_mroute.c with this file.
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
-#include "opt_inet6.h"
#include "opt_mac.h"
#include "opt_mrouting.h"
@@ -81,9 +94,11 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vimage.h>
+
#include <net/if.h>
#include <net/netisr.h>
#include <net/route.h>
+
#include <netinet/in.h>
#include <netinet/igmp.h>
#include <netinet/in_systm.h>
@@ -98,96 +113,84 @@ __FBSDID("$FreeBSD$");
#include <netinet/udp.h>
#include <netinet/vinet.h>
-#ifdef INET6
-#include <netinet/ip6.h>
-#include <netinet6/in6_var.h>
-#include <netinet6/ip6_mroute.h>
-#include <netinet6/ip6_var.h>
-#endif
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
-/*
- * Control debugging code for rsvp and multicast routing code.
- * Can only set them with the debugger.
- */
-static u_int rsvpdebug; /* non-zero enables debugging */
-
-static u_int mrtdebug; /* any set of the flags below */
#define DEBUG_MFC 0x02
#define DEBUG_FORWARD 0x04
#define DEBUG_EXPIRE 0x08
#define DEBUG_XMIT 0x10
#define DEBUG_PIM 0x20
+static u_int mrtdebug;
+SYSCTL_INT(_debug, OID_AUTO, mrtdebug, CTLFLAG_RW, &mrtdebug, 0,
+ "Enable/disable IPv4 multicast forwarding debugging flags");
#define VIFI_INVALID ((vifi_t) -1)
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
-#define M_HASCL(m) ((m)->m_flags & M_EXT)
-
-static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables");
+static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
/*
* Locking. We use two locks: one for the virtual interface table and
* one for the forwarding table. These locks may be nested in which case
* the VIF lock must always be taken first. Note that each lock is used
* to cover not only the specific data structure but also related data
- * structures. It may be better to add more fine-grained locking later;
- * it's not clear how performance-critical this code is.
- *
- * XXX: This module could particularly benefit from being cleaned
- * up to use the <sys/queue.h> macros.
- *
+ * structures.
*/
-static struct mrtstat mrtstat;
-SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
- &mrtstat, mrtstat,
- "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)");
-
-static struct mfc *mfctable[MFCTBLSIZ];
-SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
- &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]",
- "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)");
-
static struct mtx mrouter_mtx;
#define MROUTER_LOCK() mtx_lock(&mrouter_mtx)
#define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx)
#define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED)
-#define MROUTER_LOCK_INIT() \
+#define MROUTER_LOCK_INIT() \
mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
#define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx)
+static struct mrtstat mrtstat;
+SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
+ &mrtstat, mrtstat,
+ "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
+ "netinet/ip_mroute.h)");
+
+static u_long mfchash;
+#define MFCHASH(a, g) \
+ ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
+ ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash)
+#define MFCHASHSIZE 256
+
+static u_char *nexpire; /* 0..mfchashsize-1 */
+static u_long mfchashsize; /* Hash size */
+LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
+
static struct mtx mfc_mtx;
-#define MFC_LOCK() mtx_lock(&mfc_mtx)
-#define MFC_UNLOCK() mtx_unlock(&mfc_mtx)
+#define MFC_LOCK() mtx_lock(&mfc_mtx)
+#define MFC_UNLOCK() mtx_unlock(&mfc_mtx)
#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED)
-#define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF)
+#define MFC_LOCK_INIT() \
+ mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx)
+static vifi_t numvifs;
static struct vif viftable[MAXVIFS];
SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD,
&viftable, sizeof(viftable), "S,vif[MAXVIFS]",
- "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
+ "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
static struct mtx vif_mtx;
-#define VIF_LOCK() mtx_lock(&vif_mtx)
-#define VIF_UNLOCK() mtx_unlock(&vif_mtx)
+#define VIF_LOCK() mtx_lock(&vif_mtx)
+#define VIF_UNLOCK() mtx_unlock(&vif_mtx)
#define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED)
-#define VIF_LOCK_INIT() mtx_init(&vif_mtx, "mroute vif table", NULL, MTX_DEF)
+#define VIF_LOCK_INIT() \
+ mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF)
#define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx)
-static u_char nexpire[MFCTBLSIZ];
-
static eventhandler_tag if_detach_event_tag = NULL;
static struct callout expire_upcalls_ch;
-
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
#define UPCALL_EXPIRE 6 /* number of timeouts */
-#define ENCAP_TTL 64
-
/*
* Bandwidth meter variables and constants
*/
@@ -223,7 +226,7 @@ SYSCTL_ULONG(_net_inet_pim, OID_AUTO, sq
"Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
extern struct domain inetdomain;
-struct protosw in_pim_protosw = {
+static const struct protosw in_pim_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_PIM,
@@ -235,18 +238,6 @@ struct protosw in_pim_protosw = {
};
static const struct encaptab *pim_encap_cookie;
-#ifdef INET6
-/* ip6_mroute.c glue */
-extern struct in6_protosw in6_pim_protosw;
-static const struct encaptab *pim6_encap_cookie;
-
-extern int X_ip6_mrouter_set(struct socket *, struct sockopt *);
-extern int X_ip6_mrouter_get(struct socket *, struct sockopt *);
-extern int X_ip6_mrouter_done(void);
-extern int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
-extern int X_mrt6_ioctl(int, caddr_t);
-#endif
-
static int pim_encapcheck(const struct mbuf *, int, int, void *);
/*
@@ -264,6 +255,7 @@ struct pim_encap_pimhdr {
struct pim pim;
uint32_t flags;
};
+#define PIM_ENCAP_TTL 64
static struct ip pim_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -277,7 +269,7 @@ static struct ip pim_encap_iphdr = {
sizeof(struct ip), /* total length */
0, /* id */
0, /* frag offset */
- ENCAP_TTL,
+ PIM_ENCAP_TTL,
IPPROTO_PIM,
0, /* checksum */
};
@@ -297,129 +289,97 @@ static vifi_t reg_vif_num = VIFI_INVALID
/*
* Private variables.
*/
-static vifi_t numvifs;
-static u_long X_ip_mcast_src(int vifi);
-static int X_ip_mforward(struct ip *ip, struct ifnet *ifp,
- struct mbuf *m, struct ip_moptions *imo);
+static u_long X_ip_mcast_src(int);
+static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
static int X_ip_mrouter_done(void);
-static int X_ip_mrouter_get(struct socket *so, struct sockopt *m);
-static int X_ip_mrouter_set(struct socket *so, struct sockopt *m);
-static int X_legal_vif_num(int vif);
-static int X_mrt_ioctl(int cmd, caddr_t data, int fibnum);
-
-static int get_sg_cnt(struct sioc_sg_req *);
-static int get_vif_cnt(struct sioc_vif_req *);
-static void if_detached_event(void *arg __unused, struct ifnet *);
-static int ip_mrouter_init(struct socket *, int);
-static int add_vif(struct vifctl *);
-static int del_vif_locked(vifi_t);
-static int del_vif(vifi_t);
-static int add_mfc(struct mfcctl2 *);
-static int del_mfc(struct mfcctl2 *);
-static int set_api_config(uint32_t *); /* chose API capabilities */
-static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
-static int set_assert(int);
-static void expire_upcalls(void *);
-static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
-static void phyint_send(struct ip *, struct vif *, struct mbuf *);
-static void send_packet(struct vif *, struct mbuf *);
-
-/*
- * Bandwidth monitoring
- */
-static void free_bw_list(struct bw_meter *list);
-static int add_bw_upcall(struct bw_upcall *);
-static int del_bw_upcall(struct bw_upcall *);
-static void bw_meter_receive_packet(struct bw_meter *x, int plen,
- struct timeval *nowp);
-static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp);
-static void bw_upcalls_send(void);
-static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp);
-static void unschedule_bw_meter(struct bw_meter *x);
-static void bw_meter_process(void);
-static void expire_bw_upcalls_send(void *);
-static void expire_bw_meter_process(void *);
-
-static int pim_register_send(struct ip *, struct vif *,
- struct mbuf *, struct mfc *);
-static int pim_register_send_rp(struct ip *, struct vif *,
- struct mbuf *, struct mfc *);
-static int pim_register_send_upcall(struct ip *, struct vif *,
- struct mbuf *, struct mfc *);
-static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *);
-
-/*
- * whether or not special PIM assert processing is enabled.
- */
-static int pim_assert;
-/*
- * Rate limit for assert notification messages, in usec
- */
-#define ASSERT_MSG_TIME 3000000
+static int X_ip_mrouter_get(struct socket *, struct sockopt *);
+static int X_ip_mrouter_set(struct socket *, struct sockopt *);
+static int X_legal_vif_num(int);
+static int X_mrt_ioctl(int, caddr_t, int);
+
+static int add_bw_upcall(struct bw_upcall *);
+static int add_mfc(struct mfcctl2 *);
+static int add_vif(struct vifctl *);
+static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
+static void bw_meter_process(void);
+static void bw_meter_receive_packet(struct bw_meter *, int,
+ struct timeval *);
+static void bw_upcalls_send(void);
+static int del_bw_upcall(struct bw_upcall *);
+static int del_mfc(struct mfcctl2 *);
+static int del_vif(vifi_t);
+static int del_vif_locked(vifi_t);
+static void expire_bw_meter_process(void *);
+static void expire_bw_upcalls_send(void *);
+static void expire_mfc(struct mfc *);
+static void expire_upcalls(void *);
+static void free_bw_list(struct bw_meter *);
+static int get_sg_cnt(struct sioc_sg_req *);
+static int get_vif_cnt(struct sioc_vif_req *);
+static void if_detached_event(void *, struct ifnet *);
+static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
+static int ip_mrouter_init(struct socket *, int);
+static __inline struct mfc *
+ mfc_find(struct in_addr *, struct in_addr *);
+static void phyint_send(struct ip *, struct vif *, struct mbuf *);
+static struct mbuf *
+ pim_register_prepare(struct ip *, struct mbuf *);
+static int pim_register_send(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static int pim_register_send_rp(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static int pim_register_send_upcall(struct ip *, struct vif *,
+ struct mbuf *, struct mfc *);
+static void schedule_bw_meter(struct bw_meter *, struct timeval *);
+static void send_packet(struct vif *, struct mbuf *);
+static int set_api_config(uint32_t *);
+static int set_assert(int);
+static int socket_send(struct socket *, struct mbuf *,
+ struct sockaddr_in *);
+static void unschedule_bw_meter(struct bw_meter *);
/*
- * Kernel multicast routing API capabilities and setup.
+ * Kernel multicast forwarding API capabilities and setup.
* If more API capabilities are added to the kernel, they should be
* recorded in `mrt_api_support'.
*/
+#define MRT_API_VERSION 0x0305
+
+static const int mrt_api_version = MRT_API_VERSION;
static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
MRT_MFC_FLAGS_BORDER_VIF |
MRT_MFC_RP |
MRT_MFC_BW_UPCALL);
static uint32_t mrt_api_config = 0;
-/*
- * Hash function for a source, group entry
- */
-#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
- ((g) >> 20) ^ ((g) >> 10) ^ (g))
+static int pim_assert_enabled;
+static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */
/*
- * Find a route for a given origin IP address and Multicast group address
- * Statistics are updated by the caller if needed
- * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
+ * Find a route for a given origin IP address and multicast group address.
+ * Statistics must be updated by the caller.
*/
-static struct mfc *
-mfc_find(in_addr_t o, in_addr_t g)
+static __inline struct mfc *
+mfc_find(struct in_addr *o, struct in_addr *g)
{
- struct mfc *rt;
+ struct mfc *rt;
- MFC_LOCK_ASSERT();
+ MFC_LOCK_ASSERT();
- for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next)
- if ((rt->mfc_origin.s_addr == o) &&
- (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL))
- break;
- return rt;
-}
+ LIST_FOREACH(rt, &mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, *o) &&
+ in_hosteq(rt->mfc_mcastgrp, *g) &&
+ TAILQ_EMPTY(&rt->mfc_stall))
+ break;
+ }
-/*
- * Macros to compute elapsed time efficiently
- * Borrowed from Van Jacobson's scheduling code
- */
-#define TV_DELTA(a, b, delta) { \
- int xxs; \
- delta = (a).tv_usec - (b).tv_usec; \
- if ((xxs = (a).tv_sec - (b).tv_sec)) { \
- switch (xxs) { \
- case 2: \
- delta += 1000000; \
- /* FALLTHROUGH */ \
- case 1: \
- delta += 1000000; \
- break; \
- default: \
- delta += (1000000 * xxs); \
- } \
- } \
+ return (rt);
}
-#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
- (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
-
/*
- * Handle MRT setsockopt commands to modify the multicast routing tables.
+ * Handle MRT setsockopt commands to modify the multicast forwarding tables.
*/
static int
X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
@@ -526,15 +486,15 @@ static int
X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
{
int error;
- static int version = 0x0305; /* !!! why is this here? XXX */
switch (sopt->sopt_name) {
case MRT_VERSION:
- error = sooptcopyout(sopt, &version, sizeof version);
+ error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version);
break;
case MRT_ASSERT:
- error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
+ error = sooptcopyout(sopt, &pim_assert_enabled,
+ sizeof pim_assert_enabled);
break;
case MRT_API_SUPPORT:
@@ -556,7 +516,7 @@ X_ip_mrouter_get(struct socket *so, stru
* Handle ioctl commands to obtain information from the cache
*/
static int
-X_mrt_ioctl(int cmd, caddr_t data, int fibnum)
+X_mrt_ioctl(int cmd, caddr_t data, int fibnum __unused)
{
int error = 0;
@@ -593,7 +553,7 @@ get_sg_cnt(struct sioc_sg_req *req)
struct mfc *rt;
MFC_LOCK();
- rt = mfc_find(req->src.s_addr, req->grp.s_addr);
+ rt = mfc_find(&req->src, &req->grp);
if (rt == NULL) {
MFC_UNLOCK();
req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
@@ -632,10 +592,8 @@ get_vif_cnt(struct sioc_vif_req *req)
static void
ip_mrouter_reset(void)
{
- bzero((caddr_t)mfctable, sizeof(mfctable));
- bzero((caddr_t)nexpire, sizeof(nexpire));
- pim_assert = 0;
+ pim_assert_enabled = 0;
mrt_api_config = 0;
callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE);
@@ -652,55 +610,40 @@ if_detached_event(void *arg __unused, st
INIT_VNET_INET(curvnet);
vifi_t vifi;
int i;
- struct mfc *mfc;
- struct mfc *nmfc;
- struct mfc **ppmfc; /* Pointer to previous node's next-pointer */
- struct rtdetq *pq;
- struct rtdetq *npq;
MROUTER_LOCK();
+
if (V_ip_mrouter == NULL) {
MROUTER_UNLOCK();
+ return;
}
+ VIF_LOCK();
+ MFC_LOCK();
+
/*
* Tear down multicast forwarder state associated with this ifnet.
* 1. Walk the vif list, matching vifs against this ifnet.
* 2. Walk the multicast forwarding cache (mfc) looking for
* inner matches with this vif's index.
- * 3. Free any pending mbufs for this mfc.
- * 4. Free the associated mfc entry and state associated with this vif.
- * Be very careful about unlinking from a singly-linked list whose
- * "head node" is a pointer in a simple array.
- * 5. Free vif state. This should disable ALLMULTI on the interface.
+ * 3. Expire any matching multicast forwarding cache entries.
+ * 4. Free vif state. This should disable ALLMULTI on the interface.
*/
- VIF_LOCK();
- MFC_LOCK();
for (vifi = 0; vifi < numvifs; vifi++) {
if (viftable[vifi].v_ifp != ifp)
continue;
- for (i = 0; i < MFCTBLSIZ; i++) {
- ppmfc = &mfctable[i];
- for (mfc = mfctable[i]; mfc != NULL; ) {
- nmfc = mfc->mfc_next;
- if (mfc->mfc_parent == vifi) {
- for (pq = mfc->mfc_stall; pq != NULL; ) {
- npq = pq->next;
- m_freem(pq->m);
- free(pq, M_MRTABLE);
- pq = npq;
- }
- free_bw_list(mfc->mfc_bw_meter);
- free(mfc, M_MRTABLE);
- *ppmfc = nmfc;
- } else {
- ppmfc = &mfc->mfc_next;
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+ for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+ if (rt->mfc_parent == vifi) {
+ expire_mfc(rt);
+ }
}
- mfc = nmfc;
- }
}
del_vif_locked(vifi);
}
+
MFC_UNLOCK();
VIF_UNLOCK();
@@ -708,7 +651,7 @@ if_detached_event(void *arg __unused, st
}
/*
- * Enable multicast routing
+ * Enable multicast forwarding.
*/
static int
ip_mrouter_init(struct socket *so, int version)
@@ -739,6 +682,8 @@ ip_mrouter_init(struct socket *so, int v
return (ENOMEM);
}
+ mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &mfchash, HASH_NOWAIT);
+
callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL);
callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD,
@@ -756,7 +701,7 @@ ip_mrouter_init(struct socket *so, int v
}
/*
- * Disable multicast routing
+ * Disable multicast forwarding.
*/
static int
X_ip_mrouter_done(void)
@@ -766,8 +711,6 @@ X_ip_mrouter_done(void)
int i;
struct ifnet *ifp;
struct ifreq ifr;
- struct mfc *rt;
- struct rtdetq *rte;
MROUTER_LOCK();
@@ -783,12 +726,13 @@ X_ip_mrouter_done(void)
mrt_api_config = 0;
VIF_LOCK();
+
/*
* For each phyint in use, disable promiscuous reception of all IP
* multicasts.
*/
for (vifi = 0; vifi < numvifs; vifi++) {
- if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
+ if (!in_nullhost(viftable[vifi].v_lcl_addr) &&
!(viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr);
@@ -801,38 +745,37 @@ X_ip_mrouter_done(void)
}
bzero((caddr_t)viftable, sizeof(viftable));
numvifs = 0;
- pim_assert = 0;
+ pim_assert_enabled = 0;
+
VIF_UNLOCK();
+
EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
- /*
- * Free all multicast forwarding cache entries.
- */
callout_stop(&expire_upcalls_ch);
callout_stop(&bw_upcalls_ch);
callout_stop(&bw_meter_ch);
MFC_LOCK();
- for (i = 0; i < MFCTBLSIZ; i++) {
- for (rt = mfctable[i]; rt != NULL; ) {
- struct mfc *nr = rt->mfc_next;
-
- for (rte = rt->mfc_stall; rte != NULL; ) {
- struct rtdetq *n = rte->next;
- m_freem(rte->m);
- free(rte, M_MRTABLE);
- rte = n;
- }
- free_bw_list(rt->mfc_bw_meter);
- free(rt, M_MRTABLE);
- rt = nr;
+ /*
+ * Free all multicast forwarding cache entries.
+ * Do not use hashdestroy(), as we must perform other cleanup.
+ */
+ for (i = 0; i < mfchashsize; i++) {
+ struct mfc *rt, *nrt;
+ for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
+ nrt = LIST_NEXT(rt, mfc_hash);
+ expire_mfc(rt);
}
}
- bzero((caddr_t)mfctable, sizeof(mfctable));
- bzero((caddr_t)nexpire, sizeof(nexpire));
+ free(mfchashtbl, M_MRTABLE);
+ mfchashtbl = NULL;
+
+ bzero(nexpire, sizeof(nexpire[0]) * mfchashsize);
+
bw_upcalls_n = 0;
bzero(bw_meter_timers, sizeof(bw_meter_timers));
+
MFC_UNLOCK();
reg_vif_num = VIFI_INVALID;
@@ -854,7 +797,7 @@ set_assert(int i)
if ((i != 1) && (i != 0))
return EINVAL;
- pim_assert = i;
+ pim_assert_enabled = i;
return 0;
}
@@ -878,17 +821,22 @@ set_api_config(uint32_t *apival)
*apival = 0;
return EPERM;
}
- if (pim_assert) {
+ if (pim_assert_enabled) {
*apival = 0;
return EPERM;
}
- for (i = 0; i < MFCTBLSIZ; i++) {
- if (mfctable[i] != NULL) {
+
+ MFC_LOCK();
+
+ for (i = 0; i < mfchashsize; i++) {
+ if (LIST_FIRST(&mfchashtbl[i]) != NULL) {
*apival = 0;
return EPERM;
}
}
+ MFC_UNLOCK();
+
mrt_api_config = *apival & mrt_api_support;
*apival = mrt_api_config;
@@ -918,11 +866,11 @@ add_vif(struct vifctl *vifcp)
VIF_UNLOCK();
return EINVAL;
}
- if (vifp->v_lcl_addr.s_addr != INADDR_ANY) {
+ if (!in_nullhost(vifp->v_lcl_addr)) {
VIF_UNLOCK();
return EADDRINUSE;
}
- if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) {
+ if (in_nullhost(vifcp->vifc_lcl_addr)) {
VIF_UNLOCK();
return EADDRNOTAVAIL;
}
@@ -978,8 +926,6 @@ add_vif(struct vifctl *vifcp)
vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
vifp->v_ifp = ifp;
- vifp->v_rsvp_on = 0;
- vifp->v_rsvpd = NULL;
/* initialize per vif pkt counters */
vifp->v_pkt_in = 0;
vifp->v_pkt_out = 0;
@@ -988,7 +934,8 @@ add_vif(struct vifctl *vifcp)
bzero(&vifp->v_route, sizeof(vifp->v_route));
/* Adjust numvifs up if the vifi is higher than numvifs */
- if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
+ if (numvifs <= vifcp->vifc_vifi)
+ numvifs = vifcp->vifc_vifi + 1;
VIF_UNLOCK();
@@ -1017,7 +964,7 @@ del_vif_locked(vifi_t vifi)
return EINVAL;
}
vifp = &viftable[vifi];
- if (vifp->v_lcl_addr.s_addr == INADDR_ANY) {
+ if (in_nullhost(vifp->v_lcl_addr)) {
return EADDRNOTAVAIL;
}
@@ -1034,7 +981,7 @@ del_vif_locked(vifi_t vifi)
/* Adjust numvifs down */
for (vifi = numvifs; vifi > 0; vifi--)
- if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY)
+ if (!in_nullhost(viftable[vifi-1].v_lcl_addr))
break;
numvifs = vifi;
@@ -1089,9 +1036,25 @@ init_mfc_params(struct mfc *rt, struct m
rt->mfc_pkt_cnt = 0;
rt->mfc_byte_cnt = 0;
rt->mfc_wrong_if = 0;
- rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+ timevalclear(&rt->mfc_last_assert);
}
+static void
+expire_mfc(struct mfc *rt)
+{
+ struct rtdetq *rte, *nrte;
+
+ free_bw_list(rt->mfc_bw_meter);
+
+ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
+ m_freem(rte->m);
+ TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
+ free(rte, M_MRTABLE);
+ }
+
+ LIST_REMOVE(rt, mfc_hash);
+ free(rt, M_MRTABLE);
+}
/*
* Add an mfc entry
@@ -1100,14 +1063,14 @@ static int
add_mfc(struct mfcctl2 *mfccp)
{
struct mfc *rt;
- u_long hash;
- struct rtdetq *rte;
+ struct rtdetq *rte, *nrte;
+ u_long hash = 0;
u_short nstl;
VIF_LOCK();
MFC_LOCK();
- rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
+ rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
/* If an entry already exists, just update the fields */
if (rt) {
@@ -1120,47 +1083,48 @@ add_mfc(struct mfcctl2 *mfccp)
update_mfc_params(rt, mfccp);
MFC_UNLOCK();
VIF_UNLOCK();
- return 0;
+ return (0);
}
/*
* Find the entry for which the upcall was made and update
*/
- hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
- for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
-
- if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
- (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
- (rt->mfc_stall != NULL)) {
-
- if (nstl++)
- log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
- "multiple kernel entries",
- (u_long)ntohl(mfccp->mfcc_origin.s_addr),
- (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
- mfccp->mfcc_parent, (void *)rt->mfc_stall);
-
- if (mrtdebug & DEBUG_MFC)
- log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
- (u_long)ntohl(mfccp->mfcc_origin.s_addr),
- (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
- mfccp->mfcc_parent, (void *)rt->mfc_stall);
-
- init_mfc_params(rt, mfccp);
+ nstl = 0;
+ hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
+ !TAILQ_EMPTY(&rt->mfc_stall)) {
+ if (nstl++) {
+ log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
+ "multiple kernel entries",
+ (u_long)ntohl(mfccp->mfcc_origin.s_addr),
+ (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent,
+ (void *)TAILQ_FIRST(&rt->mfc_stall));
+ }
- rt->mfc_expire = 0; /* Don't clean this guy up */
- nexpire[hash]--;
+ if (mrtdebug & DEBUG_MFC) {
+ log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
+ (u_long)ntohl(mfccp->mfcc_origin.s_addr),
+ (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+ mfccp->mfcc_parent,
+ (void *)TAILQ_FIRST(&rt->mfc_stall));
+ }
- /* free packets Qed at the end of this entry */
- for (rte = rt->mfc_stall; rte != NULL; ) {
- struct rtdetq *n = rte->next;
+ init_mfc_params(rt, mfccp);
+ rt->mfc_expire = 0; /* Don't clean this guy up */
+ nexpire[hash]--;
- ip_mdq(rte->m, rte->ifp, rt, -1);
- m_freem(rte->m);
- free(rte, M_MRTABLE);
- rte = n;
- }
- rt->mfc_stall = NULL;
+ /* Free queued packets, but attempt to forward them first. */
+ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
+ if (rte->ifp != NULL)
+ ip_mdq(rte->m, rte->ifp, rt, -1);
+ m_freem(rte->m);
+ TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
+ rt->mfc_nstall--;
+ free(rte, M_MRTABLE);
+ }
}
}
@@ -1168,43 +1132,50 @@ add_mfc(struct mfcctl2 *mfccp)
* It is possible that an entry is being inserted without an upcall
*/
if (nstl == 0) {
+ /*
+ * No mfc; make a new one
+ */
if (mrtdebug & DEBUG_MFC)
log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr),
(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
mfccp->mfcc_parent);
- for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
- if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
- (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
- init_mfc_params(rt, mfccp);
- if (rt->mfc_expire)
- nexpire[hash]--;
- rt->mfc_expire = 0;
- break; /* XXX */
- }
+ LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
+ if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
+ in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {
+ init_mfc_params(rt, mfccp);
+ if (rt->mfc_expire)
+ nexpire[hash]--;
+ rt->mfc_expire = 0;
+ break; /* XXX */
+ }
}
+
if (rt == NULL) { /* no upcall, so make a new entry */
rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
if (rt == NULL) {
MFC_UNLOCK();
VIF_UNLOCK();
- return ENOBUFS;
+ return (ENOBUFS);
}
init_mfc_params(rt, mfccp);
- rt->mfc_expire = 0;
- rt->mfc_stall = NULL;
+ TAILQ_INIT(&rt->mfc_stall);
+ rt->mfc_nstall = 0;
+ rt->mfc_expire = 0;
rt->mfc_bw_meter = NULL;
+
/* insert new entry at head of hash chain */
- rt->mfc_next = mfctable[hash];
- mfctable[hash] = rt;
+ LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
}
}
+
MFC_UNLOCK();
VIF_UNLOCK();
- return 0;
+
+ return (0);
}
/*
@@ -1216,49 +1187,40 @@ del_mfc(struct mfcctl2 *mfccp)
struct in_addr origin;
struct in_addr mcastgrp;
struct mfc *rt;
- struct mfc **nptr;
- u_long hash;
- struct bw_meter *list;
origin = mfccp->mfcc_origin;
mcastgrp = mfccp->mfcc_mcastgrp;
- if (mrtdebug & DEBUG_MFC)
+ if (mrtdebug & DEBUG_MFC) {
log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
- (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
+ (u_long)ntohl(origin.s_addr),
+ (u_long)ntohl(mcastgrp.s_addr));
+ }
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list