svn commit: r335141 - head/sys/net
Andrey V. Elsukov
ae at FreeBSD.org
Thu Jun 14 14:53:25 UTC 2018
Author: ae
Date: Thu Jun 14 14:53:24 2018
New Revision: 335141
URL: https://svnweb.freebsd.org/changeset/base/335141
Log:
Convert the if_me(4) driver to use the encap_lookup_t method and be lockless
on the data path.
Modified:
head/sys/net/if_me.c
Modified: head/sys/net/if_me.c
==============================================================================
--- head/sys/net/if_me.c Thu Jun 14 14:53:01 2018 (r335140)
+++ head/sys/net/if_me.c Thu Jun 14 14:53:24 2018 (r335141)
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
+ * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae at FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,22 +28,20 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
-#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <sys/systm.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@@ -68,8 +66,6 @@ __FBSDID("$FreeBSD$");
#define MEMTU (1500 - sizeof(struct mobhdr))
static const char mename[] = "me";
static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
-static VNET_DEFINE(struct mtx, me_mtx);
-#define V_me_mtx VNET(me_mtx)
/* Minimal forwarding header RFC 2004 */
struct mobhdr {
uint8_t mob_proto; /* protocol */
@@ -82,32 +78,27 @@ struct mobhdr {
struct me_softc {
struct ifnet *me_ifp;
- LIST_ENTRY(me_softc) me_list;
- struct rmlock me_lock;
u_int me_fibnum;
- const struct encaptab *me_ecookie;
struct in_addr me_src;
struct in_addr me_dst;
+
+ CK_LIST_ENTRY(me_softc) chain;
};
+CK_LIST_HEAD(me_list, me_softc);
#define ME2IFP(sc) ((sc)->me_ifp)
#define ME_READY(sc) ((sc)->me_src.s_addr != 0)
-#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc")
-#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock)
-#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker
-#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker)
-#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker)
-#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED)
-#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock)
-#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock)
-#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED)
+#define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt)
+#define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt)
+#define ME_WAIT() epoch_wait_preempt(net_epoch_preempt)
-#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
-#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx)
-#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx)
-#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx)
+#ifndef ME_HASH_SIZE
+#define ME_HASH_SIZE (1 << 4)
+#endif
+static VNET_DEFINE(struct me_list *, me_hashtbl) = NULL;
+#define V_me_hashtbl VNET(me_hashtbl)
+#define ME_HASH(src, dst) (V_me_hashtbl[\
+ me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)])
-static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
-#define V_me_softc_list VNET(me_softc_list)
static struct sx me_ioctl_sx;
SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
@@ -123,21 +114,9 @@ static int me_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
static int me_input(struct mbuf *, int, int, void *);
-static int me_set_tunnel(struct ifnet *, struct sockaddr_in *,
- struct sockaddr_in *);
-static void me_delete_tunnel(struct ifnet *);
-static int me_encapcheck(const struct mbuf *, int, int, void *);
+static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t);
+static void me_delete_tunnel(struct me_softc *);
-#define ME_MINLEN (sizeof(struct ip) + sizeof(struct mobhdr) -\
- sizeof(in_addr_t))
-static const struct encap_config ipv4_encap_cfg = {
- .proto = IPPROTO_MOBILE,
- .min_length = ME_MINLEN,
- .exact_match = (sizeof(in_addr_t) << 4) + 8,
- .check = me_encapcheck,
- .input = me_input
-};
-
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
"Minimal Encapsulation for IP (RFC 2004)");
@@ -150,11 +129,32 @@ static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
+static uint32_t
+me_hashval(in_addr_t src, in_addr_t dst)
+{
+ uint32_t ret;
+
+ ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+ return (fnv_32_buf(&dst, sizeof(dst), ret));
+}
+
+static struct me_list *
+me_hashinit(void)
+{
+ struct me_list *hash;
+ int i;
+
+ hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE,
+ M_IFME, M_WAITOK);
+ for (i = 0; i < ME_HASH_SIZE; i++)
+ CK_LIST_INIT(&hash[i]);
+
+ return (hash);
+}
+
static void
vnet_me_init(const void *unused __unused)
{
- LIST_INIT(&V_me_softc_list);
- ME_LIST_LOCK_INIT();
V_me_cloner = if_clone_simple(mename, me_clone_create,
me_clone_destroy, 0);
}
@@ -165,8 +165,9 @@ static void
vnet_me_uninit(const void *unused __unused)
{
+ if (V_me_hashtbl != NULL)
+ free(V_me_hashtbl, M_IFME);
if_clone_detach(V_me_cloner);
- ME_LIST_LOCK_DESTROY();
}
VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_me_uninit, NULL);
@@ -179,7 +180,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_
sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
sc->me_fibnum = curthread->td_proc->p_fibnum;
ME2IFP(sc) = if_alloc(IFT_TUNNEL);
- ME_LOCK_INIT(sc);
ME2IFP(sc)->if_softc = sc;
if_initname(ME2IFP(sc), mename, unit);
@@ -193,9 +193,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_
ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(ME2IFP(sc));
bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- ME_LIST_LOCK();
- LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
- ME_LIST_UNLOCK();
return (0);
}
@@ -206,24 +203,20 @@ me_clone_destroy(struct ifnet *ifp)
sx_xlock(&me_ioctl_sx);
sc = ifp->if_softc;
- me_delete_tunnel(ifp);
- ME_LIST_LOCK();
- LIST_REMOVE(sc, me_list);
- ME_LIST_UNLOCK();
+ me_delete_tunnel(sc);
bpfdetach(ifp);
if_detach(ifp);
ifp->if_softc = NULL;
sx_xunlock(&me_ioctl_sx);
+ ME_WAIT();
if_free(ifp);
- ME_LOCK_DESTROY(sc);
free(sc, M_IFME);
}
static int
me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- ME_RLOCK_TRACKER;
struct ifreq *ifr = (struct ifreq *)data;
struct sockaddr_in *src, *dst;
struct me_softc *sc;
@@ -251,10 +244,8 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = 0;
switch (cmd) {
case SIOCSIFPHYADDR:
- src = (struct sockaddr_in *)
- &(((struct in_aliasreq *)data)->ifra_addr);
- dst = (struct sockaddr_in *)
- &(((struct in_aliasreq *)data)->ifra_dstaddr);
+ src = &((struct in_aliasreq *)data)->ifra_addr;
+ dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
if (src->sin_family != dst->sin_family ||
src->sin_family != AF_INET ||
src->sin_len != dst->sin_len ||
@@ -267,17 +258,16 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EADDRNOTAVAIL;
break;
}
- error = me_set_tunnel(ifp, src, dst);
+ error = me_set_tunnel(sc, src->sin_addr.s_addr,
+ dst->sin_addr.s_addr);
break;
case SIOCDIFPHYADDR:
- me_delete_tunnel(ifp);
+ me_delete_tunnel(sc);
break;
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
- ME_RLOCK(sc);
if (!ME_READY(sc)) {
error = EADDRNOTAVAIL;
- ME_RUNLOCK(sc);
break;
}
src = (struct sockaddr_in *)&ifr->ifr_addr;
@@ -292,7 +282,6 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
src->sin_addr = sc->me_dst;
break;
}
- ME_RUNLOCK(sc);
error = prison_if(curthread->td_ucred, sintosa(src));
if (error != 0)
memset(src, 0, sizeof(*src));
@@ -318,81 +307,71 @@ end:
}
static int
-me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+me_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
- ME_RLOCK_TRACKER;
+ const struct ip *ip;
struct me_softc *sc;
- struct ip *ip;
- int ret;
- sc = (struct me_softc *)arg;
- if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
- return (0);
-
- M_ASSERTPKTHDR(m);
-
- ret = 0;
- ME_RLOCK(sc);
- if (ME_READY(sc)) {
- ip = mtod(m, struct ip *);
+ MPASS(in_epoch());
+ ip = mtod(m, const struct ip *);
+ CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr,
+ ip->ip_src.s_addr), chain) {
if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
- sc->me_dst.s_addr == ip->ip_src.s_addr)
- ret = 32 * 2 + 8;
+ sc->me_dst.s_addr == ip->ip_src.s_addr) {
+ if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
+ *arg = sc;
+ return (ENCAP_DRV_LOOKUP);
+ }
}
- ME_RUNLOCK(sc);
- return (ret);
+ return (0);
}
static int
-me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
- struct sockaddr_in *dst)
+me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst)
{
- struct me_softc *sc, *tsc;
+ struct me_softc *tmp;
sx_assert(&me_ioctl_sx, SA_XLOCKED);
- ME_LIST_LOCK();
- sc = ifp->if_softc;
- LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
- if (tsc == sc || !ME_READY(tsc))
+
+ if (V_me_hashtbl == NULL)
+ V_me_hashtbl = me_hashinit();
+
+ if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst)
+ return (0);
+
+ CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) {
+ if (tmp == sc)
continue;
- if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
- tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
- ME_LIST_UNLOCK();
+ if (tmp->me_src.s_addr == src &&
+ tmp->me_dst.s_addr == dst)
return (EADDRNOTAVAIL);
- }
}
- ME_LIST_UNLOCK();
- ME_WLOCK(sc);
- sc->me_dst = dst->sin_addr;
- sc->me_src = src->sin_addr;
- ME_WUNLOCK(sc);
+ me_delete_tunnel(sc);
+ sc->me_dst.s_addr = dst;
+ sc->me_src.s_addr = src;
+ CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain);
- if (sc->me_ecookie == NULL)
- sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg,
- sc, M_WAITOK);
- if (sc->me_ecookie != NULL) {
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- if_link_state_change(ifp, LINK_STATE_UP);
- }
+ ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ME2IFP(sc), LINK_STATE_UP);
return (0);
}
static void
-me_delete_tunnel(struct ifnet *ifp)
+me_delete_tunnel(struct me_softc *sc)
{
- struct me_softc *sc = ifp->if_softc;
sx_assert(&me_ioctl_sx, SA_XLOCKED);
- if (sc->me_ecookie != NULL)
- ip_encap_detach(sc->me_ecookie);
- sc->me_ecookie = NULL;
- ME_WLOCK(sc);
- sc->me_src.s_addr = 0;
- sc->me_dst.s_addr = 0;
- ME_WUNLOCK(sc);
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- if_link_state_change(ifp, LINK_STATE_DOWN);
+ if (ME_READY(sc)) {
+ CK_LIST_REMOVE(sc, chain);
+ ME_WAIT();
+
+ sc->me_src.s_addr = 0;
+ sc->me_dst.s_addr = 0;
+ ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN);
+ }
}
static uint16_t
@@ -505,58 +484,48 @@ me_check_nesting(struct ifnet *ifp, struct mbuf *m)
static int
me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
- struct route *ro)
+ struct route *ro __unused)
{
uint32_t af;
- int error;
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error != 0)
- goto drop;
-#endif
- if ((ifp->if_flags & IFF_MONITOR) != 0 ||
- (ifp->if_flags & IFF_UP) == 0) {
- error = ENETDOWN;
- goto drop;
- }
-
- error = me_check_nesting(ifp, m);
- if (error != 0)
- goto drop;
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
else
af = dst->sa_family;
- if (af != AF_INET) {
- error = EAFNOSUPPORT;
- goto drop;
- }
- BPF_MTAP2(ifp, &af, sizeof(af), m);
+ m->m_pkthdr.csum_data = af;
return (ifp->if_transmit(ifp, m));
-drop:
- m_freem(m);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (error);
}
static int
me_transmit(struct ifnet *ifp, struct mbuf *m)
{
- ME_RLOCK_TRACKER;
struct mobhdr mh;
struct me_softc *sc;
struct ip *ip;
+ uint32_t af;
int error, hlen, plen;
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
+#endif
+ error = ENETDOWN;
+ ME_RLOCK();
sc = ifp->if_softc;
- if (sc == NULL) {
- error = ENETDOWN;
+ if (sc == NULL || !ME_READY(sc) ||
+ (ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0 ||
+ (error = me_check_nesting(ifp, m) != 0)) {
m_freem(m);
goto drop;
}
+ af = m->m_pkthdr.csum_data;
+ if (af != AF_INET) {
+ error = EAFNOSUPPORT;
+ m_freem(m);
+ goto drop;
+ }
if (m->m_len < sizeof(struct ip))
m = m_pullup(m, sizeof(struct ip));
if (m == NULL) {
@@ -573,13 +542,6 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
mh.mob_proto = ip->ip_p;
mh.mob_src = ip->ip_src;
mh.mob_dst = ip->ip_dst;
- ME_RLOCK(sc);
- if (!ME_READY(sc)) {
- ME_RUNLOCK(sc);
- error = ENETDOWN;
- m_freem(m);
- goto drop;
- }
if (in_hosteq(sc->me_src, ip->ip_src)) {
hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
mh.mob_flags = 0;
@@ -590,8 +552,8 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
plen = m->m_pkthdr.len;
ip->ip_src = sc->me_src;
ip->ip_dst = sc->me_dst;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
M_SETFIB(m, sc->me_fibnum);
- ME_RUNLOCK(sc);
M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
@@ -619,6 +581,7 @@ drop:
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
}
+ ME_RUNLOCK();
return (error);
}
@@ -628,13 +591,26 @@ me_qflush(struct ifnet *ifp __unused)
}
+static const struct encaptab *ecookie = NULL;
+static const struct encap_config me_encap_cfg = {
+ .proto = IPPROTO_MOBILE,
+ .min_length = sizeof(struct ip) + sizeof(struct mobhdr) -
+ sizeof(in_addr_t),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = me_lookup,
+ .input = me_input
+};
+
static int
memodevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
+ ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK);
+ break;
case MOD_UNLOAD:
+ ip_encap_detach(ecookie);
break;
default:
return (EOPNOTSUPP);
More information about the svn-src-all
mailing list