svn commit: r334673 - in head: share/man/man4 sys/net sys/netinet sys/netinet6

Andrey V. Elsukov ae at FreeBSD.org
Tue Jun 5 21:25:01 UTC 2018


Author: ae
Date: Tue Jun  5 21:24:59 2018
New Revision: 334673
URL: https://svnweb.freebsd.org/changeset/base/334673

Log:
  Rework if_gif(4) to use the new encap_lookup_t method to speed up lookup
  of the needed interface when many gif interfaces are present.
  
  Remove rmlock from gif_softc, use epoch(9) and CK_LIST instead.
  Move more AF-related code into AF-related locations.
  Use a hash table to speed up lookup of the needed softc. Interfaces
  with the GIF_IGNORE_SOURCE flag are stored in a plain CK_LIST.
  Sysctl net.link.gif.parallel_tunnels is removed. The removal was planned
  16 years ago, and it actually could work only for the outbound direction.
  Each protocol that can be handled by an if_gif(4) interface is registered
  with a separate encap handler; this helps avoid invoking the handler
  for unrelated protocols (GRE, PIM, etc.).
  
  This change dramatically improves performance when many gif(4)
  interfaces are used.
  
  Sponsored by:	Yandex LLC

Modified:
  head/share/man/man4/gif.4
  head/sys/net/if_gif.c
  head/sys/net/if_gif.h
  head/sys/netinet/in_gif.c
  head/sys/netinet6/in6_gif.c

Modified: head/share/man/man4/gif.4
==============================================================================
--- head/share/man/man4/gif.4	Tue Jun  5 20:54:29 2018	(r334672)
+++ head/share/man/man4/gif.4	Tue Jun  5 21:24:59 2018	(r334673)
@@ -29,7 +29,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 10, 2015
+.Dd June 5, 2018
 .Dt GIF 4
 .Os
 .Sh NAME
@@ -169,14 +169,6 @@ This behavior may be modified at runtime by setting th
 variable
 .Va net.link.gif.max_nesting
 to the desired level of nesting.
-Additionally,
-.Nm
-tunnels are restricted to one per pair of end points.
-Parallel tunnels may be enabled by setting the
-.Xr sysctl 8
-variable
-.Va net.link.gif.parallel_tunnels
-to 1.
 .Sh SEE ALSO
 .Xr gre 4 ,
 .Xr inet 4 ,

Modified: head/sys/net/if_gif.c
==============================================================================
--- head/sys/net/if_gif.c	Tue Jun  5 20:54:29 2018	(r334672)
+++ head/sys/net/if_gif.c	Tue Jun  5 21:24:59 2018	(r334673)
@@ -2,6 +2,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,7 +40,6 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/syslog.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
-#include <sys/protosw.h>
 #include <sys/conf.h>
 #include <machine/cpu.h>
 
@@ -85,8 +84,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip6.h>
 #include <netinet6/ip6_ecn.h>
 #include <netinet6/ip6_var.h>
-#include <netinet6/scope6_var.h>
-#include <netinet6/ip6protosw.h>
 #endif /* INET6 */
 
 #include <netinet/ip_encap.h>
@@ -98,32 +95,17 @@ __FBSDID("$FreeBSD$");
 
 static const char gifname[] = "gif";
 
-/*
- * gif_mtx protects a per-vnet gif_softc_list.
- */
-static VNET_DEFINE(struct mtx, gif_mtx);
-#define	V_gif_mtx		VNET(gif_mtx)
-static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
-static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
-#define	V_gif_softc_list	VNET(gif_softc_list)
+MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
 static struct sx gif_ioctl_sx;
 SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
 
-#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
-					    NULL, MTX_DEF)
-#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
-#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
-#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)
-
 void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
 void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
 void	(*ng_gif_attach_p)(struct ifnet *ifp);
 void	(*ng_gif_detach_p)(struct ifnet *ifp);
 
 static int	gif_check_nesting(struct ifnet *, struct mbuf *);
-static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
-    struct sockaddr *);
-static void	gif_delete_tunnel(struct ifnet *);
+static void	gif_delete_tunnel(struct gif_softc *);
 static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
 static int	gif_transmit(struct ifnet *, struct mbuf *);
 static void	gif_qflush(struct ifnet *);
@@ -132,8 +114,6 @@ static void	gif_clone_destroy(struct ifnet *);
 static VNET_DEFINE(struct if_clone *, gif_cloner);
 #define	V_gif_cloner	VNET(gif_cloner)
 
-static int gifmodevent(module_t, int, void *);
-
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
     "Generic Tunnel Interface");
@@ -153,21 +133,6 @@ static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NES
 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
 
-/*
- * By default, we disallow creation of multiple tunnels between the same
- * pair of addresses.  Some applications require this functionality so
- * we allow control over this check here.
- */
-#ifdef XBONEHACK
-static VNET_DEFINE(int, parallel_tunnels) = 1;
-#else
-static VNET_DEFINE(int, parallel_tunnels) = 0;
-#endif
-#define	V_parallel_tunnels	VNET(parallel_tunnels)
-SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
-    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
-    "Allow parallel tunnels?");
-
 static int
 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
@@ -176,20 +141,15 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr
 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
 	GIF2IFP(sc) = if_alloc(IFT_GIF);
-	GIF_LOCK_INIT(sc);
 	GIF2IFP(sc)->if_softc = sc;
 	if_initname(GIF2IFP(sc), gifname, unit);
 
 	GIF2IFP(sc)->if_addrlen = 0;
 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
-#if 0
-	/* turn off ingress filter */
-	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
-#endif
 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
-	GIF2IFP(sc)->if_transmit  = gif_transmit;
-	GIF2IFP(sc)->if_qflush  = gif_qflush;
+	GIF2IFP(sc)->if_transmit = gif_transmit;
+	GIF2IFP(sc)->if_qflush = gif_qflush;
 	GIF2IFP(sc)->if_output = gif_output;
 	GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
 	GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
@@ -198,9 +158,6 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr
 	if (ng_gif_attach_p != NULL)
 		(*ng_gif_attach_p)(GIF2IFP(sc));
 
-	GIF_LIST_LOCK();
-	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
-	GIF_LIST_UNLOCK();
 	return (0);
 }
 
@@ -211,10 +168,7 @@ gif_clone_destroy(struct ifnet *ifp)
 
 	sx_xlock(&gif_ioctl_sx);
 	sc = ifp->if_softc;
-	gif_delete_tunnel(ifp);
-	GIF_LIST_LOCK();
-	LIST_REMOVE(sc, gif_list);
-	GIF_LIST_UNLOCK();
+	gif_delete_tunnel(sc);
 	if (ng_gif_detach_p != NULL)
 		(*ng_gif_detach_p)(ifp);
 	bpfdetach(ifp);
@@ -222,8 +176,8 @@ gif_clone_destroy(struct ifnet *ifp)
 	ifp->if_softc = NULL;
 	sx_xunlock(&gif_ioctl_sx);
 
+	GIF_WAIT();
 	if_free(ifp);
-	GIF_LOCK_DESTROY(sc);
 	free(sc, M_GIF);
 }
 
@@ -231,10 +185,14 @@ static void
 vnet_gif_init(const void *unused __unused)
 {
 
-	LIST_INIT(&V_gif_softc_list);
-	GIF_LIST_LOCK_INIT();
 	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
 	    gif_clone_destroy, 0);
+#ifdef INET
+	in_gif_init();
+#endif
+#ifdef INET6
+	in6_gif_init();
+#endif
 }
 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_init, NULL);
@@ -244,7 +202,12 @@ vnet_gif_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_gif_cloner);
-	GIF_LIST_LOCK_DESTROY();
+#ifdef INET
+	in_gif_uninit();
+#endif
+#ifdef INET6
+	in6_gif_uninit();
+#endif
 }
 VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_uninit, NULL);
@@ -272,65 +235,25 @@ static moduledata_t gif_mod = {
 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gif, 1);
 
-int
-gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+struct gif_list *
+gif_hashinit(void)
 {
-	GIF_RLOCK_TRACKER;
-	const struct ip *ip;
-	struct gif_softc *sc;
-	int ret;
+	struct gif_list *hash;
+	int i;
 
-	sc = (struct gif_softc *)arg;
-	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
-		return (0);
+	hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE,
+	    M_GIF, M_WAITOK);
+	for (i = 0; i < GIF_HASH_SIZE; i++)
+		CK_LIST_INIT(&hash[i]);
 
-	ret = 0;
-	GIF_RLOCK(sc);
+	return (hash);
+}
 
-	/* no physical address */
-	if (sc->gif_family == 0)
-		goto done;
+void
+gif_hashdestroy(struct gif_list *hash)
+{
 
-	switch (proto) {
-#ifdef INET
-	case IPPROTO_IPV4:
-#endif
-#ifdef INET6
-	case IPPROTO_IPV6:
-#endif
-	case IPPROTO_ETHERIP:
-		break;
-	default:
-		goto done;
-	}
-
-	/* Bail on short packets */
-	M_ASSERTPKTHDR(m);
-	if (m->m_pkthdr.len < sizeof(struct ip))
-		goto done;
-
-	ip = mtod(m, const struct ip *);
-	switch (ip->ip_v) {
-#ifdef INET
-	case 4:
-		if (sc->gif_family != AF_INET)
-			goto done;
-		ret = in_gif_encapcheck(m, off, proto, arg);
-		break;
-#endif
-#ifdef INET6
-	case 6:
-		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
-			goto done;
-		if (sc->gif_family != AF_INET6)
-			goto done;
-		ret = in6_gif_encapcheck(m, off, proto, arg);
-		break;
-#endif
-	}
-done:
-	GIF_RUNLOCK(sc);
-	return (ret);
+	free(hash, M_GIF);
 }
 
 static int
@@ -357,6 +280,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
 	}
 #endif
 	error = ENETDOWN;
+	GIF_RLOCK();
 	sc = ifp->if_softc;
 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
 	    (ifp->if_flags & IFF_UP) == 0 ||
@@ -444,6 +368,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
 err:
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+	GIF_RUNLOCK();
 	return (error);
 }
 
@@ -616,7 +541,8 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto
 		break;
 #endif
 	case AF_LINK:
-		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
+		n = sizeof(struct etherip_header) +
+		    sizeof(struct ether_header);
 		if (n > m->m_len)
 			m = m_pullup(m, n);
 		if (m == NULL)
@@ -674,20 +600,11 @@ drop:
 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 }
 
-/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
-int
+static int
 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-	GIF_RLOCK_TRACKER;
 	struct ifreq *ifr = (struct ifreq*)data;
-	struct sockaddr *dst, *src;
 	struct gif_softc *sc;
-#ifdef INET
-	struct sockaddr_in *sin = NULL;
-#endif
-#ifdef INET6
-	struct sockaddr_in6 *sin6 = NULL;
-#endif
 	u_int options;
 	int error;
 
@@ -715,176 +632,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 	}
 	error = 0;
 	switch (cmd) {
-	case SIOCSIFPHYADDR:
-#ifdef INET6
-	case SIOCSIFPHYADDR_IN6:
-#endif
-		error = EINVAL;
-		switch (cmd) {
-#ifdef INET
-		case SIOCSIFPHYADDR:
-			src = (struct sockaddr *)
-				&(((struct in_aliasreq *)data)->ifra_addr);
-			dst = (struct sockaddr *)
-				&(((struct in_aliasreq *)data)->ifra_dstaddr);
+	case SIOCDIFPHYADDR:
+		if (sc->gif_family == 0)
 			break;
-#endif
-#ifdef INET6
-		case SIOCSIFPHYADDR_IN6:
-			src = (struct sockaddr *)
-				&(((struct in6_aliasreq *)data)->ifra_addr);
-			dst = (struct sockaddr *)
-				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
-			break;
-#endif
-		default:
-			goto bad;
-		}
-		/* sa_family must be equal */
-		if (src->sa_family != dst->sa_family ||
-		    src->sa_len != dst->sa_len)
-			goto bad;
-
-		/* validate sa_len */
-		/* check sa_family looks sane for the cmd */
-		switch (src->sa_family) {
-#ifdef INET
-		case AF_INET:
-			if (src->sa_len != sizeof(struct sockaddr_in))
-				goto bad;
-			if (cmd != SIOCSIFPHYADDR) {
-				error = EAFNOSUPPORT;
-				goto bad;
-			}
-			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
-			    satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
-				error = EADDRNOTAVAIL;
-				goto bad;
-			}
-			break;
-#endif
-#ifdef INET6
-		case AF_INET6:
-			if (src->sa_len != sizeof(struct sockaddr_in6))
-				goto bad;
-			if (cmd != SIOCSIFPHYADDR_IN6) {
-				error = EAFNOSUPPORT;
-				goto bad;
-			}
-			error = EADDRNOTAVAIL;
-			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
-			    ||
-			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
-				goto bad;
-			/*
-			 * Check validity of the scope zone ID of the
-			 * addresses, and convert it into the kernel
-			 * internal form if necessary.
-			 */
-			error = sa6_embedscope(satosin6(src), 0);
-			if (error != 0)
-				goto bad;
-			error = sa6_embedscope(satosin6(dst), 0);
-			if (error != 0)
-				goto bad;
-			break;
-#endif
-		default:
-			error = EAFNOSUPPORT;
-			goto bad;
-		}
-		error = gif_set_tunnel(ifp, src, dst);
+		gif_delete_tunnel(sc);
 		break;
-	case SIOCDIFPHYADDR:
-		gif_delete_tunnel(ifp);
-		break;
+#ifdef INET
+	case SIOCSIFPHYADDR:
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
+		error = in_gif_ioctl(sc, cmd, data);
+		break;
+#endif
 #ifdef INET6
+	case SIOCSIFPHYADDR_IN6:
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
-#endif
-		if (sc->gif_family == 0) {
-			error = EADDRNOTAVAIL;
-			break;
-		}
-		GIF_RLOCK(sc);
-		switch (cmd) {
-#ifdef INET
-		case SIOCGIFPSRCADDR:
-		case SIOCGIFPDSTADDR:
-			if (sc->gif_family != AF_INET) {
-				error = EADDRNOTAVAIL;
-				break;
-			}
-			sin = (struct sockaddr_in *)&ifr->ifr_addr;
-			memset(sin, 0, sizeof(*sin));
-			sin->sin_family = AF_INET;
-			sin->sin_len = sizeof(*sin);
-			break;
-#endif
-#ifdef INET6
-		case SIOCGIFPSRCADDR_IN6:
-		case SIOCGIFPDSTADDR_IN6:
-			if (sc->gif_family != AF_INET6) {
-				error = EADDRNOTAVAIL;
-				break;
-			}
-			sin6 = (struct sockaddr_in6 *)
-				&(((struct in6_ifreq *)data)->ifr_addr);
-			memset(sin6, 0, sizeof(*sin6));
-			sin6->sin6_family = AF_INET6;
-			sin6->sin6_len = sizeof(*sin6);
-			break;
-#endif
-		default:
-			error = EAFNOSUPPORT;
-		}
-		if (error == 0) {
-			switch (cmd) {
-#ifdef INET
-			case SIOCGIFPSRCADDR:
-				sin->sin_addr = sc->gif_iphdr->ip_src;
-				break;
-			case SIOCGIFPDSTADDR:
-				sin->sin_addr = sc->gif_iphdr->ip_dst;
-				break;
-#endif
-#ifdef INET6
-			case SIOCGIFPSRCADDR_IN6:
-				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
-				break;
-			case SIOCGIFPDSTADDR_IN6:
-				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
-				break;
-#endif
-			}
-		}
-		GIF_RUNLOCK(sc);
-		if (error != 0)
-			break;
-		switch (cmd) {
-#ifdef INET
-		case SIOCGIFPSRCADDR:
-		case SIOCGIFPDSTADDR:
-			error = prison_if(curthread->td_ucred,
-			    (struct sockaddr *)sin);
-			if (error != 0)
-				memset(sin, 0, sizeof(*sin));
-			break;
-#endif
-#ifdef INET6
-		case SIOCGIFPSRCADDR_IN6:
-		case SIOCGIFPDSTADDR_IN6:
-			error = prison_if(curthread->td_ucred,
-			    (struct sockaddr *)sin6);
-			if (error == 0)
-				error = sa6_recoverscope(sin6);
-			if (error != 0)
-				memset(sin6, 0, sizeof(*sin6));
-#endif
-		}
+		error = in6_gif_ioctl(sc, cmd, data);
 		break;
+#endif
 	case SIOCGTUNFIB:
 		ifr->ifr_fib = sc->gif_fibnum;
 		break;
@@ -908,171 +674,63 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		    sizeof(options));
 		if (error)
 			break;
-		if (options & ~GIF_OPTMASK)
+		if (options & ~GIF_OPTMASK) {
 			error = EINVAL;
-		else
-			sc->gif_options = options;
-		break;
-	default:
-		error = EINVAL;
-		break;
-	}
-bad:
-	sx_xunlock(&gif_ioctl_sx);
-	return (error);
-}
-
-static void
-gif_detach(struct gif_softc *sc, int family)
-{
-
-	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
-	if (sc->gif_ecookie != NULL) {
-		switch (family) {
-#ifdef INET
-		case AF_INET:
-			ip_encap_detach(sc->gif_ecookie);
 			break;
-#endif
-#ifdef INET6
-		case AF_INET6:
-			ip6_encap_detach(sc->gif_ecookie);
-			break;
-#endif
 		}
-	}
-	sc->gif_ecookie = NULL;
-}
-
-static int
-gif_attach(struct gif_softc *sc, int af)
-{
-
-	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
-	switch (af) {
+		if (sc->gif_options != options) {
+			switch (sc->gif_family) {
 #ifdef INET
-	case AF_INET:
-		return (in_gif_attach(sc));
+			case AF_INET:
+				error = in_gif_setopts(sc, options);
+				break;
 #endif
 #ifdef INET6
-	case AF_INET6:
-		return (in6_gif_attach(sc));
+			case AF_INET6:
+				error = in6_gif_setopts(sc, options);
+				break;
 #endif
-	}
-	return (EAFNOSUPPORT);
-}
-
-static int
-gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
-{
-	struct gif_softc *sc = ifp->if_softc;
-	struct gif_softc *tsc;
-#ifdef INET
-	struct ip *ip;
-#endif
-#ifdef INET6
-	struct ip6_hdr *ip6;
-#endif
-	void *hdr;
-	int error = 0;
-
-	if (sc == NULL)
-		return (ENXIO);
-	/* Disallow parallel tunnels unless instructed otherwise. */
-	if (V_parallel_tunnels == 0) {
-		GIF_LIST_LOCK();
-		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
-			if (tsc == sc || tsc->gif_family != src->sa_family)
-				continue;
-#ifdef INET
-			if (tsc->gif_family == AF_INET &&
-			    tsc->gif_iphdr->ip_src.s_addr ==
-			    satosin(src)->sin_addr.s_addr &&
-			    tsc->gif_iphdr->ip_dst.s_addr ==
-			    satosin(dst)->sin_addr.s_addr) {
-				error = EADDRNOTAVAIL;
-				GIF_LIST_UNLOCK();
-				goto bad;
+			default:
+				/* No need to invoke AF-handler */
+				sc->gif_options = options;
 			}
-#endif
-#ifdef INET6
-			if (tsc->gif_family == AF_INET6 &&
-			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
-			    &satosin6(src)->sin6_addr) &&
-			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
-			    &satosin6(dst)->sin6_addr)) {
-				error = EADDRNOTAVAIL;
-				GIF_LIST_UNLOCK();
-				goto bad;
-			}
-#endif
 		}
-		GIF_LIST_UNLOCK();
+		break;
+	default:
+		error = EINVAL;
+		break;
 	}
-	switch (src->sa_family) {
+	if (error == 0 && sc->gif_family != 0) {
+		if (
 #ifdef INET
-	case AF_INET:
-		hdr = ip = malloc(sizeof(struct ip), M_GIF,
-		    M_WAITOK | M_ZERO);
-		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
-		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
-		break;
+		    cmd == SIOCSIFPHYADDR ||
 #endif
 #ifdef INET6
-	case AF_INET6:
-		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
-		    M_WAITOK | M_ZERO);
-		ip6->ip6_src = satosin6(src)->sin6_addr;
-		ip6->ip6_dst = satosin6(dst)->sin6_addr;
-		ip6->ip6_vfc = IPV6_VERSION;
-		break;
+		    cmd == SIOCSIFPHYADDR_IN6 ||
 #endif
-	default:
-		return (EAFNOSUPPORT);
+		    0) {
+			ifp->if_drv_flags |= IFF_DRV_RUNNING;
+			if_link_state_change(ifp, LINK_STATE_UP);
+		}
 	}
-
-	if (sc->gif_family != src->sa_family)
-		gif_detach(sc, sc->gif_family);
-	if (sc->gif_family == 0 ||
-	    sc->gif_family != src->sa_family)
-		error = gif_attach(sc, src->sa_family);
-
-	GIF_WLOCK(sc);
-	if (sc->gif_family != 0)
-		free(sc->gif_hdr, M_GIF);
-	sc->gif_family = src->sa_family;
-	sc->gif_hdr = hdr;
-	GIF_WUNLOCK(sc);
-#if defined(INET) || defined(INET6)
 bad:
-#endif
-	if (error == 0 && sc->gif_family != 0) {
-		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-		if_link_state_change(ifp, LINK_STATE_UP);
-	} else {
-		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-		if_link_state_change(ifp, LINK_STATE_DOWN);
-	}
+	sx_xunlock(&gif_ioctl_sx);
 	return (error);
 }
 
 static void
-gif_delete_tunnel(struct ifnet *ifp)
+gif_delete_tunnel(struct gif_softc *sc)
 {
-	struct gif_softc *sc = ifp->if_softc;
-	int family;
 
-	if (sc == NULL)
-		return;
-
-	GIF_WLOCK(sc);
-	family = sc->gif_family;
-	sc->gif_family = 0;
-	GIF_WUNLOCK(sc);
-	if (family != 0) {
-		gif_detach(sc, family);
+	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+	if (sc->gif_family != 0) {
+		CK_LIST_REMOVE(sc, chain);
+		/* Wait until it become safe to free gif_hdr */
+		GIF_WAIT();
 		free(sc->gif_hdr, M_GIF);
 	}
-	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-	if_link_state_change(ifp, LINK_STATE_DOWN);
+	sc->gif_family = 0;
+	GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+	if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN);
 }
+

Modified: head/sys/net/if_gif.h
==============================================================================
--- head/sys/net/if_gif.h	Tue Jun  5 20:54:29 2018	(r334672)
+++ head/sys/net/if_gif.h	Tue Jun  5 21:24:59 2018	(r334673)
@@ -5,6 +5,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,14 +37,9 @@
 #define _NET_IF_GIF_H_
 
 #ifdef _KERNEL
-#include "opt_inet.h"
-#include "opt_inet6.h"
 
-#include <netinet/in.h>
-
 struct ip;
 struct ip6_hdr;
-struct encaptab;
 
 extern	void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
 		int af);
@@ -55,8 +51,6 @@ extern	void (*ng_gif_detach_p)(struct ifnet *ifp);
 
 struct gif_softc {
 	struct ifnet		*gif_ifp;
-	struct rmlock		gif_lock;
-	const struct encaptab	*gif_ecookie;
 	int			gif_family;
 	int			gif_flags;
 	u_int			gif_fibnum;
@@ -65,28 +59,22 @@ struct gif_softc {
 	union {
 		void		*hdr;
 		struct ip	*iphdr;
-#ifdef INET6
 		struct ip6_hdr	*ip6hdr;
-#endif
 	} gif_uhdr;
-	LIST_ENTRY(gif_softc)	gif_list; /* all gif's are linked */
+
+	CK_LIST_ENTRY(gif_softc) chain;
 };
-#define	GIF2IFP(sc)	((sc)->gif_ifp)
-#define	GIF_LOCK_INIT(sc)	rm_init(&(sc)->gif_lock, "gif softc")
-#define	GIF_LOCK_DESTROY(sc)	rm_destroy(&(sc)->gif_lock)
-#define	GIF_RLOCK_TRACKER	struct rm_priotracker gif_tracker
-#define	GIF_RLOCK(sc)		rm_rlock(&(sc)->gif_lock, &gif_tracker)
-#define	GIF_RUNLOCK(sc)		rm_runlock(&(sc)->gif_lock, &gif_tracker)
-#define	GIF_RLOCK_ASSERT(sc)	rm_assert(&(sc)->gif_lock, RA_RLOCKED)
-#define	GIF_WLOCK(sc)		rm_wlock(&(sc)->gif_lock)
-#define	GIF_WUNLOCK(sc)		rm_wunlock(&(sc)->gif_lock)
-#define	GIF_WLOCK_ASSERT(sc)	rm_assert(&(sc)->gif_lock, RA_WLOCKED)
+CK_LIST_HEAD(gif_list, gif_softc);
+MALLOC_DECLARE(M_GIF);
 
+#ifndef GIF_HASH_SIZE
+#define	GIF_HASH_SIZE	(1 << 4)
+#endif
+
+#define	GIF2IFP(sc)	((sc)->gif_ifp)
 #define	gif_iphdr	gif_uhdr.iphdr
 #define	gif_hdr		gif_uhdr.hdr
-#ifdef INET6
 #define	gif_ip6hdr	gif_uhdr.ip6hdr
-#endif
 
 #define GIF_MTU		(1280)	/* Default MTU */
 #define	GIF_MTU_MIN	(1280)	/* Minimum MTU */
@@ -108,21 +96,29 @@ struct etherip_header {
 /* mbuf adjust factor to force 32-bit alignment of IP header */
 #define	ETHERIP_ALIGN		2
 
+#define	GIF_RLOCK()	epoch_enter_preempt(net_epoch_preempt)
+#define	GIF_RUNLOCK()	epoch_exit_preempt(net_epoch_preempt)
+#define	GIF_WAIT()	epoch_wait_preempt(net_epoch_preempt)
+
 /* Prototypes */
+struct gif_list *gif_hashinit(void);
+void gif_hashdestroy(struct gif_list *);
+
 void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
 int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
 	       struct route *);
-int gif_encapcheck(const struct mbuf *, int, int, void *);
-#ifdef INET
+
+void in_gif_init(void);
+void in_gif_uninit(void);
 int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
-int in_gif_encapcheck(const struct mbuf *, int, int, void *);
-int in_gif_attach(struct gif_softc *);
-#endif
-#ifdef INET6
+int in_gif_ioctl(struct gif_softc *, u_long, caddr_t);
+int in_gif_setopts(struct gif_softc *, u_int);
+
+void in6_gif_init(void);
+void in6_gif_uninit(void);
 int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
-int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
-int in6_gif_attach(struct gif_softc *);
-#endif
+int in6_gif_ioctl(struct gif_softc *, u_long, caddr_t);
+int in6_gif_setopts(struct gif_softc *, u_int);
 #endif /* _KERNEL */
 
 #define GIFGOPTS	_IOWR('i', 150, struct ifreq)

Modified: head/sys/netinet/in_gif.c
==============================================================================
--- head/sys/netinet/in_gif.c	Tue Jun  5 20:54:29 2018	(r334672)
+++ head/sys/netinet/in_gif.c	Tue Jun  5 21:24:59 2018	(r334673)
@@ -2,6 +2,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae at FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,9 +39,8 @@ __FBSDID("$FreeBSD$");
 #include "opt_inet6.h"
 
 #include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
 #include <sys/systm.h>
+#include <sys/jail.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 
+#include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
@@ -75,15 +76,155 @@ static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets");
 
+/*
+ * We keep interfaces in a hash table using src+dst as key.
+ * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list.
+ */
+static VNET_DEFINE(struct gif_list *, ipv4_hashtbl) = NULL;
+static VNET_DEFINE(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER();
+#define	V_ipv4_hashtbl		VNET(ipv4_hashtbl)
+#define	V_ipv4_list		VNET(ipv4_list)
+
+#define	GIF_HASH(src, dst)	(V_ipv4_hashtbl[\
+    in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
+#define	GIF_HASH_SC(sc)		GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\
+    (sc)->gif_iphdr->ip_dst.s_addr)
+static uint32_t
+in_gif_hashval(in_addr_t src, in_addr_t dst)
+{
+	uint32_t ret;
+
+	ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+	return (fnv_32_buf(&dst, sizeof(dst), ret));
+}
+
+static int
+in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst)
+{
+	struct gif_softc *tmp;
+
+	if (sc->gif_family == AF_INET &&
+	    sc->gif_iphdr->ip_src.s_addr == src &&
+	    sc->gif_iphdr->ip_dst.s_addr == dst)
+		return (EEXIST);
+
+	CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) {
+		if (tmp == sc)
+			continue;
+		if (tmp->gif_iphdr->ip_src.s_addr == src &&
+		    tmp->gif_iphdr->ip_dst.s_addr == dst)
+			return (EADDRNOTAVAIL);
+	}
+	return (0);
+}
+
+static void
+in_gif_attach(struct gif_softc *sc)
+{
+
+	if (sc->gif_options & GIF_IGNORE_SOURCE)
+		CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain);
+	else
+		CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain);
+}
+
 int
+in_gif_setopts(struct gif_softc *sc, u_int options)
+{
+
+	/* NOTE: we are protected with gif_ioctl_sx lock */
+	MPASS(sc->gif_family == AF_INET);
+	MPASS(sc->gif_options != options);
+
+	if ((options & GIF_IGNORE_SOURCE) !=
+	    (sc->gif_options & GIF_IGNORE_SOURCE)) {
+		CK_LIST_REMOVE(sc, chain);
+		sc->gif_options = options;
+		in_gif_attach(sc);
+	}
+	return (0);
+}
+
+int
+in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
+{
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct sockaddr_in *dst, *src;
+	struct ip *ip;
+	int error;
+
+	/* NOTE: we are protected with gif_ioctl_sx lock */
+	error = EINVAL;
+	switch (cmd) {
+	case SIOCSIFPHYADDR:
+		src = &((struct in_aliasreq *)data)->ifra_addr;
+		dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
+
+		/* sanity checks */
+		if (src->sin_family != dst->sin_family ||
+		    src->sin_family != AF_INET ||
+		    src->sin_len != dst->sin_len ||
+		    src->sin_len != sizeof(*src))
+			break;
+		if (src->sin_addr.s_addr == INADDR_ANY ||
+		    dst->sin_addr.s_addr == INADDR_ANY) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		if (V_ipv4_hashtbl == NULL)
+			V_ipv4_hashtbl = gif_hashinit();
+		error = in_gif_checkdup(sc, src->sin_addr.s_addr,
+		    dst->sin_addr.s_addr);
+		if (error == EADDRNOTAVAIL)
+			break;
+		if (error == EEXIST) {
+			/* Addresses are the same. Just return. */
+			error = 0;
+			break;
+		}
+		ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO);
+		ip->ip_src.s_addr = src->sin_addr.s_addr;
+		ip->ip_dst.s_addr = dst->sin_addr.s_addr;
+		if (sc->gif_family != 0) {
+			/* Detach existing tunnel first */
+			CK_LIST_REMOVE(sc, chain);
+			GIF_WAIT();
+			free(sc->gif_hdr, M_GIF);
+			/* XXX: should we notify about link state change? */
+		}
+		sc->gif_family = AF_INET;
+		sc->gif_iphdr = ip;
+		in_gif_attach(sc);
+		break;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list