svn commit: r194062 - in head/sys: conf net netinet netipsec sys

VANHULLEBUS Yvan vanhu at FreeBSD.org
Fri Jun 12 15:44:37 UTC 2009


Author: vanhu
Date: Fri Jun 12 15:44:35 2009
New Revision: 194062
URL: http://svn.freebsd.org/changeset/base/194062

Log:
  Added support for NAT-Traversal (RFC 3948) in IPsec stack.
  
  Thanks to (no special order) Emmanuel Dreyfus (manu at netbsd.org), Larry
  Baird (lab at gta.com), gnn, bz, and other FreeBSD devs, Julien Vanherzeele
  (julien.vanherzeele at netasq.com, for years of bug reporting), the PFSense
  team, and all people who used / tried the NAT-T patch for years and
  reported bugs, patches, etc...
  
  X-MFC: never
  
  Reviewed by:	bz
  Approved by:	gnn(mentor)
  Obtained from:	NETASQ

Modified:
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/net/pfkeyv2.h
  head/sys/netinet/in_proto.c
  head/sys/netinet/udp.h
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet/udp_var.h
  head/sys/netipsec/ipsec_input.c
  head/sys/netipsec/ipsec_output.c
  head/sys/netipsec/key.c
  head/sys/netipsec/key.h
  head/sys/netipsec/keydb.h
  head/sys/sys/mbuf.h

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/conf/NOTES	Fri Jun 12 15:44:35 2009	(r194062)
@@ -534,6 +534,11 @@ options 	IPSEC			#IP security (requires 
 # using ipfw(8)'s 'ipsec' keyword, when this option is enabled.
 #
 #options 	IPSEC_FILTERTUNNEL	#filter ipsec packets from a tunnel
+#
+# Set IPSEC_NAT_T to enable NAT-Traversal support.  This enables
+# optional UDP encapsulation of ESP packets.
+#
+options		IPSEC_NAT_T		#NAT-T support, UDP encap of ESP
 
 options 	IPX			#IPX/SPX communications protocols
 

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/conf/options	Fri Jun 12 15:44:35 2009	(r194062)
@@ -401,6 +401,7 @@ IPFIREWALL_VERBOSE_LIMIT	opt_ipfw.h
 IPSEC			opt_ipsec.h
 IPSEC_DEBUG		opt_ipsec.h
 IPSEC_FILTERTUNNEL	opt_ipsec.h
+IPSEC_NAT_T		opt_ipsec.h
 IPSTEALTH
 IPX
 KRPC

Modified: head/sys/net/pfkeyv2.h
==============================================================================
--- head/sys/net/pfkeyv2.h	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/net/pfkeyv2.h	Fri Jun 12 15:44:35 2009	(r194062)
@@ -255,6 +255,34 @@ struct sadb_x_ipsecrequest {
    */
 };
 
+/* NAT-Traversal type, see RFC 3948 (and drafts). */
+/* sizeof(struct sadb_x_nat_t_type) == 8 */
+struct sadb_x_nat_t_type {
+  u_int16_t sadb_x_nat_t_type_len;
+  u_int16_t sadb_x_nat_t_type_exttype;
+  u_int8_t sadb_x_nat_t_type_type;
+  u_int8_t sadb_x_nat_t_type_reserved[3];
+};
+
+/* NAT-Traversal source or destination port. */
+/* sizeof(struct sadb_x_nat_t_port) == 8 */
+struct sadb_x_nat_t_port { 
+  u_int16_t sadb_x_nat_t_port_len;
+  u_int16_t sadb_x_nat_t_port_exttype;
+  u_int16_t sadb_x_nat_t_port_port;
+  u_int16_t sadb_x_nat_t_port_reserved;
+};
+
+/* ESP fragmentation size. */
+/* sizeof(struct sadb_x_nat_t_frag) == 8 */
+struct sadb_x_nat_t_frag {
+  u_int16_t sadb_x_nat_t_frag_len;
+  u_int16_t sadb_x_nat_t_frag_exttype;
+  u_int16_t sadb_x_nat_t_frag_fraglen;
+  u_int16_t sadb_x_nat_t_frag_reserved;
+};
+
+
 #define SADB_EXT_RESERVED             0
 #define SADB_EXT_SA                   1
 #define SADB_EXT_LIFETIME_CURRENT     2
@@ -275,7 +303,14 @@ struct sadb_x_ipsecrequest {
 #define SADB_X_EXT_KMPRIVATE          17
 #define SADB_X_EXT_POLICY             18
 #define SADB_X_EXT_SA2                19
-#define SADB_EXT_MAX                  19
+#define SADB_X_EXT_NAT_T_TYPE         20
+#define SADB_X_EXT_NAT_T_SPORT        21
+#define SADB_X_EXT_NAT_T_DPORT        22
+#define SADB_X_EXT_NAT_T_OA           23	/* Deprecated. */
+#define SADB_X_EXT_NAT_T_OAI          23	/* Peer's NAT_OA for src of SA. */
+#define SADB_X_EXT_NAT_T_OAR          24	/* Peer's NAT_OA for dst of SA. */
+#define SADB_X_EXT_NAT_T_FRAG         25	/* Manual MTU override. */
+#define SADB_EXT_MAX                  25
 
 #define SADB_SATYPE_UNSPEC	0
 #define SADB_SATYPE_AH		2

Modified: head/sys/netinet/in_proto.c
==============================================================================
--- head/sys/netinet/in_proto.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/in_proto.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -124,7 +124,7 @@ struct protosw inetsw[] = {
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		udp_input,
 	.pr_ctlinput =		udp_ctlinput,
-	.pr_ctloutput =		ip_ctloutput,
+	.pr_ctloutput =		udp_ctloutput,
 	.pr_init =		udp_init,
 #ifdef VIMAGE
 	.pr_destroy =		udp_destroy,

Modified: head/sys/netinet/udp.h
==============================================================================
--- head/sys/netinet/udp.h	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/udp.h	Fri Jun 12 15:44:35 2009	(r194062)
@@ -45,4 +45,23 @@ struct udphdr {
 	u_short	uh_sum;			/* udp checksum */
 };
 
+/* 
+ * User-settable options (used with setsockopt).
+ */
+#define	UDP_ENCAP			0x01
+
+
+/*
+ * UDP Encapsulation of IPsec Packets options.
+ */
+/* Encapsulation types. */
+#define	UDP_ENCAP_ESPINUDP_NON_IKE 	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
+#define	UDP_ENCAP_ESPINUDP		2 /* draft-ietf-ipsec-udp-encaps-02+ */
+
+/* Default ESP in UDP encapsulation port. */
+#define	UDP_ENCAP_ESPINUDP_PORT		500
+
+/* Maximum UDP fragment size for ESP over UDP. */
+#define	UDP_ENCAP_ESPINUDP_MAXFRAGLEN	552
+
 #endif

Modified: head/sys/netinet/udp_usrreq.c
==============================================================================
--- head/sys/netinet/udp_usrreq.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/udp_usrreq.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
+#include <netipsec/esp.h>
 #endif
 
 #include <machine/in_cksum.h>
@@ -151,6 +152,14 @@ SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_i
 static void	udp_detach(struct socket *so);
 static int	udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
 		    struct mbuf *, struct thread *);
+#ifdef IPSEC
+#ifdef IPSEC_NAT_T
+#define	UF_ESPINUDP_ALL	(UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
+#ifdef INET
+static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
+#endif
+#endif /* IPSEC_NAT_T */
+#endif /* IPSEC */
 
 static void
 udp_zone_change(void *tag)
@@ -252,6 +261,13 @@ udp_append(struct inpcb *inp, struct ip 
 #ifdef INET6
 	struct sockaddr_in6 udp_in6;
 #endif
+#ifdef IPSEC
+#ifdef IPSEC_NAT_T
+#ifdef INET
+	struct udpcb *up;
+#endif
+#endif
+#endif
 
 	INP_RLOCK_ASSERT(inp);
 
@@ -263,6 +279,17 @@ udp_append(struct inpcb *inp, struct ip 
 		V_ipsec4stat.in_polvio++;
 		return;
 	}
+#ifdef IPSEC_NAT_T
+#ifdef INET
+	up = intoudpcb(inp);
+	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
+	if (up->u_flags & UF_ESPINUDP_ALL) {	/* IPSec UDP encaps. */
+		n = udp4_espdecap(inp, n, off);
+		if (n == NULL)				/* Consumed. */
+			return;
+	}
+#endif /* INET */
+#endif /* IPSEC_NAT_T */
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
@@ -825,6 +852,99 @@ SYSCTL_PROC(_net_inet_udp, OID_AUTO, get
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
 
+int
+udp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+	int error = 0, optval;
+	struct inpcb *inp;
+#ifdef IPSEC_NAT_T
+	struct udpcb *up;
+#endif
+
+	inp = sotoinpcb(so);
+	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+	INP_WLOCK(inp);
+	if (sopt->sopt_level != IPPROTO_UDP) {
+#ifdef INET6
+		if (INP_CHECK_SOCKAF(so, AF_INET6)) {
+			INP_WUNLOCK(inp);
+			error = ip6_ctloutput(so, sopt);
+		} else {
+#endif
+			INP_WUNLOCK(inp);
+			error = ip_ctloutput(so, sopt);
+#ifdef INET6
+		}
+#endif
+		return (error);
+	}
+
+	switch (sopt->sopt_dir) {
+	case SOPT_SET:
+		switch (sopt->sopt_name) {
+		case UDP_ENCAP:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+			inp = sotoinpcb(so);
+			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+			INP_WLOCK(inp);
+#ifdef IPSEC_NAT_T
+			up = intoudpcb(inp);
+			KASSERT(up != NULL, ("%s: up == NULL", __func__));
+#endif
+			switch (optval) {
+			case 0:
+				/* Clear all UDP encap. */
+#ifdef IPSEC_NAT_T
+				up->u_flags &= ~UF_ESPINUDP_ALL;
+#endif
+				break;
+#ifdef IPSEC_NAT_T
+			case UDP_ENCAP_ESPINUDP:
+			case UDP_ENCAP_ESPINUDP_NON_IKE:
+				up->u_flags &= ~UF_ESPINUDP_ALL;
+				if (optval == UDP_ENCAP_ESPINUDP)
+					up->u_flags |= UF_ESPINUDP;
+				else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
+					up->u_flags |= UF_ESPINUDP_NON_IKE;
+				break;
+#endif
+			default:
+				error = EINVAL;
+				break;
+			}
+			INP_WUNLOCK(inp);
+			break;
+		default:
+			INP_WUNLOCK(inp);
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	case SOPT_GET:
+		switch (sopt->sopt_name) {
+#ifdef IPSEC_NAT_T
+		case UDP_ENCAP:
+			up = intoudpcb(inp);
+			KASSERT(up != NULL, ("%s: up == NULL", __func__));
+			optval = up->u_flags & UF_ESPINUDP_ALL;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+#endif
+		default:
+			INP_WUNLOCK(inp);
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	}	
+	return (error);
+}
+
 static int
 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *td)
@@ -1136,6 +1256,144 @@ release:
 	return (error);
 }
 
+
+#if defined(IPSEC) && defined(IPSEC_NAT_T)
+#ifdef INET
+/*
+ * Potentially decap ESP in UDP frame.  Check for an ESP header
+ * and optional marker; if present, strip the UDP header and
+ * push the result through IPSec.
+ *
+ * Returns mbuf to be processed (potentially re-allocated) or
+ * NULL if consumed and/or processed.
+ */
+static struct mbuf *
+udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
+{
+	INIT_VNET_IPSEC(curvnet);
+	size_t minlen, payload, skip, iphlen;
+	caddr_t data;
+	struct udpcb *up;
+	struct m_tag *tag;
+	struct udphdr *udphdr;
+	struct ip *ip;
+
+	INP_RLOCK_ASSERT(inp);
+
+	/* 
+	 * Pull up data so the longest case is contiguous:
+	 *    IP/UDP hdr + non ESP marker + ESP hdr.
+	 */
+	minlen = off + sizeof(uint64_t) + sizeof(struct esp);
+	if (minlen > m->m_pkthdr.len)
+		minlen = m->m_pkthdr.len;
+	if ((m = m_pullup(m, minlen)) == NULL) {
+		V_ipsec4stat.in_inval++;
+		return (NULL);		/* Bypass caller processing. */
+	}
+	data = mtod(m, caddr_t);	/* Points to ip header. */
+	payload = m->m_len - off;	/* Size of payload. */
+
+	if (payload == 1 && data[off] == '\xff')
+		return (m);		/* NB: keepalive packet, no decap. */
+
+	up = intoudpcb(inp);
+	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
+	KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
+	    ("u_flags 0x%x", up->u_flags));
+
+	/* 
+	 * Check that the payload is large enough to hold an
+	 * ESP header and compute the amount of data to remove.
+	 *
+	 * NB: the caller has already done a pullup for us.
+	 * XXX can we assume alignment and eliminate bcopys?
+	 */
+	if (up->u_flags & UF_ESPINUDP_NON_IKE) {
+		/*
+		 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
+		 * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
+		 * possible AH mode non-IKE marker+non-ESP marker
+		 * from draft-ietf-ipsec-udp-encaps-00.txt.
+		 */
+		uint64_t marker;
+
+		if (payload <= sizeof(uint64_t) + sizeof(struct esp))
+			return (m);	/* NB: no decap. */
+		bcopy(data + off, &marker, sizeof(uint64_t));
+		if (marker != 0)	/* Non-IKE marker. */
+			return (m);	/* NB: no decap. */
+		skip = sizeof(uint64_t) + sizeof(struct udphdr);
+	} else {
+		uint32_t spi;
+
+		if (payload <= sizeof(struct esp)) {
+			V_ipsec4stat.in_inval++;
+			m_freem(m);
+			return (NULL);	/* Discard. */
+		}
+		bcopy(data + off, &spi, sizeof(uint32_t));
+		if (spi == 0)		/* Non-ESP marker. */
+			return (m);	/* NB: no decap. */
+		skip = sizeof(struct udphdr);
+	}
+
+	/*
+	 * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember
+	 * the UDP ports. This is required if we want to select
+	 * the right SPD for multiple hosts behind same NAT.
+	 *
+	 * NB: ports are maintained in network byte order everywhere
+	 *     in the NAT-T code.
+	 */
+	tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
+		2 * sizeof(uint16_t), M_NOWAIT);
+	if (tag == NULL) {
+		V_ipsec4stat.in_nomem++;
+		m_freem(m);
+		return (NULL);		/* Discard. */
+	}
+	iphlen = off - sizeof(struct udphdr);
+	udphdr = (struct udphdr *)(data + iphlen);
+	((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
+	((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
+	m_tag_prepend(m, tag);
+
+	/*
+	 * Remove the UDP header (and possibly the non ESP marker)
+	 * IP header length is iphlen
+	 * Before:
+	 *   <--- off --->
+	 *   +----+------+-----+
+	 *   | IP |  UDP | ESP |
+	 *   +----+------+-----+
+	 *        <-skip->
+	 * After:
+	 *          +----+-----+
+	 *          | IP | ESP |
+	 *          +----+-----+
+	 *   <-skip->
+	 */
+	ovbcopy(data, data + skip, iphlen);
+	m_adj(m, skip);
+
+	ip = mtod(m, struct ip *);
+	ip->ip_len -= skip;
+	ip->ip_p = IPPROTO_ESP;
+
+	/*
+	 * We cannot yet update the cksums so clear any
+	 * h/w cksum flags as they are no longer valid.
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
+		m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+
+	(void) ipsec4_common_input(m, iphlen, ip->ip_p);
+	return (NULL);			/* NB: consumed, bypass processing. */
+}
+#endif /* INET */
+#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
+
 static void
 udp_abort(struct socket *so)
 {

Modified: head/sys/netinet/udp_var.h
==============================================================================
--- head/sys/netinet/udp_var.h	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netinet/udp_var.h	Fri Jun 12 15:44:35 2009	(r194062)
@@ -64,6 +64,12 @@ struct udpcb {
 #define	intoudpcb(ip)	((struct udpcb *)(ip)->inp_ppcb)
 #define	sotoudpcb(so)	(intoudpcb(sotoinpcb(so)))
 
+				/* IPsec: ESP in UDP tunneling: */
+#define	UF_ESPINUDP_NON_IKE	0x00000001	/* w/ non-IKE marker .. */
+	/* .. per draft-ietf-ipsec-nat-t-ike-0[01],
+	 * and draft-ietf-ipsec-udp-encaps-(00/)01.txt */
+#define	UF_ESPINUDP		0x00000002	/* w/ non-ESP marker. */
+
 struct udpstat {
 				/* input statistics: */
 	u_long	udps_ipackets;		/* total input packets */
@@ -127,6 +133,7 @@ int		 udp_newudpcb(struct inpcb *);
 void		 udp_discardcb(struct udpcb *);
 
 void		 udp_ctlinput(int, struct sockaddr *, void *);
+int	 	 udp_ctloutput(struct socket *, struct sockopt *);
 void		 udp_init(void);
 #ifdef VIMAGE
 void		 udp_destroy(void);

Modified: head/sys/netipsec/ipsec_input.c
==============================================================================
--- head/sys/netipsec/ipsec_input.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netipsec/ipsec_input.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -121,6 +121,9 @@ ipsec_common_input(struct mbuf *m, int s
 	struct secasvar *sav;
 	u_int32_t spi;
 	int error;
+#ifdef IPSEC_NAT_T
+	struct m_tag *tag;
+#endif
 
 	IPSEC_ISTAT(sproto, V_espstat.esps_input, V_ahstat.ahs_input,
 		V_ipcompstat.ipcomps_input);
@@ -175,6 +178,12 @@ ipsec_common_input(struct mbuf *m, int s
 		m_copydata(m, offsetof(struct ip, ip_dst),
 		    sizeof(struct in_addr),
 		    (caddr_t) &dst_address.sin.sin_addr);
+#ifdef IPSEC_NAT_T
+		/* Find the source port for NAT-T; see udp*_espdecap. */
+		tag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
+		if (tag != NULL)
+			dst_address.sin.sin_port = ((u_int16_t *)(tag + 1))[1];
+#endif /* IPSEC_NAT_T */
 		break;
 #endif /* INET */
 #ifdef INET6

Modified: head/sys/netipsec/ipsec_output.c
==============================================================================
--- head/sys/netipsec/ipsec_output.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netipsec/ipsec_output.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -84,6 +84,10 @@
 
 #include <machine/in_cksum.h>
 
+#ifdef IPSEC_NAT_T
+#include <netinet/udp.h>
+#endif
+
 #ifdef DEV_ENC
 #include <net/if_enc.h>
 #endif
@@ -180,6 +184,57 @@ ipsec_process_done(struct mbuf *m, struc
 		ip->ip_len = ntohs(ip->ip_len);
 		ip->ip_off = ntohs(ip->ip_off);
 
+#ifdef IPSEC_NAT_T
+		/*
+		 * If NAT-T is enabled, now that all IPsec processing is done
+		 * insert UDP encapsulation header after IP header.
+		 */
+		if (sav->natt_type) {
+#ifdef _IP_VHL
+			const int hlen = IP_VHL_HL(ip->ip_vhl);
+#else
+			const int hlen = (ip->ip_hl << 2);
+#endif
+			int size, off;
+			struct mbuf *mi;
+			struct udphdr *udp;
+
+			size = sizeof(struct udphdr);
+			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
+				/*
+				 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
+				 * draft-ietf-ipsec-udp-encaps-(00/)01.txt,
+				 * ignoring possible AH mode
+				 * non-IKE marker + non-ESP marker
+				 * from draft-ietf-ipsec-udp-encaps-00.txt.
+				 */
+				size += sizeof(u_int64_t);
+			}
+			mi = m_makespace(m, hlen, size, &off);
+			if (mi == NULL) {
+				DPRINTF(("%s: m_makespace for udphdr failed\n",
+				    __func__));
+				error = ENOBUFS;
+				goto bad;
+			}
+
+			udp = (struct udphdr *)(mtod(mi, caddr_t) + off);
+			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE)
+				udp->uh_sport = htons(UDP_ENCAP_ESPINUDP_PORT);
+			else
+				udp->uh_sport =
+					KEY_PORTFROMSADDR(&sav->sah->saidx.src);
+			udp->uh_dport = KEY_PORTFROMSADDR(&sav->sah->saidx.dst);
+			udp->uh_sum = 0;
+			udp->uh_ulen = htons(m->m_pkthdr.len - hlen);
+			ip->ip_len = m->m_pkthdr.len;
+			ip->ip_p = IPPROTO_UDP;
+
+			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE)
+				*(u_int64_t *)(udp + 1) = 0;
+		}
+#endif /* IPSEC_NAT_T */
+
 		return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL);
 #endif /* INET */
 #ifdef INET6

Modified: head/sys/netipsec/key.c
==============================================================================
--- head/sys/netipsec/key.c	Fri Jun 12 14:27:50 2009	(r194061)
+++ head/sys/netipsec/key.c	Fri Jun 12 15:44:35 2009	(r194062)
@@ -221,6 +221,12 @@ static const int minsize[] = {
 	0,				/* SADB_X_EXT_KMPRIVATE */
 	sizeof(struct sadb_x_policy),	/* SADB_X_EXT_POLICY */
 	sizeof(struct sadb_x_sa2),	/* SADB_X_SA2 */
+	sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */
+	sizeof(struct sadb_address),	/* SADB_X_EXT_NAT_T_OAI */
+	sizeof(struct sadb_address),	/* SADB_X_EXT_NAT_T_OAR */
+	sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */
 };
 static const int maxsize[] = {
 	sizeof(struct sadb_msg),	/* SADB_EXT_RESERVED */
@@ -243,6 +249,12 @@ static const int maxsize[] = {
 	0,				/* SADB_X_EXT_KMPRIVATE */
 	0,				/* SADB_X_EXT_POLICY */
 	sizeof(struct sadb_x_sa2),	/* SADB_X_SA2 */
+	sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */
+	sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */
+	0,				/* SADB_X_EXT_NAT_T_OAI */
+	0,				/* SADB_X_EXT_NAT_T_OAR */
+	sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */
 };
 
 #ifdef SYSCTL_DECL
@@ -425,6 +437,13 @@ static struct mbuf *key_setsadbmsg __P((
 static struct mbuf *key_setsadbsa __P((struct secasvar *));
 static struct mbuf *key_setsadbaddr __P((u_int16_t,
 	const struct sockaddr *, u_int8_t, u_int16_t));
+#ifdef IPSEC_NAT_T
+static struct mbuf *key_setsadbxport(u_int16_t, u_int16_t);
+static struct mbuf *key_setsadbxtype(u_int16_t);
+#endif
+static void key_porttosaddr(struct sockaddr *, u_int16_t);
+#define	KEY_PORTTOSADDR(saddr, port)				\
+	key_porttosaddr((struct sockaddr *)(saddr), (port))
 static struct mbuf *key_setsadbxsa2 __P((u_int8_t, u_int32_t, u_int32_t));
 static struct mbuf *key_setsadbxpolicy __P((u_int16_t, u_int8_t,
 	u_int32_t));
@@ -1067,12 +1086,21 @@ key_allocsa(
 	struct secasvar *sav;
 	u_int stateidx, arraysize, state;
 	const u_int *saorder_state_valid;
+	int chkport;
 
 	IPSEC_ASSERT(dst != NULL, ("null dst address"));
 
 	KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
 		printf("DP %s from %s:%u\n", __func__, where, tag));
 
+#ifdef IPSEC_NAT_T
+        chkport = (dst->sa.sa_family == AF_INET &&
+	    dst->sa.sa_len == sizeof(struct sockaddr_in) &&
+	    dst->sin.sin_port != 0);
+#else
+	chkport = 0;
+#endif
+
 	/*
 	 * searching SAD.
 	 * XXX: to be checked internal IP header somewhere.  Also when
@@ -1104,11 +1132,11 @@ key_allocsa(
 					continue;
 #if 0	/* don't check src */
 				/* check src address */
-				if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, 0) != 0)
+				if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, chkport) != 0)
 					continue;
 #endif
 				/* check dst address */
-				if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, 0) != 0)
+				if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, chkport) != 0)
 					continue;
 				sa_addref(sav);
 				goto done;
@@ -1798,6 +1826,11 @@ key_spdadd(so, m, mhp)
 	dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
 	xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY];
 
+	/* 
+	 * Note: do not parse SADB_X_EXT_NAT_T_* here:
+	 * we are processing traffic endpoints.
+	 */
+
 	/* make secindex */
 	/* XXX boundary check against sa_len */
 	KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir,
@@ -1931,6 +1964,11 @@ key_spdadd(so, m, mhp)
 	struct sadb_msg *newmsg;
 	int off;
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
+
 	/* create new sadb_msg to reply. */
 	if (lft) {
 		n = key_gather_mbuf(m, mhp, 2, 5, SADB_EXT_RESERVED,
@@ -2053,6 +2091,11 @@ key_spddelete(so, m, mhp)
 	dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];
 	xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY];
 
+	/*
+	 * Note: do not parse SADB_X_EXT_NAT_T_* here:
+	 * we are processing traffic endpoints.
+	 */
+
 	/* make secindex */
 	/* XXX boundary check against sa_len */
 	KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir,
@@ -2089,6 +2132,11 @@ key_spddelete(so, m, mhp)
 	struct mbuf *n;
 	struct sadb_msg *newmsg;
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
+
 	/* create new sadb_msg to reply. */
 	n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED,
 	    SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST);
@@ -2435,6 +2483,10 @@ key_setdumpsp(struct secpolicy *sp, u_in
 		goto fail;
 	result = m;
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
 	m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC,
 	    &sp->spidx.src.sa, sp->spidx.prefs,
 	    sp->spidx.ul_proto);
@@ -2581,6 +2633,11 @@ key_spdexpire(sp)
 	lt->sadb_lifetime_usetime = sp->validtime;
 	m_cat(result, m);
 
+	/*
+	 * Note: do not send SADB_X_EXT_NAT_T_* here:
+	 * we are sending traffic endpoints.
+	 */
+
 	/* set sadb_address for source */
 	m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC,
 	    &sp->spidx.src.sa,
@@ -3034,6 +3091,9 @@ key_setsaval(sav, m, mhp)
 	sav->tdb_encalgxform = NULL;	/* encoding algorithm */
 	sav->tdb_authalgxform = NULL;	/* authentication algorithm */
 	sav->tdb_compalgxform = NULL;	/* compression algorithm */
+	/*  Initialize even if NAT-T not compiled in: */
+	sav->natt_type = 0;
+	sav->natt_esp_frag_len = 0;
 
 	/* SA */
 	if (mhp->ext[SADB_EXT_SA] != NULL) {
@@ -3343,6 +3403,12 @@ key_setdumpsa(struct secasvar *sav, u_in
 		SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, SADB_EXT_KEY_AUTH,
 		SADB_EXT_KEY_ENCRYPT, SADB_EXT_IDENTITY_SRC,
 		SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY,
+#ifdef IPSEC_NAT_T
+		SADB_X_EXT_NAT_T_TYPE,
+		SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT,
+		SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR,
+		SADB_X_EXT_NAT_T_FRAG,
+#endif
 	};
 
 	m = key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt);
@@ -3427,6 +3493,36 @@ key_setdumpsa(struct secasvar *sav, u_in
 				goto fail;
 			break;
 
+#ifdef IPSEC_NAT_T
+		case SADB_X_EXT_NAT_T_TYPE:
+			m = key_setsadbxtype(sav->natt_type);
+			if (!m)
+				goto fail;
+			break;
+		
+		case SADB_X_EXT_NAT_T_DPORT:
+			m = key_setsadbxport(
+			    KEY_PORTFROMSADDR(&sav->sah->saidx.dst),
+			    SADB_X_EXT_NAT_T_DPORT);
+			if (!m)
+				goto fail;
+			break;
+
+		case SADB_X_EXT_NAT_T_SPORT:
+			m = key_setsadbxport(
+			    KEY_PORTFROMSADDR(&sav->sah->saidx.src),
+			    SADB_X_EXT_NAT_T_SPORT);
+			if (!m)
+				goto fail;
+			break;
+
+		case SADB_X_EXT_NAT_T_OAI:
+		case SADB_X_EXT_NAT_T_OAR:
+		case SADB_X_EXT_NAT_T_FRAG:
+			/* We do not (yet) support those. */
+			continue;
+#endif
+
 		case SADB_EXT_ADDRESS_PROXY:
 		case SADB_EXT_IDENTITY_SRC:
 		case SADB_EXT_IDENTITY_DST:
@@ -3621,6 +3717,116 @@ key_setsadbxsa2(u_int8_t mode, u_int32_t
 	return m;
 }
 
+#ifdef IPSEC_NAT_T
+/*
+ * Set a type in sadb_x_nat_t_type.
+ */
+static struct mbuf *
+key_setsadbxtype(u_int16_t type)
+{
+	struct mbuf *m;
+	size_t len;
+	struct sadb_x_nat_t_type *p;
+
+	len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_type));
+
+	m = key_alloc_mbuf(len);
+	if (!m || m->m_next) {	/*XXX*/
+		if (m)
+			m_freem(m);
+		return (NULL);
+	}
+
+	p = mtod(m, struct sadb_x_nat_t_type *);
+
+	bzero(p, len);
+	p->sadb_x_nat_t_type_len = PFKEY_UNIT64(len);
+	p->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE;
+	p->sadb_x_nat_t_type_type = type;
+
+	return (m);
+}
+/*
+ * Set a port in sadb_x_nat_t_port.
+ * In contrast to default RFC 2367 behaviour, port is in network byte order.
+ */
+static struct mbuf *
+key_setsadbxport(u_int16_t port, u_int16_t type)
+{
+	struct mbuf *m;
+	size_t len;
+	struct sadb_x_nat_t_port *p;
+
+	len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_port));
+
+	m = key_alloc_mbuf(len);
+	if (!m || m->m_next) {	/*XXX*/
+		if (m)
+			m_freem(m);
+		return (NULL);
+	}
+
+	p = mtod(m, struct sadb_x_nat_t_port *);
+
+	bzero(p, len);
+	p->sadb_x_nat_t_port_len = PFKEY_UNIT64(len);
+	p->sadb_x_nat_t_port_exttype = type;
+	p->sadb_x_nat_t_port_port = port;
+
+	return (m);
+}
+
+/* 
+ * Get port from sockaddr. Port is in network byte order.
+ */
+u_int16_t 
+key_portfromsaddr(struct sockaddr *sa)
+{
+	INIT_VNET_IPSEC(curvnet);
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		return ((struct sockaddr_in *)sa)->sin_port;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		return ((struct sockaddr_in6 *)sa)->sin6_port;
+#endif
+	}
+	KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
+		printf("DP %s unexpected address family %d\n",
+			__func__, sa->sa_family));
+	return (0);
+}
+#endif /* IPSEC_NAT_T */
+
+/*
+ * Set port in struct sockaddr. Port is in network byte order.
+ */
+static void
+key_porttosaddr(struct sockaddr *sa, u_int16_t port)
+{
+	INIT_VNET_IPSEC(curvnet);
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		((struct sockaddr_in *)sa)->sin_port = port;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		((struct sockaddr_in6 *)sa)->sin6_port = port;
+		break;
+#endif
+	default:
+		ipseclog((LOG_DEBUG, "%s: unexpected address family %d.\n",
+			__func__, sa->sa_family));
+		break;
+	}
+}
+
 /*
  * set data into sadb_x_policy
  */
@@ -3818,6 +4024,8 @@ key_cmpsaidx(
 	const struct secasindex *saidx1,
 	int flag)
 {
+	int chkport = 0;
+
 	/* sanity */
 	if (saidx0 == NULL && saidx1 == NULL)
 		return 1;
@@ -3855,10 +4063,25 @@ key_cmpsaidx(
 				return 0;
 		}
 
-		if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, 0) != 0) {
+#ifdef IPSEC_NAT_T
+		/*
+		 * If NAT-T is enabled, check ports for tunnel mode.
+		 * Do not check ports if they are set to zero in the SPD.
+		 * Also do not do it for transport mode, as there is no
+		 * port information available in the SP.
+		 */
+		if (saidx1->mode == IPSEC_MODE_TUNNEL &&
+		    saidx1->src.sa.sa_family == AF_INET &&
+		    saidx1->dst.sa.sa_family == AF_INET &&
+		    ((const struct sockaddr_in *)(&saidx1->src))->sin_port &&
+		    ((const struct sockaddr_in *)(&saidx1->dst))->sin_port)
+			chkport = 1;
+#endif /* IPSEC_NAT_T */
+
+		if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, chkport) != 0) {
 			return 0;
 		}
-		if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, 0) != 0) {
+		if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, chkport) != 0) {
 			return 0;
 		}
 	}
@@ -4492,7 +4715,10 @@ key_getspi(so, m, mhp)
 		return key_senderror(so, m, EINVAL);
 	}
 
-	/* make sure if port number is zero. */
+	/*
+	 * Make sure the port numbers are zero.
+	 * In case of NAT-T we will update them later if needed.
+	 */
 	switch (((struct sockaddr *)(src0 + 1))->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr *)(src0 + 1))->sa_len !=
@@ -4529,6 +4755,43 @@ key_getspi(so, m, mhp)
 	/* XXX boundary check against sa_len */
 	KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx);
 
+#ifdef IPSEC_NAT_T
+	/*
+	 * Handle NAT-T info if present.
+	 * We made sure the port numbers are zero above, so we do
+	 * not have to worry in case we do not update them.
+	 */
+	if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL)
+		ipseclog((LOG_DEBUG, "%s: NAT-T OAi present\n", __func__));
+	if (mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL)
+		ipseclog((LOG_DEBUG, "%s: NAT-T OAr present\n", __func__));
+
+	if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) {
+		struct sadb_x_nat_t_type *type;
+		struct sadb_x_nat_t_port *sport, *dport;
+
+		if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) {
+			ipseclog((LOG_DEBUG, "%s: invalid nat-t message "
+			    "passed.\n", __func__));
+			return key_senderror(so, m, EINVAL);
+		}
+
+		sport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_SPORT];
+		dport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_DPORT];
+
+		if (sport)
+			KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port);
+		if (dport)
+			KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port);
+	}
+#endif
+
 	/* SPI allocation */
 	spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE],
 	                       &saidx);
@@ -4726,6 +4989,11 @@ key_update(so, m, mhp)
 	INIT_VNET_IPSEC(curvnet);
 	struct sadb_sa *sa0;
 	struct sadb_address *src0, *dst0;
+#ifdef IPSEC_NAT_T
+	struct sadb_x_nat_t_type *type;
+	struct sadb_address *iaddr, *raddr;
+	struct sadb_x_nat_t_frag *frag;
+#endif
 	struct secasindex saidx;
 	struct secashead *sah;
 	struct secasvar *sav;
@@ -4784,6 +5052,73 @@ key_update(so, m, mhp)
 	/* XXX boundary check against sa_len */
 	KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx);
 
+	/*
+	 * Make sure the port numbers are zero.
+	 * In case of NAT-T we will update them later if needed.
+	 */
+	KEY_PORTTOSADDR(&saidx.src, 0);
+	KEY_PORTTOSADDR(&saidx.dst, 0);
+
+#ifdef IPSEC_NAT_T
+	/*
+	 * Handle NAT-T info if present.
+	 */
+	if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL &&
+	    mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) {
+		struct sadb_x_nat_t_port *sport, *dport;
+
+		if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) ||
+		    mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) {
+			ipseclog((LOG_DEBUG, "%s: invalid message.\n",
+			    __func__));
+			return key_senderror(so, m, EINVAL);
+		}
+
+		type = (struct sadb_x_nat_t_type *)
+		    mhp->ext[SADB_X_EXT_NAT_T_TYPE];
+		sport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_SPORT];
+		dport = (struct sadb_x_nat_t_port *)
+		    mhp->ext[SADB_X_EXT_NAT_T_DPORT];

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list