git: 0ac8d8d9954f - stable/14 - pf: support SCTP multihoming

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Mon, 02 Oct 2023 09:33:22 UTC
The branch stable/14 has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=0ac8d8d9954f2d446d4fa7a5f154d55931f3d1f1

commit 0ac8d8d9954f2d446d4fa7a5f154d55931f3d1f1
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2023-08-02 17:05:00 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2023-10-02 09:32:14 +0000

    pf: support SCTP multihoming
    
    SCTP may announce additional IP addresses it'll use in the INIT/INIT_ACK
    chunks, or in ASCONF chunks at any time during the connection. Parse these
    parameters, evaluate the ruleset for the new connection and if allowed
    create the corresponding states.
    
    MFC after:      3 weeks
    Sponsored by:   Orange Business Services
    Differential Revision:  https://reviews.freebsd.org/D41637
    
    (cherry picked from commit 10aa9ddb4d45ab0c8f56b0e91c7e8de213030c0f)
---
 sys/net/pfvar.h          |  22 +++++-
 sys/netpfil/pf/pf.c      | 186 ++++++++++++++++++++++++++++++++++++++++++++++-
 sys/netpfil/pf/pf_norm.c |  34 +++++++--
 3 files changed, 231 insertions(+), 11 deletions(-)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index f9cb45f696d3..ec72c1079c70 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1531,6 +1531,9 @@ struct pfi_kkif {
 #define PFI_IFLAG_SKIP		0x0100	/* skip filtering on interface */
 
 #ifdef _KERNEL
+struct pf_sctp_multihome_job;
+TAILQ_HEAD(pf_sctp_multihome_jobs, pf_sctp_multihome_job);
+
 struct pf_pdesc {
 	struct {
 		int	 done;
@@ -1578,10 +1581,22 @@ struct pf_pdesc {
 #define PFDESC_SCTP_SHUTDOWN	0x0010
 #define PFDESC_SCTP_SHUTDOWN_COMPLETE	0x0020
 #define PFDESC_SCTP_DATA	0x0040
-#define PFDESC_SCTP_OTHER	0x0080
+#define PFDESC_SCTP_ASCONF	0x0080
+#define PFDESC_SCTP_OTHER	0x0100
 	u_int16_t	 sctp_flags;
 	u_int32_t	 sctp_initiate_tag;
+
+	struct pf_sctp_multihome_jobs	sctp_multihome_jobs;
+};
+
+struct pf_sctp_multihome_job {
+	TAILQ_ENTRY(pf_sctp_multihome_job)	next;	/* jobs list linkage */
+	struct pf_pdesc				 pd;	/* copy of packet's pd */
+	struct pf_addr				 src;	/* announced address */
+	struct pf_addr				 dst;	/* copy of pd->dst */
+	struct mbuf				*m;	/* packet scanned */
 };
+
 #endif
 
 /* flags for RDR options */
@@ -2253,6 +2268,11 @@ void	pf_addr_inc(struct pf_addr *, sa_family_t);
 int	pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *, bool);
 #endif /* INET6 */
 
+int	pf_multihome_scan_init(struct mbuf *, int, int, struct pf_pdesc *,
+	    struct pfi_kkif *);
+int	pf_multihome_scan_asconf(struct mbuf *, int, int, struct pf_pdesc *,
+	    struct pfi_kkif *);
+
 u_int32_t	pf_new_isn(struct pf_kstate *);
 void   *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
 	    sa_family_t);
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 7cee0833072b..69373b720ad9 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -126,6 +126,8 @@ SDT_PROBE_DEFINE4(pf, ip, test6, done, "int", "int", "struct pf_krule *",
 SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *",
     "struct pf_state_key_cmp *", "int", "struct pf_pdesc *",
     "struct pf_kstate *");
+SDT_PROBE_DEFINE4(pf, sctp, multihome, test, "struct pfi_kkif *",
+    "struct pf_krule *", "struct mbuf *", "int");
 
 SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *",
     "struct mbuf *");
@@ -307,6 +309,8 @@ static int		 pf_test_state_udp(struct pf_kstate **,
 static int		 pf_test_state_icmp(struct pf_kstate **,
 			    struct pfi_kkif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
+static void		 pf_sctp_multihome_delayed(struct pf_pdesc *, int,
+			    struct pfi_kkif *, struct pf_kstate *);
 static int		 pf_test_state_sctp(struct pf_kstate **,
 			    struct pfi_kkif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
@@ -5911,6 +5915,174 @@ pf_test_state_sctp(struct pf_kstate **state, struct pfi_kkif *kif,
 	return (PF_PASS);
 }
 
+/* Run the ruleset for each queued multihome job, then release the jobs. */
+static void
+pf_sctp_multihome_delayed(struct pf_pdesc *pd, int off, struct pfi_kkif *kif,
+    struct pf_kstate *s)
+{
+	struct pf_sctp_multihome_job	*j, *tmp;
+	int			 action;
+	struct pf_kstate	*sm;
+	struct pf_krule		*ra = NULL, *r = &V_pf_default_rule;
+	struct pf_kruleset	*rs = NULL;
+	PF_RULES_RLOCK_TRACKER;
+
+	TAILQ_FOREACH_SAFE(j, &pd->sctp_multihome_jobs, next, tmp) {
+		sm = NULL;	/* don't re-unlock an earlier job's state */
+		PF_RULES_RLOCK();
+		action = pf_test_rule(&r, &sm, kif,
+		    j->m, off, &j->pd, &ra, &rs, NULL);
+		PF_RULES_RUNLOCK();
+		SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->m, action);
+		if (sm)
+			PF_STATE_UNLOCK(sm);
+		TAILQ_REMOVE(&pd->sctp_multihome_jobs, j, next);
+		free(j, M_PFTEMP);
+	}
+}
+
+/*
+ * Queue a multihome job on pd for each new address parameter in the byte
+ * range [start, start + len) of m. Returns PF_PASS, or PF_DROP on error.
+ */
+static int
+pf_multihome_scan(struct mbuf *m, int start, int len, struct pf_pdesc *pd,
+    struct pfi_kkif *kif)
+{
+	int			 off = 0;
+	struct pf_sctp_multihome_job	*job;
+
+	while (off < len) {
+		struct sctp_paramhdr h;
+
+		if (!pf_pull_hdr(m, start + off, &h, sizeof(h), NULL, NULL,
+		    pd->af))
+			return (PF_DROP);
+
+		/* Parameters are at least 4 bytes, so off always advances. */
+		if (ntohs(h.param_length) < 4)
+			return (PF_DROP);
+
+		switch (ntohs(h.param_type)) {
+		case  SCTP_IPV4_ADDRESS: {
+			struct in_addr t;
+
+			if (ntohs(h.param_length) !=
+			    (sizeof(struct sctp_paramhdr) + sizeof(t)))
+				return (PF_DROP);
+
+			if (!pf_pull_hdr(m, start + off + sizeof(h), &t, sizeof(t),
+			    NULL, NULL, pd->af))
+				return (PF_DROP);
+			if (in_nullhost(t))	/* wildcard = packet source */
+				t.s_addr = pd->src->v4.s_addr;
+
+			/*
+			 * Avoid duplicating states. We'll already have
+			 * created a state based on the source address of
+			 * the packet, but SCTP endpoints may also list this
+			 * address (or the wildcard) again in the parameters.
+			 */
+			if (t.s_addr == pd->src->v4.s_addr)
+				break;
+
+			/*
+			 * We hold the state lock (idhash) here, which means
+			 * that we can't acquire the keyhash, or we'll get a
+			 * LOR (and potentially double-lock things too). We also
+			 * can't release the state lock here, so instead we'll
+			 * enqueue this for async handling.
+			 * There's a relatively small race here, in that a
+			 * packet using the new addresses could arrive already,
+			 * but that's just tough luck for it.
+			 */
+			job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
+			if (! job)
+				return (PF_DROP);
+
+			memcpy(&job->pd, pd, sizeof(*pd));
+			/* New source address! */
+			memcpy(&job->src, &t, sizeof(t));
+			job->pd.src = &job->src;
+			memcpy(&job->dst, pd->dst, sizeof(job->dst));
+			job->pd.dst = &job->dst;
+			job->m = m;
+			TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
+			break;
+		}
+#ifdef INET6
+		case SCTP_IPV6_ADDRESS: {
+			struct in6_addr t;
+
+			if (ntohs(h.param_length) !=
+			    (sizeof(struct sctp_paramhdr) + sizeof(t)))
+				return (PF_DROP);
+
+			if (!pf_pull_hdr(m, start + off + sizeof(h), &t, sizeof(t),
+			    NULL, NULL, pd->af))
+				return (PF_DROP);
+			if (memcmp(&t, &in6addr_any, sizeof(t)) == 0)
+				memcpy(&t, &pd->src->v6, sizeof(t));
+			if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0)
+				break;	/* duplicate, see IPv4 case */
+
+			job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
+			if (! job)
+				return (PF_DROP);
+
+			memcpy(&job->pd, pd, sizeof(*pd));
+			memcpy(&job->src, &t, sizeof(t));
+			job->pd.src = &job->src;
+			memcpy(&job->dst, pd->dst, sizeof(job->dst));
+			job->pd.dst = &job->dst;
+			job->m = m;
+			TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
+			break;
+		}
+#endif
+		case SCTP_ADD_IP_ADDRESS: {
+			int ret;
+			struct sctp_asconf_paramhdr ah;
+
+			if (!pf_pull_hdr(m, start + off, &ah, sizeof(ah),
+			    NULL, NULL, pd->af))
+				return (PF_DROP);
+			/* NOTE(review): nesting depth is not limited here. */
+			ret = pf_multihome_scan(m, start + off + sizeof(ah),
+			    ntohs(ah.ph.param_length) - sizeof(ah), pd, kif);
+			if (ret != PF_PASS)
+				return (ret);
+			break;
+		}
+		default:
+			break;
+		}
+
+		off += roundup(ntohs(h.param_length), 4);
+	}
+
+	return (PF_PASS);
+}
+int
+pf_multihome_scan_init(struct mbuf *m, int start, int len, struct pf_pdesc *pd,
+    struct pfi_kkif *kif)
+{
+	const int	hdrlen = sizeof(struct sctp_init_chunk);
+
+	/* The parameters to scan follow the fixed INIT(_ACK) chunk header. */
+	return (pf_multihome_scan(m, start + hdrlen, len - hdrlen, pd, kif));
+}
+
+int
+pf_multihome_scan_asconf(struct mbuf *m, int start, int len,
+    struct pf_pdesc *pd, struct pfi_kkif *kif)
+{
+	const int	hdrlen = sizeof(struct sctp_asconf_chunk);
+
+	/* The parameters to scan follow the fixed ASCONF chunk header. */
+	return (pf_multihome_scan(m, start + hdrlen, len - hdrlen, pd, kif));
+}
+
 static int
 pf_test_state_icmp(struct pf_kstate **state, struct pfi_kkif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
@@ -7411,6 +7583,7 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
 	}
 
 	memset(&pd, 0, sizeof(pd));
+	TAILQ_INIT(&pd.sctp_multihome_jobs);
 	if (default_actions != NULL)
 		memcpy(&pd.act, default_actions, sizeof(pd.act));
 	pd.pf_mtag = pf_find_mtag(m);
@@ -7612,6 +7785,8 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
 				pd.act.log |= PF_LOG_FORCE;
 			goto done;
 		}
+		pd.p_len = pd.tot_len - off;
+
 		pd.sport = &pd.hdr.sctp.src_port;
 		pd.dport = &pd.hdr.sctp.dest_port;
 		if (pd.hdr.sctp.src_port == 0 || pd.hdr.sctp.dest_port == 0) {
@@ -7895,7 +8070,7 @@ done:
 		/* pf_route() returns unlocked. */
 		if (rt) {
 			pf_route(m0, r, kif->pfik_ifp, s, &pd, inp);
-			return (action);
+			goto out;
 		}
 		if (pf_dummynet(&pd, s, r, m0) != 0) {
 			action = PF_DROP;
@@ -7909,6 +8084,9 @@ done:
 	if (s)
 		PF_STATE_UNLOCK(s);
 
+out:
+	pf_sctp_multihome_delayed(&pd, off, kif, s);
+
 	return (action);
 }
 #endif /* INET */
@@ -7958,6 +8136,7 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
 	}
 
 	memset(&pd, 0, sizeof(pd));
+	TAILQ_INIT(&pd.sctp_multihome_jobs);
 	if (default_actions != NULL)
 		memcpy(&pd.act, default_actions, sizeof(pd.act));
 	pd.pf_mtag = pf_find_mtag(m);
@@ -8440,7 +8619,7 @@ done:
 		/* pf_route6() returns unlocked. */
 		if (rt) {
 			pf_route6(m0, r, kif->pfik_ifp, s, &pd, inp);
-			return (action);
+			goto out;
 		}
 		if (pf_dummynet(&pd, s, r, m0) != 0) {
 			action = PF_DROP;
@@ -8457,8 +8636,11 @@ done:
 	    (mtag = m_tag_find(m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL)
 		action = pf_refragment6(ifp, m0, mtag, pflags & PFIL_FWD);
 
+out:
 	SDT_PROBE4(pf, ip, test6, done, action, reason, r, s);
 
+	pf_sctp_multihome_delayed(&pd, off, kif, s);
+
 	return (action);
 }
 #endif /* INET6 */
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index 8f532ad08c46..83b94db87a19 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -2021,11 +2021,13 @@ pf_normalize_mss(struct mbuf *m, int off, struct pf_pdesc *pd)
 }
 
 static int
-pf_scan_sctp(struct mbuf *m, int ipoff, int off, struct pf_pdesc *pd)
+pf_scan_sctp(struct mbuf *m, int ipoff, int off, struct pf_pdesc *pd,
+    struct pfi_kkif *kif)
 {
 	struct sctp_chunkhdr ch = { };
 	int chunk_off = sizeof(struct sctphdr);
 	int chunk_start;
+	int ret;
 
 	while (off + chunk_off < pd->tot_len) {
 		if (!pf_pull_hdr(m, off + chunk_off, &ch, sizeof(ch), NULL,
@@ -2040,7 +2042,8 @@ pf_scan_sctp(struct mbuf *m, int ipoff, int off, struct pf_pdesc *pd)
 		chunk_off += roundup(ntohs(ch.chunk_length), 4);
 
 		switch (ch.chunk_type) {
-		case SCTP_INITIATION: {
+		case SCTP_INITIATION:
+		case SCTP_INITIATION_ACK: {
 			struct sctp_init_chunk init;
 
 			if (!pf_pull_hdr(m, off + chunk_start, &init,
@@ -2064,17 +2067,24 @@ pf_scan_sctp(struct mbuf *m, int ipoff, int off, struct pf_pdesc *pd)
 			 * RFC 9260, Section 3.1, INIT chunks MUST have zero
 			 * verification tag.
 			 */
-			if (pd->hdr.sctp.v_tag != 0)
+			if (ch.chunk_type == SCTP_INITIATION &&
+			    pd->hdr.sctp.v_tag != 0)
 				return (PF_DROP);
 
 			pd->sctp_initiate_tag = init.init.initiate_tag;
 
-			pd->sctp_flags |= PFDESC_SCTP_INIT;
+			if (ch.chunk_type == SCTP_INITIATION)
+				pd->sctp_flags |= PFDESC_SCTP_INIT;
+			else
+				pd->sctp_flags |= PFDESC_SCTP_INIT_ACK;
+
+			ret = pf_multihome_scan_init(m, off + chunk_start,
+			    ntohs(init.ch.chunk_length), pd, kif);
+			if (ret != PF_PASS)
+				return (ret);
+
 			break;
 		}
-		case SCTP_INITIATION_ACK:
-			pd->sctp_flags |= PFDESC_SCTP_INIT_ACK;
-			break;
 		case SCTP_ABORT_ASSOCIATION:
 			pd->sctp_flags |= PFDESC_SCTP_ABORT;
 			break;
@@ -2092,6 +2102,14 @@ pf_scan_sctp(struct mbuf *m, int ipoff, int off, struct pf_pdesc *pd)
 		case SCTP_DATA:
 			pd->sctp_flags |= PFDESC_SCTP_DATA;
 			break;
+		case SCTP_ASCONF:
+			pd->sctp_flags |= PFDESC_SCTP_ASCONF;
+
+			ret = pf_multihome_scan_asconf(m, off + chunk_start,
+			    ntohs(ch.chunk_length), pd, kif);
+			if (ret != PF_PASS)
+				return (ret);
+			break;
 		default:
 			pd->sctp_flags |= PFDESC_SCTP_OTHER;
 			break;
@@ -2133,7 +2151,7 @@ pf_normalize_sctp(int dir, struct pfi_kkif *kif, struct mbuf *m, int ipoff,
 
 	/* Unconditionally scan the SCTP packet, because we need to look for
 	 * things like shutdown and asconf chunks. */
-	if (pf_scan_sctp(m, ipoff, off, pd) != PF_PASS)
+	if (pf_scan_sctp(m, ipoff, off, pd, kif) != PF_PASS)
 		goto sctp_drop;
 
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);