git: b8ef285f6cc6 - main - pf: ensure dummynet gets the correct direction after route-to

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Fri, 02 Feb 2024 16:55:40 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=b8ef285f6cc6ae733e75488a6ff879e6fb23133d

commit b8ef285f6cc6ae733e75488a6ff879e6fb23133d
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2024-02-01 17:59:36 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2024-02-02 16:55:16 +0000

    pf: ensure dummynet gets the correct direction after route-to
    
    If we apply a route-to to an inbound packet, pf_route() may hand that
    packet over to dummynet. Dummynet may then delay the packet, and later
    re-inject it. This re-injection (in dummynet_send()) needs to know
    if the packet was inbound or outbound, to call the correct path for
    continued processing.
    
    That's done based on the pf_pdesc we pass along (through
    pf_dummynet_route() and pf_pdesc_to_dnflow()). In the case of pf_route()
    on inbound packets that may be wrong, because we're called in the input
    path, and didn't update pf_pdesc->dir.
    
    This can manifest in issues with fragmented packets. For example, a
    fragmented packet will be re-fragmented in pf_route(), and if dummynet
    makes different decisions for some of the fragments (that is, it delays
    some and allows others to pass through directly) this will break.
    
    The packets that pass through dummynet without delay will be transmitted
    correctly (through the ifp->if_output() call in pf_route()), but
    the delayed packets will be re-injected in the input path (and not
    the output path, as they should be). These packets will pass through
    pf_test(PF_IN) as they're tagged PF_MTAG_FLAG_DUMMYNET. However,
    this tag is then removed and the packet will be routed and enter
    pf_test(PF_OUT) where pf_reassemble() will hold them indefinitely
    (as some fragments have been transmitted directly, and will never hit
    pf_test(PF_OUT)).
    
    The fix is simple: we must update pf_pdesc->dir to PF_OUT before we
    pass the packet to dummynet.
    
    See also:       https://redmine.pfsense.org/issues/15156
    Reviewed by:    rcm
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
---
 sys/netpfil/pf/pf.c              |  6 +++++
 tests/sys/netpfil/pf/route_to.sh | 50 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 36ff0eac16ad..ec7964a48e6d 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -7361,6 +7361,12 @@ pf_route(struct mbuf **m, struct pf_krule *r, struct ifnet *oifp,
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 
+	/*
+	 * Make sure dummynet gets the correct direction, in case it needs to
+	 * re-inject later.
+	 */
+	pd->dir = PF_OUT;
+
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
diff --git a/tests/sys/netpfil/pf/route_to.sh b/tests/sys/netpfil/pf/route_to.sh
index 7e8310bceb30..31a47e75c82e 100644
--- a/tests/sys/netpfil/pf/route_to.sh
+++ b/tests/sys/netpfil/pf/route_to.sh
@@ -407,6 +407,55 @@ ifbound_cleanup()
 	pft_cleanup
 }
 
+atf_test_case "dummynet_frag" "cleanup"
+dummynet_frag_head()
+{
+	atf_set descr 'Test fragmentation with route-to and dummynet'
+	atf_set require.user root
+}
+
+dummynet_frag_body()
+{
+	pft_init
+	dummynet_init
+
+	epair_one=$(vnet_mkepair)
+	epair_two=$(vnet_mkepair)
+
+	ifconfig ${epair_one}a 192.0.2.1/24 up
+
+	vnet_mkjail alcatraz ${epair_one}b ${epair_two}a
+	jexec alcatraz ifconfig ${epair_one}b 192.0.2.2/24 up
+	jexec alcatraz ifconfig ${epair_two}a 198.51.100.1/24 up
+	jexec alcatraz sysctl net.inet.ip.forwarding=1
+
+	vnet_mkjail singsing ${epair_two}b
+	jexec singsing ifconfig ${epair_two}b 198.51.100.2/24 up
+	jexec singsing route add default 198.51.100.1
+
+	route add 198.51.100.0/24 192.0.2.2
+
+	jexec alcatraz dnctl pipe 1 config bw 1000Byte/s burst 4500
+	jexec alcatraz dnctl pipe 2 config
+	# Pipe 2 (configured above without a bandwidth) is used by the "pass out" rule so the
+	# pf_test(PF_OUT) call in pf_route() doesn't inherit pipe 1 from the input rule and delay packets.
+
+	jexec alcatraz pfctl -e
+	pft_set_rules alcatraz \
+		"set reassemble yes" \
+		"pass in route-to (${epair_two}a 198.51.100.2) inet proto icmp all icmp-type echoreq dnpipe 1" \
+		"pass out dnpipe 2"
+
+
+	atf_check -s exit:0 -o ignore ping -c 1 198.51.100.2
+	atf_check -s exit:0 -o ignore ping -c 1 -s 4000 198.51.100.2
+}
+
+dummynet_frag_cleanup()
+{
+	pft_cleanup
+}
+
 atf_init_test_cases()
 {
 	atf_add_test_case "v4"
@@ -416,4 +465,5 @@ atf_init_test_cases()
 	atf_add_test_case "icmp_nat"
 	atf_add_test_case "dummynet"
 	atf_add_test_case "ifbound"
+	atf_add_test_case "dummynet_frag"
 }