git: 19acc50667bf - main - inpcb: retire suppresion of randomization of ephemeral ports

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Mon, 31 Oct 2022 15:57:59 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=19acc50667bf298f289326553c214a2360492a90

commit 19acc50667bf298f289326553c214a2360492a90
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-10-31 15:57:11 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-10-31 15:57:11 +0000

    inpcb: retire suppresion of randomization of ephemeral ports
    
    The suppresion was added in 5f311da2ccb with no explanation in the
    commit message of the exact problem that was fixed. In the BSDCan
    2006 talk [1], slides 12 to 14, we can find that it seems that there
    was some problem with the TIME_WAIT state not properly being handled
    on the remote side (also FreeBSD!), and this switching off the
    suppression had hidden the problem.  The rationale of the change was
    that other stacks may also be buggy wrt the TIME_WAIT.
    
    I did not find the actual problem in TIME_WAIT that the suppression
    has hidden, neither a commit that would fix it.  However, since that
    time we started to handle SYNs with RFC5961 instead of RFC793, see
    3220a2121cc.  We also now have the tcp-testsuite [2], that has full
    coverage of all possible scenarios of receiving SYN in TIME_WAIT.
    
    This effectively reverts 5f311da2ccb6c216b79049172be840af4778129a
    and 6ee79c59d2c081f837a11cc78908be54a6dbe09d.
    
    [1] https://www.bsdcan.org/2006/papers/ImprovingTCPIP.pdf
    [2] https://github.com/freebsd-net/tcp-testsuite
    
    Reviewed by:            rscheff
    Discussed with:         rscheff, rrs, tuexen
    Differential revision:  https://reviews.freebsd.org/D37042
---
 share/man/man4/ip.4  |  15 --------
 sys/netinet/in_pcb.c | 101 +++------------------------------------------------
 sys/netinet/in_pcb.h |   8 ----
 3 files changed, 5 insertions(+), 119 deletions(-)

diff --git a/share/man/man4/ip.4 b/share/man/man4/ip.4
index d1c3b34b51a6..282b076a2bef 100644
--- a/share/man/man4/ip.4
+++ b/share/man/man4/ip.4
@@ -453,21 +453,6 @@ In scenarios such as benchmarking, this behavior may be undesirable.
 In these cases,
 .Va net.inet.ip.portrange.randomized
 can be used to toggle randomization off.
-If more than
-.Va net.inet.ip.portrange.randomcps
-ports have been allocated in the last second, then return to sequential
-port allocation.
-Return to random allocation only once the current port allocation rate
-drops below
-.Va net.inet.ip.portrange.randomcps
-for at least
-.Va net.inet.ip.portrange.randomtime
-seconds.
-The default values for
-.Va net.inet.ip.portrange.randomcps
-and
-.Va net.inet.ip.portrange.randomtime
-are 10 port allocations per second and 45 seconds correspondingly.
 .Ss "Multicast Options"
 .Tn IP
 multicasting is supported only on
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 3348526b144e..95db8f22f1fe 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
-#include <sys/callout.h>
 #include <sys/eventhandler.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
@@ -117,8 +116,6 @@ __FBSDID("$FreeBSD$");
 #define	INPCBLBGROUP_SIZMAX	256
 #define	INP_FREED	0x00000200	/* See in_pcb.h. */
 
-static struct callout	ipport_tick_callout;
-
 /*
  * These configure the range of local port addresses assigned to
  * "unspecified" outgoing connections/packets/whatever.
@@ -138,15 +135,8 @@ VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO;	/* 65535 */
 VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1;	/* 1023 */
 VNET_DEFINE(int, ipport_reservedlow);
 
-/* Variables dealing with random ephemeral port allocation. */
-VNET_DEFINE(int, ipport_randomized) = 1;	/* user controlled via sysctl */
-VNET_DEFINE(int, ipport_randomcps) = 10;	/* user controlled via sysctl */
-VNET_DEFINE(int, ipport_randomtime) = 45;	/* user controlled via sysctl */
-VNET_DEFINE(int, ipport_stoprandom);		/* toggled by ipport_tick */
-VNET_DEFINE(int, ipport_tcpallocs);
-VNET_DEFINE_STATIC(int, ipport_tcplastcount);
-
-#define	V_ipport_tcplastcount		VNET(ipport_tcplastcount)
+/* Enable random ephemeral port allocation by default. */
+VNET_DEFINE(int, ipport_randomized) = 1;
 
 #ifdef INET
 static struct inpcb	*in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
@@ -214,15 +204,6 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
-SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
-	CTLFLAG_VNET | CTLFLAG_RW,
-	&VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
-	"allocations before switching to a sequential one");
-SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
-	CTLFLAG_VNET | CTLFLAG_RW,
-	&VNET_NAME(ipport_randomtime), 0,
-	"Minimum time to keep sequential port "
-	"allocation before switching to a random one");
 
 #ifdef RATELIMIT
 counter_u64_t rate_limit_new;
@@ -730,7 +711,7 @@ in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp,
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *tmpinp;
 	unsigned short *lastport;
-	int count, dorandom, error;
+	int count, error;
 	u_short aux, first, last, lport;
 #ifdef INET
 	struct in_addr laddr, faddr;
@@ -764,27 +745,7 @@ in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp,
 		last  = V_ipport_lastauto;
 		lastport = &pcbinfo->ipi_lastport;
 	}
-	/*
-	 * For UDP(-Lite), use random port allocation as long as the user
-	 * allows it.  For TCP (and as of yet unknown) connections,
-	 * use random port allocation only if the user allows it AND
-	 * ipport_tick() allows it.
-	 */
-	if (V_ipport_randomized &&
-		(!V_ipport_stoprandom || pcbinfo == &V_udbinfo ||
-		pcbinfo == &V_ulitecbinfo))
-		dorandom = 1;
-	else
-		dorandom = 0;
-	/*
-	 * It makes no sense to do random port allocation if
-	 * we have the only port available.
-	 */
-	if (first == last)
-		dorandom = 0;
-	/* Make sure to not include UDP(-Lite) packets in the count. */
-	if (pcbinfo != &V_udbinfo && pcbinfo != &V_ulitecbinfo)
-		V_ipport_tcpallocs++;
+
 	/*
 	 * Instead of having two loops further down counting up or down
 	 * make sure that first is always <= last and go with only one
@@ -818,7 +779,7 @@ in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp,
 	tmpinp = NULL;
 	lport = *lportp;
 
-	if (dorandom)
+	if (V_ipport_randomized)
 		*lastport = first + (arc4random() % (last - first));
 
 	count = last - first;
@@ -2597,58 +2558,6 @@ in_pcbsosetlabel(struct socket *so)
 #endif
 }
 
-/*
- * ipport_tick runs once per second, determining if random port allocation
- * should be continued.  If more than ipport_randomcps ports have been
- * allocated in the last second, then we return to sequential port
- * allocation. We return to random allocation only once we drop below
- * ipport_randomcps for at least ipport_randomtime seconds.
- */
-static void
-ipport_tick(void *xtp)
-{
-	VNET_ITERATOR_DECL(vnet_iter);
-
-	VNET_LIST_RLOCK_NOSLEEP();
-	VNET_FOREACH(vnet_iter) {
-		CURVNET_SET(vnet_iter);	/* XXX appease INVARIANTS here */
-		if (V_ipport_tcpallocs - V_ipport_tcplastcount <=
-		    V_ipport_randomcps) {
-			if (V_ipport_stoprandom > 0)
-				V_ipport_stoprandom--;
-		} else
-			V_ipport_stoprandom = V_ipport_randomtime;
-		V_ipport_tcplastcount = V_ipport_tcpallocs;
-		CURVNET_RESTORE();
-	}
-	VNET_LIST_RUNLOCK_NOSLEEP();
-	callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
-}
-
-static void
-ip_fini(void *xtp)
-{
-
-	callout_stop(&ipport_tick_callout);
-}
-
-/*
- * The ipport_callout should start running at about the time we attach the
- * inet or inet6 domains.
- */
-static void
-ipport_tick_init(const void *unused __unused)
-{
-
-	/* Start ipport_tick. */
-	callout_init(&ipport_tick_callout, 1);
-	callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
-	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
-		SHUTDOWN_PRI_DEFAULT);
-}
-SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
-    ipport_tick_init, NULL);
-
 void
 inp_wlock(struct inpcb *inp)
 {
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 2ac1bb227613..29a042942ead 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -714,10 +714,6 @@ VNET_DECLARE(int, ipport_lastauto);
 VNET_DECLARE(int, ipport_hifirstauto);
 VNET_DECLARE(int, ipport_hilastauto);
 VNET_DECLARE(int, ipport_randomized);
-VNET_DECLARE(int, ipport_randomcps);
-VNET_DECLARE(int, ipport_randomtime);
-VNET_DECLARE(int, ipport_stoprandom);
-VNET_DECLARE(int, ipport_tcpallocs);
 
 #define	V_ipport_reservedhigh	VNET(ipport_reservedhigh)
 #define	V_ipport_reservedlow	VNET(ipport_reservedlow)
@@ -728,10 +724,6 @@ VNET_DECLARE(int, ipport_tcpallocs);
 #define	V_ipport_hifirstauto	VNET(ipport_hifirstauto)
 #define	V_ipport_hilastauto	VNET(ipport_hilastauto)
 #define	V_ipport_randomized	VNET(ipport_randomized)
-#define	V_ipport_randomcps	VNET(ipport_randomcps)
-#define	V_ipport_randomtime	VNET(ipport_randomtime)
-#define	V_ipport_stoprandom	VNET(ipport_stoprandom)
-#define	V_ipport_tcpallocs	VNET(ipport_tcpallocs)
 
 void	in_pcbinfo_init(struct inpcbinfo *, struct inpcbstorage *,
 	    u_int, u_int);