git: 1494f4776af3 - main - ip_reass: add loader tunable to tune the reassembly hash size
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 08 Sep 2022 20:50:48 UTC
The branch main has been updated by glebius:
URL: https://cgit.FreeBSD.org/src/commit/?id=1494f4776af32b49e3c5bbdf09d6b2995b374614
commit 1494f4776af32b49e3c5bbdf09d6b2995b374614
Author: Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2022-09-08 20:49:58 +0000
Commit: Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2022-09-08 20:49:58 +0000
ip_reass: add loader tunable to tune the reassembly hash size
---
share/man/man4/inet.4 | 2 ++
sys/netinet/ip_reass.c | 45 +++++++++++++++++++++++++++++----------------
2 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/share/man/man4/inet.4 b/share/man/man4/inet.4
index edd14fbc48b9..45da9301f86f 100644
--- a/share/man/man4/inet.4
+++ b/share/man/man4/inet.4
@@ -280,6 +280,8 @@ The number must be between 512 and 32768 inclusive.
This is a per-VNET value.
.It Va random_id_total
Integer: count of IP IDs created (read-only, per-VNET).
+.It Va reass_hashsize
+Integer: number of hash slots in the IPv4 reassembly queue (read-only, loader tunable).
.It Va redirect
Boolean: enable/disable sending of ICMP redirects in response to
.Tn IP
diff --git a/sys/netinet/ip_reass.c b/sys/netinet/ip_reass.c
index 257211c0a1a1..88ba74ace4d6 100644
--- a/sys/netinet/ip_reass.c
+++ b/sys/netinet/ip_reass.c
@@ -82,10 +82,12 @@ struct ipqbucket {
int count;
};
-VNET_DEFINE_STATIC(struct ipqbucket, ipq[IPREASS_NHASH]);
+VNET_DEFINE_STATIC(struct ipqbucket *, ipq);
#define V_ipq VNET(ipq)
VNET_DEFINE_STATIC(uint32_t, ipq_hashseed);
-#define V_ipq_hashseed VNET(ipq_hashseed)
+#define V_ipq_hashseed VNET(ipq_hashseed)
+VNET_DEFINE_STATIC(uint32_t, ipq_hashsize);
+#define V_ipq_hashsize VNET(ipq_hashsize)
#define IPQ_LOCK(i) mtx_lock(&V_ipq[i].lock)
#define IPQ_TRYLOCK(i) mtx_trylock(&V_ipq[i].lock)
@@ -135,14 +137,14 @@ ipq_drop(struct ipqbucket *bucket, struct ipq *fp)
* Limit the total number of reassembly queues per VNET to the
* IP fragment limit, but ensure the limit will not allow any bucket
* to grow above 100 items. (The bucket limit is
- * IP_MAXFRAGPACKETS / (IPREASS_NHASH / 2), so the 50 is the correct
+ * IP_MAXFRAGPACKETS / (V_ipq_hashsize / 2), so the 50 is the correct
* multiplier to reach a 100-item limit.)
* The 100-item limit was chosen as brief testing seems to show that
* this produces "reasonable" performance on some subset of systems
* under DoS attack.
*/
#define IP_MAXFRAGS (nmbclusters / 32)
-#define IP_MAXFRAGPACKETS (imin(IP_MAXFRAGS, IPREASS_NHASH * 50))
+#define IP_MAXFRAGPACKETS (imin(IP_MAXFRAGS, V_ipq_hashsize * 50))
static int maxfrags;
static u_int __exclusive_cache_line nfrags;
@@ -155,6 +157,11 @@ SYSCTL_UINT(_net_inet_ip, OID_AUTO, curfrags, CTLFLAG_RD,
VNET_DEFINE_STATIC(uma_zone_t, ipq_zone);
#define V_ipq_zone VNET(ipq_zone)
+
+SYSCTL_UINT(_net_inet_ip, OID_AUTO, reass_hashsize,
+ CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(ipq_hashsize), 0,
+ "Size of IP fragment reassembly hashtable");
+
SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets,
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
NULL, 0, sysctl_maxfragpackets, "I",
@@ -626,7 +633,7 @@ static void
ipreass_drain_vnet(void)
{
- for (int i = 0; i < IPREASS_NHASH; i++) {
+ for (int i = 0; i < V_ipq_hashsize; i++) {
IPQ_LOCK(i);
while(!TAILQ_EMPTY(&V_ipq[i].head))
ipq_drop(&V_ipq[i], TAILQ_FIRST(&V_ipq[i].head));
@@ -656,15 +663,21 @@ ipreass_drain(void)
/*
* Initialize IP reassembly structures.
*/
+MALLOC_DEFINE(M_IPREASS_HASH, "IP reass", "IP packet reassembly hash headers");
void
ipreass_vnet_init(void)
{
int max;
- for (int i = 0; i < IPREASS_NHASH; i++) {
+ V_ipq_hashsize = IPREASS_NHASH;
+ TUNABLE_INT_FETCH("net.inet.ip.reass_hashsize", &V_ipq_hashsize);
+ V_ipq = malloc(sizeof(struct ipqbucket) * V_ipq_hashsize,
+ M_IPREASS_HASH, M_WAITOK);
+
+ for (int i = 0; i < V_ipq_hashsize; i++) {
TAILQ_INIT(&V_ipq[i].head);
mtx_init(&V_ipq[i].lock, "IP reassembly", NULL,
- MTX_DEF | MTX_DUPOK);
+ MTX_DEF | MTX_DUPOK | MTX_NEW);
callout_init_mtx(&V_ipq[i].timer, &V_ipq[i].lock, 0);
V_ipq[i].count = 0;
#ifdef VIMAGE
@@ -677,7 +690,7 @@ ipreass_vnet_init(void)
NULL, UMA_ALIGN_PTR, 0);
max = IP_MAXFRAGPACKETS;
max = uma_zone_set_max(V_ipq_zone, max);
- V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
+ V_ipreass_maxbucketsize = imax(max / (V_ipq_hashsize / 2), 1);
}
void
@@ -717,7 +730,7 @@ ipreass_cleanup(void *arg __unused, struct ifnet *ifp)
return;
}
- for (i = 0; i < IPREASS_NHASH; i++) {
+ for (i = 0; i < V_ipq_hashsize; i++) {
IPQ_LOCK(i);
/* Scan fragment list. */
TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, temp) {
@@ -744,7 +757,7 @@ ipreass_destroy(void)
ipreass_drain_vnet();
uma_zdestroy(V_ipq_zone);
V_ipq_zone = NULL;
- for (int i = 0; i < IPREASS_NHASH; i++)
+ for (int i = 0; i < V_ipq_hashsize; i++)
mtx_destroy(&V_ipq[i].lock);
}
#endif
@@ -765,7 +778,7 @@ ipreass_drain_tomax(void)
* necessary, drop enough of the oldest elements from
* each bucket to get under the new limit.
*/
- for (int i = 0; i < IPREASS_NHASH; i++) {
+ for (int i = 0; i < V_ipq_hashsize; i++) {
IPQ_LOCK(i);
while (V_ipq[i].count > V_ipreass_maxbucketsize &&
(fp = TAILQ_LAST(&V_ipq[i].head, ipqhead)) != NULL)
@@ -782,7 +795,7 @@ ipreass_drain_tomax(void)
*/
target = uma_zone_get_max(V_ipq_zone);
while (uma_zone_get_cur(V_ipq_zone) > target) {
- for (int i = 0; i < IPREASS_NHASH; i++) {
+ for (int i = 0; i < V_ipq_hashsize; i++) {
IPQ_LOCK(i);
fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
if (fp != NULL) {
@@ -806,7 +819,7 @@ ipreass_zone_change(void *tag)
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
max = uma_zone_set_max(V_ipq_zone, max);
- V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
+ V_ipreass_maxbucketsize = imax(max / (V_ipq_hashsize / 2), 1);
ipreass_drain_tomax();
CURVNET_RESTORE();
}
@@ -838,7 +851,7 @@ sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
* and place an extreme upper bound.
*/
max = uma_zone_set_max(V_ipq_zone, max);
- V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
+ V_ipreass_maxbucketsize = imax(max / (V_ipq_hashsize / 2), 1);
ipreass_drain_tomax();
V_noreass = 0;
} else if (max == 0) {
@@ -865,8 +878,8 @@ ipq_reuse(int start)
IPQ_LOCK_ASSERT(start);
- for (i = 0; i < IPREASS_NHASH; i++) {
- bucket = (start + i) % IPREASS_NHASH;
+ for (i = 0; i < V_ipq_hashsize; i++) {
+ bucket = (start + i) % V_ipq_hashsize;
if (bucket != start && IPQ_TRYLOCK(bucket) == 0)
continue;
fp = TAILQ_LAST(&V_ipq[bucket].head, ipqhead);