git: c1839039b193 - main - netlink: use netlink mbufs in the mbuf chains.

From: Alexander V. Chernikov <melifaro_at_FreeBSD.org>
Date: Fri, 02 Jun 2023 13:19:33 UTC
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=c1839039b193b48c8eb7520c75487f0bd4340c3b

commit c1839039b193b48c8eb7520c75487f0bd4340c3b
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2023-06-02 13:04:03 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2023-06-02 13:14:20 +0000

    netlink: use netlink mbufs in the mbuf chains.
    
    Continue D40356 and switch the remaining parts of mbuf-related
    code to the Netlink mbufs.
    
    Reviewed By: gallatin
    Differential Revision: https://reviews.freebsd.org/D40368
    MFC after:      2 weeks
---
 sys/modules/ktest/Makefile                         |   3 +-
 .../ktest/ktest_netlink_message_writer/Makefile    |  15 ++
 sys/netlink/ktest_netlink_message_writer.c         | 169 +++++++++++++++++++++
 sys/netlink/ktest_netlink_message_writer.h         |  60 ++++++++
 sys/netlink/netlink_message_writer.c               |  55 +++++--
 tests/sys/netlink/Makefile                         |   1 +
 tests/sys/netlink/test_netlink_message_writer.py   |  79 ++++++++++
 7 files changed, 370 insertions(+), 12 deletions(-)

diff --git a/sys/modules/ktest/Makefile b/sys/modules/ktest/Makefile
index 21c94caabc30..151db53417df 100644
--- a/sys/modules/ktest/Makefile
+++ b/sys/modules/ktest/Makefile
@@ -2,6 +2,7 @@ SYSDIR?=${SRCTOP}/sys
 .include "${SYSDIR}/conf/kern.opts.mk"
 
 SUBDIR=	ktest \
-	ktest_example
+	ktest_example \
+	ktest_netlink_message_writer
 
 .include <bsd.subdir.mk>
diff --git a/sys/modules/ktest/ktest_netlink_message_writer/Makefile b/sys/modules/ktest/ktest_netlink_message_writer/Makefile
new file mode 100644
index 000000000000..2d14d93897f8
--- /dev/null
+++ b/sys/modules/ktest/ktest_netlink_message_writer/Makefile
@@ -0,0 +1,15 @@
+# $FreeBSD$
+
+PACKAGE=	tests
+
+SYSDIR?=${SRCTOP}/sys
+.include "${SYSDIR}/conf/kern.opts.mk"
+
+.PATH: ${SYSDIR}/netlink
+
+KMOD=	ktest_netlink_message_writer
+SRCS=	ktest_netlink_message_writer.c
+SRCS+=	opt_netlink.h
+
+.include <bsd.kmod.mk>
+
diff --git a/sys/netlink/ktest_netlink_message_writer.c b/sys/netlink/ktest_netlink_message_writer.c
new file mode 100644
index 000000000000..c13a25e05a70
--- /dev/null
+++ b/sys/netlink/ktest_netlink_message_writer.c
@@ -0,0 +1,169 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_netlink.h"
+
+#include <tests/ktest.h>
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <netlink/netlink.h>
+#include <netlink/netlink_ctl.h>
+#include <netlink/netlink_message_writer.h>
+
+#define KTEST_CALLER
+#include <netlink/ktest_netlink_message_writer.h>
+
+#ifdef INVARIANTS
+
+struct test_mbuf_attrs {
+	uint32_t	size;
+	uint32_t	expected_avail;
+	uint32_t	expected_count;
+	uint32_t	wtype;
+	int		waitok;
+};
+
+#define	_OUT(_field)	offsetof(struct test_mbuf_attrs, _field)
+static const struct nlattr_parser nla_p_mbuf_w[] = {
+	{ .type = 1, .off = _OUT(size), .cb = nlattr_get_uint32 },
+	{ .type = 2, .off = _OUT(expected_avail), .cb = nlattr_get_uint32 },
+	{ .type = 3, .off = _OUT(expected_count), .cb = nlattr_get_uint32 },
+	{ .type = 4, .off = _OUT(wtype), .cb = nlattr_get_uint32 },
+	{ .type = 5, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(mbuf_w_parser, nla_p_mbuf_w);
+
+static int
+test_mbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
+{
+	struct test_mbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
+
+	ctx->arg = attrs;
+	if (attrs != NULL)
+		return (nl_parse_nested(nla, &mbuf_w_parser, ctx->npt, attrs));
+	return (ENOMEM);
+}
+
+static int
+test_mbuf_writer_allocation(struct ktest_test_context *ctx)
+{
+	struct test_mbuf_attrs *attrs = ctx->arg;
+	bool ret;
+	struct nl_writer nw = {};
+
+	ret = nlmsg_get_buf_type_wrapper(&nw, attrs->size, attrs->wtype, attrs->waitok);
+	if (!ret)
+		return (EINVAL);
+
+	int alloc_len = nw.alloc_len;
+	KTEST_LOG(ctx, "requested %u, allocated %d", attrs->size, alloc_len);
+
+	/* Set cleanup callback */
+	nw.writer_target = NS_WRITER_TARGET_SOCKET;
+	nlmsg_set_callback_wrapper(&nw);
+
+	/* Mark enomem to avoid reallocation */
+	nw.enomem = true;
+
+	if (nlmsg_reserve_data(&nw, alloc_len, void *) == NULL) {
+		KTEST_LOG(ctx, "unable to get %d bytes from the writer", alloc_len);
+		return (EINVAL);
+	}
+
+	/* Mark as empty to free the storage */
+	nw.offset = 0;
+	nlmsg_flush(&nw);
+
+	if (alloc_len < attrs->expected_avail) {
+		KTEST_LOG(ctx, "alloc_len %d, expected %u",
+		    alloc_len, attrs->expected_avail);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+static int
+test_mbuf_chain_allocation(struct ktest_test_context *ctx)
+{
+	struct test_mbuf_attrs *attrs = ctx->arg;
+	int mflags = attrs->waitok ? M_WAITOK : M_NOWAIT;
+	struct mbuf *chain = nl_get_mbuf_chain_wrapper(attrs->size, mflags);
+
+	if (chain == NULL) {
+		KTEST_LOG(ctx, "nl_get_mbuf_chain(%u) returned NULL", attrs->size);
+		return (EINVAL);
+	}
+
+	/* Iterate and check number of mbufs and space */
+	uint32_t allocated_count = 0, allocated_size = 0;
+	for (struct mbuf *m = chain; m != NULL; m = m->m_next) {
+		allocated_count++;
+		allocated_size += M_SIZE(m);
+	}
+	m_freem(chain);
+
+	if (attrs->expected_avail > allocated_size) {
+		KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
+				" expected/allocated count %u/%u",
+		    attrs->expected_avail, allocated_size,
+		    attrs->expected_count, allocated_count);
+		return (EINVAL);
+	}
+
+	if (attrs->expected_count > 0 && (attrs->expected_count != allocated_count)) {
+		KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
+				" expected/allocated count %u/%u",
+		    attrs->expected_avail, allocated_size,
+		    attrs->expected_count, allocated_count);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+#endif
+
+static const struct ktest_test_info tests[] = {
+#ifdef INVARIANTS
+	{
+		.name = "test_mbuf_writer_allocation",
+		.desc = "test different mbuf sizes in the mbuf writer",
+		.func = &test_mbuf_writer_allocation,
+		.parse = &test_mbuf_parser,
+	},
+	{
+		.name = "test_mbuf_chain_allocation",
+		.desc = "verify allocation different chain sizes",
+		.func = &test_mbuf_chain_allocation,
+		.parse = &test_mbuf_parser,
+	},
+#endif
+};
+KTEST_MODULE_DECLARE(ktest_netlink_message_writer, tests);
diff --git a/sys/netlink/ktest_netlink_message_writer.h b/sys/netlink/ktest_netlink_message_writer.h
new file mode 100644
index 000000000000..b7864bea59c9
--- /dev/null
+++ b/sys/netlink/ktest_netlink_message_writer.h
@@ -0,0 +1,60 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETLINK_KTEST_NETLINK_MESSAGE_WRITER_H_
+#define _NETLINK_KTEST_NETLINK_MESSAGE_WRITER_H_
+
+#if defined(_KERNEL) && defined(INVARIANTS)
+
+bool nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok);
+void nlmsg_set_callback_wrapper(struct nl_writer *nw);
+struct mbuf *nl_get_mbuf_chain_wrapper(int len, int malloc_flags);
+
+#ifndef KTEST_CALLER
+
+bool
+nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok)
+{
+	return (nlmsg_get_buf_type(nw, size, type, waitok));
+}
+
+void
+nlmsg_set_callback_wrapper(struct nl_writer *nw)
+{
+	nlmsg_set_callback(nw);
+}
+
+struct mbuf *
+nl_get_mbuf_chain_wrapper(int len, int malloc_flags)
+{
+	return (nl_get_mbuf_chain(len, malloc_flags));
+}
+#endif
+
+#endif
+
+#endif
diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c
index 841bdb2d5c0b..31f1c9f80457 100644
--- a/sys/netlink/netlink_message_writer.c
+++ b/sys/netlink/netlink_message_writer.c
@@ -69,7 +69,7 @@ _DECLARE_DEBUG(LOG_INFO);
  *
  * There are 3 types of storage:
  * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
- *    fits in MCLBYTES)
+ *    fits in NLMBUFSIZE)
  * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
  *    to be larger than one supported by NS_WRITER_TYPE_MBUF)
  * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
@@ -131,6 +131,38 @@ nl_get_mbuf(int size, int malloc_flags)
 	return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR));
 }
 
+/*
+ * Gets a chain of Netlink mbufs.
+ * This is strip-down version of m_getm2()
+ */
+static struct mbuf *
+nl_get_mbuf_chain(int len, int malloc_flags)
+{
+	struct mbuf *m_chain = NULL, *m_tail = NULL;
+	int mbuf_flags = M_PKTHDR;
+
+	while (len > 0) {
+		int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len;
+		struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags);
+
+		if (m == NULL) {
+			m_freem(m_chain);
+			return (NULL);
+		}
+
+		/* Book keeping. */
+		len -= M_SIZE(m);
+		if (m_tail != NULL)
+			m_tail->m_next = m;
+		else
+			m_chain = m;
+		m_tail = m;
+		mbuf_flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
+	}
+
+	return (m_chain);
+}
+
 void
 nl_init_msg_zone(void)
 {
@@ -187,7 +219,7 @@ nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 		return (true);
 	}
 
-	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
+	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 	if (__predict_false(m == NULL)) {
 		/* XXX: should we set sorcverr? */
 		free(buf, M_NETLINK);
@@ -210,7 +242,7 @@ nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 		return (true);
 	}
 
-	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
+	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 	if (__predict_false(m == NULL)) {
 		free(buf, M_NETLINK);
 		return (false);
@@ -237,9 +269,8 @@ nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 	}
 
 	if (*m0 == NULL) {
-		struct mbuf *m;
+		struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 
-		m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
 		if (__predict_false(m == NULL)) {
 			free(buf, M_NETLINK);
 			return (false);
@@ -423,7 +454,7 @@ nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 		return (true);
 	}
 
-	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
+	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 	if (__predict_false(m == NULL)) {
 		free(buf, M_NETLINK);
 		return (false);
@@ -492,7 +523,7 @@ nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
 	int type;
 
 	if (!is_linux) {
-		if (__predict_true(size <= MCLBYTES))
+		if (__predict_true(size <= NLMBUFSIZE))
 			type = NS_WRITER_TYPE_MBUF;
 		else
 			type = NS_WRITER_TYPE_BUF;
@@ -585,12 +616,12 @@ _nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
 
 	/* Calculated new buffer size and allocate it s*/
 	completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
-	if (completed_len > 0 && required_len < MCLBYTES) {
+	if (completed_len > 0 && required_len < NLMBUFSIZE) {
 		/* We already ran out of space, use the largest effective size */
-		new_len = max(nw->alloc_len, MCLBYTES);
+		new_len = max(nw->alloc_len, NLMBUFSIZE);
 	} else {
-		if (nw->alloc_len < MCLBYTES)
-			new_len = MCLBYTES;
+		if (nw->alloc_len < NLMBUFSIZE)
+			new_len = NLMBUFSIZE;
 		else
 			new_len = nw->alloc_len * 2;
 		while (new_len < required_len)
@@ -755,3 +786,5 @@ _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
 
 	return (true);
 }
+
+#include <netlink/ktest_netlink_message_writer.h>
diff --git a/tests/sys/netlink/Makefile b/tests/sys/netlink/Makefile
index 16559f0e9d3d..83e31027b16f 100644
--- a/tests/sys/netlink/Makefile
+++ b/tests/sys/netlink/Makefile
@@ -11,6 +11,7 @@ ATF_TESTS_PYTEST +=	test_rtnl_iface.py
 ATF_TESTS_PYTEST +=	test_rtnl_ifaddr.py
 ATF_TESTS_PYTEST +=	test_rtnl_neigh.py
 ATF_TESTS_PYTEST +=	test_rtnl_route.py
+ATF_TESTS_PYTEST +=	test_netlink_message_writer.py
 
 CFLAGS+=	-I${.CURDIR:H:H:H}
 
diff --git a/tests/sys/netlink/test_netlink_message_writer.py b/tests/sys/netlink/test_netlink_message_writer.py
new file mode 100644
index 000000000000..df1768129b11
--- /dev/null
+++ b/tests/sys/netlink/test_netlink_message_writer.py
@@ -0,0 +1,79 @@
+import mmap
+import pytest
+
+from atf_python.ktest import BaseKernelTest
+from atf_python.sys.netlink.attrs import NlAttrU32
+
+
+M_NOWAIT = 1
+M_WAITOK = 2
+NS_WRITER_TYPE_MBUF = 0
+NS_WRITER_TYPE_BUF = 1
+NS_WRITER_TYPE_LBUF = 1
+
+MHLEN = 160
+MCLBYTES = 2048  # XXX: may differ on some archs?
+MJUMPAGESIZE = mmap.PAGESIZE
+MJUM9BYTES = 9 * 1024
+MJUM16BYTES = 16 * 1024
+
+
+class TestNetlinkMessageWriter(BaseKernelTest):
+    KTEST_MODULE_NAME = "ktest_netlink_message_writer"
+
+    @pytest.mark.parametrize(
+        "malloc_flags",
+        [
+            pytest.param(M_NOWAIT, id="NOWAIT"),
+            pytest.param(M_WAITOK, id="WAITOK"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "writer_type",
+        [
+            pytest.param(NS_WRITER_TYPE_MBUF, id="MBUF"),
+            pytest.param(NS_WRITER_TYPE_BUF, id="BUF"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "sz",
+        [
+            pytest.param([160, 160], id="MHLEN"),
+            pytest.param([MCLBYTES, MCLBYTES], id="MCLBYTES"),
+        ],
+    )
+    def test_mbuf_writer_allocation(self, sz, writer_type, malloc_flags):
+        """override to parametrize"""
+
+        test_meta = [
+            NlAttrU32(1, sz[0]),  # size
+            NlAttrU32(2, sz[1]),  # expected_avail
+            NlAttrU32(4, writer_type),
+            NlAttrU32(5, malloc_flags),
+        ]
+        self.runtest(test_meta)
+
+    @pytest.mark.parametrize(
+        "malloc_flags",
+        [
+            pytest.param(M_NOWAIT, id="NOWAIT"),
+            pytest.param(M_WAITOK, id="WAITOK"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "sz",
+        [
+            pytest.param([160, 160, 1], id="MHLEN"),
+            pytest.param([MCLBYTES, MCLBYTES, 1], id="MCLBYTES"),
+            pytest.param([MCLBYTES + 1, MCLBYTES + 1, 2], id="MCLBYTES_MHLEN"),
+            pytest.param([MCLBYTES + 256, MCLBYTES * 2, 2], id="MCLBYTESx2"),
+        ],
+    )
+    def test_mbuf_chain_allocation(self, sz, malloc_flags):
+        test_meta = [
+            NlAttrU32(1, sz[0]),  # size
+            NlAttrU32(2, sz[1]),  # expected_avail
+            NlAttrU32(3, sz[2]),  # expected_count
+            NlAttrU32(5, malloc_flags),
+        ]
+        self.runtest(test_meta)