git: a0069f16fe23 - main - tools: Add a couple of utilities which were useful for testing SO_SPLICE
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 01 Nov 2024 14:07:32 UTC
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=a0069f16fe235068fac8fac8d11aee4c5dac6e8c
commit a0069f16fe235068fac8fac8d11aee4c5dac6e8c
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-11-01 14:02:40 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-11-01 14:02:40 +0000
tools: Add a couple of utilities which were useful for testing SO_SPLICE
Sponsored by: Klara, Inc.
Sponsored by: Stormshield
Differential Revision: https://reviews.freebsd.org/D47308
---
tools/tools/README | 1 +
tools/tools/so_splice/Makefile | 12 ++
tools/tools/so_splice/pingpong.c | 197 +++++++++++++++++
tools/tools/so_splice/proxy.c | 451 +++++++++++++++++++++++++++++++++++++++
4 files changed, 661 insertions(+)
diff --git a/tools/tools/README b/tools/tools/README
index 45f1d09f2d21..c5b42d08ff0d 100644
--- a/tools/tools/README
+++ b/tools/tools/README
@@ -52,6 +52,7 @@ pciid Generate src/share/misc/pci_vendors.
pciroms A tool for dumping PCI ROM images. WARNING: alpha quality.
pirtool A tool for dumping the $PIR table on i386 machines at runtime.
scsi-defects Get at the primary or grown defect list of a SCSI disk.
+so_splice Utilities relating to the SO_SPLICE socket option.
sysdoc Build a manual page with available sysctls for a specific
kernel configuration.
track Track the progress of a world / kernel build
diff --git a/tools/tools/so_splice/Makefile b/tools/tools/so_splice/Makefile
new file mode 100644
index 000000000000..57ced2fc7b47
--- /dev/null
+++ b/tools/tools/so_splice/Makefile
@@ -0,0 +1,12 @@
+PROGS= pingpong proxy
+
+MAN=
+
+WARNS?= 6
+
+.if defined(SRCDIR)
+CFLAGS+= -I${SRCDIR}/include \
+ -I${SRCDIR}/sys
+.endif
+
+.include <bsd.progs.mk>
diff --git a/tools/tools/so_splice/pingpong.c b/tools/tools/so_splice/pingpong.c
new file mode 100644
index 000000000000..bfcd7f4e0736
--- /dev/null
+++ b/tools/tools/so_splice/pingpong.c
@@ -0,0 +1,197 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Klara, Inc.
+ */
+
+/*
+ * A utility which implements a simple ping-pong over TCP, and prints the amount
+ * of time elapsed for each round trip.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+
+#include <err.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+static void
+usage(void)
+{
+ fprintf(stderr,
+"usage: pingpong [-c <target addr> [-l <msgsz>] [-n <count]]|[-s <listen addr>]\n");
+ exit(1);
+}
+
+static void
+addrinfo(struct sockaddr_storage *ss, const char *addr)
+{
+ struct addrinfo hints, *res, *res1;
+ char *host, *port;
+ int error;
+
+ host = strdup(addr);
+ if (host == NULL)
+ err(1, "strdup");
+ port = strchr(host, ':');
+ if (port == NULL)
+ errx(1, "invalid address '%s', should be <addr>:<port>", host);
+ *port++ = '\0';
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_socktype = SOCK_STREAM;
+ error = getaddrinfo(host, port, &hints, &res);
+ if (error != 0)
+ errx(1, "%s", gai_strerror(error));
+ for (res1 = res; res != NULL; res = res->ai_next) {
+ if (res->ai_protocol == IPPROTO_TCP) {
+ memcpy(ss, res->ai_addr, res->ai_addrlen);
+ break;
+ }
+ }
+ if (res == NULL)
+ errx(1, "no TCP address found for '%s'", host);
+ free(host);
+ freeaddrinfo(res1);
+}
+
+int
+main(int argc, char **argv)
+{
+ char buf[1024];
+ struct sockaddr_storage ss;
+ char *addr;
+ size_t len;
+ int ch, count, s;
+ enum { NONE, CLIENT, SERVER } mode;
+
+ count = 0;
+ len = 0;
+ mode = NONE;
+ while ((ch = getopt(argc, argv, "c:l:n:s:")) != -1) {
+ switch (ch) {
+ case 'c':
+ addr = optarg;
+ mode = CLIENT;
+ break;
+ case 'l':
+ len = strtol(optarg, NULL, 10);
+ if (len > sizeof(buf))
+ errx(1, "message size too large");
+ if (len == 0)
+ errx(1, "message size must be non-zero");
+ break;
+ case 'n':
+ count = strtol(optarg, NULL, 10);
+ if (count <= 0)
+ errx(1, "count must be positive");
+ break;
+ case 's':
+ addr = optarg;
+ mode = SERVER;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ if (mode == NONE)
+ usage();
+ if (mode == SERVER && len != 0)
+ usage();
+
+ if (mode == CLIENT) {
+ if (len == 0)
+ len = 1;
+ if (count == 0)
+ count = 1;
+ }
+
+ addrinfo(&ss, addr);
+
+ s = socket(ss.ss_family, SOCK_STREAM, 0);
+ if (s == -1)
+ err(1, "socket");
+ if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &(int){1}, sizeof(int)) ==
+ -1)
+ err(1, "setsockopt");
+
+ if (mode == CLIENT) {
+ struct timespec ts1, ts2;
+ ssize_t n;
+
+ memset(buf, 'a', len);
+ if (connect(s, (struct sockaddr *)&ss, ss.ss_len) == -1)
+ err(1, "connect");
+
+ for (int i = 0; i < count; i++) {
+ int64_t ns;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+ n = write(s, buf, len);
+ if (n < 0)
+ err(1, "write");
+ n = read(s, buf, len);
+ if (n < 0)
+ err(1, "read");
+ if ((size_t)n < len)
+ errx(1, "unexpected short read");
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+
+ ns = (ts2.tv_sec - ts1.tv_sec) * 1000000000 +
+ (ts2.tv_nsec - ts1.tv_nsec);
+ printf("round trip time: %ld.%02ld us\n", ns / 1000,
+ (ns % 1000) / 10);
+ for (size_t j = 0; j < len; j++) {
+ if (buf[j] != 'b')
+ errx(1, "unexpected data");
+ }
+ }
+ } else /* if (mode == SERVER) */ {
+ int cs;
+
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &(int){1},
+ sizeof(int)) == -1)
+ err(1, "setsockopt");
+ if (bind(s, (struct sockaddr *)&ss, ss.ss_len) == -1)
+ err(1, "bind");
+ if (listen(s, 1) == -1)
+ err(1, "listen");
+
+ for (;;) {
+ ssize_t n;
+
+ cs = accept(s, NULL, NULL);
+ if (cs == -1)
+ err(1, "accept");
+
+ for (;;) {
+ n = read(cs, buf, sizeof(buf));
+ if (n < 0) {
+ if (errno == ECONNRESET)
+ break;
+ err(1, "read");
+ }
+ if (n == 0)
+ break;
+ memset(buf, 'b', n);
+ n = write(cs, buf, n);
+ if (n < 0)
+ err(1, "write");
+ }
+ (void)close(cs);
+ }
+ }
+
+ return (0);
+}
diff --git a/tools/tools/so_splice/proxy.c b/tools/tools/so_splice/proxy.c
new file mode 100644
index 000000000000..409a47225522
--- /dev/null
+++ b/tools/tools/so_splice/proxy.c
@@ -0,0 +1,451 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Klara, Inc.
+ */
+
+/*
+ * A simple TCP proxy. Listens on a local address until a connection appears,
+ * then opens a TCP connection to the target address and shuttles data between
+ * the two until one side closes its connection.
+ *
+ * For example:
+ *
+ * $ proxy -l 127.0.0.1:8080 www.example.com:80
+ *
+ * The -m flag selects the mode of the transfer. Specify "-m copy" to enable
+ * copying through userspace, and "-m splice" to use SO_SPLICE.
+ *
+ * The -L flag enables a loopback mode, wherein all data is additionally proxied
+ * through a loopback TCP connection. This exists mostly to help test a
+ * specific use-case in custom proxy software where we would like to use
+ * SO_SPLICE.
+ */
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <netdb.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct proxy_softc {
+ struct sockaddr_storage lss;
+ struct sockaddr_storage tss;
+ size_t bufsz;
+ enum proxy_mode { PROXY_MODE_COPY, PROXY_MODE_SPLICE } mode;
+ bool loopback;
+};
+
+static void
+usage(void)
+{
+ fprintf(stderr,
+"usage: proxy [-m copy|splice] [-s <buf-size>] [-L] -l <listen addr> <target addr>\n");
+ exit(1);
+}
+
+static void
+proxy_copy(struct proxy_softc *sc, int cs, int ts)
+{
+ struct kevent kev[2];
+ uint8_t *buf;
+ int kq;
+
+ kq = kqueue();
+ if (kq == -1)
+ err(1, "kqueue");
+
+ EV_SET(&kev[0], cs, EVFILT_READ, EV_ADD, 0, 0, (void *)(uintptr_t)ts);
+ EV_SET(&kev[1], ts, EVFILT_READ, EV_ADD, 0, 0, (void *)(uintptr_t)cs);
+ if (kevent(kq, kev, 2, NULL, 0, NULL) == -1)
+ err(1, "kevent");
+
+ buf = malloc(sc->bufsz);
+ if (buf == NULL)
+ err(1, "malloc");
+
+ for (;;) {
+ uint8_t *data;
+ ssize_t n, resid;
+ int rs, ws;
+
+ if (kevent(kq, NULL, 0, kev, 2, NULL) == -1) {
+ if (errno == EINTR)
+ continue;
+ err(1, "kevent");
+ }
+
+ rs = (int)kev[0].ident;
+ ws = (int)(uintptr_t)kev[0].udata;
+
+ n = read(rs, buf, sc->bufsz);
+ if (n == -1) {
+ if (errno == ECONNRESET)
+ break;
+ err(1, "read");
+ }
+ if (n == 0)
+ break;
+
+ data = buf;
+ resid = n;
+ do {
+ n = write(ws, data, resid);
+ if (n == -1) {
+ if (errno == EINTR)
+ continue;
+ if (errno == ECONNRESET || errno == EPIPE)
+ break;
+ err(1, "write");
+ }
+ assert(n > 0);
+ data += n;
+ resid -= n;
+ } while (resid > 0);
+ }
+
+ free(buf);
+ close(kq);
+}
+
+static void
+splice(int s1, int s2)
+{
+ struct splice sp;
+
+ memset(&sp, 0, sizeof(sp));
+ sp.sp_fd = s2;
+ if (setsockopt(s1, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)) == -1)
+ err(1, "setsockopt");
+}
+
+static void
+proxy_splice(struct proxy_softc *sc __unused, int cs, int ts)
+{
+ struct kevent kev[2];
+ int error, kq;
+
+ /* Set up our splices. */
+ splice(cs, ts);
+ splice(ts, cs);
+
+ /* Block until the connection is terminated. */
+ kq = kqueue();
+ if (kq == -1)
+ err(1, "kqueue");
+ EV_SET(&kev[0], cs, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ EV_SET(&kev[1], ts, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ do {
+ error = kevent(kq, kev, 2, kev, 2, NULL);
+ if (error == -1 && errno != EINTR)
+ err(1, "kevent");
+ } while (error <= 0);
+
+ close(kq);
+}
+
+static void
+nodelay(int s)
+{
+ if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &(int){1}, sizeof(int)) ==
+ -1)
+ err(1, "setsockopt");
+}
+
+/*
+ * Like socketpair(2), but for TCP sockets on the loopback address.
+ */
+static void
+tcp_socketpair(int out[2], int af)
+{
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ struct sockaddr *sa;
+ int sd[2];
+
+ sd[0] = socket(af, SOCK_STREAM, 0);
+ if (sd[0] == -1)
+ err(1, "socket");
+ sd[1] = socket(af, SOCK_STREAM, 0);
+ if (sd[1] == -1)
+ err(1, "socket");
+
+ nodelay(sd[0]);
+ nodelay(sd[1]);
+
+ if (af == AF_INET) {
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ sin.sin_port = 0;
+ sin.sin_len = sizeof(sin);
+ sa = (struct sockaddr *)&sin;
+ } else if (af == AF_INET6) {
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = in6addr_loopback;
+ sin6.sin6_port = 0;
+ sin6.sin6_len = sizeof(sin6);
+ sa = (struct sockaddr *)&sin6;
+ } else {
+ errx(1, "unsupported address family %d", af);
+ }
+
+ if (bind(sd[0], sa, sa->sa_len) == -1)
+ err(1, "bind");
+ if (listen(sd[0], 1) == -1)
+ err(1, "listen");
+
+ if (getsockname(sd[0], sa, &(socklen_t){sa->sa_len}) == -1)
+ err(1, "getsockname");
+ if (connect(sd[1], sa, sa->sa_len) == -1)
+ err(1, "connect");
+
+ out[0] = sd[1];
+ out[1] = accept(sd[0], NULL, NULL);
+ if (out[1] == -1)
+ err(1, "accept");
+ close(sd[0]);
+}
+
+/*
+ * Proxy data between two connected TCP sockets. Returns the PID of the process
+ * forked off to handle the data transfer.
+ */
+static pid_t
+proxy(struct proxy_softc *sc, int s1, int s2)
+{
+ pid_t child;
+
+ child = fork();
+ if (child == -1)
+ err(1, "fork");
+ if (child != 0) {
+ close(s1);
+ close(s2);
+ return (child);
+ }
+
+ if (sc->mode == PROXY_MODE_COPY)
+ proxy_copy(sc, s1, s2);
+ else
+ proxy_splice(sc, s1, s2);
+ _exit(0);
+}
+
+/*
+ * The proxy event loop accepts connections and forks off child processes to
+ * handle them. We also handle events generated when child processes exit
+ * (triggered by one side closing its connection).
+ */
+static void
+eventloop(struct proxy_softc *sc)
+{
+ struct kevent kev;
+ int kq, lsd;
+ pid_t child;
+
+ lsd = socket(sc->lss.ss_family, SOCK_STREAM, 0);
+ if (lsd == -1)
+ err(1, "socket");
+ if (setsockopt(lsd, SOL_SOCKET, SO_REUSEADDR, &(int){1}, sizeof(int)) ==
+ -1)
+ err(1, "setsockopt");
+ if (bind(lsd, (struct sockaddr *)&sc->lss, sc->lss.ss_len) == -1)
+ err(1, "bind");
+ if (listen(lsd, 5) == -1)
+ err(1, "listen");
+
+ kq = kqueue();
+ if (kq == -1)
+ err(1, "kqueue");
+ EV_SET(&kev, lsd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
+ err(1, "kevent");
+
+ for (;;) {
+ if (kevent(kq, NULL, 0, &kev, 1, NULL) == -1) {
+ if (errno == EINTR)
+ continue;
+ err(1, "kevent");
+ }
+
+ switch (kev.filter) {
+ case EVFILT_READ: {
+ int s, ts;
+
+ if ((int)kev.ident != lsd)
+ errx(1, "unexpected event ident %d",
+ (int)kev.ident);
+
+ s = accept(lsd, NULL, NULL);
+ if (s == -1)
+ err(1, "accept");
+ nodelay(s);
+
+ ts = socket(sc->tss.ss_family, SOCK_STREAM, 0);
+ if (ts == -1)
+ err(1, "socket");
+ nodelay(ts);
+ if (connect(ts, (struct sockaddr *)&sc->tss,
+ sc->tss.ss_len) == -1)
+ err(1, "connect");
+
+ if (sc->loopback) {
+ int ls[2];
+
+ tcp_socketpair(ls, sc->tss.ss_family);
+ child = proxy(sc, ls[0], ts);
+ EV_SET(&kev, child, EVFILT_PROC, EV_ADD,
+ NOTE_EXIT, 0, NULL);
+ if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
+ err(1, "kevent");
+ child = proxy(sc, s, ls[1]);
+ EV_SET(&kev, child, EVFILT_PROC, EV_ADD,
+ NOTE_EXIT, 0, NULL);
+ if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
+ err(1, "kevent");
+ } else {
+ child = proxy(sc, s, ts);
+ EV_SET(&kev, child, EVFILT_PROC, EV_ADD,
+ NOTE_EXIT, 0, NULL);
+ if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
+ err(1, "kevent");
+ }
+
+ break;
+ }
+ case EVFILT_PROC: {
+ int status;
+
+ child = kev.ident;
+ status = (int)kev.data;
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) != 0) {
+ errx(1, "child exited with status %d",
+ WEXITSTATUS(status));
+ }
+ } else if (WIFSIGNALED(status)) {
+ warnx("child %d terminated by signal %d",
+ (pid_t)kev.ident, WTERMSIG(status));
+ }
+ if (waitpid(child, NULL, 0) == -1)
+ err(1, "waitpid");
+ break;
+ }
+ }
+ }
+}
+
+static void
+addrinfo(struct sockaddr_storage *ss, const char *addr)
+{
+ struct addrinfo hints, *res, *res1;
+ char *host, *port;
+ int error;
+
+ host = strdup(addr);
+ if (host == NULL)
+ err(1, "strdup");
+ port = strchr(host, ':');
+ if (port == NULL)
+ errx(1, "invalid address '%s', should be <addr>:<port>", host);
+ *port++ = '\0';
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_socktype = SOCK_STREAM;
+ error = getaddrinfo(host, port, &hints, &res);
+ if (error != 0)
+ errx(1, "%s", gai_strerror(error));
+ for (res1 = res; res != NULL; res = res->ai_next) {
+ if (res->ai_protocol == IPPROTO_TCP) {
+ memcpy(ss, res->ai_addr, res->ai_addrlen);
+ break;
+ }
+ }
+ if (res == NULL)
+ errx(1, "no TCP address found for '%s'", host);
+ free(host);
+ freeaddrinfo(res1);
+}
+
+static void
+proxy_init(struct proxy_softc *sc, const char *laddr, const char *taddr,
+ size_t bufsz, enum proxy_mode mode, bool loopback)
+{
+ addrinfo(&sc->lss, laddr);
+ addrinfo(&sc->tss, taddr);
+
+ sc->bufsz = bufsz;
+ sc->mode = mode;
+ sc->loopback = loopback;
+}
+
+int
+main(int argc, char **argv)
+{
+ struct proxy_softc sc;
+ char *laddr, *taddr;
+ size_t bufsz;
+ enum proxy_mode mode;
+ int ch;
+ bool loopback;
+
+ (void)signal(SIGPIPE, SIG_IGN);
+
+ loopback = false;
+ mode = PROXY_MODE_COPY;
+ bufsz = 2 * 1024 * 1024ul;
+ laddr = taddr = NULL;
+ while ((ch = getopt(argc, argv, "Ll:m:s:")) != -1) {
+ switch (ch) {
+ case 'l':
+ laddr = optarg;
+ break;
+ case 'L':
+ loopback = true;
+ break;
+ case 'm':
+ if (strcmp(optarg, "copy") == 0)
+ mode = PROXY_MODE_COPY;
+ else if (strcmp(optarg, "splice") == 0)
+ mode = PROXY_MODE_SPLICE;
+ else
+ usage();
+ break;
+ case 's':
+ bufsz = atoi(optarg);
+ break;
+ default:
+ usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (laddr == NULL || argc != 1)
+ usage();
+ taddr = argv[0];
+
+ /* Marshal command-line parameters into a neat structure. */
+ proxy_init(&sc, laddr, taddr, bufsz, mode, loopback);
+
+ /* Start handling connections. */
+ eventloop(&sc);
+
+ return (0);
+}