git: a505b80b2f03 - main - ng_device: add kqueue support

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 27 Oct 2025 12:48:12 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=a505b80b2f038409300cd05e2ea95822942404e2

commit a505b80b2f038409300cd05e2ea95822942404e2
Author:     Quentin Thébault <quentin.thebault@defenso.fr>
AuthorDate: 2025-09-09 03:48:46 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-10-27 12:45:30 +0000

    ng_device: add kqueue support
    
    Signed-off-by: Quentin Thébault <quentin.thebault@defenso.fr>
    
    Sponsored by:   Defenso
    Reviewed by:    markj
    MFC after:      1 month
    Differential Revision:  https://reviews.freebsd.org/D52541
    Pull Request:   https://github.com/freebsd/freebsd-src/pull/1880
---
 sys/netgraph/ng_device.c | 126 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 3 deletions(-)

diff --git a/sys/netgraph/ng_device.c b/sys/netgraph/ng_device.c
index 582f877ff3ed..066e3be29694 100644
--- a/sys/netgraph/ng_device.c
+++ b/sys/netgraph/ng_device.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2002 Mark Santcroos <marks@ripe.net>
  * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2025 Quentin Thébault <quentin.thebault@defenso.fr>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -50,6 +51,7 @@
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
+#include <sys/selinfo.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/uio.h>
@@ -117,12 +119,15 @@ struct ngd_private {
 	struct	ng_node	*node;
 	struct	ng_hook	*hook;
 	struct	cdev	*ngddev;
+	struct  selinfo rsel;
+	struct  selinfo wsel;
 	struct	mtx	ngd_mtx;
 	int 		unit;
 	int		ether_align;
 	uint16_t	flags;
 #define	NGDF_OPEN	0x0001
 #define	NGDF_RWAIT	0x0002
+#define	NGDF_DYING	0x0004
 };
 typedef struct ngd_private *priv_p;
 
@@ -138,6 +143,24 @@ static d_read_t ngdread;
 static d_write_t ngdwrite;
 static d_ioctl_t ngdioctl;
 static d_poll_t ngdpoll;
+static d_kqfilter_t ngdkqfilter;
+
+static int      ngd_kqread_event(struct knote *, long);
+static int      ngd_kqwrite_event(struct knote *, long);
+static void     ngd_kqread_detach(struct knote *);
+static void     ngd_kqwrite_detach(struct knote *);
+
+static const struct filterops ngd_read_filterops = {
+	.f_isfd = 1,	/* EVFILT_READ knotes on the device node */
+	.f_event = ngd_kqread_event,
+	.f_detach = ngd_kqread_detach,
+};
+
+static const struct filterops ngd_write_filterops = {
+	.f_isfd = 1,	/* EVFILT_WRITE knotes on the device node */
+	.f_event = ngd_kqwrite_event,
+	.f_detach = ngd_kqwrite_detach,
+};
 
 static struct cdevsw ngd_cdevsw = {
 	.d_version =	D_VERSION,
@@ -146,6 +169,7 @@ static struct cdevsw ngd_cdevsw = {
 	.d_read =	ngdread,
 	.d_write =	ngdwrite,
 	.d_ioctl =	ngdioctl,
+	.d_kqfilter =   ngdkqfilter,
 	.d_poll =	ngdpoll,
 	.d_name =	NG_DEVICE_DEVNAME,
 };
@@ -198,6 +222,9 @@ ng_device_constructor(node_p node)
 	mtx_init(&priv->readq.ifq_mtx, "ng_device queue", NULL, MTX_DEF);
 	IFQ_SET_MAXLEN(&priv->readq, ifqmaxlen);
 
+	knlist_init_mtx(&priv->rsel.si_note, &priv->ngd_mtx);
+	knlist_init_mtx(&priv->wsel.si_note, &priv->ngd_mtx);
+
 	/* Link everything together */
 	NG_NODE_SET_PRIVATE(node, priv);
 	priv->node = node;
@@ -206,6 +233,8 @@ ng_device_constructor(node_p node)
 	    GID_WHEEL, 0600, NG_DEVICE_DEVNAME "%d", priv->unit);
 	if (priv->ngddev == NULL) {
 		printf("%s(): make_dev() failed\n", __func__);
+		knlist_destroy(&priv->rsel.si_note);
+		knlist_destroy(&priv->wsel.si_note);
 		mtx_destroy(&priv->ngd_mtx);
 		mtx_destroy(&priv->readq.ifq_mtx);
 		free_unr(ngd_unit, priv->unit);
@@ -319,6 +348,8 @@ ng_device_rcvdata(hook_p hook, item_p item)
 		priv->flags &= ~NGDF_RWAIT;
 		wakeup(priv);
 	}
+	selwakeup(&priv->rsel);
+	KNOTE_LOCKED(&priv->rsel.si_note, 0);
 	mtx_unlock(&priv->ngd_mtx);
 
 	return (0);
@@ -334,9 +365,22 @@ ng_device_disconnect(hook_p hook)
 
 	DBG;
 
+	mtx_lock(&priv->ngd_mtx);
+	priv->flags |= NGDF_DYING;
+	wakeup(priv);
+	mtx_unlock(&priv->ngd_mtx);
+
 	destroy_dev(priv->ngddev);
+
+	knlist_clear(&priv->rsel.si_note, 0);
+	knlist_clear(&priv->wsel.si_note, 0);
+	knlist_destroy(&priv->rsel.si_note);
+	knlist_destroy(&priv->wsel.si_note);
 	mtx_destroy(&priv->ngd_mtx);
 
+	seldrain(&priv->rsel);
+	seldrain(&priv->wsel);
+
 	IF_DRAIN(&priv->readq);
 	mtx_destroy(&(priv)->readq.ifq_mtx);
 
@@ -493,9 +537,13 @@ ngdread(struct cdev *dev, struct uio *uio, int flag)
 				return (EWOULDBLOCK);
 			mtx_lock(&priv->ngd_mtx);
 			priv->flags |= NGDF_RWAIT;
-			if ((error = msleep(priv, &priv->ngd_mtx,
-			    PDROP | PCATCH | PZERO,
-			    "ngdread", 0)) != 0)
+			if (priv->flags & NGDF_DYING) {
+				mtx_unlock(&priv->ngd_mtx);
+				error = ENXIO;
+			} else
+				error = mtx_sleep(priv, &priv->ngd_mtx,
+				    PDROP | PCATCH, "ngdread", 0);
+			if (error != 0)
 				return (error);
 		}
 	} while (m == NULL);
@@ -538,9 +586,12 @@ ngdwrite(struct cdev *dev, struct uio *uio, int flag)
 	if (m == NULL)
 		return (ENOBUFS);
 
+	/* Setting VNET is required if connecting to a ng_bridge. */
+	CURVNET_SET(priv->node->nd_vnet);
 	NET_EPOCH_ENTER(et);
 	NG_SEND_DATA_ONLY(error, priv->hook, m);
 	NET_EPOCH_EXIT(et);
+	CURVNET_RESTORE();
 
 	return (error);
 }
@@ -561,3 +612,72 @@ ngdpoll(struct cdev *dev, int events, struct thread *td)
 
 	return (revents);
 }
+
+/* EVFILT_READ f_detach: unlink the knote from the read-side knlist. */
+static void
+ngd_kqread_detach(struct knote *kn)
+{
+	/* kn_hook was set to the node private data by ngdkqfilter(). */
+	knlist_remove(&((priv_p)kn->kn_hook)->rsel.si_note, kn, 0);
+}
+
+/* EVFILT_READ f_event: kn_data is the size of the head packet in readq. */
+static int
+ngd_kqread_event(struct knote *kn, long hint)
+{
+	priv_p priv = (priv_p)kn->kn_hook;
+	struct mbuf *m;
+
+	/*
+	 * readq keeps no byte total and is not walked, so only the head
+	 * packet is reported.  m_pkthdr.len is the length of its whole mbuf
+	 * chain; m_len would only count its first mbuf.  The knote is ready
+	 * (nonzero return) when that length is positive.
+	 */
+	IFQ_LOCK(&priv->readq);
+	if (!IFQ_IS_EMPTY(&priv->readq)) {
+		IF_POLL(&priv->readq, m);
+		kn->kn_data = m->m_pkthdr.len;
+	} else
+		kn->kn_data = 0;
+	IFQ_UNLOCK(&priv->readq);
+
+	return (kn->kn_data > 0);
+}
+
+/* EVFILT_WRITE f_detach: unlink the knote from the write-side knlist. */
+static void
+ngd_kqwrite_detach(struct knote *kn)
+{
+	/* kn_hook was set to the node private data by ngdkqfilter(). */
+	knlist_remove(&((priv_p)kn->kn_hook)->wsel.si_note, kn, 0);
+}
+
+static int
+ngd_kqwrite_event(struct knote *kn, long hint)
+{
+	kn->kn_data = IP_MAXPACKET;	/* fixed value, no queue is checked */
+
+	return (1);			/* write knotes are always ready */
+}
+
+/* d_kqfilter: attach a knote to the read or write knlist of the device. */
+static int
+ngdkqfilter(struct cdev *dev, struct knote *kn)
+{
+	priv_p priv = (priv_p)dev->si_drv1;
+	struct knlist *list;
+
+	if (kn->kn_filter == EVFILT_READ) {
+		kn->kn_fop = &ngd_read_filterops;
+		list = &priv->rsel.si_note;
+	} else if (kn->kn_filter == EVFILT_WRITE) {
+		kn->kn_fop = &ngd_write_filterops;
+		list = &priv->wsel.si_note;
+	} else
+		return (EINVAL);	/* other filters are not supported */
+
+	kn->kn_hook = priv;	/* lets the filterops find the node */
+	knlist_add(list, kn, 0);
+	return (0);
+}