git: 3b263fa76c08 - main - if_tuntap: Try to fix device refcount bugs
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 28 Jul 2025 16:19:49 UTC
The branch main has been updated by markj:
URL: https://cgit.FreeBSD.org/src/commit/?id=3b263fa76c08993f37ce94cb7cd4605f67dd5965
commit 3b263fa76c08993f37ce94cb7cd4605f67dd5965
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-07-28 16:03:20 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-07-28 16:19:38 +0000
if_tuntap: Try to fix device refcount bugs
There are two ways to create a tun device, via devfs cloning or with
ifconfig. The latter is implemented by tun_clone_create() and the
former by tunclone(), which invokes tun_clone_create() via
if_clone_create(). Both of these functions were invoking dev_ref()
after creating the devfs_node(), but this was extraneous. tunclone()
does need to acquire an extra reference since this is required by the
dev_clone EVENTHANDLER interface contract, but it was already doing so
by specifying MAKEDEV_REF. Fix this by removing unnecessary refcount
acquisitions.
A second problem is with teardown in a VNET jail. A tun interface
created by device cloning will hold a credential reference for the jail,
which prevents it from being destroyed and in particular prevents VNET
SYSUNINITs from running. To fix this, we need to register a
PR_METHOD_REMOVE callback for jail teardown which, in a VNET jail,
destroys cloned interfaces. This way, jail teardown can proceed.
These bugs are noticeable with something like
# jail -c name=test vnet command=ls /dev/tun
# jls -vd
While here, add some comments and be sure to destroy a nascent mutex and
condition variable in an error path.
Reviewed by: kib
MFC after: 1 month
Sponsored by: Stormshield
Sponsored by: Klara, Inc.
Differential Revision: https://reviews.freebsd.org/D51525
---
sys/net/if_tuntap.c | 76 ++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 58 insertions(+), 18 deletions(-)
diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
index 3bab04aa4d38..5e6f65c04b2f 100644
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -74,6 +74,7 @@
#include <sys/malloc.h>
#include <sys/random.h>
#include <sys/ctype.h>
+#include <sys/osd.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -178,6 +179,7 @@ struct tuntap_softc {
static struct mtx tunmtx;
static eventhandler_tag arrival_tag;
static eventhandler_tag clone_tag;
+static int tuntap_osd_jail_slot;
static const char tunname[] = "tun";
static const char tapname[] = "tap";
static const char vmnetname[] = "vmnet";
@@ -497,6 +499,10 @@ vmnet_clone_match(struct if_clone *ifc, const char *name)
return (0);
}
+/*
+ * Create a clone via the ifnet cloning mechanism. Note that this is invoked
+ * indirectly by tunclone() below.
+ */
static int
tun_clone_create(struct if_clone *ifc, char *name, size_t len,
struct ifc_data *ifd, struct ifnet **ifpp)
@@ -532,15 +538,19 @@ tun_clone_create(struct if_clone *ifc, char *name, size_t len,
if (i != 0)
i = tun_create_device(drv, unit, NULL, &dev, name);
if (i == 0) {
- dev_ref(dev);
+ struct tuntap_softc *tp;
+
tuncreate(dev);
- struct tuntap_softc *tp = dev->si_drv1;
+ tp = dev->si_drv1;
*ifpp = tp->tun_ifp;
}
return (i);
}
+/*
+ * Create a clone via devfs access.
+ */
static void
tunclone(void *arg, struct ucred *cred, char *name, int namelen,
struct cdev **dev)
@@ -595,11 +605,12 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen,
}
i = tun_create_device(drv, u, cred, dev, name);
- }
- if (i == 0) {
+ } else {
+ /* Consumed by the dev_clone invoker. */
dev_ref(*dev);
- if_clone_create(name, namelen, NULL);
}
+ if (i == 0)
+ if_clone_create(name, namelen, NULL);
out:
CURVNET_RESTORE();
}
@@ -669,16 +680,6 @@ vnet_tun_init(const void *unused __unused)
VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
vnet_tun_init, NULL);
-static void
-vnet_tun_uninit(const void *unused __unused)
-{
-
- for (u_int i = 0; i < NDRV; ++i)
- if_clone_detach(V_tuntap_driver_cloners[i]);
-}
-VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
- vnet_tun_uninit, NULL);
-
static void
tun_uninit(const void *unused __unused)
{
@@ -689,6 +690,16 @@ tun_uninit(const void *unused __unused)
EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag);
EVENTHANDLER_DEREGISTER(dev_clone, clone_tag);
+ CURVNET_SET(vnet0);
+ for (u_int i = 0; i < NDRV; i++) {
+ if_clone_detach(V_tuntap_driver_cloners[i]);
+ V_tuntap_driver_cloners[i] = NULL;
+ }
+ CURVNET_RESTORE();
+
+ if (tuntap_osd_jail_slot != 0)
+ osd_jail_deregister(tuntap_osd_jail_slot);
+
mtx_lock(&tunmtx);
while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
TAILQ_REMOVE(&tunhead, tp, tun_list);
@@ -724,6 +735,30 @@ tuntap_driver_from_ifnet(const struct ifnet *ifp)
return (NULL);
}
+/*
+ * Remove devices that were created by devfs cloning, as they hold references
+ * which prevent the prison from collapsing, in which state VNET sysuninits will
+ * not be invoked.
+ */
+static int
+tuntap_prison_remove(void *obj, void *data __unused)
+{
+#ifdef VIMAGE
+ struct prison *pr;
+
+ pr = obj;
+ if (prison_owns_vnet(pr)) {
+ CURVNET_SET(pr->pr_vnet);
+ for (u_int i = 0; i < NDRV; i++) {
+ if_clone_detach(V_tuntap_driver_cloners[i]);
+ V_tuntap_driver_cloners[i] = NULL;
+ }
+ CURVNET_RESTORE();
+ }
+#endif
+ return (0);
+}
+
static int
tuntapmodevent(module_t mod, int type, void *data)
{
@@ -738,8 +773,12 @@ tuntapmodevent(module_t mod, int type, void *data)
clone_setup(&drv->clones);
drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
}
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_REMOVE] = tuntap_prison_remove,
+ };
+ tuntap_osd_jail_slot = osd_jail_register(NULL, methods);
arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
- tunrename, 0, 1000);
+ tunrename, 0, 1000);
if (arrival_tag == NULL)
return (ENOMEM);
clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
@@ -747,7 +786,7 @@ tuntapmodevent(module_t mod, int type, void *data)
return (ENOMEM);
break;
case MOD_UNLOAD:
- /* See tun_uninit, so it's done after the vnet_sysuninit() */
+ /* See tun_uninit(). */
break;
default:
return EOPNOTSUPP;
@@ -798,6 +837,8 @@ tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr,
args.mda_si_drv1 = tp;
error = make_dev_s(&args, dev, "%s", name);
if (error != 0) {
+ mtx_destroy(&tp->tun_mtx);
+ cv_destroy(&tp->tun_cv);
free(tp, M_TUN);
return (error);
}
@@ -914,7 +955,6 @@ tap_transmit(struct ifnet *ifp, struct mbuf *m)
return (error);
}
-/* XXX: should return an error code so it can fail. */
static void
tuncreate(struct cdev *dev)
{