Re: git: 1092ec8b3375 - main - kern: Introduce RLIMIT_VMM

From: Cy Schubert <Cy.Schubert_at_cschubert.com>
Date: Wed, 17 Dec 2025 19:47:11 UTC
In message <20251217190348.7df7c756@hermann>, FreeBSD User writes:
> On Wed, 17 Dec 2025 14:08:55 +0000
> Bojan Novković <bnovkov@FreeBSD.org> wrote:
>
> > The branch main has been updated by bnovkov:
> >=20
> > URL:
> > https://cgit.FreeBSD.org/src/commit/?id=1092ec8b337595ed8d52accf41c6904d75b3689d
> >=20
> > commit 1092ec8b337595ed8d52accf41c6904d75b3689d
> > Author:     Bojan Novković <bnovkov@FreeBSD.org>
> > AuthorDate: 2025-11-07 13:11:03 +0000
> > Commit:     Bojan Novković <bnovkov@FreeBSD.org>
> > CommitDate: 2025-12-17 14:08:31 +0000
> >=20
> >     kern: Introduce RLIMIT_VMM
> >    =20
> >     This change introduces a new per-UID limit for controlling the
> >     number of vmm instances, in anticipation of unprivileged bhyve.
> >     This allows us to limit the amount of kernel memory allocated
> >     by the vmm driver and prevent potential memory exhaustion attacks.
> >    =20
> >     Differential Revision:  https://reviews.freebsd.org/D53728
> >     Reviewed by:    markj, olce, corvink
> >     MFC after:      3 months
> >     Sponsored by:   The FreeBSD Foundation
> >     Sponsored by:   Klara, Inc.
> > ---
> >  sys/dev/vmm/vmm_dev.c              | 18 +++++++++++++++---
> >  sys/kern/kern_resource.c           | 13 +++++++++++++
> >  sys/sys/resource.h                 |  4 +++-
> >  sys/sys/resourcevar.h              |  2 ++
> >  usr.bin/procstat/procstat_rlimit.c |  1 +
> >  5 files changed, 34 insertions(+), 4 deletions(-)
> >=20
> > diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
> > index d6543bf6534e..3a86a8f966ef 100644
> > --- a/sys/dev/vmm/vmm_dev.c
> > +++ b/sys/dev/vmm/vmm_dev.c
> > @@ -18,6 +18,7 @@
> >  #include <sys/priv.h>
> >  #include <sys/proc.h>
> >  #include <sys/queue.h>
> > +#include <sys/resourcevar.h>
> >  #include <sys/smp.h>
> >  #include <sys/sx.h>
> >  #include <sys/sysctl.h>
> > @@ -96,6 +97,10 @@ u_int vm_maxcpu;
> >  SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
> >      &vm_maxcpu, 0, "Maximum number of vCPUs");
> > =20
> > +u_int vm_maxvmms;
> > +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
> > +    &vm_maxvmms, 0, "Maximum number of VMM instances per user");
> > +
> >  static void devmem_destroy(void *arg);
> >  static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *dev=
> mem);
> > =20
> > @@ -870,6 +875,7 @@ vmmdev_destroy(struct vmmdev_softc *sc)
> >  	int error __diagused;
> > =20
> >  	KASSERT(sc->cdev =3D=3D NULL, ("%s: cdev not free", __func__));
> > +	KASSERT(sc->ucred !=3D NULL, ("%s: missing ucred", __func__));
> > =20
> >  	/*
> >  	 * Destroy all cdevs:
> > @@ -898,8 +904,8 @@ vmmdev_destroy(struct vmmdev_softc *sc)
> >  	if (sc->vm !=3D NULL)
> >  		vm_destroy(sc->vm);
> > =20
> > -	if (sc->ucred !=3D NULL)
> > -		crfree(sc->ucred);
> > +	chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
> > +	crfree(sc->ucred);
> > =20
> >  	sx_xlock(&vmmdev_mtx);
> >  	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
> > @@ -1021,6 +1027,12 @@ vmmdev_create(const char *name, struct ucred *cred)
> >  		vmmdev_destroy(sc);
> >  		return (error);
> >  	}
> > +	if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
> > +		sx_xunlock(&vmmdev_mtx);
> > +		destroy_dev(cdev);
> > +		vmmdev_destroy(sc);
> > +		return (ENOMEM);
> > +	}
> >  	sc->cdev =3D cdev;
> >  	sx_xunlock(&vmmdev_mtx);
> >  	return (0);
> > @@ -1172,7 +1184,7 @@ vmm_handler(module_t mod, int what, void *arg)
> >  		}
> >  		if (vm_maxcpu =3D=3D 0)
> >  			vm_maxcpu =3D 1;
> > -
> > +		vm_maxvmms =3D 4 * mp_ncpus;
> >  		error =3D vmm_modinit();
> >  		if (error =3D=3D 0)
> >  			vmm_initialized =3D true;
> > diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
> > index dcd38c6e6fbe..31f89bd41f6d 100644
> > --- a/sys/kern/kern_resource.c
> > +++ b/sys/kern/kern_resource.c
> > @@ -895,6 +895,9 @@ getrlimitusage_one(struct proc *p, u_int which, int
> > flags, rlim_t *res) case RLIMIT_PIPEBUF:
> >  		*res =3D ui->ui_pipecnt;
> >  		break;
> > +	case RLIMIT_VMM:
> > +		*res =3D ui->ui_vmmcnt;
> > +		break;
> >  	default:
> >  		error =3D EINVAL;
> >  		break;
> > @@ -1643,6 +1646,9 @@ uifree(struct uidinfo *uip)
> >  	if (uip->ui_inotifywatchcnt !=3D 0)
> >  		printf("freeing uidinfo: uid =3D %d, inotifywatchcnt =3D %ld\n"
> ,
> >  		    uip->ui_uid, uip->ui_inotifywatchcnt);
> > +	if (uip->ui_vmmcnt !=3D 0)
> > +		printf("freeing vmmcnt: uid =3D %d, vmmcnt =3D %ld\n",
> > +		    uip->ui_uid, uip->ui_vmmcnt);
> >  	free(uip, M_UIDINFO);
> >  }
> > =20
> > @@ -1763,6 +1769,13 @@ chginotifywatchcnt(struct uidinfo *uip, int diff,
> > rlim_t max) "inotifywatchcnt"));
> >  }
> > =20
> > +int
> > +chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max)
> > +{
> > +
> > +	return (chglimit(uip, &uip->ui_vmmcnt, diff, max, "vmmcnt"));
> > +}
> > +
> >  static int
> >  sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS)
> >  {
> > diff --git a/sys/sys/resource.h b/sys/sys/resource.h
> > index 2725aa1ef646..9e0635cdb328 100644
> > --- a/sys/sys/resource.h
> > +++ b/sys/sys/resource.h
> > @@ -115,8 +115,9 @@ struct __wrusage {
> >  #define	RLIMIT_KQUEUES	13		/* kqueues allocated
> > */ #define	RLIMIT_UMTXP	14		/* process-shared
> > umtx */ #define	RLIMIT_PIPEBUF	15		/* pipes/fifos
> > buffers */ +#define	RLIMIT_VMM	16		/* virtual
> > machines */=20
> > -#define	RLIM_NLIMITS	16		/* number of resource
> > limits */ +#define	RLIM_NLIMITS	17		/* number of
> > resource limits */=20
> >  #define	RLIM_INFINITY	((rlim_t)(((__uint64_t)1 << 63) - 1))
> >  #define	RLIM_SAVED_MAX	RLIM_INFINITY
> > @@ -144,6 +145,7 @@ static const char *rlimit_ident[] =3D {
> >  	"kqueues",
> >  	"umtx",
> >  	"pipebuf",
> > +	"vmm",
> >  };
> >  #endif
> > =20
> > diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
> > index 61411890c85b..d5c4561eec66 100644
> > --- a/sys/sys/resourcevar.h
> > +++ b/sys/sys/resourcevar.h
> > @@ -124,6 +124,7 @@ struct uidinfo {
> >  	long	ui_pipecnt;		/* (b) consumption of pipe
> > buffers */ long	ui_inotifycnt;		/* (b) number of inotify
> > descriptors */ long	ui_inotifywatchcnt;	/* (b) number of
> > inotify watches */
> > +	long	ui_vmmcnt;		/* (b) number of vmm instances
> > */ uid_t	ui_uid;			/* (a) uid */
> >  	u_int	ui_ref;			/* (b) reference count */
> >  #ifdef	RACCT
> > @@ -148,6 +149,7 @@ int	 chgumtxcnt(struct uidinfo *uip, int diff,
> > rlim_t maxval); int	 chgpipecnt(struct uidinfo *uip, int diff, rlim_t
> > max); int	 chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval);
> >  int	 chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxva
> l);
> > +int	 chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max);
> >  int	 kern_proc_setrlimit(struct thread *td, struct proc *p, u_int
> > which, struct rlimit *limp);
> >  struct plimit
> > diff --git a/usr.bin/procstat/procstat_rlimit.c
> > b/usr.bin/procstat/procstat_rlimit.c index c34550295f05..f3132758e005 100=
> 644
> > --- a/usr.bin/procstat/procstat_rlimit.c
> > +++ b/usr.bin/procstat/procstat_rlimit.c
> > @@ -64,6 +64,7 @@ static struct {
> >  	{"kqueues",          "   "},
> >  	{"umtxp",            "   "},
> >  	{"pipebuf",          "B  "},
> > +	{"virtual-machines", "   "},
> >  };
> > =20
> >  _Static_assert(nitems(rlimit_param) =3D=3D RLIM_NLIMITS,
> >=20
>
> After this commit - probably - my kernel pollutes the console with lots of
>
> pid XXXXX (limits), jid 0, uid 0: exited on signal 11 (core dumped)
>
> Especially bind920 (named) is failing immediately; surprisingly, the mouse
> isn't working anymore, and so on ... sshd is also dying on startup - no access
> possible.
>
> Environment: customized kernel (especially commenting out unused driver and
> ZFS/IPFW in-kernel).
>
> Kind regards,
>
> oh
>
> =20
>

I have a local patch that addresses this, at least partially. I don't know 
if there are other places that also need attention in the tree. Nor is this 
anywhere near ready for commit but it should get you going for now.

Maybe someone else can clean it up and commit it.