Re: git: 1092ec8b3375 - main - kern: Introduce RLIMIT_VMM
- In reply to: FreeBSD User : "Re: git: 1092ec8b3375 - main - kern: Introduce RLIMIT_VMM"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 17 Dec 2025 19:47:11 UTC
In message <20251217190348.7df7c756@hermann>, FreeBSD User writes:
> On Wed, 17 Dec 2025 14:08:55 +0000
> Bojan Novković <bnovkov@FreeBSD.org> wrote:
>
> > The branch main has been updated by bnovkov:
> >=20
> > URL:
> > https://cgit.FreeBSD.org/src/commit/?id=1092ec8b337595ed8d52accf41c6904d75b3689d
> >=20
> > commit 1092ec8b337595ed8d52accf41c6904d75b3689d
> > Author: Bojan Novković <bnovkov@FreeBSD.org>
> > AuthorDate: 2025-11-07 13:11:03 +0000
> > Commit: Bojan Novković <bnovkov@FreeBSD.org>
> > CommitDate: 2025-12-17 14:08:31 +0000
> >=20
> > kern: Introduce RLIMIT_VMM
> > =20
> > This change introduces a new per-UID limit for controlling the
> > number of vmm instances, in anticipation of unprivileged bhyve.
> > This allows us to limit the amount of kernel memory allocated
> > by the vmm driver and prevent potential memory exhaustion attacks.
> > =20
> > Differential Revision: https://reviews.freebsd.org/D53728
> > Reviewed by: markj, olce, corvink
> > MFC after: 3 months
> > Sponsored by: The FreeBSD Foundation
> > Sponsored by: Klara, Inc.
> > ---
> > sys/dev/vmm/vmm_dev.c | 18 +++++++++++++++---
> > sys/kern/kern_resource.c | 13 +++++++++++++
> > sys/sys/resource.h | 4 +++-
> > sys/sys/resourcevar.h | 2 ++
> > usr.bin/procstat/procstat_rlimit.c | 1 +
> > 5 files changed, 34 insertions(+), 4 deletions(-)
> >=20
> > diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
> > index d6543bf6534e..3a86a8f966ef 100644
> > --- a/sys/dev/vmm/vmm_dev.c
> > +++ b/sys/dev/vmm/vmm_dev.c
> > @@ -18,6 +18,7 @@
> > #include <sys/priv.h>
> > #include <sys/proc.h>
> > #include <sys/queue.h>
> > +#include <sys/resourcevar.h>
> > #include <sys/smp.h>
> > #include <sys/sx.h>
> > #include <sys/sysctl.h>
> > @@ -96,6 +97,10 @@ u_int vm_maxcpu;
> > SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
> > &vm_maxcpu, 0, "Maximum number of vCPUs");
> > =20
> > +u_int vm_maxvmms;
> > +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
> > + &vm_maxvmms, 0, "Maximum number of VMM instances per user");
> > +
> > static void devmem_destroy(void *arg);
> > static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *dev=
> mem);
> > =20
> > @@ -870,6 +875,7 @@ vmmdev_destroy(struct vmmdev_softc *sc)
> > int error __diagused;
> > =20
> > KASSERT(sc->cdev =3D=3D NULL, ("%s: cdev not free", __func__));
> > + KASSERT(sc->ucred !=3D NULL, ("%s: missing ucred", __func__));
> > =20
> > /*
> > * Destroy all cdevs:
> > @@ -898,8 +904,8 @@ vmmdev_destroy(struct vmmdev_softc *sc)
> > if (sc->vm !=3D NULL)
> > vm_destroy(sc->vm);
> > =20
> > - if (sc->ucred !=3D NULL)
> > - crfree(sc->ucred);
> > + chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
> > + crfree(sc->ucred);
> > =20
> > sx_xlock(&vmmdev_mtx);
> > SLIST_REMOVE(&head, sc, vmmdev_softc, link);
> > @@ -1021,6 +1027,12 @@ vmmdev_create(const char *name, struct ucred *cred)
> > vmmdev_destroy(sc);
> > return (error);
> > }
> > + if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
> > + sx_xunlock(&vmmdev_mtx);
> > + destroy_dev(cdev);
> > + vmmdev_destroy(sc);
> > + return (ENOMEM);
> > + }
> > sc->cdev =3D cdev;
> > sx_xunlock(&vmmdev_mtx);
> > return (0);
> > @@ -1172,7 +1184,7 @@ vmm_handler(module_t mod, int what, void *arg)
> > }
> > if (vm_maxcpu =3D=3D 0)
> > vm_maxcpu =3D 1;
> > -
> > + vm_maxvmms =3D 4 * mp_ncpus;
> > error =3D vmm_modinit();
> > if (error =3D=3D 0)
> > vmm_initialized =3D true;
> > diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
> > index dcd38c6e6fbe..31f89bd41f6d 100644
> > --- a/sys/kern/kern_resource.c
> > +++ b/sys/kern/kern_resource.c
> > @@ -895,6 +895,9 @@ getrlimitusage_one(struct proc *p, u_int which, int
> > flags, rlim_t *res) case RLIMIT_PIPEBUF:
> > *res =3D ui->ui_pipecnt;
> > break;
> > + case RLIMIT_VMM:
> > + *res =3D ui->ui_vmmcnt;
> > + break;
> > default:
> > error =3D EINVAL;
> > break;
> > @@ -1643,6 +1646,9 @@ uifree(struct uidinfo *uip)
> > if (uip->ui_inotifywatchcnt !=3D 0)
> > printf("freeing uidinfo: uid =3D %d, inotifywatchcnt =3D %ld\n"
> ,
> > uip->ui_uid, uip->ui_inotifywatchcnt);
> > + if (uip->ui_vmmcnt !=3D 0)
> > + printf("freeing vmmcnt: uid =3D %d, vmmcnt =3D %ld\n",
> > + uip->ui_uid, uip->ui_vmmcnt);
> > free(uip, M_UIDINFO);
> > }
> > =20
> > @@ -1763,6 +1769,13 @@ chginotifywatchcnt(struct uidinfo *uip, int diff,
> > rlim_t max) "inotifywatchcnt"));
> > }
> > =20
> > +int
> > +chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max)
> > +{
> > +
> > + return (chglimit(uip, &uip->ui_vmmcnt, diff, max, "vmmcnt"));
> > +}
> > +
> > static int
> > sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS)
> > {
> > diff --git a/sys/sys/resource.h b/sys/sys/resource.h
> > index 2725aa1ef646..9e0635cdb328 100644
> > --- a/sys/sys/resource.h
> > +++ b/sys/sys/resource.h
> > @@ -115,8 +115,9 @@ struct __wrusage {
> > #define RLIMIT_KQUEUES 13 /* kqueues allocated
> > */ #define RLIMIT_UMTXP 14 /* process-shared
> > umtx */ #define RLIMIT_PIPEBUF 15 /* pipes/fifos
> > buffers */ +#define RLIMIT_VMM 16 /* virtual
> > machines */=20
> > -#define RLIM_NLIMITS 16 /* number of resource
> > limits */ +#define RLIM_NLIMITS 17 /* number of
> > resource limits */=20
> > #define RLIM_INFINITY ((rlim_t)(((__uint64_t)1 << 63) - 1))
> > #define RLIM_SAVED_MAX RLIM_INFINITY
> > @@ -144,6 +145,7 @@ static const char *rlimit_ident[] =3D {
> > "kqueues",
> > "umtx",
> > "pipebuf",
> > + "vmm",
> > };
> > #endif
> > =20
> > diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
> > index 61411890c85b..d5c4561eec66 100644
> > --- a/sys/sys/resourcevar.h
> > +++ b/sys/sys/resourcevar.h
> > @@ -124,6 +124,7 @@ struct uidinfo {
> > long ui_pipecnt; /* (b) consumption of pipe
> > buffers */ long ui_inotifycnt; /* (b) number of inotify
> > descriptors */ long ui_inotifywatchcnt; /* (b) number of
> > inotify watches */
> > + long ui_vmmcnt; /* (b) number of vmm instances
> > */ uid_t ui_uid; /* (a) uid */
> > u_int ui_ref; /* (b) reference count */
> > #ifdef RACCT
> > @@ -148,6 +149,7 @@ int chgumtxcnt(struct uidinfo *uip, int diff,
> > rlim_t maxval); int chgpipecnt(struct uidinfo *uip, int diff, rlim_t
> > max); int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval);
> > int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxva
> l);
> > +int chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max);
> > int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int
> > which, struct rlimit *limp);
> > struct plimit
> > diff --git a/usr.bin/procstat/procstat_rlimit.c
> > b/usr.bin/procstat/procstat_rlimit.c index c34550295f05..f3132758e005 100=
> 644
> > --- a/usr.bin/procstat/procstat_rlimit.c
> > +++ b/usr.bin/procstat/procstat_rlimit.c
> > @@ -64,6 +64,7 @@ static struct {
> > {"kqueues", " "},
> > {"umtxp", " "},
> > {"pipebuf", "B "},
> > + {"virtual-machines", " "},
> > };
> > =20
> > _Static_assert(nitems(rlimit_param) =3D=3D RLIM_NLIMITS,
> >=20
>
> After this commit - probably - my kernel pollutes the console with lots of
>
> pid XXXXX (limits), jid 0, uid 0: exited on signal 11 (core dumped)
>
> Especially bind920 (named) is failing immediately, surprisingly the mouse isn't
> working anymore and so on ... sshd is also dying on startup - no access
> possible.
>
> Environment: customized kernel (especially commenting out unused driver and
> ZFS/IPFW in-kernel).
>
> Kind regards,
>
> oh
>
> =20
>
I have a local patch that addresses this, at least partially. I don't know
if there are other places in the tree that also need attention. The patch is
nowhere near ready for commit, but it should get you going for now.
Maybe someone else can clean it up and commit it.