Re: git: 1092ec8b3375 - main - kern: Introduce RLIMIT_VMM
- Reply: Dag-Erling_Smørgrav : "Re: git: 1092ec8b3375 - main - kern: Introduce RLIMIT_VMM"
- Reply: Cy Schubert : "Re: git: 1092ec8b3375 - main - kern: Introduce RLIMIT_VMM"
- In reply to: Bojan Novković : "git: 1092ec8b3375 - main - kern: Introduce RLIMIT_VMM"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 17 Dec 2025 18:03:48 UTC
On Wed, 17 Dec 2025 14:08:55 +0000
Bojan Novković <bnovkov@FreeBSD.org> wrote:
> The branch main has been updated by bnovkov:
>
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=1092ec8b337595ed8d52accf41c6904d75b3689d
>
> commit 1092ec8b337595ed8d52accf41c6904d75b3689d
> Author: Bojan Novković <bnovkov@FreeBSD.org>
> AuthorDate: 2025-11-07 13:11:03 +0000
> Commit: Bojan Novković <bnovkov@FreeBSD.org>
> CommitDate: 2025-12-17 14:08:31 +0000
>
> kern: Introduce RLIMIT_VMM
>
> This change introduces a new per-UID limit for controlling the
> number of vmm instances, in anticipation of unprivileged bhyve.
> This allows us to limit the amount of kernel memory allocated
> by the vmm driver and prevent potential memory exhaustion attacks.
>
> Differential Revision: https://reviews.freebsd.org/D53728
> Reviewed by: markj, olce, corvink
> MFC after: 3 months
> Sponsored by: The FreeBSD Foundation
> Sponsored by: Klara, Inc.
> ---
> sys/dev/vmm/vmm_dev.c | 18 +++++++++++++++---
> sys/kern/kern_resource.c | 13 +++++++++++++
> sys/sys/resource.h | 4 +++-
> sys/sys/resourcevar.h | 2 ++
> usr.bin/procstat/procstat_rlimit.c | 1 +
> 5 files changed, 34 insertions(+), 4 deletions(-)
>
> diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
> index d6543bf6534e..3a86a8f966ef 100644
> --- a/sys/dev/vmm/vmm_dev.c
> +++ b/sys/dev/vmm/vmm_dev.c
> @@ -18,6 +18,7 @@
> #include <sys/priv.h>
> #include <sys/proc.h>
> #include <sys/queue.h>
> +#include <sys/resourcevar.h>
> #include <sys/smp.h>
> #include <sys/sx.h>
> #include <sys/sysctl.h>
> @@ -96,6 +97,10 @@ u_int vm_maxcpu;
> SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
> &vm_maxcpu, 0, "Maximum number of vCPUs");
>
> +u_int vm_maxvmms;
> +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
> + &vm_maxvmms, 0, "Maximum number of VMM instances per user");
> +
> static void devmem_destroy(void *arg);
> static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
>
> @@ -870,6 +875,7 @@ vmmdev_destroy(struct vmmdev_softc *sc)
> int error __diagused;
>
> KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
> + KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));
>
> /*
> * Destroy all cdevs:
> @@ -898,8 +904,8 @@ vmmdev_destroy(struct vmmdev_softc *sc)
> if (sc->vm != NULL)
> vm_destroy(sc->vm);
>
> - if (sc->ucred != NULL)
> - crfree(sc->ucred);
> + chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
> + crfree(sc->ucred);
>
> sx_xlock(&vmmdev_mtx);
> SLIST_REMOVE(&head, sc, vmmdev_softc, link);
> @@ -1021,6 +1027,12 @@ vmmdev_create(const char *name, struct ucred *cred)
> vmmdev_destroy(sc);
> return (error);
> }
> + if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
> + sx_xunlock(&vmmdev_mtx);
> + destroy_dev(cdev);
> + vmmdev_destroy(sc);
> + return (ENOMEM);
> + }
> sc->cdev = cdev;
> sx_xunlock(&vmmdev_mtx);
> return (0);
> @@ -1172,7 +1184,7 @@ vmm_handler(module_t mod, int what, void *arg)
> }
> if (vm_maxcpu == 0)
> vm_maxcpu = 1;
> -
> + vm_maxvmms = 4 * mp_ncpus;
> error = vmm_modinit();
> if (error == 0)
> vmm_initialized = true;
> diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
> index dcd38c6e6fbe..31f89bd41f6d 100644
> --- a/sys/kern/kern_resource.c
> +++ b/sys/kern/kern_resource.c
> @@ -895,6 +895,9 @@ getrlimitusage_one(struct proc *p, u_int which, int flags, rlim_t *res)
> case RLIMIT_PIPEBUF:
> *res = ui->ui_pipecnt;
> break;
> + case RLIMIT_VMM:
> + *res = ui->ui_vmmcnt;
> + break;
> default:
> error = EINVAL;
> break;
> @@ -1643,6 +1646,9 @@ uifree(struct uidinfo *uip)
> if (uip->ui_inotifywatchcnt != 0)
> printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n",
> uip->ui_uid, uip->ui_inotifywatchcnt);
> + if (uip->ui_vmmcnt != 0)
> + printf("freeing vmmcnt: uid = %d, vmmcnt = %ld\n",
> + uip->ui_uid, uip->ui_vmmcnt);
> free(uip, M_UIDINFO);
> }
>
> @@ -1763,6 +1769,13 @@ chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t max)
> "inotifywatchcnt"));
> }
>
> +int
> +chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max)
> +{
> +
> + return (chglimit(uip, &uip->ui_vmmcnt, diff, max, "vmmcnt"));
> +}
> +
> static int
> sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS)
> {
> diff --git a/sys/sys/resource.h b/sys/sys/resource.h
> index 2725aa1ef646..9e0635cdb328 100644
> --- a/sys/sys/resource.h
> +++ b/sys/sys/resource.h
> @@ -115,8 +115,9 @@ struct __wrusage {
> #define RLIMIT_KQUEUES 13 /* kqueues allocated */
> #define RLIMIT_UMTXP 14 /* process-shared umtx */
> #define RLIMIT_PIPEBUF 15 /* pipes/fifos buffers */
> +#define RLIMIT_VMM 16 /* virtual machines */
>
> -#define RLIM_NLIMITS 16 /* number of resource limits */
> +#define RLIM_NLIMITS 17 /* number of resource limits */
>
> #define RLIM_INFINITY ((rlim_t)(((__uint64_t)1 << 63) - 1))
> #define RLIM_SAVED_MAX RLIM_INFINITY
> @@ -144,6 +145,7 @@ static const char *rlimit_ident[] = {
> "kqueues",
> "umtx",
> "pipebuf",
> + "vmm",
> };
> #endif
>
> diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
> index 61411890c85b..d5c4561eec66 100644
> --- a/sys/sys/resourcevar.h
> +++ b/sys/sys/resourcevar.h
> @@ -124,6 +124,7 @@ struct uidinfo {
> long ui_pipecnt; /* (b) consumption of pipe buffers */
> long ui_inotifycnt; /* (b) number of inotify descriptors */
> long ui_inotifywatchcnt; /* (b) number of inotify watches */
> + long ui_vmmcnt; /* (b) number of vmm instances */
> uid_t ui_uid; /* (a) uid */
> u_int ui_ref; /* (b) reference count */
> #ifdef RACCT
> @@ -148,6 +149,7 @@ int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval);
> int chgpipecnt(struct uidinfo *uip, int diff, rlim_t max);
> int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval);
> int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval);
> +int chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max);
> int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
> struct rlimit *limp);
> struct plimit
> diff --git a/usr.bin/procstat/procstat_rlimit.c b/usr.bin/procstat/procstat_rlimit.c
> index c34550295f05..f3132758e005 100644
> --- a/usr.bin/procstat/procstat_rlimit.c
> +++ b/usr.bin/procstat/procstat_rlimit.c
> @@ -64,6 +64,7 @@ static struct {
> {"kqueues", " "},
> {"umtxp", " "},
> {"pipebuf", "B "},
> + {"virtual-machines", " "},
> };
>
> _Static_assert(nitems(rlimit_param) == RLIM_NLIMITS,
>
Since this commit (probably), my kernel floods the console with lots of messages like:
pid XXXXX (limits), jid 0, uid 0: exited on signal 11 (core dumped)
In particular, bind920 (named) fails immediately; surprisingly, the mouse no
longer works, and so on ... sshd also dies on startup, so no access is
possible.
Environment: customized kernel (mainly with unused drivers commented out and
ZFS/IPFW compiled into the kernel).
Kind regards,
oh