git: f5e18a7c3c8b - stable/13 - libvmmapi: Provide an interface for limiting rights on the device fd

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 31 Oct 2022 22:17:30 UTC
The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=f5e18a7c3c8b745675d9a81a30c2703d09d18de1

commit f5e18a7c3c8b745675d9a81a30c2703d09d18de1
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-10-24 21:31:11 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-10-31 13:26:49 +0000

    libvmmapi: Provide an interface for limiting rights on the device fd
    
    Currently libvmmapi provides a way to get a list of the allowed ioctls
    on the vmm device file, so that bhyve can limit rights on the device
    file fd.  The interface is rather strange: it allocates a copy of the
    list but returns a const pointer, so the caller has to cast away the
    const in order to free it without aggravating the compiler.
    
    As far as I can see, there's no reason to make a copy of the array, but
    changing vm_get_ioctls() to not do that would break compatibility.  So
    this change just introduces a better interface: move all rights-limiting
    logic into libvmmapi.
    
    Any new operations on the fd should be wrapped by libvmmapi, so also
    discourage use of vm_get_device_fd().  Currently bhyve uses it only when
    limiting rights on the device fd.
    
    No functional change intended.
    
    Reviewed by:    jhb
    
    (cherry picked from commit 3e9b4532d174378624e4582637883736f7781851)
---
 lib/libvmmapi/vmmapi.c | 68 ++++++++++++++++++++++++++++++++++----------------
 lib/libvmmapi/vmmapi.h | 11 +++++---
 2 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index 0291e4a10c33..4e4393d81961 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -32,6 +32,7 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/capsicum.h>
 #include <sys/sysctl.h>
 #include <sys/ioctl.h>
 #include <sys/linker.h>
@@ -43,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <x86/segments.h>
 #include <machine/specialreg.h>
 
+#include <capsicum_helpers.h>
 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -1687,6 +1689,49 @@ vm_get_topology(struct vmctx *ctx,
 	return (error);
 }
 
+/* Keep in sync with machine/vmm_dev.h. */
+static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
+    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
+    VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
+    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
+    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
+    VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
+    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
+    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
+    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
+    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
+    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
+    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
+    VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
+    VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
+    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
+    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
+    VM_GLA2GPA_NOFAULT,
+    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
+    VM_SET_INTINFO, VM_GET_INTINFO,
+    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
+    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY
+};
+
+int
+vm_limit_rights(struct vmctx *ctx)
+{
+	cap_rights_t rights;
+	size_t ncmds;
+
+	cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
+	if (caph_rights_limit(ctx->fd, &rights) != 0)
+		return (-1);
+	ncmds = nitems(vm_ioctl_cmds);
+	if (caph_ioctls_limit(ctx->fd, vm_ioctl_cmds, ncmds) != 0)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Avoid using in new code.  Operations on the fd should be wrapped here so that
+ * capability rights can be kept in sync.
+ */
 int
 vm_get_device_fd(struct vmctx *ctx)
 {
@@ -1694,32 +1739,11 @@ vm_get_device_fd(struct vmctx *ctx)
 	return (ctx->fd);
 }
 
+/* Legacy interface, do not use. */
 const cap_ioctl_t *
 vm_get_ioctls(size_t *len)
 {
 	cap_ioctl_t *cmds;
-	/* keep in sync with machine/vmm_dev.h */
-	static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
-	    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
-	    VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
-	    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
-	    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
-	    VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
-	    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
-	    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
-	    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
-	    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
-	    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
-	    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
-	    VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
-	    VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
-	    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
-	    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
-	    VM_GLA2GPA_NOFAULT,
-	    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
-	    VM_SET_INTINFO, VM_GET_INTINFO,
-	    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
-	    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
 
 	if (len == NULL) {
 		cmds = malloc(sizeof(vm_ioctl_cmds));
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index 840c78bcf9f7..064875f26818 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -117,9 +117,9 @@ int	vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
 int	vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len);
 
 int	vm_create(const char *name);
-int	vm_get_device_fd(struct vmctx *ctx);
 struct vmctx *vm_open(const char *name);
 void	vm_destroy(struct vmctx *ctx);
+int	vm_limit_rights(struct vmctx *ctx);
 int	vm_parse_memsize(const char *optarg, size_t *memsize);
 int	vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
 void	*vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
@@ -193,8 +193,6 @@ int	vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
 int	vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
 
-const cap_ioctl_t *vm_get_ioctls(size_t *len);
-
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.
  */
@@ -277,6 +275,13 @@ struct checkpoint_op {
 
 int	vm_snapshot_req(struct vm_snapshot_meta *meta);
 int	vm_restore_time(struct vmctx *ctx);
+
+/*
+ * Deprecated interfaces, do not use them in new code.
+ */
+int	vm_get_device_fd(struct vmctx *ctx);
+const cap_ioctl_t *vm_get_ioctls(size_t *len);
+
 __END_DECLS
 
 #endif	/* _VMMAPI_H_ */