git: 0977ab30c9c9 - stable/13 - kvm_clock: KVM paravirtual clock support
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 10 Oct 2021 09:24:01 UTC
The branch stable/13 has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=0977ab30c9c90a09f1be2a32c9197f51384ae34a commit 0977ab30c9c90a09f1be2a32c9197f51384ae34a Author: Adam Fenn <adam@fenn.io> AuthorDate: 2021-08-04 15:42:48 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2021-10-10 09:21:16 +0000 kvm_clock: KVM paravirtual clock support (cherry picked from commit 6c69c6bb4c7ffde48b130df707799d4eca21ca9f) --- sys/amd64/conf/GENERIC | 3 + sys/amd64/conf/MINIMAL | 3 + sys/amd64/conf/NOTES | 3 + sys/conf/files.x86 | 3 +- sys/dev/kvm_clock/kvm_clock.c | 240 ++++++++++++++++++++++++++++++++++++++++++ sys/i386/conf/GENERIC | 3 + sys/i386/conf/MINIMAL | 3 + sys/i386/conf/NOTES | 3 + sys/x86/include/kvm.h | 80 ++++++++++++++ 9 files changed, 340 insertions(+), 1 deletion(-) diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index e1a4560ab7f5..82d143c18f51 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -362,6 +362,9 @@ device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # HyperV drivers and enhancement support device hyperv # HyperV drivers diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL index 945cc653e0fc..f6dd2047f2ce 100644 --- a/sys/amd64/conf/MINIMAL +++ b/sys/amd64/conf/MINIMAL @@ -131,6 +131,9 @@ device ether # Ethernet support # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 714b20101703..e24b4c988056 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -502,6 +502,9 @@ device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # Microsoft Hyper-V enhancement support device hyperv # HyperV drivers diff --git a/sys/conf/files.x86 b/sys/conf/files.x86 index a74a848756f7..4ff888e8910c 100644 --- a/sys/conf/files.x86 +++ b/sys/conf/files.x86 @@ -286,6 +286,7 @@ dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci dev/itwd/itwd.c optional itwd +dev/kvm_clock/kvm_clock.c optional kvm_clock dev/qat/qat.c optional qat dev/qat/qat_ae.c optional qat dev/qat/qat_c2xxx.c optional qat @@ -341,7 +342,7 @@ x86/x86/x86_mem.c optional mem x86/x86/mp_x86.c optional smp x86/x86/mp_watchdog.c optional mp_watchdog smp x86/x86/nexus.c standard -x86/x86/pvclock.c optional xenhvm +x86/x86/pvclock.c optional kvm_clock | xenhvm x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/ucode.c standard diff --git a/sys/dev/kvm_clock/kvm_clock.c b/sys/dev/kvm_clock/kvm_clock.c new file mode 100644 index 000000000000..1a76432e417d --- /dev/null +++ b/sys/dev/kvm_clock/kvm_clock.c @@ -0,0 +1,240 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org> + * Copyright (c) 2021 Mathieu Chouquet-Stringer + * Copyright (c) 2021 Juniper Networks, Inc. + * Copyright (c) 2021 Klara, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Linux KVM paravirtual clock support + * + * References: + * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html + * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/domainset.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> + +#include <machine/pvclock.h> +#include <x86/kvm.h> + +#include "clock_if.h" + +#define KVM_CLOCK_DEVNAME "kvmclock" +/* + * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's + * default value of 800, and (3) below the TSC's value when it supports the + * "Invariant TSC" feature and is believed to be synchronized across all CPUs. + */ +#define KVM_CLOCK_TC_QUALITY 975 + +struct kvm_clock_softc { + struct pvclock pvc; + struct pvclock_wall_clock wc; + struct pvclock_vcpu_time_info *timeinfos; + u_int msr_tc; + u_int msr_wc; +}; + +static devclass_t kvm_clock_devclass; + +static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg); +static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc); +static void kvm_clock_system_time_enable_pcpu(void *arg); + +static struct pvclock_wall_clock * +kvm_clock_get_wallclock(void *arg) +{ + struct kvm_clock_softc *sc = arg; + + wrmsr(sc->msr_wc, vtophys(&sc->wc)); + return (&sc->wc); +} + +static void +kvm_clock_system_time_enable(struct kvm_clock_softc *sc) +{ + smp_rendezvous(NULL, kvm_clock_system_time_enable_pcpu, NULL, sc); +} + +static void +kvm_clock_system_time_enable_pcpu(void *arg) +{ + struct kvm_clock_softc *sc = arg; + + /* + * See [2]; the lsb of this MSR is the system time enable bit. + */ + wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1); +} + +static void +kvm_clock_identify(driver_t *driver, device_t parent) +{ + u_int regs[4]; + + kvm_cpuid_get_features(regs); + if ((regs[0] & + (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0) + return; + if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1)) + return; + BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0); +} + +static int +kvm_clock_probe(device_t dev) +{ + device_set_desc(dev, "KVM paravirtual clock"); + return (BUS_PROBE_DEFAULT); +} + +static int +kvm_clock_attach(device_t dev) +{ + u_int regs[4]; + struct kvm_clock_softc *sc = device_get_softc(dev); + bool stable_flag_supported; + + /* Process KVM "features" CPUID leaf content: */ + kvm_cpuid_get_features(regs); + if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) { + sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW; + sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW; + } else { + KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0, + ("Clocksource feature flags disappeared since " + "kvm_clock_identify: regs[0] %#0x.", regs[0])); + sc->msr_tc = KVM_MSR_SYSTEM_TIME; + sc->msr_wc = KVM_MSR_WALL_CLOCK; + } + stable_flag_supported = + (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0; + + /* Set up 'struct pvclock_vcpu_time_info' page(s): */ + sc->timeinfos = (struct pvclock_vcpu_time_info *)kmem_malloc(mp_ncpus * + sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO); + kvm_clock_system_time_enable(sc); + + /* + * Init pvclock; register KVM clock wall clock, register KVM clock + * timecounter, and set up the requisite infrastructure for vDSO access + * to this timecounter. + * Regarding 'tc_flags': Since the KVM MSR documentation does not + * specifically discuss suspend/resume scenarios, conservatively + * leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system + * time must be re-inited in such cases. + */ + sc->pvc.get_wallclock = kvm_clock_get_wallclock; + sc->pvc.get_wallclock_arg = sc; + sc->pvc.timeinfos = sc->timeinfos; + sc->pvc.stable_flag_supported = stable_flag_supported; + pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0); + return (0); +} + +static int +kvm_clock_detach(device_t dev) +{ + struct kvm_clock_softc *sc = device_get_softc(dev); + + return (pvclock_destroy(&sc->pvc)); +} + +static int +kvm_clock_suspend(device_t dev) +{ + return (0); +} + +static int +kvm_clock_resume(device_t dev) +{ + /* + * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE'; + * conservatively assume that the system time must be re-inited in + * suspend/resume scenarios. + */ + kvm_clock_system_time_enable(device_get_softc(dev)); + pvclock_resume(); + inittodr(time_second); + return (0); +} + +static int +kvm_clock_gettime(device_t dev, struct timespec *ts) +{ + struct kvm_clock_softc *sc = device_get_softc(dev); + + pvclock_gettime(&sc->pvc, ts); + return (0); +} + +static int +kvm_clock_settime(device_t dev, struct timespec *ts) +{ + /* + * Even though it is not possible to set the KVM clock's wall clock, to + * avoid the possibility of periodic benign error messages from + * 'settime_task_func()', report success rather than, e.g., 'ENODEV'. + */ + return (0); +} + +static device_method_t kvm_clock_methods[] = { + DEVMETHOD(device_identify, kvm_clock_identify), + DEVMETHOD(device_probe, kvm_clock_probe), + DEVMETHOD(device_attach, kvm_clock_attach), + DEVMETHOD(device_detach, kvm_clock_detach), + DEVMETHOD(device_suspend, kvm_clock_suspend), + DEVMETHOD(device_resume, kvm_clock_resume), + /* clock interface */ + DEVMETHOD(clock_gettime, kvm_clock_gettime), + DEVMETHOD(clock_settime, kvm_clock_settime), + + DEVMETHOD_END +}; + +static driver_t kvm_clock_driver = { + KVM_CLOCK_DEVNAME, + kvm_clock_methods, + sizeof(struct kvm_clock_softc), +}; + +DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, kvm_clock_devclass, 0, 0); diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index a60f031de0bb..ed9865cc7c55 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -328,6 +328,9 @@ device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # HyperV drivers and enhancement support # NOTE: HYPERV depends on hyperv. They must be added or removed together. options HYPERV # Kernel support for HyperV drivers diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL index 9ca3ed408f9c..17b8444e35d2 100644 --- a/sys/i386/conf/MINIMAL +++ b/sys/i386/conf/MINIMAL @@ -134,6 +134,9 @@ device gif # IPv6 and IPv4 tunneling # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES index c6a1ca7c4255..0cbac313bf6f 100644 --- a/sys/i386/conf/NOTES +++ b/sys/i386/conf/NOTES @@ -729,6 +729,9 @@ device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + options HYPERV device hyperv # HyperV drivers diff --git a/sys/x86/include/kvm.h b/sys/x86/include/kvm.h new file mode 100644 index 000000000000..beec6447d7d6 --- /dev/null +++ b/sys/x86/include/kvm.h @@ -0,0 +1,80 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org> + * Copyright (c) 2021 Mathieu Chouquet-Stringer + * Copyright (c) 2021 Juniper Networks, Inc. + * Copyright (c) 2021 Klara, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Linux KVM paravirtualization: common definitions + * + * References: + * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html + * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html + */ + +#ifndef _X86_KVM_H_ +#define _X86_KVM_H_ + +#include <sys/types.h> +#include <sys/systm.h> + +#include <machine/md_var.h> + +#define KVM_CPUID_SIGNATURE 0x40000000 +#define KVM_CPUID_FEATURES_LEAF 0x40000001 + +#define KVM_FEATURE_CLOCKSOURCE 0x00000001 +#define KVM_FEATURE_CLOCKSOURCE2 0x00000008 +#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 0x01000000 + +/* Deprecated: for the CLOCKSOURCE feature. */ +#define KVM_MSR_WALL_CLOCK 0x11 +#define KVM_MSR_SYSTEM_TIME 0x12 + +#define KVM_MSR_WALL_CLOCK_NEW 0x4b564d00 +#define KVM_MSR_SYSTEM_TIME_NEW 0x4b564d01 + +static inline bool +kvm_cpuid_features_leaf_supported(void) +{ + return (vm_guest == VM_GUEST_KVM && + KVM_CPUID_FEATURES_LEAF > hv_base && + KVM_CPUID_FEATURES_LEAF <= hv_high); +} + +static inline void +kvm_cpuid_get_features(u_int *regs) +{ + if (!kvm_cpuid_features_leaf_supported()) + regs[0] = regs[1] = regs[2] = regs[3] = 0; + else + do_cpuid(KVM_CPUID_FEATURES_LEAF, regs); +} + +#endif /* !_X86_KVM_H_ */