git: 6c69c6bb4c7f - main - kvm_clock: KVM paravirtual clock support

Colin Percival cperciva at tarsnap.com
Sun Aug 29 03:52:19 UTC 2021


Will this be MFCed to stable/13 ?  I'd really really like to have it in 13.1
since it fixes a regression in EC2.

Colin Percival

On 8/14/21 5:58 AM, Konstantin Belousov wrote:
> The branch main has been updated by kib:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=6c69c6bb4c7ffde48b130df707799d4eca21ca9f
> 
> commit 6c69c6bb4c7ffde48b130df707799d4eca21ca9f
> Author:     Adam Fenn <adam at fenn.io>
> AuthorDate: 2021-08-04 15:42:48 +0000
> Commit:     Konstantin Belousov <kib at FreeBSD.org>
> CommitDate: 2021-08-14 12:57:54 +0000
> 
>     kvm_clock: KVM paravirtual clock support
>     
>     Add support for the KVM paravirtual clock device.
>     
>     Sponsored by:   Juniper Networks, Inc.
>     Sponsored by:   Klara, Inc.
>     Reviewed by:    kib
>     Differential Revision:  https://reviews.freebsd.org/D29733
> ---
>  sys/amd64/conf/GENERIC        |   3 +
>  sys/amd64/conf/MINIMAL        |   3 +
>  sys/amd64/conf/NOTES          |   3 +
>  sys/conf/files.x86            |   3 +-
>  sys/dev/kvm_clock/kvm_clock.c | 240 ++++++++++++++++++++++++++++++++++++++++++
>  sys/i386/conf/GENERIC         |   3 +
>  sys/i386/conf/MINIMAL         |   3 +
>  sys/i386/conf/NOTES           |   3 +
>  sys/x86/include/kvm.h         |  80 ++++++++++++++
>  9 files changed, 340 insertions(+), 1 deletion(-)
> 
> diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
> index f7b41919575d..78b5e0e299f6 100644
> --- a/sys/amd64/conf/GENERIC
> +++ b/sys/amd64/conf/GENERIC
> @@ -376,6 +376,9 @@ device		virtio_blk		# VirtIO Block device
>  device		virtio_scsi		# VirtIO SCSI device
>  device		virtio_balloon		# VirtIO Memory Balloon device
>  
> +# Linux KVM paravirtualization support
> +device		kvm_clock		# KVM paravirtual clock driver
> +
>  # HyperV drivers and enhancement support
>  device		hyperv			# HyperV drivers 
>  
> diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL
> index 14f91e6c8eaf..f724cbd2e3f1 100644
> --- a/sys/amd64/conf/MINIMAL
> +++ b/sys/amd64/conf/MINIMAL
> @@ -131,6 +131,9 @@ device		ether			# Ethernet support
>  # Note that 'bpf' is required for DHCP.
>  device		bpf			# Berkeley packet filter
>  
> +# Linux KVM paravirtualization support
> +device		kvm_clock		# KVM paravirtual clock driver
> +
>  # Xen HVM Guest Optimizations
>  # NOTE: XENHVM depends on xenpci and xentimer.
>  # They must be added or removed together.
> diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
> index 501ceaedb222..c1db8ef7512f 100644
> --- a/sys/amd64/conf/NOTES
> +++ b/sys/amd64/conf/NOTES
> @@ -498,6 +498,9 @@ device		virtio_balloon	# VirtIO Memory Balloon device
>  device		virtio_random	# VirtIO Entropy device
>  device		virtio_console	# VirtIO Console device
>  
> +# Linux KVM paravirtualization support
> +device		kvm_clock	# KVM paravirtual clock driver
> +
>  # Microsoft Hyper-V enhancement support
>  device 		hyperv		# HyperV drivers
>  
> diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
> index d1fc234c4182..d0cda2da8580 100644
> --- a/sys/conf/files.x86
> +++ b/sys/conf/files.x86
> @@ -263,6 +263,7 @@ dev/isci/scil/scif_sas_task_request_state_handlers.c		optional isci
>  dev/isci/scil/scif_sas_task_request_states.c			optional isci
>  dev/isci/scil/scif_sas_timer.c					optional isci
>  dev/itwd/itwd.c							optional itwd
> +dev/kvm_clock/kvm_clock.c	optional	kvm_clock
>  dev/qat/qat.c			optional qat
>  dev/qat/qat_ae.c		optional qat
>  dev/qat/qat_c2xxx.c		optional qat
> @@ -318,7 +319,7 @@ x86/x86/x86_mem.c		optional	mem
>  x86/x86/mp_x86.c		optional	smp
>  x86/x86/mp_watchdog.c		optional	mp_watchdog smp
>  x86/x86/nexus.c			standard
> -x86/x86/pvclock.c		optional	xenhvm
> +x86/x86/pvclock.c		optional	kvm_clock | xenhvm
>  x86/x86/stack_machdep.c		optional	ddb | stack
>  x86/x86/tsc.c			standard
>  x86/x86/ucode.c			standard
> diff --git a/sys/dev/kvm_clock/kvm_clock.c b/sys/dev/kvm_clock/kvm_clock.c
> new file mode 100644
> index 000000000000..1a76432e417d
> --- /dev/null
> +++ b/sys/dev/kvm_clock/kvm_clock.c
> @@ -0,0 +1,240 @@
> +/*-
> + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
> + *
> + * Copyright (c) 2014 Bryan Venteicher <bryanv at FreeBSD.org>
> + * Copyright (c) 2021 Mathieu Chouquet-Stringer
> + * Copyright (c) 2021 Juniper Networks, Inc.
> + * Copyright (c) 2021 Klara, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +/*
> + * Linux KVM paravirtual clock support
> + *
> + * References:
> + *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
> + *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <sys/param.h>
> +#include <sys/bus.h>
> +#include <sys/domainset.h>
> +#include <sys/kernel.h>
> +#include <sys/malloc.h>
> +#include <sys/module.h>
> +#include <sys/smp.h>
> +
> +#include <vm/vm.h>
> +#include <vm/pmap.h>
> +#include <vm/vm_extern.h>
> +
> +#include <machine/pvclock.h>
> +#include <x86/kvm.h>
> +
> +#include "clock_if.h"
> +
> +#define	KVM_CLOCK_DEVNAME		"kvmclock"
> +/*
> + * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's
> + * default value of 800, and (3) below the TSC's value when it supports the
> + * "Invariant TSC" feature and is believed to be synchronized across all CPUs.
> + */
> +#define	KVM_CLOCK_TC_QUALITY		975
> +
> +struct kvm_clock_softc {
> +	struct pvclock			 pvc;
> +	struct pvclock_wall_clock	 wc;
> +	struct pvclock_vcpu_time_info	*timeinfos;
> +	u_int				 msr_tc;
> +	u_int				 msr_wc;
> +};
> +
> +static devclass_t	kvm_clock_devclass;
> +
> +static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
> +static void	kvm_clock_system_time_enable(struct kvm_clock_softc *sc);
> +static void	kvm_clock_system_time_enable_pcpu(void *arg);
> +
> +static struct pvclock_wall_clock *
> +kvm_clock_get_wallclock(void *arg)
> +{
> +	struct kvm_clock_softc *sc = arg;
> +
> +	wrmsr(sc->msr_wc, vtophys(&sc->wc));
> +	return (&sc->wc);
> +}
> +
> +static void
> +kvm_clock_system_time_enable(struct kvm_clock_softc *sc)
> +{
> +	smp_rendezvous(NULL, kvm_clock_system_time_enable_pcpu, NULL, sc);
> +}
> +
> +static void
> +kvm_clock_system_time_enable_pcpu(void *arg)
> +{
> +	struct kvm_clock_softc *sc = arg;
> +
> +	/*
> +	 * See [2]; the lsb of this MSR is the system time enable bit.
> +	 */
> +	wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
> +}
> +
> +static void
> +kvm_clock_identify(driver_t *driver, device_t parent)
> +{
> +	u_int regs[4];
> +
> +	kvm_cpuid_get_features(regs);
> +	if ((regs[0] &
> +	    (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
> +		return;
> +	if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
> +		return;
> +	BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
> +}
> +
> +static int
> +kvm_clock_probe(device_t dev)
> +{
> +	device_set_desc(dev, "KVM paravirtual clock");
> +	return (BUS_PROBE_DEFAULT);
> +}
> +
> +static int
> +kvm_clock_attach(device_t dev)
> +{
> +	u_int regs[4];
> +	struct kvm_clock_softc *sc = device_get_softc(dev);
> +	bool stable_flag_supported;
> +
> +	/* Process KVM "features" CPUID leaf content: */
> +	kvm_cpuid_get_features(regs);
> +	if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
> +		sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
> +		sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
> +	} else {
> +		KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
> +		    ("Clocksource feature flags disappeared since "
> +		    "kvm_clock_identify: regs[0] %#0x.", regs[0]));
> +		sc->msr_tc = KVM_MSR_SYSTEM_TIME;
> +		sc->msr_wc = KVM_MSR_WALL_CLOCK;
> +	}
> +	stable_flag_supported =
> +	    (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
> +
> +	/* Set up 'struct pvclock_vcpu_time_info' page(s): */
> +	sc->timeinfos = (struct pvclock_vcpu_time_info *)kmem_malloc(mp_ncpus *
> +	    sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
> +	kvm_clock_system_time_enable(sc);
> +
> +	/*
> +	 * Init pvclock; register KVM clock wall clock, register KVM clock
> +	 * timecounter, and set up the requisite infrastructure for vDSO access
> +	 * to this timecounter.
> +	 *     Regarding 'tc_flags': Since the KVM MSR documentation does not
> +	 *     specifically discuss suspend/resume scenarios, conservatively
> +	 *     leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
> +	 *     time must be re-inited in such cases.
> +	 */
> +	sc->pvc.get_wallclock = kvm_clock_get_wallclock;
> +	sc->pvc.get_wallclock_arg = sc;
> +	sc->pvc.timeinfos = sc->timeinfos;
> +	sc->pvc.stable_flag_supported = stable_flag_supported;
> +	pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
> +	return (0);
> +}
> +
> +static int
> +kvm_clock_detach(device_t dev)
> +{
> +	struct kvm_clock_softc *sc = device_get_softc(dev);
> +
> +	return (pvclock_destroy(&sc->pvc));
> +}
> +
> +static int
> +kvm_clock_suspend(device_t dev)
> +{
> +	return (0);
> +}
> +
> +static int
> +kvm_clock_resume(device_t dev)
> +{
> +	/*
> +	 * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
> +	 * conservatively assume that the system time must be re-inited in
> +	 * suspend/resume scenarios.
> +	 */
> +	kvm_clock_system_time_enable(device_get_softc(dev));
> +	pvclock_resume();
> +	inittodr(time_second);
> +	return (0);
> +}
> +
> +static int
> +kvm_clock_gettime(device_t dev, struct timespec *ts)
> +{
> +	struct kvm_clock_softc *sc = device_get_softc(dev);
> +
> +	pvclock_gettime(&sc->pvc, ts);
> +	return (0);
> +}
> +
> +static int
> +kvm_clock_settime(device_t dev, struct timespec *ts)
> +{
> +	/*
> +	 * Even though it is not possible to set the KVM clock's wall clock, to
> +	 * avoid the possibility of periodic benign error messages from
> +	 * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
> +	 */
> +	return (0);
> +}
> +
> +static device_method_t kvm_clock_methods[] = {
> +	DEVMETHOD(device_identify,	kvm_clock_identify),
> +	DEVMETHOD(device_probe,		kvm_clock_probe),
> +	DEVMETHOD(device_attach,	kvm_clock_attach),
> +	DEVMETHOD(device_detach,	kvm_clock_detach),
> +	DEVMETHOD(device_suspend,	kvm_clock_suspend),
> +	DEVMETHOD(device_resume,	kvm_clock_resume),
> +	/* clock interface */
> +	DEVMETHOD(clock_gettime,	kvm_clock_gettime),
> +	DEVMETHOD(clock_settime,	kvm_clock_settime),
> +
> +	DEVMETHOD_END
> +};
> +
> +static driver_t kvm_clock_driver = {
> +	KVM_CLOCK_DEVNAME,
> +	kvm_clock_methods,
> +	sizeof(struct kvm_clock_softc),
> +};
> +
> +DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, kvm_clock_devclass, 0, 0);
> diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
> index 86c062effd81..5447c452c4f7 100644
> --- a/sys/i386/conf/GENERIC
> +++ b/sys/i386/conf/GENERIC
> @@ -337,6 +337,9 @@ device		virtio_blk		# VirtIO Block device
>  device		virtio_scsi		# VirtIO SCSI device
>  device		virtio_balloon		# VirtIO Memory Balloon device
>  
> +# Linux KVM paravirtualization support
> +device		kvm_clock		# KVM paravirtual clock driver
> +
>  # HyperV drivers and enhancement support
>  # NOTE: HYPERV depends on hyperv.  They must be added or removed together.
>  options 	HYPERV			# Kernel support for HyperV drivers
> diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL
> index 37b8e074ac65..9d735dbb0580 100644
> --- a/sys/i386/conf/MINIMAL
> +++ b/sys/i386/conf/MINIMAL
> @@ -145,6 +145,9 @@ device		gif			# IPv6 and IPv4 tunneling
>  # Note that 'bpf' is required for DHCP.
>  device		bpf			# Berkeley packet filter
>  
> +# Linux KVM paravirtualization support
> +device		kvm_clock		# KVM paravirtual clock driver
> +
>  # Xen HVM Guest Optimizations
>  # NOTE: XENHVM depends on xenpci.  They must be added or removed together.
>  options 	XENHVM			# Xen HVM kernel infrastructure
> diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES
> index df9ff4fb3aed..27cebf0c48a0 100644
> --- a/sys/i386/conf/NOTES
> +++ b/sys/i386/conf/NOTES
> @@ -719,6 +719,9 @@ device		virtio_balloon	# VirtIO Memory Balloon device
>  device		virtio_random	# VirtIO Entropy device
>  device		virtio_console	# VirtIO Console device
>  
> +# Linux KVM paravirtualization support
> +device 		kvm_clock	# KVM paravirtual clock driver
> +
>  options 	HYPERV
>  device		hyperv		# HyperV drivers
>  
> diff --git a/sys/x86/include/kvm.h b/sys/x86/include/kvm.h
> new file mode 100644
> index 000000000000..beec6447d7d6
> --- /dev/null
> +++ b/sys/x86/include/kvm.h
> @@ -0,0 +1,80 @@
> +/*-
> + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
> + *
> + * Copyright (c) 2014 Bryan Venteicher <bryanv at FreeBSD.org>
> + * Copyright (c) 2021 Mathieu Chouquet-Stringer
> + * Copyright (c) 2021 Juniper Networks, Inc.
> + * Copyright (c) 2021 Klara, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + * $FreeBSD$
> + */
> +
> +/*
> + * Linux KVM paravirtualization: common definitions
> + *
> + * References:
> + *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
> + *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
> + */
> +
> +#ifndef _X86_KVM_H_
> +#define	_X86_KVM_H_
> +
> +#include <sys/types.h>
> +#include <sys/systm.h>
> +
> +#include <machine/md_var.h>
> +
> +#define	KVM_CPUID_SIGNATURE			0x40000000
> +#define	KVM_CPUID_FEATURES_LEAF			0x40000001
> +
> +#define	KVM_FEATURE_CLOCKSOURCE			0x00000001
> +#define	KVM_FEATURE_CLOCKSOURCE2		0x00000008
> +#define	KVM_FEATURE_CLOCKSOURCE_STABLE_BIT	0x01000000
> +
> +/* Deprecated: for the CLOCKSOURCE feature. */
> +#define	KVM_MSR_WALL_CLOCK			0x11
> +#define	KVM_MSR_SYSTEM_TIME			0x12
> +
> +#define	KVM_MSR_WALL_CLOCK_NEW			0x4b564d00
> +#define	KVM_MSR_SYSTEM_TIME_NEW			0x4b564d01
> +
> +static inline bool
> +kvm_cpuid_features_leaf_supported(void)
> +{
> +	return (vm_guest == VM_GUEST_KVM &&
> +	    KVM_CPUID_FEATURES_LEAF > hv_base &&
> +	    KVM_CPUID_FEATURES_LEAF <= hv_high);
> +}
> +
> +static inline void
> +kvm_cpuid_get_features(u_int *regs)
> +{
> +	if (!kvm_cpuid_features_leaf_supported())
> +		regs[0] = regs[1] = regs[2] = regs[3] = 0;
> +	else
> +		do_cpuid(KVM_CPUID_FEATURES_LEAF, regs);
> +}
> +
> +#endif /* !_X86_KVM_H_ */
> 

-- 
Colin Percival
Security Officer Emeritus, FreeBSD | The power to serve
Founder, Tarsnap | www.tarsnap.com | Online backups for the truly paranoid


More information about the dev-commits-src-all mailing list