git: 870464f11f3c - stable/12 - kvmclock: Fix initialization when EARLY_AP_STARTUP is not defined

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Sun, 29 Jan 2023 18:56:14 UTC
The branch stable/12 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=870464f11f3c53fcc7114ea406f95655b9821263

commit 870464f11f3c53fcc7114ea406f95655b9821263
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2023-01-13 15:01:00 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-01-29 18:56:06 +0000

    kvmclock: Fix initialization when EARLY_AP_STARTUP is not defined
    
    To attach to the hypervisor, kvmclock needs to write a per-CPU MSR.
    When EARLY_AP_STARTUP is not defined, device attach happens too early:
    APs are not yet spun up, so smp_rendezvous only runs the callback on the
    local CPU.  As a result, the timecounter only gets initialized on the
    BSP, and then timekeeping is broken on SMP systems.
    
    Implement handling for !EARLY_AP_STARTUP kernels: keep track of the CPU
    on which device attach ran, and then use a SI_SUB_SMP SYSINIT to
    register the rest of the CPUs with the hypervisor.
    
    Reported by:    Shrikanth R Kamath <kshrikanth@juniper.net>
    Reviewed by:    kib, jhb (earlier versions)
    Sponsored by:   Klara, Inc.
    Sponsored by:   Juniper Networks, Inc.
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D37705
    
    (cherry picked from commit 568f552b0410f4f72db9ce710f7803acca23f79f)
---
 sys/dev/kvm_clock/kvm_clock.c | 46 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/sys/dev/kvm_clock/kvm_clock.c b/sys/dev/kvm_clock/kvm_clock.c
index 1a76432e417d..982a345a74c7 100644
--- a/sys/dev/kvm_clock/kvm_clock.c
+++ b/sys/dev/kvm_clock/kvm_clock.c
@@ -70,12 +70,16 @@ struct kvm_clock_softc {
 	struct pvclock_vcpu_time_info	*timeinfos;
 	u_int				 msr_tc;
 	u_int				 msr_wc;
+#ifndef EARLY_AP_STARTUP
+	int				 firstcpu;
+#endif
 };
 
 static devclass_t	kvm_clock_devclass;
 
 static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
-static void	kvm_clock_system_time_enable(struct kvm_clock_softc *sc);
+static void	kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
+		    const cpuset_t *cpus);
 static void	kvm_clock_system_time_enable_pcpu(void *arg);
 
 static struct pvclock_wall_clock *
@@ -88,9 +92,10 @@ kvm_clock_get_wallclock(void *arg)
 }
 
 static void
-kvm_clock_system_time_enable(struct kvm_clock_softc *sc)
+kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
 {
-	smp_rendezvous(NULL, kvm_clock_system_time_enable_pcpu, NULL, sc);
+	smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
+	    NULL, sc);
 }
 
 static void
@@ -104,6 +109,32 @@ kvm_clock_system_time_enable_pcpu(void *arg)
 	wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
 }
 
+#ifndef EARLY_AP_STARTUP
+static void
+kvm_clock_init_smp(void *arg __unused)
+{
+	devclass_t kvm_clock_devclass;
+	cpuset_t cpus;
+	struct kvm_clock_softc *sc;
+
+	kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME);
+	sc = devclass_get_softc(kvm_clock_devclass, 0);
+	if (sc == NULL || mp_ncpus == 1)
+		return;
+
+	/*
+	 * Register with the hypervisor on all CPUs except the one that
+	 * registered in kvm_clock_attach().
+	 */
+	cpus = all_cpus;
+	KASSERT(CPU_ISSET(sc->firstcpu, &cpus),
+	    ("%s: invalid first CPU %d", __func__, sc->firstcpu));
+	CPU_CLR(sc->firstcpu, &cpus);
+	kvm_clock_system_time_enable(sc, &cpus);
+}
+SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
+#endif
+
 static void
 kvm_clock_identify(driver_t *driver, device_t parent)
 {
@@ -150,7 +181,12 @@ kvm_clock_attach(device_t dev)
 	/* Set up 'struct pvclock_vcpu_time_info' page(s): */
 	sc->timeinfos = (struct pvclock_vcpu_time_info *)kmem_malloc(mp_ncpus *
 	    sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
-	kvm_clock_system_time_enable(sc);
+#ifdef EARLY_AP_STARTUP
+	kvm_clock_system_time_enable(sc, &all_cpus);
+#else
+	sc->firstcpu = curcpu;
+	kvm_clock_system_time_enable_pcpu(sc);
+#endif
 
 	/*
 	 * Init pvclock; register KVM clock wall clock, register KVM clock
@@ -191,7 +227,7 @@ kvm_clock_resume(device_t dev)
 	 * conservatively assume that the system time must be re-inited in
 	 * suspend/resume scenarios.
 	 */
-	kvm_clock_system_time_enable(device_get_softc(dev));
+	kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
 	pvclock_resume();
 	inittodr(time_second);
 	return (0);