svn commit: r255726 - in head/sys: amd64/amd64 amd64/include dev/acpica dev/xen/control dev/xen/timer dev/xen/xenpci i386/i386 i386/include kern sys x86/acpica x86/isa x86/x86 x86/xen xen

Justin T. Gibbs gibbs at FreeBSD.org
Fri Sep 20 05:06:07 UTC 2013


Author: gibbs
Date: Fri Sep 20 05:06:03 2013
New Revision: 255726
URL: http://svnweb.freebsd.org/changeset/base/255726

Log:
  Add support for suspend/resume/migration operations when running as a
  Xen PVHVM guest.
  
  Submitted by:	Roger Pau Monné
  Sponsored by:	Citrix Systems R&D
  Reviewed by:	gibbs
  Approved by:	re (blanket Xen)
  MFC after:	2 weeks
  
  sys/amd64/amd64/mp_machdep.c:
  sys/i386/i386/mp_machdep.c:
  	- Make sure that there are no MMU-related IPIs pending on migration.
  	- Reset pending IPI_BITMAP on resume.
  	- Init vcpu_info on resume.
  
  sys/amd64/include/intr_machdep.h:
  sys/i386/include/intr_machdep.h:
  sys/x86/acpica/acpi_wakeup.c:
  sys/x86/x86/intr_machdep.c:
  sys/x86/isa/atpic.c:
  sys/x86/x86/io_apic.c:
  sys/x86/x86/local_apic.c:
  	- Add a "suspend_cancelled" parameter to pic_resume().  For the
  	  Xen PIC, restoration of interrupt services differs between
  	  the aborted suspend and normal resume cases, so we must provide
  	  this information.
  
  sys/dev/acpica/acpi_timer.c:
  sys/dev/xen/timer/timer.c:
  sys/timetc.h:
  	- Don't swap out "suspend safe" timers across a suspend/resume
  	  cycle.  This includes the Xen PV and ACPI timers.
  
  sys/dev/xen/control/control.c:
  	- Perform proper suspend/resume process for PVHVM:
  		- Suspend all APs before going into suspension; this allows us
  		  to reset the vcpu_info on resume for each AP.
  		- Reset shared info page and callback on resume.
  
  sys/dev/xen/timer/timer.c:
  	- Implement suspend/resume support for the PV timer. Since FreeBSD
  	  doesn't perform a per-cpu resume of the timer, we need to call
  	  smp_rendezvous in order to correctly resume the timer on each CPU.
  
  sys/dev/xen/xenpci/xenpci.c:
  	- Don't reset the PCI interrupt on each suspend/resume.
  
  sys/kern/subr_smp.c:
  	- When suspending a PVHVM domain make sure there are no MMU IPIs
  	  in-flight, or we will get a lockup on resume due to the fact that
  	  pending event channels are not carried over on migration.
  	- Implement a generic version of restart_cpus that can be used by
  	  suspended and stopped cpus.
  
  sys/x86/xen/hvm.c:
  	- Implement resume support for the hypercall page and shared info.
  	- Clear vcpu_info so it can be reset by APs when resuming from
  	  suspension.
  
  sys/dev/xen/xenpci/xenpci.c:
  sys/x86/xen/hvm.c:
  sys/x86/xen/xen_intr.c:
  	- Support UP kernel configurations.
  
  sys/x86/xen/xen_intr.c:
  	- Properly rebind per-cpu VIRQs and IPIs on resume.

Modified:
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/include/intr_machdep.h
  head/sys/dev/acpica/acpi_timer.c
  head/sys/dev/xen/control/control.c
  head/sys/dev/xen/timer/timer.c
  head/sys/dev/xen/xenpci/xenpci.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/include/intr_machdep.h
  head/sys/kern/subr_smp.c
  head/sys/sys/smp.h
  head/sys/sys/timetc.h
  head/sys/x86/acpica/acpi_wakeup.c
  head/sys/x86/isa/atpic.c
  head/sys/x86/x86/intr_machdep.c
  head/sys/x86/x86/io_apic.c
  head/sys/x86/x86/local_apic.c
  head/sys/x86/xen/hvm.c
  head/sys/x86/xen/xen_intr.c
  head/sys/xen/hvm.h

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/amd64/amd64/mp_machdep.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -1468,6 +1468,10 @@ cpususpend_handler(void)
 
 	cpu = PCPU_GET(cpuid);
 
+#ifdef XENHVM
+	mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+#endif
+
 	if (savectx(susppcbs[cpu])) {
 		ctx_fpusave(susppcbs[cpu]->pcb_fpususpend);
 		wbinvd();
@@ -1486,11 +1490,23 @@ cpususpend_handler(void)
 	while (!CPU_ISSET(cpu, &started_cpus))
 		ia32_pause();
 
+#ifdef XENHVM
+	/*
+	 * Reset pending bitmap IPIs, because Xen doesn't preserve pending
+	 * event channels on migration.
+	 */
+	cpu_ipi_pending[cpu] = 0;
+	/* register vcpu_info area */
+	xen_hvm_init_cpu();
+#endif
+
 	/* Resume MCA and local APIC */
 	mca_resume();
 	lapic_setup(0);
 
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
+	/* Indicate that we are resumed */
+	CPU_CLR_ATOMIC(cpu, &suspended_cpus);
 }
 
 /*

Modified: head/sys/amd64/include/intr_machdep.h
==============================================================================
--- head/sys/amd64/include/intr_machdep.h	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/amd64/include/intr_machdep.h	Fri Sep 20 05:06:03 2013	(r255726)
@@ -102,7 +102,7 @@ struct pic {
 	int (*pic_vector)(struct intsrc *);
 	int (*pic_source_pending)(struct intsrc *);
 	void (*pic_suspend)(struct pic *);
-	void (*pic_resume)(struct pic *);
+	void (*pic_resume)(struct pic *, bool suspend_cancelled);
 	int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
 	    enum intr_polarity);
 	int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@@ -170,7 +170,7 @@ struct intsrc *intr_lookup_source(int ve
 int	intr_register_pic(struct pic *pic);
 int	intr_register_source(struct intsrc *isrc);
 int	intr_remove_handler(void *cookie);
-void	intr_resume(void);
+void	intr_resume(bool suspend_cancelled);
 void	intr_suspend(void);
 void	intrcnt_add(const char *name, u_long **countp);
 void	nexus_add_irq(u_long irq);

Modified: head/sys/dev/acpica/acpi_timer.c
==============================================================================
--- head/sys/dev/acpica/acpi_timer.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/dev/acpica/acpi_timer.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -189,6 +189,7 @@ acpi_timer_probe(device_t dev)
     else
 	acpi_timer_timecounter.tc_counter_mask = 0x00ffffff;
     acpi_timer_timecounter.tc_frequency = acpi_timer_frequency;
+    acpi_timer_timecounter.tc_flags = TC_FLAGS_SUSPEND_SAFE;
     if (testenv("debug.acpi.timer_test"))
 	acpi_timer_boot_test();
 
@@ -285,6 +286,14 @@ acpi_timer_suspend_handler(struct timeco
 		acpi_timer_eh = NULL;
 	}
 
+	if ((timecounter->tc_flags & TC_FLAGS_SUSPEND_SAFE) != 0) {
+		/*
+		 * If we are using a suspend safe timecounter, don't
+		 * save/restore it across suspend/resume.
+		 */
+		return;
+	}
+
 	KASSERT(newtc == &acpi_timer_timecounter,
 	    ("acpi_timer_suspend_handler: wrong timecounter"));
 

Modified: head/sys/dev/xen/control/control.c
==============================================================================
--- head/sys/dev/xen/control/control.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/dev/xen/control/control.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -119,11 +119,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/taskqueue.h>
 #include <sys/types.h>
 #include <sys/vnode.h>
-
-#ifndef XENHVM
 #include <sys/sched.h>
 #include <sys/smp.h>
-#endif
+#include <sys/eventhandler.h>
 
 #include <geom/geom.h>
 
@@ -140,6 +138,10 @@ __FBSDID("$FreeBSD$");
 #include <xen/gnttab.h>
 #include <xen/xen_intr.h>
 
+#ifdef XENHVM
+#include <xen/hvm.h>
+#endif
+
 #include <xen/interface/event_channel.h>
 #include <xen/interface/grant_table.h>
 
@@ -199,7 +201,7 @@ extern void xencons_resume(void);
 static void
 xctrl_suspend()
 {
-	int i, j, k, fpp;
+	int i, j, k, fpp, suspend_cancelled;
 	unsigned long max_pfn, start_info_mfn;
 
 	EVENTHANDLER_INVOKE(power_suspend);
@@ -264,7 +266,7 @@ xctrl_suspend()
 	 */
 	start_info_mfn = VTOMFN(xen_start_info);
 	pmap_suspend();
-	HYPERVISOR_suspend(start_info_mfn);
+	suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
 	pmap_resume();
 
 	pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
@@ -287,7 +289,7 @@ xctrl_suspend()
 	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
 
 	gnttab_resume();
-	intr_resume();
+	intr_resume(suspend_cancelled != 0);
 	local_irq_enable();
 	xencons_resume();
 
@@ -331,16 +333,31 @@ xen_pv_shutdown_final(void *arg, int how
 }
 
 #else
-extern void xenpci_resume(void);
 
 /* HVM mode suspension. */
 static void
 xctrl_suspend()
 {
+#ifdef SMP
+	cpuset_t cpu_suspend_map;
+#endif
 	int suspend_cancelled;
 
 	EVENTHANDLER_INVOKE(power_suspend);
 
+	if (smp_started) {
+		thread_lock(curthread);
+		sched_bind(curthread, 0);
+		thread_unlock(curthread);
+	}
+	KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
+
+	/*
+	 * Clear our XenStore node so the toolstack knows we are
+	 * responding to the suspend request.
+	 */
+	xs_write(XST_NIL, "control", "shutdown", "");
+
 	/*
 	 * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
 	 * drivers need this.
@@ -353,31 +370,67 @@ xctrl_suspend()
 	}
 	mtx_unlock(&Giant);
 
+#ifdef SMP
+	if (smp_started) {
+		/*
+		 * Suspend other CPUs. This prevents IPIs while we
+		 * are resuming, and will allow us to reset per-cpu
+		 * vcpu_info on resume.
+		 */
+		cpu_suspend_map = all_cpus;
+		CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
+		if (!CPU_EMPTY(&cpu_suspend_map))
+			suspend_cpus(cpu_suspend_map);
+	}
+#endif
+
 	/*
 	 * Prevent any races with evtchn_interrupt() handler.
 	 */
 	disable_intr();
 	intr_suspend();
+	xen_hvm_suspend();
 
 	suspend_cancelled = HYPERVISOR_suspend(0);
 
-	intr_resume();
+	xen_hvm_resume(suspend_cancelled != 0);
+	intr_resume(suspend_cancelled != 0);
+	enable_intr();
 
 	/*
-	 * Re-enable interrupts and put the scheduler back to normal.
+	 * Reset grant table info.
 	 */
-	enable_intr();
+	gnttab_resume();
+
+#ifdef SMP
+	if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
+		/*
+		 * Now that event channels have been initialized,
+		 * resume CPUs.
+		 */
+		resume_cpus(cpu_suspend_map);
+	}
+#endif
 
 	/*
 	 * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
 	 * similar.
 	 */
 	mtx_lock(&Giant);
-	if (!suspend_cancelled)
-		DEVICE_RESUME(root_bus);
+	DEVICE_RESUME(root_bus);
 	mtx_unlock(&Giant);
 
+	if (smp_started) {
+		thread_lock(curthread);
+		sched_unbind(curthread);
+		thread_unlock(curthread);
+	}
+
 	EVENTHANDLER_INVOKE(power_resume);
+
+	if (bootverbose)
+		printf("System resumed after suspension\n");
+
 }
 #endif
 

Modified: head/sys/dev/xen/timer/timer.c
==============================================================================
--- head/sys/dev/xen/timer/timer.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/dev/xen/timer/timer.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -1,4 +1,4 @@
-/**
+/*-
  * Copyright (c) 2009 Adrian Chadd
  * Copyright (c) 2012 Spectra Logic Corporation
  * All rights reserved.
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 #include <machine/clock.h>
 #include <machine/_inttypes.h>
+#include <machine/smp.h>
 
 #include "clock_if.h"
 
@@ -316,7 +317,7 @@ xentimer_settime(device_t dev __unused, 
 	 * Don't return EINVAL here; just silently fail if the domain isn't
 	 * privileged enough to set the TOD.
 	 */
-	return(0);
+	return (0);
 }
 
 /**
@@ -339,7 +340,7 @@ xentimer_gettime(device_t dev, struct ti
 	xen_fetch_uptime(&u_ts);
 	timespecadd(ts, &u_ts);
 
-	return(0);
+	return (0);
 }
 
 /**
@@ -457,8 +458,9 @@ xentimer_attach(device_t dev)
 
 	/* Bind an event channel to a VIRQ on each VCPU. */
 	CPU_FOREACH(i) {
-		struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
+		struct xentimer_pcpu_data *pcpu;
 
+		pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
 		error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
 		if (error) {
 			device_printf(dev, "Error disabling Xen periodic timer "
@@ -493,6 +495,7 @@ xentimer_attach(device_t dev)
 	/* Register the timecounter. */
 	sc->tc.tc_name = "XENTIMER";
 	sc->tc.tc_quality = XENTIMER_QUALITY;
+	sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE;
 	/*
 	 * The underlying resolution is in nanoseconds, since the timer info
 	 * scales TSC frequencies using a fraction that represents time in
@@ -523,75 +526,60 @@ xentimer_detach(device_t dev)
 	return (EBUSY);
 }
 
-/**
- * The following device methods are disabled because they wouldn't work
- * properly.
- */
-#ifdef NOTYET
+static void
+xentimer_percpu_resume(void *arg)
+{
+	device_t dev = (device_t) arg;
+	struct xentimer_softc *sc = device_get_softc(dev);
+
+	xentimer_et_start(&sc->et, sc->et.et_min_period, 0);
+}
+
 static int
 xentimer_resume(device_t dev)
 {
-	struct xentimer_softc *sc = device_get_softc(dev);
-	int error = 0;
+	int error;
 	int i;
 
-	device_printf(sc->dev, "%s", __func__);
+	/* Disable the periodic timer */
 	CPU_FOREACH(i) {
-		struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
-
-		/* Skip inactive timers. */
-		if (pcpu->timer == 0)
-			continue;
-
-		/*
-		 * XXX This won't actually work, because Xen requires that
-		 *     singleshot timers be set while running on the given CPU.
-		 */
-		error = xentimer_vcpu_start_timer(i, pcpu->timer);
-		if (error == -ETIME) {
-			/* Event time has already passed; process. */
-			xentimer_intr(sc);
-		} else if (error != 0) {
-			panic("%s: error %d restarting vcpu %d\n",
-			    __func__, error, i);
+		error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
+		if (error != 0) {
+			device_printf(dev,
+			    "Error disabling Xen periodic timer on CPU %d\n",
+			    i);
+			return (error);
 		}
 	}
 
-	return (error);
+	/* Reset the last uptime value */
+	xen_timer_last_time = 0;
+
+	/* Reset the RTC clock */
+	inittodr(time_second);
+
+	/* Kick the timers on all CPUs */
+	smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev);
+
+	if (bootverbose)
+		device_printf(dev, "resumed operation after suspension\n");
+
+	return (0);
 }
 
 static int
 xentimer_suspend(device_t dev)
 {
-	struct xentimer_softc *sc = device_get_softc(dev);
-	int error = 0;
-	int i;
-
-	device_printf(sc->dev, "%s", __func__);
-	CPU_FOREACH(i) {
-		struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
-
-		/* Skip inactive timers. */
-		if (pcpu->timer == 0)
-			continue;
-		error = xentimer_vcpu_stop_timer(i);
-		if (error)
-			panic("Error %d stopping VCPU %d timer\n", error, i);
-	}
-
-	return (error);
+	return (0);
 }
-#endif
 
 static device_method_t xentimer_methods[] = {
 	DEVMETHOD(device_identify, xentimer_identify),
 	DEVMETHOD(device_probe, xentimer_probe),
 	DEVMETHOD(device_attach, xentimer_attach),
 	DEVMETHOD(device_detach, xentimer_detach),
-#ifdef NOTYET
 	DEVMETHOD(device_suspend, xentimer_suspend),
 	DEVMETHOD(device_resume, xentimer_resume),
-#endif
 	/* clock interface */
 	DEVMETHOD(clock_gettime, xentimer_gettime),
 	DEVMETHOD(clock_settime, xentimer_settime),

Modified: head/sys/dev/xen/xenpci/xenpci.c
==============================================================================
--- head/sys/dev/xen/xenpci/xenpci.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/dev/xen/xenpci/xenpci.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -77,6 +77,7 @@ xenpci_irq_init(device_t device, struct 
 	if (error)
 		return error;
 
+#ifdef SMP
 	/*
 	 * When using the PCI event delivery callback we cannot assign
 	 * events to specific vCPUs, so all events are delivered to vCPU#0 by
@@ -88,6 +89,7 @@ xenpci_irq_init(device_t device, struct 
 	                      scp->res_irq, 0);
 	if (error)
 		return error;
+#endif
 
 	xen_hvm_set_callback(device);
 	return (0);
@@ -309,28 +311,12 @@ xenpci_detach(device_t dev)
 static int
 xenpci_suspend(device_t dev)
 {
-	struct xenpci_softc *scp = device_get_softc(dev);
-	device_t parent = device_get_parent(dev);
-
-	if (scp->intr_cookie != NULL) {
-		if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq,
-		    scp->intr_cookie) != 0)
-			printf("intr teardown failed.. continuing\n");
-		scp->intr_cookie = NULL;
-	}
-
 	return (bus_generic_suspend(dev));
 }
 
 static int
 xenpci_resume(device_t dev)
 {
-	struct xenpci_softc *scp = device_get_softc(dev);
-	device_t parent = device_get_parent(dev);
-
-	BUS_SETUP_INTR(parent, dev, scp->res_irq,
-	    INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL,
-	    /*trap_frame*/NULL, &scp->intr_cookie);
 	xen_hvm_set_callback(dev);
 	return (bus_generic_resume(dev));
 }

Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/i386/i386/mp_machdep.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -1529,6 +1529,10 @@ cpususpend_handler(void)
 
 	cpu = PCPU_GET(cpuid);
 
+#ifdef XENHVM
+	mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+#endif
+
 	if (savectx(susppcbs[cpu])) {
 		wbinvd();
 		CPU_SET_ATOMIC(cpu, &suspended_cpus);
@@ -1545,10 +1549,22 @@ cpususpend_handler(void)
 	while (!CPU_ISSET(cpu, &started_cpus))
 		ia32_pause();
 
+#ifdef XENHVM
+	/*
+	 * Reset pending bitmap IPIs, because Xen doesn't preserve pending
+	 * event channels on migration.
+	 */
+	cpu_ipi_pending[cpu] = 0;
+	/* register vcpu_info area */
+	xen_hvm_init_cpu();
+#endif
+
 	/* Resume MCA and local APIC */
 	mca_resume();
 	lapic_setup(0);
 
+	/* Indicate that we are resumed */
+	CPU_CLR_ATOMIC(cpu, &suspended_cpus);
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
 }
 /*

Modified: head/sys/i386/include/intr_machdep.h
==============================================================================
--- head/sys/i386/include/intr_machdep.h	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/i386/include/intr_machdep.h	Fri Sep 20 05:06:03 2013	(r255726)
@@ -108,7 +108,7 @@ struct pic {
 	int (*pic_vector)(struct intsrc *);
 	int (*pic_source_pending)(struct intsrc *);
 	void (*pic_suspend)(struct pic *);
-	void (*pic_resume)(struct pic *);
+	void (*pic_resume)(struct pic *, bool suspend_cancelled);
 	int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
 	    enum intr_polarity);
 	int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@@ -166,7 +166,7 @@ struct intsrc *intr_lookup_source(int ve
 int	intr_register_pic(struct pic *pic);
 int	intr_register_source(struct intsrc *isrc);
 int	intr_remove_handler(void *cookie);
-void	intr_resume(void);
+void	intr_resume(bool suspend_cancelled);
 void	intr_suspend(void);
 void	intrcnt_add(const char *name, u_long **countp);
 void	nexus_add_irq(u_long irq);

Modified: head/sys/kern/subr_smp.c
==============================================================================
--- head/sys/kern/subr_smp.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/kern/subr_smp.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -225,6 +225,18 @@ generic_stop_cpus(cpuset_t map, u_int ty
 	CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
 	    cpusetobj_strprint(cpusetbuf, &map), type);
 
+#ifdef XENHVM
+	/*
+	 * When migrating a PVHVM domain we need to make sure there are
+	 * no IPIs in progress.  IPIs that have been issued, but not
+	 * yet delivered (not pending on a vCPU) will be lost in the
+	 * IPI rebinding process, violating FreeBSD's assumption of
+	 * reliable IPI delivery.
+	 */
+	if (type == IPI_SUSPEND)
+		mtx_lock_spin(&smp_ipi_mtx);
+#endif
+
 	if (stopping_cpu != PCPU_GET(cpuid))
 		while (atomic_cmpset_int(&stopping_cpu, NOCPU,
 		    PCPU_GET(cpuid)) == 0)
@@ -252,6 +264,11 @@ generic_stop_cpus(cpuset_t map, u_int ty
 		}
 	}
 
+#ifdef XENHVM
+	if (type == IPI_SUSPEND)
+		mtx_unlock_spin(&smp_ipi_mtx);
+#endif
+
 	stopping_cpu = NOCPU;
 	return (1);
 }
@@ -292,28 +309,60 @@ suspend_cpus(cpuset_t map)
  *   0: NA
  *   1: ok
  */
-int
-restart_cpus(cpuset_t map)
+static int
+generic_restart_cpus(cpuset_t map, u_int type)
 {
 #ifdef KTR
 	char cpusetbuf[CPUSETBUFSIZ];
 #endif
+	volatile cpuset_t *cpus;
+
+	KASSERT(
+#if defined(__amd64__) || defined(__i386__)
+	    type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
+#else
+	    type == IPI_STOP || type == IPI_STOP_HARD,
+#endif
+	    ("%s: invalid stop type", __func__));
 
 	if (!smp_started)
 		return 0;
 
 	CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
 
+#if defined(__amd64__) || defined(__i386__)
+	if (type == IPI_SUSPEND)
+		cpus = &suspended_cpus;
+	else
+#endif
+		cpus = &stopped_cpus;
+
 	/* signal other cpus to restart */
 	CPU_COPY_STORE_REL(&map, &started_cpus);
 
 	/* wait for each to clear its bit */
-	while (CPU_OVERLAP(&stopped_cpus, &map))
+	while (CPU_OVERLAP(cpus, &map))
 		cpu_spinwait();
 
 	return 1;
 }
 
+int
+restart_cpus(cpuset_t map)
+{
+
+	return (generic_restart_cpus(map, IPI_STOP));
+}
+
+#if defined(__amd64__) || defined(__i386__)
+int
+resume_cpus(cpuset_t map)
+{
+
+	return (generic_restart_cpus(map, IPI_SUSPEND));
+}
+#endif
+
 /*
  * All-CPU rendezvous.  CPUs are signalled, all execute the setup function 
  * (if specified), rendezvous, execute the action function (if specified),

Modified: head/sys/sys/smp.h
==============================================================================
--- head/sys/sys/smp.h	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/sys/smp.h	Fri Sep 20 05:06:03 2013	(r255726)
@@ -166,6 +166,7 @@ int	stop_cpus(cpuset_t);
 int	stop_cpus_hard(cpuset_t);
 #if defined(__amd64__) || defined(__i386__)
 int	suspend_cpus(cpuset_t);
+int	resume_cpus(cpuset_t);
 #endif
 
 void	smp_rendezvous_action(void);

Modified: head/sys/sys/timetc.h
==============================================================================
--- head/sys/sys/timetc.h	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/sys/timetc.h	Fri Sep 20 05:06:03 2013	(r255726)
@@ -59,6 +59,10 @@ struct timecounter {
 		 */
 	u_int			tc_flags;
 #define	TC_FLAGS_C3STOP		1	/* Timer dies in C3. */
+#define	TC_FLAGS_SUSPEND_SAFE	2	/*
+					 * Timer functional across
+					 * suspend/resume.
+					 */
 
 	void			*tc_priv;
 		/* Pointer to the timecounter's private parts. */

Modified: head/sys/x86/acpica/acpi_wakeup.c
==============================================================================
--- head/sys/x86/acpica/acpi_wakeup.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/x86/acpica/acpi_wakeup.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -266,7 +266,7 @@ acpi_wakeup_machdep(struct acpi_softc *s
 			restart_cpus(suspcpus);
 #endif
 		mca_resume();
-		intr_resume();
+		intr_resume(/*suspend_cancelled*/false);
 
 		AcpiSetFirmwareWakingVector(0);
 	} else {

Modified: head/sys/x86/isa/atpic.c
==============================================================================
--- head/sys/x86/isa/atpic.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/x86/isa/atpic.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -123,7 +123,7 @@ static void atpic_eoi_slave(struct intsr
 static void atpic_enable_intr(struct intsrc *isrc);
 static void atpic_disable_intr(struct intsrc *isrc);
 static int atpic_vector(struct intsrc *isrc);
-static void atpic_resume(struct pic *pic);
+static void atpic_resume(struct pic *pic, bool suspend_cancelled);
 static int atpic_source_pending(struct intsrc *isrc);
 static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
     enum intr_polarity pol);
@@ -276,7 +276,7 @@ atpic_source_pending(struct intsrc *isrc
 }
 
 static void
-atpic_resume(struct pic *pic)
+atpic_resume(struct pic *pic, bool suspend_cancelled)
 {
 	struct atpic *ap = (struct atpic *)pic;
 

Modified: head/sys/x86/x86/intr_machdep.c
==============================================================================
--- head/sys/x86/x86/intr_machdep.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/x86/x86/intr_machdep.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -279,7 +279,7 @@ intr_execute_handlers(struct intsrc *isr
 }
 
 void
-intr_resume(void)
+intr_resume(bool suspend_cancelled)
 {
 	struct pic *pic;
 
@@ -289,7 +289,7 @@ intr_resume(void)
 	mtx_lock(&intr_table_lock);
 	TAILQ_FOREACH(pic, &pics, pics) {
 		if (pic->pic_resume != NULL)
-			pic->pic_resume(pic);
+			pic->pic_resume(pic, suspend_cancelled);
 	}
 	mtx_unlock(&intr_table_lock);
 }

Modified: head/sys/x86/x86/io_apic.c
==============================================================================
--- head/sys/x86/x86/io_apic.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/x86/x86/io_apic.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -119,7 +119,7 @@ static int	ioapic_vector(struct intsrc *
 static int	ioapic_source_pending(struct intsrc *isrc);
 static int	ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
-static void	ioapic_resume(struct pic *pic);
+static void	ioapic_resume(struct pic *pic, bool suspend_cancelled);
 static int	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void	ioapic_program_intpin(struct ioapic_intsrc *intpin);
 
@@ -486,7 +486,7 @@ ioapic_config_intr(struct intsrc *isrc, 
 }
 
 static void
-ioapic_resume(struct pic *pic)
+ioapic_resume(struct pic *pic, bool suspend_cancelled)
 {
 	struct ioapic *io = (struct ioapic *)pic;
 	int i;

Modified: head/sys/x86/x86/local_apic.c
==============================================================================
--- head/sys/x86/x86/local_apic.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/x86/x86/local_apic.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -161,7 +161,7 @@ static u_long lapic_timer_divisor;
 static struct eventtimer lapic_et;
 
 static void	lapic_enable(void);
-static void	lapic_resume(struct pic *pic);
+static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
 static void	lapic_timer_oneshot(struct lapic *,
 		    u_int count, int enable_int);
 static void	lapic_timer_periodic(struct lapic *,
@@ -566,7 +566,7 @@ lapic_enable(void)
 
 /* Reset the local APIC on the BSP during resume. */
 static void
-lapic_resume(struct pic *pic)
+lapic_resume(struct pic *pic, bool suspend_cancelled)
 {
 
 	lapic_setup(0);

Modified: head/sys/x86/xen/hvm.c
==============================================================================
--- head/sys/x86/xen/hvm.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/x86/xen/hvm.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
 #include <xen/interface/vcpu.h>
 
 /*--------------------------- Forward Declarations ---------------------------*/
+#ifdef SMP
 static driver_filter_t xen_smp_rendezvous_action;
 static driver_filter_t xen_invltlb;
 static driver_filter_t xen_invlpg;
@@ -70,6 +71,7 @@ static driver_filter_t xen_ipi_bitmap_ha
 static driver_filter_t xen_cpustop_handler;
 static driver_filter_t xen_cpususpend_handler;
 static driver_filter_t xen_cpustophard_handler;
+#endif
 
 /*---------------------------- Extern Declarations ---------------------------*/
 /* Variables used by mp_machdep to perform the MMU related IPIs */
@@ -93,6 +95,12 @@ extern void pmap_lazyfix_action(void);
 #define	IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
 
 /*-------------------------------- Local Types -------------------------------*/
+enum xen_hvm_init_type {
+	XEN_HVM_INIT_COLD,
+	XEN_HVM_INIT_CANCELLED_SUSPEND,
+	XEN_HVM_INIT_RESUME
+};
+
 struct xen_ipi_handler
 {
 	driver_filter_t	*filter;
@@ -104,6 +112,7 @@ enum xen_domain_type xen_domain_type = X
 
 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
 
+#ifdef SMP
 static struct xen_ipi_handler xen_ipis[] = 
 {
 	[IPI_TO_IDX(IPI_RENDEZVOUS)]	= { xen_smp_rendezvous_action,	"r"   },
@@ -119,6 +128,7 @@ static struct xen_ipi_handler xen_ipis[]
 	[IPI_TO_IDX(IPI_SUSPEND)]	= { xen_cpususpend_handler,	"sp"  },
 	[IPI_TO_IDX(IPI_STOP_HARD)]	= { xen_cpustophard_handler,	"sth" },
 };
+#endif
 
 /**
  * If non-zero, the hypervisor has been configured to use a direct
@@ -129,13 +139,16 @@ int xen_vector_callback_enabled;
 /*------------------------------- Per-CPU Data -------------------------------*/
 DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
 DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
+#ifdef SMP
 DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
+#endif
 
 /*------------------ Hypervisor Access Shared Memory Regions -----------------*/
 /** Hypercall table accessed via HYPERVISOR_*_op() methods. */
 char *hypercall_stubs;
 shared_info_t *HYPERVISOR_shared_info;
 
+#ifdef SMP
 /*---------------------------- XEN PV IPI Handlers ---------------------------*/
 /*
  * This are C clones of the ASM functions found in apic_vector.s
@@ -496,6 +509,7 @@ xen_init_ipis(void)
 	/* Set the xen pv ipi ops to replace the native ones */
 	cpu_ops.ipi_vectored = xen_ipi_vectored;
 }
+#endif
 
 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
 static uint32_t
@@ -579,6 +593,9 @@ xen_hvm_set_callback(device_t dev)
 	struct xen_hvm_param xhp;
 	int irq;
 
+	if (xen_vector_callback_enabled)
+		return;
+
 	xhp.domid = DOMID_SELF;
 	xhp.index = HVM_PARAM_CALLBACK_IRQ;
 	if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
@@ -637,41 +654,83 @@ xen_hvm_disable_emulated_devices(void)
 	outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS);
 }
 
+static void
+xen_hvm_init(enum xen_hvm_init_type init_type)
+{
+	int error;
+	int i;
+
+	if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
+		return;
+
+	error = xen_hvm_init_hypercall_stubs();
+
+	switch (init_type) {
+	case XEN_HVM_INIT_COLD:
+		if (error != 0)
+			return;
+
+		setup_xen_features();
+		break;
+	case XEN_HVM_INIT_RESUME:
+		if (error != 0)
+			panic("Unable to init Xen hypercall stubs on resume");
+		break;
+	default:
+		panic("Unsupported HVM initialization type");
+	}
+
+	/* Clear any stale vcpu_info. */
+	CPU_FOREACH(i)
+		DPCPU_ID_SET(i, vcpu_info, NULL);
+
+	xen_vector_callback_enabled = 0;
+	xen_domain_type = XEN_HVM_DOMAIN;
+	xen_hvm_init_shared_info_page();
+	xen_hvm_set_callback(NULL);
+	xen_hvm_disable_emulated_devices();
+} 
+
 void
 xen_hvm_suspend(void)
 {
 }
 
 void
-xen_hvm_resume(void)
+xen_hvm_resume(bool suspend_cancelled)
 {
 
-	xen_hvm_init_hypercall_stubs();
-	xen_hvm_init_shared_info_page();
+	xen_hvm_init(suspend_cancelled ?
+	    XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
+
+	/* Register vcpu_info area for CPU#0. */
+	xen_hvm_init_cpu();
 }
  
 static void
-xen_hvm_init(void *dummy __unused)
+xen_hvm_sysinit(void *arg __unused)
 {
+	xen_hvm_init(XEN_HVM_INIT_COLD);
+}
 
-	if (xen_hvm_init_hypercall_stubs() != 0)
-		return;
-
-	xen_domain_type = XEN_HVM_DOMAIN;
-	setup_xen_features();
-	xen_hvm_init_shared_info_page();
-	xen_hvm_set_callback(NULL);
-	xen_hvm_disable_emulated_devices();
-} 
-
-void xen_hvm_init_cpu(void)
+void
+xen_hvm_init_cpu(void)
 {
 	struct vcpu_register_vcpu_info info;
 	struct vcpu_info *vcpu_info;
 	int cpu, rc;
 
-	cpu = PCPU_GET(acpi_id);
+	if (DPCPU_GET(vcpu_info) != NULL) {
+		/*
+		 * vcpu_info is already set.  We're resuming
+		 * from a failed migration and our pre-suspend
+		 * configuration is still valid.
+		 */
+		return;
+	}
+
 	vcpu_info = DPCPU_PTR(vcpu_local_info);
+	cpu = PCPU_GET(acpi_id);
 	info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
 	info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));
 
@@ -682,6 +741,8 @@ void xen_hvm_init_cpu(void)
 		DPCPU_SET(vcpu_info, vcpu_info);
 }
 
-SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL);
+SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
+#ifdef SMP
 SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL);
+#endif
 SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL);

Modified: head/sys/x86/xen/xen_intr.c
==============================================================================
--- head/sys/x86/xen/xen_intr.c	Fri Sep 20 04:35:09 2013	(r255725)
+++ head/sys/x86/xen/xen_intr.c	Fri Sep 20 05:06:03 2013	(r255726)
@@ -120,7 +120,7 @@ struct xenisrc {
 #define ARRAY_SIZE(a)	(sizeof(a) / sizeof(a[0]))
 
 static void	xen_intr_suspend(struct pic *);
-static void	xen_intr_resume(struct pic *);
+static void	xen_intr_resume(struct pic *, bool suspend_cancelled);
 static void	xen_intr_enable_source(struct intsrc *isrc);
 static void	xen_intr_disable_source(struct intsrc *isrc, int eoi);
 static void	xen_intr_eoi_source(struct intsrc *isrc);
@@ -334,7 +334,7 @@ xen_intr_release_isrc(struct xenisrc *is
 	evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
 	evtchn_cpu_unmask_port(0, isrc->xi_port);
 
-	if (isrc->xi_close != 0) {
+	if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) {
 		struct evtchn_close close = { .port = isrc->xi_port };
 		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
 			panic("EVTCHNOP_close failed");
@@ -408,6 +408,7 @@ xen_intr_bind_isrc(struct xenisrc **isrc
 		return (error);
 	}
 	*isrcp = isrc;
+	evtchn_unmask_port(local_port);
 	return (0);
 }
 
@@ -571,6 +572,9 @@ xen_intr_init(void *dummy __unused)
 	struct xen_intr_pcpu_data *pcpu;
 	int i;
 
+	if (!xen_domain())
+		return (0);
+
 	mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF);
 
 	/*
@@ -602,20 +606,116 @@ xen_intr_suspend(struct pic *unused)
 {
 }
 
+static void
+xen_rebind_ipi(struct xenisrc *isrc)
+{
+#ifdef SMP
+	int cpu = isrc->xi_cpu;
+	int acpi_id = pcpu_find(cpu)->pc_acpi_id;
+	int error;
+	struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
+
+	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+	                                    &bind_ipi);
+	if (error != 0)
+		panic("unable to rebind xen IPI: %d", error);
+
+	isrc->xi_port = bind_ipi.port;
+	isrc->xi_cpu = 0;
+	xen_intr_port_to_isrc[bind_ipi.port] = isrc;
+
+	error = xen_intr_assign_cpu(&isrc->xi_intsrc,
+	                            cpu_apic_ids[cpu]);
+	if (error)
+		panic("unable to bind xen IPI to CPU#%d: %d",
+		      cpu, error);
+
+	evtchn_unmask_port(bind_ipi.port);
+#else
+	panic("Resume IPI event channel on UP");
+#endif
+}
+
+static void
+xen_rebind_virq(struct xenisrc *isrc)
+{
+	int cpu = isrc->xi_cpu;
+	int acpi_id = pcpu_find(cpu)->pc_acpi_id;
+	int error;
+	struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq,
+	                                      .vcpu = acpi_id };
+
+	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+	                                    &bind_virq);
+	if (error != 0)
+		panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error);
+
+	isrc->xi_port = bind_virq.port;
+	isrc->xi_cpu = 0;
+	xen_intr_port_to_isrc[bind_virq.port] = isrc;
+
+#ifdef SMP
+	error = xen_intr_assign_cpu(&isrc->xi_intsrc,
+	                            cpu_apic_ids[cpu]);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list