SMP suspend/resume.

takawata at init-main.com takawata at init-main.com
Tue May 13 11:45:59 UTC 2008


Hi, I managed to make suspend and resume work on an SMP system.
The following patch is a bit crude, but it has begun
to work on my ThinkPad X61 (Core 2 Duo system).

TODO:
1. The suspend/resume path itself is similar to the AP boot path.
Some of the code could be shared between the two.
2. More context, such as the MTRRs or the npx context, should be saved on
suspend.
3. Make the ACPI suspend/resume path more ABI-aware: needless
register recovery and saving of special-register context
(whose values are usually constant) should be removed.
4. Make the same binary module work in both the UP and SMP cases.
(Or is it time to give up using the acpi module on i386 as well?)





Index: i386/acpica/acpi_wakeup.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/acpica/acpi_wakeup.c,v
retrieving revision 1.47
diff -u -r1.47 acpi_wakeup.c
--- i386/acpica/acpi_wakeup.c	16 Mar 2008 10:58:03 -0000	1.47
+++ i386/acpica/acpi_wakeup.c	13 May 2008 09:12:18 -0000
@@ -27,6 +27,7 @@
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/sys/i386/acpica/acpi_wakeup.c,v 1.47 2008/03/16 10:58:03 rwatson Exp $");
+#define SMP
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -49,6 +50,11 @@
 
 #include <contrib/dev/acpica/acpi.h>
 #include <dev/acpica/acpivar.h>
+#include <sys/smp.h>
+#include <machine/apicreg.h>
+#include <machine/apicvar.h>
+#include <machine/smp.h>
+#include <sys/sched.h>
 
 #include "acpi_wakecode.h"
 
@@ -71,7 +77,9 @@
 
 static uint16_t		r_cs, r_ds, r_es, r_fs, r_gs, r_ss, r_tr;
 static uint32_t		r_esp;
-
+extern void *bootstacks[];
+static char *bootSTK;
+void restore_sub(void);
 static void		acpi_printcpu(void);
 static void		acpi_realmodeinst(void *arg, bus_dma_segment_t *segs,
 					  int nsegs, int error);
@@ -80,6 +88,7 @@
 /* XXX shut gcc up */
 extern int		acpi_savecpu(void);
 extern int		acpi_restorecpu(void);
+extern void		acpi_kicksub(void);
 
 #ifdef __GNUCLIKE_ASM
 __asm__("				\n\
@@ -104,6 +113,15 @@
 	movl	%eax,(%esp)		\n\
 	xorl	%eax,%eax		\n\
 	ret				\n\
+				\n\
+	.text				\n\
+	.p2align 2, 0x90		\n\
+	.type acpi_kicksub, @function  \n\
+acpi_kicksub:			\n\
+	.align 4			\n\
+	movl	bootSTK,%esp		\n\
+	jmp     restore_sub		\n\
+	ret				\n\
 					\n\
 	.text				\n\
 	.p2align 2, 0x90		\n\
@@ -149,6 +167,24 @@
 	ret				\n\
 ");
 #endif /* __GNUCLIKE_ASM */
+int acpi_cpu_resumed[MAXCPU];
+int acpi_curcpu;
+extern int switch_debug;
+
+void restore_sub()
+{
+	ACPI_DISABLE_IRQS();
+	printf("RESTORE_SUB\n");
+	lapic_disable();	
+	printf("LAPIC_SETUP\n");
+	lapic_setup(0);	
+	lapic_dump("RESTORE_SUB");
+	printf("RESTORE_SUB2\n");
+	ACPI_ENABLE_IRQS();
+
+	acpi_cpu_resumed[acpi_curcpu]= 1;
+	acpi_restorecpu();
+}
 
 static void
 acpi_printcpu(void)
@@ -187,6 +223,119 @@
 	outb(0x61, inb(0x61) & ~0x3);
 }
 
+
+int resume_other_cpu(struct acpi_softc *sc, int cpu);
+int resume_other_cpu(struct acpi_softc *sc, int cpu)
+{
+	int ms;
+	int apic_id = cpu_apic_ids[cpu];
+	int gsel_tss;
+
+	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+	acpi_curcpu = cpu;
+	bootSTK= (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4;
+	printf("%p\n", bootSTK);	
+	p_gdt = (struct region_descriptor *)
+		(sc->acpi_wakeaddr + physical_gdt);
+	saved_gdt.rd_limit = NGDT * sizeof(gdt[0]) -1;
+	saved_gdt.rd_base = (int )&gdt[cpu*NGDT];
+	p_gdt->rd_limit = saved_gdt.rd_limit;
+ 	p_gdt->rd_base = vtophys(saved_gdt.rd_base);
+	r_esp = stoppcbs[cpu].pcb_esp;
+	r_ebp = stoppcbs[cpu].pcb_ebp;
+	r_esi = stoppcbs[cpu].pcb_esi;
+	r_edi = stoppcbs[cpu].pcb_edi;
+	r_efl = stoppcbs[cpu].pcb_psl;
+	ret_addr = stoppcbs[cpu].pcb_eip;
+	WAKECODE_FIXUP(physical_esp, uint32_t, vtophys(bootSTK) );
+	WAKECODE_FIXUP(previous_cr0, uint32_t, r_cr0);
+	WAKECODE_FIXUP(previous_cr2, uint32_t, r_cr2);
+	WAKECODE_FIXUP(previous_cr3, uint32_t, r_cr3);
+	WAKECODE_FIXUP(previous_cr4, uint32_t, r_cr4);
+	
+	WAKECODE_FIXUP(resume_beep, uint32_t, 0);
+	WAKECODE_FIXUP(reset_video, uint32_t, 0);
+	
+	WAKECODE_FIXUP(previous_tr,  uint16_t, gsel_tss);
+	WAKECODE_BCOPY(previous_gdt, struct region_descriptor, saved_gdt);
+	WAKECODE_FIXUP(previous_ldt, uint16_t, saved_ldt);
+	WAKECODE_BCOPY(previous_idt, struct region_descriptor, saved_idt);
+	
+	WAKECODE_FIXUP(where_to_recover, void *, acpi_kicksub);
+	
+	WAKECODE_FIXUP(previous_ds,  uint16_t, r_ds);
+	WAKECODE_FIXUP(previous_es,  uint16_t, r_es);
+	WAKECODE_FIXUP(previous_fs,  uint16_t, r_fs);
+	WAKECODE_FIXUP(previous_gs,  uint16_t, 0);
+	WAKECODE_FIXUP(previous_ss,  uint16_t, r_ss);
+
+	/* do an INIT IPI: assert RESET */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+
+	/* wait for pending status end */
+	lapic_ipi_wait(-1);
+
+	/* do an INIT IPI: deassert RESET */
+	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
+
+	/* wait for pending status end */
+	DELAY(10000);		/* wait ~10mS */
+	lapic_ipi_wait(-1);
+	/*
+	 * next we do a STARTUP IPI: the previous INIT IPI might still be
+	 * latched, (P5 bug) this 1st STARTUP would then terminate
+	 * immediately, and the previously started INIT IPI would continue. OR
+	 * the previous INIT IPI has already run. and this STARTUP IPI will
+	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
+	 * will run.
+	 */
+
+	/* do a STARTUP IPI */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+		      ((sc->acpi_wakephys >>12)&0xff), apic_id);
+	lapic_ipi_wait(-1);
+	DELAY(200);		/* wait ~200uS */
+
+	/*
+	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
+	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+	 * recognized after hardware RESET or INIT IPI.
+	 */
+
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    ((sc->acpi_wakephys >>12)&0xff), apic_id);
+	lapic_ipi_wait(-1);
+	DELAY(200);		/* wait ~200uS */
+
+	/* Wait up to 5 seconds for it to start. */
+	for (ms = 0; ms < 5000; ms++) {
+		if(acpi_cpu_resumed[cpu]){
+			acpi_cpu_resumed[cpu]= 0;
+			return 0;
+		}
+		DELAY(1000);
+	}
+	return -1;		/* return FAILURE */
+
+}
+int resume_other_cpus(struct acpi_softc *sc);
+int resume_other_cpus(struct acpi_softc *sc)
+{
+	int i;
+	printf("RESUME_OTHER_CPUS");
+	*((volatile u_short *) 0x467) = 0;
+	*((volatile u_short *) 0x468) = (sc->acpi_wakephys&0xffff0)>>4;
+
+	for(i = 1; i < mp_ncpus; i++){
+		resume_other_cpu(sc, i);
+	}
+	return 0;
+}
 int
 acpi_sleep_machdep(struct acpi_softc *sc, int state)
 {
@@ -270,14 +419,15 @@
 		for (;;) ;
 	} else {
 		/* Execute Wakeup */
-		intr_resume();
-
 		if (bootverbose) {
 			acpi_savecpu();
 			acpi_printcpu();
 		}
+		resume_other_cpus(sc);
+		restart_cpus(stopped_cpus);
+		intr_resume();
+		lapic_dump("MAIN");
 	}
-
 out:
 	load_cr3(cr3);
 	write_eflags(ef);
@@ -285,7 +435,7 @@
 	/* If we beeped, turn it off after a delay. */
 	if (acpi_resume_beep)
 		timeout(acpi_stop_beep, NULL, 3 * hz);
-
+	printf("FUGAFUGA\n");
 	return (ret);
 }
 
Index: i386/i386/io_apic.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/io_apic.c,v
retrieving revision 1.35
diff -u -r1.35 io_apic.c
--- i386/i386/io_apic.c	5 Jun 2007 18:57:48 -0000	1.35
+++ i386/i386/io_apic.c	13 May 2008 08:22:55 -0000
@@ -444,8 +444,9 @@
 	struct ioapic *io = (struct ioapic *)pic;
 	int i;
 
-	for (i = 0; i < io->io_numintr; i++)
+	for (i = 0; i < io->io_numintr; i++){
 		ioapic_program_intpin(&io->io_pins[i]);
+	}
 }
 
 /*
Index: i386/i386/mp_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/mp_machdep.c,v
retrieving revision 1.286
diff -u -r1.286 mp_machdep.c
--- i386/i386/mp_machdep.c	10 Apr 2008 18:38:31 -0000	1.286
+++ i386/i386/mp_machdep.c	13 May 2008 07:08:29 -0000
@@ -1299,18 +1299,19 @@
 	int cpu = PCPU_GET(cpuid);
 	int cpumask = PCPU_GET(cpumask);
 
-	savectx(&stoppcbs[cpu]);
-
-	/* Indicate that we are stopped */
-	atomic_set_int(&stopped_cpus, cpumask);
+	if(savectx(&stoppcbs[cpu])){
+		/* Indicate that we are stopped */
+		atomic_set_int(&stopped_cpus, cpumask);
+		wbinvd();
+	}
 
 	/* Wait for restart */
-	while (!(started_cpus & cpumask))
-	    ia32_pause();
-
+	while (!(started_cpus & cpumask)){
+		ia32_pause();
+	}
 	atomic_clear_int(&started_cpus, cpumask);
 	atomic_clear_int(&stopped_cpus, cpumask);
-
+	
 	if (cpu == 0 && cpustop_restartfunc != NULL) {
 		cpustop_restartfunc();
 		cpustop_restartfunc = NULL;
Index: i386/i386/swtch.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/swtch.s,v
retrieving revision 1.156
diff -u -r1.156 swtch.s
--- i386/i386/swtch.s	22 Aug 2007 05:06:14 -0000	1.156
+++ i386/i386/swtch.s	9 May 2008 15:16:03 -0000
@@ -413,6 +413,6 @@
 1:
 	popfl
 #endif	/* DEV_NPX */
-
+	movl  $1, %eax
 	ret
 END(savectx)
Index: i386/include/pcb.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/pcb.h,v
retrieving revision 1.56
diff -u -r1.56 pcb.h
--- i386/include/pcb.h	29 Dec 2005 13:23:48 -0000	1.56
+++ i386/include/pcb.h	24 Apr 2008 06:46:59 -0000
@@ -81,7 +81,7 @@
 struct trapframe;
 
 void	makectx(struct trapframe *, struct pcb *);
-void	savectx(struct pcb *);
+int	savectx(struct pcb *);
 #endif
 
 #endif /* _I386_PCB_H_ */
Index: dev/acpica/acpi.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/acpica/acpi.c,v
retrieving revision 1.247
diff -u -r1.247 acpi.c
--- dev/acpica/acpi.c	13 Mar 2008 20:39:03 -0000	1.247
+++ dev/acpica/acpi.c	30 Apr 2008 13:14:48 -0000
@@ -29,7 +29,7 @@
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/sys/dev/acpica/acpi.c,v 1.247 2008/03/13 20:39:03 jhb Exp $");
-
+#define SMP
 #include "opt_acpi.h"
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -47,6 +47,7 @@
 #include <sys/power.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
+#include <sys/sched.h>
 
 #include <machine/resource.h>
 #include <machine/bus.h>
@@ -2339,6 +2340,8 @@
      * drivers need this.
      */
     mtx_lock(&Giant);
+    sched_bind(curthread, 0);
+    stop_cpus(PCPU_GET(other_cpus));
     slp_state = ACPI_SS_NONE;
     switch (state) {
     case ACPI_STATE_S1:
@@ -2430,13 +2433,16 @@
 	acpi_wake_prep_walk(state);
 	sc->acpi_sstate = ACPI_STATE_S0;
     }
+    printf("PREP WALK\n");
     if (slp_state >= ACPI_SS_SLP_PREP)
 	AcpiLeaveSleepState(state);
+    printf("LEAVE_SLEEP_STATE\n");
     if (slp_state >= ACPI_SS_DEV_SUSPEND)
 	DEVICE_RESUME(root_bus);
+    printf("DEVICE_RESUME\n");
     if (slp_state >= ACPI_SS_SLEPT)
 	acpi_enable_fixed_events(sc);
-
+    printf("ENABLE_FIXED_EVENT\n");
     /* Allow another sleep request after a while. */
     if (state != ACPI_STATE_S5)
 	timeout(acpi_sleep_enable, sc, hz * ACPI_MINIMUM_AWAKETIME);
@@ -2445,6 +2451,7 @@
     acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
 
     mtx_unlock(&Giant);
+    sched_unbind(curthread);
     return_ACPI_STATUS (status);
 }
 
Index: dev/acpica/acpi_ec.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/acpica/acpi_ec.c,v
retrieving revision 1.80
diff -u -r1.80 acpi_ec.c
--- dev/acpica/acpi_ec.c	8 Nov 2007 21:20:34 -0000	1.80
+++ dev/acpica/acpi_ec.c	7 May 2008 17:07:11 -0000
@@ -747,7 +747,7 @@
      * If booting, check if we need to run the query handler.  If so, we
      * we call it directly here since our thread taskq is not active yet.
      */
-    if (cold || rebooting) {
+    if (cold || rebooting||sc->ec_suspending) {
 	if ((EC_GET_CSR(sc) & EC_EVENT_SCI)) {
 	    CTR0(KTR_ACPI, "ec running gpe handler directly");
 	    EcGpeQueryHandler(sc);


More information about the freebsd-acpi mailing list