kern/170021: commit references a PR

dfilter service dfilter at FreeBSD.ORG
Fri Jul 20 19:40:13 UTC 2012


The following reply was made to PR kern/170021; it has been noted by GNATS.

From: dfilter at FreeBSD.ORG (dfilter service)
To: bug-followup at FreeBSD.org
Cc:  
Subject: Re: kern/170021: commit references a PR
Date: Fri, 20 Jul 2012 19:35:37 +0000 (UTC)

 Author: mav
 Date: Fri Jul 20 19:35:20 2012
 New Revision: 238658
 URL: http://svn.freebsd.org/changeset/base/238658
 
 Log:
   Partially MFC r212541:
   Refactor cpu_idle() on x86.
   Use MONITOR/MWAIT instructions (if supported) under a high sleep/wakeup
   rate, as a fast alternative to the other methods.  This allows the SMP
   scheduler to wake sleeping CPUs much faster without using an IPI,
   significantly increasing performance on some highly task-switching loads.
   On such loads it also hides the two ACPI timer reads otherwise done by
   acpi_cpu_idle(), which are reported to be slow on some systems.
   
   MFC r225936 (by attilio):
   Add some improvements to the idle table callbacks:
   - Replace instances of the hand-rolled assembly "hlt" instruction with
     calls to the halt() function.
   - In cpu_idle_mwait(), avoid races in the sched_runnable() check by using
     the same pattern used in cpu_idle_hlt() with the 'hlt' instruction.
   - Add comments explaining the logic behind the pattern used in
     cpu_idle_hlt() and other idle callbacks.
   
   PR:		kern/170021
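 
   For reference: MONITOR arms address-range monitoring on a cache line, and
   MWAIT then idles the CPU in a low-power state until that line is written
   (or an interrupt arrives), so the scheduler can wake an idle CPU with a
   plain store instead of an IPI.  Condensed from the diff below, the
   idle-side pattern looks like this:
 
 	int *state;
 
 	state = (int *)PCPU_PTR(monitorbuf);	/* per-CPU monitored word */
 	*state = STATE_MWAIT;
 	cpu_monitor(state, 0, 0);	/* arm monitoring before re-checking */
 	if (*state == STATE_MWAIT)	/* any write since arming aborts... */
 		cpu_mwait(0, MWAIT_C1);	/* ...this doze in C1 */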
 
 Modified:
   stable/8/sys/amd64/amd64/machdep.c
   stable/8/sys/i386/i386/machdep.c
   stable/8/sys/pc98/pc98/machdep.c
 Directory Properties:
   stable/8/sys/   (props changed)
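 
   A common thread in all three files: the old MWAIT_DISABLED/WOKEN/WAITING
   flags give way to a single three-state word per CPU (STATE_RUNNING,
   STATE_MWAIT, STATE_SLEEPING), which lets cpu_idle_wakeup() decide whether
   a plain store wakes the target CPU or an IPI is still needed.  Condensed
   from the diff:
 
 	state = (int *)pcpu->pc_monitorbuf;
 	if (*state == STATE_SLEEPING)	/* hlt/ACPI sleep: IPI required */
 		return (0);
 	if (*state == STATE_MWAIT)	/* mwait: this store is the wakeup */
 		*state = STATE_RUNNING;
 	return (1);			/* running or spinning: no IPI */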
 
 Modified: stable/8/sys/amd64/amd64/machdep.c
 ==============================================================================
 --- stable/8/sys/amd64/amd64/machdep.c	Fri Jul 20 17:51:20 2012	(r238657)
 +++ stable/8/sys/amd64/amd64/machdep.c	Fri Jul 20 19:35:20 2012	(r238658)
 @@ -629,63 +629,122 @@ void
  cpu_halt(void)
  {
  	for (;;)
 -		__asm__ ("hlt");
 +		halt();
  }
  
  void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
 +static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
 +static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
 +TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
 +SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
 +    0, "Use MONITOR/MWAIT for short idle");
 +
 +#define	STATE_RUNNING	0x0
 +#define	STATE_MWAIT	0x1
 +#define	STATE_SLEEPING	0x2
 +
 +static void
 +cpu_idle_acpi(int busy)
 +{
 +	int *state;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_SLEEPING;
 +
 +	/* See comments in cpu_idle_hlt(). */
 +	disable_intr();
 +	if (sched_runnable())
 +		enable_intr();
 +	else if (cpu_idle_hook)
 +		cpu_idle_hook();
 +	else
 +		__asm __volatile("sti; hlt");
 +	*state = STATE_RUNNING;
 +}
  
  static void
  cpu_idle_hlt(int busy)
  {
 +	int *state;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_SLEEPING;
 +
  	/*
 -	 * we must absolutely guarentee that hlt is the next instruction
 -	 * after sti or we introduce a timing window.
 +	 * Since we may be in a critical section from cpu_idle(), if
 +	 * an interrupt fires during that critical section we may have
 +	 * a pending preemption.  If the CPU halts, then that thread
 +	 * may not execute until a later interrupt awakens the CPU.
 +	 * To handle this race, check for a runnable thread after
 +	 * disabling interrupts and immediately return if one is
 +	 * found.  Also, we must absolutely guarantee that hlt is
 +	 * the next instruction after sti.  This ensures that any
 +	 * interrupt that fires after the call to disable_intr() will
 +	 * immediately awaken the CPU from hlt.  Finally, please note
 +	 * that on x86 this works fine because interrupts are recognized
 +	 * only after the instruction following sti executes, while IF is
 +	 * set to 1 immediately, allowing the hlt instruction to
 +	 * acknowledge the interrupt.
  	 */
  	disable_intr();
 -  	if (sched_runnable())
 +	if (sched_runnable())
  		enable_intr();
  	else
  		__asm __volatile("sti; hlt");
 +	*state = STATE_RUNNING;
  }
  
 +/*
 + * MWAIT cpu power states.  Lower 4 bits are sub-states.
 + */
 +#define	MWAIT_C0	0xf0
 +#define	MWAIT_C1	0x00
 +#define	MWAIT_C2	0x10
 +#define	MWAIT_C3	0x20
 +#define	MWAIT_C4	0x30
 +
  static void
 -cpu_idle_acpi(int busy)
 +cpu_idle_mwait(int busy)
  {
 +	int *state;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_MWAIT;
 +
 +	/* See comments in cpu_idle_hlt(). */
  	disable_intr();
 -  	if (sched_runnable())
 +	if (sched_runnable()) {
  		enable_intr();
 -	else if (cpu_idle_hook)
 -		cpu_idle_hook();
 +		*state = STATE_RUNNING;
 +		return;
 +	}
 +	cpu_monitor(state, 0, 0);
 +	if (*state == STATE_MWAIT)
 +		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
  	else
 -		__asm __volatile("sti; hlt");
 +		enable_intr();
 +	*state = STATE_RUNNING;
  }
  
 -static int cpu_ident_amdc1e = 0;
 -
 -static int
 -cpu_probe_amdc1e(void)
 +static void
 +cpu_idle_spin(int busy)
  {
 +	int *state;
  	int i;
  
 -	/*
 -	 * Forget it, if we're not using local APIC timer.
 -	 */
 -	if (resource_disabled("apic", 0) ||
 -	    (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
 -		return (0);
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_RUNNING;
  
  	/*
 -	 * Detect the presence of C1E capability mostly on latest
 -	 * dual-cores (or future) k8 family.
 -	 */
 -	if (cpu_vendor_id == CPU_VENDOR_AMD &&
 -	    (cpu_id & 0x00000f00) == 0x00000f00 &&
 -	    (cpu_id & 0x0fff0000) >=  0x00040000) {
 -		cpu_ident_amdc1e = 1;
 -		return (1);
 +	 * The sched_runnable() call is racy, but since it is made in a
 +	 * loop, missing it once has little impact, if any (and it is
 +	 * much better than not checking at all).
 +	 */
 +	for (i = 0; i < 1000; i++) {
 +		if (sched_runnable())
 +			return;
 +		cpu_spinwait();
  	}
 -
 -	return (0);
  }
  
  /*
 @@ -703,110 +762,66 @@ cpu_probe_amdc1e(void)
  #define	AMDK8_CMPHALT		(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
  
  static void
 -cpu_idle_amdc1e(int busy)
 +cpu_probe_amdc1e(void)
  {
  
 -	disable_intr();
 -	if (sched_runnable())
 -		enable_intr();
 -	else {
 -		uint64_t msr;
 -
 -		msr = rdmsr(MSR_AMDK8_IPM);
 -		if (msr & AMDK8_CMPHALT)
 -			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
 -
 -		if (cpu_idle_hook)
 -			cpu_idle_hook();
 -		else
 -			__asm __volatile("sti; hlt");
 +	/*
 +	 * Detect the presence of C1E capability mostly on latest
 +	 * dual-cores (or future) k8 family.
 +	 */
 +	if (cpu_vendor_id == CPU_VENDOR_AMD &&
 +	    (cpu_id & 0x00000f00) == 0x00000f00 &&
 +	    (cpu_id & 0x0fff0000) >=  0x00040000) {
 +		cpu_ident_amdc1e = 1;
  	}
  }
  
 -static void
 -cpu_idle_spin(int busy)
 -{
 -	return;
 -}
 -
  void (*cpu_idle_fn)(int) = cpu_idle_acpi;
  
  void
  cpu_idle(int busy)
  {
 +	uint64_t msr;
 +
  #ifdef SMP
  	if (mp_grab_cpu_hlt())
  		return;
  #endif
 -	cpu_idle_fn(busy);
 -}
 -
 -/*
 - * mwait cpu power states.  Lower 4 bits are sub-states.
 - */
 -#define	MWAIT_C0	0xf0
 -#define	MWAIT_C1	0x00
 -#define	MWAIT_C2	0x10
 -#define	MWAIT_C3	0x20
 -#define	MWAIT_C4	0x30
 -
 -#define	MWAIT_DISABLED	0x0
 -#define	MWAIT_WOKEN	0x1
 -#define	MWAIT_WAITING	0x2
 -
 -static void
 -cpu_idle_mwait(int busy)
 -{
 -	int *mwait;
 -
 -	mwait = (int *)PCPU_PTR(monitorbuf);
 -	*mwait = MWAIT_WAITING;
 -	if (sched_runnable())
 -		return;
 -	cpu_monitor(mwait, 0, 0);
 -	if (*mwait == MWAIT_WAITING)
 -		cpu_mwait(0, MWAIT_C1);
 -}
 -
 -static void
 -cpu_idle_mwait_hlt(int busy)
 -{
 -	int *mwait;
 +	/* If we are busy, try to use fast methods. */
 +	if (busy) {
 +		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
 +			cpu_idle_mwait(busy);
 +			return;
 +		}
 +	}
  
 -	mwait = (int *)PCPU_PTR(monitorbuf);
 -	if (busy == 0) {
 -		*mwait = MWAIT_DISABLED;
 -		cpu_idle_hlt(busy);
 -		return;
 +	/* Apply AMD APIC timer C1E workaround. */
 +	if (cpu_ident_amdc1e) {
 +		msr = rdmsr(MSR_AMDK8_IPM);
 +		if (msr & AMDK8_CMPHALT)
 +			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
  	}
 -	*mwait = MWAIT_WAITING;
 -	if (sched_runnable())
 -		return;
 -	cpu_monitor(mwait, 0, 0);
 -	if (*mwait == MWAIT_WAITING)
 -		cpu_mwait(0, MWAIT_C1);
 +
 +	/* Call main idle method. */
 +	cpu_idle_fn(busy);
  }
  
  int
  cpu_idle_wakeup(int cpu)
  {
  	struct pcpu *pcpu;
 -	int *mwait;
 +	int *state;
  
 -	if (cpu_idle_fn == cpu_idle_spin)
 -		return (1);
 -	if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
 -		return (0);
  	pcpu = pcpu_find(cpu);
 -	mwait = (int *)pcpu->pc_monitorbuf;
 +	state = (int *)pcpu->pc_monitorbuf;
  	/*
  	 * This doesn't need to be atomic since missing the race will
  	 * simply result in unnecessary IPIs.
  	 */
 -	if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
 +	if (*state == STATE_SLEEPING)
  		return (0);
 -	*mwait = MWAIT_WOKEN;
 -
 +	if (*state == STATE_MWAIT)
 +		*state = STATE_RUNNING;
  	return (1);
  }
  
 @@ -819,8 +834,6 @@ struct {
  } idle_tbl[] = {
  	{ cpu_idle_spin, "spin" },
  	{ cpu_idle_mwait, "mwait" },
 -	{ cpu_idle_mwait_hlt, "mwait_hlt" },
 -	{ cpu_idle_amdc1e, "amdc1e" },
  	{ cpu_idle_hlt, "hlt" },
  	{ cpu_idle_acpi, "acpi" },
  	{ NULL, NULL }
 @@ -839,8 +852,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
  		if (strstr(idle_tbl[i].id_name, "mwait") &&
  		    (cpu_feature2 & CPUID2_MON) == 0)
  			continue;
 -		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
 -		    cpu_ident_amdc1e == 0)
 +		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
 +		    cpu_idle_hook == NULL)
  			continue;
  		p += sprintf(p, "%s, ", idle_tbl[i].id_name);
  	}
 @@ -849,6 +862,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
  	return (error);
  }
  
 +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
 +    0, 0, idle_sysctl_available, "A", "list of available idle functions");
 +
  static int
  idle_sysctl(SYSCTL_HANDLER_ARGS)
  {
 @@ -872,8 +888,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
  		if (strstr(idle_tbl[i].id_name, "mwait") &&
  		    (cpu_feature2 & CPUID2_MON) == 0)
  			continue;
 -		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
 -		    cpu_ident_amdc1e == 0)
 +		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
 +		    cpu_idle_hook == NULL)
  			continue;
  		if (strcmp(idle_tbl[i].id_name, buf))
  			continue;
 @@ -883,9 +899,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
  	return (EINVAL);
  }
  
 -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
 -    0, 0, idle_sysctl_available, "A", "list of available idle functions");
 -
  SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
      idle_sysctl, "A", "currently selected idle function");
  
 @@ -1819,8 +1832,7 @@ hammer_time(u_int64_t modulep, u_int64_t
  	}
  #endif
  
 -	if (cpu_probe_amdc1e())
 -		cpu_idle_fn = cpu_idle_amdc1e;
 +	cpu_probe_amdc1e();
  
  	/* Location of kernel stack for locore */
  	return ((u_int64_t)thread0.td_pcb);
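 
  One subtlety behind the expanded cpu_idle_hlt() comment above: sti delays
  interrupt recognition by one instruction, so pairing it back-to-back with
  hlt closes the window in which a wakeup interrupt could otherwise slip in
  between the runnable check and the halt.  The race-free shape, as the diff
  installs it:
 
 	disable_intr();				/* mask interrupts first */
 	if (sched_runnable())
 		enable_intr();			/* work pending: don't sleep */
 	else
 		__asm __volatile("sti; hlt");	/* unmask and halt atomically */
 
  If any instruction separated sti from hlt, an interrupt landing between
  them could queue a thread and still leave the CPU halted, stranding that
  thread until the next interrupt.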
 
 Modified: stable/8/sys/i386/i386/machdep.c
 ==============================================================================
 --- stable/8/sys/i386/i386/machdep.c	Fri Jul 20 17:51:20 2012	(r238657)
 +++ stable/8/sys/i386/i386/machdep.c	Fri Jul 20 19:35:20 2012	(r238658)
 @@ -1177,9 +1177,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *
  	return (0);
  }
  
 -
 -void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
 -
  #ifdef XEN
  
  void
 @@ -1207,66 +1204,127 @@ void
  cpu_halt(void)
  {
  	for (;;)
 -		__asm__ ("hlt");
 +		halt();
  }
  
 +#endif
 +
 +void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
 +static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
 +static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
 +TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
 +SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
 +    0, "Use MONITOR/MWAIT for short idle");
 +
 +#define	STATE_RUNNING	0x0
 +#define	STATE_MWAIT	0x1
 +#define	STATE_SLEEPING	0x2
 +
 +static void
 +cpu_idle_acpi(int busy)
 +{
 +	int *state;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_SLEEPING;
 +
 +	/* See comments in cpu_idle_hlt(). */
 +	disable_intr();
 +	if (sched_runnable())
 +		enable_intr();
 +	else if (cpu_idle_hook)
 +		cpu_idle_hook();
 +	else
 +		__asm __volatile("sti; hlt");
 +	*state = STATE_RUNNING;
 +}
 +
 +#ifndef XEN
  static void
  cpu_idle_hlt(int busy)
  {
 +	int *state;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_SLEEPING;
 +
  	/*
 -	 * we must absolutely guarentee that hlt is the next instruction
 -	 * after sti or we introduce a timing window.
 +	 * Since we may be in a critical section from cpu_idle(), if
 +	 * an interrupt fires during that critical section we may have
 +	 * a pending preemption.  If the CPU halts, then that thread
 +	 * may not execute until a later interrupt awakens the CPU.
 +	 * To handle this race, check for a runnable thread after
 +	 * disabling interrupts and immediately return if one is
 +	 * found.  Also, we must absolutely guarantee that hlt is
 +	 * the next instruction after sti.  This ensures that any
 +	 * interrupt that fires after the call to disable_intr() will
 +	 * immediately awaken the CPU from hlt.  Finally, please note
 +	 * that on x86 this works fine because interrupts are recognized
 +	 * only after the instruction following sti executes, while IF is
 +	 * set to 1 immediately, allowing the hlt instruction to
 +	 * acknowledge the interrupt.
  	 */
  	disable_intr();
 -  	if (sched_runnable())
 +	if (sched_runnable())
  		enable_intr();
  	else
  		__asm __volatile("sti; hlt");
 +	*state = STATE_RUNNING;
  }
  #endif
  
 +/*
 + * MWAIT cpu power states.  Lower 4 bits are sub-states.
 + */
 +#define	MWAIT_C0	0xf0
 +#define	MWAIT_C1	0x00
 +#define	MWAIT_C2	0x10
 +#define	MWAIT_C3	0x20
 +#define	MWAIT_C4	0x30
 +
  static void
 -cpu_idle_acpi(int busy)
 +cpu_idle_mwait(int busy)
  {
 +	int *state;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_MWAIT;
 +
 +	/* See comments in cpu_idle_hlt(). */
  	disable_intr();
 -  	if (sched_runnable())
 +	if (sched_runnable()) {
  		enable_intr();
 -	else if (cpu_idle_hook)
 -		cpu_idle_hook();
 +		*state = STATE_RUNNING;
 +		return;
 +	}
 +	cpu_monitor(state, 0, 0);
 +	if (*state == STATE_MWAIT)
 +		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
  	else
 -		__asm __volatile("sti; hlt");
 +		enable_intr();
 +	*state = STATE_RUNNING;
  }
  
 -static int cpu_ident_amdc1e = 0;
 -
 -#if !defined(XEN) || defined(XEN_PRIVILEGED)
 -static int
 -cpu_probe_amdc1e(void)
 -{ 
 -#ifdef DEV_APIC
 +static void
 +cpu_idle_spin(int busy)
 +{
 +	int *state;
  	int i;
  
 -	/*
 -	 * Forget it, if we're not using local APIC timer.
 -	 */
 -	if (resource_disabled("apic", 0) ||
 -	    (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
 -		return (0);
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_RUNNING;
  
  	/*
 -	 * Detect the presence of C1E capability mostly on latest
 -	 * dual-cores (or future) k8 family.
 -	 */
 -	if (cpu_vendor_id == CPU_VENDOR_AMD &&
 -	    (cpu_id & 0x00000f00) == 0x00000f00 &&
 -	    (cpu_id & 0x0fff0000) >=  0x00040000) {
 -		cpu_ident_amdc1e = 1;
 -		return (1);
 +	 * The sched_runnable() call is racy, but since it is made in a
 +	 * loop, missing it once has little impact, if any (and it is
 +	 * much better than not checking at all).
 +	 */
 +	for (i = 0; i < 1000; i++) {
 +		if (sched_runnable())
 +			return;
 +		cpu_spinwait();
  	}
 -#endif
 -	return (0);
  }
 -#endif
  
  /*
   * C1E renders the local APIC timer dead, so we disable it by
 @@ -1283,32 +1341,20 @@ cpu_probe_amdc1e(void)
  #define	AMDK8_CMPHALT		(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
  
  static void
 -cpu_idle_amdc1e(int busy)
 +cpu_probe_amdc1e(void)
  {
  
 -	disable_intr();
 -	if (sched_runnable())
 -		enable_intr();
 -	else {
 -		uint64_t msr;
 -
 -		msr = rdmsr(MSR_AMDK8_IPM);
 -		if (msr & AMDK8_CMPHALT)
 -			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
 -
 -		if (cpu_idle_hook)
 -			cpu_idle_hook();
 -		else
 -			__asm __volatile("sti; hlt");
 +	/*
 +	 * Detect the presence of C1E capability mostly on latest
 +	 * dual-cores (or future) k8 family.
 +	 */
 +	if (cpu_vendor_id == CPU_VENDOR_AMD &&
 +	    (cpu_id & 0x00000f00) == 0x00000f00 &&
 +	    (cpu_id & 0x0fff0000) >=  0x00040000) {
 +		cpu_ident_amdc1e = 1;
  	}
  }
  
 -static void
 -cpu_idle_spin(int busy)
 -{
 -	return;
 -}
 -
  #ifdef XEN
  void (*cpu_idle_fn)(int) = cpu_idle_hlt;
  #else
 @@ -1318,79 +1364,51 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi
  void
  cpu_idle(int busy)
  {
 +#ifndef XEN
 +	uint64_t msr;
 +#endif
 +
  #if defined(SMP) && !defined(XEN)
  	if (mp_grab_cpu_hlt())
  		return;
  #endif
 -	cpu_idle_fn(busy);
 -}
 -
 -/*
 - * mwait cpu power states.  Lower 4 bits are sub-states.
 - */
 -#define	MWAIT_C0	0xf0
 -#define	MWAIT_C1	0x00
 -#define	MWAIT_C2	0x10
 -#define	MWAIT_C3	0x20
 -#define	MWAIT_C4	0x30
 -
 -#define	MWAIT_DISABLED	0x0
 -#define	MWAIT_WOKEN	0x1
 -#define	MWAIT_WAITING	0x2
 -
 -static void
 -cpu_idle_mwait(int busy)
 -{
 -	int *mwait;
 -
 -	mwait = (int *)PCPU_PTR(monitorbuf);
 -	*mwait = MWAIT_WAITING;
 -	if (sched_runnable())
 -		return;
 -	cpu_monitor(mwait, 0, 0);
 -	if (*mwait == MWAIT_WAITING)
 -		cpu_mwait(0, MWAIT_C1);
 -}
 -
 -static void
 -cpu_idle_mwait_hlt(int busy)
 -{
 -	int *mwait;
 +#ifndef XEN
 +	/* If we are busy, try to use fast methods. */
 +	if (busy) {
 +		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
 +			cpu_idle_mwait(busy);
 +			return;
 +		}
 +	}
  
 -	mwait = (int *)PCPU_PTR(monitorbuf);
 -	if (busy == 0) {
 -		*mwait = MWAIT_DISABLED;
 -		cpu_idle_hlt(busy);
 -		return;
 +	/* Apply AMD APIC timer C1E workaround. */
 +	if (cpu_ident_amdc1e) {
 +		msr = rdmsr(MSR_AMDK8_IPM);
 +		if (msr & AMDK8_CMPHALT)
 +			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
  	}
 -	*mwait = MWAIT_WAITING;
 -	if (sched_runnable())
 -		return;
 -	cpu_monitor(mwait, 0, 0);
 -	if (*mwait == MWAIT_WAITING)
 -		cpu_mwait(0, MWAIT_C1);
 +#endif
 +
 +	/* Call main idle method. */
 +	cpu_idle_fn(busy);
  }
  
  int
  cpu_idle_wakeup(int cpu)
  {
  	struct pcpu *pcpu;
 -	int *mwait;
 +	int *state;
  
 -	if (cpu_idle_fn == cpu_idle_spin)
 -		return (1);
 -	if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
 -		return (0);
  	pcpu = pcpu_find(cpu);
 -	mwait = (int *)pcpu->pc_monitorbuf;
 +	state = (int *)pcpu->pc_monitorbuf;
  	/*
  	 * This doesn't need to be atomic since missing the race will
  	 * simply result in unnecessary IPIs.
  	 */
 -	if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
 +	if (*state == STATE_SLEEPING)
  		return (0);
 -	*mwait = MWAIT_WOKEN;
 -
 +	if (*state == STATE_MWAIT)
 +		*state = STATE_RUNNING;
  	return (1);
  }
  
 @@ -1403,8 +1421,6 @@ struct {
  } idle_tbl[] = {
  	{ cpu_idle_spin, "spin" },
  	{ cpu_idle_mwait, "mwait" },
 -	{ cpu_idle_mwait_hlt, "mwait_hlt" },
 -	{ cpu_idle_amdc1e, "amdc1e" },
  	{ cpu_idle_hlt, "hlt" },
  	{ cpu_idle_acpi, "acpi" },
  	{ NULL, NULL }
 @@ -1423,8 +1439,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
  		if (strstr(idle_tbl[i].id_name, "mwait") &&
  		    (cpu_feature2 & CPUID2_MON) == 0)
  			continue;
 -		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
 -		    cpu_ident_amdc1e == 0)
 +		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
 +		    cpu_idle_hook == NULL)
  			continue;
  		p += sprintf(p, "%s, ", idle_tbl[i].id_name);
  	}
 @@ -1433,6 +1449,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
  	return (error);
  }
  
 +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
 +    0, 0, idle_sysctl_available, "A", "list of available idle functions");
 +
  static int
  idle_sysctl(SYSCTL_HANDLER_ARGS)
  {
 @@ -1456,8 +1475,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
  		if (strstr(idle_tbl[i].id_name, "mwait") &&
  		    (cpu_feature2 & CPUID2_MON) == 0)
  			continue;
 -		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
 -		    cpu_ident_amdc1e == 0)
 +		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
 +		    cpu_idle_hook == NULL)
  			continue;
  		if (strcmp(idle_tbl[i].id_name, buf))
  			continue;
 @@ -1467,9 +1486,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
  	return (EINVAL);
  }
  
 -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
 -    0, 0, idle_sysctl_available, "A", "list of available idle functions");
 -
  SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
      idle_sysctl, "A", "currently selected idle function");
  
 @@ -2723,8 +2739,7 @@ init386(first)
  	thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
  
  #if defined(XEN_PRIVILEGED)
 -	if (cpu_probe_amdc1e())
 -		cpu_idle_fn = cpu_idle_amdc1e;
 +	cpu_probe_amdc1e();
  #endif
  }
  
 @@ -3001,8 +3016,7 @@ init386(first)
  	thread0.td_pcb->pcb_ext = 0;
  	thread0.td_frame = &proc0_tf;
  
 -	if (cpu_probe_amdc1e())
 -		cpu_idle_fn = cpu_idle_amdc1e;
 +	cpu_probe_amdc1e();
  }
  #endif
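 
  Note how the AMD C1E handling is restructured in both files above: the
  dedicated cpu_idle_amdc1e() callback is gone, and when cpu_probe_amdc1e()
  has set cpu_ident_amdc1e, every pass through cpu_idle() instead clears the
  compare-halt bits, so entering C1E cannot silently stop the local APIC
  timer.  The workaround, as the diff installs it:
 
 	if (cpu_ident_amdc1e) {
 		msr = rdmsr(MSR_AMDK8_IPM);
 		if (msr & AMDK8_CMPHALT)	/* C1E would trigger on hlt */
 			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
 	}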
  
 
 Modified: stable/8/sys/pc98/pc98/machdep.c
 ==============================================================================
 --- stable/8/sys/pc98/pc98/machdep.c	Fri Jul 20 17:51:20 2012	(r238657)
 +++ stable/8/sys/pc98/pc98/machdep.c	Fri Jul 20 19:35:20 2012	(r238658)
 @@ -1122,40 +1122,36 @@ cpu_halt(void)
  		__asm__ ("hlt");
  }
  
 +static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
 +TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
 +SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
 +    0, "Use MONITOR/MWAIT for short idle");
 +
 +#define	STATE_RUNNING	0x0
 +#define	STATE_MWAIT	0x1
 +#define	STATE_SLEEPING	0x2
 +
  static void
  cpu_idle_hlt(int busy)
  {
 +	int *state;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_SLEEPING;
  	/*
 -	 * we must absolutely guarentee that hlt is the next instruction
 +	 * We must absolutely guarantee that hlt is the next instruction
  	 * after sti or we introduce a timing window.
  	 */
  	disable_intr();
 -  	if (sched_runnable())
 +	if (sched_runnable())
  		enable_intr();
  	else
  		__asm __volatile("sti; hlt");
 -}
 -
 -static void
 -cpu_idle_spin(int busy)
 -{
 -	return;
 -}
 -
 -void (*cpu_idle_fn)(int) = cpu_idle_hlt;
 -
 -void
 -cpu_idle(int busy)
 -{
 -#if defined(SMP)
 -	if (mp_grab_cpu_hlt())
 -		return;
 -#endif
 -	cpu_idle_fn(busy);
 +	*state = STATE_RUNNING;
  }
  
  /*
 - * mwait cpu power states.  Lower 4 bits are sub-states.
 + * MWAIT cpu power states.  Lower 4 bits are sub-states.
   */
  #define	MWAIT_C0	0xf0
  #define	MWAIT_C1	0x00
 @@ -1163,63 +1159,74 @@ cpu_idle(int busy)
  #define	MWAIT_C3	0x20
  #define	MWAIT_C4	0x30
  
 -#define	MWAIT_DISABLED	0x0
 -#define	MWAIT_WOKEN	0x1
 -#define	MWAIT_WAITING	0x2
 -
  static void
  cpu_idle_mwait(int busy)
  {
 -	int *mwait;
 +	int *state;
  
 -	mwait = (int *)PCPU_PTR(monitorbuf);
 -	*mwait = MWAIT_WAITING;
 -	if (sched_runnable())
 -		return;
 -	cpu_monitor(mwait, 0, 0);
 -	if (*mwait == MWAIT_WAITING)
 -		cpu_mwait(0, MWAIT_C1);
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_MWAIT;
 +	if (!sched_runnable()) {
 +		cpu_monitor(state, 0, 0);
 +		if (*state == STATE_MWAIT)
 +			cpu_mwait(0, MWAIT_C1);
 +	}
 +	*state = STATE_RUNNING;
  }
  
  static void
 -cpu_idle_mwait_hlt(int busy)
 +cpu_idle_spin(int busy)
 +{
 +	int *state;
 +	int i;
 +
 +	state = (int *)PCPU_PTR(monitorbuf);
 +	*state = STATE_RUNNING;
 +	for (i = 0; i < 1000; i++) {
 +		if (sched_runnable())
 +			return;
 +		cpu_spinwait();
 +	}
 +}
 +
 +void (*cpu_idle_fn)(int) = cpu_idle_hlt;
 +
 +void
 +cpu_idle(int busy)
  {
 -	int *mwait;
  
 -	mwait = (int *)PCPU_PTR(monitorbuf);
 -	if (busy == 0) {
 -		*mwait = MWAIT_DISABLED;
 -		cpu_idle_hlt(busy);
 +#ifdef SMP
 +	if (mp_grab_cpu_hlt())
  		return;
 +#endif
 +	/* If we are busy, try to use fast methods. */
 +	if (busy) {
 +		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
 +			cpu_idle_mwait(busy);
 +			return;
 +		}
  	}
 -	*mwait = MWAIT_WAITING;
 -	if (sched_runnable())
 -		return;
 -	cpu_monitor(mwait, 0, 0);
 -	if (*mwait == MWAIT_WAITING)
 -		cpu_mwait(0, MWAIT_C1);
 +
 +	/* Call main idle method. */
 +	cpu_idle_fn(busy);
  }
  
  int
  cpu_idle_wakeup(int cpu)
  {
  	struct pcpu *pcpu;
 -	int *mwait;
 +	int *state;
  
 -	if (cpu_idle_fn == cpu_idle_spin)
 -		return (1);
 -	if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
 -		return (0);
  	pcpu = pcpu_find(cpu);
 -	mwait = (int *)pcpu->pc_monitorbuf;
 +	state = (int *)pcpu->pc_monitorbuf;
  	/*
  	 * This doesn't need to be atomic since missing the race will
  	 * simply result in unnecessary IPIs.
  	 */
 -	if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
 +	if (*state == STATE_SLEEPING)
  		return (0);
 -	*mwait = MWAIT_WOKEN;
 -
 +	if (*state == STATE_MWAIT)
 +		*state = STATE_RUNNING;
  	return (1);
  }
  
 @@ -1232,7 +1239,6 @@ struct {
  } idle_tbl[] = {
  	{ cpu_idle_spin, "spin" },
  	{ cpu_idle_mwait, "mwait" },
 -	{ cpu_idle_mwait_hlt, "mwait_hlt" },
  	{ cpu_idle_hlt, "hlt" },
  	{ NULL, NULL }
  };
 @@ -1257,6 +1263,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG
  	return (error);
  }
  
 +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
 +    0, 0, idle_sysctl_available, "A", "list of available idle functions");
 +
  static int
  idle_sysctl(SYSCTL_HANDLER_ARGS)
  {
 @@ -1288,9 +1297,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
  	return (EINVAL);
  }
  
 -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
 -    0, 0, idle_sysctl_available, "A", "list of available idle functions");
 -
  SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
      idle_sysctl, "A", "currently selected idle function");
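 
  The relocated machdep.idle_available node, together with machdep.idle,
  lets the idle method be inspected and switched at runtime.  A small
  illustrative userland sketch, assuming a FreeBSD libc; selecting "mwait"
  only succeeds on CPUs advertising CPUID2_MON, and writing the node needs
  root (equivalent to running "sysctl machdep.idle=mwait"):
 
 	#include <sys/types.h>
 	#include <sys/sysctl.h>
 	#include <stdio.h>
 	#include <string.h>
 
 	int
 	main(void)
 	{
 		char avail[256];
 		size_t len = sizeof(avail);
 
 		/* List the idle methods this kernel offers. */
 		if (sysctlbyname("machdep.idle_available", avail, &len,
 		    NULL, 0) == 0)
 			printf("available: %s\n", avail);
 
 		/* Select the MWAIT-based idle method. */
 		if (sysctlbyname("machdep.idle", NULL, NULL, "mwait",
 		    strlen("mwait")) != 0)
 			perror("machdep.idle");
 		return (0);
 	}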
  
 

