svn commit: r192663 - in user/kmacy/releng_7_2_fcs/sys: amd64/amd64 amd64/include i386/i386 kern
Kip Macy
kmacy at FreeBSD.org
Sat May 23 19:17:28 UTC 2009
Author: kmacy
Date: Sat May 23 19:17:27 2009
New Revision: 192663
URL: http://svn.freebsd.org/changeset/base/192663
Log:
fix hwpmc callchain and NMI handling
Modified:
user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S
user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c
user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c
user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c
user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h
user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s
user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c
user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c
Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S Sat May 23 19:17:27 2009 (r192663)
@@ -383,22 +383,24 @@ IDTVEC(fast_syscall32)
* NMI handling is special.
*
* First, NMIs do not respect the state of the processor's RFLAGS.IF
- * bit and the NMI handler may be invoked at any time, including when
- * the processor is in a critical section with RFLAGS.IF == 0. In
- * particular, this means that the processor's GS.base values could be
- * inconsistent on entry to the handler, and so we need to read
- * MSR_GSBASE to determine if a 'swapgs' is needed. We use '%ebx', a
- * C-preserved register, to remember whether to swap GS back on the
- * exit path.
+ * bit. The NMI handler may be entered at any time, including when
+ * the processor is in a critical section with RFLAGS.IF == 0.
+ * The processor's GS.base value could be invalid on entry to the
+ * handler.
*
* Second, the processor treats NMIs specially, blocking further NMIs
- * until an 'iretq' instruction is executed. We therefore need to
- * execute the NMI handler with interrupts disabled to prevent a
- * nested interrupt from executing an 'iretq' instruction and
- * inadvertently taking the processor out of NMI mode.
+ * until an 'iretq' instruction is executed. We thus need to execute
+ * the NMI handler with interrupts disabled, to prevent a nested interrupt
+ * from executing an 'iretq' instruction and inadvertently taking the
+ * processor out of NMI mode.
*
- * Third, the NMI handler runs on its own stack (tss_ist1), shared
- * with the double fault handler.
+ * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
+ * GS.base value for the processor is stored just above the bottom of its
+ * NMI stack. For NMIs taken from kernel mode, the current value in
+ * the processor's GS.base is saved at entry to C-preserved register %r12,
+ * the canonical value for GS.base is then loaded into the processor, and
+ * the saved value is restored at exit time. For NMIs taken from user mode,
+ * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
*/
IDTVEC(nmi)
@@ -423,12 +425,22 @@ IDTVEC(nmi)
movq %r15,TF_R15(%rsp)
xorl %ebx,%ebx
testb $SEL_RPL_MASK,TF_CS(%rsp)
- jnz nmi_needswapgs /* we came from userland */
+ jnz nmi_fromuserspace
+ /*
+ * We've interrupted the kernel. Preserve GS.base in %r12.
+ */
movl $MSR_GSBASE,%ecx
rdmsr
- cmpl $VM_MAXUSER_ADDRESS >> 32,%edx
- jae nmi_calltrap /* GS.base holds a kernel VA */
-nmi_needswapgs:
+ movq %rax,%r12
+ shlq $32,%rdx
+ orq %rdx,%r12
+ /* Retrieve and load the canonical value for GS.base. */
+ movq TF_SIZE(%rsp),%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ jmp nmi_calltrap
+nmi_fromuserspace:
incl %ebx
swapgs
/* Note: this label is also used by ddb and gdb: */
@@ -439,14 +451,19 @@ nmi_calltrap:
MEXITCOUNT
#ifdef HWPMC_HOOKS
/*
- * Check if the current trap was from user mode and if so
- * whether the current thread needs a user call chain to be
- * captured. We are still in NMI mode at this point.
+ * Capture a userspace callchain if needed.
+ *
+ * - Check if the current trap was from user mode.
+ * - Check if the current thread is valid.
+ * - Check if the thread requires a user call chain to be
+ * captured.
+ *
+ * We are still in NMI mode at this point.
*/
- testb $SEL_RPL_MASK,TF_CS(%rsp)
- jz nocallchain
- movq PCPU(CURTHREAD),%rax /* curthread present? */
- orq %rax,%rax
+ testl %ebx,%ebx
+ jz nocallchain /* not from userspace */
+ movq PCPU(CURTHREAD),%rax
+ orq %rax,%rax /* curthread present? */
jz nocallchain
testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
jz nocallchain
@@ -459,9 +476,9 @@ nmi_calltrap:
*/
movq %rsp,%rsi /* source stack pointer */
movq $TF_SIZE,%rcx
- movq PCPU(RSP0),%rbx
- subq %rcx,%rbx
- movq %rbx,%rdi /* destination stack pointer */
+ movq PCPU(RSP0),%rdx
+ subq %rcx,%rdx
+ movq %rdx,%rdi /* destination stack pointer */
shrq $3,%rcx /* trap frame size in long words */
cld
@@ -470,7 +487,7 @@ nmi_calltrap:
movl %ss,%eax
pushq %rax /* tf_ss */
- pushq %rbx /* tf_rsp (on kernel stack) */
+ pushq %rdx /* tf_rsp (on kernel stack) */
pushfq /* tf_rflags */
movl %cs,%eax
pushq %rax /* tf_cs */
@@ -480,21 +497,36 @@ outofnmi:
/*
* At this point the processor has exited NMI mode and is running
* with interrupts turned off on the normal kernel stack.
- * We turn interrupts back on, and take the usual 'doreti' exit
- * path.
*
* If a pending NMI gets recognized at or after this point, it
- * will cause a kernel callchain to be traced. Since this path
- * is only taken for NMI interrupts from user space, our `swapgs'
- * state is correct for taking the doreti path.
+ * will cause a kernel callchain to be traced.
+ *
+ * We turn interrupts back on, and call the user callchain capture hook.
*/
+ movq pmc_hook,%rax
+ orq %rax,%rax
+ jz nocallchain
+ movq PCPU(CURTHREAD),%rdi /* thread */
+ movq $PMC_FN_USER_CALLCHAIN,%rsi /* command */
+ movq %rsp,%rdx /* frame */
sti
- jmp doreti
+ call *%rax
+ cli
nocallchain:
#endif
testl %ebx,%ebx
- jz nmi_restoreregs
+ jz nmi_kernelexit
swapgs
+ jmp nmi_restoreregs
+nmi_kernelexit:
+ /*
+ * Put back the preserved MSR_GSBASE value.
+ */
+ movl $MSR_GSBASE,%ecx
+ movq %r12,%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
nmi_restoreregs:
movq TF_RDI(%rsp),%rdi
movq TF_RSI(%rsp),%rsi
Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c Sat May 23 19:17:27 2009 (r192663)
@@ -36,6 +36,7 @@
__FBSDID("$FreeBSD$");
#include "opt_compat.h"
+#include "opt_hwpmc_hooks.h"
#include "opt_kstack_pages.h"
#include <sys/param.h>
@@ -44,6 +45,9 @@ __FBSDID("$FreeBSD$");
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
+#ifdef HWPMC_HOOKS
+#include <sys/pmckern.h>
+#endif
#include <sys/errno.h>
#include <sys/mount.h>
#include <sys/mutex.h>
@@ -223,3 +227,7 @@ ASSYM(MTX_LOCK, offsetof(struct mtx, mtx
ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse));
ASSYM(MSR_GSBASE, MSR_GSBASE);
+
+#ifdef HWPMC_HOOKS
+ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);
+#endif
Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c Sat May 23 19:17:27 2009 (r192663)
@@ -680,6 +680,9 @@ struct gate_descriptor *idt = &idt0[0];
static char dblfault_stack[PAGE_SIZE] __aligned(16);
+static char nmi0_stack[PAGE_SIZE] __aligned(16);
+CTASSERT(sizeof(struct nmi_pcpu) == 16);
+
struct amd64tss common_tss[MAXCPU];
/* software prototypes -- in more palatable form */
@@ -1150,6 +1153,7 @@ hammer_time(u_int64_t modulep, u_int64_t
caddr_t kmdp;
int gsel_tss, x;
struct pcpu *pc;
+ struct nmi_pcpu *np;
u_int64_t msr;
char *env;
@@ -1224,7 +1228,7 @@ hammer_time(u_int64_t modulep, u_int64_t
setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 1);
+ setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
@@ -1297,6 +1301,14 @@ hammer_time(u_int64_t modulep, u_int64_t
/* doublefault stack space, runs on ist1 */
common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
+ /*
+ * NMI stack, runs on ist2. The pcpu pointer is stored just
+ * above the start of the ist2 stack.
+ */
+ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
+ np->np_pcpu = (register_t) pc;
+ common_tss[0].tss_ist2 = (long) np;
+
/* Set the IO permission bitmap (empty due to tss seg limit) */
common_tss[0].tss_iobase = sizeof(struct amd64tss);
Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c Sat May 23 19:17:27 2009 (r192663)
@@ -98,6 +98,7 @@ void *bootstacks[MAXCPU];
/* Temporary holder for double fault stack */
char *doublefault_stack;
+char *nmi_stack;
/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;
@@ -455,6 +456,7 @@ void
init_secondary(void)
{
struct pcpu *pc;
+ struct nmi_pcpu *np;
u_int64_t msr, cr0;
int cpu, gsel_tss, x;
struct region_descriptor ap_gdt;
@@ -468,6 +470,10 @@ init_secondary(void)
common_tss[cpu].tss_iobase = sizeof(struct amd64tss);
common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
+ /* The NMI handler runs on the IST2 stack. */
+ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
+ common_tss[cpu].tss_ist2 = (long) np;
+
/* Prepare private GDT */
gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
ssdtosyssd(&gdt_segs[GPROC0_SEL],
@@ -492,6 +498,9 @@ init_secondary(void)
pc->pc_rsp0 = 0;
pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
+ /* Save the per-cpu pointer for use by the NMI handler. */
+ np->np_pcpu = (register_t) pc;
+
wrmsr(MSR_FSBASE, 0); /* User value */
wrmsr(MSR_GSBASE, (u_int64_t)pc);
wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
@@ -758,6 +767,7 @@ start_all_aps(void)
/* allocate and set up an idle stack data page */
bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
+ nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
bootAP = cpu;
Modified: user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h Sat May 23 19:17:27 2009 (r192663)
@@ -120,6 +120,15 @@ struct intsrc {
struct trapframe;
+/*
+ * The following data structure holds the per-CPU pointer used by the NMI
+ * handler, and is placed just above the top of the NMI stack space.
+ */
+struct nmi_pcpu {
+ register_t np_pcpu;
+ register_t __padding; /* pad to 16 bytes */
+};
+
extern struct mtx icu_lock;
extern int elcr_found;
Modified: user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s Sat May 23 19:17:27 2009 (r192663)
@@ -439,9 +439,18 @@ doreti_nmi:
iret
outofnmi:
/*
- * Clear interrupts and jump to AST handling code.
+ * Call the callchain capture hook after turning interrupts back on.
*/
+ movl pmc_hook,%ecx
+ orl %ecx,%ecx
+ jz doreti_exit
+ pushl %esp /* frame pointer */
+ pushl $PMC_FN_USER_CALLCHAIN /* command */
+ movl PCPU(CURTHREAD),%eax
+ pushl %eax /* curthread */
sti
+ call *%ecx
+ addl $12,%esp
jmp doreti_ast
ENTRY(end_exceptions)
#endif
Modified: user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c Sat May 23 19:17:27 2009 (r192663)
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include "opt_apic.h"
#include "opt_compat.h"
+#include "opt_hwpmc_hooks.h"
#include "opt_kstack_pages.h"
#include <sys/param.h>
@@ -44,6 +45,9 @@ __FBSDID("$FreeBSD$");
#include <sys/assym.h>
#include <sys/bio.h>
#include <sys/buf.h>
+#ifdef HWPMC_HOOKS
+#include <sys/pmckern.h>
+#endif
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/mount.h>
@@ -231,3 +235,13 @@ ASSYM(MTX_RECURSECNT, offsetof(struct mt
ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base));
ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
#endif
+
+#ifdef XEN
+#include <machine/xen/hypervisor.h>
+ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
+ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START);
+#endif
+
+#ifdef HWPMC_HOOKS
+ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);
+#endif
Modified: user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c Sat May 23 19:17:05 2009 (r192662)
+++ user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c Sat May 23 19:17:27 2009 (r192663)
@@ -44,7 +44,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_hwpmc_hooks.h"
#include "opt_ktrace.h"
#include "opt_mac.h"
#ifdef __i386__
More information about the svn-src-user mailing list