svn commit: r333371 - in releng: 10.4 10.4/sys/amd64/amd64 10.4/sys/conf 10.4/sys/i386/i386 11.1 11.1/sys/amd64/amd64 11.1/sys/conf 11.1/sys/i386/i386
Gordon Tetlow
gordon at FreeBSD.org
Tue May 8 17:12:14 UTC 2018
Author: gordon
Date: Tue May 8 17:12:10 2018
New Revision: 333371
URL: https://svnweb.freebsd.org/changeset/base/333371
Log:
Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg]
Bump newvers.sh and UPDATING for today's patches.
Approved by: so
Security: CVE-2018-8897
Security: FreeBSD-SA-18:06.debugreg
Sponsored by: The FreeBSD Foundation
Modified:
releng/10.4/UPDATING
releng/10.4/sys/amd64/amd64/exception.S
releng/10.4/sys/amd64/amd64/machdep.c
releng/10.4/sys/amd64/amd64/mp_machdep.c
releng/10.4/sys/amd64/amd64/trap.c
releng/10.4/sys/conf/newvers.sh
releng/10.4/sys/i386/i386/trap.c
releng/11.1/UPDATING
releng/11.1/sys/amd64/amd64/exception.S
releng/11.1/sys/amd64/amd64/machdep.c
releng/11.1/sys/amd64/amd64/mp_machdep.c
releng/11.1/sys/amd64/amd64/pmap.c
releng/11.1/sys/amd64/amd64/trap.c
releng/11.1/sys/conf/newvers.sh
releng/11.1/sys/i386/i386/trap.c
Modified: releng/10.4/UPDATING
==============================================================================
--- releng/10.4/UPDATING Tue May 8 17:05:39 2018 (r333370)
+++ releng/10.4/UPDATING Tue May 8 17:12:10 2018 (r333371)
@@ -16,6 +16,16 @@ from older versions of FreeBSD, try WITHOUT_CLANG to b
stable/10, and then rebuild without this option. The bootstrap process from
older version of current is a bit fragile.
+20180508 p9 FreeBSD-SA-18:06.debugreg
+ FreeBSD-EN-18:05.mem
+ FreeBSD-EN-18:06.tzdata
+
+ Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg]
+
+ Fix multiple small kernel memory disclosures. [EN-18:05.mem]
+
+ Update timezone database information. [EN-18:06.tzdata]
+
20180404 p8 FreeBSD-SA-18:04.vt
FreeBSD-SA-18:05.ipsec
FreeBSD-EN-18:03.tzdata
Modified: releng/10.4/sys/amd64/amd64/exception.S
==============================================================================
--- releng/10.4/sys/amd64/amd64/exception.S Tue May 8 17:05:39 2018 (r333370)
+++ releng/10.4/sys/amd64/amd64/exception.S Tue May 8 17:12:10 2018 (r333371)
@@ -108,8 +108,6 @@ MCOUNT_LABEL(btrap)
movq $0,TF_ADDR(%rsp) ; \
movq $0,TF_ERR(%rsp) ; \
jmp alltraps_noen
-IDTVEC(dbg)
- TRAP_NOEN(T_TRCTRAP)
IDTVEC(bpt)
TRAP_NOEN(T_BPTFLT)
#ifdef KDTRACE_HOOKS
@@ -434,6 +432,101 @@ IDTVEC(fast_syscall)
*/
IDTVEC(fast_syscall32)
sysret
+
+/*
+ * DB# handler is very similar to the NMI handler, because 'mov/pop %ss'
+ * delays generation of the exception until the next instruction is
+ * executed, which might be a kernel entry. So we must execute the
+ * handler on an IST stack and be ready for non-kernel GSBASE.
+ */
+IDTVEC(dbg)
+ subq $TF_RIP,%rsp
+ movl $(T_TRCTRAP),TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
+ movq %rdi,TF_RDI(%rsp)
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jnz dbg_fromuserspace
+ /*
+ * We've interrupted the kernel. Preserve GS.base in %r12.
+ */
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ movq %rax,%r12
+ shlq $32,%rdx
+ orq %rdx,%r12
+ /* Retrieve and load the canonical value for GS.base. */
+ movq TF_SIZE(%rsp),%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp,%rdi
+ call trap
+ MEXITCOUNT
+ /*
+ * Put back the preserved MSR_GSBASE value.
+ */
+ movl $MSR_GSBASE,%ecx
+ movq %r12,%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ movq TF_RDI(%rsp),%rdi
+ movq TF_RSI(%rsp),%rsi
+ movq TF_RDX(%rsp),%rdx
+ movq TF_RCX(%rsp),%rcx
+ movq TF_R8(%rsp),%r8
+ movq TF_R9(%rsp),%r9
+ movq TF_RAX(%rsp),%rax
+ movq TF_RBX(%rsp),%rbx
+ movq TF_RBP(%rsp),%rbp
+ movq TF_R10(%rsp),%r10
+ movq TF_R11(%rsp),%r11
+ movq TF_R12(%rsp),%r12
+ movq TF_R13(%rsp),%r13
+ movq TF_R14(%rsp),%r14
+ movq TF_R15(%rsp),%r15
+ addq $TF_RIP,%rsp
+ jmp doreti_iret
+dbg_fromuserspace:
+ /*
+ * Switch to kernel GSBASE, and copy the frame from the IST
+ * stack to the normal kernel stack, since trap() re-enables
+ * interrupts, and since we might trap on DB# while
+ * in trap().
+ */
+ swapgs
+ movq PCPU(RSP0),%rax
+ movl $TF_SIZE,%ecx
+ subq %rcx,%rax
+ movq %rax,%rdi
+ movq %rsp,%rsi
+ rep;movsb
+ movq %rax,%rsp
+ movq PCPU(CURPCB),%rdi
+ orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
+ jmp calltrap
/*
* NMI handling is special.
Modified: releng/10.4/sys/amd64/amd64/machdep.c
==============================================================================
--- releng/10.4/sys/amd64/amd64/machdep.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/10.4/sys/amd64/amd64/machdep.c Tue May 8 17:12:10 2018 (r333371)
@@ -1023,6 +1023,7 @@ struct gate_descriptor *idt = &idt0[0]; /* interrupt d
static char dblfault_stack[PAGE_SIZE] __aligned(16);
static char nmi0_stack[PAGE_SIZE] __aligned(16);
+static char dbg0_stack[PAGE_SIZE] __aligned(16);
CTASSERT(sizeof(struct nmi_pcpu) == 16);
struct amd64tss common_tss[MAXCPU];
@@ -1908,7 +1909,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
for (x = 0; x < NIDT; x++)
setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
- setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
@@ -1965,6 +1966,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
np->np_pcpu = (register_t) pc;
common_tss[0].tss_ist2 = (long) np;
+
+ /*
+ * DB# stack, runs on ist4.
+ */
+ np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1;
+ np->np_pcpu = (register_t) pc;
+ common_tss[0].tss_ist4 = (long) np;
/* Set the IO permission bitmap (empty due to tss seg limit) */
common_tss[0].tss_iobase = sizeof(struct amd64tss) +
Modified: releng/10.4/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- releng/10.4/sys/amd64/amd64/mp_machdep.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/10.4/sys/amd64/amd64/mp_machdep.c Tue May 8 17:12:10 2018 (r333371)
@@ -98,6 +98,7 @@ void *bootstacks[MAXCPU];
/* Temporary variables for init_secondary() */
char *doublefault_stack;
char *nmi_stack;
+char *dbg_stack;
void *dpcpu;
struct pcb stoppcbs[MAXCPU];
@@ -644,6 +645,10 @@ init_secondary(void)
np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
common_tss[cpu].tss_ist2 = (long) np;
+ /* The DB# stack runs on IST4. */
+ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+ common_tss[cpu].tss_ist4 = (long) np;
+
/* Prepare private GDT */
gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
for (x = 0; x < NGDT; x++) {
@@ -679,6 +684,10 @@ init_secondary(void)
/* Save the per-cpu pointer for use by the NMI handler. */
np->np_pcpu = (register_t) pc;
+ /* Save the per-cpu pointer for use by the DB# handler. */
+ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+ np->np_pcpu = (register_t) pc;
+
wrmsr(MSR_FSBASE, 0); /* User value */
wrmsr(MSR_GSBASE, (u_int64_t)pc);
wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
@@ -966,6 +975,8 @@ start_all_aps(void)
doublefault_stack = (char *)kmem_malloc(kernel_arena,
PAGE_SIZE, M_WAITOK | M_ZERO);
nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
+ M_WAITOK | M_ZERO);
+ dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
M_WAITOK | M_ZERO);
dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
M_WAITOK | M_ZERO);
Modified: releng/10.4/sys/amd64/amd64/trap.c
==============================================================================
--- releng/10.4/sys/amd64/amd64/trap.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/10.4/sys/amd64/amd64/trap.c Tue May 8 17:12:10 2018 (r333371)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
*/
#include "opt_clock.h"
+#include "opt_compat.h"
#include "opt_cpu.h"
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
@@ -98,6 +99,9 @@ PMC_SOFT_DEFINE( , , page_fault, write);
#include <sys/dtrace_bsd.h>
#endif
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(fast_syscall),
+ IDTVEC(fast_syscall32), IDTVEC(int0x80_syscall);
+
extern void trap(struct trapframe *frame);
extern void syscall(struct trapframe *frame);
void dblfault_handler(struct trapframe *frame);
@@ -549,6 +553,39 @@ trap(struct trapframe *frame)
load_dr6(rdr6() & 0xfffffff0);
goto out;
}
+
+ /*
+ * Malicious user code can configure a debug
+ * register watchpoint to trap on data access
+ * to the top of stack and then execute 'pop
+ * %ss; int 3'. Due to exception deferral for
+ * 'pop %ss', the CPU will not interrupt 'int
+ * 3' to raise the DB# exception for the debug
+ * register but will postpone the DB# until
+ * execution of the first instruction of the
+ * BP# handler (in kernel mode). Normally the
+ * previous check would ignore DB# exceptions
+ * for watchpoints on user addresses raised in
+ * kernel mode. However, some CPU errata
+ * include cases where DB# exceptions do not
+ * properly set bits in %dr6, e.g. Haswell
+ * HSD23 and Skylake-X SKZ24.
+ *
+ * A deferred DB# can also be raised on the
+ * first instructions of system call entry
+ * points or single-step traps via similar use
+ * of 'pop %ss' or 'mov xxx, %ss'.
+ */
+ if (frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall) ||
+#ifdef COMPAT_FREEBSD32
+ frame->tf_rip ==
+ (uintptr_t)IDTVEC(int0x80_syscall) ||
+#endif
+ frame->tf_rip == (uintptr_t)IDTVEC(bpt) ||
+ frame->tf_rip == (uintptr_t)IDTVEC(dbg) ||
+ /* Needed for AMD. */
+ frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32))
+ return;
/*
* FALLTHROUGH (TRCTRAP kernel mode, kernel address)
*/
Modified: releng/10.4/sys/conf/newvers.sh
==============================================================================
--- releng/10.4/sys/conf/newvers.sh Tue May 8 17:05:39 2018 (r333370)
+++ releng/10.4/sys/conf/newvers.sh Tue May 8 17:12:10 2018 (r333371)
@@ -32,7 +32,7 @@
TYPE="FreeBSD"
REVISION="10.4"
-BRANCH="RELEASE-p8"
+BRANCH="RELEASE-p9"
if [ "X${BRANCH_OVERRIDE}" != "X" ]; then
BRANCH=${BRANCH_OVERRIDE}
fi
Modified: releng/10.4/sys/i386/i386/trap.c
==============================================================================
--- releng/10.4/sys/i386/i386/trap.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/10.4/sys/i386/i386/trap.c Tue May 8 17:12:10 2018 (r333371)
@@ -116,6 +116,8 @@ void dblfault_handler(void);
extern inthand_t IDTVEC(lcall_syscall);
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
+
#define MAX_TRAP_MSG 32
static char *trap_msg[] = {
"", /* 0 unused */
@@ -683,6 +685,34 @@ trap(struct trapframe *frame)
load_dr6(rdr6() & 0xfffffff0);
goto out;
}
+
+ /*
+ * Malicious user code can configure a debug
+ * register watchpoint to trap on data access
+ * to the top of stack and then execute 'pop
+ * %ss; int 3'. Due to exception deferral for
+ * 'pop %ss', the CPU will not interrupt 'int
+ * 3' to raise the DB# exception for the debug
+ * register but will postpone the DB# until
+ * execution of the first instruction of the
+ * BP# handler (in kernel mode). Normally the
+ * previous check would ignore DB# exceptions
+ * for watchpoints on user addresses raised in
+ * kernel mode. However, some CPU errata
+ * include cases where DB# exceptions do not
+ * properly set bits in %dr6, e.g. Haswell
+ * HSD23 and Skylake-X SKZ24.
+ *
+ * A deferred DB# can also be raised on the
+ * first instructions of system call entry
+ * points or single-step traps via similar use
+ * of 'pop %ss' or 'mov xxx, %ss'.
+ */
+ if (frame->tf_eip ==
+ (uintptr_t)IDTVEC(int0x80_syscall) ||
+ frame->tf_eip == (uintptr_t)IDTVEC(bpt) ||
+ frame->tf_eip == (uintptr_t)IDTVEC(dbg))
+ return;
/*
* FALLTHROUGH (TRCTRAP kernel mode, kernel address)
*/
Modified: releng/11.1/UPDATING
==============================================================================
--- releng/11.1/UPDATING Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/UPDATING Tue May 8 17:12:10 2018 (r333371)
@@ -16,6 +16,16 @@ from older versions of FreeBSD, try WITHOUT_CLANG and
the tip of head, and then rebuild without this option. The bootstrap process
from older version of current across the gcc/clang cutover is a bit fragile.
+20180508 p10 FreeBSD-SA-18:06.debugreg
+ FreeBSD-EN-18:05.mem
+ FreeBSD-EN-18:06.tzdata
+
+ Fix mishandling of x86 debug exceptions. [SA-18:06.debugreg]
+
+ Fix multiple small kernel memory disclosures. [EN-18:05.mem]
+
+ Update timezone database information. [EN-18:06.tzdata]
+
20180404 p9 FreeBSD-SA-18:04.vt
FreeBSD-SA-18:05.ipsec
FreeBSD-EN-18:03.tzdata
Modified: releng/11.1/sys/amd64/amd64/exception.S
==============================================================================
--- releng/11.1/sys/amd64/amd64/exception.S Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/sys/amd64/amd64/exception.S Tue May 8 17:12:10 2018 (r333371)
@@ -116,7 +116,6 @@ X\l: subq $TF_RIP,%rsp
jmp alltraps_noen
.endm
- TRAP_NOEN dbg, T_TRCTRAP
TRAP_NOEN bpt, T_BPTFLT
#ifdef KDTRACE_HOOKS
TRAP_NOEN dtrace_ret, T_DTRACE_RET
@@ -507,6 +506,121 @@ fast_syscall_common:
*/
IDTVEC(fast_syscall32)
sysret
+
+/*
+ * DB# handler is very similar to the NMI handler, because 'mov/pop %ss'
+ * delays generation of the exception until the next instruction is
+ * executed, which might be a kernel entry. So we must execute the
+ * handler on an IST stack and be ready for non-kernel GSBASE.
+ */
+IDTVEC(dbg)
+ subq $TF_RIP,%rsp
+ movl $(T_TRCTRAP),TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
+ movq %rdi,TF_RDI(%rsp)
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ SAVE_SEGS
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jnz dbg_fromuserspace
+ /*
+ * We've interrupted the kernel. Preserve GS.base in %r12,
+ * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d.
+ */
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ movq %rax,%r12
+ shlq $32,%rdx
+ orq %rdx,%r12
+ /* Retrieve and load the canonical value for GS.base. */
+ movq TF_SIZE(%rsp),%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ movq %cr3,%r13
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je 2f
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ rdmsr
+ movl %eax,%r14d
+ call handle_ibrs_entry
+2: FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp,%rdi
+ call trap
+ MEXITCOUNT
+ testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je 3f
+ movl %r14d,%eax
+ xorl %edx,%edx
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ wrmsr
+ /*
+ * Put back the preserved MSR_GSBASE and %cr3 values.
+ */
+3: movl $MSR_GSBASE,%ecx
+ movq %r12,%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ movq %r13,%cr3
+ RESTORE_REGS
+ addq $TF_RIP,%rsp
+ jmp doreti_iret
+dbg_fromuserspace:
+ /*
+ * Switch to kernel GSBASE and kernel page table, and copy frame
+ * from the IST stack to the normal kernel stack, since trap()
+ * re-enables interrupts, and since we might trap on DB# while
+ * in trap().
+ */
+ swapgs
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: movq PCPU(RSP0),%rax
+ movl $TF_SIZE,%ecx
+ subq %rcx,%rax
+ movq %rax,%rdi
+ movq %rsp,%rsi
+ rep;movsb
+ movq %rax,%rsp
+ call handle_ibrs_entry
+ movq PCPU(CURPCB),%rdi
+ orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
+ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+ jz 3f
+ cmpw $KUF32SEL,TF_FS(%rsp)
+ jne 2f
+ rdfsbase %rax
+ movq %rax,PCB_FSBASE(%rdi)
+2: cmpw $KUG32SEL,TF_GS(%rsp)
+ jne 3f
+ movl $MSR_KGSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx
+ orq %rdx,%rax
+ movq %rax,PCB_GSBASE(%rdi)
+3: jmp calltrap
/*
* NMI handling is special.
Modified: releng/11.1/sys/amd64/amd64/machdep.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/machdep.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/sys/amd64/amd64/machdep.c Tue May 8 17:12:10 2018 (r333371)
@@ -675,6 +675,7 @@ struct gate_descriptor *idt = &idt0[0]; /* interrupt d
static char dblfault_stack[PAGE_SIZE] __aligned(16);
static char mce0_stack[PAGE_SIZE] __aligned(16);
static char nmi0_stack[PAGE_SIZE] __aligned(16);
+static char dbg0_stack[PAGE_SIZE] __aligned(16);
CTASSERT(sizeof(struct nmi_pcpu) == 16);
struct amd64tss common_tss[MAXCPU];
@@ -827,7 +828,7 @@ extern inthand_t
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
IDTVEC(xmm), IDTVEC(dblfault),
- IDTVEC(div_pti), IDTVEC(dbg_pti), IDTVEC(bpt_pti),
+ IDTVEC(div_pti), IDTVEC(bpt_pti),
IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti),
IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti),
IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti),
@@ -1637,8 +1638,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
SEL_KPL, 0);
setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT,
SEL_KPL, 0);
- setidt(IDT_DB, pti ? &IDTVEC(dbg_pti) : &IDTVEC(dbg), SDT_SYSIGT,
- SEL_KPL, 0);
+ setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4);
setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT,
SEL_UPL, 0);
@@ -1720,6 +1720,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1;
np->np_pcpu = (register_t) pc;
common_tss[0].tss_ist3 = (long) np;
+
+ /*
+ * DB# stack, runs on ist4.
+ */
+ np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1;
+ np->np_pcpu = (register_t) pc;
+ common_tss[0].tss_ist4 = (long) np;
/* Set the IO permission bitmap (empty due to tss seg limit) */
common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE;
Modified: releng/11.1/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/mp_machdep.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/sys/amd64/amd64/mp_machdep.c Tue May 8 17:12:10 2018 (r333371)
@@ -87,6 +87,7 @@ extern struct pcpu __pcpu[];
char *doublefault_stack;
char *mce_stack;
char *nmi_stack;
+char *dbg_stack;
/*
* Local data and functions.
@@ -225,6 +226,10 @@ init_secondary(void)
np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
common_tss[cpu].tss_ist3 = (long) np;
+ /* The DB# stack runs on IST4. */
+ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+ common_tss[cpu].tss_ist4 = (long) np;
+
/* Prepare private GDT */
gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
for (x = 0; x < NGDT; x++) {
@@ -270,6 +275,10 @@ init_secondary(void)
np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
np->np_pcpu = (register_t) pc;
+ /* Save the per-cpu pointer for use by the DB# handler. */
+ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
+ np->np_pcpu = (register_t) pc;
+
wrmsr(MSR_FSBASE, 0); /* User value */
wrmsr(MSR_GSBASE, (u_int64_t)pc);
wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
@@ -367,6 +376,8 @@ native_start_all_aps(void)
mce_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
M_WAITOK | M_ZERO);
nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
+ M_WAITOK | M_ZERO);
+ dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
M_WAITOK | M_ZERO);
dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
M_WAITOK | M_ZERO);
Modified: releng/11.1/sys/amd64/amd64/pmap.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/pmap.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/sys/amd64/amd64/pmap.c Tue May 8 17:12:10 2018 (r333371)
@@ -7565,6 +7565,9 @@ pmap_pti_init(void)
/* MC# stack IST 3 */
va = common_tss[i].tss_ist3 + sizeof(struct nmi_pcpu);
pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
+ /* DB# stack IST 4 */
+ va = common_tss[i].tss_ist4 + sizeof(struct nmi_pcpu);
+ pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
}
pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE,
(vm_offset_t)etext, true);
Modified: releng/11.1/sys/amd64/amd64/trap.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/trap.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/sys/amd64/amd64/trap.c Tue May 8 17:12:10 2018 (r333371)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
*/
#include "opt_clock.h"
+#include "opt_compat.h"
#include "opt_cpu.h"
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
@@ -99,6 +100,11 @@ PMC_SOFT_DEFINE( , , page_fault, write);
#include <sys/dtrace_bsd.h>
#endif
+extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg),
+ IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32),
+ IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall);
+
+
extern void __noinline trap(struct trapframe *frame);
extern void trap_check(struct trapframe *frame);
extern void syscall(struct trapframe *frame);
@@ -536,6 +542,52 @@ trap(struct trapframe *frame)
load_dr6(rdr6() & ~0xf);
goto out;
}
+
+ /*
+ * Malicious user code can configure a debug
+ * register watchpoint to trap on data access
+ * to the top of stack and then execute 'pop
+ * %ss; int 3'. Due to exception deferral for
+ * 'pop %ss', the CPU will not interrupt 'int
+ * 3' to raise the DB# exception for the debug
+ * register but will postpone the DB# until
+ * execution of the first instruction of the
+ * BP# handler (in kernel mode). Normally the
+ * previous check would ignore DB# exceptions
+ * for watchpoints on user addresses raised in
+ * kernel mode. However, some CPU errata
+ * include cases where DB# exceptions do not
+ * properly set bits in %dr6, e.g. Haswell
+ * HSD23 and Skylake-X SKZ24.
+ *
+ * A deferred DB# can also be raised on the
+ * first instructions of system call entry
+ * points or single-step traps via similar use
+ * of 'pop %ss' or 'mov xxx, %ss'.
+ */
+ if (pti) {
+ if (frame->tf_rip ==
+ (uintptr_t)IDTVEC(fast_syscall_pti) ||
+#ifdef COMPAT_FREEBSD32
+ frame->tf_rip ==
+ (uintptr_t)IDTVEC(int0x80_syscall_pti) ||
+#endif
+ frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti))
+ return;
+ } else {
+ if (frame->tf_rip ==
+ (uintptr_t)IDTVEC(fast_syscall) ||
+#ifdef COMPAT_FREEBSD32
+ frame->tf_rip ==
+ (uintptr_t)IDTVEC(int0x80_syscall) ||
+#endif
+ frame->tf_rip == (uintptr_t)IDTVEC(bpt))
+ return;
+ }
+ if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) ||
+ /* Needed for AMD. */
+ frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32))
+ return;
/*
* FALLTHROUGH (TRCTRAP kernel mode, kernel address)
*/
Modified: releng/11.1/sys/conf/newvers.sh
==============================================================================
--- releng/11.1/sys/conf/newvers.sh Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/sys/conf/newvers.sh Tue May 8 17:12:10 2018 (r333371)
@@ -44,7 +44,7 @@
TYPE="FreeBSD"
REVISION="11.1"
-BRANCH="RELEASE-p9"
+BRANCH="RELEASE-p10"
if [ -n "${BRANCH_OVERRIDE}" ]; then
BRANCH=${BRANCH_OVERRIDE}
fi
Modified: releng/11.1/sys/i386/i386/trap.c
==============================================================================
--- releng/11.1/sys/i386/i386/trap.c Tue May 8 17:05:39 2018 (r333370)
+++ releng/11.1/sys/i386/i386/trap.c Tue May 8 17:12:10 2018 (r333371)
@@ -116,6 +116,8 @@ void dblfault_handler(void);
extern inthand_t IDTVEC(lcall_syscall);
+extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall);
+
#define MAX_TRAP_MSG 32
static char *trap_msg[] = {
"", /* 0 unused */
@@ -668,6 +670,34 @@ kernel_trctrap:
load_dr6(rdr6() & ~0xf);
goto out;
}
+
+ /*
+ * Malicious user code can configure a debug
+ * register watchpoint to trap on data access
+ * to the top of stack and then execute 'pop
+ * %ss; int 3'. Due to exception deferral for
+ * 'pop %ss', the CPU will not interrupt 'int
+ * 3' to raise the DB# exception for the debug
+ * register but will postpone the DB# until
+ * execution of the first instruction of the
+ * BP# handler (in kernel mode). Normally the
+ * previous check would ignore DB# exceptions
+ * for watchpoints on user addresses raised in
+ * kernel mode. However, some CPU errata
+ * include cases where DB# exceptions do not
+ * properly set bits in %dr6, e.g. Haswell
+ * HSD23 and Skylake-X SKZ24.
+ *
+ * A deferred DB# can also be raised on the
+ * first instructions of system call entry
+ * points or single-step traps via similar use
+ * of 'pop %ss' or 'mov xxx, %ss'.
+ */
+ if (frame->tf_eip ==
+ (uintptr_t)IDTVEC(int0x80_syscall) ||
+ frame->tf_eip == (uintptr_t)IDTVEC(bpt) ||
+ frame->tf_eip == (uintptr_t)IDTVEC(dbg))
+ return;
/*
* FALLTHROUGH (TRCTRAP kernel mode, kernel address)
*/
More information about the svn-src-all
mailing list