svn commit: r231979 - in stable/9/sys: amd64/acpica amd64/amd64 amd64/ia32 amd64/include compat/ia32 conf crypto/aesni crypto/via dev/random i386/i386 i386/include i386/isa pc98/pc98

Konstantin Belousov kib at FreeBSD.org
Tue Feb 21 20:56:03 UTC 2012


Author: kib
Date: Tue Feb 21 20:56:03 2012
New Revision: 231979
URL: http://svn.freebsd.org/changeset/base/231979

Log:
  MFC r230426:
  Add support for the extended FPU states on amd64, both for native
  64bit and 32bit ABIs.  As a side-effect, it enables AVX on capable
  CPUs.
  
  MFC r230765:
  Synchronize the struct sigcontext definitions on x86 with mcontext_t.

Added:
  stable/9/sys/amd64/amd64/ptrace_machdep.c
     - copied unchanged from r230426, head/sys/amd64/amd64/ptrace_machdep.c
Modified:
  stable/9/sys/amd64/acpica/acpi_switch.S
  stable/9/sys/amd64/acpica/acpi_wakecode.S
  stable/9/sys/amd64/acpica/acpi_wakeup.c
  stable/9/sys/amd64/amd64/cpu_switch.S
  stable/9/sys/amd64/amd64/fpu.c
  stable/9/sys/amd64/amd64/genassym.c
  stable/9/sys/amd64/amd64/initcpu.c
  stable/9/sys/amd64/amd64/machdep.c
  stable/9/sys/amd64/amd64/mp_machdep.c
  stable/9/sys/amd64/amd64/sys_machdep.c
  stable/9/sys/amd64/amd64/trap.c
  stable/9/sys/amd64/amd64/vm_machdep.c
  stable/9/sys/amd64/ia32/ia32_reg.c
  stable/9/sys/amd64/ia32/ia32_signal.c
  stable/9/sys/amd64/include/fpu.h
  stable/9/sys/amd64/include/frame.h
  stable/9/sys/amd64/include/md_var.h
  stable/9/sys/amd64/include/pcb.h
  stable/9/sys/amd64/include/ptrace.h
  stable/9/sys/amd64/include/signal.h
  stable/9/sys/amd64/include/sysarch.h
  stable/9/sys/amd64/include/ucontext.h
  stable/9/sys/compat/ia32/ia32_signal.h
  stable/9/sys/conf/files.amd64
  stable/9/sys/crypto/aesni/aesni.c
  stable/9/sys/crypto/aesni/aesni.h
  stable/9/sys/crypto/aesni/aesni_wrap.c
  stable/9/sys/crypto/via/padlock.c
  stable/9/sys/crypto/via/padlock.h
  stable/9/sys/crypto/via/padlock_cipher.c
  stable/9/sys/crypto/via/padlock_hash.c
  stable/9/sys/dev/random/nehemiah.c
  stable/9/sys/i386/i386/machdep.c
  stable/9/sys/i386/include/npx.h
  stable/9/sys/i386/include/ptrace.h
  stable/9/sys/i386/include/signal.h
  stable/9/sys/i386/include/sysarch.h
  stable/9/sys/i386/include/ucontext.h
  stable/9/sys/i386/isa/npx.c
  stable/9/sys/pc98/pc98/machdep.c
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/conf/   (props changed)

Modified: stable/9/sys/amd64/acpica/acpi_switch.S
==============================================================================
--- stable/9/sys/amd64/acpica/acpi_switch.S	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/acpica/acpi_switch.S	Tue Feb 21 20:56:03 2012	(r231979)
@@ -146,11 +146,22 @@ ENTRY(acpi_restorecpu)
 
 	/* Restore FPU state. */
 	fninit
-	fxrstor	PCB_USERFPU(%rdi)
+	movq	WAKEUP_CTX(fpusave),%rdi
+	cmpl	$0,use_xsave
+	jne	1f
+	fxrstor	(%rdi)
+	jmp	2f
+1:	movl	xsave_mask,%eax
+	movl	xsave_mask+4,%edx
+/*	xrstor	(%rdi) */
+	.byte	0x0f,0xae,0x2f
+2:
 
 	/* Reload CR0. */
 	movq	%rcx, %cr0
 
+	movq	WAKEUP_CTX(pcb),%rdi
+
 	/* Restore return address. */
 	movq	PCB_RIP(%rdi), %rax
 	movq	%rax, (%rsp)

Modified: stable/9/sys/amd64/acpica/acpi_wakecode.S
==============================================================================
--- stable/9/sys/amd64/acpica/acpi_wakecode.S	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/acpica/acpi_wakecode.S	Tue Feb 21 20:56:03 2012	(r231979)
@@ -270,6 +270,8 @@ wakeup_pcb:
 wakeup_gdt:
 	.word	0
 	.quad	0
+wakeup_fpusave:
+	.quad	0
 
 	ALIGN_DATA
 wakeup_efer:

Modified: stable/9/sys/amd64/acpica/acpi_wakeup.c
==============================================================================
--- stable/9/sys/amd64/acpica/acpi_wakeup.c	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/acpica/acpi_wakeup.c	Tue Feb 21 20:56:03 2012	(r231979)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/pcb.h>
 #include <machine/pmap.h>
 #include <machine/specialreg.h>
+#include <machine/md_var.h>
 
 #ifdef SMP
 #include <x86/apicreg.h>
@@ -67,8 +68,10 @@ extern int		acpi_reset_video;
 
 #ifdef SMP
 extern struct pcb	**susppcbs;
+extern void		**suspfpusave;
 #else
 static struct pcb	**susppcbs;
+static void		**suspfpusave;
 #endif
 
 int			acpi_restorecpu(vm_offset_t, struct pcb *);
@@ -105,6 +108,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, in
 	int		ms;
 
 	WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[cpu]);
+	WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[cpu]);
 	WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[cpu]->pcb_gdt.rd_limit);
 	WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
 	    susppcbs[cpu]->pcb_gdt.rd_base);
@@ -244,6 +248,7 @@ acpi_sleep_machdep(struct acpi_softc *sc
 	load_cr3(KPML4phys);
 
 	if (savectx(susppcbs[0])) {
+		ctx_fpusave(suspfpusave[0]);
 #ifdef SMP
 		if (!CPU_EMPTY(&wakeup_cpus) &&
 		    suspend_cpus(wakeup_cpus) == 0) {
@@ -256,6 +261,7 @@ acpi_sleep_machdep(struct acpi_softc *sc
 		WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0));
 
 		WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[0]);
+		WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[0]);
 		WAKECODE_FIXUP(wakeup_gdt, uint16_t,
 		    susppcbs[0]->pcb_gdt.rd_limit);
 		WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
@@ -333,8 +339,11 @@ acpi_alloc_wakeup_handler(void)
 		return (NULL);
 	}
 	susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK);
-	for (i = 0; i < mp_ncpus; i++)
+	suspfpusave = malloc(mp_ncpus * sizeof(void *), M_DEVBUF, M_WAITOK);
+	for (i = 0; i < mp_ncpus; i++) {
 		susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK);
+		suspfpusave[i] = alloc_fpusave(M_WAITOK);
+	}
 
 	return (wakeaddr);
 }

Modified: stable/9/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- stable/9/sys/amd64/amd64/cpu_switch.S	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/amd64/cpu_switch.S	Tue Feb 21 20:56:03 2012	(r231979)
@@ -112,16 +112,25 @@ done_store_dr:
 
 	/* have we used fp, and need a save? */
 	cmpq	%rdi,PCPU(FPCURTHREAD)
-	jne	1f
+	jne	3f
 	movq	PCB_SAVEFPU(%r8),%r8
 	clts
+	cmpl	$0,use_xsave
+	jne	1f
 	fxsave	(%r8)
-	smsw	%ax
+	jmp	2f
+1:	movq	%rdx,%rcx
+	movl	xsave_mask,%eax
+	movl	xsave_mask+4,%edx
+/*	xsave	(%r8) */
+	.byte	0x41,0x0f,0xae,0x20
+	movq	%rcx,%rdx
+2:	smsw	%ax
 	orb	$CR0_TS,%al
 	lmsw	%ax
 	xorl	%eax,%eax
 	movq	%rax,PCPU(FPCURTHREAD)
-1:
+3:
 
 	/* Save is done.  Now fire up new thread. Leave old vmspace. */
 	movq	TD_PCB(%rsi),%r8
@@ -354,10 +363,19 @@ ENTRY(savectx)
 	sldt	PCB_LDT(%rdi)
 	str	PCB_TR(%rdi)
 
-	clts
-	fxsave	PCB_USERFPU(%rdi)
-	movq	%rsi,%cr0	/* The previous %cr0 is saved in %rsi. */
+2:	movq	%rsi,%cr0	/* The previous %cr0 is saved in %rsi. */
 
 	movl	$1,%eax
 	ret
 END(savectx)
+
+/*
+ * Wrapper around fpusave to take care of CR0_TS.
+ */
+ENTRY(ctx_fpusave)
+	movq	%cr0,%rsi
+	clts
+	call	fpusave
+	movq	%rsi,%cr0
+	ret
+END(ctx_fpusave)

Modified: stable/9/sys/amd64/amd64/fpu.c
==============================================================================
--- stable/9/sys/amd64/amd64/fpu.c	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/amd64/fpu.c	Tue Feb 21 20:56:03 2012	(r231979)
@@ -134,19 +134,97 @@ void	xsetbv(uint32_t reg, uint64_t val);
 #define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
 #define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)
 
-typedef u_char bool_t;
+CTASSERT(sizeof(struct savefpu) == 512);
+CTASSERT(sizeof(struct xstate_hdr) == 64);
+CTASSERT(sizeof(struct savefpu_ymm) == 832);
+
+/*
+ * This requirement is to make it easier for asm code to calculate
+ * offset of the fpu save area from the pcb address. FPU save area
+ * must be 64-byte aligned.
+ */
+CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);
 
 static	void	fpu_clean_state(void);
 
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     NULL, 1, "Floating point instructions executed in hardware");
 
-static	struct savefpu		fpu_initialstate;
+int use_xsave;			/* non-static for cpu_switch.S */
+uint64_t xsave_mask;		/* the same */
+static	struct savefpu *fpu_initialstate;
+
+void
+fpusave(void *addr)
+{
+
+	if (use_xsave)
+		xsave((char *)addr, xsave_mask);
+	else
+		fxsave((char *)addr);
+}
+
+static void
+fpurestore(void *addr)
+{
+
+	if (use_xsave)
+		xrstor((char *)addr, xsave_mask);
+	else
+		fxrstor((char *)addr);
+}
+
+/*
+ * Enable XSAVE if supported and allowed by user.
+ * Calculate the xsave_mask.
+ */
+static void
+fpuinit_bsp1(void)
+{
+	u_int cp[4];
+	uint64_t xsave_mask_user;
+
+	if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
+		use_xsave = 1;
+		TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
+	}
+	if (!use_xsave)
+		return;
+
+	cpuid_count(0xd, 0x0, cp);
+	xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+	if ((cp[0] & xsave_mask) != xsave_mask)
+		panic("CPU0 does not support X87 or SSE: %x", cp[0]);
+	xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
+	xsave_mask_user = xsave_mask;
+	TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
+	xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+	xsave_mask &= xsave_mask_user;
+}
 
 /*
- * Initialize the floating point unit.  On the boot CPU we generate a
- * clean state that is used to initialize the floating point unit when
- * it is first used by a process.
+ * Calculate the fpu save area size.
+ */
+static void
+fpuinit_bsp2(void)
+{
+	u_int cp[4];
+
+	if (use_xsave) {
+		cpuid_count(0xd, 0x0, cp);
+		cpu_max_ext_state_size = cp[1];
+
+		/*
+		 * Reload the cpu_feature2, since we enabled OSXSAVE.
+		 */
+		do_cpuid(1, cp);
+		cpu_feature2 = cp[2];
+	} else
+		cpu_max_ext_state_size = sizeof(struct savefpu);
+}
+
+/*
+ * Initialize the floating point unit.
  */
 void
 fpuinit(void)
@@ -155,6 +233,20 @@ fpuinit(void)
 	u_int mxcsr;
 	u_short control;
 
+	if (IS_BSP())
+		fpuinit_bsp1();
+
+	if (use_xsave) {
+		load_cr4(rcr4() | CR4_XSAVE);
+		xsetbv(XCR0, xsave_mask);
+	}
+
+	/*
+	 * XCR0 shall be set up before CPU can report the save area size.
+	 */
+	if (IS_BSP())
+		fpuinit_bsp2();
+
 	/*
 	 * It is too early for critical_enter() to work on AP.
 	 */
@@ -165,20 +257,46 @@ fpuinit(void)
 	fldcw(control);
 	mxcsr = __INITIAL_MXCSR__;
 	ldmxcsr(mxcsr);
-	if (PCPU_GET(cpuid) == 0) {
-		fxsave(&fpu_initialstate);
-		if (fpu_initialstate.sv_env.en_mxcsr_mask)
-			cpu_mxcsr_mask = fpu_initialstate.sv_env.en_mxcsr_mask;
-		else
-			cpu_mxcsr_mask = 0xFFBF;
-		bzero(fpu_initialstate.sv_fp, sizeof(fpu_initialstate.sv_fp));
-		bzero(fpu_initialstate.sv_xmm, sizeof(fpu_initialstate.sv_xmm));
-	}
 	start_emulating();
 	intr_restore(saveintr);
 }
 
 /*
+ * On the boot CPU we generate a clean state that is used to
+ * initialize the floating point unit when it is first used by a
+ * process.
+ */
+static void
+fpuinitstate(void *arg __unused)
+{
+	register_t saveintr;
+
+	fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+	saveintr = intr_disable();
+	stop_emulating();
+
+	fpusave(fpu_initialstate);
+	if (fpu_initialstate->sv_env.en_mxcsr_mask)
+		cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask;
+	else
+		cpu_mxcsr_mask = 0xFFBF;
+
+	/*
+	 * The fninit instruction does not modify XMM registers, so the
+	 * fpusave call above stored whatever garbage they held after
+	 * reset into the initial state.  Clear the whole XMM register
+	 * file image to make the startup program state and signal
+	 * handler XMM register content predictable.
+	 */
+	bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm));
+
+	start_emulating();
+	intr_restore(saveintr);
+}
+SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL);
+
+/*
  * Free coprocessor (if we have it).
  */
 void
@@ -188,7 +306,7 @@ fpuexit(struct thread *td)
 	critical_enter();
 	if (curthread == PCPU_GET(fpcurthread)) {
 		stop_emulating();
-		fxsave(PCPU_GET(curpcb)->pcb_save);
+		fpusave(PCPU_GET(curpcb)->pcb_save);
 		start_emulating();
 		PCPU_SET(fpcurthread, 0);
 	}
@@ -461,7 +579,7 @@ fpudna(void)
 		 * the PCB doesn't contain a clean FPU state.  Explicitly
 		 * load an initial state.
 		 */
-		fxrstor(&fpu_initialstate);
+		fpurestore(fpu_initialstate);
 		if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
 			fldcw(pcb->pcb_initial_fpucw);
 		if (PCB_USER_FPU(pcb))
@@ -470,7 +588,7 @@ fpudna(void)
 		else
 			set_pcb_flags(pcb, PCB_FPUINITDONE);
 	} else
-		fxrstor(pcb->pcb_save);
+		fpurestore(pcb->pcb_save);
 	critical_exit();
 }
 
@@ -499,15 +617,16 @@ fpugetregs(struct thread *td)
 
 	pcb = td->td_pcb;
 	if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
-		bcopy(&fpu_initialstate, &pcb->pcb_user_save,
-		    sizeof(fpu_initialstate));
-		pcb->pcb_user_save.sv_env.en_cw = pcb->pcb_initial_fpucw;
+		bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb),
+		    cpu_max_ext_state_size);
+		get_pcb_user_save_pcb(pcb)->sv_env.en_cw =
+		    pcb->pcb_initial_fpucw;
 		fpuuserinited(td);
 		return (_MC_FPOWNED_PCB);
 	}
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
-		fxsave(&pcb->pcb_user_save);
+		fpusave(get_pcb_user_save_pcb(pcb));
 		critical_exit();
 		return (_MC_FPOWNED_FPU);
 	} else {
@@ -529,25 +648,78 @@ fpuuserinited(struct thread *td)
 		set_pcb_flags(pcb, PCB_FPUINITDONE);
 }
 
+int
+fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
+{
+	struct xstate_hdr *hdr, *ehdr;
+	size_t len, max_len;
+	uint64_t bv;
+
+	/* XXXKIB should we clear all extended state in xstate_bv instead ? */
+	if (xfpustate == NULL)
+		return (0);
+	if (!use_xsave)
+		return (EOPNOTSUPP);
+
+	len = xfpustate_size;
+	if (len < sizeof(struct xstate_hdr))
+		return (EINVAL);
+	max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+	if (len > max_len)
+		return (EINVAL);
+
+	ehdr = (struct xstate_hdr *)xfpustate;
+	bv = ehdr->xstate_bv;
+
+	/*
+	 * Avoid #gp.
+	 */
+	if (bv & ~xsave_mask)
+		return (EINVAL);
+	if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) !=
+	    (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE))
+		return (EINVAL);
+
+	hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);
+
+	hdr->xstate_bv = bv;
+	bcopy(xfpustate + sizeof(struct xstate_hdr),
+	    (char *)(hdr + 1), len - sizeof(struct xstate_hdr));
+
+	return (0);
+}
+
 /*
  * Set the state of the FPU.
  */
-void
-fpusetregs(struct thread *td, struct savefpu *addr)
+int
+fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate,
+    size_t xfpustate_size)
 {
 	struct pcb *pcb;
+	int error;
 
 	pcb = td->td_pcb;
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
-		fxrstor(addr);
+		error = fpusetxstate(td, xfpustate, xfpustate_size);
+		if (error != 0) {
+			critical_exit();
+			return (error);
+		}
+		bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
+		fpurestore(get_pcb_user_save_td(td));
 		critical_exit();
 		set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE);
 	} else {
 		critical_exit();
-		bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr));
+		error = fpusetxstate(td, xfpustate, xfpustate_size);
+		if (error != 0)
+			return (error);
+		bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
 		fpuuserinited(td);
 	}
+	return (0);
 }
 
 /*
@@ -637,20 +809,62 @@ static devclass_t fpupnp_devclass;
 DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0);
 #endif	/* DEV_ISA */
 
+static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
+    "Kernel contexts for FPU state");
+
+#define	FPU_KERN_CTX_FPUINITDONE 0x01
+
+struct fpu_kern_ctx {
+	struct savefpu *prev;
+	uint32_t flags;
+	char hwstate1[];
+};
+
+struct fpu_kern_ctx *
+fpu_kern_alloc_ctx(u_int flags)
+{
+	struct fpu_kern_ctx *res;
+	size_t sz;
+
+	sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
+	    cpu_max_ext_state_size;
+	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
+	    M_NOWAIT : M_WAITOK) | M_ZERO);
+	return (res);
+}
+
+void
+fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
+{
+
+	/* XXXKIB clear the memory ? */
+	free(ctx, M_FPUKERN_CTX);
+}
+
+static struct savefpu *
+fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
+{
+	vm_offset_t p;
+
+	p = (vm_offset_t)&ctx->hwstate1;
+	p = roundup2(p, XSAVE_AREA_ALIGN);
+	return ((struct savefpu *)p);
+}
+
 int
 fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
-	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
-	    ("mangled pcb_save"));
+	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
+	    get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
 	ctx->flags = 0;
 	if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0)
 		ctx->flags |= FPU_KERN_CTX_FPUINITDONE;
 	fpuexit(td);
 	ctx->prev = pcb->pcb_save;
-	pcb->pcb_save = &ctx->hwstate;
+	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
 	set_pcb_flags(pcb, PCB_KERNFPU);
 	clear_pcb_flags(pcb, PCB_FPUINITDONE);
 	return (0);
@@ -667,7 +881,7 @@ fpu_kern_leave(struct thread *td, struct
 		fpudrop();
 	critical_exit();
 	pcb->pcb_save = ctx->prev;
-	if (pcb->pcb_save == &pcb->pcb_user_save) {
+	if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
 		if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
 			set_pcb_flags(pcb, PCB_FPUINITDONE);
 			clear_pcb_flags(pcb, PCB_KERNFPU);
@@ -691,7 +905,8 @@ fpu_kern_thread(u_int flags)
 	pcb = PCPU_GET(curpcb);
 	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
 	    ("Only kthread may use fpu_kern_thread"));
-	KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save"));
+	KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb),
+	    ("mangled pcb_save"));
 	KASSERT(PCB_USER_FPU(pcb), ("recursive call"));
 
 	set_pcb_flags(pcb, PCB_KERNFPU);

Modified: stable/9/sys/amd64/amd64/genassym.c
==============================================================================
--- stable/9/sys/amd64/amd64/genassym.c	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/amd64/genassym.c	Tue Feb 21 20:56:03 2012	(r231979)
@@ -156,7 +156,7 @@ ASSYM(PCB_GS32SD, offsetof(struct pcb, p
 ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
 ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
 ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
-ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save));
+ASSYM(PCB_USERFPU, sizeof(struct pcb));
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 ASSYM(PCB_FULL_IRET, PCB_FULL_IRET);
 ASSYM(PCB_DBREGS, PCB_DBREGS);

Modified: stable/9/sys/amd64/amd64/initcpu.c
==============================================================================
--- stable/9/sys/amd64/amd64/initcpu.c	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/amd64/initcpu.c	Tue Feb 21 20:56:03 2012	(r231979)
@@ -72,6 +72,7 @@ u_int	cpu_vendor_id;		/* CPU vendor ID *
 u_int	cpu_fxsr;		/* SSE enabled */
 u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
 u_int	cpu_clflush_line_size = 32;
+u_int	cpu_max_ext_state_size;
 
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
 	&via_feature_rng, 0, "VIA RNG feature available in CPU");

Modified: stable/9/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/9/sys/amd64/amd64/machdep.c	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/amd64/machdep.c	Tue Feb 21 20:56:03 2012	(r231979)
@@ -154,8 +154,10 @@ extern void panicifcpuunsupported(void);
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup(void *);
-static void get_fpcontext(struct thread *td, mcontext_t *mcp);
-static int  set_fpcontext(struct thread *td, const mcontext_t *mcp);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp,
+    char *xfpusave, size_t xfpusave_len);
+static int  set_fpcontext(struct thread *td, const mcontext_t *mcp,
+    char *xfpustate, size_t xfpustate_len);
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 /*
@@ -315,6 +317,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	struct sigacts *psp;
 	char *sp;
 	struct trapframe *regs;
+	char *xfpusave;
+	size_t xfpusave_len;
 	int sig;
 	int oonstack;
 
@@ -328,6 +332,14 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_rsp);
 
+	if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
+		xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+		xfpusave = __builtin_alloca(xfpusave_len);
+	} else {
+		xfpusave_len = 0;
+		xfpusave = NULL;
+	}
+
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
@@ -337,7 +349,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
 	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
-	get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+	get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
 	fpstate_drop(td);
 	sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
 	sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
@@ -348,13 +360,18 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
-		sp = td->td_sigstk.ss_sp +
-		    td->td_sigstk.ss_size - sizeof(struct sigframe);
+		sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
-		sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128;
+		sp = (char *)regs->tf_rsp - 128;
+	if (xfpusave != NULL) {
+		sp -= xfpusave_len;
+		sp = (char *)((unsigned long)sp & ~0x3Ful);
+		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+	}
+	sp -= sizeof(struct sigframe);
 	/* Align to 16 bytes. */
 	sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);
 
@@ -387,7 +404,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
-	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+	    (xfpusave != NULL && copyout(xfpusave,
+	    (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
+	    != 0)) {
 #ifdef DEBUG
 		printf("process %ld has trashed its stack\n", (long)p->p_pid);
 #endif
@@ -432,6 +452,8 @@ sys_sigreturn(td, uap)
 	struct proc *p;
 	struct trapframe *regs;
 	ucontext_t *ucp;
+	char *xfpustate;
+	size_t xfpustate_len;
 	long rflags;
 	int cs, error, ret;
 	ksiginfo_t ksi;
@@ -490,7 +512,28 @@ sys_sigreturn(td, uap)
 		return (EINVAL);
 	}
 
-	ret = set_fpcontext(td, &ucp->uc_mcontext);
+	if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+		xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+		if (xfpustate_len > cpu_max_ext_state_size -
+		    sizeof(struct savefpu)) {
+			uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
+			    p->p_pid, td->td_name, xfpustate_len);
+			return (EINVAL);
+		}
+		xfpustate = __builtin_alloca(xfpustate_len);
+		error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
+		    xfpustate, xfpustate_len);
+		if (error != 0) {
+			uprintf(
+	"pid %d (%s): sigreturn copying xfpustate failed\n",
+			    p->p_pid, td->td_name);
+			return (error);
+		}
+	} else {
+		xfpustate = NULL;
+		xfpustate_len = 0;
+	}
+	ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
 	if (ret != 0) {
 		uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
 		    p->p_pid, td->td_name, ret);
@@ -1563,6 +1606,7 @@ hammer_time(u_int64_t modulep, u_int64_t
 	int gsel_tss, x;
 	struct pcpu *pc;
 	struct nmi_pcpu *np;
+	struct xstate_hdr *xhdr;
 	u_int64_t msr;
 	char *env;
 	size_t kstack0_sz;
@@ -1572,7 +1616,6 @@ hammer_time(u_int64_t modulep, u_int64_t
 	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
 	bzero((void *)thread0.td_kstack, kstack0_sz);
 	physfree += kstack0_sz;
-	thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
 
 	/*
  	 * This may be done better later if it gets more high level
@@ -1621,7 +1664,6 @@ hammer_time(u_int64_t modulep, u_int64_t
 	physfree += DPCPU_SIZE;
 	PCPU_SET(prvspace, pc);
 	PCPU_SET(curthread, &thread0);
-	PCPU_SET(curpcb, thread0.td_pcb);
 	PCPU_SET(tssp, &common_tss[0]);
 	PCPU_SET(commontssp, &common_tss[0]);
 	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
@@ -1713,13 +1755,6 @@ hammer_time(u_int64_t modulep, u_int64_t
 	initializecpu();	/* Initialize CPU registers */
 	initializecpucache();
 
-	/* make an initial tss so cpu can get interrupt stack on syscall! */
-	common_tss[0].tss_rsp0 = thread0.td_kstack +
-	    kstack0_sz - sizeof(struct pcb);
-	/* Ensure the stack is aligned to 16 bytes */
-	common_tss[0].tss_rsp0 &= ~0xFul;
-	PCPU_SET(rsp0, common_tss[0].tss_rsp0);
-
 	/* doublefault stack space, runs on ist1 */
 	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
 
@@ -1756,6 +1791,25 @@ hammer_time(u_int64_t modulep, u_int64_t
 	msgbufinit(msgbufp, msgbufsize);
 	fpuinit();
 
+	/*
+	 * Set up thread0 pcb after fpuinit calculated pcb + fpu save
+	 * area size.  Zero out the extended state header in fpu save
+	 * area.
+	 */
+	thread0.td_pcb = get_pcb_td(&thread0);
+	bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+	if (use_xsave) {
+		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+		    1);
+		xhdr->xstate_bv = xsave_mask;
+	}
+	/* make an initial tss so cpu can get interrupt stack on syscall! */
+	common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb;
+	/* Ensure the stack is aligned to 16 bytes */
+	common_tss[0].tss_rsp0 &= ~0xFul;
+	PCPU_SET(rsp0, common_tss[0].tss_rsp0);
+	PCPU_SET(curpcb, thread0.td_pcb);
+
 	/* transfer to user mode */
 
 	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
@@ -2025,7 +2079,7 @@ fill_fpregs(struct thread *td, struct fp
 	    P_SHOULDSTOP(td->td_proc),
 	    ("not suspended thread %p", td));
 	fpugetregs(td);
-	fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs);
+	fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs);
 	return (0);
 }
 
@@ -2034,7 +2088,7 @@ int
 set_fpregs(struct thread *td, struct fpreg *fpregs)
 {
 
-	set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save);
+	set_fpregs_xmm(fpregs, get_pcb_user_save_td(td));
 	fpuuserinited(td);
 	return (0);
 }
@@ -2085,9 +2139,11 @@ get_mcontext(struct thread *td, mcontext
 	mcp->mc_gs = tp->tf_gs;
 	mcp->mc_flags = tp->tf_flags;
 	mcp->mc_len = sizeof(*mcp);
-	get_fpcontext(td, mcp);
+	get_fpcontext(td, mcp, NULL, 0);
 	mcp->mc_fsbase = pcb->pcb_fsbase;
 	mcp->mc_gsbase = pcb->pcb_gsbase;
+	mcp->mc_xfpustate = 0;
+	mcp->mc_xfpustate_len = 0;
 	bzero(mcp->mc_spare, sizeof(mcp->mc_spare));
 	return (0);
 }
@@ -2103,6 +2159,7 @@ set_mcontext(struct thread *td, const mc
 {
 	struct pcb *pcb;
 	struct trapframe *tp;
+	char *xfpustate;
 	long rflags;
 	int ret;
 
@@ -2113,7 +2170,18 @@ set_mcontext(struct thread *td, const mc
 		return (EINVAL);
 	rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
 	    (tp->tf_rflags & ~PSL_USERCHANGE);
-	ret = set_fpcontext(td, mcp);
+	if (mcp->mc_flags & _MC_HASFPXSTATE) {
+		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
+		    sizeof(struct savefpu))
+			return (EINVAL);
+		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
+		    mcp->mc_xfpustate_len);
+		if (ret != 0)
+			return (ret);
+	} else
+		xfpustate = NULL;
+	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
 	if (ret != 0)
 		return (ret);
 	tp->tf_r15 = mcp->mc_r15;
@@ -2151,35 +2219,51 @@ set_mcontext(struct thread *td, const mc
 }
 
 static void
-get_fpcontext(struct thread *td, mcontext_t *mcp)
+get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
+    size_t xfpusave_len)
 {
+	size_t max_len, len;
 
 	mcp->mc_ownedfp = fpugetregs(td);
-	bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate,
+	bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate,
 	    sizeof(mcp->mc_fpstate));
 	mcp->mc_fpformat = fpuformat();
+	if (!use_xsave || xfpusave_len == 0)
+		return;
+	max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
+	len = xfpusave_len;
+	if (len > max_len) {	/* zero the unused tail of xfpusave */
+		bzero(xfpusave + max_len, len - max_len);
+		len = max_len;
+	}
+	mcp->mc_flags |= _MC_HASFPXSTATE;
+	mcp->mc_xfpustate_len = len;
+	bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
 }
 
 static int
-set_fpcontext(struct thread *td, const mcontext_t *mcp)
+set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate,
+    size_t xfpustate_len)
 {
 	struct savefpu *fpstate;
+	int error;
 
 	if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
 		return (0);
 	else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
 		return (EINVAL);
-	else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+	else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
 		/* We don't care what state is left in the FPU or PCB. */
 		fpstate_drop(td);
-	else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+		error = 0;
+	} else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
 	    mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
 		fpstate = (struct savefpu *)&mcp->mc_fpstate;
 		fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
-		fpusetregs(td, fpstate);
+		error = fpusetregs(td, fpstate, xfpustate, xfpustate_len);
 	} else
 		return (EINVAL);
-	return (0);
+	return (error);
 }
 
 void

Modified: stable/9/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- stable/9/sys/amd64/amd64/mp_machdep.c	Tue Feb 21 20:55:43 2012	(r231978)
+++ stable/9/sys/amd64/amd64/mp_machdep.c	Tue Feb 21 20:56:03 2012	(r231979)
@@ -99,7 +99,8 @@ char *nmi_stack;
 void *dpcpu;
 
 struct pcb stoppcbs[MAXCPU];
-struct pcb **susppcbs = NULL;
+struct pcb **susppcbs;
+void **suspfpusave;
 
 /* Variables needed for SMP tlb shootdown. */
 vm_offset_t smp_tlb_addr1;
@@ -1422,6 +1423,7 @@ cpususpend_handler(void)
 	cr3 = rcr3();
 
 	if (savectx(susppcbs[cpu])) {
+		ctx_fpusave(suspfpusave[cpu]);
 		wbinvd();
 		CPU_SET_ATOMIC(cpu, &stopped_cpus);
 	} else {

Copied: stable/9/sys/amd64/amd64/ptrace_machdep.c (from r230426, head/sys/amd64/amd64/ptrace_machdep.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/9/sys/amd64/amd64/ptrace_machdep.c	Tue Feb 21 20:56:03 2012	(r231979, copy of r230426, head/sys/amd64/amd64/ptrace_machdep.c)
@@ -0,0 +1,141 @@
+/*-
+ * Copyright (c) 2011 Konstantin Belousov <kib at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/ptrace.h>
+#include <sys/sysent.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+
+static int
+cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
+{
+	char *savefpu;
+	int error;
+
+	if (!use_xsave)
+		return (EOPNOTSUPP);
+
+	switch (req) {
+	case PT_GETXSTATE:
+		savefpu = (char *)(get_pcb_user_save_td(td) + 1);
+		error = copyout(savefpu, addr,
+		    cpu_max_ext_state_size - sizeof(struct savefpu));
+		break;
+
+	case PT_SETXSTATE:
+		if (data > cpu_max_ext_state_size - sizeof(struct savefpu)) {
+			error = EINVAL;
+			break;
+		}
+		savefpu = malloc(data, M_TEMP, M_WAITOK);
+		error = copyin(addr, savefpu, data);
+		if (error == 0)
+			error = fpusetxstate(td, savefpu, data);
+		free(savefpu, M_TEMP);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+#ifdef COMPAT_FREEBSD32
+#define PT_I386_GETXMMREGS	(PT_FIRSTMACH + 0)
+#define PT_I386_SETXMMREGS	(PT_FIRSTMACH + 1)
+#define PT_I386_GETXSTATE	(PT_FIRSTMACH + 2)
+#define PT_I386_SETXSTATE	(PT_FIRSTMACH + 3)
+
+static int
+cpu32_ptrace(struct thread *td, int req, void *addr, int data)
+{
+	struct savefpu *fpstate;
+	int error;
+
+	switch (req) {
+	case PT_I386_GETXMMREGS:
+		error = copyout(get_pcb_user_save_td(td), addr,
+		    sizeof(*fpstate));
+		break;
+
+	case PT_I386_SETXMMREGS:
+		fpstate = get_pcb_user_save_td(td);
+		error = copyin(addr, fpstate, sizeof(*fpstate));
+		fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
+		break;
+
+	case PT_I386_GETXSTATE:
+		error = cpu_ptrace_xstate(td, PT_GETXSTATE, addr, data);
+		break;
+
+	case PT_I386_SETXSTATE:

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list