svn commit: r250880 - in stable/8/sys/amd64: amd64 include

John Baldwin jhb at FreeBSD.org
Tue May 21 19:25:20 UTC 2013


Author: jhb
Date: Tue May 21 19:25:19 2013
New Revision: 250880
URL: http://svnweb.freebsd.org/changeset/base/250880

Log:
  MFC 238450,250152-250153,250415:
  - Add support for using the XSAVEOPT instruction.
  - The check to ensure that xstate_bv always has XFEATURE_ENABLED_X87 and
    XFEATURE_ENABLED_SSE bits set is not needed.  CPU correctly handles
    any bitmask which is subset of the enabled bits in %XCR0.
  - Partially saved extended state must always be handled, i.e. for both
    the fpu-owned context and the pcb-saved one.
  - Correct the type of the literal used on the left side of shifts of up
    to 63 bit positions.
  
    Do not fill the save area and do not set the saved bit in the xstate
    bit vector for the state which is not marked as enabled in xsave_mask.

Modified:
  stable/8/sys/amd64/amd64/cpu_switch.S
  stable/8/sys/amd64/amd64/fpu.c
  stable/8/sys/amd64/include/md_var.h
  stable/8/sys/amd64/include/specialreg.h
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/   (props changed)
  stable/8/sys/x86/   (props changed)

Modified: stable/8/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- stable/8/sys/amd64/amd64/cpu_switch.S	Tue May 21 19:23:49 2013	(r250879)
+++ stable/8/sys/amd64/amd64/cpu_switch.S	Tue May 21 19:25:19 2013	(r250880)
@@ -122,6 +122,9 @@ done_store_dr:
 1:	movq	%rdx,%rcx
 	movl	xsave_mask,%eax
 	movl	xsave_mask+4,%edx
+	.globl	ctx_switch_xsave
+ctx_switch_xsave:
+	/* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
 /*	xsave	(%r8) */
 	.byte	0x41,0x0f,0xae,0x20
 	movq	%rcx,%rdx

Modified: stable/8/sys/amd64/amd64/fpu.c
==============================================================================
--- stable/8/sys/amd64/amd64/fpu.c	Tue May 21 19:23:49 2013	(r250879)
+++ stable/8/sys/amd64/amd64/fpu.c	Tue May 21 19:25:19 2013	(r250880)
@@ -154,6 +154,11 @@ int use_xsave;			/* non-static for cpu_s
 uint64_t xsave_mask;		/* the same */
 static	struct savefpu *fpu_initialstate;
 
+struct xsave_area_elm_descr {
+	u_int	offset;
+	u_int	size;
+} *xsave_area_desc;
+
 void
 fpusave(void *addr)
 {
@@ -200,6 +205,16 @@ fpuinit_bsp1(void)
 	TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
 	xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
 	xsave_mask &= xsave_mask_user;
+
+	cpuid_count(0xd, 0x1, cp);
+	if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+		/*
+		 * Patch the XSAVE instruction in the cpu_switch code
+		 * to XSAVEOPT.  We assume that XSAVE encoding used
+		 * REX byte, and set the bit 4 of the r/m byte.
+		 */
+		ctx_switch_xsave[3] |= 0x10;
+	}
 }
 
 /*
@@ -270,6 +285,7 @@ static void
 fpuinitstate(void *arg __unused)
 {
 	register_t saveintr;
+	int cp[4], i, max_ext_n;
 
 	fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
 	    M_WAITOK | M_ZERO);
@@ -291,6 +307,28 @@ fpuinitstate(void *arg __unused)
 	 */
 	bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
 
+	/*
+	 * Create a table describing the layout of the CPU Extended
+	 * Save Area.
+	 */
+	if (use_xsave) {
+		max_ext_n = flsl(xsave_mask);
+		xsave_area_desc = malloc(max_ext_n * sizeof(struct
+		    xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+		/* x87 state */
+		xsave_area_desc[0].offset = 0;
+		xsave_area_desc[0].size = 160;
+		/* XMM */
+		xsave_area_desc[1].offset = 160;
+		xsave_area_desc[1].size = 288 - 160;
+
+		for (i = 2; i < max_ext_n; i++) {
+			cpuid_count(0xd, i, cp);
+			xsave_area_desc[i].offset = cp[1];
+			xsave_area_desc[i].size = cp[0];
+		}
+	}
+
 	start_emulating();
 	intr_restore(saveintr);
 }
@@ -578,8 +616,14 @@ fpudna(void)
 		 * This is the first time this thread has used the FPU or
 		 * the PCB doesn't contain a clean FPU state.  Explicitly
 		 * load an initial state.
+		 *
+		 * We prefer to restore the state from the actual save
+		 * area in PCB instead of directly loading from
+		 * fpu_initialstate, to ignite the XSAVEOPT
+		 * tracking engine.
 		 */
-		fpurestore(fpu_initialstate);
+		bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
+		fpurestore(pcb->pcb_save);
 		if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
 			fldcw(pcb->pcb_initial_fpucw);
 		if (PCB_USER_FPU(pcb))
@@ -614,6 +658,9 @@ int
 fpugetregs(struct thread *td)
 {
 	struct pcb *pcb;
+	uint64_t *xstate_bv, bit;
+	char *sa;
+	int max_ext_n, i, owned;
 
 	pcb = td->td_pcb;
 	if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
@@ -627,12 +674,31 @@ fpugetregs(struct thread *td)
 	critical_enter();
 	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
 		fpusave(get_pcb_user_save_pcb(pcb));
-		critical_exit();
-		return (_MC_FPOWNED_FPU);
+		owned = _MC_FPOWNED_FPU;
 	} else {
-		critical_exit();
-		return (_MC_FPOWNED_PCB);
+		owned = _MC_FPOWNED_PCB;
 	}
+	critical_exit();
+	if (use_xsave) {
+		/*
+		 * Handle partially saved state.
+		 */
+		sa = (char *)get_pcb_user_save_pcb(pcb);
+		xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+		    offsetof(struct xstate_hdr, xstate_bv));
+		max_ext_n = flsl(xsave_mask);
+		for (i = 0; i < max_ext_n; i++) {
+			bit = 1ULL << i;
+			if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
+				continue;
+			bcopy((char *)fpu_initialstate +
+			    xsave_area_desc[i].offset,
+			    sa + xsave_area_desc[i].offset,
+			    xsave_area_desc[i].size);
+			*xstate_bv |= bit;
+		}
+	}
+	return (owned);
 }
 
 void
@@ -676,9 +742,6 @@ fpusetxstate(struct thread *td, char *xf
 	 */
 	if (bv & ~xsave_mask)
 		return (EINVAL);
-	if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) !=
-	    (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE))
-		return (EINVAL);
 
 	hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);
 

Modified: stable/8/sys/amd64/include/md_var.h
==============================================================================
--- stable/8/sys/amd64/include/md_var.h	Tue May 21 19:23:49 2013	(r250879)
+++ stable/8/sys/amd64/include/md_var.h	Tue May 21 19:25:19 2013	(r250880)
@@ -57,6 +57,7 @@ extern	u_int	cpu_procinfo;
 extern	u_int	cpu_procinfo2;
 extern	char	cpu_vendor[];
 extern	u_int	cpu_vendor_id;
+extern	char	ctx_switch_xsave[];
 extern	char	kstack[];
 extern	char	sigcode[];
 extern	int	szsigcode;

Modified: stable/8/sys/amd64/include/specialreg.h
==============================================================================
--- stable/8/sys/amd64/include/specialreg.h	Tue May 21 19:23:49 2013	(r250879)
+++ stable/8/sys/amd64/include/specialreg.h	Tue May 21 19:25:19 2013	(r250880)
@@ -230,6 +230,11 @@
 #define	CPUID_TYPE_CORE		2
 
 /*
+ * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
+ */
+#define	CPUID_EXTSTATE_XSAVEOPT	0x00000001
+
+/*
  * AMD extended function 8000_0007h edx info
  */
 #define	AMDPM_TS		0x00000001


More information about the svn-src-all mailing list