svn commit: r238450 - in head/sys: amd64/amd64 amd64/include x86/include

Konstantin Belousov kib at FreeBSD.org
Sat Jul 14 15:48:31 UTC 2012


Author: kib
Date: Sat Jul 14 15:48:30 2012
New Revision: 238450
URL: http://svn.freebsd.org/changeset/base/238450

Log:
  Add support for the XSAVEOPT instruction use. Our XSAVE/XRSTOR usage
  mostly meets the guidelines set by the Intel SDM:
  1. We use XRSTOR and XSAVE from the same CPL using the same linear
     address for the store area
  2. Contrary to the recommendations, we cannot zero the FPU save area
     for a new thread, since fork semantics require a copy of the
     previous state. This seemingly contradicts the advice in
     item 6.
  3. We do use XSAVEOPT in the context switch code only, and the area
     for XSAVEOPT already always contains the data saved by XSAVE.
  4. We do not modify the save area between XRSTOR, when the area is
     loaded into FPU context, and XSAVE. We always spill the fpu context
     into the save area and start emulation when directly writing into
     the FPU context.
  5. We do not use segmented addressing to access save area, or rather,
     always address it using %ds basing.
  6. XSAVEOPT is only executed on an area which was previously
     loaded with XRSTOR, since the context switch code checks for FPU
     use by the outgoing thread before saving, and a thread which
     stopped emulation forcibly gets its context loaded with XRSTOR.
  7. The PCB cannot be paged out while FPU emulation is turned off, since
     the stack of the executing thread is never swapped out.
  
  The context switch code is patched to issue XSAVEOPT instead of XSAVE
  if supported. This approach eliminates one conditional in the context
  switch code, which would be needed otherwise.
  
  For user-visible machine context to have proper data, fpugetregs()
  checks for unsaved extension blocks and manually copies pristine FPU
  state into them, according to the description provided by CPUID leaf
  0xd.
  
  MFC after:  1 month

Modified:
  head/sys/amd64/amd64/cpu_switch.S
  head/sys/amd64/amd64/fpu.c
  head/sys/amd64/include/md_var.h
  head/sys/x86/include/specialreg.h

Modified: head/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- head/sys/amd64/amd64/cpu_switch.S	Sat Jul 14 12:15:20 2012	(r238449)
+++ head/sys/amd64/amd64/cpu_switch.S	Sat Jul 14 15:48:30 2012	(r238450)
@@ -122,6 +122,9 @@ done_store_dr:
 1:	movq	%rdx,%rcx
 	movl	xsave_mask,%eax
 	movl	xsave_mask+4,%edx
+	.globl	ctx_switch_xsave
+ctx_switch_xsave:
+	/* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
 	xsave	(%r8)
 	movq	%rcx,%rdx
 2:	smsw	%ax

Modified: head/sys/amd64/amd64/fpu.c
==============================================================================
--- head/sys/amd64/amd64/fpu.c	Sat Jul 14 12:15:20 2012	(r238449)
+++ head/sys/amd64/amd64/fpu.c	Sat Jul 14 15:48:30 2012	(r238450)
@@ -132,10 +132,16 @@ static	void	fpu_clean_state(void);
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     NULL, 1, "Floating point instructions executed in hardware");
 
+static int use_xsaveopt;
 int use_xsave;			/* non-static for cpu_switch.S */
 uint64_t xsave_mask;		/* the same */
 static	struct savefpu *fpu_initialstate;
 
+struct xsave_area_elm_descr {
+	u_int	offset;
+	u_int	size;
+} *xsave_area_desc;
+
 void
 fpusave(void *addr)
 {
@@ -182,6 +188,17 @@ fpuinit_bsp1(void)
 	TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
 	xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
 	xsave_mask &= xsave_mask_user;
+
+	cpuid_count(0xd, 0x1, cp);
+	if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+		/*
+		 * Patch the XSAVE instruction in the cpu_switch code
+		 * to XSAVEOPT.  We assume that XSAVE encoding used
+		 * REX byte, and set the bit 4 of the r/m byte.
+		 */
+		ctx_switch_xsave[3] |= 0x10;
+		use_xsaveopt = 1;
+	}
 }
 
 /*
@@ -252,6 +269,7 @@ static void
 fpuinitstate(void *arg __unused)
 {
 	register_t saveintr;
+	int cp[4], i, max_ext_n;
 
 	fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
 	    M_WAITOK | M_ZERO);
@@ -273,6 +291,28 @@ fpuinitstate(void *arg __unused)
 	 */
 	bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
 
+	/*
+	 * Create a table describing the layout of the CPU Extended
+	 * Save Area.
+	 */
+	if (use_xsaveopt) {
+		max_ext_n = flsl(xsave_mask);
+		xsave_area_desc = malloc(max_ext_n * sizeof(struct
+		    xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+		/* x87 state */
+		xsave_area_desc[0].offset = 0;
+		xsave_area_desc[0].size = 160;
+		/* XMM */
+		xsave_area_desc[1].offset = 160;
+		xsave_area_desc[1].size = 288 - 160;
+
+		for (i = 2; i < max_ext_n; i++) {
+			cpuid_count(0xd, i, cp);
+			xsave_area_desc[i].offset = cp[1];
+			xsave_area_desc[i].size = cp[0];
+		}
+	}
+
 	start_emulating();
 	intr_restore(saveintr);
 }
@@ -560,8 +600,14 @@ fpudna(void)
 		 * This is the first time this thread has used the FPU or
 		 * the PCB doesn't contain a clean FPU state.  Explicitly
 		 * load an initial state.
+		 *
+		 * We prefer to restore the state from the actual save
+		 * area in PCB instead of directly loading from
+		 * fpu_initialstate, to ignite the XSAVEOPT
+		 * tracking engine.
 		 */
-		fpurestore(fpu_initialstate);
+		bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
+		fpurestore(pcb->pcb_save);
 		if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
 			fldcw(pcb->pcb_initial_fpucw);
 		if (PCB_USER_FPU(pcb))
@@ -596,6 +642,9 @@ int
 fpugetregs(struct thread *td)
 {
 	struct pcb *pcb;
+	uint64_t *xstate_bv, bit;
+	char *sa;
+	int max_ext_n, i;
 
 	pcb = td->td_pcb;
 	if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
@@ -613,6 +662,25 @@ fpugetregs(struct thread *td)
 		return (_MC_FPOWNED_FPU);
 	} else {
 		critical_exit();
+		if (use_xsaveopt) {
+			/*
+			 * Handle partially saved state.
+			 */
+			sa = (char *)get_pcb_user_save_pcb(pcb);
+			xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+			    offsetof(struct xstate_hdr, xstate_bv));
+			max_ext_n = flsl(xsave_mask);
+			for (i = 0; i < max_ext_n; i++) {
+				bit = 1 << i;
+				if ((*xstate_bv & bit) != 0)
+					continue;
+				bcopy((char *)fpu_initialstate +
+				    xsave_area_desc[i].offset,
+				    sa + xsave_area_desc[i].offset,
+				    xsave_area_desc[i].size);
+				*xstate_bv |= bit;
+			}
+		}
 		return (_MC_FPOWNED_PCB);
 	}
 }

Modified: head/sys/amd64/include/md_var.h
==============================================================================
--- head/sys/amd64/include/md_var.h	Sat Jul 14 12:15:20 2012	(r238449)
+++ head/sys/amd64/include/md_var.h	Sat Jul 14 15:48:30 2012	(r238450)
@@ -57,6 +57,7 @@ extern	u_int	cpu_procinfo;
 extern	u_int	cpu_procinfo2;
 extern	char	cpu_vendor[];
 extern	u_int	cpu_vendor_id;
+extern	char	ctx_switch_xsave[];
 extern	char	kstack[];
 extern	char	sigcode[];
 extern	int	szsigcode;

Modified: head/sys/x86/include/specialreg.h
==============================================================================
--- head/sys/x86/include/specialreg.h	Sat Jul 14 12:15:20 2012	(r238449)
+++ head/sys/x86/include/specialreg.h	Sat Jul 14 15:48:30 2012	(r238450)
@@ -247,6 +247,11 @@
 #define	CPUID_TYPE_CORE		2
 
 /*
+ * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
+ */
+#define	CPUID_EXTSTATE_XSAVEOPT	0x00000001
+
+/*
  * AMD extended function 8000_0007h edx info
  */
 #define	AMDPM_TS		0x00000001


More information about the svn-src-head mailing list