svn commit: r238450 - in head/sys: amd64/amd64 amd64/include
x86/include
Konstantin Belousov
kib at FreeBSD.org
Sat Jul 14 15:48:31 UTC 2012
Author: kib
Date: Sat Jul 14 15:48:30 2012
New Revision: 238450
URL: http://svn.freebsd.org/changeset/base/238450
Log:
Add support for using the XSAVEOPT instruction. Our XSAVE/XRSTOR usage
mostly meets the guidelines set by the Intel SDM:
1. We use XRSTOR and XSAVE from the same CPL using the same linear
address for the store area
2. Contrary to the recommendations, we cannot zero the FPU save area
for a new thread, since fork semantics require a copy of the
previous state. This advice seemingly contradicts the advice
in item 6.
3. We do use XSAVEOPT in the context switch code only, and the area
for XSAVEOPT already always contains the data saved by XSAVE.
4. We do not modify the save area between XRSTOR, when the area is
loaded into the FPU context, and XSAVE. We always spill the FPU context
into the save area and start emulation when directly writing into the
FPU context.
5. We do not use segmented addressing to access the save area, or rather,
we always address it relative to the %ds base.
6. XSAVEOPT can only be executed on an area which was previously
loaded with XRSTOR, since the context switch code checks for FPU use by
the outgoing thread before saving, and a thread which stopped emulation
forcibly gets its context loaded with XRSTOR.
7. The PCB cannot be paged out while FPU emulation is turned off, since
the stack of the executing thread is never swapped out.
The context switch code is patched to issue XSAVEOPT instead of XSAVE
if supported. This approach eliminates one conditional in the context
switch code, which would be needed otherwise.
For user-visible machine context to have proper data, fpugetregs()
checks for unsaved extension blocks and manually copies pristine FPU
state into them, according to the description provided by CPUID leaf
0xd.
MFC after: 1 month
Modified:
head/sys/amd64/amd64/cpu_switch.S
head/sys/amd64/amd64/fpu.c
head/sys/amd64/include/md_var.h
head/sys/x86/include/specialreg.h
Modified: head/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- head/sys/amd64/amd64/cpu_switch.S Sat Jul 14 12:15:20 2012 (r238449)
+++ head/sys/amd64/amd64/cpu_switch.S Sat Jul 14 15:48:30 2012 (r238450)
@@ -122,6 +122,9 @@ done_store_dr:
1: movq %rdx,%rcx
movl xsave_mask,%eax
movl xsave_mask+4,%edx
+ .globl ctx_switch_xsave
+ctx_switch_xsave:
+ /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
xsave (%r8)
movq %rcx,%rdx
2: smsw %ax
Modified: head/sys/amd64/amd64/fpu.c
==============================================================================
--- head/sys/amd64/amd64/fpu.c Sat Jul 14 12:15:20 2012 (r238449)
+++ head/sys/amd64/amd64/fpu.c Sat Jul 14 15:48:30 2012 (r238450)
@@ -132,10 +132,16 @@ static void fpu_clean_state(void);
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
NULL, 1, "Floating point instructions executed in hardware");
+static int use_xsaveopt;
int use_xsave; /* non-static for cpu_switch.S */
uint64_t xsave_mask; /* the same */
static struct savefpu *fpu_initialstate;
+struct xsave_area_elm_descr {
+ u_int offset;
+ u_int size;
+} *xsave_area_desc;
+
void
fpusave(void *addr)
{
@@ -182,6 +188,17 @@ fpuinit_bsp1(void)
TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
xsave_mask &= xsave_mask_user;
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+ /*
+ * Patch the XSAVE instruction in the cpu_switch code
+ * to XSAVEOPT. We assume that the XSAVE encoding uses a
+ * REX prefix byte, and set bit 4 of the ModR/M byte.
+ */
+ ctx_switch_xsave[3] |= 0x10;
+ use_xsaveopt = 1;
+ }
}
/*
@@ -252,6 +269,7 @@ static void
fpuinitstate(void *arg __unused)
{
register_t saveintr;
+ int cp[4], i, max_ext_n;
fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
M_WAITOK | M_ZERO);
@@ -273,6 +291,28 @@ fpuinitstate(void *arg __unused)
*/
bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+ /*
+ * Create a table describing the layout of the CPU Extended
+ * Save Area.
+ */
+ if (use_xsaveopt) {
+ max_ext_n = flsl(xsave_mask);
+ xsave_area_desc = malloc(max_ext_n * sizeof(struct
+ xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+ /* x87 state */
+ xsave_area_desc[0].offset = 0;
+ xsave_area_desc[0].size = 160;
+ /* XMM */
+ xsave_area_desc[1].offset = 160;
+ xsave_area_desc[1].size = 288 - 160;
+
+ for (i = 2; i < max_ext_n; i++) {
+ cpuid_count(0xd, i, cp);
+ xsave_area_desc[i].offset = cp[1];
+ xsave_area_desc[i].size = cp[0];
+ }
+ }
+
start_emulating();
intr_restore(saveintr);
}
@@ -560,8 +600,14 @@ fpudna(void)
* This is the first time this thread has used the FPU or
* the PCB doesn't contain a clean FPU state. Explicitly
* load an initial state.
+ *
+ * We prefer to restore the state from the actual save
+ * area in PCB instead of directly loading from
+ * fpu_initialstate, to ignite the XSAVEOPT
+ * tracking engine.
*/
- fpurestore(fpu_initialstate);
+ bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
+ fpurestore(pcb->pcb_save);
if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
fldcw(pcb->pcb_initial_fpucw);
if (PCB_USER_FPU(pcb))
@@ -596,6 +642,9 @@ int
fpugetregs(struct thread *td)
{
struct pcb *pcb;
+ uint64_t *xstate_bv, bit;
+ char *sa;
+ int max_ext_n, i;
pcb = td->td_pcb;
if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
@@ -613,6 +662,25 @@ fpugetregs(struct thread *td)
return (_MC_FPOWNED_FPU);
} else {
critical_exit();
+ if (use_xsaveopt) {
+ /*
+ * Handle partially saved state.
+ */
+ sa = (char *)get_pcb_user_save_pcb(pcb);
+ xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+ offsetof(struct xstate_hdr, xstate_bv));
+ max_ext_n = flsl(xsave_mask);
+ for (i = 0; i < max_ext_n; i++) {
+ bit = 1 << i;
+ if ((*xstate_bv & bit) != 0)
+ continue;
+ bcopy((char *)fpu_initialstate +
+ xsave_area_desc[i].offset,
+ sa + xsave_area_desc[i].offset,
+ xsave_area_desc[i].size);
+ *xstate_bv |= bit;
+ }
+ }
return (_MC_FPOWNED_PCB);
}
}
Modified: head/sys/amd64/include/md_var.h
==============================================================================
--- head/sys/amd64/include/md_var.h Sat Jul 14 12:15:20 2012 (r238449)
+++ head/sys/amd64/include/md_var.h Sat Jul 14 15:48:30 2012 (r238450)
@@ -57,6 +57,7 @@ extern u_int cpu_procinfo;
extern u_int cpu_procinfo2;
extern char cpu_vendor[];
extern u_int cpu_vendor_id;
+extern char ctx_switch_xsave[];
extern char kstack[];
extern char sigcode[];
extern int szsigcode;
Modified: head/sys/x86/include/specialreg.h
==============================================================================
--- head/sys/x86/include/specialreg.h Sat Jul 14 12:15:20 2012 (r238449)
+++ head/sys/x86/include/specialreg.h Sat Jul 14 15:48:30 2012 (r238450)
@@ -247,6 +247,11 @@
#define CPUID_TYPE_CORE 2
/*
+ * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
+ */
+#define CPUID_EXTSTATE_XSAVEOPT 0x00000001
+
+/*
* AMD extended function 8000_0007h edx info
*/
#define AMDPM_TS 0x00000001
More information about the svn-src-head
mailing list