git: 4d0d7ea9e752 - stable/13 - amd64: stop using top of the thread's kernel stack for FPU user save area

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sun, 10 Oct 2021 09:24:09 UTC
The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=4d0d7ea9e752d7fce2a2f46e4c6c02951aaaf504

commit 4d0d7ea9e752d7fce2a2f46e4c6c02951aaaf504
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2021-09-13 21:05:47 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2021-10-10 09:21:17 +0000

    amd64: stop using top of the thread's kernel stack for FPU user save area
    
    MFC note: this commit changes the layout of td_md for amd64, causing
    the static checks for the struct thread ABI in kern_thread.c to fail.
    The next two commits restore the layout; I decided not to overcomplicate
    the merge and not to do work that is going to be overwritten immediately.
    
    (cherry picked from commit df8dd6025af88a99d34f549fa9591a9b8f9b75b1)
---
 sys/amd64/amd64/exec_machdep.c |  4 ++--
 sys/amd64/amd64/fpu.c          |  2 ++
 sys/amd64/amd64/machdep.c      | 14 --------------
 sys/amd64/amd64/vm_machdep.c   | 22 +++++++++++++---------
 sys/amd64/ia32/ia32_signal.c   |  6 +++---
 sys/amd64/include/proc.h       |  2 ++
 sys/kern/kern_thread.c         |  2 +-
 7 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c
index 426bb08edd7c..e85ebe741353 100644
--- a/sys/amd64/amd64/exec_machdep.c
+++ b/sys/amd64/amd64/exec_machdep.c
@@ -135,7 +135,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 
 	if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
 		xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
-		xfpusave = __builtin_alloca(xfpusave_len);
+		xfpusave = (char *)td->td_md.md_fpu_scratch;
 	} else {
 		xfpusave_len = 0;
 		xfpusave = NULL;
@@ -674,7 +674,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
 		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(struct savefpu))
 			return (EINVAL);
-		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+		xfpustate = (char *)td->td_md.md_fpu_scratch;
 		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
 		    mcp->mc_xfpustate_len);
 		if (ret != 0)
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index d7936b3b1922..24986958d4ca 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -448,6 +448,8 @@ fpuinitstate(void *arg __unused)
 		    xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
 	}
 
+	cpu_thread_alloc(&thread0);
+
 	saveintr = intr_disable();
 	stop_emulating();
 
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index db1b88faeabb..512fee0de7df 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1266,7 +1266,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	caddr_t kmdp;
 	int gsel_tss, x;
 	struct pcpu *pc;
-	struct xstate_hdr *xhdr;
 	uint64_t cr3, rsp0;
 	pml4_entry_t *pml4e;
 	pdp_entry_t *pdpe;
@@ -1572,19 +1571,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	msgbufinit(msgbufp, msgbufsize);
 	fpuinit();
 
-	/*
-	 * Reinitialize thread0's stack base now that the xsave area size is
-	 * known.  Set up thread0's pcb save area after fpuinit calculated fpu
-	 * save area size.  Zero out the extended state header in fpu save area.
-	 */
-	set_top_of_stack_td(&thread0);
-	thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
-	bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size);
-	if (use_xsave) {
-		xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
-		    1);
-		xhdr->xstate_bv = xsave_mask;
-	}
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	rsp0 = thread0.td_md.md_stack_base;
 	/* Ensure the stack is aligned to 16 bytes */
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 7d65269410e0..5c3cd638c92d 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -90,19 +90,17 @@ void
 set_top_of_stack_td(struct thread *td)
 {
 	td->td_md.md_stack_base = td->td_kstack +
-	    td->td_kstack_pages * PAGE_SIZE -
-	    roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
+	    td->td_kstack_pages * PAGE_SIZE;
 }
 
 struct savefpu *
 get_pcb_user_save_td(struct thread *td)
 {
-	vm_offset_t p;
-
-	p = td->td_md.md_stack_base;
-	KASSERT((p % XSAVE_AREA_ALIGN) == 0,
-	    ("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
-	return ((struct savefpu *)p);
+	KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save %
+	    XSAVE_AREA_ALIGN) == 0,
+	    ("Unaligned pcb_user_save area ptr %p td %p",
+	    td->td_md.md_usr_fpu_save, td));
+	return (td->td_md.md_usr_fpu_save);
 }
 
 struct pcb *
@@ -384,6 +382,8 @@ cpu_thread_alloc(struct thread *td)
 	set_top_of_stack_td(td);
 	td->td_pcb = pcb = get_pcb_td(td);
 	td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
+	td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
+	td->td_md.md_fpu_scratch = fpu_save_area_alloc();
 	pcb->pcb_save = get_pcb_user_save_pcb(pcb);
 	if (use_xsave) {
 		xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
@@ -395,8 +395,12 @@ cpu_thread_alloc(struct thread *td)
 void
 cpu_thread_free(struct thread *td)
 {
-
 	cpu_thread_clean(td);
+
+	fpu_save_area_free(td->td_md.md_usr_fpu_save);
+	td->td_md.md_usr_fpu_save = NULL;
+	fpu_save_area_free(td->td_md.md_fpu_scratch);
+	td->td_md.md_fpu_scratch = NULL;
 }
 
 bool
diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
index 49b5797d68fd..9b67c7001a87 100644
--- a/sys/amd64/ia32/ia32_signal.c
+++ b/sys/amd64/ia32/ia32_signal.c
@@ -210,7 +210,7 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp)
 		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
 		    sizeof(struct savefpu))
 			return (EINVAL);
-		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+		xfpustate = (char *)td->td_md.md_fpu_scratch;
 		ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
 		    mcp->mc_xfpustate_len);
 		if (ret != 0)
@@ -579,7 +579,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 
 	if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
 		xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
-		xfpusave = __builtin_alloca(xfpusave_len);
+		xfpusave = (char *)td->td_md.md_fpu_scratch;
 	} else {
 		xfpusave_len = 0;
 		xfpusave = NULL;
@@ -882,7 +882,7 @@ freebsd32_sigreturn(td, uap)
 			    td->td_proc->p_pid, td->td_name, xfpustate_len);
 			return (EINVAL);
 		}
-		xfpustate = __builtin_alloca(xfpustate_len);
+		xfpustate = (char *)td->td_md.md_fpu_scratch;
 		error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
 		    xfpustate, xfpustate_len);
 		if (error != 0) {
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
index 59796e729ac4..12fb7bf9724c 100644
--- a/sys/amd64/include/proc.h
+++ b/sys/amd64/include/proc.h
@@ -75,6 +75,8 @@ struct mdthread {
 	int	md_efirt_dis_pf;	/* (k) */
 	struct pcb md_pcb;
 	vm_offset_t md_stack_base;
+	struct savefpu *md_usr_fpu_save;
+	struct savefpu *md_fpu_scratch;
 };
 
 struct mdproc {
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 48aac8e057b8..a8282c94b8e1 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -90,7 +90,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
     "struct thread KBI td_pflags");
 _Static_assert(offsetof(struct thread, td_frame) == 0x4a0,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
     "struct proc KBI p_flag");