git: df8dd6025af8 - main - amd64: stop using top of the thread's kernel stack for FPU user save area
Konstantin Belousov
kib at FreeBSD.org
Tue Sep 21 17:21:18 UTC 2021
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=df8dd6025af88a99d34f549fa9591a9b8f9b75b1
commit df8dd6025af88a99d34f549fa9591a9b8f9b75b1
Author: Konstantin Belousov <kib at FreeBSD.org>
AuthorDate: 2021-09-13 21:05:47 +0000
Commit: Konstantin Belousov <kib at FreeBSD.org>
CommitDate: 2021-09-21 17:20:15 +0000
amd64: stop using top of the thread's kernel stack for FPU user save area
Instead, do one more allocation at thread creation time. This frees
a lot of space on the stack.
Also do not use alloca() for temporary storage in the signal delivery
function sendsig() and the signal return syscall sys_sigreturn(). This
saves an equal amount of space, again at the cost of one more allocation
at thread creation time.
A useful experiment now would be to reduce KSTACK_PAGES.
Reviewed by: jhb, markj
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D31954
---
sys/amd64/amd64/exec_machdep.c | 4 ++--
sys/amd64/amd64/fpu.c | 2 ++
sys/amd64/amd64/machdep.c | 14 --------------
sys/amd64/amd64/vm_machdep.c | 22 +++++++++++++---------
sys/amd64/ia32/ia32_signal.c | 6 +++---
sys/amd64/include/proc.h | 2 ++
sys/kern/kern_thread.c | 2 +-
7 files changed, 23 insertions(+), 29 deletions(-)
diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c
index 1297117638d6..48bda05f9685 100644
--- a/sys/amd64/amd64/exec_machdep.c
+++ b/sys/amd64/amd64/exec_machdep.c
@@ -135,7 +135,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
- xfpusave = __builtin_alloca(xfpusave_len);
+ xfpusave = (char *)td->td_md.md_fpu_scratch;
} else {
xfpusave_len = 0;
xfpusave = NULL;
@@ -674,7 +674,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
sizeof(struct savefpu))
return (EINVAL);
- xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+ xfpustate = (char *)td->td_md.md_fpu_scratch;
ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
mcp->mc_xfpustate_len);
if (ret != 0)
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index d7936b3b1922..24986958d4ca 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -448,6 +448,8 @@ fpuinitstate(void *arg __unused)
xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
}
+ cpu_thread_alloc(&thread0);
+
saveintr = intr_disable();
stop_emulating();
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index d4e2356a9ae1..5c9b64526609 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1258,7 +1258,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
caddr_t kmdp;
int gsel_tss, x;
struct pcpu *pc;
- struct xstate_hdr *xhdr;
uint64_t cr3, rsp0;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
@@ -1564,19 +1563,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
msgbufinit(msgbufp, msgbufsize);
fpuinit();
- /*
- * Reinitialize thread0's stack base now that the xsave area size is
- * known. Set up thread0's pcb save area after fpuinit calculated fpu
- * save area size. Zero out the extended state header in fpu save area.
- */
- set_top_of_stack_td(&thread0);
- thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
- bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size);
- if (use_xsave) {
- xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
- 1);
- xhdr->xstate_bv = xsave_mask;
- }
/* make an initial tss so cpu can get interrupt stack on syscall! */
rsp0 = thread0.td_md.md_stack_base;
/* Ensure the stack is aligned to 16 bytes */
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 4567e6e0eb5d..e42d16d61b3a 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -90,19 +90,17 @@ void
set_top_of_stack_td(struct thread *td)
{
td->td_md.md_stack_base = td->td_kstack +
- td->td_kstack_pages * PAGE_SIZE -
- roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
+ td->td_kstack_pages * PAGE_SIZE;
}
struct savefpu *
get_pcb_user_save_td(struct thread *td)
{
- vm_offset_t p;
-
- p = td->td_md.md_stack_base;
- KASSERT((p % XSAVE_AREA_ALIGN) == 0,
- ("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
- return ((struct savefpu *)p);
+ KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save %
+ XSAVE_AREA_ALIGN) == 0,
+ ("Unaligned pcb_user_save area ptr %p td %p",
+ td->td_md.md_usr_fpu_save, td));
+ return (td->td_md.md_usr_fpu_save);
}
struct pcb *
@@ -393,6 +391,8 @@ cpu_thread_alloc(struct thread *td)
set_top_of_stack_td(td);
td->td_pcb = pcb = get_pcb_td(td);
td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
+ td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
+ td->td_md.md_fpu_scratch = fpu_save_area_alloc();
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
if (use_xsave) {
xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
@@ -404,8 +404,12 @@ cpu_thread_alloc(struct thread *td)
void
cpu_thread_free(struct thread *td)
{
-
cpu_thread_clean(td);
+
+ fpu_save_area_free(td->td_md.md_usr_fpu_save);
+ td->td_md.md_usr_fpu_save = NULL;
+ fpu_save_area_free(td->td_md.md_fpu_scratch);
+ td->td_md.md_fpu_scratch = NULL;
}
bool
diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
index 49b5797d68fd..9b67c7001a87 100644
--- a/sys/amd64/ia32/ia32_signal.c
+++ b/sys/amd64/ia32/ia32_signal.c
@@ -210,7 +210,7 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp)
if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
sizeof(struct savefpu))
return (EINVAL);
- xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+ xfpustate = (char *)td->td_md.md_fpu_scratch;
ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
mcp->mc_xfpustate_len);
if (ret != 0)
@@ -579,7 +579,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
- xfpusave = __builtin_alloca(xfpusave_len);
+ xfpusave = (char *)td->td_md.md_fpu_scratch;
} else {
xfpusave_len = 0;
xfpusave = NULL;
@@ -882,7 +882,7 @@ freebsd32_sigreturn(td, uap)
td->td_proc->p_pid, td->td_name, xfpustate_len);
return (EINVAL);
}
- xfpustate = __builtin_alloca(xfpustate_len);
+ xfpustate = (char *)td->td_md.md_fpu_scratch;
error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
xfpustate, xfpustate_len);
if (error != 0) {
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
index 0f8cf50e326d..bd07f70f8d44 100644
--- a/sys/amd64/include/proc.h
+++ b/sys/amd64/include/proc.h
@@ -75,6 +75,8 @@ struct mdthread {
int md_efirt_dis_pf; /* (k) */
struct pcb md_pcb;
vm_offset_t md_stack_base;
+ struct savefpu *md_usr_fpu_save;
+ struct savefpu *md_fpu_scratch;
};
struct mdproc {
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 65c5cc65c87e..62f939406374 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110,
"struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb8,
"struct proc KBI p_flag");
More information about the dev-commits-src-main
mailing list