git: bd9e0f5df681 - main - amd64: eliminate td_md.md_fpu_scratch
Cy Schubert
Cy.Schubert at cschubert.com
Tue Sep 21 20:53:24 UTC 2021
In message <202109211721.18LHLGn6028952 at gitrepo.freebsd.org>, Konstantin
Belous
ov writes:
> The branch main has been updated by kib:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=bd9e0f5df681da8b5ef05a587b4b5b07
> 572d3fc2
>
> commit bd9e0f5df681da8b5ef05a587b4b5b07572d3fc2
> Author: Konstantin Belousov <kib at FreeBSD.org>
> AuthorDate: 2021-09-15 18:37:47 +0000
> Commit: Konstantin Belousov <kib at FreeBSD.org>
> CommitDate: 2021-09-21 17:20:15 +0000
>
> amd64: eliminate td_md.md_fpu_scratch
>
> For signal send, copyout from the user FPU save area directly.
>
> For sigreturn, we are in sleepable context and can do temporal
> allocation of the transient save area. We cannot copying from userspace
> directly to user save area because XSAVE state needs to be validated,
> also partial copyins can corrupt it.
>
> Requested by: jhb
> Reviewed by: jhb, markj
> Tested by: pho
> Sponsored by: The FreeBSD Foundation
> MFC after: 1 week
> Differential revision: https://reviews.freebsd.org/D31954
> ---
> sys/amd64/amd64/exec_machdep.c | 47 +++++++++++++++++++---------------------
> --
> sys/amd64/amd64/vm_machdep.c | 3 ---
> sys/amd64/ia32/ia32_signal.c | 43 ++++++++++++++++----------------------
> sys/amd64/include/proc.h | 1 -
> sys/kern/kern_thread.c | 2 +-
> 5 files changed, 40 insertions(+), 56 deletions(-)
>
> diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c
> index 48bda05f9685..168c24cbb65b 100644
> --- a/sys/amd64/amd64/exec_machdep.c
> +++ b/sys/amd64/amd64/exec_machdep.c
> @@ -96,7 +96,7 @@ __FBSDID("$FreeBSD$");
> #include <machine/trap.h>
>
> static void get_fpcontext(struct thread *td, mcontext_t *mcp,
> - char *xfpusave, size_t xfpusave_len);
> + char **xfpusave, size_t *xfpusave_len);
> static int set_fpcontext(struct thread *td, mcontext_t *mcp,
> char *xfpustate, size_t xfpustate_len);
>
> @@ -133,14 +133,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
> regs = td->td_frame;
> oonstack = sigonstack(regs->tf_rsp);
>
> - if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
> - xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
> - xfpusave = (char *)td->td_md.md_fpu_scratch;
> - } else {
> - xfpusave_len = 0;
> - xfpusave = NULL;
> - }
> -
> /* Save user context. */
> bzero(&sf, sizeof(sf));
> sf.sf_uc.uc_sigmask = *mask;
> @@ -150,7 +142,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
> sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
> bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
> sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
> - get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
> + get_fpcontext(td, &sf.sf_uc.uc_mcontext, &xfpusave, &xfpusave_len);
> fpstate_drop(td);
> update_pcb_bases(pcb);
> sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
> @@ -301,10 +293,11 @@ sys_sigreturn(struct thread *td, struct sigreturn_args
> *uap)
> p->p_pid, td->td_name, xfpustate_len);
> return (EINVAL);
> }
> - xfpustate = __builtin_alloca(xfpustate_len);
> + xfpustate = (char *)fpu_save_area_alloc();
> error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
> xfpustate, xfpustate_len);
> if (error != 0) {
> + fpu_save_area_free((struct savefpu *)xfpustate);
> uprintf(
> "pid %d (%s): sigreturn copying xfpustate failed\n",
> p->p_pid, td->td_name);
> @@ -315,6 +308,7 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *u
> ap)
> xfpustate_len = 0;
> }
> ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len);
> + fpu_save_area_free((struct savefpu *)xfpustate);
> if (ret != 0) {
> uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
> p->p_pid, td->td_name, ret);
> @@ -674,14 +668,17 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
> if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
> sizeof(struct savefpu))
> return (EINVAL);
> - xfpustate = (char *)td->td_md.md_fpu_scratch;
> + xfpustate = (char *)fpu_save_area_alloc();
> ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
> mcp->mc_xfpustate_len);
> - if (ret != 0)
> + if (ret != 0) {
> + fpu_save_area_free((struct savefpu *)xfpustate);
> return (ret);
> + }
> } else
> xfpustate = NULL;
> ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
> + fpu_save_area_free((struct savefpu *)xfpustate);
> if (ret != 0)
> return (ret);
> tp->tf_r15 = mcp->mc_r15;
> @@ -719,26 +716,24 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
> }
>
> static void
> -get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
> - size_t xfpusave_len)
> +get_fpcontext(struct thread *td, mcontext_t *mcp, char **xfpusave,
> + size_t *xfpusave_len)
> {
> - size_t max_len, len;
> -
> mcp->mc_ownedfp = fpugetregs(td);
> bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
> sizeof(mcp->mc_fpstate));
> mcp->mc_fpformat = fpuformat();
> - if (!use_xsave || xfpusave_len == 0)
> + if (xfpusave == NULL)
> return;
> - max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
> - len = xfpusave_len;
> - if (len > max_len) {
> - len = max_len;
> - bzero(xfpusave + max_len, len - max_len);
> + if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) {
> + *xfpusave_len = 0;
> + *xfpusave = NULL;
> + } else {
> + mcp->mc_flags |= _MC_HASFPXSTATE;
> + *xfpusave_len = mcp->mc_xfpustate_len =
> + cpu_max_ext_state_size - sizeof(struct savefpu);
> + *xfpusave = (char *)(get_pcb_user_save_td(td) + 1);
> }
> - mcp->mc_flags |= _MC_HASFPXSTATE;
> - mcp->mc_xfpustate_len = len;
> - bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
> }
>
> static int
> diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
> index e42d16d61b3a..0bfcd03d6655 100644
> --- a/sys/amd64/amd64/vm_machdep.c
> +++ b/sys/amd64/amd64/vm_machdep.c
> @@ -392,7 +392,6 @@ cpu_thread_alloc(struct thread *td)
> td->td_pcb = pcb = get_pcb_td(td);
> td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
> td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
> - td->td_md.md_fpu_scratch = fpu_save_area_alloc();
> pcb->pcb_save = get_pcb_user_save_pcb(pcb);
> if (use_xsave) {
> xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
> @@ -408,8 +407,6 @@ cpu_thread_free(struct thread *td)
>
> fpu_save_area_free(td->td_md.md_usr_fpu_save);
> td->td_md.md_usr_fpu_save = NULL;
> - fpu_save_area_free(td->td_md.md_fpu_scratch);
> - td->td_md.md_fpu_scratch = NULL;
> }
>
> bool
> diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
> index 9b67c7001a87..1ca19072a1dc 100644
> --- a/sys/amd64/ia32/ia32_signal.c
> +++ b/sys/amd64/ia32/ia32_signal.c
> @@ -87,10 +87,8 @@ static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sig
> set_t *);
>
> static void
> ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp,
> - char *xfpusave, size_t xfpusave_len)
> + char **xfpusave, size_t *xfpusave_len)
> {
> - size_t max_len, len;
> -
> /*
> * XXX Format of 64bit and 32bit FXSAVE areas differs. FXSAVE
> * in 32bit mode saves %cs and %ds, while on 64bit it saves
> @@ -101,17 +99,15 @@ ia32_get_fpcontext(struct thread *td, struct ia32_mconte
> xt *mcp,
> bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
> sizeof(mcp->mc_fpstate));
> mcp->mc_fpformat = fpuformat();
> - if (!use_xsave || xfpusave_len == 0)
> - return;
> - max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
> - len = xfpusave_len;
> - if (len > max_len) {
> - len = max_len;
> - bzero(xfpusave + max_len, len - max_len);
> + if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) {
> + *xfpusave_len = 0;
> + *xfpusave = NULL;
> + } else {
> + mcp->mc_flags |= _MC_IA32_HASFPXSTATE;
> + *xfpusave_len = mcp->mc_xfpustate_len =
This commit causes a panic running and i386 poudriere on an amd64. Notice
the ud2 machine instruction at line 107 in the disassemble portion of this
reply.
Unread portion of the kernel message buffer:
interrupt enabled, resume, IOPL = 0
current process = 5200 (cc)
trap number = 1
panic: privileged instruction fault
cpuid = 3
time = 1632253591
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame
0xfffffe00e52377d0
vpanic() at vpanic+0x187/frame 0xfffffe00e5237830
panic() at panic+0x43/frame 0xfffffe00e5237890
trap_fatal() at trap_fatal+0x387/frame 0xfffffe00e52378f0
trap() at trap+0x8b/frame 0xfffffe00e5237a00
calltrap() at calltrap+0x8/frame 0xfffffe00e5237a00
--- trap 0x1, rip = 0xffffffff80a5fef1, rsp = 0xfffffe00e5237ad0, rbp =
0xfffffe00e5237af0 ---
ia32_get_mcontext() at ia32_get_mcontext+0x181/frame 0xfffffe00e5237af0
freebsd32_getcontext() at freebsd32_getcontext+0x52/frame 0xfffffe00e5237df0
ia32_syscall() at ia32_syscall+0x126/frame 0xfffffe00e5237f30
int0x80_syscall_common() at int0x80_syscall_common+0x9c/frame 0xffffca38
Uptime: 8m48s
Dumping 526 out of 8161 MB: (CTRL-C to abort) ..4%..13%..22%..31%..43%..52%.
.61%..73%..83%..92%
__curthread () at /opt/src/git-src/sys/amd64/include/pcpu_aux.h:55
55 __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
(kgdb) bt
#0 __curthread () at /opt/src/git-src/sys/amd64/include/pcpu_aux.h:55
#1 doadump (textdump=textdump at entry=1)
at /opt/src/git-src/sys/kern/kern_shutdown.c:399
#2 0xffffffff806d417b in kern_reboot (howto=260)
at /opt/src/git-src/sys/kern/kern_shutdown.c:486
#3 0xffffffff806d45f6 in vpanic (fmt=0xffffffff80a9b26c "%s",
ap=<optimized out>) at /opt/src/git-src/sys/kern/kern_shutdown.c:919
#4 0xffffffff806d43f3 in panic (fmt=<unavailable>)
at /opt/src/git-src/sys/kern/kern_shutdown.c:843
#5 0xffffffff80a3a587 in trap_fatal (frame=0xfffffe00e5237a10, eva=0)
at /opt/src/git-src/sys/amd64/amd64/trap.c:946
#6 0xffffffff80a39a7b in trap (frame=0xfffffe00e5237a10)
at /opt/src/git-src/sys/amd64/amd64/trap.c:251
#7 <signal handler called>
#8 0xffffffff80a5fef1 in ia32_get_fpcontext (td=0xfffffe00e468f3a0,
mcp=0xfffffe00e5237b10, xfpusave=0x0, xfpusave_len=0x0)
at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:107
#9 ia32_get_mcontext (td=td at entry=0xfffffe00e468f3a0,
mcp=mcp at entry=0xfffffe00e5237b10, flags=1)
at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:177
#10 0xffffffff80a5fca2 in freebsd32_getcontext (td=0xfffffe00e468f3a0,
uap=0xfffffe00e468f790)
at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:260
#11 0xffffffff80a61a16 in syscallenter (td=0xfffffe00e468f3a0)
at /opt/src/git-src/sys/amd64/ia32/../../kern/subr_syscall.c:189
#12 ia32_syscall (frame=0xfffffe00e5237f40)
at /opt/src/git-src/sys/amd64/ia32/ia32_syscall.c:218
#13 0xffffffff80a12bcf in int0x80_syscall_common ()
at /opt/src/git-src/sys/amd64/ia32/ia32_exception.S:77
#14 0x00000000ffffc710 in ?? ()
#15 0x00000000ffffc9d0 in ?? ()
#16 0x000000002540b9ad in ?? ()
#17 0x000000002526104f in ?? ()
#18 0x0000000000000000 in ?? ()
(kgdb) frame 8
#8 0xffffffff80a5fef1 in ia32_get_fpcontext (td=0xfffffe00e468f3a0,
mcp=0xfffffe00e5237b10, xfpusave=0x0, xfpusave_len=0x0)
at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:107
107 *xfpusave_len = mcp->mc_xfpustate_len =
(kgdb) l
102 if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) {
103 *xfpusave_len = 0;
104 *xfpusave = NULL;
105 } else {
106 mcp->mc_flags |= _MC_IA32_HASFPXSTATE;
107 *xfpusave_len = mcp->mc_xfpustate_len =
108 cpu_max_ext_state_size - sizeof(struct savefpu);
109 *xfpusave = (char *)(get_pcb_user_save_td(td) + 1);
110 }
111 }
(kgdb) disassemble /m
Dump of assembler code for function ia32_get_mcontext:
98 mcp->mc_ownedfp = fpugetregs(td);
0xffffffff80a5fe9d <+301>: mov %r14,%rdi
0xffffffff80a5fea0 <+304>: call 0xffffffff80a16ca0 <fpugetregs>
0xffffffff80a5fea5 <+309>: mov %eax,0x58(%rbx)
99 bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
0xffffffff80a5fea8 <+312>: lea 0x60(%rbx),%r15
0xffffffff80a5feac <+316>: mov %r14,%rdi
0xffffffff80a5feaf <+319>: call 0xffffffff80a3bbc0
<get_pcb_user_save_td>
0xffffffff80a5feb4 <+324>: mov $0x200,%edx
0xffffffff80a5feb9 <+329>: mov %r15,%rdi
0xffffffff80a5febc <+332>: mov %rax,%rsi
0xffffffff80a5febf <+335>: call 0xffffffff80a35d20 <memmove_std>
100 sizeof(mcp->mc_fpstate));
101 mcp->mc_fpformat = fpuformat();
0xffffffff80a5fec4 <+340>: call 0xffffffff80a169e0 <fpuformat>
0xffffffff80a5fec9 <+345>: mov %eax,0x54(%rbx)
102 if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) {
0xffffffff80a5fecc <+348>: cmpl $0x0,0x53c73d(%rip) #
0xffffffff80f9c610 <use_xsave>
0xffffffff80a5fed3 <+355>: je 0xffffffff80a5fef1
<ia32_get_mcontext+385>
0xffffffff80a5fed5 <+357>: mov 0x5274bd(%rip),%eax #
0xffffffff80f87398 <cpu_max_ext_state_size>
0xffffffff80a5fedb <+363>: cmp $0x200,%eax
0xffffffff80a5fee0 <+368>: jbe 0xffffffff80a5fef1
<ia32_get_mcontext+385>
103 *xfpusave_len = 0;
104 *xfpusave = NULL;
105 } else {
106 mcp->mc_flags |= _MC_IA32_HASFPXSTATE;
0xffffffff80a5fee2 <+370>: orb $0x4,0x5c(%rbx)
107 *xfpusave_len = mcp->mc_xfpustate_len =
0xffffffff80a5feeb <+379>: mov %eax,0x26c(%rbx)
=> 0xffffffff80a5fef1 <+385>: ud2
0xffffffff80a5fef3 <+387>: xor %eax,%eax
0xffffffff80a5fef5 <+389>: mov $0x140,%edi
108 cpu_max_ext_state_size - sizeof(struct savefpu);
0xffffffff80a5fee6 <+374>: add $0xfffffe00,%eax
109 *xfpusave = (char *)(get_pcb_user_save_td(td) + 1);
110 }
111 }
112
113 static int
--Type <RET> for more, q to quit, c to continue without paging--q
> + cpu_max_ext_state_size - sizeof(struct savefpu);
> + *xfpusave = (char *)(get_pcb_user_save_td(td) + 1);
> }
> - mcp->mc_flags |= _MC_IA32_HASFPXSTATE;
> - mcp->mc_xfpustate_len = len;
> - bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
> }
>
> static int
> @@ -210,14 +206,17 @@ ia32_set_mcontext(struct thread *td, struct ia32_mconte
> xt *mcp)
> if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
> sizeof(struct savefpu))
> return (EINVAL);
> - xfpustate = (char *)td->td_md.md_fpu_scratch;
> + xfpustate = (char *)fpu_save_area_alloc();
> ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
> mcp->mc_xfpustate_len);
> - if (ret != 0)
> + if (ret != 0) {
> + fpu_save_area_free((struct savefpu *)xfpustate);
> return (ret);
> + }
> } else
> xfpustate = NULL;
> ret = ia32_set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
> + fpu_save_area_free((struct savefpu *)xfpustate);
> if (ret != 0)
> return (ret);
> tp->tf_gs = mcp->mc_gs;
> @@ -577,14 +576,6 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *m
> ask)
> regs = td->td_frame;
> oonstack = sigonstack(regs->tf_rsp);
>
> - if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
> - xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
> - xfpusave = (char *)td->td_md.md_fpu_scratch;
> - } else {
> - xfpusave_len = 0;
> - xfpusave = NULL;
> - }
> -
> /* Save user context. */
> bzero(&sf, sizeof(sf));
> sf.sf_uc.uc_sigmask = *mask;
> @@ -613,7 +604,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *ma
> sk)
> sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
> sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
> sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
> - ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
> + ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, &xfpusave, &xfpusave_len)
> ;
> fpstate_drop(td);
> sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase;
> sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase;
> @@ -882,10 +873,11 @@ freebsd32_sigreturn(td, uap)
> td->td_proc->p_pid, td->td_name, xfpustate_len);
> return (EINVAL);
> }
> - xfpustate = (char *)td->td_md.md_fpu_scratch;
> + xfpustate = (char *)fpu_save_area_alloc();
> error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
> xfpustate, xfpustate_len);
> if (error != 0) {
> + fpu_save_area_free((struct savefpu *)xfpustate);
> uprintf(
> "pid %d (%s): sigreturn copying xfpustate failed\n",
> td->td_proc->p_pid, td->td_name);
> @@ -897,6 +889,7 @@ freebsd32_sigreturn(td, uap)
> }
> ret = ia32_set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
> xfpustate_len);
> + fpu_save_area_free((struct savefpu *)xfpustate);
> if (ret != 0) {
> uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
> td->td_proc->p_pid, td->td_name, ret);
> diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
> index bd07f70f8d44..94ac69d31e65 100644
> --- a/sys/amd64/include/proc.h
> +++ b/sys/amd64/include/proc.h
> @@ -76,7 +76,6 @@ struct mdthread {
> struct pcb md_pcb;
> vm_offset_t md_stack_base;
> struct savefpu *md_usr_fpu_save;
> - struct savefpu *md_fpu_scratch;
> };
>
> struct mdproc {
> diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
> index 62f939406374..65c5cc65c87e 100644
> --- a/sys/kern/kern_thread.c
> +++ b/sys/kern/kern_thread.c
> @@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110,
> "struct thread KBI td_pflags");
> _Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
> "struct thread KBI td_frame");
> -_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
> +_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
> "struct thread KBI td_emuldata");
> _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
> "struct proc KBI p_flag");
>
--
Cheers,
Cy Schubert <Cy.Schubert at cschubert.com>
FreeBSD UNIX: <cy at FreeBSD.org> Web: https://FreeBSD.org
NTP: <cy at nwtime.org> Web: https://nwtime.org
The need of the many outweighs the greed of the few.
More information about the dev-commits-src-all
mailing list