svn commit: r213281 - head/lib/libc/amd64/gen
Jung-uk Kim
jkim at FreeBSD.org
Wed Sep 29 22:12:35 UTC 2010
On Wednesday 29 September 2010 05:20 pm, Dimitry Andric wrote:
> Author: dim
> Date: Wed Sep 29 21:20:29 2010
> New Revision: 213281
> URL: http://svn.freebsd.org/changeset/base/213281
>
> Log:
> Apply the same workaround for clang to amd64's version of ldexp.c
> (as in r212976): order the incoming arguments to fscale as st(0),
> st(1), and mark temp2 volatile (only in case of compilation with
> clang) to force clang to pop it correctly. No binary change when
> compiled with gcc.
Actually, the binary changes slightly when compiled with gcc:
%diff -u ldexp-r1.14.c ldexp-r1.15.c
--- ldexp-r1.14.c 2010-09-29 17:44:45.000000000 -0400
+++ ldexp-r1.15.c 2010-09-29 17:45:10.000000000 -0400
@@ -34,7 +34,9 @@
static char sccsid[] = "@(#)ldexp.c 8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/amd64/gen/ldexp.c,v 1.14 2007/01/09 00:38:24 imp Exp $");
+__FBSDID("$FreeBSD: src/lib/libc/amd64/gen/ldexp.c,v 1.15 2010/09/29 21:20:29 dim Exp $");
+
+#include <math.h>
/*
* ldexp(value, exp): return value * (2 ** exp).
@@ -49,12 +51,16 @@
double
ldexp (double value, int exp)
{
- double temp, texp, temp2;
+ double temp, texp;
+#ifdef __clang__
+ volatile
+#endif
+ double temp2;
texp = exp;
#ifdef __GNUC__
__asm ("fscale "
- : "=u" (temp2), "=t" (temp)
- : "0" (texp), "1" (value));
+ : "=t" (temp), "=u" (temp2)
+ : "0" (value), "1" (texp));
#else
#error unknown asm
#endif
%objdump -d ldexp-r1.14.o ldexp-r1.15.o
ldexp-r1.14.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <ldexp>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: f2 0f 11 45 d8 movsd %xmm0,0xffffffffffffffd8(%rbp)
9: 89 7d d4 mov %edi,0xffffffffffffffd4(%rbp)
c: f2 0f 2a 45 d4 cvtsi2sd 0xffffffffffffffd4(%rbp),%xmm0
11: f2 0f 11 45 f0 movsd %xmm0,0xfffffffffffffff0(%rbp)
16: dd 45 f0 fldl 0xfffffffffffffff0(%rbp)
19: dd 45 d8 fldl 0xffffffffffffffd8(%rbp)
1c: d9 fd fscale
1e: d9 c9 fxch %st(1)
20: dd 5d f8 fstpl 0xfffffffffffffff8(%rbp)
23: dd 5d e8 fstpl 0xffffffffffffffe8(%rbp)
26: 48 8b 45 e8 mov 0xffffffffffffffe8(%rbp),%rax
2a: 48 89 45 c8 mov %rax,0xffffffffffffffc8(%rbp)
2e: f2 0f 10 45 c8 movsd 0xffffffffffffffc8(%rbp),%xmm0
33: c9 leaveq
34: c3 retq
ldexp-r1.15.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <ldexp>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: f2 0f 11 45 d8 movsd %xmm0,0xffffffffffffffd8(%rbp)
9: 89 7d d4 mov %edi,0xffffffffffffffd4(%rbp)
c: f2 0f 2a 45 d4 cvtsi2sd 0xffffffffffffffd4(%rbp),%xmm0
11: f2 0f 11 45 f0 movsd %xmm0,0xfffffffffffffff0(%rbp)
16: dd 45 d8 fldl 0xffffffffffffffd8(%rbp)
19: dd 45 f0 fldl 0xfffffffffffffff0(%rbp)
1c: d9 c9 fxch %st(1)
1e: d9 fd fscale
20: dd 5d e8 fstpl 0xffffffffffffffe8(%rbp)
23: dd 5d f8 fstpl 0xfffffffffffffff8(%rbp)
26: 48 8b 45 e8 mov 0xffffffffffffffe8(%rbp),%rax
2a: 48 89 45 c8 mov %rax,0xffffffffffffffc8(%rbp)
2e: f2 0f 10 45 c8 movsd 0xffffffffffffffc8(%rbp),%xmm0
33: c9 leaveq
34: c3 retq
Note that the new version places FXCH before FSCALE.
When it is compiled with -O2:
%objdump -d ldexp-r1.14.o ldexp-r1.15.o
ldexp-r1.14.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <ldexp>:
0: f2 0f 2a cf cvtsi2sd %edi,%xmm1
4: f2 0f 11 44 24 f0 movsd %xmm0,0xfffffffffffffff0(%rsp)
a: f2 0f 11 4c 24 f8 movsd %xmm1,0xfffffffffffffff8(%rsp)
10: dd 44 24 f8 fldl 0xfffffffffffffff8(%rsp)
14: dd 44 24 f0 fldl 0xfffffffffffffff0(%rsp)
18: d9 fd fscale
1a: dd d9 fstp %st(1)
1c: dd 5c 24 f0 fstpl 0xfffffffffffffff0(%rsp)
20: f2 0f 10 44 24 f0 movsd 0xfffffffffffffff0(%rsp),%xmm0
26: c3 retq
ldexp-r1.15.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <ldexp>:
0: f2 0f 2a cf cvtsi2sd %edi,%xmm1
4: f2 0f 11 44 24 f0 movsd %xmm0,0xfffffffffffffff0(%rsp)
a: dd 44 24 f0 fldl 0xfffffffffffffff0(%rsp)
e: f2 0f 11 4c 24 f8 movsd %xmm1,0xfffffffffffffff8(%rsp)
14: dd 44 24 f8 fldl 0xfffffffffffffff8(%rsp)
18: d9 c9 fxch %st(1)
1a: d9 fd fscale
1c: dd d9 fstp %st(1)
1e: dd 5c 24 f0 fstpl 0xfffffffffffffff0(%rsp)
22: f2 0f 10 44 24 f0 movsd 0xfffffffffffffff0(%rsp),%xmm0
28: c3 retq
With -O1 and above, the FXCH disappears completely from the old
version because the compiler rearranges the stack operations, which
is even more interesting.
Don't get me wrong, both work fine. FYI, verified with this:
http://cvsweb.netbsd.org/bsdweb.cgi/src/regress/lib/libc/ldexp/
Old ldexp() with gcc: PASSED
Old ldexp() with clang: FAILED
New ldexp() with gcc: PASSED
New ldexp() with clang: PASSED
Jung-uk Kim
More information about the svn-src-head
mailing list