svn commit: r213281 - head/lib/libc/amd64/gen

Jung-uk Kim jkim at FreeBSD.org
Wed Sep 29 22:12:35 UTC 2010


On Wednesday 29 September 2010 05:20 pm, Dimitry Andric wrote:
> Author: dim
> Date: Wed Sep 29 21:20:29 2010
> New Revision: 213281
> URL: http://svn.freebsd.org/changeset/base/213281
>
> Log:
>   Apply the same workaround for clang to amd64's version of ldexp.c
> (as in r212976): order the incoming arguments to fscale as st(0),
> st(1), and mark temp2 volatile (only in case of compilation with
> clang) to force clang to pop it correctly.  No binary change when
> compiled with gcc.

Actually, the binary changes slightly when compiled with gcc:

%diff -u ldexp-r1.14.c ldexp-r1.15.c
--- ldexp-r1.14.c       2010-09-29 17:44:45.000000000 -0400
+++ ldexp-r1.15.c       2010-09-29 17:45:10.000000000 -0400
@@ -34,7 +34,9 @@
 static char sccsid[] = "@(#)ldexp.c    8.1 (Berkeley) 6/4/93";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/amd64/gen/ldexp.c,v 1.14 2007/01/09 00:38:24 imp Exp $");
+__FBSDID("$FreeBSD: src/lib/libc/amd64/gen/ldexp.c,v 1.15 2010/09/29 21:20:29 dim Exp $");
+
+#include <math.h>
 
 /*
  * ldexp(value, exp): return value * (2 ** exp).
@@ -49,12 +51,16 @@
 double
 ldexp (double value, int exp)
 {
-       double temp, texp, temp2;
+       double temp, texp;
+#ifdef __clang__
+       volatile
+#endif
+       double temp2;
        texp = exp;
 #ifdef __GNUC__
        __asm ("fscale "
-               : "=u" (temp2), "=t" (temp)
-               : "0" (texp), "1" (value));
+               : "=t" (temp), "=u" (temp2)
+               : "0" (value), "1" (texp));
 #else
 #error unknown asm
 #endif
%objdump -d ldexp-r1.14.o ldexp-r1.15.o

ldexp-r1.14.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <ldexp>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   f2 0f 11 45 d8          movsd  %xmm0,0xffffffffffffffd8(%rbp)
   9:   89 7d d4                mov    %edi,0xffffffffffffffd4(%rbp)
   c:   f2 0f 2a 45 d4          cvtsi2sd 0xffffffffffffffd4(%rbp),%xmm0
  11:   f2 0f 11 45 f0          movsd  %xmm0,0xfffffffffffffff0(%rbp)
  16:   dd 45 f0                fldl   0xfffffffffffffff0(%rbp)
  19:   dd 45 d8                fldl   0xffffffffffffffd8(%rbp)
  1c:   d9 fd                   fscale 
  1e:   d9 c9                   fxch   %st(1)
  20:   dd 5d f8                fstpl  0xfffffffffffffff8(%rbp)
  23:   dd 5d e8                fstpl  0xffffffffffffffe8(%rbp)
  26:   48 8b 45 e8             mov    0xffffffffffffffe8(%rbp),%rax
  2a:   48 89 45 c8             mov    %rax,0xffffffffffffffc8(%rbp)
  2e:   f2 0f 10 45 c8          movsd  0xffffffffffffffc8(%rbp),%xmm0
  33:   c9                      leaveq 
  34:   c3                      retq   

ldexp-r1.15.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <ldexp>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   f2 0f 11 45 d8          movsd  %xmm0,0xffffffffffffffd8(%rbp)
   9:   89 7d d4                mov    %edi,0xffffffffffffffd4(%rbp)
   c:   f2 0f 2a 45 d4          cvtsi2sd 0xffffffffffffffd4(%rbp),%xmm0
  11:   f2 0f 11 45 f0          movsd  %xmm0,0xfffffffffffffff0(%rbp)
  16:   dd 45 d8                fldl   0xffffffffffffffd8(%rbp)
  19:   dd 45 f0                fldl   0xfffffffffffffff0(%rbp)
  1c:   d9 c9                   fxch   %st(1)
  1e:   d9 fd                   fscale 
  20:   dd 5d e8                fstpl  0xffffffffffffffe8(%rbp)
  23:   dd 5d f8                fstpl  0xfffffffffffffff8(%rbp)
  26:   48 8b 45 e8             mov    0xffffffffffffffe8(%rbp),%rax
  2a:   48 89 45 c8             mov    %rax,0xffffffffffffffc8(%rbp)
  2e:   f2 0f 10 45 c8          movsd  0xffffffffffffffc8(%rbp),%xmm0
  33:   c9                      leaveq 
  34:   c3                      retq   

Note that the new version places FXCH before FSCALE.

When it is compiled with -O2:

%objdump -d ldexp-r1.14.o ldexp-r1.15.o

ldexp-r1.14.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <ldexp>:
   0:   f2 0f 2a cf             cvtsi2sd %edi,%xmm1
   4:   f2 0f 11 44 24 f0       movsd  %xmm0,0xfffffffffffffff0(%rsp)
   a:   f2 0f 11 4c 24 f8       movsd  %xmm1,0xfffffffffffffff8(%rsp)
  10:   dd 44 24 f8             fldl   0xfffffffffffffff8(%rsp)
  14:   dd 44 24 f0             fldl   0xfffffffffffffff0(%rsp)
  18:   d9 fd                   fscale 
  1a:   dd d9                   fstp   %st(1)
  1c:   dd 5c 24 f0             fstpl  0xfffffffffffffff0(%rsp)
  20:   f2 0f 10 44 24 f0       movsd  0xfffffffffffffff0(%rsp),%xmm0
  26:   c3                      retq   

ldexp-r1.15.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <ldexp>:
   0:   f2 0f 2a cf             cvtsi2sd %edi,%xmm1
   4:   f2 0f 11 44 24 f0       movsd  %xmm0,0xfffffffffffffff0(%rsp)
   a:   dd 44 24 f0             fldl   0xfffffffffffffff0(%rsp)
   e:   f2 0f 11 4c 24 f8       movsd  %xmm1,0xfffffffffffffff8(%rsp)
  14:   dd 44 24 f8             fldl   0xfffffffffffffff8(%rsp)
  18:   d9 c9                   fxch   %st(1)
  1a:   d9 fd                   fscale 
  1c:   dd d9                   fstp   %st(1)
  1e:   dd 5c 24 f0             fstpl  0xfffffffffffffff0(%rsp)
  22:   f2 0f 10 44 24 f0       movsd  0xfffffffffffffff0(%rsp),%xmm0
  28:   c3                      retq   

With -O1 and above, the FXCH disappears completely from the old
version because the compiler rearranges the stack operations, which
is even more interesting.

Don't get me wrong; both work fine.  FYI, I verified them with this:

http://cvsweb.netbsd.org/bsdweb.cgi/src/regress/lib/libc/ldexp/

Old ldexp() with gcc:		PASSED
Old ldexp() with clang:		FAILED
New ldexp() with gcc:		PASSED
New ldexp() with clang:		PASSED

Jung-uk Kim


More information about the svn-src-all mailing list