Re: git: 888796ade284 - main - libm: fma: correct zero sign with small inputs

From: Steve Kargl <kargls_at_comcast.net>
Date: Tue, 11 Jun 2024 02:57:52 UTC

On 6/10/24 16:31, Ryan Libby wrote:
> On Sat, Jun 8, 2024 at 8:56 AM Ed Maste <emaste@freebsd.org> wrote:
>> The branch main has been updated by emaste:
>>
>> URL: https://cgit.FreeBSD.org/src/commit/?id=888796ade2842486d3167067e8034254c38aadd3
>>
>> commit 888796ade2842486d3167067e8034254c38aadd3
>> Author:     Ed Maste <emaste@FreeBSD.org>
>> AuthorDate: 2024-03-19 14:31:39 +0000
>> Commit:     Ed Maste <emaste@FreeBSD.org>
>> CommitDate: 2024-06-08 15:55:36 +0000
>>
>>      libm: fma: correct zero sign with small inputs
>>
>>      PR:             277783
>>      Reported by:    Victor Stinner
>>      Submitted by:   kargl
>>      MFC after:      1 week
>>      Differential Revision: https://reviews.freebsd.org/D44433
>> ---
>>   lib/msun/src/s_fma.c  | 4 +++-
>>   lib/msun/src/s_fmal.c | 4 +++-
>>   2 files changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/msun/src/s_fma.c b/lib/msun/src/s_fma.c
>> index b8a342646d85..4d08b40cc71a 100644
>> --- a/lib/msun/src/s_fma.c
>> +++ b/lib/msun/src/s_fma.c
>> @@ -267,7 +267,9 @@ fma(double x, double y, double z)
>>                   */
>>                  fesetround(oround);
>>                  volatile double vzs = zs; /* XXX gcc CSE bug workaround */
>> -               return (xy.hi + vzs + ldexp(xy.lo, spread));
>> +               xs = ldexp(xy.lo, spread);
>> +               xy.hi += vzs;
>> +               return (xy.hi == 0 ? xs : xy.hi + xs);
>>          }
>>
>>          if (oround != FE_TONEAREST) {
>> diff --git a/lib/msun/src/s_fmal.c b/lib/msun/src/s_fmal.c
>> index 3d333632127c..12f9c364670b 100644
>> --- a/lib/msun/src/s_fmal.c
>> +++ b/lib/msun/src/s_fmal.c
>> @@ -248,7 +248,9 @@ fmal(long double x, long double y, long double z)
>>                   */
>>                  fesetround(oround);
>>                  volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
>> -               return (xy.hi + vzs + ldexpl(xy.lo, spread));
>> +               xs = ldexpl(xy.lo, spread);
>> +               xy.hi += vzs;
>> +               return (xy.hi == 0 ? xs : xy.hi + xs);
>>          }
>>
>>          if (oround != FE_TONEAREST) {
> This seems to have caused the lib/msun/fma_tests:zeroes test to fail in
> the FE_ROUNDDOWN mode on amd64, now finding 0 while expecting -0.  I
> don't know if the test is wrong or too strict, or if the new result is
> wrong.
>
> https://ci.freebsd.org/job/FreeBSD-main-amd64-test/25249/testReport/junit/lib.msun/fma_test/zeroes/
>
> Reproduces with
> kyua debug -k /usr/tests/Kyuafile lib/msun/fma_test:zeroes

The test is unreadable except by the original author.

I'll note that there is a comment in fma_test.c

/*
  * This is needed because clang constant-folds fma in ways that are 
incorrect
  * in rounding modes other than FE_TONEAREST.
  */
static volatile double one = 1.0;

and the failing tests are

     testall(-one, one, one, rd ? -0.0 : 0.0, ALL_STD_EXCEPT, 0);
     testall(one, -one, one, rd ? -0.0 : 0.0, ALL_STD_EXCEPT, 0);
     testall(-one, -one, -one, rd ? -0.0 : 0.0, ALL_STD_EXCEPT, 0);

If one compares against MPFR,

    fesetround(FE_TONEAREST);
    dx = -1;
    dy = dz = 1;
    mpfr_fma(a, x, y, z, MPFR_RNDN);
    mpfr_printf("% Re % e\n", a, fma(dx, dy, dz));

    fesetround(FE_DOWNWARD);
    mpfr_fma(a, x, y, z, MPFR_RNDD);
    mpfr_printf("% Re % e\n", a, fma(dx, dy, dz));

    fesetround(FE_UPWARD);
    mpfr_fma(a, x, y, z, MPFR_RNDU);
    mpfr_printf("% Re % e\n", a, fma(dx, dy, dz));

    fesetround(FE_TOWARDZERO);
    mpfr_fma(a, x, y, z, MPFR_RNDZ);
    mpfr_printf("% Re % e\n", a, fma(dx, dy, dz));


% gcc13 -fno-builtin -o z a.c -I/usr/local/include -L/usr/local/lib 
-lmpfr -lgmp -lm
% ./z
  0.0000000000000000e+00  0.000000e+00
-0.0000000000000000e+00 -0.000000e+00
  0.0000000000000000e+00  0.000000e+00
  0.0000000000000000e+00  0.000000e+00

  0.0000000000000000e+00  0.000000e+00
-0.0000000000000000e+00 -0.000000e+00
  0.0000000000000000e+00  0.000000e+00
  0.0000000000000000e+00  0.000000e+00

  0.0000000000000000e+00  0.000000e+00
-0.0000000000000000e+00 -0.000000e+00
  0.0000000000000000e+00  0.000000e+00
  0.0000000000000000e+00  0.000000e+00

cc gives a similar result.

-- 
steve