[PATCH] hypotl, cabsl, and code removal in cabs
Bruce Evans
brde at optusnet.com.au
Mon Nov 5 12:14:54 PST 2007
On Mon, 5 Nov 2007, Bruce Evans wrote:
> I found only 1 serious bug n your hypotl(): adding n or m back to the
> exponent gives garbage if the resulting exponent doesn't fit. Fix for
> the float precision case:
>
> % if (n >= m) {
> ...
Full patch to convert to float precision and fix some bugs:
% --- z~ Mon Nov 5 23:40:51 2007
% +++ z Mon Nov 5 23:41:32 2007
% @@ -27,6 +27,6 @@
% /*
% * Compute hypotl(x, y) = sqrt(x**2 + y**2) such that underflow and
% - * overflow are avoided. To accomplishe the task, write x = a * 2**n
% - * and y = b * 2**m, one then has
% + * overflow are avoided. To accomplish the task, write x = a * 2**n
% + * and y = b * 2**m; one then has
% *
% * hypotl(x, y) = 2**n * sqrt(a**2 + b**(2*(m - n))) for n >= m
I didn't converrt the comments to float precision.
% @@ -42,9 +42,10 @@
% * hypotl(NaN, NaN) = hypotl(NaN, NaN) = Inf
% */
% +
% #include "math.h"
% #include "math_private.h"
% #include "fpmath.h"
%
% -#define ZERO 0.L
% +static const float ZERO = 0.0;
%
% /*
ZERO must be a non-literal constant to prevent evaluation of 1.0/0.0
at runtime. But due to compiler bugs/lack of support for C99/fenv,
the above is probably no different.
% @@ -53,12 +54,21 @@
% * function sqrtl() function exists.
% */
% -#define PREC 32
% +#define PREC 12 /* XXX 6 works, but why isn't > 12 needed? */
% +
% +union IEEEfbits {
% + float e;
% + struct {
% + unsigned int man :23;
% + unsigned int exp :8;
% + unsigned int sign :1;
% + } bits;
% +};
I forgot that there was a suitable struct IEEEf2bits already defined in
fpmath.h.
%
% -long double
% -hypotl(long double x, long double y)
% +float
% +myhypotf(float x, float y)
% {
% - union IEEEl2bits a, b;
% + union IEEEfbits a, b;
% + float t;
% int m, n;
% - long double val;
%
% a.e = x;
val is not used.
% @@ -68,6 +78,8 @@
%
% /* If x = 0 or y = 0, then hypotl(x, y) = |x| + |y|. */
% - if (!(a.bits.manh | a.bits.manl) || !(b.bits.manh | b.bits.manl))
% + if ((a.bits.exp == 0 && a.bits.man == 0) ||
% + (b.bits.exp == 0 && b.bits.man == 0))
% return (a.e + b.e);
% +
% /*
% * If x = +-Inf or y = +- Inf, then hypotl(x, y) = Inf (even if the
% @@ -75,11 +87,31 @@
% * argument is not +- Inf, then hypotl(x, y) = NaN.
% */
% - if (a.bits.exp == 32767 || b.bits.exp == 32767) {
% - mask_nbit_l(a);
% - mask_nbit_l(b);
% - if (!(a.bits.manh | a.bits.manl) ||
% - !(b.bits.manh | b.bits.manl))
% - return (1.L / ZERO);
% - return ((x - x) / (x - x));
% + if (a.bits.exp == 0xff || b.bits.exp == 0xff) {
% + if ((a.bits.exp == 0xff && a.bits.man == 0) ||
% + (b.bits.exp == 0xff && b.bits.man == 0))
% + return (1.0F / ZERO);
% + /*
% + * Here we do extra work to return the same NaN as fdlibm
% + * on i386's, so that simple binary comparisons work. If
% + * both a.e and b.e are NaNs, we want to return the one
% + * that is highest as an integer. The height depends on
% + * whether the hardware has set the qNaN bit, and whether
% + * the hardware gets a chance to do that depends on the
% + * optimization level and on whether our comparison function
% + * has higher precision, since passing floats in integer
% + * registers prevents quieting and converting floats to
% + * higher precision uses the hardware.
% + */
% + if (a.bits.exp == 0xff && b.bits.exp == 0xff) {
% +#if 1 /* if call to comparison function will quieten */
% + if (a.bits.man != 0)
% + a.bits.man |= 0x00400000; /* quieten */
% + if (b.bits.man != 0)
% + b.bits.man |= 0x00400000;
% +#endif
% + if (a.bits.man < b.bits.man)
% + return (b.e + a.e);
% + }
% + return (a.e + b.e);
% }
%
Return the same value as fdlibm for testing.
We should at least return some combination of both x and y in the NaN case,
not just ((x - x) / (x - x)) when only y is a NaN, since when x is not a
NaN the latter is unrelated to the only NaN arg.
fdlibm sets the signs to 0 early similarly, so it tends to convert negative
NaNs to positive ones in a way that the hardware would not do.
Hacking on values using bit-fields or SET_*() tends to allow changes to NaNs
that the hardware would not do.
The above was written mainly on an A64 in i386 mode. In amd64 mode, I
think loading sNaNs doesn't quieten them, so there would be even more
binary differences.
% @@ -90,16 +122,15 @@
% */
% if (a.bits.exp == 0) {
% - a.e *= 0x1.0p514;
% - n = a.bits.exp - 0x4200;
% + a.e *= 0x1.0p25;
% + n = a.bits.exp - 0x97;
% } else
% - n = a.bits.exp - 0x3ffe;
% - a.bits.exp = 0x3ffe;
% -
% + n = a.bits.exp - 0x7e;
% + a.bits.exp = 0x7e;
% if (b.bits.exp == 0) {
% - b.e *= 0x1.0p514;
% - m = b.bits.exp - 0x4200;
% + b.e *= 0x1.0p25;
% + m = b.bits.exp - 0x97;
% } else
% - m = b.bits.exp - 0x3ffe;
% - b.bits.exp = 0x3ffe;
% + m = b.bits.exp - 0x7e;
% + b.bits.exp = 0x7e;
%
% if (n >= m) {
I'm not sure why this uses 514. I first tried converting 514 to 34, to
get a nice round number instead of 0x97. This didn't work, but the
problems may have been just the rescaling bugs later.
fdlibm normalizes to a non-constant exponent. This would be more complicated
here but simpler later.
% @@ -110,6 +141,11 @@
% a.e += b.e * b.e;
% }
% - a.e = sqrtl(a.e);
% - a.bits.exp += n;
% + a.e = sqrtf(a.e);
% + if (a.bits.exp + n <= 0 || a.bits.exp + n >= 0xff) {
% + a.bits.exp += n - n / 2;
% + SET_FLOAT_WORD(t, 0x3f800000 + ((n / 2) << 23));
% + a.e *= t;
% + } else
% + a.bits.exp += n;
% return (a.e);
% } else {
% @@ -120,6 +156,11 @@
% b.e += a.e * a.e;
% }
% - b.e = sqrtl(b.e);
% - b.bits.exp += m;
% + b.e = sqrtf(b.e);
% + if (b.bits.exp + m <= 0 || b.bits.exp + m >= 0xff) {
% + b.bits.exp += m - m / 2;
% + SET_FLOAT_WORD(t, 0x3f800000 + ((m / 2) << 23));
% + b.e *= t;
% + } else
% + b.bits.exp += m;
% return (b.e);
% }
Fix rescaling.
Bruce
More information about the freebsd-standards
mailing list