[PATCH] hypotl, cabsl, and code removal in cabs

Mon Nov 5 12:14:54 PST 2007

On Mon, 5 Nov 2007, Bruce Evans wrote:

> I found only 1 serious bug n your hypotl(): adding n or m back to the
> exponent gives garbage if the resulting exponent doesn't fit.  Fix for
> the float precision case:
>
> % 	if (n >= m) {
> ...

Full patch to convert to float precision and fix some bugs:

% --- z~	Mon Nov  5 23:40:51 2007
% +++ z	Mon Nov  5 23:41:32 2007
% @@ -27,6 +27,6 @@
%  /*
%   * Compute hypotl(x, y) = sqrt(x**2 + y**2) such that underflow and 
% - * overflow are avoided.  To accomplishe the task, write x = a * 2**n
% - * and y = b * 2**m, one then has
% + * overflow are avoided.  To accomplish the task, write x = a * 2**n
% + * and y = b * 2**m; one then has
%   *
%   *   hypotl(x, y) = 2**n * sqrt(a**2 + b**(2*(m - n))) for n >= m

I didn't converrt the comments to float precision.

% @@ -42,9 +42,10 @@
%   *   hypotl(NaN, NaN) = hypotl(NaN, NaN) = Inf
%   */
% +
%  #include "math.h"
%  #include "math_private.h"
%  #include "fpmath.h"
% 
% -#define ZERO	0.L
% +static const float ZERO = 0.0;
% 
%  /*

ZERO must be a non-literal constant to prevent evaluation of 1.0/0.0
at runtime.  But due to compiler bugs/lack of support for C99/fenv,
the above is probably no different.

% @@ -53,12 +54,21 @@
%   * function sqrtl() function exists.
%   */
% -#define PREC	32
% +#define PREC	12		/* XXX 6 works, but why isn't > 12 needed? */
% +
% +union IEEEfbits {
% +	float	e;
% +	struct {
% +		unsigned int	man	:23;
% +		unsigned int	exp	:8;
% +		unsigned int	sign	:1;
% +	} bits;
% +};

I forgot that there was a suitable struct IEEEf2bits already defined in
fpmath.h.

% 
% -long double
% -hypotl(long double x, long double y)
% +float
% +myhypotf(float x, float y)
%  {
% -	union IEEEl2bits a, b;
% +	union IEEEfbits a, b;
% +	float t;
%  	int m, n;
% -	long double val;
% 
%  	a.e = x;

val is not used.

% @@ -68,6 +78,8 @@
% 
%  	/* If x = 0 or y = 0, then hypotl(x, y) = |x| + |y|. */
% -	if (!(a.bits.manh | a.bits.manl) || !(b.bits.manh | b.bits.manl))
% +	if ((a.bits.exp == 0 && a.bits.man == 0) ||
% +	    (b.bits.exp == 0 && b.bits.man == 0))
%  		return (a.e + b.e);
% +
%  	/*
%  	 * If x = +-Inf or y = +- Inf, then hypotl(x, y) = Inf (even if the
% @@ -75,11 +87,31 @@
%  	 * argument is not +- Inf, then hypotl(x, y) = NaN.
%  	 */
% -	if (a.bits.exp == 32767 || b.bits.exp == 32767) {
% -		mask_nbit_l(a);
% -		mask_nbit_l(b);
% -		if (!(a.bits.manh | a.bits.manl) ||
% -		    !(b.bits.manh | b.bits.manl))
% -			return (1.L / ZERO);
% -		return ((x - x) / (x - x));
% +	if (a.bits.exp == 0xff || b.bits.exp == 0xff) {
% +		if ((a.bits.exp == 0xff && a.bits.man == 0) ||
% +		    (b.bits.exp == 0xff && b.bits.man == 0))
% +			return (1.0F / ZERO);
% +		/*
% +		 * Here we do extra work to return the same NaN as fdlibm
% +		 * on i386's, so that simple binary comparisons work.  If
% +		 * both a.e and b.e are NaNs, we want to return the one
% +		 * that is highest as an integer.  The height depends on
% +		 * whether the hardware has set the qNaN bit, and whether
% +		 * the hardware gets a chance to do that depends on the
% +		 * optimization level and on whether our comparison function
% +		 * has higher precision, since passing floats in integer
% +		 * registers prevents quieting and converting floats to
% +		 * higher precision uses the hardware.
% +		 */
% +		if (a.bits.exp == 0xff && b.bits.exp == 0xff) {
% +#if 1 /* if call to comparison function will quieten */
% +			if (a.bits.man != 0)
% +				a.bits.man |= 0x00400000;	/* quieten */
% +			if (b.bits.man != 0)
% +				b.bits.man |= 0x00400000;
% +#endif
% +			if (a.bits.man < b.bits.man)
% +			    return (b.e + a.e);
% +		}
% +		return (a.e + b.e);
%  	}
%

Return the same value as fdlibm for testing.

We should at least return some combination of both x and y in the NaN case,
not just ((x - x) / (x - x)) when only y is a NaN, since when x is not a
NaN the latter is unrelated to the only NaN arg.

fdlibm sets the signs to 0 early similarly, so it tends to convert negative
NaNs to positive ones in a way that the hardware would not do.

Hacking on values using bit-fields or SET_*() tends to allow changes to NaNs
that the hardware would not do.

The above was written mainly on an A64 in i386 mode.  In amd64 mode, I
think loading sNaNs doesn't quieten them, so there would be even more
binary differences.

% @@ -90,16 +122,15 @@
%  	 */
%  	if (a.bits.exp == 0) {
% -		a.e *= 0x1.0p514;
% -		n = a.bits.exp - 0x4200;
% +		a.e *= 0x1.0p25;
% +		n = a.bits.exp - 0x97;
%  	} else
% -		n = a.bits.exp - 0x3ffe;
% -	a.bits.exp = 0x3ffe;
% -
% +		n = a.bits.exp - 0x7e;
% +	a.bits.exp = 0x7e;
%  	if (b.bits.exp == 0) {
% -		b.e *= 0x1.0p514;
% -		m = b.bits.exp - 0x4200;
% +		b.e *= 0x1.0p25;
% +		m = b.bits.exp - 0x97;
%  	} else
% -		m = b.bits.exp - 0x3ffe;
% -	b.bits.exp = 0x3ffe;
% +		m = b.bits.exp - 0x7e;
% +	b.bits.exp = 0x7e;
% 
%  	if (n >= m) {

I'm not sure why this uses 514.  I first tried converting 514 to 34, to
get a nice round number instead of 0x97.  This didn't work, but the
problems may have been just the rescaling bugs later.

fdlibm normalizes to a non-constant exponent.  This would be more complicated
here but simpler later.

% @@ -110,6 +141,11 @@
%  			a.e += b.e * b.e;
%  		}
% -		a.e = sqrtl(a.e);
% -		a.bits.exp += n;
% +		a.e = sqrtf(a.e);
% +		if (a.bits.exp + n <= 0 || a.bits.exp + n >= 0xff) {
% +			a.bits.exp += n - n / 2;
% +			SET_FLOAT_WORD(t, 0x3f800000 + ((n / 2) << 23));
% +			a.e *= t;
% +		} else
% +			a.bits.exp += n;
%  		return (a.e);
%  	} else {
% @@ -120,6 +156,11 @@
%  			b.e += a.e * a.e;
%  		}
% -		b.e = sqrtl(b.e);
% -		b.bits.exp += m;
% +		b.e = sqrtf(b.e);
% +		if (b.bits.exp + m <= 0 || b.bits.exp + m >= 0xff) {
% +			b.bits.exp += m - m / 2;
% +			SET_FLOAT_WORD(t, 0x3f800000 + ((m / 2) << 23));
% +			b.e *= t;
% +		} else
% +			b.bits.exp += m;
%  		return (b.e);
%  	}

Fix rescaling.

Bruce