Complex arg-trig functions

Bruce Evans brde at optusnet.com.au
Fri Sep 21 23:14:08 UTC 2012


On Fri, 21 Sep 2012, Stephen Montgomery-Smith wrote:

> On 09/21/2012 02:25 PM, Stephen Montgomery-Smith wrote:
>> On 09/21/2012 02:05 PM, Bruce Evans wrote:
>>> On Fri, 21 Sep 2012, Stephen Montgomery-Smith wrote:
>> 
>>> - decide whether my old change to remove unnecessary accuracy for the
>>>    case where ax == 1, ay < FLT_EPSILON in catanh() is correct (you
>>>    didn't accept it, and maybe other accuracy changes make it extra
>>>    accuracy more interesting).
>> 
>> Or maybe I missed it.
>
> When you send me changes to catrigf.c, I translate it to catrig.c (the double 
> version), and then convert it back to catrigf.c.  So sometimes I miss things.

This time I merged everything into catrig.c and even ran the conversion
scripts to check this.  I keep forgetting to add or remove f suffixes
when manually converting.

Patches tomorrow.  Well, the main new one now, for all 3 files since
part of it has lots of magic numbers which are not handled by the
conversion scripts.

% diff -u2 catrig.c~ catrig.c
% --- catrig.c~	2012-09-21 15:51:00.000000000 +0000
% +++ catrig.c	2012-09-21 21:40:34.521926000 +0000
% @@ -206,6 +217,6 @@
%  		 */
%  		*B_is_usable = 0;
% -		*sqrt_A2my2 = scalbn(A, DBL_MANT_DIG);
% -		*new_y = scalbn(y, DBL_MANT_DIG);
% +		*sqrt_A2my2 = A * (2 / DBL_EPSILON);
% +		*new_y= y * (2 / DBL_EPSILON);
%  		return;
%  	}
% @@ -244,6 +255,7 @@
%  			 * scaling should avoid any underflow problems.
%  			 */
% -			*sqrt_A2my2 = scalbn(x, 2*DBL_MANT_DIG) * y / sqrt((y+1)*(y-1));
% -			*new_y = scalbn(y, 2*DBL_MANT_DIG);
% +			*sqrt_A2my2 = x * (4/DBL_EPSILON/DBL_EPSILON) * y /
% +			    sqrt((y+1)*(y-1));
% +			*new_y = y * (4/DBL_EPSILON/DBL_EPSILON);
%  		} else /* if (y < 1) */ {
%  			/*

It's easy to eliminiate these scalbn()s, since the values are constant.

scalbn() is a builtin in gcc-4.2 but not in gcc-3.3, and in 4.2 the
builtin just calls the extern function.  Here the constant values
could be calculated at compile time, but gcc doesn't do this.  I
think clang does.

The conversion script handles this fine.

% @@ -501,29 +519,40 @@
%  /*
%   * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y).
% - * Assumes x and y are positive or zero, and one of x and y is larger than
% + * Assumes x and y are not NaN, and one of x and y is larger than
%   * RECIP_EPSILON.  We avoid unwarranted underflow.  It is important to not use

The old version was passed positive x and y, but didn't depend on this.
The caller then had to fix up the sign.

This version is passed x and y with their original signs.  The sign is
handled automatically by expressions in the function, and the caller
doesn't fix it up.

%   * the code creal(1/z), because the imaginary part may produce an unwanted
%   * underflow.
% + * This is only called in a context where inexact is always raised before
% + * the call, so no effort is made to avoid or force inexact.
%   */
%  inline static double
%  real_part_reciprocal(double x, double y)
%  {
% +	double scale;
% +	uint32_t hx, hy;
% +	int32_t ix, iy;
% +
%  	/*
%  	 * This code is inspired by the C99 document n1124.pdf, Section G.5.1,
%  	 * example 2.
%  	 */
% -	int ex, ey;
% -
% -	if (isinf(x) || isinf(y))
% -		return (0);
% -	if (y == 0) return (1/x);
% -	if (x == 0) return (x/y/y);
% -	ex = ilogb(x);
% -	ey = ilogb(y);
% -	if (ex - ey >= DBL_MANT_DIG) return (1/x);
% -	if (ey - ex >= DBL_MANT_DIG) return (x/y/y);
% -	x = scalbn(x, -ex);
% -	y = scalbn(y, -ex);
% -	return scalbn(x/(x*x + y*y), -ex);

The conversion to not use scalbn() is fairly direct and routine, but also
fairly magic.

% +	GET_HIGH_WORD(hx, x);
% +	ix = hx & 0x7ff00000;
% +	GET_HIGH_WORD(hy, y);
% +	iy = hy & 0x7ff00000;

ilogb() is a builtin to much the same extent as scalbn() IIRC -- mostly
it isn't.

By working with the raw exponent, we avoid complications from the following
design bugs in ilogb():
- ilogb(0) returns FP_ILOGB0, so the above needs special cases for x == 0
   and y == 0
- ilogb(+-Inf) returns INT_MAX, so the above needs to handle infs earlier
   than is optimal.
With the raw exponents, you can just subtract them and most things work.
Denormals cause problems with this subtraction in some contexts, and
ilogb() has to do a lot of work find their exponent, and scalbn has to
do a lot of work to shift their mantissa (compared with just adding to
the exponent for a normal).  Here we handle them fairly subtly without
any extra code:
     when one arg is denormal, the absolute value exceeds RECIP_EPSILON,
     so there is a large exponent differences, and the special cases
     for large exponent differences handle this case automatically.
The case of y infinite but x finite is handled similarly.

% +#define	BIAS	(DBL_MAX_EXP - 1)
% +/* XXX more guard digits are useful iff there is extra precision. */

Without extra precision, a cutoff of with fewer guard digits somehow
gives better accuracy than one with more.  (The old cutoffs in
terms of exponent bits give ~DBL_MANT_DIG/2 active bits and
~DBL_MANT_DIG/2 guard bits.)

% +#define	CUTOFF	(DBL_MANT_DIG / 2 + 1)	/* just half or 1 guard digit */
% +	if (ix - iy >= CUTOFF << 20 || isinf(x))
% +		return (1/x);		/* +-Inf -> +-0 is special */

Constants are shifted to avoid shifting the exponent bits in ix and iy
back and forth.

The special cases for infinities have been reduced to this one here.
The sign used to be handled by copysign(0, x) when x is +-Inf.  Now
the common 1/x return is used.

% +	if (iy - ix >= CUTOFF << 20)
% +		return (x/y/y);		/* should avoid double div, but hard */
% +	if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20)
% +		return (x/(x*x + y*y));
% +	scale = 0;
% +	SET_HIGH_WORD(scale, 0x7ff00000 - ix);	/* 2**(1-ilogb(x)) */
% +	x *= scale;
% +	y *= scale;
% +	return (x/(x*x + y*y) * scale);
%  }
% 
% @@ -577,13 +606,22 @@
% 
%  	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
% -		return (cpack(copysign(real_part_reciprocal(ax, ay), x), copysign(m_pi_2, y)));
% +		return (cpack(real_part_reciprocal(x, y), copysign(pio2_hi + pio2_lo, y)));

Handle the sign in the function.

Unrelated details that I couldn't edit out without breaking the patch
hunk:

% 
% -	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
% +	if (ax < SQRT_3_EPSILON/2 && ay < SQRT_3_EPSILON/2) {
% +		/*
% +		 * z = 0 was filtered out above.  All other cases must raise
% +		 * inexact, but this is the only only that needs to do it
% +		 * explicitly.
% +		 */
% +		raise_inexact();
%  		return (z);
% +	}

It is an optimization to only raise inexact here.  The early return for
y == 0 && ax <= 1 worked well, but not the early raising of inexact.
I also reduce the x == 0 case to atanf() early.  That leaves no z == 0
case here, so we raise inexact unconditionally.

% 
%  	if (ax == 1 && ay < DBL_EPSILON) {
% +#if 0 /* this only improves accuracy in an already relative accurate case */
%  		if (ay > 2*DBL_MIN)
%  			rx = - log(ay/2) / 2;
%  		else
% +#endif

This was the change that you might have missed.

%  			rx = - (log(ay) - m_ln2) / 2;
%  	} else

Everything for the other files is routine except for the magic numbers
in real_part_reciprocal related to the packing of the bits.  I prefer
to leave those as magic.  A full macroization of them would have to
macroize the accesses GET_HIGH_WORD() etc.

% diff -u2 catrigf.c~ catrigf.c
% --- catrigf.c~	2012-09-21 15:51:16.000000000 +0000
% +++ catrigf.c	2012-09-21 21:34:41.140231000 +0000
% @@ -108,6 +109,6 @@
%  	if (y < FOUR_SQRT_MIN) {
%  		*B_is_usable = 0;
% -		*sqrt_A2my2 = scalbnf(A, FLT_MANT_DIG);
% -		*new_y = scalbnf(y, FLT_MANT_DIG);
% +		*sqrt_A2my2 = A * (2 / FLT_EPSILON);
% +		*new_y= y * (2 / FLT_EPSILON);
%  		return;
%  	}
% @@ -124,6 +125,7 @@
%  			*sqrt_A2my2 = sqrtf(Amy*(A+y));
%  		} else if (y > 1) {
% -			*sqrt_A2my2 = scalbnf(x, 2*FLT_MANT_DIG) * y / sqrtf((y+1)*(y-1));
% -			*new_y = scalbnf(y, 2*FLT_MANT_DIG);
% +			*sqrt_A2my2 = x * (4/FLT_EPSILON/FLT_EPSILON) * y /
% +			    sqrtf((y+1)*(y-1));
% +			*new_y = y * (4/FLT_EPSILON/FLT_EPSILON);
%  		} else {
%  			*sqrt_A2my2 = sqrtf((1-y)*(1+y));
% @@ -293,17 +299,24 @@
%  real_part_reciprocal(float x, float y)
%  {
% -	int ex, ey;
% -
% -	if (isinf(x) || isinf(y))
% -		return (0);
% -	if (y == 0) return (1/x);
% -	if (x == 0) return (x/y/y);
% -	ex = ilogbf(x);
% -	ey = ilogbf(y);
% -	if (ex - ey >= FLT_MANT_DIG) return (1/x);
% -	if (ey - ex >= FLT_MANT_DIG) return (x/y/y);
% -	x = scalbnf(x, -ex);
% -	y = scalbnf(y, -ex);
% -	return scalbnf(x/(x*x + y*y), -ex);
% +	float scale;
% +	uint32_t hx, hy;
% +	int32_t ix, iy;
% +
% +	GET_FLOAT_WORD(hx, x);
% +	ix = hx & 0x7f800000;
% +	GET_FLOAT_WORD(hy, y);
% +	iy = hy & 0x7f800000;
% +#define	BIAS	(FLT_MAX_EXP - 1)
% +#define	CUTOFF	(FLT_MANT_DIG / 2 + 1)
% +	if (ix - iy >= CUTOFF << 23 || isinf(x))
% +		return (1/x);
% +	if (iy - ix >= CUTOFF << 23)
% +		return (x/y/y);
% +	if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23)
% +		return (x/(x*x + y*y));
% +	SET_FLOAT_WORD(scale, 0x7f800000 - ix);
% +	x *= scale;
% +	y *= scale;
% +	return (x/(x*x + y*y) * scale);
%  }
% 
% @@ -335,13 +348,17 @@
% 
%  	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
% -		return (cpackf(copysignf(real_part_reciprocal(ax, ay), x), copysignf(m_pi_2, y)));
% +		return (cpackf(real_part_reciprocal(x, y), copysignf(pio2_hi + pio2_lo, y)));
% 
% -	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
% +	if (ax < SQRT_3_EPSILON/2 && ay < SQRT_3_EPSILON/2) {
% +		raise_inexact();
%  		return (z);
% +	}
% 
%  	if (ax == 1 && ay < FLT_EPSILON) {
% +#if 0
%  		if (ay > 2*FLT_MIN)
%  			rx = - logf(ay/2) / 2;
%  		else
% +#endif
%  			rx = - (logf(ay) - m_ln2) / 2;
%  	} else
% diff -u2 catrigl.c~ catrigl.c
% --- catrigl.c~	2012-09-21 16:22:40.000000000 +0000
% +++ catrigl.c	2012-09-21 21:17:46.962698000 +0000
% @@ -122,6 +124,6 @@
%  	if (y < FOUR_SQRT_MIN) {
%  		*B_is_usable = 0;
% -		*sqrt_A2my2 = scalbnl(A, LDBL_MANT_DIG);
% -		*new_y = scalbnl(y, LDBL_MANT_DIG);
% +		*sqrt_A2my2 = A * (2 / LDBL_EPSILON);
% +		*new_y= y * (2 / LDBL_EPSILON);
%  		return;
%  	}
% @@ -138,6 +140,7 @@
%  			*sqrt_A2my2 = sqrtl(Amy*(A+y));
%  		} else if (y > 1) {
% -			*sqrt_A2my2 = scalbnl(x, 2*LDBL_MANT_DIG) * y / sqrtl((y+1)*(y-1));
% -			*new_y = scalbnl(y, 2*LDBL_MANT_DIG);
% +			*sqrt_A2my2 = x * (4/LDBL_EPSILON/LDBL_EPSILON) * y /
% +			    sqrtl((y+1)*(y-1));
% +			*new_y = y * (4/LDBL_EPSILON/LDBL_EPSILON);
%  		} else {
%  			*sqrt_A2my2 = sqrtl((1-y)*(1+y));
% @@ -307,17 +314,24 @@
%  real_part_reciprocal(long double x, long double y)
%  {
% -	int ex, ey;
% -
% -	if (isinf(x) || isinf(y))
% -		return (0);
% -	if (y == 0) return (1/x);
% -	if (x == 0) return (x/y/y);
% -	ex = ilogbl(x);
% -	ey = ilogbl(y);
% -	if (ex - ey >= LDBL_MANT_DIG) return (1/x);
% -	if (ey - ex >= LDBL_MANT_DIG) return (x/y/y);
% -	x = scalbnl(x, -ex);
% -	y = scalbnl(y, -ex);
% -	return scalbnl(x/(x*x + y*y), -ex);
% +	long double scale;
% +	uint16_t hx, hy;
% +	int16_t ix, iy;
% +
% +	GET_LDBL_EXPSIGN(hx, x);
% +	ix = hx & 0x7fff;
% +	GET_LDBL_EXPSIGN(hy, y);
% +	iy = hy & 0x7fff;
% +#define	BIAS	(LDBL_MAX_EXP - 1)
% +#define	CUTOFF	(LDBL_MANT_DIG / 2 + 1)
% +	if (ix - iy >= CUTOFF || isinf(x))
% +		return (1/x);
% +	if (iy - ix >= CUTOFF)
% +		return (x/y/y);
% +	if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF)
% +		return (x/(x*x + y*y));
% +	SET_LDBL_EXPSIGN(scale, 0x7fff - ix);
% +	x *= scale;
% +	y *= scale;
% +	return (x/(x*x + y*y) * scale);
%  }
% 
% @@ -349,13 +363,17 @@
% 
%  	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
% -		return (cpackl(copysignl(real_part_reciprocal(ax, ay), x), copysignl(m_pi_2, y)));
% +		return (cpackl(real_part_reciprocal(x, y), copysignl(pio2_hi + pio2_lo, y)));
% 
% -	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
% +	if (ax < SQRT_3_EPSILON/2 && ay < SQRT_3_EPSILON/2) {
% +		raise_inexact();
%  		return (z);
% +	}
% 
%  	if (ax == 1 && ay < LDBL_EPSILON) {
% +#if 0
%  		if (ay > 2*LDBL_MIN)
%  			rx = - logl(ay/2) / 2;
%  		else
% +#endif
%  			rx = - (logl(ay) - m_ln2) / 2;
%  	} else

The patch is also attached.

Bruce
-------------- next part --------------
diff -u2 catrig.c~ catrig.c
--- catrig.c~	2012-09-21 15:51:00.000000000 +0000
+++ catrig.c	2012-09-21 21:40:34.521926000 +0000
@@ -35,4 +35,5 @@
 #undef isnan
 #define isnan(x)	((x) != (x))
+#define	raise_inexact()	do { volatile int junk = 1 + tiny; } while(0)
 #undef signbit
 #define signbit(x)	(__builtin_signbit(x))
@@ -46,12 +47,22 @@
 m_e =			2.7182818284590452e0,	/*  0x15bf0a8b145769.0p-51 */
 m_ln2 =			6.9314718055994531e-1,	/*  0x162e42fefa39ef.0p-53 */
-m_pi_2 =		1.5707963267948966e0,	/*  0x1921fb54442d18.0p-52 */
-pio2_hi =		1.57079632679489655800e+00,	/* 0x3FF921FB, 0x54442D18 */
-pio2_lo =		6.12323399573676603587e-17,	/* 0x3C91A626, 0x33145C07 */
+/*
+ * We no longer use M_PI_2 or m_pi_2.  In float precision, rounding to
+ * nearest of PI/2 happens to round up, but we want rounding down so
+ * that the expressions for approximating PI/2 and (PI/2 - z) work in all
+ * rounding modes.  This is not very important, but it is necessary for
+ * the same quality of implementation that fdlibm had in 1992 and that
+ * real functions mostly still have.  This is known to be broken only in
+ * ld80 acosl() via invtrig.c and in some invalid optimizations in code
+ * under development, and now in all functions in catrigl.c via invtrig.c.
+ */
+pio2_hi =		1.5707963267948966e0,	/*  0x1921fb54442d18.0p-52 */
 RECIP_EPSILON =		1/DBL_EPSILON,
-SQRT_EPSILON =		0x1p-27,		/* <= sqrt(DBL_EPSILON) */ 
+SQRT_3_EPSILON =	2.5809568279517849e-8,	/*  0x1bb67ae8584caa.0p-78 */
+SQRT_6_EPSILON =	3.6500241499888571e-8,	/*  0x13988e1409212e.0p-77 */
 SQRT_MIN =		0x1p-511;	/* >= sqrt(DBL_MIN) */
 
 static const volatile double
+pio2_lo =		6.1232339957367659e-17,	/*  0x11a62633145c07.0p-106 */
 tiny =			0x1p-1000;
 
@@ -206,6 +217,6 @@
 		 */
 		*B_is_usable = 0;
-		*sqrt_A2my2 = scalbn(A, DBL_MANT_DIG);
-		*new_y = scalbn(y, DBL_MANT_DIG);
+		*sqrt_A2my2 = A * (2 / DBL_EPSILON);
+		*new_y= y * (2 / DBL_EPSILON);
 		return;
 	}
@@ -244,6 +255,7 @@
 			 * scaling should avoid any underflow problems.
 			 */
-			*sqrt_A2my2 = scalbn(x, 2*DBL_MANT_DIG) * y / sqrt((y+1)*(y-1));
-			*new_y = scalbn(y, 2*DBL_MANT_DIG);
+			*sqrt_A2my2 = x * (4/DBL_EPSILON/DBL_EPSILON) * y /
+			    sqrt((y+1)*(y-1));
+			*new_y = y * (4/DBL_EPSILON/DBL_EPSILON);
 		} else /* if (y < 1) */ {
 			/*
@@ -303,9 +315,12 @@
 	}
 
-	/* raise inexact if z != 0. */
-	if ((x == 0 && y == 0) || (int)(1 + tiny) != 1)
+	/* Avoid spuriously raising inexact for z = 0. */
+	if (x == 0 && y == 0)
 		return (z);
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	/* All remaining cases are inexact. */
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON/4 && ay < SQRT_6_EPSILON/4)
 		return (z);
 
@@ -364,5 +379,5 @@
 			return (cpack(x+x, -y));
 		/* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */
-		if (x == 0) return (cpack(m_pi_2 + tiny, y+y));
+		if (x == 0) return (cpack(pio2_hi + pio2_lo, y+y));
 		/*
 		 * All other cases involving NaN return NaN + I*NaN.
@@ -383,9 +398,12 @@
 	}
 
-	/* raise inexact if z != 1. */
-	if ((x == 1 && y == 0) || (int)(1 + tiny) != 1)
+	/* Avoid spuriously raising inexact for z = 1. */
+	if (x == 1 && y == 0)
 		return (cpack(0, -y));
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	/* All remaining cases are inexact. */
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON/4 && ay < SQRT_6_EPSILON/4)
 		return (cpack(pio2_hi - (x - pio2_lo), -y));
 
@@ -501,29 +519,40 @@
 /*
  * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y).
- * Assumes x and y are positive or zero, and one of x and y is larger than
+ * Assumes x and y are not NaN, and one of x and y is larger than
  * RECIP_EPSILON.  We avoid unwarranted underflow.  It is important to not use
  * the code creal(1/z), because the imaginary part may produce an unwanted
  * underflow.
+ * This is only called in a context where inexact is always raised before
+ * the call, so no effort is made to avoid or force inexact.
  */
 inline static double
 real_part_reciprocal(double x, double y)
 {
+	double scale;
+	uint32_t hx, hy;
+	int32_t ix, iy;
+
 	/*
 	 * This code is inspired by the C99 document n1124.pdf, Section G.5.1,
 	 * example 2.
 	 */
-	int ex, ey;
-
-	if (isinf(x) || isinf(y))
-		return (0);
-	if (y == 0) return (1/x);
-	if (x == 0) return (x/y/y);
-	ex = ilogb(x);
-	ey = ilogb(y);
-	if (ex - ey >= DBL_MANT_DIG) return (1/x);
-	if (ey - ex >= DBL_MANT_DIG) return (x/y/y);
-	x = scalbn(x, -ex);
-	y = scalbn(y, -ex);
-	return scalbn(x/(x*x + y*y), -ex);
+	GET_HIGH_WORD(hx, x);
+	ix = hx & 0x7ff00000;
+	GET_HIGH_WORD(hy, y);
+	iy = hy & 0x7ff00000;
+#define	BIAS	(DBL_MAX_EXP - 1)
+/* XXX more guard digits are useful iff there is extra precision. */
+#define	CUTOFF	(DBL_MANT_DIG / 2 + 1)	/* just half or 1 guard digit */
+	if (ix - iy >= CUTOFF << 20 || isinf(x))
+		return (1/x);		/* +-Inf -> +-0 is special */
+	if (iy - ix >= CUTOFF << 20)
+		return (x/y/y);		/* should avoid double div, but hard */
+	if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20)
+		return (x/(x*x + y*y));
+	scale = 0;
+	SET_HIGH_WORD(scale, 0x7ff00000 - ix);	/* 2**(1-ilogb(x)) */
+	x *= scale;
+	y *= scale;
+	return (x/(x*x + y*y) * scale);
 }
 
@@ -554,7 +583,7 @@
 		return (cpack(atanh(x), y)); 
 
-	/* raise inexact if z != 0. */
-	if ((x == 0 && y == 0) || (int)(1 + tiny) != 1)
-		return (z);
+	/* To ensure the same accuracy as atan(), and to filter out z = 0. */
+	if (x == 0)
+		return (cpack(x, atan(y)));
 
 	if (isnan(x) || isnan(y)) {
@@ -564,5 +593,5 @@
 		/* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
 		if (isinf(y))
-			return (cpack(copysign(0, x), copysign(m_pi_2, y)));
+			return (cpack(copysign(0, x), copysign(pio2_hi + pio2_lo, y)));
 		/* catanh(+-0 + I*NaN) = +-0 + I*NaN */
 		if (x == 0)
@@ -577,13 +606,22 @@
 
 	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
-		return (cpack(copysign(real_part_reciprocal(ax, ay), x), copysign(m_pi_2, y)));
+		return (cpack(real_part_reciprocal(x, y), copysign(pio2_hi + pio2_lo, y)));
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	if (ax < SQRT_3_EPSILON/2 && ay < SQRT_3_EPSILON/2) {
+		/*
+		 * z = 0 was filtered out above.  All other cases must raise
+		 * inexact, but this is the only only that needs to do it
+		 * explicitly.
+		 */
+		raise_inexact();
 		return (z);
+	}
 
 	if (ax == 1 && ay < DBL_EPSILON) {
+#if 0 /* this only improves accuracy in an already relative accurate case */
 		if (ay > 2*DBL_MIN)
 			rx = - log(ay/2) / 2;
 		else
+#endif
 			rx = - (log(ay) - m_ln2) / 2;
 	} else
@@ -592,5 +630,5 @@
 	if (ax == 1)
 		ry = atan2(2, -ay) / 2;
-	else if (ay < FOUR_SQRT_MIN)
+	else if (ay < DBL_EPSILON)
 		ry = atan2(2*ay, (1-ax)*(1+ax)) / 2;
 	else
diff -u2 catrigf.c~ catrigf.c
--- catrigf.c~	2012-09-21 15:51:16.000000000 +0000
+++ catrigf.c	2012-09-21 21:34:41.140231000 +0000
@@ -45,4 +45,5 @@
 #undef isnan
 #define isnan(x)	((x) != (x))
+#define	raise_inexact()	do { volatile int junk = 1 + tiny; } while(0)
 #undef signbit
 #define signbit(x)	(__builtin_signbitf(x))
@@ -55,12 +56,12 @@
 m_e =			2.7182818285e0,		/*  0xadf854.0p-22 */
 m_ln2 =			6.9314718056e-1,	/*  0xb17218.0p-24 */
-m_pi_2 =		1.5707963268e0,		/*  0xc90fdb.0p-23 */
-pio2_hi =		1.5707962513e+00,	/* 0x3fc90fda */
-pio2_lo =		7.5497894159e-08,	/* 0x33a22168 */
+pio2_hi =		1.5707962513e0,		/*  0xc90fda.0p-23 */
 RECIP_EPSILON =		1/FLT_EPSILON,
-SQRT_EPSILON =		2048 * FLT_EPSILON,
+SQRT_3_EPSILON =	5.9801995673e-4,	/*  0x9cc471.0p-34 */
+SQRT_6_EPSILON =	8.4572793338e-4,	/*  0xddb3d7.0p-34 */
 SQRT_MIN =		0x1p-63;
 
 static const volatile float
+pio2_lo =		7.5497899549e-8,	/*  0xa22169.0p-47 */
 tiny =			0x1p-100;
 
@@ -108,6 +109,6 @@
 	if (y < FOUR_SQRT_MIN) {
 		*B_is_usable = 0;
-		*sqrt_A2my2 = scalbnf(A, FLT_MANT_DIG);
-		*new_y = scalbnf(y, FLT_MANT_DIG);
+		*sqrt_A2my2 = A * (2 / FLT_EPSILON);
+		*new_y= y * (2 / FLT_EPSILON);
 		return;
 	}
@@ -124,6 +125,7 @@
 			*sqrt_A2my2 = sqrtf(Amy*(A+y));
 		} else if (y > 1) {
-			*sqrt_A2my2 = scalbnf(x, 2*FLT_MANT_DIG) * y / sqrtf((y+1)*(y-1));
-			*new_y = scalbnf(y, 2*FLT_MANT_DIG);
+			*sqrt_A2my2 = x * (4/FLT_EPSILON/FLT_EPSILON) * y /
+			    sqrtf((y+1)*(y-1));
+			*new_y = y * (4/FLT_EPSILON/FLT_EPSILON);
 		} else {
 			*sqrt_A2my2 = sqrtf((1-y)*(1+y));
@@ -161,8 +163,10 @@
 	}
 
-	if ((x == 0 && y == 0) || (int)(1 + tiny) != 1)
+	if (x == 0 && y == 0)
 		return (z);
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON/4 && ay < SQRT_6_EPSILON/4)
 		return (z);
 
@@ -202,5 +206,5 @@
 		if (isinf(y))
 			return (cpackf(x+x, -y));
-		if (x == 0) return (cpackf(m_pi_2 + tiny, y+y));
+		if (x == 0) return (cpackf(pio2_hi + pio2_lo, y+y));
 		return (cpackf(x+0.0L+(y+0), x+0.0L+(y+0)));
 	}
@@ -215,8 +219,10 @@
 	}
 
-	if ((x == 1 && y == 0) || (int)(1 + tiny) != 1)
+	if (x == 1 && y == 0)
 		return (cpackf(0, -y));
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON/4 && ay < SQRT_6_EPSILON/4)
 		return (cpackf(pio2_hi - (x - pio2_lo), -y));
 
@@ -293,17 +299,24 @@
 real_part_reciprocal(float x, float y)
 {
-	int ex, ey;
-
-	if (isinf(x) || isinf(y))
-		return (0);
-	if (y == 0) return (1/x);
-	if (x == 0) return (x/y/y);
-	ex = ilogbf(x);
-	ey = ilogbf(y);
-	if (ex - ey >= FLT_MANT_DIG) return (1/x);
-	if (ey - ex >= FLT_MANT_DIG) return (x/y/y);
-	x = scalbnf(x, -ex);
-	y = scalbnf(y, -ex);
-	return scalbnf(x/(x*x + y*y), -ex);
+	float scale;
+	uint32_t hx, hy;
+	int32_t ix, iy;
+
+	GET_FLOAT_WORD(hx, x);
+	ix = hx & 0x7f800000;
+	GET_FLOAT_WORD(hy, y);
+	iy = hy & 0x7f800000;
+#define	BIAS	(FLT_MAX_EXP - 1)
+#define	CUTOFF	(FLT_MANT_DIG / 2 + 1)
+	if (ix - iy >= CUTOFF << 23 || isinf(x))
+		return (1/x);
+	if (iy - ix >= CUTOFF << 23)
+		return (x/y/y);
+	if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23)
+		return (x/(x*x + y*y));
+	SET_FLOAT_WORD(scale, 0x7f800000 - ix);
+	x *= scale;
+	y *= scale;
+	return (x/(x*x + y*y) * scale);
 }
 
@@ -321,6 +334,6 @@
 		return (cpackf(atanhf(x), y)); 
 
-	if ((x == 0 && y == 0) || (int)(1 + tiny) != 1)
-		return (z);
+	if (x == 0)
+		return (cpackf(x, atanf(y)));
 
 	if (isnan(x) || isnan(y)) {
@@ -328,5 +341,5 @@
 			return (cpackf(copysignf(0, x), y+y));
 		if (isinf(y))
-			return (cpackf(copysignf(0, x), copysignf(m_pi_2, y)));
+			return (cpackf(copysignf(0, x), copysignf(pio2_hi + pio2_lo, y)));
 		if (x == 0)
 			return (cpackf(x, y+y));
@@ -335,13 +348,17 @@
 
 	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
-		return (cpackf(copysignf(real_part_reciprocal(ax, ay), x), copysignf(m_pi_2, y)));
+		return (cpackf(real_part_reciprocal(x, y), copysignf(pio2_hi + pio2_lo, y)));
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	if (ax < SQRT_3_EPSILON/2 && ay < SQRT_3_EPSILON/2) {
+		raise_inexact();
 		return (z);
+	}
 
 	if (ax == 1 && ay < FLT_EPSILON) {
+#if 0
 		if (ay > 2*FLT_MIN)
 			rx = - logf(ay/2) / 2;
 		else
+#endif
 			rx = - (logf(ay) - m_ln2) / 2;
 	} else
@@ -350,5 +367,5 @@
 	if (ax == 1)
 		ry = atan2f(2, -ay) / 2;
-	else if (ay < FOUR_SQRT_MIN)
+	else if (ay < FLT_EPSILON)
 		ry = atan2f(2*ay, (1-ax)*(1+ax)) / 2;
 	else
diff -u2 catrigl.c~ catrigl.c
--- catrigl.c~	2012-09-21 16:22:40.000000000 +0000
+++ catrigl.c	2012-09-21 21:17:46.962698000 +0000
@@ -38,5 +38,4 @@
 #include <float.h>
 
-#include "fpmath.h"
 #include "invtrig.h"
 #include "math.h"
@@ -47,4 +46,5 @@
 #undef isnan
 #define isnan(x)	((x) != (x))
+#define	raise_inexact()	do { volatile int junk = 1 + tiny; } while(0)
 #undef signbit
 #define signbit(x)	(__builtin_signbitl(x)) 
@@ -56,5 +56,4 @@
 QUARTER_SQRT_MAX =	0x1p8189L,
 RECIP_EPSILON =		1/LDBL_EPSILON,
-SQRT_EPSILON =		1E-10L,
 SQRT_MIN =		0x1p-8191L;
 
@@ -62,14 +61,17 @@
 static const union IEEEl2bits
 um_e =		LD80C(0xadf85458a2bb4a9b,  1, 0, 2.71828182845904523536e0L),
-um_ln2 =	LD80C(0xb17217f7d1cf79ac, -1, 0, 6.93147180559945309417e-1L),
-um_pi_2 =	LD80C(0xc90fdaa22168c235,  0, 0, 1.57079632679489661923e0L);
+um_ln2 =	LD80C(0xb17217f7d1cf79ac, -1, 0, 6.93147180559945309417e-1L);
 #define		m_e	um_e.e
 #define		m_ln2	um_ln2.e
-#define		m_pi_2	um_pi_2.e
+static const long double
+/* The next 2 literals for non-i386.  Misrounding them on i386 is harmless. */
+SQRT_3_EPSILON = 5.70316273435758915310e-10,	/*  0x9cc470a0490973e8.0p-94 */
+SQRT_6_EPSILON = 8.06549008734932771664e-10;	/*  0xddb3d742c265539e.0p-94 */
 #elif LDBL_MANT_DIG == 113
 static const long double
 m_e =		2.71828182845904523536028747135266250e0L,	/* 0x15bf0a8b1457695355fb8ac404e7a.0p-111 */
 m_ln2 =		6.93147180559945309417232121458176568e-1L,	/* 0x162e42fefa39ef35793c7673007e6.0p-113 */
-m_pi_2 =	1.57079632679489661923132169163975144e0L;	/* 0x1921fb54442d18469898cc51701b8.0p-112 */
+SQRT_3_EPSILON = 2.40370335797945490975336727199878124e-17,	/*  0x1bb67ae8584caa73b25742d7078b8.0p-168 */
+SQRT_6_EPSILON = 3.39934988877629587239082586223300391e-17;	/*  0x13988e1409212e7d0321914321a55.0p-167 */
 #else
 #error "Unsupported long double format"
@@ -122,6 +124,6 @@
 	if (y < FOUR_SQRT_MIN) {
 		*B_is_usable = 0;
-		*sqrt_A2my2 = scalbnl(A, LDBL_MANT_DIG);
-		*new_y = scalbnl(y, LDBL_MANT_DIG);
+		*sqrt_A2my2 = A * (2 / LDBL_EPSILON);
+		*new_y= y * (2 / LDBL_EPSILON);
 		return;
 	}
@@ -138,6 +140,7 @@
 			*sqrt_A2my2 = sqrtl(Amy*(A+y));
 		} else if (y > 1) {
-			*sqrt_A2my2 = scalbnl(x, 2*LDBL_MANT_DIG) * y / sqrtl((y+1)*(y-1));
-			*new_y = scalbnl(y, 2*LDBL_MANT_DIG);
+			*sqrt_A2my2 = x * (4/LDBL_EPSILON/LDBL_EPSILON) * y /
+			    sqrtl((y+1)*(y-1));
+			*new_y = y * (4/LDBL_EPSILON/LDBL_EPSILON);
 		} else {
 			*sqrt_A2my2 = sqrtl((1-y)*(1+y));
@@ -175,8 +178,10 @@
 	}
 
-	if ((x == 0 && y == 0) || (int)(1 + tiny) != 1)
+	if (x == 0 && y == 0)
 		return (z);
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON/4 && ay < SQRT_6_EPSILON/4)
 		return (z);
 
@@ -216,5 +221,5 @@
 		if (isinf(y))
 			return (cpackl(x+x, -y));
-		if (x == 0) return (cpackl(m_pi_2 + tiny, y+y));
+		if (x == 0) return (cpackl(pio2_hi + pio2_lo, y+y));
 		return (cpackl(x+0.0L+(y+0), x+0.0L+(y+0)));
 	}
@@ -229,8 +234,10 @@
 	}
 
-	if ((x == 1 && y == 0) || (int)(1 + tiny) != 1)
+	if (x == 1 && y == 0)
 		return (cpackl(0, -y));
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON/4 && ay < SQRT_6_EPSILON/4)
 		return (cpackl(pio2_hi - (x - pio2_lo), -y));
 
@@ -307,17 +314,24 @@
 real_part_reciprocal(long double x, long double y)
 {
-	int ex, ey;
-
-	if (isinf(x) || isinf(y))
-		return (0);
-	if (y == 0) return (1/x);
-	if (x == 0) return (x/y/y);
-	ex = ilogbl(x);
-	ey = ilogbl(y);
-	if (ex - ey >= LDBL_MANT_DIG) return (1/x);
-	if (ey - ex >= LDBL_MANT_DIG) return (x/y/y);
-	x = scalbnl(x, -ex);
-	y = scalbnl(y, -ex);
-	return scalbnl(x/(x*x + y*y), -ex);
+	long double scale;
+	uint16_t hx, hy;
+	int16_t ix, iy;
+
+	GET_LDBL_EXPSIGN(hx, x);
+	ix = hx & 0x7fff;
+	GET_LDBL_EXPSIGN(hy, y);
+	iy = hy & 0x7fff;
+#define	BIAS	(LDBL_MAX_EXP - 1)
+#define	CUTOFF	(LDBL_MANT_DIG / 2 + 1)
+	if (ix - iy >= CUTOFF || isinf(x))
+		return (1/x);
+	if (iy - ix >= CUTOFF)
+		return (x/y/y);
+	if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF)
+		return (x/(x*x + y*y));
+	SET_LDBL_EXPSIGN(scale, 0x7fff - ix);
+	x *= scale;
+	y *= scale;
+	return (x/(x*x + y*y) * scale);
 }
 
@@ -333,8 +347,8 @@
 
 	if (y == 0 && ax <= 1)
-		return (cpackl(atanhl(x), y)); 
+		return (cpackl(atanh(x), y)); 	/* XXX need atanhl() */
 
-	if ((x == 0 && y == 0) || (int)(1 + tiny) != 1)
-		return (z);
+	if (x == 0)
+		return (cpackl(x, atanl(y)));
 
 	if (isnan(x) || isnan(y)) {
@@ -342,5 +356,5 @@
 			return (cpackl(copysignl(0, x), y+y));
 		if (isinf(y))
-			return (cpackl(copysignl(0, x), copysignl(m_pi_2, y)));
+			return (cpackl(copysignl(0, x), copysignl(pio2_hi + pio2_lo, y)));
 		if (x == 0)
 			return (cpackl(x, y+y));
@@ -349,13 +363,17 @@
 
 	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
-		return (cpackl(copysignl(real_part_reciprocal(ax, ay), x), copysignl(m_pi_2, y)));
+		return (cpackl(real_part_reciprocal(x, y), copysignl(pio2_hi + pio2_lo, y)));
 
-	if (ax < SQRT_EPSILON && ay < SQRT_EPSILON)
+	if (ax < SQRT_3_EPSILON/2 && ay < SQRT_3_EPSILON/2) {
+		raise_inexact();
 		return (z);
+	}
 
 	if (ax == 1 && ay < LDBL_EPSILON) {
+#if 0
 		if (ay > 2*LDBL_MIN)
 			rx = - logl(ay/2) / 2;
 		else
+#endif
 			rx = - (logl(ay) - m_ln2) / 2;
 	} else
@@ -364,5 +382,5 @@
 	if (ax == 1)
 		ry = atan2l(2, -ay) / 2;
-	else if (ay < FOUR_SQRT_MIN)
+	else if (ay < LDBL_EPSILON)
 		ry = atan2l(2*ay, (1-ax)*(1+ax)) / 2;
 	else


More information about the freebsd-numerics mailing list