Use of C99 extra long double math functions after r236148

Sun Aug 12 23:10:05 UTC 2012

On Fri, 20 Jul 2012, Bruce Evans wrote:

> I was going to leave all the fixes to you, but now want to try the
> approximation for tiny y :-).

This version now beats Apple clog() for accuracy.  The maximum difference
observed is down from the exa-ulp range to 16 ulps with all of the 4-16
ulp differences checked against pari being innaccuracies in Apple clog().

2**28 cases were tested, but most of the errors found look like this:

% x =                               0x3fe0000000000000 0.5
% y =                               0x3fec000000000000 0.875
% loghypota(x, y) = 0x3ff7fe054587e01ea000 0x3f7fc0a8b0fc03d4 0.00775209326798261336156
% loghypot(x, y)  = 0x3ff7fe054587e01f2000 0x3f7fc0a8b0fc03e4 0.00775209326798262723934
% err = +0x8000 16.00000
% pari log(0.5+0.875*I):
% 0.007752093267982627075427023021 + 1.051650212548373667459867312*I

so my tests barely cover fractions like 1/8 and the coverage may be too
limited.  New errors kept turning up as I expanded the coverage.

% #include <complex.h>
% #include <float.h>
% #include <math.h>
% #include <fenv.h>
% 
% #include "math_private.h"
% 
% /*
%  * gcc doesn't implement complex multiplication or division correctly, so we
%  * need to handle infinities specially. We turn on this pragma to notify
%  * conforming c99 compilers that the fast-but-incorrect code that gcc
%  * generates is acceptable, since the special cases have already been
%  * handled.
%  */
% #pragma	STDC CX_LIMITED_RANGE	ON
% 
% double	complex
% clog(double complex z)
% {
% 	double x, y, h, t1, t2, t3;
% 	double x0, y0, x1, y1;
% 
% 	x = creal(z);
% 	y = cimag(z);
% 
% #define	NANMIX_APPLE_CLOG_COMPAT	1
% 	/* Handle special cases when x or y is NAN. */
% 	if (isnan(x)) {
% 		if (isinf(y))
% 			return (cpack(INFINITY, NAN));
% 		else {
% #if NANMIX_HYPOTF_COMPAT
% 			y = fabs(y + 0);
% 			t1 = (y - y) / (y - y);	/* Raise invalid flag if y is
% 						 * not NaN */
% 	       		t1 = fabs(x + 0) + t1;	/* Mix NaN(s). */
% #elif NANMIX_APPLE_CLOG_COMPAT
% 			/* No actual mixing. */
% 			return (cpack(x, copysign(x, y)));
% #else
% 			t1 = (y - y) / (y - y);	/* Raise invalid flag if y is
% 						 * not NaN */
% #endif
% 			return (cpack(t1, t1));
% 		}
% 	} else if (isnan(y)) {
% 		if (isinf(x))
% 			return (cpack(INFINITY, NAN));
% 		else {
% #ifdef NANMIX_HYPOTF_COMPAT
% 			x = fabs(x + 0);
% 			t1 = (x - x) / (x - x);	/* Raise invalid flag if x is
% 						 * not NaN */
% 	       		t1 = t1 + fabs(y + 0);	/* Mix NaN(s). */
% #elif NANMIX_APPLE_CLOG_COMPAT
% 			/* No actual mixing. */
% 			return (cpack(y, y));
% #else
% 			t1 = (x - x) / (x - x);	/* Raise invalid flag if x is
% 						 * not NaN */
% #endif
% 			return (cpack(t1, t1));
% 		}
% 	} else if (isfinite(x) && isfinite(y) &&
% 	    (fabs(x) > 1e308 || fabs(y) > 1e308))
% 		/*
% 		 * To avoid unnecessary overflow, if x or y are very large,
% 		 * divide x and y by M_E, and then add 1 to the logarithm.
% 		 * This depends on M_E being larger than sqrt(2).
% 		 */
% 		return (cpack(log(hypot(x / M_E, y / M_E)) + 1, atan2(y, x)));

Important fix here.  1 was added to the log() arg instead of to log().

% 	else if (fabs(x) < 1e-50 && fabs(y) != 1 ||
% 	    fabs(y) < 1e-50 && fabs(x) != 1 ||
% 	    fabs(x) > 1e50 || fabs(y) > 1e50)

The special case for |x| == 1 and |y| == 1 was defeated by returning for
it here.

% 		/*
% 		 * Because atan2 and hypot conform to C99, this also covers
% 		 * all the edge cases when x or y are 0 or infinite.
% 		 */
% 		return (cpack(log(hypot(x, y)), atan2(y, x)));
% 	else {
% 		/* We don't need to worry about overflow in x*x+y*y. */
% 		h = x * x + y * y;
% 		if (h < 0.1 || h > 10)
% 			return (cpack(log(h) / 2, atan2(y, x)));
% 		/* Take extra care if h is moderately close to 1 */
% 		else {
% #if 1
% 			if (fabs(x) == 1)
% 				return (cpack(log1p(y * y) / 2,
% 				    atan2(y, x)));
% 			if (fabs(y) == 1)
% 				return (cpack(log1p(x * x) / 2,
% 				    atan2(y, x)));
% #endif

Special case.  It seems too special, but Apple clog() doesn't do any more,
and this with the other special cases is enough to beat Apple clog().

% 			/*
% 			 * x0 and y0 are good approximations to x and y, but
% 			 * have their bits trimmed so that double precision
% 			 * floating point is capable of calculating x0*x0 +
% 			 * y0*y0 - 1 exactly.
% 			 */

The only way for x*x + y*y to be _very_ near 1 in infinite precision
is for |x| or y| to be 1 (I think).  Other cases are bounded away from
1, and if you are lucky the bound is fairly far from 1 so that sloppier
approximations work OK.  Mathematicians should determine the bound
exactly using continued fractions or something like they do for
approximations to N*Pi/2.  This becomes especially interesting in high
precisions where you can't hope to get near the worst case by random
testing.

% 			x0 = ldexp(floor(ldexp(x, 24)), -24);
% 			x1 = x - x0;
% 			y0 = ldexp(floor(ldexp(y, 24)), -24);
% 			y1 = y - y0;

This has a chance of working iff the bound away from 1 is something like
2**-24.  Otherwise, multiplying by 2**24 and flooring a positive value
will just produce 0.  2**-24 seems much too small a bound.  My test
coverage is not wide enough to hit many bad cases.

% 			/* Notice that mathematically, h = t1*(1+t3). */
% 			t1 = x0 * x0 + y0 * y0;
% 			t2 = 2 * x0 * x1 + x1 * x1 + 2 * y0 * y1 + y1 * y1;
% 			t3 = t2 / t1;
% 			return (cpack((log1p(t1 - 1) + log1p(t3)) / 2,
% 			    atan2(y, x)));
% 		}
% 	}
% }

Bruce