Use of C99 extra long double math functions after r236148

Bruce Evans brde at optusnet.com.au
Sun Aug 12 23:10:32 UTC 2012


Replying again to this...

On Fri, 20 Jul 2012, Stephen Montgomery-Smith wrote:

> I worked quite a bit on my clog function last night.  Could you look at this 
> one?

I ended up deleting most of your changes in this one.

> The trick for when hypot(x,y) is close to 1 can only work so well, and you 
> are testing it out of its range of applicability.  But for the special case x 
> is close to 1, and y is close to zero, I have a much better formula.  I can 
> produce three other formula so that I can handle |x| close to 1, y small, and 
> |y| close to 1 and x small.
>
> After fixing it up, could you send it back as an attachment?  That will make 
> it easier for me to put it back into my system, and work more on it.

It will be painful for everyone to understand and merge.  This time as
an attachment as well as (partly) inline with commentary.

% #include <complex.h>
% #include <float.h>
% #include <math.h>
% 
% #include "math_private.h"
% 
% static inline void
% xonorm(double *a, double *b)
% {
% 	double w;
% 
% 	if (fabs(*a) < fabs(*b)) {
% 		w = *a;
% 		*a = *b;
% 		*b = w;
% 	}
% 	STRICT_ASSIGN(double, w, *a + *b);
% 	*b = (*a - w) + *b;
% 	*a = w;
% }
% 
% #define	xnorm(a, b)	xonorm(&(a), &(b))
% 
% #define	xspadd(a, b, c) do {	\
% 	double __tmp;		\
% 				\
% 	__tmp = (c);		\
% 	xonorm(&__tmp, &(a));	\
% 	(b) += (a);		\
% 	(a) = __tmp;		\
% } while (0)
% 
% static inline void
% xonormf(float *a, float *b)
% {
% 	float w;
% 
% 	if (fabsf(*a) < fabsf(*b)) {
% 		w = *a;
% 		*a = *b;
% 		*b = w;
% 	}
% 	STRICT_ASSIGN(float, w, *a + *b);
% 	*b = (*a - w) + *b;
% 	*a = w;
% }
% 
% #define	xnormf(a, b)	xonormf(&(a), &(b))
% 
% #define	xspaddf(a, b, c) do {	\
% 	float __tmp;		\
% 				\
% 	__tmp = (c);		\
% 	xonormf(&__tmp, &(a));	\
% 	(b) += (a);		\
% 	(a) = __tmp;		\
% } while (0)

Above are my standard extra-precision macros from my math_private.h,
cut down for use here and named with an x.  Then expanded and pessimized
to swap the args.  Optimal callers ensure that they don't need swapping.
See s_fma.c for a fuller algorithm that doesn't need swapping but does
more operations.  I started to copy that, but s_fma.c doesn't seem to
have anything as convenient as xspaddf().  Further expanded and
pessimized() to do STRICT_ASSIGN().  Optimal callers use float_t and
double_t so that STRICT_ASSIGN() is unnecessary.  Compiler bugs break
algorithms like the above on i386 unless float_t and double_t, or
STRICT_ASSIGN() are used.  Fixing the bugs would give the same slowness
as STRICT_ASSIGN(), but globally by doing it for all assignments, so
even I now consider these bugs to be features and C standards to be
broken for not allowing them.  I first discussed fixing them with gcc
maintainers over 20 years ago.

% 
% double complex
% clog(double complex z)
% {
% 	double x, y, h, t1, t2, t3;
% 	double ax, ay, x0, y0, x1, y1;
% 
% 	x = creal(z);
% 	y = cimag(z);
% 
% 	/* Handle NaNs using the general formula to mix them right. */
% 	if (x != x || y != y)
% 		return (cpack(log(hypot(x, y)), atan2(y, x)));

I replaced all my messy ifdefs for this by the function call.  Also
changes isnan() to a not-so-magic test.  Though isnan() is about the
only FP classification macro that I trust the compiler for.

% 
% 	ax = fabs(x);
% 	ay = fabs(y);
% 	if (ax < ay) {
% 		t1 = ax;
% 		ax = ay;
% 		ay = t1;
% 	}

I got tired of repeating fabs()'s, and need to know which arg is larger.

% 
% 	/*
% 	 * To avoid unnecessary overflow, if x or y are very large, divide x
% 	 * and y by M_E, and then add 1 to the logarithm.  This depends on
% 	 * M_E being larger than sqrt(2).
% 	 *
% 	 * XXX bugs to fix:
% 	 * - underflow if one of x or y is tiny.  e_hypot.c avoids this
% 	 *   problem, and optimizes for the case that the ratio of the
% 	 *   args is very large, by returning the absolute value of
% 	 *   the largest arg in this case.
% 	 * - not very accurate.  Could divide by 2 and add log(2) in extra
% 	 *   precision.  A general scaling step that divides by 2**k and
% 	 *   adds k*log(2) in extra precision might be good for reducing
% 	 *   the range so that we don't have to worry about overflow or
% 	 *   underflow in the general steps.  This needs the previous step
% 	 *   of eliminating large ratios of args so that the args can be
% 	 *   scaled on the same scale.
% 	 * - s/are/is/ in comment.
% 	 */
% 	if (ax > 1e308)
% 		return (cpack(log(hypot(x / M_E, y / M_E)) + 1, atan2(y, x)));

No need to avoid infinities here.  No need to test y now that we know
ax is largest.

% 
% 	if (ax == 1) {
% 		if (ay < 1e-150)
% 			return (cpack((ay * 0.5) * ay, atan2(y, x)));
% 		return (cpack(log1p(ay * ay) * 0.5, atan2(y, x)));
% 	}

Special case mainly for when (ay * ay) is rounded down to the smallest
denormal.

% 
% 	/*
% 	 * Because atan2 and hypot conform to C99, this also covers all the
% 	 * edge cases when x or y are 0 or infinite.
% 	 */
% 	if (ax < 1e-50 || ay < 1e-50 || ax > 1e50 || ay > 1e50)
% 		return (cpack(log(hypot(x, y)), atan2(y, x)));

Not quite right.  It is the ratio that matters more than these magic
magnitudes.

% 
% 	/* We don't need to worry about overflow in x*x+y*y. */
% 
% 	/*
% 	 * Take extra care so that ULP of real part is small if h is
% 	 * moderately close to 1.  If one only cares about the relative error
% 	 * of the whole result (real and imaginary part taken together), this
% 	 * algorithm is overkill.
% 	 *
% 	 * This algorithm does a rather good job if |h-1| >= 1e-5.  The only
% 	 * algorithm that I can think of that would work for any h close to
% 	 * one would require hypot(x,y) being computed using double double
% 	 * precision precision (i.e. double as many bits in the mantissa as
% 	 * double precision).
% 	 *
% 	 * x0 and y0 are good approximations to x and y, but have their bits
% 	 * trimmed so that double precision floating point is capable of
% 	 * calculating x0*x0 + y0*y0 - 1 exactly.
% 	 */

Comments not all updated.  This one especially out of date.

% 	x0 = ax;
% 	SET_LOW_WORD(x0, 0);
% 	x1 = ax - x0;
% 	y0 = ay;
% 	SET_LOW_WORD(y0, 0);
% 	y1 = ay - y0;

Sloppy decomposition with only 21 bits in hi part.  Since we are short
of bits, we shouldn't burn 5 like this for efficiency.  In float precision,
all the multiplications are exact since 24 splits exactly.

% 	/* Notice that mathematically, h = t1*(1+t3). */
% #if 0

Old version.  Still drops bits unnecessarily, although I added several
full hi/lo decomposition steps to it.

% 	t1 = x0 * x0;		/* Exact. */
% 	t2 = y0 * y0;		/* Exact. */

Comments not quite right.  All of the multiplications are as exact as
possible.  They would all be exact if we could split 53 in half, and
did so.

% 	STRICT_ASSIGN(double, t3, t1 + t2);
% 	t2 = (t1 - t3) + t2;
% 	t1 = t3;		/* Now t1+t2 is hi+lo for x0*x0+y0*y0.*/
% 	t2 += 2 * x0 * x1;
% 	STRICT_ASSIGN(double, t3, t1 + t2);
% 	t2 = (t1 - t3) + t2;
% 	t1 = t3;
% 	t2 += 2 * y0 * y1;
% 	STRICT_ASSIGN(double, t3, t1 + t2);
% 	t2 = (t1 - t3) + t2;
% 	t1 = t3;
% 	t2 += x1 * x1 + y1 * y1;
% 	STRICT_ASSIGN(double, t3, t1 + t2);
% 	t2 = (t1 - t3) + t2;
% 	t1 = t3;		/* Now t1+t2 is hi+lo for x*x+y*y.*/
% #else
% 	t1 = x1 * x1;
% 	t2 = y1 * y1;
% 	xnorm(t1, t2);
% 	xspadd(t1, t2, 2 * y0 * y1);
% 	xspadd(t1, t2, 2 * x0 * x1);
% 	xspadd(t1, t2, y0 * y0);
% 	xspadd(t1, t2, x0 * x0);
% 	xnorm(t1, t2);

It was too hard to turn the above into this without using the macros.
Now all the multiplications are as exact as possible, and extra precision
is used for all the additions (this mattered even for the first 2 terms).
Terms should be added from the smallest to the highest.  This happens in
most cases and some bits are lost when it isn't.

% #endif
% 	t3 = t2 / t1;
% 	/*
% 	 * |t3| ~< 2**-22 since we work with 24 extra bits of precision, so
% 	 * log1p(t3) can be evaluated with about 13 extra bits of precision
% 	 * using 2 terms of its power series.  But there are complexities
% 	 * to avoid underflow.
% 	 */

Complexities to avoid underflow incomplete and not here yet.

Comment otherwise anachronistic/anaspac(sp?)istic.  22 and 13 are for the
float version.  The final xnorm() step (to maximize accuracy) ensures that
|t3| < 2**-24 precisely for floats (half an ulp).  2**-53 for doubles.

% 	return (cpack((t3 - t3*0.5*t3 + log(t1)) * 0.5, atan2(y, x)));

The second term for log1p(t3) is probably nonsense.  We lose by
inaccuracies in t3 itself.

% }
% 
% float complex
% clogf(float complex z)
% {

This is a routine translation.  It duplicates too many comments.  Only
this has been tested much with the latest accuracy fixes.

% 	float x, y, h, t1, t2, t3;
% 	float ax, ay, x0, y0, x1, y1;
% 	uint32_t hx, hy;
% 
% 	x = crealf(z);
% 	y = cimagf(z);
% 
% 	/* Handle NaNs using the general formula to mix them right. */
% 	if (x != x || y != y)
% 		return (cpack(log(hypot(x, y)), atan2(y, x)));

Oops, copied too much -- forgot to add f's.

Bruce
-------------- next part --------------
#include <complex.h>
#include <float.h>
#include <math.h>

#include "math_private.h"

/*
 * Renormalize the pair (*a, *b): on return *a is the double nearest to
 * the real sum *a + *b and *b is the exact residual, so the value
 * *a + *b is unchanged as a real number.
 *
 * This is Dekker's "fast two-sum", which is exact only when
 * |*a| >= |*b|; the initial swap establishes that precondition.  The
 * statement order below must not be rearranged: (*a - w) + *b recovers
 * the rounding error of w = *a + *b exactly.  STRICT_ASSIGN() forces w
 * to be rounded to double precision (guards against extra-precision
 * registers on i386 breaking the error recovery).
 */
static inline void
xonorm(double *a, double *b)
{
	double w;

	/* Ensure |*a| >= |*b|, the fast-two-sum precondition. */
	if (fabs(*a) < fabs(*b)) {
		w = *a;
		*a = *b;
		*b = w;
	}
	STRICT_ASSIGN(double, w, *a + *b);
	*b = (*a - w) + *b;	/* exact rounding error of the addition */
	*a = w;
}

/* Convenience wrapper: renormalize the hi/lo pair (a, b) in place. */
#define	xnorm(a, b)	xonorm(&(a), &(b))

/*
 * Add c to the extra-precision value a + b, where a is the hi word and
 * b is the lo word.  c + a is formed exactly via xonorm(); its low part
 * is folded into b and its high part becomes the new a.  The result is
 * not fully renormalized, so callers finish a chain of xspadd()s with a
 * final xnorm().
 */
#define	xspadd(a, b, c) do {	\
	double __tmp;		\
				\
	__tmp = (c);		\
	xonorm(&__tmp, &(a));	\
	(b) += (a);		\
	(a) = __tmp;		\
} while (0)

/*
 * Float version of xonorm(): renormalize (*a, *b) so that *a is the
 * float nearest to *a + *b and *b is the exact residual.
 *
 * Dekker's fast two-sum; exact only when |*a| >= |*b|, hence the swap.
 * Do not rearrange the statement order: (*a - w) + *b recovers the
 * rounding error of w = *a + *b exactly.  STRICT_ASSIGN() forces w to
 * float precision (guards against i386 extra-precision registers).
 */
static inline void
xonormf(float *a, float *b)
{
	float w;

	/* Ensure |*a| >= |*b|, the fast-two-sum precondition. */
	if (fabsf(*a) < fabsf(*b)) {
		w = *a;
		*a = *b;
		*b = w;
	}
	STRICT_ASSIGN(float, w, *a + *b);
	*b = (*a - w) + *b;	/* exact rounding error of the addition */
	*a = w;
}

/* Convenience wrapper: renormalize the hi/lo float pair (a, b) in place. */
#define	xnormf(a, b)	xonormf(&(a), &(b))

/*
 * Float version of xspadd(): add c to the extra-precision value a + b
 * (a = hi, b = lo).  Not fully renormalized; finish a chain of these
 * with a final xnormf().
 */
#define	xspaddf(a, b, c) do {	\
	float __tmp;		\
				\
	__tmp = (c);		\
	xonormf(&__tmp, &(a));	\
	(b) += (a);		\
	(a) = __tmp;		\
} while (0)

/*
 * clog(z) = log(|z|) + I*carg(z).
 *
 * The real part is computed as log(x*x + y*y) / 2, with x*x + y*y
 * accumulated in extra precision, instead of log(hypot(x, y)), so that
 * the ULP error of the real part stays small when |z| is near 1 (where
 * log(|z|) cancels catastrophically).  The imaginary part is always
 * atan2(y, x).
 *
 * Changes from the previous draft: removed the unused local `h' and
 * the dead `#if 0' alternative accumulation; applied the comment's own
 * "s/are/is/" fix; no floating-point operations were altered.
 */
double complex
clog(double complex z)
{
	double x, y, t1, t2, t3;
	double ax, ay, x0, y0, x1, y1;

	x = creal(z);
	y = cimag(z);

	/* Handle NaNs using the general formula to mix them right. */
	if (x != x || y != y)
		return (cpack(log(hypot(x, y)), atan2(y, x)));

	/*
	 * Order the magnitudes so that ax >= ay; only absolute values
	 * matter below.  x and y are kept unswapped for atan2().
	 */
	ax = fabs(x);
	ay = fabs(y);
	if (ax < ay) {
		t1 = ax;
		ax = ay;
		ay = t1;
	}

	/*
	 * To avoid unnecessary overflow, if x or y is very large, divide x
	 * and y by M_E, and then add 1 to the logarithm.  This depends on
	 * M_E being larger than sqrt(2).
	 *
	 * XXX bugs to fix:
	 * - underflow if one of x or y is tiny.  e_hypot.c avoids this
	 *   problem, and optimizes for the case that the ratio of the
	 *   args is very large, by returning the absolute value of
	 *   the largest arg in this case.
	 * - not very accurate.  Could divide by 2 and add log(2) in extra
	 *   precision.  A general scaling step that divides by 2**k and
	 *   adds k*log(2) in extra precision might be good for reducing
	 *   the range so that we don't have to worry about overflow or
	 *   underflow in the general steps.  This needs the previous step
	 *   of eliminating large ratios of args so that the args can be
	 *   scaled on the same scale.
	 */
	if (ax > 1e308)
		return (cpack(log(hypot(x / M_E, y / M_E)) + 1, atan2(y, x)));

	/*
	 * |x| == 1 exactly: log(|z|) reduces to log1p(ay*ay)/2.  For tiny
	 * ay, use the first term of the log1p power series; this mainly
	 * avoids (ay * ay) being rounded down to the smallest denormal.
	 */
	if (ax == 1) {
		if (ay < 1e-150)
			return (cpack((ay * 0.5) * ay, atan2(y, x)));
		return (cpack(log1p(ay * ay) * 0.5, atan2(y, x)));
	}

	/*
	 * Because atan2 and hypot conform to C99, this also covers all the
	 * edge cases when x or y are 0 or infinite.
	 *
	 * XXX these magnitude thresholds are heuristic; it is really the
	 * ratio of the args that matters, not these magic magnitudes.
	 */
	if (ax < 1e-50 || ay < 1e-50 || ax > 1e50 || ay > 1e50)
		return (cpack(log(hypot(x, y)), atan2(y, x)));

	/* We don't need to worry about overflow in x*x+y*y. */

	/*
	 * Take extra care so that the ULP of the real part is small when
	 * x*x + y*y is moderately close to 1.  If one only cares about the
	 * relative error of the whole result (real and imaginary parts
	 * taken together), this algorithm is overkill.
	 *
	 * x0 and y0 are ax and ay with the low word of the mantissa
	 * cleared (leaving 21 mantissa bits), so that the products
	 * x0*x0, y0*y0, x0*x1 and y0*y1 below lose few or no bits.
	 */
	x0 = ax;
	SET_LOW_WORD(x0, 0);
	x1 = ax - x0;		/* ax = x0 + x1 exactly */
	y0 = ay;
	SET_LOW_WORD(y0, 0);
	y1 = ay - y0;		/* ay = y0 + y1 exactly */
	/*
	 * Accumulate x*x + y*y = t1*(1 + t3) in extra precision (t1 hi,
	 * t2 lo), adding the terms roughly from smallest to largest so
	 * that few bits are lost.
	 */
	t1 = x1 * x1;
	t2 = y1 * y1;
	xnorm(t1, t2);
	xspadd(t1, t2, 2 * y0 * y1);
	xspadd(t1, t2, 2 * x0 * x1);
	xspadd(t1, t2, y0 * y0);
	xspadd(t1, t2, x0 * x0);
	xnorm(t1, t2);
	t3 = t2 / t1;		/* relative correction; small after xnorm() */
	/*
	 * log(t1*(1+t3)) = log(t1) + log1p(t3); log1p(t3) is approximated
	 * by 2 terms of its power series.  NOTE(review): complexities to
	 * avoid underflow in these terms are not handled yet.
	 */
	return (cpack((t3 - t3*0.5*t3 + log(t1)) * 0.5, atan2(y, x)));
}

/*
 * clogf(z) = logf(|z|) + I*cargf(z).  Float version of clog(); see the
 * double version for the algorithm discussion.
 *
 * Fixes from the previous draft: the NaN branch and the huge-|z| branch
 * called the double-precision cpack()/log()/hypot()/atan2() in this
 * float function ("forgot to add f's"); now they use the float
 * variants.  Also removed the unused local `h' and the dead `#if 0'
 * alternative accumulation.  No other operations were altered.
 */
float complex
clogf(float complex z)
{
	float x, y, t1, t2, t3;
	float ax, ay, x0, y0, x1, y1;
	uint32_t hx, hy;

	x = crealf(z);
	y = cimagf(z);

	/* Handle NaNs using the general formula to mix them right. */
	if (x != x || y != y)
		return (cpackf(logf(hypotf(x, y)), atan2f(y, x)));

	/*
	 * Order the magnitudes so that ax >= ay; only absolute values
	 * matter below.  x and y are kept unswapped for atan2f().
	 */
	ax = fabsf(x);
	ay = fabsf(y);
	if (ax < ay) {
		t1 = ax;
		ax = ay;
		ay = t1;
	}

	/*
	 * To avoid unnecessary overflow, if x or y is very large, divide x
	 * and y by M_E, and then add 1 to the logarithm.  This depends on
	 * M_E being larger than sqrt(2).
	 *
	 * XXX bugs to fix: see the double version (clog()).
	 */
	if (ax > 1e38F)
		return (cpackf(logf(hypotf(x / (float)M_E, y / (float)M_E)) + 1,
		    atan2f(y, x)));

	/*
	 * |x| == 1 exactly: logf(|z|) reduces to log1pf(ay*ay)/2.  For
	 * tiny ay, use the first term of the log1pf power series; this
	 * mainly avoids (ay * ay) being rounded down to the smallest
	 * denormal.
	 */
	if (ax == 1) {
		if (ay < 1e-18F)
			return (cpackf((ay * 0.5F) * ay, atan2f(y, x)));
		return (cpackf(log1pf(ay * ay) * 0.5F, atan2f(y, x)));
	}

	/*
	 * Because atan2f and hypotf conform to C99, this also covers all
	 * the edge cases when x or y are 0 or infinite.
	 *
	 * XXX these magnitude thresholds are heuristic; it is really the
	 * ratio of the args that matters, not these magic magnitudes.
	 */
	if (ax < 1e-10F || ay < 1e-10F || ax > 1e10F || ay > 1e10F)
		return (cpackf(logf(hypotf(x, y)), atan2f(y, x)));

	/* We don't need to worry about overflow in x*x+y*y. */

	/*
	 * Take extra care so that the ULP of the real part is small when
	 * x*x + y*y is moderately close to 1.
	 *
	 * x0 and y0 are x and y with the low 12 mantissa bits cleared
	 * (12 bits kept), so that the products x0*x0, y0*y0, x0*x1 and
	 * y0*y1 below are exact in float precision (24 splits evenly).
	 * The signs of x and y are retained in x0/y0; that is harmless
	 * because every term below is a square or a product of like-signed
	 * halves of the same argument.
	 */
	GET_FLOAT_WORD(hx, x);
	SET_FLOAT_WORD(x0, hx & 0xfffff000);
	x1 = x - x0;		/* x = x0 + x1 exactly */
	GET_FLOAT_WORD(hy, y);
	SET_FLOAT_WORD(y0, hy & 0xfffff000);
	y1 = y - y0;		/* y = y0 + y1 exactly */
	/*
	 * Accumulate x*x + y*y = t1*(1 + t3) in extra precision (t1 hi,
	 * t2 lo), adding the terms roughly from smallest to largest so
	 * that few bits are lost.
	 */
	t1 = x1 * x1;
	t2 = y1 * y1;
	xnormf(t1, t2);
	xspaddf(t1, t2, 2 * y0 * y1);
	xspaddf(t1, t2, 2 * x0 * x1);
	xspaddf(t1, t2, y0 * y0);
	xspaddf(t1, t2, x0 * x0);
	xnormf(t1, t2);
	t3 = t2 / t1;		/* relative correction; small after xnormf() */
	/*
	 * logf(t1*(1+t3)) = logf(t1) + log1pf(t3); log1pf(t3) is
	 * approximated by 2 terms of its power series.  NOTE(review):
	 * complexities to avoid underflow in these terms are not
	 * handled yet.
	 */
	return (cpackf((t3 - t3*0.5F*t3 + logf(t1)) * 0.5F, atan2f(y, x)));
}


More information about the freebsd-numerics mailing list