bin/170206: [msun] [patch] complex arcsinh, log, etc.

Sun Jul 29 21:10:14 UTC 2012

The following reply was made to PR bin/170206; it has been noted by GNATS.

From: Stephen Montgomery-Smith <stephen at missouri.edu>
To: bug-followup at FreeBSD.org, stephen at FreeBSD.org
Cc:  
Subject: Re: bin/170206: [msun] [patch] complex arcsinh, log, etc.
Date: Sun, 29 Jul 2012 16:07:17 -0500

 This is a multi-part message in MIME format.
 --------------070709080600030802090207
 Content-Type: text/plain; charset=ISO-8859-1; format=flowed
 Content-Transfer-Encoding: 7bit

 I found a bug in catanh when |z| is very large.  I believe I have 
 corrected the bug in catrig.c.  I made some other small changes also, 
 mostly style and comments.

 --------------070709080600030802090207
 Content-Type: text/x-csrc;
  name="catrig.c"
 Content-Transfer-Encoding: 7bit
 Content-Disposition: attachment;
  filename="catrig.c"

 #include <complex.h>
 #include <float.h>
 #include <math.h>

 #include "math_private.h"

 /*
  * gcc doesn't implement complex multiplication or division correctly,
  * so we need to handle infinities specially. We turn on this pragma to
  * notify conforming c99 compilers that the fast-but-incorrect code that
  * gcc generates is acceptable, since the special cases have already been
  * handled.
  */
 #pragma	STDC CX_LIMITED_RANGE	ON

 double complex clog(double complex z);

 static const double
 one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
 huge=  1.00000000000000000000e+300;

 /*
  * Testing indicates that all these functions are accurate up to 4 ULP.
  */

 /*
  * The algorithm is very close to that in "Implementing the complex arcsine
  * and arccosine functions using exception handling" by T. E. Hull,
  * Thomas F. Fairgrieve, and Ping Tak Peter Tang, published in ACM
  * Transactions on Mathematical Software, Volume 23 Issue 3, 1997, Pages
  * 299-335, http://dl.acm.org/citation.cfm?id=275324
  *
  * casinh(x+I*y) = sign(x)*log(A+sqrt(A*A-1)) + sign(y)*I*asin(B)
  * where
  * A = 0.5(|z+I| + |z-I|) = f(x,1+y) + f(x,1-y) + 1
  * B = 0.5(|z+I| - |z-I|)
  * z = x+I*y
  * f(x,y) = 0.5*(hypot(x,y)-y)
  *
  * We also use
  * asin(B) = atan2(sqrt(A*A-y*y),y)
  * A-y = f(x,y+1) + f(x,y-1).
  *
  * Much of the difficulty comes because computing f(x,y) may produce
  * underflows.
  */

 /*
  * Returns 0.5*(hypot(x,y)-y).  It assumes x is positive, and that y does
  * not satisfy the inequalities 0 < fabs(y) < 1e-20.
  * If reporting the answer risks an underflow, the underflow flag is set,
  * and it returns 0.5*(hypot(x,y)-y)/x/x.
  */
 inline static double
 f(double x, double y, int *underflow)
 {
 	if (x==0) {
 		*underflow = 0;
 		if (y > 0)
 			return 0;
 		return -y;
 	}
 	if (y==0) {
 		*underflow = 0;
 		return 0.5*x;
 	}
 	if (x < 1e-100 && x < y) {
 		*underflow = 1;
 		return 0.5/(hypot(x,y)+y);
 	}
 	if (x < y) {
 		*underflow = 0;
 		return 0.5*x*x/(hypot(x,y)+y);
 	}
 	*underflow = 0;
 	return 0.5*(hypot(x,y)-y);
 }

 /*
  * All the hard work is contained in this function.
  * Upon return:
  * rx = Re(casinh(x+I*y))
  * B_is_usable is set to 1 if the value of B is usable.
  * If B_is_usable is set to 0, A2my2 = A*A-y*y.
  */
 inline static void
 do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B, double *A2my2)
 {
 	double R, S, A, fp, fm;
 	int fpuf, fmuf;

 	R = hypot(x,y+1);
 	S = hypot(x,y-1);
 	A = 0.5*(R + S);

 	/*
 	 * The paper by Hull et al suggests using A < 1.5.  Experiment
 	 * suggested A < 10 worked better.
 	 */
 	if (A < 10 && isfinite(A)) {
 		fp = f(x,1+y,&fpuf);
 		fm = f(x,1-y,&fmuf);
 		if (fpuf == 1 && fmuf == 1) {
 			if (huge+x>one) /* set inexact flag. */
 				*rx = log1p(x*sqrt((fp+fm)*(A+1)));
 		} else if (fmuf == 1) {
 			/*
 			 * Overflow not possible because fp < 1e50 and
 			 * x > 1e-100.
 			 * Underflow not possible because either fm=0 or fm
 			 * approximately bigger than 1e-200.
 			 */
 			if (huge+x>one) /* set inexact flag. */
 				*rx = log1p(fp+sqrt(x)*sqrt((fp/x+fm*x)*(A+1)));
 		} else if (fpuf == 1) {
 			/* Similar arguments against over/underflow. */
 			if (huge+x>one) /* set inexact flag. */
 				*rx = log1p(fm+sqrt(x)*sqrt((fm/x+fp*x)*(A+1)));
 		} else {
 			*rx = log1p(fp + fm + sqrt((fp+fm)*(A+1)));
 		}
 	} else if (isinf(y))
 		*rx = y;
 	else
 		*rx = log(A + sqrt(A*A-1));

 	if (!isfinite(y)) {
 		*B_is_usable = 0;
 		if (isfinite(x)) *A2my2 = 0;
 		else if (isnan(x)) *A2my2 = x;
 		else *A2my2 = y;
 		return;
 	}
 	if (isnan(x) && y == 0) {
 		*B_is_usable = 0;
 		*A2my2 = 1;
 		return;
 	}

 	*B = y/A; /* = 0.5*(R - S) */
 	*B_is_usable = 1;

 	/*
 	 * The paper by Hull et al suggests using B > 0.6417.  I just made up
 	 * the number 0.5.  It seems to work.
 	 */
 	if (*B > 0.5) {
 		*B_is_usable = 0;
 		fp = f(x,y+1,&fpuf);
 		fm = f(x,y-1,&fmuf);
 		if (fpuf == 1 && fmuf == 1)
 			*A2my2 =x*sqrt((A+y)*(fp+fm));
 		else if (fmuf == 1)
 			/*
 			 * Overflow not possible because fp < 1e50 and
 			 * x > 1e-100.
 			 * Underflow not possible because either fm=0 or fm
 			 * approximately bigger than 1e-200.
 			 */
 			*A2my2 = sqrt(x)*sqrt((A+y)*(fp/x+fm*x));
 		else if (fpuf == 1)
 			/* Similar arguments against over/underflow. */
 			*A2my2 = sqrt(x)*sqrt((A+y)*(fm/x+fp*x));
 		else
 			*A2my2 = sqrt((A+y)*(fp+fm));
 	}
 }

 double complex
 casinh(double complex z)
 {
 	double x, y, rx, ry, B, A2my2;
 	int sx, sy;
 	int B_is_usable;

 	x = creal(z);
 	y = cimag(z);
 	sx = signbit(x);
 	sy = signbit(y);
 	x = fabs(x);
 	y = fabs(y);

 	if (cabs(z) > 1e20 && isfinite(x) && isfinite(y)) {
 		if (huge+x>one) { /* set inexact flag. */
 			if (sx == 0) return clog(2*z);
 			if (sx == 1) return -clog(-2*z);
 		}
 	}

 	if (cabs(z) < 1e-20)
 		if (huge+x>one) /* set inexact flag. */
 			return z;

 	do_hard_work(x, y, &rx, &B_is_usable, &B, &A2my2);
 	if (B_is_usable)
 		ry = asin(B);
 	else
 		ry = atan2(y,A2my2);

 	if (sx == 1)
 		rx = copysign(rx, -1); /* casinh is odd. */
 	if (sy == 1)
 		ry = copysign(ry, -1); /* casinh(conj(z)) = conj(casinh(z)). */

 	return cpack(rx,ry);
 }

 /*
  * casin(z) = reverse(casinh(reverse(z)))
  * where reverse(x+I*y) = y+x*I = I*conj(x+I*y).
  */

 double complex
 casin(double complex z)
 {
 	complex w;

 	w = casinh(cpack(cimag(z),creal(z)));
 	return cpack(cimag(w),creal(w));
 }

 /*
  * cacos(z) = PI/2 - casin(z)
  * but do the computation carefully so cacos(z) is accurate when z is
  * close to 1.
  */

 double complex
 cacos(double complex z)
 {
 	double x, y, rx, ry, B, A2my2;
 	int sx, sy;
 	int B_is_usable;
 	complex w;

 	x = creal(z);
 	y = cimag(z);
 	sx = signbit(x);
 	sy = signbit(y);

 	x = fabs(x);
 	y = fabs(y);

 	if (cabs(z) > 1e20 && isfinite(x) && isfinite(y)) {
 		if (huge+x>one) { /* set inexact flag. */
 			w = clog(2*z);
 			if (signbit(cimag(w)) == 0)
 				return cpack(cimag(w),-creal(w));
 			return cpack(-cimag(w),creal(w));
 		}
 	}

 	if (cabs(z) < 1e-10)
 		if (huge+x>one) { /* set inexact flag. */
 			if (signbit(cimag(z)) == 0)
 				return cpack(M_PI_2-creal(z),copysign(cimag(z),-1));
 			return cpack(M_PI_2-creal(z),copysign(cimag(z),1));
 		}

 	do_hard_work(y, x, &ry, &B_is_usable, &B, &A2my2);
 	if (B_is_usable) {
 		if (sx==0)
 			rx = acos(B);
 		else
 			rx = acos(-B);
 	} else {
 		if (sx==0)
 			rx = atan2(A2my2,x);
 		else
 			rx = atan2(A2my2,-x);
 	}

 	if (sy==0)
 		ry = copysign(ry, -1); /* cacos(conj(z)) = conj(cacos(z)). */

 	return cpack(rx,ry);
 }

 /*
  * cacosh(z) = I*cacos(z) or -I*cacos(z)
  * where the sign is chosen so Re(cacosh(z)) >= 0.
  */

 double complex
 cacosh(double complex z)
 {
 	double complex w;

 	w = cacos(z);
 	if (signbit(cimag(w)) == 0)
 		return cpack(cimag(w),copysign(creal(w),-1));
 	else
 		return cpack(-cimag(w),creal(w));
 }

 /* 
  * catanh(z) = 0.5 * log((1+z)/(1-z))
  *           = 0.25 * log(|z+1|^2/|z-1|^2) + 0.5 * I * atan2(2y, (1-x*x-y*y))
  *
  * Note that |z+1|^2/|z-1|^2 = 1 + 4*x/|z-1|^2
  *                           = 1 / ( 1 - 4*x/|z+1|^2 )
  *
  * If |z| is large, then
  * catanh(z) appprox = 1/z + sign(y)*I*PI/2
  */
 double complex
 catanh(double complex z)
 {
 	double x, y, rx, ry;
 	double zp1_2, zm1_2; /* |z+1|^2 and |z-1|^2 */

 	x = creal(z);
 	y = cimag(z);

 	if (cabs(z) < 1e-20)
 		if (huge+x>one) /* set inexact flag. */
 			return z;

 	if (isinf(x) && isnan(y))
 		return cpack(copysign(0, x), y);

 	if (isnan(x) && isinf(y))
 		return cpack(copysign(0, x), copysign(M_PI_2,y));

 	if (x == 0 && isnan(y))
 		return cpack(x, y);

 	if (isnan(x) || isnan(y))
 		return clog(z);

 	if (isinf(x) || isinf(y))
 		return cpack(copysign(0,x),copysign(M_PI_2,y));

 	if (cabs(z) > 1e20)
 		if (huge+x>one) { /* set inexact flag. */
 			if (x==0)
 				return cpack(copysign(0,x),copysign(M_PI_2,y));
 			else
 				return 1/z + cpack(0,copysign(M_PI_2,y));
 		}

 	if (fabs(y) < 1e-100) {
 		if (huge+x>one) { /* set inexact flag. */
 			zp1_2 = (x+1)*(x+1);
 			zm1_2 = (x-1)*(x-1);
 		}
 	} else {
 		zp1_2 = (x+1)*(x+1)+y*y;
 		zm1_2 = (x-1)*(x-1)+y*y;
 	}

 	if (zp1_2 < 0.5 || zm1_2 < 0.5)
 		rx = 0.25*(log(zp1_2/zm1_2));
 	else if (x == 0)
 		rx = x;
 	else if (x > 0)
 		rx = 0.25*log1p(4*x/zm1_2);
 	else
 		rx = -0.25*log1p(-4*x/zp1_2);

 	if (x==1 || x==-1) {
 		if (y==0)
 			ry = y;
 		else if (signbit(y) == 0)
 			ry = atan2(2, -y)/2;
 		else
 			ry = atan2(-2, y)/2;
 	} else if (fabs(y) < 1e-100) {
 		if (huge+x>one) /* set inexact flag. */
 			ry = atan2(2*y, (1-x)*(1+x))/2;
 	} else
 		ry = atan2(2*y, (1-x)*(1+x)-y*y)/2;

 	return cpack(rx,ry);
 }

 /*
  * catan(z) = reverse(catanh(reverse(z)))
  * where reverse(x+I*y) = y+x*I = I*conj(x+I*y).
  */

 double complex
 catan(double complex z)
 {
 	complex w;

 	w = catanh(cpack(cimag(z),creal(z)));
 	return cpack(cimag(w),creal(w));
 }

 --------------070709080600030802090207--