svn commit: r254969 - head/lib/msun/src

Steve Kargl kargl at FreeBSD.org
Tue Aug 27 19:46:58 UTC 2013


Author: kargl
Date: Tue Aug 27 19:46:56 2013
New Revision: 254969
URL: http://svnweb.freebsd.org/changeset/base/254969

Log:
  * s_erf.c:
    . Use integer literal constants instead of double literal constants.
  
  * s_erff.c:
    . Use integer literal constants instead of casting double literal
      constants to float.
    . Update the threshold values from those carried over from erf() to
      values appropriate for float.
    . New sets of polynomial coefficients for the rational approximations.
      These coefficients have little, but positive, effect on the maximum
      error in ULP in the four intervals, but do improve the overall
      speed of execution.
    . Remove redundant GET_FLOAT_WORD(ix,x) as hx already contained the
      contents that is packed into ix.
    . Update the mask that is used to zero-out lower-order bits in x in
      the intervals [1.25, 2.857143] and [2.857143, 12].  In tests on
      amd64, this change improves the maximum error in ULP from 6.27739
      and 63.8095 to 3.16774 and 2.92095 on these intervals for erffc().
  
  Reviewed by:	bde

Modified:
  head/lib/msun/src/s_erf.c
  head/lib/msun/src/s_erff.c

Modified: head/lib/msun/src/s_erf.c
==============================================================================
--- head/lib/msun/src/s_erf.c	Tue Aug 27 19:10:36 2013	(r254968)
+++ head/lib/msun/src/s_erf.c	Tue Aug 27 19:46:56 2013	(r254969)
@@ -201,7 +201,7 @@ erf(double x)
 	if(ix < 0x3feb0000) {		/* |x|<0.84375 */
 	    if(ix < 0x3e300000) { 	/* |x|<2**-28 */
 	        if (ix < 0x00800000)
-		    return 0.125*(8.0*x+efx8*x);  /*avoid underflow */
+		    return (8*x+efx8*x)/8;  /* avoid spurious underflow */
 		return x + efx*x;
 	    }
 	    z = x*x;

Modified: head/lib/msun/src/s_erff.c
==============================================================================
--- head/lib/msun/src/s_erff.c	Tue Aug 27 19:10:36 2013	(r254968)
+++ head/lib/msun/src/s_erff.c	Tue Aug 27 19:46:56 2013	(r254969)
@@ -24,75 +24,59 @@ tiny	    = 1e-30,
 half=  5.0000000000e-01, /* 0x3F000000 */
 one =  1.0000000000e+00, /* 0x3F800000 */
 two =  2.0000000000e+00, /* 0x40000000 */
-	/* c = (subfloat)0.84506291151 */
-erx =  8.4506291151e-01, /* 0x3f58560b */
 /*
- * Coefficients for approximation to  erf on [0,0.84375]
+ * Coefficients for approximation to erf on [0,0.84375]
  */
 efx =  1.2837916613e-01, /* 0x3e0375d4 */
 efx8=  1.0270333290e+00, /* 0x3f8375d4 */
-pp0  =  1.2837916613e-01, /* 0x3e0375d4 */
-pp1  = -3.2504209876e-01, /* 0xbea66beb */
-pp2  = -2.8481749818e-02, /* 0xbce9528f */
-pp3  = -5.7702702470e-03, /* 0xbbbd1489 */
-pp4  = -2.3763017452e-05, /* 0xb7c756b1 */
-qq1  =  3.9791721106e-01, /* 0x3ecbbbce */
-qq2  =  6.5022252500e-02, /* 0x3d852a63 */
-qq3  =  5.0813062117e-03, /* 0x3ba68116 */
-qq4  =  1.3249473704e-04, /* 0x390aee49 */
-qq5  = -3.9602282413e-06, /* 0xb684e21a */
 /*
- * Coefficients for approximation to  erf  in [0.84375,1.25]
+ *  Domain [0, 0.84375], range ~[-5.4446e-10,5.5197e-10]:
+ *  |(erf(x) - x)/x - p(x)/q(x)| < 2**-31.
  */
-pa0  = -2.3621185683e-03, /* 0xbb1acdc6 */
-pa1  =  4.1485610604e-01, /* 0x3ed46805 */
-pa2  = -3.7220788002e-01, /* 0xbebe9208 */
-pa3  =  3.1834661961e-01, /* 0x3ea2fe54 */
-pa4  = -1.1089469492e-01, /* 0xbde31cc2 */
-pa5  =  3.5478305072e-02, /* 0x3d1151b3 */
-pa6  = -2.1663755178e-03, /* 0xbb0df9c0 */
-qa1  =  1.0642088205e-01, /* 0x3dd9f331 */
-qa2  =  5.4039794207e-01, /* 0x3f0a5785 */
-qa3  =  7.1828655899e-02, /* 0x3d931ae7 */
-qa4  =  1.2617121637e-01, /* 0x3e013307 */
-qa5  =  1.3637083583e-02, /* 0x3c5f6e13 */
-qa6  =  1.1984500103e-02, /* 0x3c445aa3 */
+pp0  =  1.28379166e-01F, /*  0x1.06eba8p-3 */
+pp1  = -3.36030394e-01F, /* -0x1.58185ap-2 */
+pp2  = -1.86260219e-03F, /* -0x1.e8451ep-10 */
+qq1  =  3.12324286e-01F, /*  0x1.3fd1f0p-2 */
+qq2  =  2.16070302e-02F, /*  0x1.620274p-6 */
+qq3  = -1.98859419e-03F, /* -0x1.04a626p-9 */
 /*
- * Coefficients for approximation to  erfc in [1.25,1/0.35]
+ * Domain [0.84375, 1.25], range ~[-1.953e-11,1.940e-11]:
+ * |(erf(x) - erx) - p(x)/q(x)| < 2**-36.
  */
-ra0  = -9.8649440333e-03, /* 0xbc21a093 */
-ra1  = -6.9385856390e-01, /* 0xbf31a0b7 */
-ra2  = -1.0558626175e+01, /* 0xc128f022 */
-ra3  = -6.2375331879e+01, /* 0xc2798057 */
-ra4  = -1.6239666748e+02, /* 0xc322658c */
-ra5  = -1.8460508728e+02, /* 0xc3389ae7 */
-ra6  = -8.1287437439e+01, /* 0xc2a2932b */
-ra7  = -9.8143291473e+00, /* 0xc11d077e */
-sa1  =  1.9651271820e+01, /* 0x419d35ce */
-sa2  =  1.3765776062e+02, /* 0x4309a863 */
-sa3  =  4.3456588745e+02, /* 0x43d9486f */
-sa4  =  6.4538726807e+02, /* 0x442158c9 */
-sa5  =  4.2900814819e+02, /* 0x43d6810b */
-sa6  =  1.0863500214e+02, /* 0x42d9451f */
-sa7  =  6.5702495575e+00, /* 0x40d23f7c */
-sa8  = -6.0424413532e-02, /* 0xbd777f97 */
+erx  =  8.42697144e-01F, /*  0x1.af7600p-1.  erf(1) rounded to 16 bits. */
+pa0  =  3.64939137e-06F, /*  0x1.e9d022p-19 */
+pa1  =  4.15109694e-01F, /*  0x1.a91284p-2 */
+pa2  = -1.65179938e-01F, /* -0x1.5249dcp-3 */
+pa3  =  1.10914491e-01F, /*  0x1.c64e46p-4 */
+qa1  =  6.02074385e-01F, /*  0x1.344318p-1 */
+qa2  =  5.35934687e-01F, /*  0x1.126608p-1 */
+qa3  =  1.68576106e-01F, /*  0x1.593e6ep-3 */
+qa4  =  5.62181212e-02F, /*  0x1.cc89f2p-5 */
 /*
- * Coefficients for approximation to  erfc in [1/.35,28]
+ * Domain [1.25,1/0.35], range ~[-7.043e-10,7.457e-10]:
+ * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-30
  */
-rb0  = -9.8649431020e-03, /* 0xbc21a092 */
-rb1  = -7.9928326607e-01, /* 0xbf4c9dd4 */
-rb2  = -1.7757955551e+01, /* 0xc18e104b */
-rb3  = -1.6063638306e+02, /* 0xc320a2ea */
-rb4  = -6.3756646729e+02, /* 0xc41f6441 */
-rb5  = -1.0250950928e+03, /* 0xc480230b */
-rb6  = -4.8351919556e+02, /* 0xc3f1c275 */
-sb1  =  3.0338060379e+01, /* 0x41f2b459 */
-sb2  =  3.2579251099e+02, /* 0x43a2e571 */
-sb3  =  1.5367296143e+03, /* 0x44c01759 */
-sb4  =  3.1998581543e+03, /* 0x4547fdbb */
-sb5  =  2.5530502930e+03, /* 0x451f90ce */
-sb6  =  4.7452853394e+02, /* 0x43ed43a7 */
-sb7  = -2.2440952301e+01; /* 0xc1b38712 */
+ra0  = -9.87132732e-03F, /* -0x1.4376b2p-7 */
+ra1  = -5.53605914e-01F, /* -0x1.1b723cp-1 */
+ra2  = -2.17589188e+00F, /* -0x1.1683a0p+1 */
+ra3  = -1.43268085e+00F, /* -0x1.6ec42cp+0 */
+sa1  =  5.45995426e+00F, /*  0x1.5d6fe4p+2 */
+sa2  =  6.69798088e+00F, /*  0x1.acabb8p+2 */
+sa3  =  1.43113089e+00F, /*  0x1.6e5e98p+0 */
+sa4  = -5.77397496e-02F, /* -0x1.d90108p-5 */
+/*
+ * Domain [1/0.35, 11], range ~[-2.264e-13,2.336e-13]:
+ * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-42
+ */
+rb0  = -9.86494310e-03F, /* -0x1.434124p-7 */
+rb1  = -6.25171244e-01F, /* -0x1.401672p-1 */
+rb2  = -6.16498327e+00F, /* -0x1.8a8f16p+2 */
+rb3  = -1.66696873e+01F, /* -0x1.0ab70ap+4 */
+rb4  = -9.53764343e+00F, /* -0x1.313460p+3 */
+sb1  =  1.26884899e+01F, /*  0x1.96081cp+3 */
+sb2  =  4.51839523e+01F, /*  0x1.6978bcp+5 */
+sb3  =  4.72810211e+01F, /*  0x1.7a3f88p+5 */
+sb4  =  8.93033314e+00F; /*  0x1.1dc54ap+3 */
 
 float
 erff(float x)
@@ -107,43 +91,37 @@ erff(float x)
 	}
 
 	if(ix < 0x3f580000) {		/* |x|<0.84375 */
-	    if(ix < 0x31800000) { 	/* |x|<2**-28 */
-	        if (ix < 0x04000000)
-		    /*avoid underflow */
-		    return (float)0.125*((float)8.0*x+efx8*x);
+	    if(ix < 0x38800000) { 	/* |x|<2**-14 */
+	        if (ix < 0x04000000)	/* |x|<0x1p-119 */
+		    return (8*x+efx8*x)/8;	/* avoid spurious underflow */
 		return x + efx*x;
 	    }
 	    z = x*x;
-	    r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
-	    s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
+	    r = pp0+z*(pp1+z*pp2);
+	    s = one+z*(qq1+z*(qq2+z*qq3));
 	    y = r/s;
 	    return x + x*y;
 	}
 	if(ix < 0x3fa00000) {		/* 0.84375 <= |x| < 1.25 */
 	    s = fabsf(x)-one;
-	    P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
-	    Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
+	    P = pa0+s*(pa1+s*(pa2+s*pa3));
+	    Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4)));
 	    if(hx>=0) return erx + P/Q; else return -erx - P/Q;
 	}
-	if (ix >= 0x40c00000) {		/* inf>|x|>=6 */
+	if (ix >= 0x40800000) {		/* inf>|x|>=4 */
 	    if(hx>=0) return one-tiny; else return tiny-one;
 	}
 	x = fabsf(x);
  	s = one/(x*x);
 	if(ix< 0x4036DB6E) {	/* |x| < 1/0.35 */
-	    R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
-				ra5+s*(ra6+s*ra7))))));
-	    S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
-				sa5+s*(sa6+s*(sa7+s*sa8)))))));
+	    R=ra0+s*(ra1+s*(ra2+s*ra3));
+	    S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4)));
 	} else {	/* |x| >= 1/0.35 */
-	    R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
-				rb5+s*rb6)))));
-	    S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
-				sb5+s*(sb6+s*sb7))))));
-	}
-	GET_FLOAT_WORD(ix,x);
-	SET_FLOAT_WORD(z,ix&0xfffff000);
-	r  =  __ieee754_expf(-z*z-(float)0.5625)*__ieee754_expf((z-x)*(z+x)+R/S);
+	    R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4)));
+	    S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4)));
+	}
+	SET_FLOAT_WORD(z,hx&0xffffe000);
+	r  = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S);
 	if(hx>=0) return one-r/x; else return  r/x-one;
 }
 
@@ -160,11 +138,11 @@ erfcf(float x)
 	}
 
 	if(ix < 0x3f580000) {		/* |x|<0.84375 */
-	    if(ix < 0x23800000)  	/* |x|<2**-56 */
+	    if(ix < 0x33800000)  	/* |x|<2**-24 */
 		return one-x;
 	    z = x*x;
-	    r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
-	    s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
+	    r = pp0+z*(pp1+z*pp2);
+	    s = one+z*(qq1+z*(qq2+z*qq3));
 	    y = r/s;
 	    if(hx < 0x3e800000) {  	/* x<1/4 */
 		return one-(x+x*y);
@@ -176,33 +154,27 @@ erfcf(float x)
 	}
 	if(ix < 0x3fa00000) {		/* 0.84375 <= |x| < 1.25 */
 	    s = fabsf(x)-one;
-	    P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
-	    Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
+	    P = pa0+s*(pa1+s*(pa2+s*pa3));
+	    Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4)));
 	    if(hx>=0) {
 	        z  = one-erx; return z - P/Q;
 	    } else {
 		z = erx+P/Q; return one+z;
 	    }
 	}
-	if (ix < 0x41e00000) {		/* |x|<28 */
+	if (ix < 0x41300000) {		/* |x|<11 */
 	    x = fabsf(x);
  	    s = one/(x*x);
 	    if(ix< 0x4036DB6D) {	/* |x| < 1/.35 ~ 2.857143*/
-	        R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
-				ra5+s*(ra6+s*ra7))))));
-	        S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
-				sa5+s*(sa6+s*(sa7+s*sa8)))))));
+	        R=ra0+s*(ra1+s*(ra2+s*ra3));
+	        S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4)));
 	    } else {			/* |x| >= 1/.35 ~ 2.857143 */
-		if(hx<0&&ix>=0x40c00000) return two-tiny;/* x < -6 */
-	        R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
-				rb5+s*rb6)))));
-	        S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
-				sb5+s*(sb6+s*sb7))))));
+		if(hx<0&&ix>=0x40a00000) return two-tiny;/* x < -5 */
+	        R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4)));
+		S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4)));
 	    }
-	    GET_FLOAT_WORD(ix,x);
-	    SET_FLOAT_WORD(z,ix&0xfffff000);
-	    r  =  __ieee754_expf(-z*z-(float)0.5625)*
-			__ieee754_expf((z-x)*(z+x)+R/S);
+	    SET_FLOAT_WORD(z,hx&0xffffe000);
+	    r  = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S);
 	    if(hx>0) return r/x; else return two-r/x;
 	} else {
 	    if(hx>0) return tiny*tiny; else return two-tiny;


More information about the svn-src-head mailing list