svn commit: r216210 - head/lib/msun/src

Sun Dec 5 22:11:04 UTC 2010

Author: das
Date: Sun Dec  5 22:11:03 2010
New Revision: 216210
URL: http://svn.freebsd.org/changeset/base/216210

Log:
  Add a "kernel" log function, based on e_log.c, which is useful for
  implementing accurate logarithms in different bases.  This is based
  on an approach bde coded up years ago.
  
  This function should always be inlined; it will be used in only a few
  places, and rudimentary tests show a 40% performance improvement in
  implementations of log2() and log10() on amd64.
  
  The kernel takes a reduced argument x and returns the same polynomial
  approximation as e_log.c, but omitting the low-order term. The low-order
  term is much larger than the rest of the approximation, so the caller of
  the kernel function can scale it to the appropriate base in extra precision
  and obtain a much more accurate answer than by using log(x)/log(b).

Added:
  head/lib/msun/src/k_log.h
     - copied, changed from r216174, head/lib/msun/src/e_log.c
  head/lib/msun/src/k_logf.h
     - copied, changed from r216174, head/lib/msun/src/e_logf.c

Copied and modified: head/lib/msun/src/k_log.h (from r216174, head/lib/msun/src/e_log.c)
==============================================================================

--- head/lib/msun/src/e_log.c	Sat Dec  4 02:42:52 2010	(r216174, copy source)
+++ head/lib/msun/src/k_log.h	Sun Dec  5 22:11:03 2010	(r216210)
@@ -14,8 +14,13 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-/* __ieee754_log(x)
- * Return the logrithm of x
+/* __kernel_log(x)
+ * Return log(x) - (x-1) for x in ~[sqrt(2)/2, sqrt(2)].
+ *
+ * The following describes the overall strategy for computing
+ * logarithms in base e.  The argument reduction and adding the final
+ * term of the polynomial are done by the caller for increased accuracy
+ * when different bases are used.
  *
  * Method :                  
  *   1. Argument Reduction: find k and f such that 
@@ -65,13 +70,7 @@ __FBSDID("$FreeBSD$");
  * to produce the hexadecimal values shown.
  */
 
-#include "math.h"
-#include "math_private.h"
-
 static const double
-ln2_hi  =  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
-ln2_lo  =  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
-two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
 Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
 Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
 Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
@@ -80,48 +79,27 @@ Lg5 = 1.818357216161805012e-01,  /* 3FC7
 Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
 Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 
-static const double zero   =  0.0;
-
-double
-__ieee754_log(double x)
+/*
+ * We always inline __kernel_log(), since doing so produces a
+ * substantial performance improvement (~40% on amd64).
+ */
+static inline double
+__kernel_log(double x)
 {
-	double hfsq,f,s,z,R,w,t1,t2,dk;
-	int32_t k,hx,i,j;
+	double hfsq,f,s,z,R,w,t1,t2;
+	int32_t hx,i,j;
 	u_int32_t lx;
 
 	EXTRACT_WORDS(hx,lx,x);
 
-	k=0;
-	if (hx < 0x00100000) {			/* x < 2**-1022  */
-	    if (((hx&0x7fffffff)|lx)==0) 
-		return -two54/zero;		/* log(+-0)=-inf */
-	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
-	    k -= 54; x *= two54; /* subnormal number, scale up x */
-	    GET_HIGH_WORD(hx,x);
-	} 
-	if (hx >= 0x7ff00000) return x+x;
-	k += (hx>>20)-1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	SET_HIGH_WORD(x,hx|(i^0x3ff00000));	/* normalize x or x/2 */
-	k += (i>>20);
 	f = x-1.0;
 	if((0x000fffff&(2+hx))<3) {	/* -2**-20 <= f < 2**-20 */
-	    if(f==zero) {
-		if(k==0) {
-		    return zero;
-		} else {
-		    dk=(double)k;
-		    return dk*ln2_hi+dk*ln2_lo;
-		}
-	    }
-	    R = f*f*(0.5-0.33333333333333333*f);
-	    if(k==0) return f-R; else {dk=(double)k;
-	    	     return dk*ln2_hi-((R-dk*ln2_lo)-f);}
+	    if(f==0.0) return 0.0;
+	    return f*f*(0.33333333333333333*f-0.5);
 	}
  	s = f/(2.0+f); 
-	dk = (double)k;
 	z = s*s;
+	hx &= 0x000fffff;
 	i = hx-0x6147a;
 	w = z*z;
 	j = 0x6b851-hx;
@@ -129,12 +107,10 @@ __ieee754_log(double x)
 	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); 
 	i |= j;
 	R = t2+t1;
-	if(i>0) {
+	if (i>0) {
 	    hfsq=0.5*f*f;
-	    if(k==0) return f-(hfsq-s*(hfsq+R)); else
-		     return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
+	    return s*(hfsq+R) - hfsq;
 	} else {
-	    if(k==0) return f-s*(f-R); else
-		     return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
+	    return s*(R-f);
 	}
 }

Copied and modified: head/lib/msun/src/k_logf.h (from r216174, head/lib/msun/src/e_logf.c)
==============================================================================
--- head/lib/msun/src/e_logf.c	Sat Dec  4 02:42:52 2010	(r216174, copy source)
+++ head/lib/msun/src/k_logf.h	Sun Dec  5 22:11:03 2010	(r216210)
@@ -1,7 +1,3 @@
-/* e_logf.c -- float version of e_log.c.
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian at cygnus.com.
- */
-
 /*
  * ====================================================
  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@@ -16,60 +12,33 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include "math.h"
-#include "math_private.h"
+/* __kernel_logf(x)
+ * Return log(x) - (x-1) for x in ~[sqrt(2)/2, sqrt(2)].
+ */
 
 static const float
-ln2_hi =   6.9313812256e-01,	/* 0x3f317180 */
-ln2_lo =   9.0580006145e-06,	/* 0x3717f7d1 */
-two25 =    3.355443200e+07,	/* 0x4c000000 */
 /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
 Lg1 =      0xaaaaaa.0p-24,	/* 0.66666662693 */
 Lg2 =      0xccce13.0p-25,	/* 0.40000972152 */
 Lg3 =      0x91e9ee.0p-25,	/* 0.28498786688 */
 Lg4 =      0xf89e26.0p-26;	/* 0.24279078841 */
 
-static const float zero   =  0.0;
-
-float
-__ieee754_logf(float x)
+static inline float
+__kernel_logf(float x)
 {
-	float hfsq,f,s,z,R,w,t1,t2,dk;
-	int32_t k,ix,i,j;
+	float hfsq,f,s,z,R,w,t1,t2;
+	int32_t ix,i,j;
 
 	GET_FLOAT_WORD(ix,x);
 
-	k=0;
-	if (ix < 0x00800000) {			/* x < 2**-126  */
-	    if ((ix&0x7fffffff)==0)
-		return -two25/zero;		/* log(+-0)=-inf */
-	    if (ix<0) return (x-x)/zero;	/* log(-#) = NaN */
-	    k -= 25; x *= two25; /* subnormal number, scale up x */
-	    GET_FLOAT_WORD(ix,x);
-	}
-	if (ix >= 0x7f800000) return x+x;
-	k += (ix>>23)-127;
-	ix &= 0x007fffff;
-	i = (ix+(0x95f64<<3))&0x800000;
-	SET_FLOAT_WORD(x,ix|(i^0x3f800000));	/* normalize x or x/2 */
-	k += (i>>23);
 	f = x-(float)1.0;
 	if((0x007fffff&(0x8000+ix))<0xc000) {	/* -2**-9 <= f < 2**-9 */
-	    if(f==zero) {
-		if(k==0) {
-		    return zero;
-		} else {
-		    dk=(float)k;
-		    return dk*ln2_hi+dk*ln2_lo;
-		}
-	    }
-	    R = f*f*((float)0.5-(float)0.33333333333333333*f);
-	    if(k==0) return f-R; else {dk=(float)k;
-	    	     return dk*ln2_hi-((R-dk*ln2_lo)-f);}
+	    if(f==0.0) return 0.0;
+	    return f*f*((float)0.33333333333333333*f-(float)0.5);
 	}
  	s = f/((float)2.0+f);
-	dk = (float)k;
 	z = s*s;
+	ix &= 0x007fffff;
 	i = ix-(0x6147a<<3);
 	w = z*z;
 	j = (0x6b851<<3)-ix;
@@ -79,10 +48,8 @@ __ieee754_logf(float x)
 	R = t2+t1;
 	if(i>0) {
 	    hfsq=(float)0.5*f*f;
-	    if(k==0) return f-(hfsq-s*(hfsq+R)); else
-		     return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
+	    return s*(hfsq+R) - hfsq;
 	} else {
-	    if(k==0) return f-s*(f-R); else
-		     return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
+	    return s*(R-f);
 	}
 }