# svn commit: r226245 - head/lib/msun/src

David Schultz das at FreeBSD.org
Tue Oct 11 05:17:46 UTC 2011

```Author: das
Date: Tue Oct 11 05:17:45 2011
New Revision: 226245
URL: http://svn.freebsd.org/changeset/base/226245

Log:
Refactor this code by introducing separate functions to handle the
extra-precision add and multiply operations. This simplifies future
work but shouldn't result in any functional change.

Modified:

==============================================================================
--- head/lib/msun/src/s_fma.c	Tue Oct 11 05:17:26 2011	(r226244)
+++ head/lib/msun/src/s_fma.c	Tue Oct 11 05:17:45 2011	(r226245)
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2005 David Schultz <das at FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das at FreeBSD.ORG>
*
* Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,63 @@ __FBSDID("\$FreeBSD\$");
#include <math.h>

/*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a double.  We maintain the invariant that "hi" stores the 53 high-order
+ * bits of the result.
+ */
+struct dd {
+	double hi;
+	double lo;
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+{
+	struct dd ret;
+	double s;
+
+	ret.hi = a + b;
+	s = ret.hi - a;
+	ret.lo = (a - (ret.hi - s)) + (b - s);
+	return (ret);
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(double a, double b)
+{
+	static const double split = 0x1p27 + 1.0;
+	struct dd ret;
+	double ha, hb, la, lb, p, q;
+
+	p = a * split;
+	ha = a - p;
+	ha += p;
+	la = a - ha;
+
+	p = b * split;
+	hb = b - p;
+	hb += p;
+	lb = b - hb;
+
+	p = ha * hb;
+	q = ha * lb + la * hb;
+
+	ret.hi = p + q;
+	ret.lo = p - ret.hi + q + la * lb;
+	return (ret);
+}
+
+/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
* We use scaling to avoid overflow/underflow, along with the
@@ -52,10 +109,10 @@ __FBSDID("\$FreeBSD\$");
double
fma(double x, double y, double z)
{
-	static const double split = 0x1p27 + 1.0;
double xs, ys, zs;
-	double c, cc, hx, hy, p, q, tx, ty;
-	double r, rr, s;
+	struct dd xy, r, r2;
+	double p;
+	double s;
int oround;
int ex, ey, ez;
@@ -95,29 +152,29 @@ fma(double x, double y, double z)
if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
return (x * y);
feholdexcept(&env);
-			r = x * y;
+			s = x * y;
if (!fetestexcept(FE_INEXACT))
-				r = nextafter(r, 0);
+				s = nextafter(s, 0);
feupdateenv(&env);
-			return (r);
+			return (s);
case FE_DOWNWARD:
if (z > 0.0)
return (x * y);
feholdexcept(&env);
-			r = x * y;
+			s = x * y;
if (!fetestexcept(FE_INEXACT))
-				r = nextafter(r, -INFINITY);
+				s = nextafter(s, -INFINITY);
feupdateenv(&env);
-			return (r);
+			return (s);
default:	/* FE_UPWARD */
if (z < 0.0)
return (x * y);
feholdexcept(&env);
-			r = x * y;
+			s = x * y;
if (!fetestexcept(FE_INEXACT))
-				r = nextafter(r, INFINITY);
+				s = nextafter(s, INFINITY);
feupdateenv(&env);
-			return (r);
+			return (s);
}
}
@@ -145,50 +202,29 @@ fma(double x, double y, double z)
}
}

-	/*
-	 * Use Dekker's algorithm to perform the multiplication and
-	 * subsequent addition in twice the machine precision.
-	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
-	 */
fesetround(FE_TONEAREST);

-	p = xs * split;
-	hx = xs - p;
-	hx += p;
-	tx = xs - hx;
-
-	p = ys * split;
-	hy = ys - p;
-	hy += p;
-	ty = ys - hy;
-
-	p = hx * hy;
-	q = hx * ty + tx * hy;
-	c = p + q;
-	cc = p - c + q + tx * ty;
-
+	xy = dd_mul(xs, ys);
-	r = c + zs;
-	s = r - c;
-	rr = (c - (r - s)) + (zs - s) + cc;
+	r.lo += xy.lo;

-	if (spread + ilogb(r) > -1023) {
+	if (spread + ilogb(r.hi) > -1023) {
fesetround(oround);
-		r = r + rr;
+		r.hi = r.hi + r.lo;
} else {
/*
* The result is subnormal, so we round before scaling to
* avoid double rounding.
*/
-		p = ldexp(copysign(0x1p-1022, r), -spread);
-		c = r + p;
-		s = c - r;
-		cc = (r - (c - s)) + (p - s) + rr;
+		p = ldexp(copysign(0x1p-1022, r.hi), -spread);
+		r2.lo += r.lo;
fesetround(oround);
-		r = (c + cc) - p;
+		r.hi = (r2.hi + r2.lo) - p;
}
}
#else	/* LDBL_MANT_DIG == 113 */
/*

==============================================================================
--- head/lib/msun/src/s_fmal.c	Tue Oct 11 05:17:26 2011	(r226244)
+++ head/lib/msun/src/s_fmal.c	Tue Oct 11 05:17:45 2011	(r226245)
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2005 David Schultz <das at FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das at FreeBSD.ORG>
*
* Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,67 @@ __FBSDID("\$FreeBSD\$");
#include <math.h>

/*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a long double.  We maintain the invariant that "hi" stores the high-order
+ * bits of the result.
+ */
+struct dd {
+	long double hi;
+	long double lo;
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+dd_add(long double a, long double b)
+{
+	struct dd ret;
+	long double s;
+
+	ret.hi = a + b;
+	s = ret.hi - a;
+	ret.lo = (a - (ret.hi - s)) + (b - s);
+	return (ret);
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(long double a, long double b)
+{
+#if LDBL_MANT_DIG == 64
+	static const long double split = 0x1p32L + 1.0;
+#elif LDBL_MANT_DIG == 113
+	static const long double split = 0x1p57L + 1.0;
+#endif
+	struct dd ret;
+	long double ha, hb, la, lb, p, q;
+
+	p = a * split;
+	ha = a - p;
+	ha += p;
+	la = a - ha;
+
+	p = b * split;
+	hb = b - p;
+	hb += p;
+	lb = b - hb;
+
+	p = ha * hb;
+	q = ha * lb + la * hb;
+
+	ret.hi = p + q;
+	ret.lo = p - ret.hi + q + la * lb;
+	return (ret);
+}
+
+/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
* We use scaling to avoid overflow/underflow, along with the
@@ -43,14 +104,10 @@ __FBSDID("\$FreeBSD\$");
long double
fmal(long double x, long double y, long double z)
{
-#if LDBL_MANT_DIG == 64
-	static const long double split = 0x1p32L + 1.0;
-#elif LDBL_MANT_DIG == 113
-	static const long double split = 0x1p57L + 1.0;
-#endif
long double xs, ys, zs;
-	long double c, cc, hx, hy, p, q, tx, ty;
-	long double r, rr, s;
+	struct dd xy, r, r2;
+	long double p;
+	long double s;
int oround;
int ex, ey, ez;
@@ -90,29 +147,29 @@ fmal(long double x, long double y, long
if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
return (x * y);
feholdexcept(&env);
-			r = x * y;
+			s = x * y;
if (!fetestexcept(FE_INEXACT))
-				r = nextafterl(r, 0);
+				s = nextafterl(s, 0);
feupdateenv(&env);
-			return (r);
+			return (s);
case FE_DOWNWARD:
if (z > 0.0)
return (x * y);
feholdexcept(&env);
-			r = x * y;
+			s = x * y;
if (!fetestexcept(FE_INEXACT))
-				r = nextafterl(r, -INFINITY);
+				s = nextafterl(s, -INFINITY);
feupdateenv(&env);
-			return (r);
+			return (s);
default:	/* FE_UPWARD */
if (z < 0.0)
return (x * y);
feholdexcept(&env);
-			r = x * y;
+			s = x * y;
if (!fetestexcept(FE_INEXACT))
-				r = nextafterl(r, INFINITY);
+				s = nextafterl(s, INFINITY);
feupdateenv(&env);
-			return (r);
+			return (s);
}
}
@@ -140,48 +197,27 @@ fmal(long double x, long double y, long
}
}

-	/*
-	 * Use Dekker's algorithm to perform the multiplication and
-	 * subsequent addition in twice the machine precision.
-	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
-	 */
fesetround(FE_TONEAREST);

-	p = xs * split;
-	hx = xs - p;
-	hx += p;
-	tx = xs - hx;
-
-	p = ys * split;
-	hy = ys - p;
-	hy += p;
-	ty = ys - hy;
-
-	p = hx * hy;
-	q = hx * ty + tx * hy;
-	c = p + q;
-	cc = p - c + q + tx * ty;
-
+	xy = dd_mul(xs, ys);
-	r = c + zs;
-	s = r - c;
-	rr = (c - (r - s)) + (zs - s) + cc;
+	r.lo += xy.lo;

-	if (spread + ilogbl(r) > -16383) {
+	if (spread + ilogbl(r.hi) > -16383) {
fesetround(oround);
-		r = r + rr;
+		r.hi = r.hi + r.lo;
} else {
/*
* The result is subnormal, so we round before scaling to
* avoid double rounding.
*/
-		p = ldexpl(copysignl(0x1p-16382L, r), -spread);
-		c = r + p;
-		s = c - r;
-		cc = (r - (c - s)) + (p - s) + rr;
+		p = ldexpl(copysignl(0x1p-16382L, r.hi), -spread);