Patches for s_expl.c
Bruce Evans
brde at optusnet.com.au
Fri May 31 19:18:24 UTC 2013
On Fri, 31 May 2013, Steve Kargl wrote:
> On Fri, May 31, 2013 at 06:19:09AM +1000, Bruce Evans wrote:
>> On Thu, 30 May 2013, Steve Kargl wrote:
> I've add most of your suggests.
Perhaps too many :-). Leave out and/or act more of my XXX comments and
I'm happy with it.
>> In a development version, I need hi to have only about 56 bits. It is
>> easy to re-split hi+lo for testing this. A 24-bit or 53-bit hi is
>> sufficient and would give this automatically.
>
> Is this a version where you try to eliminate the C and D polynomials?
Yes. It all works fine except for efficiency, but efficiency was the main
reason to eliminate them (also simplicity -- we avoid a special case and
hope that the pipelining effects from this compensate for a few extra
instructions for the general case).
>> @ static const long double
>> @ +/*
>> @ + * XXX none of the long double C or D coeffs except C10 is correctly printed.
>> @ + * If you re-print their values in %.35Le format, the result is always
>> @ + * different. For example, the last 2 digits in C3 should be 59, not 67.
>> @ + * 67 is apparently from rounding an extra-precision value to 36 decimal
>> @ + * places.
>> @ + */
>> @ C3 = 1.66666666666666666666666666666666667e-1L,
>>
>> I didn't fix these.
>
> I didn't fix the coefficient as well. I'll do it if I ever get
> around to regenerating the coefficients. The limiting testing
> that I've been able to do on flame gave max ULP < 0.51. This,
> IMO, is good enough for now.
This is just cosmetic. In order to verify the coeffs, I like to be
able to at least print them and get back the same results. My pari
program that verifies them (by plotting the error function) does a
little more. It has to round them to binary fractions, since any
extra precision in them would make them appear to be more accurate
then they are -- pari would use the extra precision of the decimal
values, but the compiler has to convert to binary for the CPU to use.
Here is a program to print their actual values (after rounding to
binary and back to decimal):
@ #include <float.h>
@ #include <stdio.h>
@
@ static const long double
@ o_threshold = 11356.523406294143949491931077970763428L,
@ u_threshold = -11433.462743336297878837243843452621503L,
@ L1 = 5.41521234812457272982212595914567508e-3L,
@ A3 = 1.66666666666666666666666666651085500e-1L,
@ A4 = 4.16666666666666666666666666425885320e-2L,
@ A5 = 8.33333333333333333334522877160175842e-3L,
@ A6 = 1.38888888888888888889971139751596836e-3L,
@ C3 = 1.66666666666666666666666666666666667e-1L,
@ C4 = 4.16666666666666666666666666666666645e-2L,
@ C5 = 8.33333333333333333333333333333371638e-3L,
@ C6 = 1.38888888888888888888888888891188658e-3L,
@ C7 = 1.98412698412698412698412697235950394e-4L,
@ C8 = 2.48015873015873015873015112487849040e-5L,
@ C9 = 2.75573192239858906525606685484412005e-6L,
@ C10 = 2.75573192239858906612966093057020362e-7L,
@ C11 = 2.50521083854417203619031960151253944e-8L,
@ C12 = 2.08767569878679576457272282566520649e-9L,
@ C13 = 1.60590438367252471783548748824255707e-10L,
@ D3 = 1.66666666666666666666666666666682245e-1L,
@ D4 = 4.16666666666666666666666666634228324e-2L,
@ D5 = 8.33333333333333333333333364022244481e-3L,
@ D6 = 1.38888888888888888888887138722762072e-3L,
@ D7 = 1.98412698412698412699085805424661471e-4L,
@ D8 = 2.48015873015873015687993712101479612e-5L,
@ D9 = 2.75573192239858944101036288338208042e-6L,
@ D10 = 2.75573192239853161148064676533754048e-7L,
@ D11 = 2.50521083855084570046480450935267433e-8L,
@ D12 = 2.08767569819738524488686318024854942e-9L,
@ D13 = 1.60590442297008495301927448122499313e-10L;
@
@ static const double
@ INV_L = 1.8466496523378731e+2, /* 0x171547652b82fe.0p-45 */
@ L2 = -1.0253670638894731e-29, /* -0x1.9ff0342542fc3p-97 */
@ A7 = 1.9841269841269471e-4,
@ A8 = 2.4801587301585284e-5,
@ A9 = 2.7557324277411234e-6,
@ A10 = 2.7557333722375072e-7,
@ C14 = 1.1470745580491932e-11, /* 0x1.93974a81dae3p-37 */
@ C15 = 7.6471620181090468e-13, /* 0x1.ae7f3820adab1p-41 */
@ C16 = 4.7793721460260450e-14, /* 0x1.ae7cd18a18eacp-45 */
@ C17 = 2.8074757356658877e-15, /* 0x1.949992a1937d9p-49 */
@ C18 = 1.4760610323699476e-16, /* 0x1.545b43aabfbcdp-53 */
@ D14 = 1.1470726176204336e-11, /* 0x1.93971dc395d9ep-37 */
@ D15 = 7.6478532249581686e-13, /* 0x1.ae892e3D16fcep-41 */
@ D16 = 4.7628892832607741e-14, /* 0x1.ad00Dfe41feccp-45 */
@ D17 = 3.0524857220358650e-15; /* 0x1.D7e8d886Df921p-49 */
@
@ main()
@ {
@ printf(" %.35Le\n", o_threshold, o_threshold);
@ printf(" %.35Le\n", u_threshold, u_threshold);
@ printf(" %.35Le\n", L1, L1);
@ printf(" %.35Le\n", A3, A3);
@ printf(" %.35Le\n", A4, A4);
@ printf(" %.35Le\n", A5, A5);
@ printf(" %.35Le\n", A6, A6);
@ printf(" %.35Le\n", C3, C3);
@ printf(" %.35Le\n", C4, C4);
@ printf(" %.35Le\n", C5, C5);
@ printf(" %.35Le\n", C6, C6);
@ printf(" %.35Le\n", C7, C7);
@ printf(" %.35Le\n", C8, C8);
@ printf(" %.35Le\n", C9, C9);
@ printf(" %.35Le\n", C10, C10);
@ printf(" %.35Le\n", C11, C11);
@ printf(" %.35Le\n", C12, C12);
@ printf(" %.35Le\n", C13, C13);
@ printf(" %.35Le\n", D3, D3);
@ printf(" %.35Le\n", D4, D4);
@ printf(" %.35Le\n", D5, D5);
@ printf(" %.35Le\n", D6, D6);
@ printf(" %.35Le\n", D7, D7);
@ printf(" %.35Le\n", D8, D8);
@ printf(" %.35Le\n", D9, D9);
@ printf(" %.35Le\n", D10, D10);
@ printf(" %.35Le\n", D11, D11);
@ printf(" %.35Le\n", D12, D12);
@ printf(" %.35Le\n", D13, D13);
@
@ printf(" %.16e %a\n", INV_L, INV_L);
@ printf(" %.16e %a\n", L2, L2);
@ printf(" %.16e %a\n", A7, A7);
@ printf(" %.16e %a\n", A8, A9);
@ printf(" %.16e %a\n", A9, A9);
@ printf(" %.16e %a\n", A10, A10);
@ printf(" %.16e %a\n", C14, C14);
@ printf(" %.16e %a\n", C15, C15);
@ printf(" %.16e %a\n", C16, C16);
@ printf(" %.16e %a\n", C17, C17);
@ printf(" %.16e %a\n", C18, C18);
@ printf(" %.16e %a\n", D14, D14);
@ printf(" %.16e %a\n", D15, D15);
@ printf(" %.16e %a\n", D16, D16);
@ printf(" %.16e %a\n", D17, D17);
@ }
> Final diff(?).
Just omit some new XXX comments and fix one of the new XXX comments:
> Index: ld128/s_expl.c
> ===================================================================
> --- ld128/s_expl.c (revision 251146)
> +++ ld128/s_expl.c (working copy)
> ...
> @@ -38,35 +40,67 @@
> ...
> +/*
> + * XXX values in hex in comments have been lost (or were never present)
> + * from here.
> + */
Omit.
> +static const long double
> +/*
> + * Domain [-0.002708, 0.002708], range ~[-2.4021e-38, 2.4234e-38]:
> + * |exp(x) - p(x)| < 2**-124.9
> + * (0.002708 is ln2/(2*INTERVALS) rounded up a little).
> + *
> + * XXX the coeffs aren't very carefully rounded, and I get 2.3 more bits.
> + */
Omit the XXX part.
> ...
> @@ -244,18 +287,224 @@
> +static const double
> +T1 = -0.1659, /* ~-30.625/128 * log(2) */
> +T2 = 0.1659; /* ~30.625/128 * log(2) */
> +
> +/*
> + * Split the interval [T1:T2] into two intervals [T1:T3] and [T3:T2].
> + * Setting T3 to 0 would require the |x| < 0x1p-113 condition to appear
> + * in both subintervals, so set T3 = 2**-5, which places the condition
> + * into the [T1:T3] interval.
> + *
> + * XXX we now do this more to (partially) balance the number of terms
> + * in the C and D polys than to avoid checking the conditon in both
> + * intervals.
Merge with the previous comment and remove XXX.
I just noticed that you use a different notation for intervals than me --
[T1:T2] instead of [T1, T2]. The former looks like it is from a
programming language and the latter is normal math notation.
> ...
> +/*
> + * Domain [0.03125, 0.1659], range ~[-2.7676e-37, -1.0367e-38]:
> + * |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-121.44
> + *
> + */
Extra empty line.
Bruce
More information about the freebsd-numerics
mailing list