diff options
Diffstat (limited to 'src/math')
-rw-r--r-- | src/math/__invtrigl.c | 41 | ||||
-rw-r--r-- | src/math/__invtrigl.h | 66 | ||||
-rw-r--r-- | src/math/__rem_pio2f.c | 4 | ||||
-rw-r--r-- | src/math/acos.c | 74 | ||||
-rw-r--r-- | src/math/acosf.c | 77 | ||||
-rw-r--r-- | src/math/acosh.c | 61 | ||||
-rw-r--r-- | src/math/acoshf.c | 47 | ||||
-rw-r--r-- | src/math/acoshl.c | 58 | ||||
-rw-r--r-- | src/math/acosl.c | 61 | ||||
-rw-r--r-- | src/math/asin.c | 71 | ||||
-rw-r--r-- | src/math/asinf.c | 51 | ||||
-rw-r--r-- | src/math/asinh.c | 69 | ||||
-rw-r--r-- | src/math/asinhf.c | 62 | ||||
-rw-r--r-- | src/math/asinhl.c | 72 | ||||
-rw-r--r-- | src/math/asinl.c | 61 | ||||
-rw-r--r-- | src/math/atan.c | 32 | ||||
-rw-r--r-- | src/math/atan2l.c | 40 | ||||
-rw-r--r-- | src/math/atanf.c | 28 | ||||
-rw-r--r-- | src/math/atanh.c | 67 | ||||
-rw-r--r-- | src/math/atanhf.c | 50 | ||||
-rw-r--r-- | src/math/atanhl.c | 69 | ||||
-rw-r--r-- | src/math/atanl.c | 33 | ||||
-rw-r--r-- | src/math/cosh.c | 41 | ||||
-rw-r--r-- | src/math/coshf.c | 37 | ||||
-rw-r--r-- | src/math/coshl.c | 55 | ||||
-rw-r--r-- | src/math/i386/__invtrigl.s | 0 | ||||
-rw-r--r-- | src/math/i386/exp.s | 6 | ||||
-rw-r--r-- | src/math/i386/expl.s | 108 | ||||
-rw-r--r-- | src/math/tgamma.c | 223 | ||||
-rw-r--r-- | src/math/tgammaf.c | 12 | ||||
-rw-r--r-- | src/math/x86_64/__invtrigl.s | 0 |
31 files changed, 794 insertions, 882 deletions
diff --git a/src/math/__invtrigl.c b/src/math/__invtrigl.c index a51330e2..f2d33d3e 100644 --- a/src/math/__invtrigl.c +++ b/src/math/__invtrigl.c @@ -1,36 +1,8 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.c */ -/*- - * Copyright (c) 2008 David Schultz <das@FreeBSD.ORG> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - #include "__invtrigl.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -/* coefficients used in asinl() and acosl() */ -const long double +static const long double pS0 = 1.66666666666666666631e-01L, pS1 = -4.16313987993683104320e-01L, pS2 = 3.69068046323246813704e-01L, @@ -44,9 +16,16 @@ qS3 = -1.68285799854822427013e+00L, qS4 = 3.90699412641738801874e-01L, qS5 = -3.14365703596053263322e-02L; -const long double pi_hi = 3.1415926535897932384626433832795L; -const long double pi_lo = -5.01655761266833202345e-20L; const long double pio2_hi = 1.57079632679489661926L; const long double pio2_lo = -2.50827880633416601173e-20L; +/* used in asinl() and acosl() */ +/* R(x^2) is a rational approximation of (asin(x)-x)/x^3 with Remez algorithm */ +long double __invtrigl_R(long double z) +{ + long double p, q; + p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*(pS5+z*pS6)))))); + q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*(qS4+z*qS5)))); + return p/q; +} #endif diff --git a/src/math/__invtrigl.h b/src/math/__invtrigl.h index 22748b68..cac465c2 100644 --- a/src/math/__invtrigl.h +++ b/src/math/__invtrigl.h @@ -1,68 +1,10 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.h */ -/*- - * Copyright (c) 2008 David Schultz <das@FreeBSD.ORG> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "libm.h" +#include <float.h> #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 - -#define BIAS (LDBL_MAX_EXP - 1) -#define MANH_SIZE LDBL_MANH_SIZE - -/* Constants shared by the long double inverse trig functions. */ -#define pS0 __pS0 -#define pS1 __pS1 -#define pS2 __pS2 -#define pS3 __pS3 -#define pS4 __pS4 -#define pS5 __pS5 -#define pS6 __pS6 -#define qS1 __qS1 -#define qS2 __qS2 -#define qS3 __qS3 -#define qS4 __qS4 -#define qS5 __qS5 -#define pi_hi __pi_hi -#define pi_lo __pi_lo +/* shared by acosl, asinl and atan2l */ #define pio2_hi __pio2_hi #define pio2_lo __pio2_lo +extern const long double pio2_hi, pio2_lo; -extern const long double pS0, pS1, pS2, pS3, pS4, pS5, pS6; -extern const long double qS1, qS2, qS3, qS4, qS5; -extern const long double pi_hi, pi_lo, pio2_hi, pio2_lo; - -static long double P(long double x) -{ - return x * (pS0 + x * (pS1 + x * (pS2 + x * (pS3 + - x * (pS4 + x * (pS5 + x * pS6)))))); -} - -static long double Q(long double x) -{ - return 1.0 + x * (qS1 + x * (qS2 + x * (qS3 + x * (qS4 + x * qS5)))); -} - +long double __invtrigl_R(long double z); #endif diff --git a/src/math/__rem_pio2f.c b/src/math/__rem_pio2f.c index 965dc46a..198ee91b 100644 --- a/src/math/__rem_pio2f.c +++ b/src/math/__rem_pio2f.c @@ -24,7 +24,7 @@ /* * invpio2: 53 bits of 2/pi - * pio2_1: first 33 bit of pi/2 + * pio2_1: first 25 bits of pi/2 * pio2_1t: pi/2 - pio2_1 */ static const double @@ -41,7 +41,7 @@ int __rem_pio2f(float x, double *y) GET_FLOAT_WORD(hx, x); ix = hx & 0x7fffffff; - /* 33+53 bit pi is good enough for medium size */ + /* 25+53 bit pi is good enough for medium size */ if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ STRICT_ASSIGN(double, fn, x*invpio2 + 0x1.8p52); diff --git a/src/math/acos.c b/src/math/acos.c index 0eb15bed..be95d25e 100644 --- a/src/math/acos.c +++ b/src/math/acos.c @@ -36,12 +36,8 @@ #include "libm.h" static const double -pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ -pio2_hi = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ -// FIXME -static const volatile double -pio2_lo = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ -static const double +pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ +pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ @@ -53,49 +49,53 @@ qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ +static double R(double z) +{ + double p, q; + p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); + q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + return p/q; +} + double acos(double x) { - double z,p,q,r,w,s,c,df; - int32_t hx,ix; + double z,w,s,c,df; + uint32_t hx,ix; GET_HIGH_WORD(hx, x); ix = hx & 0x7fffffff; - if (ix >= 0x3ff00000) { /* |x| >= 1 */ + /* |x| >= 1 or nan */ + if (ix >= 0x3ff00000) { uint32_t lx; GET_LOW_WORD(lx,x); - if ((ix-0x3ff00000 | lx) == 0) { /* |x|==1 */ - if (hx > 0) return 0.0; /* acos(1) = 0 */ - return pi + 2.0*pio2_lo; /* acos(-1)= pi */ + if ((ix-0x3ff00000 | lx) == 0) { + /* acos(1)=0, acos(-1)=pi */ + if (hx >> 31) + return 2*pio2_hi + 0x1p-1000; + return 0; } - return (x-x)/(x-x); /* acos(|x|>1) is NaN */ + return 0/(x-x); } - if (ix < 0x3fe00000) { /* |x| < 0.5 */ + /* |x| < 0.5 */ + if (ix < 0x3fe00000) { if (ix <= 0x3c600000) /* |x| < 2**-57 */ - return pio2_hi + pio2_lo; - z = x*x; - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); - r = p/q; - return pio2_hi - (x - (pio2_lo-x*r)); - } else if (hx < 0) { /* x < -0.5 */ + return pio2_hi + 0x1p-1000; + return pio2_hi - (x - (pio2_lo-x*R(x*x))); + } + /* x < -0.5 */ + if (hx >> 31) { z = (1.0+x)*0.5; - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); - s = sqrt(z); - r = p/q; - w = r*s-pio2_lo; - return pi - 2.0*(s+w); - } else { /* x > 0.5 */ - z = (1.0-x)*0.5; s = sqrt(z); - df = s; - SET_LOW_WORD(df,0); - c = (z-df*df)/(s+df); - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); - r = p/q; - w = r*s+c; - return 2.0*(df+w); + w = R(z)*s-pio2_lo; + return 2*(pio2_hi - (s+w)); } + /* x > 0.5 */ + z = (1.0-x)*0.5; + s = sqrt(z); + df = s; + SET_LOW_WORD(df,0); + c = (z-df*df)/(s+df); + w = R(z)*s+c; + return 2*(df+w); } diff --git a/src/math/acosf.c b/src/math/acosf.c index d875f3ef..5d7c0270 100644 --- a/src/math/acosf.c +++ b/src/math/acosf.c @@ -16,59 +16,56 @@ #include "libm.h" static const float -pi = 3.1415925026e+00, /* 0x40490fda */ -pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */ -static const volatile float -pio2_lo = 7.5497894159e-08; /* 0x33a22168 */ -static const float +pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */ +pio2_lo = 7.5497894159e-08, /* 0x33a22168 */ pS0 = 1.6666586697e-01, pS1 = -4.2743422091e-02, pS2 = -8.6563630030e-03, qS1 = -7.0662963390e-01; +static float R(float z) +{ + float p, q; + p = z*(pS0+z*(pS1+z*pS2)); + q = 1.0f+z*qS1; + return p/q; +} + float acosf(float x) { - float z,p,q,r,w,s,c,df; - int32_t hx,ix; + float z,w,s,c,df; + uint32_t hx,ix; GET_FLOAT_WORD(hx, x); ix = hx & 0x7fffffff; - if (ix >= 0x3f800000) { /* |x| >= 1 */ - if (ix == 0x3f800000) { /* |x| == 1 */ - if (hx > 0) return 0.0f; /* acos(1) = 0 */ - return pi + 2.0f*pio2_lo; /* acos(-1)= pi */ + /* |x| >= 1 or nan */ + if (ix >= 0x3f800000) { + if (ix == 0x3f800000) { + if (hx >> 31) + return 2*pio2_hi + 0x1p-120f; + return 0; } - return (x-x)/(x-x); /* acos(|x|>1) is NaN */ + return 0/(x-x); } - if (ix < 0x3f000000) { /* |x| < 0.5 */ + /* |x| < 0.5 */ + if (ix < 0x3f000000) { if (ix <= 0x32800000) /* |x| < 2**-26 */ - return pio2_hi + pio2_lo; - z = x*x; - p = z*(pS0+z*(pS1+z*pS2)); - q = 1.0f+z*qS1; - r = p/q; - return pio2_hi - (x - (pio2_lo-x*r)); - } else if (hx < 0) { /* x < -0.5 */ - z = (1.0f+x)*0.5f; - p = z*(pS0+z*(pS1+z*pS2)); - q = 1.0f+z*qS1; - s = sqrtf(z); - r = p/q; - w = r*s-pio2_lo; - return pi - 2.0f*(s+w); - } else { /* x > 0.5 */ - int32_t idf; - - z = (1.0f-x)*0.5f; + return pio2_hi + 0x1p-120f; + return pio2_hi - (x - (pio2_lo-x*R(x*x))); + } + /* x < -0.5 */ + if (hx >> 31) { + z = (1+x)*0.5f; s = sqrtf(z); - df = s; - GET_FLOAT_WORD(idf,df); - SET_FLOAT_WORD(df,idf&0xfffff000); - c = (z-df*df)/(s+df); - p = z*(pS0+z*(pS1+z*pS2)); - q = 1.0f+z*qS1; - r = p/q; - w = r*s+c; - return 2.0f*(df+w); + w = R(z)*s-pio2_lo; + return 2*(pio2_hi - (s+w)); } + /* x > 0.5 */ + z = (1-x)*0.5f; + s = sqrtf(z); + GET_FLOAT_WORD(hx,s); + SET_FLOAT_WORD(df,hx&0xfffff000); + c = (z-df*df)/(s+df); + w = R(z)*s+c; + return 2*(df+w); } diff --git a/src/math/acosh.c b/src/math/acosh.c index 15f51c6e..4ce9b3d1 100644 --- a/src/math/acosh.c +++ b/src/math/acosh.c @@ -1,54 +1,19 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_acosh.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - */ -/* acosh(x) - * Method : - * Based on - * acosh(x) = log [ x + sqrt(x*x-1) ] - * we have - * acosh(x) := log(x)+ln2, if x is large; else - * acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else - * acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1. - * - * Special cases: - * acosh(x) is NaN with signal if x<1. - * acosh(NaN) is NaN without signal. - */ - #include "libm.h" -static const double -ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */ - +/* acosh(x) = log(x + sqrt(x*x-1)) */ double acosh(double x) { - double t; - int32_t hx; - uint32_t lx; + union {double f; uint64_t i;} u = {.f = x}; + unsigned e = u.i >> 52 & 0x7ff; + + /* x < 1 domain error is handled in the called functions */ - EXTRACT_WORDS(hx, lx, x); - if (hx < 0x3ff00000) { /* x < 1 */ - return (x-x)/(x-x); - } else if (hx >= 0x41b00000) { /* x > 2**28 */ - if (hx >= 0x7ff00000) /* x is inf of NaN */ - return x+x; - return log(x) + ln2; /* acosh(huge) = log(2x) */ - } else if ((hx-0x3ff00000 | lx) == 0) { - return 0.0; /* acosh(1) = 0 */ - } else if (hx > 0x40000000) { /* 2**28 > x > 2 */ - t = x*x; - return log(2.0*x - 1.0/(x+sqrt(t-1.0))); - } else { /* 1 < x < 2 */ - t = x-1.0; - return log1p(t + sqrt(2.0*t+t*t)); - } + if (e < 0x3ff + 1) + /* |x| < 2, up to 2ulp error in [1,1.125] */ + return log1p(x-1 + sqrt((x-1)*(x-1)+2*(x-1))); + if (e < 0x3ff + 26) + /* |x| < 0x1p26 */ + return log(2*x - 1/(x+sqrt(x*x-1))); + /* |x| >= 0x1p26 or nan */ + return log(x) + 0.693147180559945309417232121458176568; } diff --git a/src/math/acoshf.c b/src/math/acoshf.c index 0f7aae2a..4596085e 100644 --- a/src/math/acoshf.c +++ b/src/math/acoshf.c @@ -1,42 +1,17 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_acoshf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - #include "libm.h" -static const float -ln2 = 6.9314718246e-01; /* 0x3f317218 */ - +/* acosh(x) = log(x + sqrt(x*x-1)) */ float acoshf(float x) { - float t; - int32_t hx; + union {float f; int32_t i;} u = {.f = x}; - GET_FLOAT_WORD(hx, x); - if (hx < 0x3f800000) { /* x < 1 */ - return (x-x)/(x-x); - } else if (hx >= 0x4d800000) { /* x > 2**28 */ - if (hx >= 0x7f800000) /* x is inf of NaN */ - return x + x; - return logf(x) + ln2; /* acosh(huge)=log(2x) */ - } else if (hx == 0x3f800000) { - return 0.0f; /* acosh(1) = 0 */ - } else if (hx > 0x40000000) { /* 2**28 > x > 2 */ - t = x*x; - return logf(2.0f*x - 1.0f/(x+sqrtf(t-1.0f))); - } else { /* 1 < x < 2 */ - t = x-1.0f; - return log1pf(t + sqrtf(2.0f*t+t*t)); - } + if (u.i < 0x3f800000+(1<<23)) + /* x < 2, invalid if x < 1 or nan */ + /* up to 2ulp error in [1,1.125] */ + return log1pf(x-1 + sqrtf((x-1)*(x-1)+2*(x-1))); + if (u.i < 0x3f800000+(12<<23)) + /* x < 0x1p12 */ + return logf(2*x - 1/(x+sqrtf(x*x-1))); + /* x >= 0x1p12 */ + return logf(x) + 0.693147180559945309417232121458176568f; } diff --git a/src/math/acoshl.c b/src/math/acoshl.c index a4024516..472c71cb 100644 --- a/src/math/acoshl.c +++ b/src/math/acoshl.c @@ -1,28 +1,3 @@ -/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_acoshl.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* acoshl(x) - * Method : - * Based on - * acoshl(x) = logl [ x + sqrtl(x*x-1) ] - * we have - * acoshl(x) := logl(x)+ln2, if x is large; else - * acoshl(x) := logl(2x-1/(sqrtl(x*x-1)+x)) if x>2; else - * acoshl(x) := log1pl(t+sqrtl(2.0*t+t*t)); where t=x-1. - * - * Special cases: - * acoshl(x) is NaN with signal if x<1. - * acoshl(NaN) is NaN without signal. - */ - #include "libm.h" #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 @@ -31,29 +6,20 @@ long double acoshl(long double x) return acosh(x); } #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -static const long double -ln2 = 6.931471805599453094287e-01L; /* 0x3FFE, 0xB17217F7, 0xD1CF79AC */ - +/* acosh(x) = log(x + sqrt(x*x-1)) */ long double acoshl(long double x) { - long double t; - uint32_t se,i0,i1; + union { + long double f; + struct{uint64_t m; int16_t se; uint16_t pad;} i; + } u = {.f = x}; - GET_LDOUBLE_WORDS(se, i0, i1, x); - if (se < 0x3fff || se & 0x8000) { /* x < 1 */ - return (x-x)/(x-x); - } else if (se >= 0x401d) { /* x > 2**30 */ - if (se >= 0x7fff) /* x is inf or NaN */ - return x+x; - return logl(x) + ln2; /* acoshl(huge) = logl(2x) */ - } else if (((se-0x3fff)|i0|i1) == 0) { - return 0.0; /* acosh(1) = 0 */ - } else if (se > 0x4000) { /* x > 2 */ - t = x*x; - return logl(2.0*x - 1.0/(x + sqrtl(t - 1.0))); - } - /* 1 < x <= 2 */ - t = x - 1.0; - return log1pl(t + sqrtl(2.0*t + t*t)); + if (u.i.se < 0x3fff + 1) + /* x < 2, invalid if x < 1 or nan */ + return log1pl(x-1 + sqrtl((x-1)*(x-1)+2*(x-1))); + if (u.i.se < 0x3fff + 32) + /* x < 0x1p32 */ + return logl(2*x - 1/(x+sqrtl(x*x-1))); + return logl(x) + 0.693147180559945309417232121458176568L; } #endif diff --git a/src/math/acosl.c b/src/math/acosl.c index 83857d49..9e9b01e4 100644 --- a/src/math/acosl.c +++ b/src/math/acosl.c @@ -23,55 +23,46 @@ long double acosl(long double x) } #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 #include "__invtrigl.h" -#define ACOS_CONST (BIAS - 65) /* 2**-65 */ long double acosl(long double x) { union IEEEl2bits u; - long double z, p, q, r, w, s, c, df; + long double z, w, s, c, df; int16_t expsign, expt; u.e = x; expsign = u.xbits.expsign; expt = expsign & 0x7fff; - if (expt >= BIAS) { /* |x| >= 1 */ - if (expt == BIAS && + /* |x| >= 1 or nan */ + if (expt >= 0x3fff) { + if (expt == 0x3fff && ((u.bits.manh & ~LDBL_NBIT) | u.bits.manl) == 0) { if (expsign > 0) - return 0.0; /* acos(1) = 0 */ - else - // FIXME - return pi_hi + 2.0 * pio2_lo; /* acos(-1)= pi */ + return 0; /* acos(1) = 0 */ + return 2*pio2_hi + 0x1p-1000; /* acos(-1)= pi */ } - return (x - x) / (x - x); /* acos(|x|>1) is NaN */ + return 0/(x-x); /* acos(|x|>1) is NaN */ } - if (expt < BIAS - 1) { /* |x| < 0.5 */ - if (expt < ACOS_CONST) - return pio2_hi + pio2_lo; /* x tiny: acosl=pi/2 */ - z = x * x; - p = P(z); - q = Q(z); - r = p / q; - return pio2_hi - (x - (pio2_lo - x * r)); - } else if (expsign < 0) { /* x < -0.5 */ + /* |x| < 0.5 */ + if (expt < 0x3fff - 1) { + if (expt < 0x3fff - 65) + return pio2_hi + 0x1p-1000; /* x < 0x1p-65: acosl(x)=pi/2 */ + return pio2_hi - (x - (pio2_lo - x * __invtrigl_R(x*x))); + } + /* x < -0.5 */ + if (expsign < 0) { z = (1.0 + x) * 0.5; - p = P(z); - q = Q(z); - s = sqrtl(z); - r = p / q; - w = r * s - pio2_lo; - return pi_hi - 2.0 * (s + w); - } else { /* x > 0.5 */ - z = (1.0 - x) * 0.5; s = sqrtl(z); - u.e = s; - u.bits.manl = 0; - df = u.e; - c = (z - df * df) / (s + df); - p = P(z); - q = Q(z); - r = p / q; - w = r * s + c; - return 2.0 * (df + w); + w = __invtrigl_R(z) * s - pio2_lo; + return 2*(pio2_hi - (s + w)); } + /* x > 0.5 */ + z = (1.0 - x) * 0.5; + s = sqrtl(z); + u.e = s; + u.bits.manl = 0; + df = u.e; + c = (z - df * df) / (s + df); + w = __invtrigl_R(z) * s + c; + return 2*(df + w); } #endif diff --git a/src/math/asin.c b/src/math/asin.c index e3d8b250..a1906b08 100644 --- a/src/math/asin.c +++ b/src/math/asin.c @@ -42,10 +42,8 @@ #include "libm.h" static const double -huge = 1.000e+300, pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ -pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ /* coefficients for R(x^2) */ pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ @@ -58,51 +56,54 @@ qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ +static double R(double z) +{ + double p, q; + p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); + q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + return p/q; +} + double asin(double x) { - double t=0.0,w,p,q,c,r,s; - int32_t hx,ix; + double z,r,s; + uint32_t hx,ix; GET_HIGH_WORD(hx, x); ix = hx & 0x7fffffff; - if (ix >= 0x3ff00000) { /* |x|>= 1 */ + /* |x| >= 1 or nan */ + if (ix >= 0x3ff00000) { uint32_t lx; - GET_LOW_WORD(lx, x); if ((ix-0x3ff00000 | lx) == 0) /* asin(1) = +-pi/2 with inexact */ - return x*pio2_hi + x*pio2_lo; - return (x-x)/(x-x); /* asin(|x|>1) is NaN */ - } else if (ix < 0x3fe00000) { /* |x|<0.5 */ - if (ix < 0x3e500000) { /* if |x| < 2**-26 */ - if (huge+x > 1.0) - return x; /* return x with inexact if x!=0*/ + return x*pio2_hi + 0x1p-1000; + return 0/(x-x); + } + /* |x| < 0.5 */ + if (ix < 0x3fe00000) { + if (ix < 0x3e500000) { + /* |x|<0x1p-26, return x with inexact if x!=0*/ + FORCE_EVAL(x + 0x1p1000); + return x; } - t = x*x; - p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); - q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4))); - w = p/q; - return x + x*w; + return x + x*R(x*x); } /* 1 > |x| >= 0.5 */ - w = 1.0 - fabs(x); - t = w*0.5; - p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); - q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4))); - s = sqrt(t); - if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */ - w = p/q; - t = pio2_hi-(2.0*(s+s*w)-pio2_lo); + z = (1 - fabs(x))*0.5; + s = sqrt(z); + r = R(z); + if (ix >= 0x3fef3333) { /* if |x| > 0.975 */ + x = pio2_hi-(2*(s+s*r)-pio2_lo); } else { - w = s; - SET_LOW_WORD(w,0); - c = (t-w*w)/(s+w); - r = p/q; - p = 2.0*s*r-(pio2_lo-2.0*c); - q = pio4_hi - 2.0*w; - t = pio4_hi - (p-q); + double f,c; + /* f+c = sqrt(z) */ + f = s; + SET_LOW_WORD(f,0); + c = (z-f*f)/(s+f); + x = 0.5*pio2_hi - (2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f)); } - if (hx > 0) - return t; - return -t; + if (hx >> 31) + return -x; + return x; } diff --git a/src/math/asinf.c b/src/math/asinf.c index c1c42c7b..462bf043 100644 --- a/src/math/asinf.c +++ b/src/math/asinf.c @@ -12,52 +12,51 @@ * is preserved. * ==================================================== */ - #include "libm.h" +static const double +pio2 = 1.570796326794896558e+00; + static const float -huge = 1.000e+30, /* coefficients for R(x^2) */ pS0 = 1.6666586697e-01, pS1 = -4.2743422091e-02, pS2 = -8.6563630030e-03, qS1 = -7.0662963390e-01; -static const double -pio2 = 1.570796326794896558e+00; +static float R(float z) +{ + float p, q; + p = z*(pS0+z*(pS1+z*pS2)); + q = 1.0f+z*qS1; + return p/q; +} float asinf(float x) { double s; - float t,w,p,q; - int32_t hx,ix; + float z; + uint32_t hx,ix; GET_FLOAT_WORD(hx, x); ix = hx & 0x7fffffff; if (ix >= 0x3f800000) { /* |x| >= 1 */ if (ix == 0x3f800000) /* |x| == 1 */ - return x*pio2; /* asin(+-1) = +-pi/2 with inexact */ - return (x-x)/(x-x); /* asin(|x|>1) is NaN */ - } else if (ix < 0x3f000000) { /* |x|<0.5 */ + return x*pio2 + 0x1p-120f; /* asin(+-1) = +-pi/2 with inexact */ + return 0/(x-x); /* asin(|x|>1) is NaN */ + } + if (ix < 0x3f000000) { /* |x| < 0.5 */ if (ix < 0x39800000) { /* |x| < 2**-12 */ - if (huge+x > 1.0f) - return x; /* return x with inexact if x!=0 */ + FORCE_EVAL(x + 0x1p120f); + return x; /* return x with inexact if x!=0 */ } - t = x*x; - p = t*(pS0+t*(pS1+t*pS2)); - q = 1.0f+t*qS1; - w = p/q; - return x + x*w; + return x + x*R(x*x); } /* 1 > |x| >= 0.5 */ - w = 1.0f - fabsf(x); - t = w*0.5f; - p = t*(pS0+t*(pS1+t*pS2)); - q = 1.0f+t*qS1; - s = sqrt(t); - w = p/q; - t = pio2-2.0*(s+s*w); - if (hx > 0) - return t; - return -t; + z = (1 - fabsf(x))*0.5f; + s = sqrt(z); + x = pio2 - 2*(s+s*R(z)); + if (hx >> 31) + return -x; + return x; } diff --git a/src/math/asinh.c b/src/math/asinh.c index 11bbd71a..4152dc39 100644 --- a/src/math/asinh.c +++ b/src/math/asinh.c @@ -1,55 +1,28 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_asinh.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* asinh(x) - * Method : - * Based on - * asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] - * we have - * asinh(x) := x if 1+x*x=1, - * := sign(x)*(log(x)+ln2)) for large |x|, else - * := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else - * := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2))) - */ - #include "libm.h" -static const double -ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ -huge= 1.00000000000000000000e+300; - +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ double asinh(double x) { - double t,w; - int32_t hx,ix; + union {double f; uint64_t i;} u = {.f = x}; + unsigned e = u.i >> 52 & 0x7ff; + unsigned s = u.i >> 63; - GET_HIGH_WORD(hx, x); - ix = hx & 0x7fffffff; - if (ix >= 0x7ff00000) /* x is inf or NaN */ - return x+x; - if (ix < 0x3e300000) { /* |x| < 2**-28 */ - /* return x inexact except 0 */ - if (huge+x > 1.0) - return x; - } - if (ix > 0x41b00000) { /* |x| > 2**28 */ - w = log(fabs(x)) + ln2; - } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */ - t = fabs(x); - w = log(2.0*t + 1.0/(sqrt(x*x+1.0)+t)); - } else { /* 2.0 > |x| > 2**-28 */ - t = x*x; - w =log1p(fabs(x) + t/(1.0+sqrt(1.0+t))); + /* |x| */ + u.i &= (uint64_t)-1/2; + x = u.f; + + if (e >= 0x3ff + 26) { + /* |x| >= 0x1p26 or inf or nan */ + x = log(x) + 0.693147180559945309417232121458176568; + } else if (e >= 0x3ff + 1) { + /* |x| >= 2 */ + x = log(2*x + 1/(sqrt(x*x+1)+x)); + } else if (e >= 0x3ff - 26) { + /* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */ + x = log1p(x + x*x/(sqrt(x*x+1)+1)); + } else { + /* |x| < 0x1p-26, raise inexact if x != 0 */ + FORCE_EVAL(x + 0x1p1000); } - if (hx > 0) - return w; - return -w; + return s ? -x : x; } diff --git a/src/math/asinhf.c b/src/math/asinhf.c index efe3af94..fc9f0911 100644 --- a/src/math/asinhf.c +++ b/src/math/asinhf.c @@ -1,48 +1,28 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_asinhf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - #include "libm.h" -static const float -ln2 = 6.9314718246e-01, /* 0x3f317218 */ -huge= 1.0000000000e+30; - +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ float asinhf(float x) { - float t,w; - int32_t hx,ix; + union {float f; uint32_t i;} u = {.f = x}; + uint32_t i = u.i & 0x7fffffff; + unsigned s = u.i >> 31; - GET_FLOAT_WORD(hx, x); - ix = hx & 0x7fffffff; - if (ix >= 0x7f800000) /* x is inf or NaN */ - return x+x; - if (ix < 0x31800000) { /* |x| < 2**-28 */ - /* return x inexact except 0 */ - if (huge+x > 1.0f) - return x; - } - if (ix > 0x4d800000) { /* |x| > 2**28 */ - w = logf(fabsf(x)) + ln2; - } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */ - t = fabsf(x); - w = logf(2.0f*t + 1.0f/(sqrtf(x*x+1.0f)+t)); - } else { /* 2.0 > |x| > 2**-28 */ - t = x*x; - w =log1pf(fabsf(x) + t/(1.0f+sqrtf(1.0f+t))); + /* |x| */ + u.i = i; + x = u.f; + + if (i >= 0x3f800000 + (12<<23)) { + /* |x| >= 0x1p12 or inf or nan */ + x = logf(x) + 0.693147180559945309417232121458176568f; + } else if (i >= 0x3f800000 + (1<<23)) { + /* |x| >= 2 */ + x = logf(2*x + 1/(sqrtf(x*x+1)+x)); + } else if (i >= 0x3f800000 - (12<<23)) { + /* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */ + x = log1pf(x + x*x/(sqrtf(x*x+1)+1)); + } else { + /* |x| < 0x1p-12, raise inexact if x!=0 */ + FORCE_EVAL(x + 0x1p120f); } - if (hx > 0) - return w; - return -w; + return s ? -x : x; } diff --git a/src/math/asinhl.c b/src/math/asinhl.c index dc5dd71f..db966246 100644 --- a/src/math/asinhl.c +++ b/src/math/asinhl.c @@ -1,25 +1,3 @@ -/* origin: OpenBSD /usr/src/lib/libm/src/ld80/s_asinhl.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* asinhl(x) - * Method : - * Based on - * asinhl(x) = signl(x) * logl [ |x| + sqrtl(x*x+1) ] - * we have - * asinhl(x) := x if 1+x*x=1, - * := signl(x)*(logl(x)+ln2)) for large |x|, else - * := signl(x)*logl(2|x|+1/(|x|+sqrtl(x*x+1))) if|x|>2, else - * := signl(x)*log1pl(|x| + x^2/(1 + sqrtl(1+x^2))) - */ - #include "libm.h" #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 @@ -28,35 +6,33 @@ long double asinhl(long double x) return asinh(x); } #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -static const long double -ln2 = 6.931471805599453094287e-01L, /* 0x3FFE, 0xB17217F7, 0xD1CF79AC */ -huge = 1.000000000000000000e+4900L; - +/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ long double asinhl(long double x) { - long double t,w; - int32_t hx,ix; + union { + long double f; + struct{uint64_t m; uint16_t se; uint16_t pad;} i; + } u = {.f = x}; + unsigned e = u.i.se & 0x7fff; + unsigned s = u.i.se >> 15; - GET_LDOUBLE_EXP(hx, x); - ix = hx & 0x7fff; - if (ix == 0x7fff) - return x + x; /* x is inf or NaN */ - if (ix < 0x3fde) { /* |x| < 2**-34 */ - /* return x, raise inexact if x != 0 */ - if (huge+x > 1.0) - return x; - } - if (ix > 0x4020) { /* |x| > 2**34 */ - w = logl(fabsl(x)) + ln2; - } else if (ix > 0x4000) { /* 2**34 > |x| > 2.0 */ - t = fabsl(x); - w = logl(2.0*t + 1.0/(sqrtl(x*x + 1.0) + t)); - } else { /* 2.0 > |x| > 2**-28 */ - t = x*x; - w =log1pl(fabsl(x) + t/(1.0 + sqrtl(1.0 + t))); + /* |x| */ + u.i.se = e; + x = u.f; + + if (e >= 0x3fff + 32) { + /* |x| >= 0x1p32 or inf or nan */ + x = logl(x) + 0.693147180559945309417232121458176568L; + } else if (e >= 0x3fff + 1) { + /* |x| >= 2 */ + x = logl(2*x + 1/(sqrtl(x*x+1)+x)); + } else if (e >= 0x3fff - 32) { + /* |x| >= 0x1p-32 */ + x = log1pl(x + x*x/(sqrtl(x*x+1)+1)); + } else { + /* |x| < 0x1p-32, raise inexact if x!=0 */ + FORCE_EVAL(x + 0x1p1000); } - if (hx & 0x8000) - return -w; - return w; + return s ? -x : x; } #endif diff --git a/src/math/asinl.c b/src/math/asinl.c index 7572767c..0ef9853b 100644 --- a/src/math/asinl.c +++ b/src/math/asinl.c @@ -23,60 +23,49 @@ long double asinl(long double x) } #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 #include "__invtrigl.h" -static const long double huge = 1.000e+300; -static const long double pio4_hi = 7.85398163397448309628e-01L; -#define ASIN_LINEAR (BIAS - 32) /* 2**-32 */ /* 0.95 */ -#define THRESH ((0xe666666666666666ULL>>(64-(MANH_SIZE-1)))|LDBL_NBIT) +#define THRESH ((0xe666666666666666ULL>>(64-(LDBL_MANH_SIZE-1)))|LDBL_NBIT) long double asinl(long double x) { union IEEEl2bits u; - long double t=0.0,w,p,q,c,r,s; - int16_t expsign, expt; + long double z,r,s; + uint16_t expsign, expt; u.e = x; expsign = u.xbits.expsign; expt = expsign & 0x7fff; - if (expt >= BIAS) { /* |x|>= 1 */ - if (expt == BIAS && + if (expt >= 0x3fff) { /* |x| >= 1 or nan */ + if (expt == 0x3fff && ((u.bits.manh&~LDBL_NBIT)|u.bits.manl) == 0) - /* asin(1)=+-pi/2 with inexact */ - return x*pio2_hi + x*pio2_lo; - return (x-x)/(x-x); /* asin(|x|>1) is NaN */ - } else if (expt < BIAS-1) { /* |x|<0.5 */ - if (expt < ASIN_LINEAR) { /* if |x| is small, asinl(x)=x */ + /* asin(+-1)=+-pi/2 with inexact */ + return x*pio2_hi + 0x1p-1000; + return 0/(x-x); + } + if (expt < 0x3fff - 1) { /* |x| < 0.5 */ + if (expt < 0x3fff - 32) { /* |x|<0x1p-32, asinl(x)=x */ /* return x with inexact if x!=0 */ - if (huge+x > 1.0) - return x; + FORCE_EVAL(x + 0x1p1000); + return x; } - t = x*x; - p = P(t); - q = Q(t); - w = p/q; - return x + x*w; + return x + x*__invtrigl_R(x*x); } /* 1 > |x| >= 0.5 */ - w = 1.0 - fabsl(x); - t = w*0.5; - p = P(t); - q = Q(t); - s = sqrtl(t); + z = (1.0 - fabsl(x))*0.5; + s = sqrtl(z); + r = __invtrigl_R(z); if (u.bits.manh >= THRESH) { /* if |x| is close to 1 */ - w = p/q; - t = pio2_hi-(2.0*(s+s*w)-pio2_lo); + x = pio2_hi - (2*(s+s*r)-pio2_lo); } else { + long double f, c; u.e = s; u.bits.manl = 0; - w = u.e; - c = (t-w*w)/(s+w); - r = p/q; - p = 2.0*s*r-(pio2_lo-2.0*c); - q = pio4_hi-2.0*w; - t = pio4_hi-(p-q); + f = u.e; + c = (z-f*f)/(s+f); + x = 0.5*pio2_hi-(2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f)); } - if (expsign > 0) - return t; - return -t; + if (expsign>>15) + return -x; + return x; } #endif diff --git a/src/math/atan.c b/src/math/atan.c index f34551c7..7fd8a3b8 100644 --- a/src/math/atan.c +++ b/src/math/atan.c @@ -60,32 +60,26 @@ static const double aT[] = { 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */ }; -static const double huge = 1.0e300; - double atan(double x) { double w,s1,s2,z; - int32_t ix,hx,id; + uint32_t ix,sign; + int id; - GET_HIGH_WORD(hx, x); - ix = hx & 0x7fffffff; + GET_HIGH_WORD(ix, x); + sign = ix >> 31; + ix &= 0x7fffffff; if (ix >= 0x44100000) { /* if |x| >= 2^66 */ - uint32_t low; - - GET_LOW_WORD(low, x); - if (ix > 0x7ff00000 || - (ix == 0x7ff00000 && low != 0)) /* NaN */ - return x+x; - if (hx > 0) - return atanhi[3] + *(volatile double *)&atanlo[3]; - else - return -atanhi[3] - *(volatile double *)&atanlo[3]; + if (isnan(x)) + return x; + z = atanhi[3] + 0x1p-1000; + return sign ? -z : z; } if (ix < 0x3fdc0000) { /* |x| < 0.4375 */ if (ix < 0x3e400000) { /* |x| < 2^-27 */ - /* raise inexact */ - if (huge+x > 1.0) - return x; + /* raise inexact if x!=0 */ + FORCE_EVAL(x + 0x1p1000); + return x; } id = -1; } else { @@ -117,5 +111,5 @@ double atan(double x) if (id < 0) return x - x*(s1+s2); z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x); - return hx < 0 ? -z : z; + return sign ? -z : z; } diff --git a/src/math/atan2l.c b/src/math/atan2l.c index 45cbfcc1..e86dbffb 100644 --- a/src/math/atan2l.c +++ b/src/math/atan2l.c @@ -24,8 +24,6 @@ long double atan2l(long double y, long double x) } #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 #include "__invtrigl.h" -// FIXME: -static const volatile long double tiny = 1.0e-300; long double atan2l(long double y, long double x) { @@ -40,12 +38,12 @@ long double atan2l(long double y, long double x) ux.e = x; expsignx = ux.xbits.expsign; exptx = expsignx & 0x7fff; - if ((exptx==BIAS+LDBL_MAX_EXP && + if ((exptx==0x7fff && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)!=0) || /* x is NaN */ - (expty==BIAS+LDBL_MAX_EXP && + (expty==0x7fff && ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0)) /* y is NaN */ return x+y; - if (expsignx==BIAS && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */ + if (expsignx==0x3fff && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */ return atanl(y); m = ((expsigny>>15)&1) | ((expsignx>>14)&2); /* 2*sign(x)+sign(y) */ @@ -54,39 +52,39 @@ long double atan2l(long double y, long double x) switch(m) { case 0: case 1: return y; /* atan(+-0,+anything)=+-0 */ - case 2: return pi_hi+tiny; /* atan(+0,-anything) = pi */ - case 3: return -pi_hi-tiny; /* atan(-0,-anything) =-pi */ + case 2: return 2*pio2_hi+0x1p-1000; /* atan(+0,-anything) = pi */ + case 3: return -2*pio2_hi-0x1p-1000; /* atan(-0,-anything) =-pi */ } } /* when x = 0 */ if (exptx==0 && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) - return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny; + return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000; /* when x is INF */ - if (exptx == BIAS+LDBL_MAX_EXP) { - if (expty == BIAS+LDBL_MAX_EXP) { + if (exptx == 0x7fff) { + if (expty == 0x7fff) { switch(m) { - case 0: return pio2_hi*0.5+tiny; /* atan(+INF,+INF) */ - case 1: return -pio2_hi*0.5-tiny; /* atan(-INF,+INF) */ - case 2: return 1.5*pio2_hi+tiny; /* atan(+INF,-INF) */ - case 3: return -1.5*pio2_hi-tiny; /* atan(-INF,-INF) */ + case 0: return pio2_hi*0.5+0x1p-1000; /* atan(+INF,+INF) */ + case 1: return -pio2_hi*0.5-0x1p-1000; /* atan(-INF,+INF) */ + case 2: return 1.5*pio2_hi+0x1p-1000; /* atan(+INF,-INF) */ + case 3: return -1.5*pio2_hi-0x1p-1000; /* atan(-INF,-INF) */ } } else { switch(m) { case 0: return 0.0; /* atan(+...,+INF) */ case 1: return -0.0; /* atan(-...,+INF) */ - case 2: return pi_hi+tiny; /* atan(+...,-INF) */ - case 3: return -pi_hi-tiny; /* atan(-...,-INF) */ + case 2: return 2*pio2_hi+0x1p-1000; /* atan(+...,-INF) */ + case 3: return -2*pio2_hi-0x1p-1000; /* atan(-...,-INF) */ } } } /* when y is INF */ - if (expty == BIAS+LDBL_MAX_EXP) - return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny; + if (expty == 0x7fff) + return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000; /* compute y/x */ k = expty-exptx; if(k > LDBL_MANT_DIG+2) { /* |y/x| huge */ - z = pio2_hi+pio2_lo; + z = pio2_hi+0x1p-1000; m &= 1; } else if (expsignx < 0 && k < -LDBL_MANT_DIG-2) /* |y/x| tiny, x<0 */ z = 0.0; @@ -95,9 +93,9 @@ long double atan2l(long double y, long double x) switch (m) { case 0: return z; /* atan(+,+) */ case 1: return -z; /* atan(-,+) */ - case 2: return pi_hi-(z-pi_lo); /* atan(+,-) */ + case 2: return 2*pio2_hi-(z-2*pio2_lo); /* atan(+,-) */ default: /* case 3 */ - return (z-pi_lo)-pi_hi; /* atan(-,-) */ + return (z-2*pio2_lo)-2*pio2_hi; /* atan(-,-) */ } } #endif diff --git a/src/math/atanf.c b/src/math/atanf.c index 4e31b902..4b59509a 100644 --- a/src/math/atanf.c +++ b/src/math/atanf.c @@ -38,28 +38,26 @@ static const float aT[] = { 6.1687607318e-02, }; -static const float huge = 1.0e30; - float atanf(float x) { float w,s1,s2,z; - int32_t ix,hx,id; + uint32_t ix,sign; + int id; - GET_FLOAT_WORD(hx, x); - ix = hx & 0x7fffffff; + GET_FLOAT_WORD(ix, x); + sign = ix>>31; + ix &= 0x7fffffff; if (ix >= 0x4c800000) { /* if |x| >= 2**26 */ - if (ix > 0x7f800000) /* NaN */ - return x+x; - if (hx > 0) - return atanhi[3] + *(volatile float *)&atanlo[3]; - else - return -atanhi[3] - *(volatile float *)&atanlo[3]; + if (isnan(x)) + return x; + z = atanhi[3] + 0x1p-120f; + return sign ? -z : z; } if (ix < 0x3ee00000) { /* |x| < 0.4375 */ if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact */ - if(huge+x>1.0f) - return x; + /* raise inexact if x!=0 */ + FORCE_EVAL(x + 0x1p120f); + return x; } id = -1; } else { @@ -91,5 +89,5 @@ float atanf(float x) if (id < 0) return x - x*(s1+s2); z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); - return hx < 0 ? -z : z; + return sign ? -z : z; } diff --git a/src/math/atanh.c b/src/math/atanh.c index dbe241d1..84a84c69 100644 --- a/src/math/atanh.c +++ b/src/math/atanh.c @@ -1,58 +1,21 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_atanh.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - */ -/* atanh(x) - * Method : - * 1.Reduced x to positive by atanh(-x) = -atanh(x) - * 2.For x>=0.5 - * 1 2x x - * atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------) - * 2 1 - x 1 - x - * - * For x<0.5 - * atanh(x) = 0.5*log1p(2x+2x*x/(1-x)) - * - * Special cases: - * atanh(x) is NaN if |x| > 1 with signal; - * atanh(NaN) is that NaN with no signal; - * atanh(+-1) is +-INF with signal. - * - */ - #include "libm.h" -static const double huge = 1e300; - +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ double atanh(double x) { - double t; - int32_t hx,ix; - uint32_t lx; + union {double f; uint64_t i;} u = {.f = x}; + unsigned e = u.i >> 52 & 0x7ff; + unsigned s = u.i >> 63; + + /* |x| */ + u.i &= (uint64_t)-1/2; + x = u.f; - EXTRACT_WORDS(hx, lx, x); - ix = hx & 0x7fffffff; - if ((ix | ((lx|-lx)>>31)) > 0x3ff00000) /* |x| > 1 */ - return (x-x)/(x-x); - if (ix == 0x3ff00000) - return x/0.0; - if (ix < 0x3e300000 && (huge+x) > 0.0) /* x < 2**-28 */ - return x; - SET_HIGH_WORD(x, ix); - if (ix < 0x3fe00000) { /* x < 0.5 */ - t = x+x; - t = 0.5*log1p(t + t*x/(1.0-x)); - } else - t = 0.5*log1p((x+x)/(1.0-x)); - if (hx >= 0) - return t; - return -t; + if (e < 0x3ff - 1) { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5*log1p(2*x + 2*x*x/(1-x)); + } else { + x = 0.5*log1p(2*x/(1-x)); + } + return s ? -x : x; } diff --git a/src/math/atanhf.c b/src/math/atanhf.c index 2be780bb..ca106efc 100644 --- a/src/math/atanhf.c +++ b/src/math/atanhf.c @@ -1,42 +1,20 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_atanhf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - #include "libm.h" -static const float huge = 1e30; - +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ float atanhf(float x) { - float t; - int32_t hx,ix; + union {float f; uint32_t i;} u = {.f = x}; + unsigned s = u.i >> 31; + + /* |x| */ + u.i &= 0x7fffffff; + x = u.f; - GET_FLOAT_WORD(hx, x); - ix = hx & 0x7fffffff; - if (ix > 0x3f800000) /* |x| > 1 */ - return (x-x)/(x-x); - if (ix == 0x3f800000) - return x/0.0f; - if (ix < 0x31800000 && huge+x > 0.0f) /* x < 2**-28 */ - return x; - SET_FLOAT_WORD(x, ix); - if (ix < 0x3f000000) { /* x < 0.5 */ - t = x+x; - t = 0.5f*log1pf(t + t*x/(1.0f-x)); - } else - t = 0.5f*log1pf((x+x)/(1.0f-x)); - if (hx >= 0) - return t; - return -t; + if (u.i < 0x3f800000 - (1<<23)) { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5f*log1pf(2*x + 2*x*x/(1-x)); + } else { + x = 0.5f*log1pf(2*x/(1-x)); + } + return s ? -x : x; } diff --git a/src/math/atanhl.c b/src/math/atanhl.c index 931bae32..b4c5e58b 100644 --- a/src/math/atanhl.c +++ b/src/math/atanhl.c @@ -1,31 +1,3 @@ -/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_atanh.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* atanhl(x) - * Method : - * 1.Reduced x to positive by atanh(-x) = -atanh(x) - * 2.For x>=0.5 - * 1 2x x - * atanhl(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------) - * 2 1 - x 1 - x - * - * For x<0.5 - * atanhl(x) = 0.5*log1pl(2x+2x*x/(1-x)) - * - * Special cases: - * atanhl(x) is NaN if |x| > 1 with signal; - * atanhl(NaN) is that NaN with no signal; - * atanhl(+-1) is +-INF with signal. - */ - #include "libm.h" #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 @@ -34,31 +6,26 @@ long double atanhl(long double x) return atanh(x); } #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -static const long double huge = 1e4900L; - +/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ long double atanhl(long double x) { - long double t; - int32_t ix; - uint32_t se,i0,i1; + union { + long double f; + struct{uint64_t m; uint16_t se; uint16_t pad;} i; + } u = {.f = x}; + unsigned e = u.i.se & 0x7fff; + unsigned s = u.i.se >> 15; + + /* |x| */ + u.i.se = e; + x = u.f; - GET_LDOUBLE_WORDS(se, i0, i1, x); - ix = se & 0x7fff; - if ((ix+((((i0&0x7fffffff)|i1)|(-((i0&0x7fffffff)|i1)))>>31)) > 0x3fff) - /* |x| > 1 */ - return (x-x)/(x-x); - if (ix == 0x3fff) - return x/0.0; - if (ix < 0x3fe3 && huge+x > 0.0) /* x < 2**-28 */ - return x; - SET_LDOUBLE_EXP(x, ix); - if (ix < 0x3ffe) { /* x < 0.5 */ - t = x + x; - t = 0.5*log1pl(t + t*x/(1.0 - x)); - } else - t = 0.5*log1pl((x + x)/(1.0 - x)); - if (se <= 0x7fff) - return t; - return -t; + if (e < 0x3fff - 1) { + /* |x| < 0.5, up to 1.7ulp error */ + x = 0.5*log1pl(2*x + 2*x*x/(1-x)); + } else { + x = 0.5*log1pl(2*x/(1-x)); + } + return s ? -x : x; } #endif diff --git a/src/math/atanl.c b/src/math/atanl.c index 33ecff0d..6a480a7a 100644 --- a/src/math/atanl.c +++ b/src/math/atanl.c @@ -22,11 +22,6 @@ long double atanl(long double x) return atan(x); } #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 -#include "__invtrigl.h" - -#define ATAN_CONST (BIAS + 65) /* 2**65 */ -#define ATAN_LINEAR (BIAS - 32) /* 2**-32 */ -static const long double huge = 1.0e300; static const long double atanhi[] = { 4.63647609000806116202e-01L, @@ -81,29 +76,27 @@ long double atanl(long double x) u.e = x; expsign = u.xbits.expsign; expt = expsign & 0x7fff; - if (expt >= ATAN_CONST) { /* if |x| is large, atan(x)~=pi/2 */ - if (expt == BIAS + LDBL_MAX_EXP && + if (expt >= 0x3fff + 65) { /* if |x| is large, atan(x)~=pi/2 */ + if (expt == 0x7fff && ((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0) /* NaN */ return x+x; - if (expsign > 0) - return atanhi[3]+atanlo[3]; - else - return -atanhi[3]-atanlo[3]; + z = atanhi[3] + 0x1p-1000; + return expsign < 0 ? -z : z; } /* Extract the exponent and the first few bits of the mantissa. */ /* XXX There should be a more convenient way to do this. */ - expman = (expt << 8) | ((u.bits.manh >> (MANH_SIZE - 9)) & 0xff); - if (expman < ((BIAS - 2) << 8) + 0xc0) { /* |x| < 0.4375 */ - if (expt < ATAN_LINEAR) { /* if |x| is small, atanl(x)~=x */ - /* raise inexact */ - if (huge+x > 1.0) - return x; + expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff); + if (expman < ((0x3fff - 2) << 8) + 0xc0) { /* |x| < 0.4375 */ + if (expt < 0x3fff - 32) { /* if |x| is small, atanl(x)~=x */ + /* raise inexact if x!=0 */ + FORCE_EVAL(x + 0x1p1000); + return x; } id = -1; } else { x = fabsl(x); - if (expman < (BIAS << 8) + 0x30) { /* |x| < 1.1875 */ - if (expman < ((BIAS - 1) << 8) + 0x60) { /* 7/16 <= |x| < 11/16 */ + if (expman < (0x3fff << 8) + 0x30) { /* |x| < 1.1875 */ + if (expman < ((0x3fff - 1) << 8) + 0x60) { /* 7/16 <= |x| < 11/16 */ id = 0; x = (2.0*x-1.0)/(2.0+x); } else { /* 11/16 <= |x| < 19/16 */ @@ -111,7 +104,7 @@ long double atanl(long double x) x = (x-1.0)/(x+1.0); } } else { - if (expman < ((BIAS + 1) << 8) + 0x38) { /* |x| < 2.4375 */ + if (expman < ((0x3fff + 1) << 8) + 0x38) { /* |x| < 2.4375 */ id = 2; x = (x-1.5)/(1.0+1.5*x); } else { /* 2.4375 <= |x| < 2^ATAN_CONST */ diff --git a/src/math/cosh.c b/src/math/cosh.c index a1f7dbc7..2fcdea8f 100644 --- a/src/math/cosh.c +++ b/src/math/cosh.c @@ -23,7 +23,7 @@ * 2 * 22 <= x <= lnovft : cosh(x) := exp(x)/2 * lnovft <= x <= ln2ovft: cosh(x) := exp(x/2)/2 * exp(x/2) - * ln2ovft < x : cosh(x) := huge*huge (overflow) + * ln2ovft < x : cosh(x) := inf (overflow) * * Special cases: * cosh(x) is |x| if x is +INF, -INF, or NaN. @@ -32,43 +32,40 @@ #include "libm.h" -static const double huge = 1.0e300; - double cosh(double x) { - double t, w; - int32_t ix; - - GET_HIGH_WORD(ix, x); - ix &= 0x7fffffff; + union {double f; uint64_t i;} u = {.f = x}; + uint32_t ix; + double t; - /* x is INF or NaN */ - if (ix >= 0x7ff00000) - return x*x; + /* |x| */ + u.i &= (uint64_t)-1/2; + x = u.f; + ix = u.i >> 32; /* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */ if (ix < 0x3fd62e43) { - t = expm1(fabs(x)); - w = 1.0+t; + t = expm1(x); if (ix < 0x3c800000) - return w; /* cosh(tiny) = 1 */ - return 1.0 + (t*t)/(w+w); + return 1; + return 1 + t*t/(2*(1+t)); } /* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|))/2; */ if (ix < 0x40360000) { - t = exp(fabs(x)); + t = exp(x); return 0.5*t + 0.5/t; } /* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */ - if (ix < 0x40862E42) - return 0.5*exp(fabs(x)); + if (ix < 0x40862e42) + return 0.5*exp(x); /* |x| in [log(maxdouble), overflowthresold] */ - if (ix <= 0x408633CE) - return __expo2(fabs(x)); + if (ix <= 0x408633ce) + return __expo2(x); - /* |x| > overflowthresold, cosh(x) overflow */ - return huge*huge; + /* overflow (or nan) */ + x *= 0x1p1023; + return x; } diff --git a/src/math/coshf.c b/src/math/coshf.c index 97318f10..2bed1258 100644 --- a/src/math/coshf.c +++ b/src/math/coshf.c @@ -15,43 +15,40 @@ #include "libm.h" -static const float huge = 1.0e30; - float coshf(float x) { - float t, w; - int32_t ix; - - GET_FLOAT_WORD(ix, x); - ix &= 0x7fffffff; + union {float f; uint32_t i;} u = {.f = x}; + uint32_t ix; + float t; - /* x is INF or NaN */ - if (ix >= 0x7f800000) - return x*x; + /* |x| */ + u.i &= 0x7fffffff; + x = u.f; + ix = u.i; /* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */ if (ix < 0x3eb17218) { - t = expm1f(fabsf(x)); - w = 1.0f+t; - if (ix<0x39800000) - return 1.0f; /* cosh(tiny) = 1 */ - return 1.0f + (t*t)/(w+w); + t = expm1f(x); + if (ix < 0x39800000) + return 1; + return 1 + t*t/(2*(1+t)); } /* |x| in [0.5*ln2,9], return (exp(|x|)+1/exp(|x|))/2; */ if (ix < 0x41100000) { - t = expf(fabsf(x)); + t = expf(x); return 0.5f*t + 0.5f/t; } /* |x| in [9, log(maxfloat)] return 0.5f*exp(|x|) */ if (ix < 0x42b17217) - return 0.5f*expf(fabsf(x)); + return 0.5f*expf(x); /* |x| in [log(maxfloat), overflowthresold] */ if (ix <= 0x42b2d4fc) - return __expo2f(fabsf(x)); + return __expo2f(x); - /* |x| > overflowthresold, cosh(x) overflow */ - return huge*huge; + /* |x| > overflowthresold or nan */ + x *= 0x1p127f; + return x; } diff --git a/src/math/coshl.c b/src/math/coshl.c index 36b52c02..3ea56e00 100644 --- a/src/math/coshl.c +++ b/src/math/coshl.c @@ -23,7 +23,7 @@ * 2 * 22 <= x <= lnovft : coshl(x) := expl(x)/2 * lnovft <= x <= ln2ovft: coshl(x) := expl(x/2)/2 * expl(x/2) - * ln2ovft < x : coshl(x) := huge*huge (overflow) + * ln2ovft < x : coshl(x) := inf (overflow) * * Special cases: * coshl(x) is |x| if x is +INF, -INF, or NaN. @@ -38,49 +38,48 @@ long double coshl(long double x) return cosh(x); } #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -static const long double huge = 1.0e4900L; - long double coshl(long double x) { - long double t,w; - int32_t ex; + union { + long double f; + struct{uint64_t m; uint16_t se; uint16_t pad;} i; + } u = {.f = x}; + unsigned ex = u.i.se & 0x7fff; + long double t; uint32_t mx,lx; - /* High word of |x|. */ - GET_LDOUBLE_WORDS(ex, mx, lx, x); - ex &= 0x7fff; - - /* x is INF or NaN */ - if (ex == 0x7fff) return x*x; + /* |x| */ + u.i.se = ex; + x = u.f; + mx = u.i.m >> 32; + lx = u.i.m; /* |x| in [0,0.5*ln2], return 1+expm1l(|x|)^2/(2*expl(|x|)) */ - if (ex < 0x3ffd || (ex == 0x3ffd && mx < 0xb17217f7u)) { - t = expm1l(fabsl(x)); - w = 1.0 + t; - if (ex < 0x3fbc) return w; /* cosh(tiny) = 1 */ - return 1.0+(t*t)/(w+w); + if (ex < 0x3fff-2 || (ex == 0x3fff-2 && mx < 0xb17217f7)) { + t = expm1l(x); + if (ex < 0x3fff-64) + return 1; + return 1 + t*t/(2*(1+t)); } /* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|)/2; */ - if (ex < 0x4003 || (ex == 0x4003 && mx < 0xb0000000u)) { - t = expl(fabsl(x)); + if (ex < 0x3fff+4 || (ex == 0x3fff+4 && mx < 0xb0000000)) { + t = expl(x); return 0.5*t + 0.5/t; } /* |x| in [22, ln(maxdouble)] return 0.5*exp(|x|) */ - if (ex < 0x400c || (ex == 0x400c && mx < 0xb1700000u)) - return 0.5*expl(fabsl(x)); + if (ex < 0x3fff+13 || (ex == 0x3fff+13 && mx < 0xb1700000)) + return 0.5*expl(x); /* |x| in [log(maxdouble), log(2*maxdouble)) */ - if (ex == 0x400c && (mx < 0xb174ddc0u || - (mx == 0xb174ddc0u && lx < 0x31aec0ebu))) - { - w = expl(0.5*fabsl(x)); - t = 0.5*w; - return t*w; + if (ex == 0x3fff+13 && (mx < 0xb174ddc0 || + (mx == 0xb174ddc0 && lx < 0x31aec0eb))) { + t = expl(0.5*x); + return 0.5*t*t; } - /* |x| >= log(2*maxdouble), cosh(x) overflow */ - return huge*huge; + /* |x| >= log(2*maxdouble) or nan */ + return x*0x1p16383L; } #endif diff --git a/src/math/i386/__invtrigl.s b/src/math/i386/__invtrigl.s new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/math/i386/__invtrigl.s diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index c7f5ad0f..e3b42af5 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -50,12 +50,6 @@ expf: flds 4(%esp) jmp 2f -.global expl -.type expl,@function -expl: - fldt 4(%esp) - jmp 2f - .global exp .type exp,@function exp: diff --git a/src/math/i386/expl.s b/src/math/i386/expl.s index f335a3e5..3f2f707d 100644 --- a/src/math/i386/expl.s +++ b/src/math/i386/expl.s @@ -1 +1,107 @@ -# see exp.s +# exp(x) = 2^hi + 2^hi (2^lo - 1) +# where hi+lo = log2e*x with 128bit precision +# exact log2e*x calculation depends on nearest rounding mode + +.global expl +.type expl,@function +expl: + fldt 4(%esp) + + # special cases: 2*x is +-inf, nan or |x| < 0x1p-32 + # check (exponent|0x8000)+2 < 0xbfff+2-32 + movw 12(%esp), %ax + movw %ax, %dx + orw $0x8000, %dx + addw $2, %dx + cmpw $0xbfff-30, %dx + jnb 3f + cmpw $1, %dx + jbe 1f + # if |x|<0x1p-32 return 1+x + fld1 + jmp 2f +1: testw %ax, %ax + jns 1f + # if 2*x == -inf,-nan return -0/x + fldz + fchs + fdivp + ret + # if 2*x == inf,nan return 2*x +1: fld %st(0) +2: faddp + ret + + # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc + # it will be wrong on non-nearest rounding mode +3: fldl2e +# subl $32, %esp + subl $44, %esp + # hi = log2e_hi*x + # 2^hi = exp2l(hi) + fmul %st(1),%st + fld %st(0) + fstpt (%esp) + fstpt 16(%esp) + fstpt 32(%esp) + call exp2l + # if 2^hi == inf return 2^hi + fld %st(0) + fstpt (%esp) + cmpw $0x7fff, 8(%esp) + je 1f + fldt 32(%esp) + fldt 16(%esp) + # fpu stack: 2^hi x hi + # exact mult: x*log2e + fld %st(1) # x + # c = 0x1p32+1 + pushl $0x41f00000 + pushl $0x00100000 + fldl (%esp) + # xh = x - c*x + c*x + # xl = x - xh + fmulp + fld %st(2) + fsub %st(1), %st + faddp + fld %st(2) + fsub %st(1), %st + # yh = log2e_hi - c*log2e_hi + c*log2e_hi + pushl $0x3ff71547 + pushl $0x65200000 + fldl (%esp) + # fpu stack: 2^hi x hi xh xl yh + # lo = hi - xh*yh + xl*yh + fld %st(2) + fmul %st(1), %st + fsubp %st, %st(4) + fmul %st(1), %st + faddp %st, %st(3) + # yl = log2e_hi - yh + pushl $0x3de705fc + pushl $0x2f000000 + fldl (%esp) + # fpu stack: 2^hi x lo xh xl yl + # lo += xh*yl + xl*yl + fmul %st, %st(2) + fmulp %st, %st(1) + fxch %st(2) + faddp + faddp + # log2e_lo + pushl $0xbfbe + pushl $0x82f0025f + pushl $0x2dc582ee + fldt (%esp) + addl $36,%esp + # fpu stack: 2^hi x lo log2e_lo + # lo += log2e_lo*x + # return 2^hi + 2^hi (2^lo - 1) + fmulp %st, %st(2) + faddp + f2xm1 + fmul %st(1), %st + faddp +1: addl $44, %esp + ret diff --git a/src/math/tgamma.c b/src/math/tgamma.c index f3bbe370..a3f203c1 100644 --- a/src/math/tgamma.c +++ b/src/math/tgamma.c @@ -1,16 +1,221 @@ -#include <math.h> +/* +"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) +"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001) +"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004) -// FIXME: use lanczos approximation +approximation method: -double __lgamma_r(double, int *); + (x - 0.5) S(x) +Gamma(x) = (x + g - 0.5) * ---------------- + exp(x + g - 0.5) + +with + a1 a2 a3 aN +S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ] + x + 1 x + 2 x + 3 x + N + +with a0, a1, a2, a3,.. aN constants which depend on g. + +for x < 0 the following reflection formula is used: + +Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) + +most ideas and constants are from boost and python +*/ +#include "libm.h" + +static const double pi = 3.141592653589793238462643383279502884; + +/* sin(pi x) with x > 0 && isnormal(x) assumption */ +static double sinpi(double x) +{ + int n; + + /* argument reduction: x = |x| mod 2 */ + /* spurious inexact when x is odd int */ + x = x * 0.5; + x = 2 * (x - floor(x)); + + /* reduce x into [-.25,.25] */ + n = 4 * x; + n = (n+1)/2; + x -= n * 0.5; + + x *= pi; + switch (n) { + default: /* case 4 */ + case 0: + return __sin(x, 0, 0); + case 1: + return __cos(x, 0); + case 2: + /* sin(0-x) and -sin(x) have different sign at 0 */ + return __sin(0-x, 0, 0); + case 3: + return -__cos(x, 0); + } +} + +#define N 12 +//static const double g = 6.024680040776729583740234375; +static const double gmhalf = 5.524680040776729583740234375; +static const double Snum[N+1] = { + 23531376880.410759688572007674451636754734846804940, + 42919803642.649098768957899047001988850926355848959, + 35711959237.355668049440185451547166705960488635843, + 17921034426.037209699919755754458931112671403265390, + 6039542586.3520280050642916443072979210699388420708, + 1439720407.3117216736632230727949123939715485786772, + 248874557.86205415651146038641322942321632125127801, + 31426415.585400194380614231628318205362874684987640, + 2876370.6289353724412254090516208496135991145378768, + 186056.26539522349504029498971604569928220784236328, + 8071.6720023658162106380029022722506138218516325024, + 210.82427775157934587250973392071336271166969580291, + 2.5066282746310002701649081771338373386264310793408, +}; +static const double Sden[N+1] = { + 0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535, + 2637558, 357423, 32670, 1925, 66, 1, +}; +/* n! for small integer n */ +static const double fact[] = { + 1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0, + 479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0, + 355687428096000.0, 6402373705728000.0, 121645100408832000.0, + 2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0, +}; + +/* S(x) rational function for positive x */ +static double S(double x) +{ + double num = 0, den = 0; + int i; + + /* to avoid overflow handle large x differently */ + if (x < 8) + for (i = N; i >= 0; i--) { + num = num * x + Snum[i]; + den = den * x + Sden[i]; + } + else + for (i = 0; i <= N; i++) { + num = num / x + Snum[i]; + den = den / x + Sden[i]; + } + return num/den; +} double tgamma(double x) { - int sign; - double y; + double absx, y, dy, z, r; - y = exp(__lgamma_r(x, &sign)); - if (sign < 0) - y = -y; - return y; + /* special cases */ + if (!isfinite(x)) + /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ + return x + INFINITY; + + /* integer arguments */ + /* raise inexact when non-integer */ + if (x == floor(x)) { + if (x == 0) + /* tgamma(+-0)=+-inf with divide-by-zero */ + return 1/x; + if (x < 0) + return 0/0.0; + if (x <= sizeof fact/sizeof *fact) + return fact[(int)x - 1]; + } + + absx = fabs(x); + + /* x ~ 0: tgamma(x) ~ 1/x */ + if (absx < 0x1p-54) + return 1/x; + + /* x >= 172: tgamma(x)=inf with overflow */ + /* x =< -184: tgamma(x)=+-0 with underflow */ + if (absx >= 184) { + if (x < 0) { + if (floor(x) * 0.5 == floor(x * 0.5)) + return 0; + return -0.0; + } + x *= 0x1p1023; + return x; + } + + /* handle the error of x + g - 0.5 */ + y = absx + gmhalf; + if (absx > gmhalf) { + dy = y - absx; + dy -= gmhalf; + } else { + dy = y - gmhalf; + dy -= absx; + } + + z = absx - 0.5; + r = S(absx) * exp(-y); + if (x < 0) { + /* reflection formula for negative x */ + r = -pi / (sinpi(absx) * absx * r); + dy = -dy; + z = -z; + } + r += dy * (gmhalf+0.5) * r / y; + z = pow(y, 0.5*z); + r = r * z * z; + return r; } + +#if 0 +double __lgamma_r(double x, int *sign) +{ + double r, absx, z, zz, w; + + *sign = 1; + + /* special cases */ + if (!isfinite(x)) + /* lgamma(nan)=nan, lgamma(+-inf)=inf */ + return x*x; + + /* integer arguments */ + if (x == floor(x) && x <= 2) { + /* n <= 0: lgamma(n)=inf with divbyzero */ + /* n == 1,2: lgamma(n)=0 */ + if (x <= 0) + return 1/0.0; + return 0; + } + + absx = fabs(x); + + /* lgamma(x) ~ -log(|x|) for tiny |x| */ + if (absx < 0x1p-54) { + *sign = 1 - 2*!!signbit(x); + return -log(absx); + } + + /* use tgamma for smaller |x| */ + if (absx < 128) { + x = tgamma(x); + *sign = 1 - 2*!!signbit(x); + return log(fabs(x)); + } + + /* second term (log(S)-g) could be more precise here.. */ + /* or with stirling: (|x|-0.5)*(log(|x|)-1) + poly(1/|x|) */ + r = (absx-0.5)*(log(absx+gmhalf)-1) + (log(S(absx)) - (gmhalf+0.5)); + if (x < 0) { + /* reflection formula for negative x */ + x = sinpi(absx); + *sign = 2*!!signbit(x) - 1; + r = log(pi/(fabs(x)*absx)) - r; + } + return r; +} + +weak_alias(__lgamma_r, lgamma_r); +#endif diff --git a/src/math/tgammaf.c b/src/math/tgammaf.c index 16df8076..b4ca51c9 100644 --- a/src/math/tgammaf.c +++ b/src/math/tgammaf.c @@ -1,16 +1,6 @@ #include <math.h> -// FIXME: use lanczos approximation - -float __lgammaf_r(float, int *); - float tgammaf(float x) { - int sign; - float y; - - y = exp(__lgammaf_r(x, &sign)); - if (sign < 0) - y = -y; - return y; + return tgamma(x); } diff --git a/src/math/x86_64/__invtrigl.s b/src/math/x86_64/__invtrigl.s new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/math/x86_64/__invtrigl.s |