diff options
Diffstat (limited to 'src/math')
-rw-r--r-- | src/math/__log1p.h | 94 | ||||
-rw-r--r-- | src/math/__log1pf.h | 35 | ||||
-rw-r--r-- | src/math/log.c | 84 | ||||
-rw-r--r-- | src/math/log10.c | 99 | ||||
-rw-r--r-- | src/math/log10f.c | 84 | ||||
-rw-r--r-- | src/math/log10l.c | 7 | ||||
-rw-r--r-- | src/math/log1p.c | 148 | ||||
-rw-r--r-- | src/math/log1pf.c | 125 | ||||
-rw-r--r-- | src/math/log1pl.c | 2 | ||||
-rw-r--r-- | src/math/log2.c | 91 | ||||
-rw-r--r-- | src/math/log2f.c | 95 | ||||
-rw-r--r-- | src/math/log2l.c | 2 | ||||
-rw-r--r-- | src/math/logf.c | 78 | ||||
-rw-r--r-- | src/math/logl.c | 8 |
14 files changed, 369 insertions, 583 deletions
diff --git a/src/math/__log1p.h b/src/math/__log1p.h deleted file mode 100644 index 57187115..00000000 --- a/src/math/__log1p.h +++ /dev/null @@ -1,94 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/k_log.h */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* - * __log1p(f): - * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)]. - * - * The following describes the overall strategy for computing - * logarithms in base e. The argument reduction and adding the final - * term of the polynomial are done by the caller for increased accuracy - * when different bases are used. - * - * Method : - * 1. Argument Reduction: find k and f such that - * x = 2^k * (1+f), - * where sqrt(2)/2 < 1+f < sqrt(2) . - * - * 2. Approximation of log(1+f). - * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) - * = 2s + 2/3 s**3 + 2/5 s**5 + ....., - * = 2s + s*R - * We use a special Reme algorithm on [0,0.1716] to generate - * a polynomial of degree 14 to approximate R The maximum error - * of this polynomial approximation is bounded by 2**-58.45. In - * other words, - * 2 4 6 8 10 12 14 - * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s - * (the values of Lg1 to Lg7 are listed in the program) - * and - * | 2 14 | -58.45 - * | Lg1*s +...+Lg7*s - R(z) | <= 2 - * | | - * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. - * In order to guarantee error in log below 1ulp, we compute log - * by - * log(1+f) = f - s*(f - R) (if f is not too large) - * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) - * - * 3. Finally, log(x) = k*ln2 + log(1+f). - * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) - * Here ln2 is split into two floating point number: - * ln2_hi + ln2_lo, - * where n*ln2_hi is always exact for |n| < 2000. - * - * Special cases: - * log(x) is NaN with signal if x < 0 (including -INF) ; - * log(+INF) is +INF; log(0) is -INF with signal; - * log(NaN) is that NaN with no signal. - * - * Accuracy: - * according to an error analysis, the error is always less than - * 1 ulp (unit in the last place). - * - * Constants: - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. - */ - -static const double -Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ -Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ -Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ -Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ -Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ -Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ -Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ - -/* - * We always inline __log1p(), since doing so produces a - * substantial performance improvement (~40% on amd64). - */ -static inline double __log1p(double f) -{ - double_t hfsq,s,z,R,w,t1,t2; - - s = f/(2.0+f); - z = s*s; - w = z*z; - t1= w*(Lg2+w*(Lg4+w*Lg6)); - t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); - R = t2+t1; - hfsq = 0.5*f*f; - return s*(hfsq+R); -} diff --git a/src/math/__log1pf.h b/src/math/__log1pf.h deleted file mode 100644 index f2fbef29..00000000 --- a/src/math/__log1pf.h +++ /dev/null @@ -1,35 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/k_logf.h */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* - * See comments in __log1p.h. - */ - -/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ -static const float -Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ -Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ -Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ -Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ - -static inline float __log1pf(float f) -{ - float_t hfsq,s,z,R,w,t1,t2; - - s = f/(2.0f + f); - z = s*s; - w = z*z; - t1 = w*(Lg2+w*Lg4); - t2 = z*(Lg1+w*Lg3); - R = t2+t1; - hfsq = 0.5f * f * f; - return s*(hfsq+R); -} diff --git a/src/math/log.c b/src/math/log.c index 98051205..e61e113d 100644 --- a/src/math/log.c +++ b/src/math/log.c @@ -10,7 +10,7 @@ * ==================================================== */ /* log(x) - * Return the logrithm of x + * Return the logarithm of x * * Method : * 1. Argument Reduction: find k and f such that @@ -60,12 +60,12 @@ * to produce the hexadecimal values shown. */ -#include "libm.h" +#include <math.h> +#include <stdint.h> static const double ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ -two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ @@ -76,63 +76,43 @@ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ double log(double x) { - double hfsq,f,s,z,R,w,t1,t2,dk; - int32_t k,hx,i,j; - uint32_t lx; - - EXTRACT_WORDS(hx, lx, x); + union {double f; uint64_t i;} u = {x}; + double_t hfsq,f,s,z,R,w,t1,t2,dk; + uint32_t hx; + int k; + hx = u.i>>32; k = 0; - if (hx < 0x00100000) { /* x < 2**-1022 */ - if (((hx&0x7fffffff)|lx) == 0) - return -two54/0.0; /* log(+-0)=-inf */ - if (hx < 0) - return (x-x)/0.0; /* log(-#) = NaN */ - /* subnormal number, scale up x */ + if (hx < 0x00100000 || hx>>31) { + if (u.i<<1 == 0) + return -1/(x*x); /* log(+-0)=-inf */ + if (hx>>31) + return (x-x)/0.0; /* log(-#) = NaN */ + /* subnormal number, scale x up */ k -= 54; - x *= two54; - GET_HIGH_WORD(hx,x); - } - if (hx >= 0x7ff00000) - return x+x; - k += (hx>>20) - 1023; - hx &= 0x000fffff; - i = (hx+0x95f64)&0x100000; - SET_HIGH_WORD(x, hx|(i^0x3ff00000)); /* normalize x or x/2 */ - k += i>>20; + x *= 0x1p54; + u.f = x; + hx = u.i>>32; + } else if (hx >= 0x7ff00000) { + return x; + } else if (hx == 0x3ff00000 && u.i<<32 == 0) + return 0; + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (int)(hx>>20) - 0x3ff; + hx = (hx&0x000fffff) + 0x3fe6a09e; + u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); + x = u.f; + f = x - 1.0; - if ((0x000fffff&(2+hx)) < 3) { /* -2**-20 <= f < 2**-20 */ - if (f == 0.0) { - if (k == 0) { - return 0.0; - } - dk = (double)k; - return dk*ln2_hi + dk*ln2_lo; - } - R = f*f*(0.5-0.33333333333333333*f); - if (k == 0) - return f - R; - dk = (double)k; - return dk*ln2_hi - ((R-dk*ln2_lo)-f); - } + hfsq = 0.5*f*f; s = f/(2.0+f); - dk = (double)k; z = s*s; - i = hx - 0x6147a; w = z*z; - j = 0x6b851 - hx; t1 = w*(Lg2+w*(Lg4+w*Lg6)); t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); - i |= j; R = t2 + t1; - if (i > 0) { - hfsq = 0.5*f*f; - if (k == 0) - return f - (hfsq-s*(hfsq+R)); - return dk*ln2_hi - ((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); - } else { - if (k == 0) - return f - s*(f-R); - return dk*ln2_hi - ((s*(f-R)-dk*ln2_lo)-f); - } + dk = k; + return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi; } diff --git a/src/math/log10.c b/src/math/log10.c index ed65d9be..81026876 100644 --- a/src/math/log10.c +++ b/src/math/log10.c @@ -10,72 +10,91 @@ * ==================================================== */ /* - * Return the base 10 logarithm of x. See e_log.c and k_log.h for most - * comments. + * Return the base 10 logarithm of x. See log.c for most comments. * - * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) - * in not-quite-routine extra precision. + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2) */ -#include "libm.h" -#include "__log1p.h" +#include <math.h> +#include <stdint.h> static const double -two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */ ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */ log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ -log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ +log10_2lo = 3.69423907715893078616e-13, /* 0x3D59FEF3, 0x11F12B36 */ +Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ +Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ +Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ +Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ +Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ +Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ +Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ double log10(double x) { - double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2; - int32_t i,k,hx; - uint32_t lx; - - EXTRACT_WORDS(hx, lx, x); + union {double f; uint64_t i;} u = {x}; + double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo; + uint32_t hx; + int k; + hx = u.i>>32; k = 0; - if (hx < 0x00100000) { /* x < 2**-1022 */ - if (((hx&0x7fffffff)|lx) == 0) - return -two54/0.0; /* log(+-0)=-inf */ - if (hx<0) - return (x-x)/0.0; /* log(-#) = NaN */ - /* subnormal number, scale up x */ + if (hx < 0x00100000 || hx>>31) { + if (u.i<<1 == 0) + return -1/(x*x); /* log(+-0)=-inf */ + if (hx>>31) + return (x-x)/0.0; /* log(-#) = NaN */ + /* subnormal number, scale x up */ k -= 54; - x *= two54; - GET_HIGH_WORD(hx, x); - } - if (hx >= 0x7ff00000) - return x+x; - if (hx == 0x3ff00000 && lx == 0) - return 0.0; /* log(1) = +0 */ - k += (hx>>20) - 1023; - hx &= 0x000fffff; - i = (hx+0x95f64)&0x100000; - SET_HIGH_WORD(x, hx|(i^0x3ff00000)); /* normalize x or x/2 */ - k += i>>20; - y = (double)k; + x *= 0x1p54; + u.f = x; + hx = u.i>>32; + } else if (hx >= 0x7ff00000) { + return x; + } else if (hx == 0x3ff00000 && u.i<<32 == 0) + return 0; + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (int)(hx>>20) - 0x3ff; + hx = (hx&0x000fffff) + 0x3fe6a09e; + u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); + x = u.f; + f = x - 1.0; hfsq = 0.5*f*f; - r = __log1p(f); + s = f/(2.0+f); + z = s*s; + w = z*z; + t1 = w*(Lg2+w*(Lg4+w*Lg6)); + t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); + R = t2 + t1; /* See log2.c for details. */ + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ hi = f - hfsq; - SET_LOW_WORD(hi, 0); - lo = (f - hi) - hfsq + r; + u.f = hi; + u.i &= (uint64_t)-1<<32; + hi = u.f; + lo = f - hi - hfsq + s*(hfsq+R); + + /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */ val_hi = hi*ivln10hi; - y2 = y*log10_2hi; - val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi; + dk = k; + y = dk*log10_2hi; + val_lo = dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi; /* - * Extra precision in for adding y*log10_2hi is not strictly needed + * Extra precision in for adding y is not strictly needed * since there is no very large cancellation near x = sqrt(2) or * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs * with some parallelism and it reduces the error for many args. */ - w = y2 + val_hi; - val_lo += (y2 - w) + val_hi; + w = y + val_hi; + val_lo += (y - w) + val_hi; val_hi = w; return val_lo + val_hi; diff --git a/src/math/log10f.c b/src/math/log10f.c index e10749b5..9ca2f017 100644 --- a/src/math/log10f.c +++ b/src/math/log10f.c @@ -13,57 +13,65 @@ * See comments in log10.c. */ -#include "libm.h" -#include "__log1pf.h" +#include <math.h> +#include <stdint.h> static const float -two25 = 3.3554432000e+07, /* 0x4c000000 */ ivln10hi = 4.3432617188e-01, /* 0x3ede6000 */ ivln10lo = -3.1689971365e-05, /* 0xb804ead9 */ log10_2hi = 3.0102920532e-01, /* 0x3e9a2080 */ -log10_2lo = 7.9034151668e-07; /* 0x355427db */ +log10_2lo = 7.9034151668e-07, /* 0x355427db */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ float log10f(float x) { - float f,hfsq,hi,lo,r,y; - int32_t i,k,hx; - - GET_FLOAT_WORD(hx, x); + union {float f; uint32_t i;} u = {x}; + float_t hfsq,f,s,z,R,w,t1,t2,dk,hi,lo; + uint32_t ix; + int k; + ix = u.i; k = 0; - if (hx < 0x00800000) { /* x < 2**-126 */ - if ((hx&0x7fffffff) == 0) - return -two25/0.0f; /* log(+-0)=-inf */ - if (hx < 0) - return (x-x)/0.0f; /* log(-#) = NaN */ + if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */ + if (ix<<1 == 0) + return -1/(x*x); /* log(+-0)=-inf */ + if (ix>>31) + return (x-x)/0.0f; /* log(-#) = NaN */ /* subnormal number, scale up x */ k -= 25; - x *= two25; - GET_FLOAT_WORD(hx, x); - } - if (hx >= 0x7f800000) - return x+x; - if (hx == 0x3f800000) - return 0.0f; /* log(1) = +0 */ - k += (hx>>23) - 127; - hx &= 0x007fffff; - i = (hx+(0x4afb0d))&0x800000; - SET_FLOAT_WORD(x, hx|(i^0x3f800000)); /* normalize x or x/2 */ - k += i>>23; - y = (float)k; + x *= 0x1p25f; + u.f = x; + ix = u.i; + } else if (ix >= 0x7f800000) { + return x; + } else if (ix == 0x3f800000) + return 0; + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (int)(ix>>23) - 0x7f; + ix = (ix&0x007fffff) + 0x3f3504f3; + u.i = ix; + x = u.f; + f = x - 1.0f; - hfsq = 0.5f * f * f; - r = __log1pf(f); + s = f/(2.0f + f); + z = s*s; + w = z*z; + t1= w*(Lg2+w*Lg4); + t2= z*(Lg1+w*Lg3); + R = t2 + t1; + hfsq = 0.5f*f*f; -// FIXME -// /* See log2f.c and log2.c for details. */ -// if (sizeof(float_t) > sizeof(float)) -// return (r - hfsq + f) * ((float_t)ivln10lo + ivln10hi) + -// y * ((float_t)log10_2lo + log10_2hi); hi = f - hfsq; - GET_FLOAT_WORD(hx, hi); - SET_FLOAT_WORD(hi, hx&0xfffff000); - lo = (f - hi) - hfsq + r; - return y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + - hi*ivln10hi + y*log10_2hi; + u.f = hi; + u.i &= 0xfffff000; + hi = u.f; + lo = f - hi - hfsq + s*(hfsq+R); + dk = k; + return dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + hi*ivln10hi + dk*log10_2hi; } diff --git a/src/math/log10l.c b/src/math/log10l.c index f0eeeafb..c7aacf90 100644 --- a/src/math/log10l.c +++ b/src/math/log10l.c @@ -117,16 +117,15 @@ static const long double S[4] = { long double log10l(long double x) { - long double y; - volatile long double z; + long double y, z; int e; if (isnan(x)) return x; if(x <= 0.0) { if(x == 0.0) - return -1.0 / (x - x); - return (x - x) / (x - x); + return -1.0 / (x*x); + return (x - x) / 0.0; } if (x == INFINITY) return INFINITY; diff --git a/src/math/log1p.c b/src/math/log1p.c index a71ac423..00971349 100644 --- a/src/math/log1p.c +++ b/src/math/log1p.c @@ -10,6 +10,7 @@ * ==================================================== */ /* double log1p(double x) + * Return the natural logarithm of 1+x. * * Method : * 1. Argument Reduction: find k and f such that @@ -23,31 +24,9 @@ * and add back the correction term c/u. * (Note: when x > 2**53, one can simply return log(x)) * - * 2. Approximation of log1p(f). - * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) - * = 2s + 2/3 s**3 + 2/5 s**5 + ....., - * = 2s + s*R - * We use a special Reme algorithm on [0,0.1716] to generate - * a polynomial of degree 14 to approximate R The maximum error - * of this polynomial approximation is bounded by 2**-58.45. In - * other words, - * 2 4 6 8 10 12 14 - * R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s - * (the values of Lp1 to Lp7 are listed in the program) - * and - * | 2 14 | -58.45 - * | Lp1*s +...+Lp7*s - R(z) | <= 2 - * | | - * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. - * In order to guarantee error in log below 1ulp, we compute log - * by - * log1p(f) = f - (hfsq - s*(hfsq+R)). + * 2. Approximation of log(1+f): See log.c * - * 3. Finally, log1p(x) = k*ln2 + log1p(f). - * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) - * Here ln2 is split into two floating point number: - * ln2_hi + ln2_lo, - * where n*ln2_hi is always exact for |n| < 2000. + * 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c * * Special cases: * log1p(x) is NaN with signal if x < -1 (including -INF) ; @@ -79,94 +58,65 @@ static const double ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ -two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ -Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ -Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ -Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ -Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ -Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ -Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ -Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ +Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ +Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ +Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ +Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ +Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ +Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ double log1p(double x) { - double hfsq,f,c,s,z,R,u; - int32_t k,hx,hu,ax; - - GET_HIGH_WORD(hx, x); - ax = hx & 0x7fffffff; + union {double f; uint64_t i;} u = {x}; + double_t hfsq,f,c,s,z,R,w,t1,t2,dk; + uint32_t hx,hu; + int k; + hx = u.i>>32; k = 1; - if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */ - if (ax >= 0x3ff00000) { /* x <= -1.0 */ - if (x == -1.0) - return -two54/0.0; /* log1p(-1)=+inf */ - return (x-x)/(x-x); /* log1p(x<-1)=NaN */ + if (hx < 0x3fda827a || hx>>31) { /* 1+x < sqrt(2)+ */ + if (hx >= 0xbff00000) { /* x <= -1.0 */ + if (x == -1) + return x/0.0; /* log1p(-1) = -inf */ + return (x-x)/0.0; /* log1p(x<-1) = NaN */ } - if (ax < 0x3e200000) { /* |x| < 2**-29 */ - /* if 0x1p-1022 <= |x| < 0x1p-54, avoid raising underflow */ - if (ax < 0x3c900000 && ax >= 0x00100000) - return x; -#if FLT_EVAL_METHOD != 0 - FORCE_EVAL((float)x); -#endif - return x - x*x*0.5; + if (hx<<1 < 0x3ca00000<<1) { /* |x| < 2**-53 */ + /* underflow if subnormal */ + if ((hx&0x7ff00000) == 0) + FORCE_EVAL((float)x); + return x; } - if (hx > 0 || hx <= (int32_t)0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ k = 0; + c = 0; f = x; - hu = 1; } - } - if (hx >= 0x7ff00000) - return x+x; - if (k != 0) { - if (hx < 0x43400000) { - u = 1 + x; - GET_HIGH_WORD(hu, u); - k = (hu>>20) - 1023; - c = k > 0 ? 1.0-(u-x) : x-(u-1.0); /* correction term */ - c /= u; - } else { - u = x; - GET_HIGH_WORD(hu,u); - k = (hu>>20) - 1023; + } else if (hx >= 0x7ff00000) + return x; + if (k) { + u.f = 1 + x; + hu = u.i>>32; + hu += 0x3ff00000 - 0x3fe6a09e; + k = (int)(hu>>20) - 0x3ff; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if (k < 54) { + c = k >= 2 ? 1-(u.f-x) : x-(u.f-1); + c /= u.f; + } else c = 0; - } - hu &= 0x000fffff; - /* - * The approximation to sqrt(2) used in thresholds is not - * critical. However, the ones used above must give less - * strict bounds than the one here so that the k==0 case is - * never reached from here, since here we have committed to - * using the correction term but don't use it if k==0. - */ - if (hu < 0x6a09e) { /* u ~< sqrt(2) */ - SET_HIGH_WORD(u, hu|0x3ff00000); /* normalize u */ - } else { - k += 1; - SET_HIGH_WORD(u, hu|0x3fe00000); /* normalize u/2 */ - hu = (0x00100000-hu)>>2; - } - f = u - 1.0; + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + hu = (hu&0x000fffff) + 0x3fe6a09e; + u.i = (uint64_t)hu<<32 | (u.i&0xffffffff); + f = u.f - 1; } hfsq = 0.5*f*f; - if (hu == 0) { /* |f| < 2**-20 */ - if (f == 0.0) { - if(k == 0) - return 0.0; - c += k*ln2_lo; - return k*ln2_hi + c; - } - R = hfsq*(1.0 - 0.66666666666666666*f); - if (k == 0) - return f - R; - return k*ln2_hi - ((R-(k*ln2_lo+c))-f); - } s = f/(2.0+f); z = s*s; - R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); - if (k == 0) - return f - (hfsq-s*(hfsq+R)); - return k*ln2_hi - ((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); + w = z*z; + t1 = w*(Lg2+w*(Lg4+w*Lg6)); + t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); + R = t2 + t1; + dk = k; + return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi; } diff --git a/src/math/log1pf.c b/src/math/log1pf.c index e6940d29..23985c35 100644 --- a/src/math/log1pf.c +++ b/src/math/log1pf.c @@ -1,8 +1,5 @@ /* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */ /* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * @@ -18,95 +15,63 @@ static const float ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ -two25 = 3.355443200e+07, /* 0x4c000000 */ -Lp1 = 6.6666668653e-01, /* 3F2AAAAB */ -Lp2 = 4.0000000596e-01, /* 3ECCCCCD */ -Lp3 = 2.8571429849e-01, /* 3E924925 */ -Lp4 = 2.2222198546e-01, /* 3E638E29 */ -Lp5 = 1.8183572590e-01, /* 3E3A3325 */ -Lp6 = 1.5313838422e-01, /* 3E1CD04F */ -Lp7 = 1.4798198640e-01; /* 3E178897 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ float log1pf(float x) { - float hfsq,f,c,s,z,R,u; - int32_t k,hx,hu,ax; - - GET_FLOAT_WORD(hx, x); - ax = hx & 0x7fffffff; + union {float f; uint32_t i;} u = {x}; + float_t hfsq,f,c,s,z,R,w,t1,t2,dk; + uint32_t ix,iu; + int k; + ix = u.i; k = 1; - if (hx < 0x3ed413d0) { /* 1+x < sqrt(2)+ */ - if (ax >= 0x3f800000) { /* x <= -1.0 */ - if (x == -1.0f) - return -two25/0.0f; /* log1p(-1)=+inf */ - return (x-x)/(x-x); /* log1p(x<-1)=NaN */ + if (ix < 0x3ed413d0 || ix>>31) { /* 1+x < sqrt(2)+ */ + if (ix >= 0xbf800000) { /* x <= -1.0 */ + if (x == -1) + return x/0.0f; /* log1p(-1)=+inf */ + return (x-x)/0.0f; /* log1p(x<-1)=NaN */ } - if (ax < 0x38000000) { /* |x| < 2**-15 */ - /* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */ - if (ax < 0x33800000 && ax >= 0x00800000) - return x; -#if FLT_EVAL_METHOD != 0 - FORCE_EVAL(x*x); -#endif - return x - x*x*0.5f; + if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */ + /* underflow if subnormal */ + if ((ix&0x7f800000) == 0) + FORCE_EVAL(x*x); + return x; } - if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ k = 0; + c = 0; f = x; - hu = 1; } - } - if (hx >= 0x7f800000) - return x+x; - if (k != 0) { - if (hx < 0x5a000000) { - u = 1 + x; - GET_FLOAT_WORD(hu, u); - k = (hu>>23) - 127; - /* correction term */ - c = k > 0 ? 1.0f-(u-x) : x-(u-1.0f); - c /= u; - } else { - u = x; - GET_FLOAT_WORD(hu,u); - k = (hu>>23) - 127; + } else if (ix >= 0x7f800000) + return x; + if (k) { + u.f = 1 + x; + iu = u.i; + iu += 0x3f800000 - 0x3f3504f3; + k = (int)(iu>>23) - 0x7f; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if (k < 25) { + c = k >= 2 ? 1-(u.f-x) : x-(u.f-1); + c /= u.f; + } else c = 0; - } - hu &= 0x007fffff; - /* - * The approximation to sqrt(2) used in thresholds is not - * critical. However, the ones used above must give less - * strict bounds than the one here so that the k==0 case is - * never reached from here, since here we have committed to - * using the correction term but don't use it if k==0. - */ - if (hu < 0x3504f4) { /* u < sqrt(2) */ - SET_FLOAT_WORD(u, hu|0x3f800000); /* normalize u */ - } else { - k += 1; - SET_FLOAT_WORD(u, hu|0x3f000000); /* normalize u/2 */ - hu = (0x00800000-hu)>>2; - } - f = u - 1.0f; - } - hfsq = 0.5f * f * f; - if (hu == 0) { /* |f| < 2**-20 */ - if (f == 0.0f) { - if (k == 0) - return 0.0f; - c += k*ln2_lo; - return k*ln2_hi+c; - } - R = hfsq*(1.0f - 0.66666666666666666f * f); - if (k == 0) - return f - R; - return k*ln2_hi - ((R-(k*ln2_lo+c))-f); + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + iu = (iu&0x007fffff) + 0x3f3504f3; + u.i = iu; + f = u.f - 1; } s = f/(2.0f + f); z = s*s; - R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); - if (k == 0) - return f - (hfsq-s*(hfsq+R)); - return k*ln2_hi - ((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); + w = z*z; + t1= w*(Lg2+w*Lg4); + t2= z*(Lg1+w*Lg3); + R = t2 + t1; + hfsq = 0.5f*f*f; + dk = k; + return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi; } diff --git a/src/math/log1pl.c b/src/math/log1pl.c index edb48df1..37da46d2 100644 --- a/src/math/log1pl.c +++ b/src/math/log1pl.c @@ -118,7 +118,7 @@ long double log1pl(long double xm1) /* Test for domain errors. */ if (x <= 0.0) { if (x == 0.0) - return -1/x; /* -inf with divbyzero */ + return -1/(x*x); /* -inf with divbyzero */ return 0/0.0f; /* nan with invalid */ } diff --git a/src/math/log2.c b/src/math/log2.c index 1974215d..0aafad4b 100644 --- a/src/math/log2.c +++ b/src/math/log2.c @@ -10,55 +10,66 @@ * ==================================================== */ /* - * Return the base 2 logarithm of x. See log.c and __log1p.h for most - * comments. + * Return the base 2 logarithm of x. See log.c for most comments. * - * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, - * then does the combining and scaling steps - * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k - * in not-quite-routine extra precision. + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log2(x) = (f - f*f/2 + r)/log(2) + k */ -#include "libm.h" -#include "__log1p.h" +#include <math.h> +#include <stdint.h> static const double -two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ -ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ +ivln2lo = 1.67517131648865118353e-10, /* 0x3de705fc, 0x2eefa200 */ +Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ +Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ +Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ +Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ +Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ +Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ +Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ double log2(double x) { - double f,hfsq,hi,lo,r,val_hi,val_lo,w,y; - int32_t i,k,hx; - uint32_t lx; - - EXTRACT_WORDS(hx, lx, x); + union {double f; uint64_t i;} u = {x}; + double_t hfsq,f,s,z,R,w,t1,t2,y,hi,lo,val_hi,val_lo; + uint32_t hx; + int k; + hx = u.i>>32; k = 0; - if (hx < 0x00100000) { /* x < 2**-1022 */ - if (((hx&0x7fffffff)|lx) == 0) - return -two54/0.0; /* log(+-0)=-inf */ - if (hx < 0) - return (x-x)/0.0; /* log(-#) = NaN */ - /* subnormal number, scale up x */ + if (hx < 0x00100000 || hx>>31) { + if (u.i<<1 == 0) + return -1/(x*x); /* log(+-0)=-inf */ + if (hx>>31) + return (x-x)/0.0; /* log(-#) = NaN */ + /* subnormal number, scale x up */ k -= 54; - x *= two54; - GET_HIGH_WORD(hx, x); - } - if (hx >= 0x7ff00000) - return x+x; - if (hx == 0x3ff00000 && lx == 0) - return 0.0; /* log(1) = +0 */ - k += (hx>>20) - 1023; - hx &= 0x000fffff; - i = (hx+0x95f64) & 0x100000; - SET_HIGH_WORD(x, hx|(i^0x3ff00000)); /* normalize x or x/2 */ - k += i>>20; - y = (double)k; + x *= 0x1p54; + u.f = x; + hx = u.i>>32; + } else if (hx >= 0x7ff00000) { + return x; + } else if (hx == 0x3ff00000 && u.i<<32 == 0) + return 0; + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + hx += 0x3ff00000 - 0x3fe6a09e; + k += (int)(hx>>20) - 0x3ff; + hx = (hx&0x000fffff) + 0x3fe6a09e; + u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); + x = u.f; + f = x - 1.0; hfsq = 0.5*f*f; - r = __log1p(f); + s = f/(2.0+f); + z = s*s; + w = z*z; + t1 = w*(Lg2+w*(Lg4+w*Lg6)); + t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); + R = t2 + t1; /* * f-hfsq must (for args near 1) be evaluated in extra precision @@ -90,13 +101,19 @@ double log2(double x) * The multi-precision calculations for the multiplications are * routine. */ + + /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ hi = f - hfsq; - SET_LOW_WORD(hi, 0); - lo = (f - hi) - hfsq + r; + u.f = hi; + u.i &= (uint64_t)-1<<32; + hi = u.f; + lo = f - hi - hfsq + s*(hfsq+R); + val_hi = hi*ivln2hi; val_lo = (lo+hi)*ivln2lo + lo*ivln2hi; /* spadd(val_hi, val_lo, y), except for not using double_t: */ + y = k; w = y + val_hi; val_lo += (y - w) + val_hi; val_hi = w; diff --git a/src/math/log2f.c b/src/math/log2f.c index e0d6a9e4..b3e305fe 100644 --- a/src/math/log2f.c +++ b/src/math/log2f.c @@ -13,67 +13,62 @@ * See comments in log2.c. */ -#include "libm.h" -#include "__log1pf.h" +#include <math.h> +#include <stdint.h> static const float -two25 = 3.3554432000e+07, /* 0x4c000000 */ ivln2hi = 1.4428710938e+00, /* 0x3fb8b000 */ -ivln2lo = -1.7605285393e-04; /* 0xb9389ad4 */ +ivln2lo = -1.7605285393e-04, /* 0xb9389ad4 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ float log2f(float x) { - float f,hfsq,hi,lo,r,y; - int32_t i,k,hx; - - GET_FLOAT_WORD(hx, x); + union {float f; uint32_t i;} u = {x}; + float_t hfsq,f,s,z,R,w,t1,t2,hi,lo; + uint32_t ix; + int k; + ix = u.i; k = 0; - if (hx < 0x00800000) { /* x < 2**-126 */ - if ((hx&0x7fffffff) == 0) - return -two25/0.0f; /* log(+-0)=-inf */ - if (hx < 0) - return (x-x)/0.0f; /* log(-#) = NaN */ + if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */ + if (ix<<1 == 0) + return -1/(x*x); /* log(+-0)=-inf */ + if (ix>>31) + return (x-x)/0.0f; /* log(-#) = NaN */ /* subnormal number, scale up x */ k -= 25; - x *= two25; - GET_FLOAT_WORD(hx, x); - } - if (hx >= 0x7f800000) - return x+x; - if (hx == 0x3f800000) - return 0.0f; /* log(1) = +0 */ - k += (hx>>23) - 127; - hx &= 0x007fffff; - i = (hx+(0x4afb0d))&0x800000; - SET_FLOAT_WORD(x, hx|(i^0x3f800000)); /* normalize x or x/2 */ - k += i>>23; - y = (float)k; - f = x - 1.0f; - hfsq = 0.5f * f * f; - r = __log1pf(f); + x *= 0x1p25f; + u.f = x; + ix = u.i; + } else if (ix >= 0x7f800000) { + return x; + } else if (ix == 0x3f800000) + return 0; - /* - * We no longer need to avoid falling into the multi-precision - * calculations due to compiler bugs breaking Dekker's theorem. - * Keep avoiding this as an optimization. See log2.c for more - * details (some details are here only because the optimization - * is not yet available in double precision). - * - * Another compiler bug turned up. With gcc on i386, - * (ivln2lo + ivln2hi) would be evaluated in float precision - * despite runtime evaluations using double precision. So we - * must cast one of its terms to float_t. This makes the whole - * expression have type float_t, so return is forced to waste - * time clobbering its extra precision. - */ -// FIXME -// if (sizeof(float_t) > sizeof(float)) -// return (r - hfsq + f) * ((float_t)ivln2lo + ivln2hi) + y; + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (int)(ix>>23) - 0x7f; + ix = (ix&0x007fffff) + 0x3f3504f3; + u.i = ix; + x = u.f; + + f = x - 1.0f; + s = f/(2.0f + f); + z = s*s; + w = z*z; + t1= w*(Lg2+w*Lg4); + t2= z*(Lg1+w*Lg3); + R = t2 + t1; + hfsq = 0.5f*f*f; hi = f - hfsq; - GET_FLOAT_WORD(hx,hi); - SET_FLOAT_WORD(hi,hx&0xfffff000); - lo = (f - hi) - hfsq + r; - return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + y; + u.f = hi; + u.i &= 0xfffff000; + hi = u.f; + lo = f - hi - hfsq + s*(hfsq+R); + return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + k; } diff --git a/src/math/log2l.c b/src/math/log2l.c index 345b395d..d00531d5 100644 --- a/src/math/log2l.c +++ b/src/math/log2l.c @@ -117,7 +117,7 @@ long double log2l(long double x) return x; if (x <= 0.0) { if (x == 0.0) - return -1/(x+0); /* -inf with divbyzero */ + return -1/(x*x); /* -inf with divbyzero */ return 0/0.0f; /* nan with invalid */ } diff --git a/src/math/logf.c b/src/math/logf.c index c7f7dbe6..52230a1b 100644 --- a/src/math/logf.c +++ b/src/math/logf.c @@ -13,12 +13,12 @@ * ==================================================== */ -#include "libm.h" +#include <math.h> +#include <stdint.h> static const float ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ -two25 = 3.355443200e+07, /* 0x4c000000 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ @@ -27,61 +27,43 @@ Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ float logf(float x) { - float hfsq,f,s,z,R,w,t1,t2,dk; - int32_t k,ix,i,j; - - GET_FLOAT_WORD(ix, x); + union {float f; uint32_t i;} u = {x}; + float_t hfsq,f,s,z,R,w,t1,t2,dk; + uint32_t ix; + int k; + ix = u.i; k = 0; - if (ix < 0x00800000) { /* x < 2**-126 */ - if ((ix & 0x7fffffff) == 0) - return -two25/0.0f; /* log(+-0)=-inf */ - if (ix < 0) - return (x-x)/0.0f; /* log(-#) = NaN */ + if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */ + if (ix<<1 == 0) + return -1/(x*x); /* log(+-0)=-inf */ + if (ix>>31) + return (x-x)/0.0f; /* log(-#) = NaN */ /* subnormal number, scale up x */ k -= 25; - x *= two25; - GET_FLOAT_WORD(ix, x); - } - if (ix >= 0x7f800000) - return x+x; - k += (ix>>23) - 127; - ix &= 0x007fffff; - i = (ix + (0x95f64<<3)) & 0x800000; - SET_FLOAT_WORD(x, ix|(i^0x3f800000)); /* normalize x or x/2 */ - k += i>>23; + x *= 0x1p25f; + u.f = x; + ix = u.i; + } else if (ix >= 0x7f800000) { + return x; + } else if (ix == 0x3f800000) + return 0; + + /* reduce x into [sqrt(2)/2, sqrt(2)] */ + ix += 0x3f800000 - 0x3f3504f3; + k += (int)(ix>>23) - 0x7f; + ix = (ix&0x007fffff) + 0x3f3504f3; + u.i = ix; + x = u.f; + f = x - 1.0f; - if ((0x007fffff & (0x8000 + ix)) < 0xc000) { /* -2**-9 <= f < 2**-9 */ - if (f == 0.0f) { - if (k == 0) - return 0.0f; - dk = (float)k; - return dk*ln2_hi + dk*ln2_lo; - } - R = f*f*(0.5f - 0.33333333333333333f*f); - if (k == 0) - return f-R; - dk = (float)k; - return dk*ln2_hi - ((R-dk*ln2_lo)-f); - } s = f/(2.0f + f); - dk = (float)k; z = s*s; - i = ix-(0x6147a<<3); w = z*z; - j = (0x6b851<<3)-ix; t1= w*(Lg2+w*Lg4); t2= z*(Lg1+w*Lg3); - i |= j; R = t2 + t1; - if (i > 0) { - hfsq = 0.5f * f * f; - if (k == 0) - return f - (hfsq-s*(hfsq+R)); - return dk*ln2_hi - ((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); - } else { - if (k == 0) - return f - s*(f-R); - return dk*ln2_hi - ((s*(f-R)-dk*ln2_lo)-f); - } + hfsq = 0.5f*f*f; + dk = k; + return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi; } diff --git a/src/math/logl.c b/src/math/logl.c index ef2b5515..03c5188f 100644 --- a/src/math/logl.c +++ b/src/math/logl.c @@ -35,9 +35,9 @@ * * log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x). * - * Otherwise, setting z = 2(x-1)/x+1), + * Otherwise, setting z = 2(x-1)/(x+1), * - * log(x) = z + z**3 P(z)/Q(z). + * log(x) = log(1+z/2) - log(1-z/2) = z + z**3 P(z)/Q(z). * * * ACCURACY: @@ -116,7 +116,7 @@ long double logl(long double x) return x; if (x <= 0.0) { if (x == 0.0) - return -1/(x+0); /* -inf with divbyzero */ + return -1/(x*x); /* -inf with divbyzero */ return 0/0.0f; /* nan with invalid */ } @@ -127,7 +127,7 @@ long double logl(long double x) x = frexpl(x, &e); /* logarithm using log(x) = z + z**3 P(z)/Q(z), - * where z = 2(x-1)/x+1) + * where z = 2(x-1)/(x+1) */ if (e > 2 || e < -2) { if (x < SQRTH) { /* 2(2x-1)/(2x+1) */ |