diff options
author | Szabolcs Nagy <nsz@port70.net> | 2013-10-28 01:16:14 +0000 |
---|---|---|
committer | Szabolcs Nagy <nsz@port70.net> | 2013-10-28 01:16:14 +0000 |
commit | 71d23b310383699a3101ea8bf088398796529ddd (patch) | |
tree | 812e6281a32bdc70977475abef9ee6b52b187422 /src/math/log1pf.c | |
parent | 4b15d9f46a2b260661d2e054575e617c76795578 (diff) | |
download | musl-71d23b310383699a3101ea8bf088398796529ddd.tar.gz musl-71d23b310383699a3101ea8bf088398796529ddd.tar.bz2 musl-71d23b310383699a3101ea8bf088398796529ddd.tar.xz musl-71d23b310383699a3101ea8bf088398796529ddd.zip |
math: extensive log*.c cleanup
The log, log2 and log10 functions share a lot of code and to a lesser
extent log1p too. A small part of the code was kept separately in
__log1p.h, but since it did not capture much of the common code and
it was inlined anyway, it did not solve the issue properly. Now the
log functions have significant code duplication, which may be resolved
later, until then they need to be modified together.
logl, log10l, log2l, log1pl:
* Fix the sign when the return value should be -inf.
* Remove the volatile hack from log10l (seems unnecessary)
log1p, log1pf:
* Change the handling of small inputs: only |x|<2^-53 is special
(then it is enough to return x with the usual subnormal handling)
this fixes the sign of log1p(0) in downward rounding.
* Do not handle the k==0 case specially (other than skipping the
elaborate argument reduction)
* Do not handle 1+x close to power-of-two specially (this code was
used rarely, did not give much speed up and the precision wasn't
better than the general)
* Fix the correction term formula (c=1-(u-x) was used incorrectly
when x<1 but (double)(x+1)==2, this was not a critical issue)
* Use the exact same method for calculating log(1+f) as in log
(except in log1p the c correction term is added to the result).
log, logf, log10, log10f, log2, log2f:
* Use double_t and float_t consistently.
* Now the first part of log10 and log2 is identical to log (until the
return statement, hopefully this makes maintainence easier).
* Most special case formulas were removed (close to power-of-two and
k==0 cases), they increase the code size without providing precision
or performance benefits (and obfuscate the code).
Only x==1 is handled specially so in downward rounding mode the
sign of zero is correct (the general formula happens to give -0).
* For x==0 instead of -1/0.0 or -two54/0.0, return -1/(x*x) to force
raising the exception at runtime.
* Arg reduction code is changed (slightly simplified)
* The thresholds for arg reduction to [sqrt(2)/2,sqrt(2)] are now
consistently the [0x3fe6a09e00000000,0x3ff6a09dffffffff] and the
[0x3f3504f3,0x3fb504f2] intervals for double and float reductions
respectively (the exact threshold values are not critical)
* Remove the obsolete comment for the FLT_EVAL_METHOD!=0 case in log2f
(The same code is used for all eval methods now, on i386 slightly
simpler code could be used, but we have asm there anyway)
all:
* Fix signed int arithmetics (using unsigned for bitmanipulation)
* Fix various comments
Diffstat (limited to 'src/math/log1pf.c')
-rw-r--r-- | src/math/log1pf.c | 125 |
1 files changed, 45 insertions, 80 deletions
diff --git a/src/math/log1pf.c b/src/math/log1pf.c index e6940d29..23985c35 100644 --- a/src/math/log1pf.c +++ b/src/math/log1pf.c @@ -1,8 +1,5 @@ /* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */ /* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * @@ -18,95 +15,63 @@ static const float ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ -two25 = 3.355443200e+07, /* 0x4c000000 */ -Lp1 = 6.6666668653e-01, /* 3F2AAAAB */ -Lp2 = 4.0000000596e-01, /* 3ECCCCCD */ -Lp3 = 2.8571429849e-01, /* 3E924925 */ -Lp4 = 2.2222198546e-01, /* 3E638E29 */ -Lp5 = 1.8183572590e-01, /* 3E3A3325 */ -Lp6 = 1.5313838422e-01, /* 3E1CD04F */ -Lp7 = 1.4798198640e-01; /* 3E178897 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ float log1pf(float x) { - float hfsq,f,c,s,z,R,u; - int32_t k,hx,hu,ax; - - GET_FLOAT_WORD(hx, x); - ax = hx & 0x7fffffff; + union {float f; uint32_t i;} u = {x}; + float_t hfsq,f,c,s,z,R,w,t1,t2,dk; + uint32_t ix,iu; + int k; + ix = u.i; k = 1; - if (hx < 0x3ed413d0) { /* 1+x < sqrt(2)+ */ - if (ax >= 0x3f800000) { /* x <= -1.0 */ - if (x == -1.0f) - return -two25/0.0f; /* log1p(-1)=+inf */ - return (x-x)/(x-x); /* log1p(x<-1)=NaN */ + if (ix < 0x3ed413d0 || ix>>31) { /* 1+x < sqrt(2)+ */ + if (ix >= 0xbf800000) { /* x <= -1.0 */ + if (x == -1) + return x/0.0f; /* log1p(-1)=+inf */ + return (x-x)/0.0f; /* log1p(x<-1)=NaN */ } - if (ax < 0x38000000) { /* |x| < 2**-15 */ - /* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */ - if (ax < 0x33800000 && ax >= 0x00800000) - return x; -#if FLT_EVAL_METHOD != 0 - FORCE_EVAL(x*x); -#endif - return x - x*x*0.5f; + if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */ + /* underflow if subnormal */ + if ((ix&0x7f800000) == 0) + FORCE_EVAL(x*x); + return x; } - if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ + if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ k = 0; + c = 0; f = x; - hu = 1; } - } - if (hx >= 0x7f800000) - return x+x; - if (k != 0) { - if (hx < 0x5a000000) { - u = 1 + x; - GET_FLOAT_WORD(hu, u); - k = (hu>>23) - 127; - /* correction term */ - c = k > 0 ? 1.0f-(u-x) : x-(u-1.0f); - c /= u; - } else { - u = x; - GET_FLOAT_WORD(hu,u); - k = (hu>>23) - 127; + } else if (ix >= 0x7f800000) + return x; + if (k) { + u.f = 1 + x; + iu = u.i; + iu += 0x3f800000 - 0x3f3504f3; + k = (int)(iu>>23) - 0x7f; + /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ + if (k < 25) { + c = k >= 2 ? 1-(u.f-x) : x-(u.f-1); + c /= u.f; + } else c = 0; - } - hu &= 0x007fffff; - /* - * The approximation to sqrt(2) used in thresholds is not - * critical. However, the ones used above must give less - * strict bounds than the one here so that the k==0 case is - * never reached from here, since here we have committed to - * using the correction term but don't use it if k==0. - */ - if (hu < 0x3504f4) { /* u < sqrt(2) */ - SET_FLOAT_WORD(u, hu|0x3f800000); /* normalize u */ - } else { - k += 1; - SET_FLOAT_WORD(u, hu|0x3f000000); /* normalize u/2 */ - hu = (0x00800000-hu)>>2; - } - f = u - 1.0f; - } - hfsq = 0.5f * f * f; - if (hu == 0) { /* |f| < 2**-20 */ - if (f == 0.0f) { - if (k == 0) - return 0.0f; - c += k*ln2_lo; - return k*ln2_hi+c; - } - R = hfsq*(1.0f - 0.66666666666666666f * f); - if (k == 0) - return f - R; - return k*ln2_hi - ((R-(k*ln2_lo+c))-f); + /* reduce u into [sqrt(2)/2, sqrt(2)] */ + iu = (iu&0x007fffff) + 0x3f3504f3; + u.i = iu; + f = u.f - 1; } s = f/(2.0f + f); z = s*s; - R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); - if (k == 0) - return f - (hfsq-s*(hfsq+R)); - return k*ln2_hi - ((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); + w = z*z; + t1= w*(Lg2+w*Lg4); + t2= z*(Lg1+w*Lg3); + R = t2 + t1; + hfsq = 0.5f*f*f; + dk = k; + return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi; } |