diff options
author | Rich Felker <dalias@aerifal.cx> | 2012-03-19 21:55:53 -0400 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2012-03-19 21:55:53 -0400 |
commit | acb744921b73f5a73803e533e5e4a4896d164a26 (patch) | |
tree | 2cdfcd2dc29d4d3041af99958215871ae5c900b6 /src/math/i386/exp.s | |
parent | 01084202815fefbb7db23825d8b11a570c455e13 (diff) | |
download | musl-acb744921b73f5a73803e533e5e4a4896d164a26.tar.gz musl-acb744921b73f5a73803e533e5e4a4896d164a26.tar.bz2 musl-acb744921b73f5a73803e533e5e4a4896d164a26.tar.xz musl-acb744921b73f5a73803e533e5e4a4896d164a26.zip |
fix exp asm
exponents (base 2) near 16383 were broken due to (1) wrong cutoff, and
(2) inability to fit the necessary range of scalings into a long
double value.
as a solution, we fall back to using frndint/fscale for insanely large
exponents, and also have to special-case infinities here to avoid
inf-inf generating nan.
thankfully the costly code never runs in normal usage cases.
Diffstat (limited to 'src/math/i386/exp.s')
-rw-r--r-- | src/math/i386/exp.s | 45 |
1 files changed, 22 insertions, 23 deletions
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index 76ab4d64..ca0de1d4 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -68,21 +68,19 @@ exp: .type exp2,@function exp2: fldl 4(%esp) -1: mov $0x47000000,%eax - push %eax +1: pushl $0x467ff000 flds (%esp) - shl $7,%eax - push %eax - add %eax,%eax + xorl %eax,%eax + pushl $0x80000000 push %eax fld %st(1) fabs fucom %st(1) fnstsw - sahf - ja 2f fstp %st(0) fstp %st(0) + sahf + ja 2f fld %st(0) fistpl 8(%esp) fildl 8(%esp) @@ -99,22 +97,23 @@ exp2: add $12,%esp ret -2: fstp %st(0) - fstp %st(0) - fsts 8(%esp) - mov 8(%esp),%eax - lea (%eax,%eax),%ecx - cmp $0xff000000,%ecx - ja 2f +2: fld %st(0) + fstpt (%esp) + mov 9(%esp),%ah + and $0x7f,%ah + cmp $0x7f,%ah + jne 1f + decb 9(%esp) fstp %st(0) - xor %ecx,%ecx - inc %ecx - add %eax,%eax - jc 1f - mov $0x7ffe,%ecx -1: mov %ecx,8(%esp) fldt (%esp) - fld %st(0) - fmulp -2: add $12,%esp +1: fld %st(0) + frndint + fxch %st(1) + fsub %st(1) + f2xm1 + fld1 + faddp + fscale + fstp %st(1) + add $12,%esp ret |