summaryrefslogtreecommitdiff
path: root/src/math/x32/exp2l.s
diff options
context:
space:
mode:
authorrofl0r <retnyg@gmx.net>2014-01-07 22:43:34 +0100
committerrofl0r <retnyg@gmx.net>2014-02-23 11:07:18 +0100
commit323272db175204b951f119dae4bd99ef05e20f13 (patch)
tree70329156d5189294b1e9e7f9c7c326924ad62e35 /src/math/x32/exp2l.s
parent0f169cbb79c39a5b15f7a27d9283cdeb6e122b8f (diff)
downloadmusl-323272db175204b951f119dae4bd99ef05e20f13.tar.gz
musl-323272db175204b951f119dae4bd99ef05e20f13.tar.bz2
musl-323272db175204b951f119dae4bd99ef05e20f13.tar.xz
musl-323272db175204b951f119dae4bd99ef05e20f13.zip
import vanilla x86_64 code as x32
Diffstat (limited to 'src/math/x32/exp2l.s')
-rw-r--r--src/math/x32/exp2l.s90
1 files changed, 90 insertions, 0 deletions
diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s
new file mode 100644
index 00000000..0d6cd563
--- /dev/null
+++ b/src/math/x32/exp2l.s
@@ -0,0 +1,90 @@
+.global expm1l
+.type expm1l,@function
+expm1l:
+ fldt 8(%rsp)
+ fldl2e
+ fmulp
+ movl $0xc2820000,-4(%rsp)
+ flds -4(%rsp)
+ fucomp %st(1)
+ fnstsw %ax
+ sahf
+ fld1
+ jb 1f
+ # x*log2e <= -65, return -1 without underflow
+ fstp %st(1)
+ fchs
+ ret
+1: fld %st(1)
+ fabs
+ fucom %st(1)
+ fnstsw %ax
+ fstp %st(0)
+ fstp %st(0)
+ sahf
+ ja 1f
+ f2xm1
+ ret
+1: push %rax
+ call 1f
+ pop %rax
+ fld1
+ fsubrp
+ ret
+
+.global exp2l
+.type exp2l,@function
+exp2l:
+ fldt 8(%rsp)
+1: fld %st(0)
+ sub $16,%rsp
+ fstpt (%rsp)
+ mov 8(%rsp),%ax
+ and $0x7fff,%ax
+ cmp $0x3fff+13,%ax
+ jb 4f # |x| < 8192
+ cmp $0x3fff+15,%ax
+ jae 3f # |x| >= 32768
+ fsts (%rsp)
+ cmpl $0xc67ff800,(%rsp)
+ jb 2f # x > -16382
+ movl $0x5f000000,(%rsp)
+ flds (%rsp) # 0x1p63
+ fld %st(1)
+ fsub %st(1)
+ faddp
+ fucomp %st(1)
+ fnstsw
+ sahf
+ je 2f # x - 0x1p63 + 0x1p63 == x
+ movl $1,(%rsp)
+ flds (%rsp) # 0x1p-149
+ fdiv %st(1)
+ fstps (%rsp) # raise underflow
+2: fld1
+ fld %st(1)
+ frndint
+ fxch %st(2)
+ fsub %st(2) # st(0)=x-rint(x), st(1)=1, st(2)=rint(x)
+ f2xm1
+ faddp # 2^(x-rint(x))
+1: fscale
+ fstp %st(1)
+ add $16,%rsp
+ ret
+3: xor %eax,%eax
+4: cmp $0x3fff-64,%ax
+ fld1
+ jb 1b # |x| < 0x1p-64
+ fstpt (%rsp)
+ fistl 8(%rsp)
+ fildl 8(%rsp)
+ fsubrp %st(1)
+ addl $0x3fff,8(%rsp)
+ f2xm1
+ fld1
+ faddp # 2^(x-rint(x))
+ fldt (%rsp) # 2^rint(x)
+ fmulp
+ add $16,%rsp
+ ret