diff options
author | Rich Felker <dalias@aerifal.cx> | 2012-03-19 09:00:30 -0400 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2012-03-19 09:00:30 -0400 |
commit | 02db27d9deaee71b244c91e720ec819c74dab150 (patch) | |
tree | ef2543fd54a6fdbca8839cb14c71bb10ffdaa8f1 /src/math | |
parent | da7458a602a6f0bdea25d6b9b613372048a974e6 (diff) | |
download | musl-02db27d9deaee71b244c91e720ec819c74dab150.tar.gz musl-02db27d9deaee71b244c91e720ec819c74dab150.tar.bz2 musl-02db27d9deaee71b244c91e720ec819c74dab150.tar.xz musl-02db27d9deaee71b244c91e720ec819c74dab150.zip |
optimize exponential asm for i386

up to 30% faster exp2 by avoiding the slow frndint and fscale instructions.
expm1 also takes a much more direct path for small arguments (the
expected use case).
Diffstat (limited to 'src/math')
-rw-r--r-- | src/math/i386/exp.s | 87 | ||||
-rw-r--r-- | src/math/i386/expm1.s | 48 |
2 files changed, 77 insertions, 58 deletions
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index f4769d59..76ab4d64 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -1,3 +1,37 @@ +.global expm1f +.type expm1f,@function +expm1f: + flds 4(%esp) + jmp 1f + +.global expm1l +.type expm1l,@function +expm1l: + fldt 4(%esp) + jmp 1f + +.global expm1 +.type expm1,@function +expm1: + fldl 4(%esp) +1: fldl2e + fmulp + fld1 + fld %st(1) + fabs + fucom %st(1) + fnstsw %ax + fstp %st(0) + fstp %st(0) + sahf + ja 1f + f2xm1 + ret +1: call 1f + fld1 + fsubrp + ret + .global exp2f .type exp2f,@function exp2f: @@ -34,22 +68,53 @@ exp: .type exp2,@function exp2: fldl 4(%esp) -1: fxam - fnstsw %ax +1: mov $0x47000000,%eax + push %eax + flds (%esp) + shl $7,%eax + push %eax + add %eax,%eax + push %eax + fld %st(1) + fabs + fucom %st(1) + fnstsw sahf - jnp 1f - jnc 1f - fstps 4(%esp) - mov $0xfe,%al - and %al,7(%esp) - flds 4(%esp) -1: fld %st(0) - frndint + ja 2f + fstp %st(0) + fstp %st(0) + fld %st(0) + fistpl 8(%esp) + fildl 8(%esp) fxch %st(1) fsub %st(1) + mov $0x3fff,%eax + add %eax,8(%esp) f2xm1 fld1 faddp - fscale + fldt (%esp) + fmulp fstp %st(1) + add $12,%esp + ret + +2: fstp %st(0) + fstp %st(0) + fsts 8(%esp) + mov 8(%esp),%eax + lea (%eax,%eax),%ecx + cmp $0xff000000,%ecx + ja 2f + fstp %st(0) + xor %ecx,%ecx + inc %ecx + add %eax,%eax + jc 1f + mov $0x7ffe,%ecx +1: mov %ecx,8(%esp) + fldt (%esp) + fld %st(0) + fmulp +2: add $12,%esp ret diff --git a/src/math/i386/expm1.s b/src/math/i386/expm1.s index bbb5d12e..f335a3e5 100644 --- a/src/math/i386/expm1.s +++ b/src/math/i386/expm1.s @@ -1,47 +1 @@ -.global expm1f -.type expm1f,@function -expm1f: - flds 4(%esp) - jmp 1f - -.global expm1l -.type expm1l,@function -expm1l: - fldt 4(%esp) - jmp 1f - -.global expm1 -.type expm1,@function -expm1: - fldl 4(%esp) -1: fxam - fnstsw %ax - sahf - jnp 1f - jnc 1f - fstps 4(%esp) - mov $0xfe,%al - and %al,7(%esp) - flds 4(%esp) -1: fldl2e - fmulp - fld %st(0) - frndint - fldz - fcomp - fnstsw %ax - sahf - jnz 
1f - fstp %st(0) - f2xm1 - ret -1: fxch %st(1) - fsub %st(1) - f2xm1 - fld1 - faddp - fscale - fld1 - fsubrp - fstp %st(1) - ret +# see exp.s |