summaryrefslogtreecommitdiff
path: root/src/math
diff options
context:
space:
mode:
authornsz <nsz@port70.net>2012-05-05 01:11:56 +0200
committernsz <nsz@port70.net>2012-05-05 01:11:56 +0200
commitf697d66b81912af59128ac1b96bc0e2a4514b758 (patch)
tree32433a4df7d33f692afa2ff6e759fa1c5870b8f0 /src/math
parentdb4096c5f2ffb15e52015004ab5a900b820c6683 (diff)
downloadmusl-f697d66b81912af59128ac1b96bc0e2a4514b758.tar.gz
musl-f697d66b81912af59128ac1b96bc0e2a4514b758.tar.bz2
musl-f697d66b81912af59128ac1b96bc0e2a4514b758.tar.xz
musl-f697d66b81912af59128ac1b96bc0e2a4514b758.zip
math: change the formula used for acos.s
old: 2*atan2(sqrt(1-x),sqrt(1+x)) new: atan2(fabs(sqrt((1-x)*(1+x))),x) improvements: * all edge cases are fixed (sign of zero in downward rounding) * a bit faster (here a single call is about 131ns vs 162ns) * a bit more precise (at most 1ulp error on 1M uniform random samples in [0,1), the old formula gave some 2ulp errors as well)
Diffstat (limited to 'src/math')
-rw-r--r--src/math/i386/acos.s18
-rw-r--r--src/math/x86_64/acosl.s18
2 files changed, 16 insertions, 20 deletions
diff --git a/src/math/i386/acos.s b/src/math/i386/acos.s
index bfff0c5c..47f365ef 100644
--- a/src/math/i386/acos.s
+++ b/src/math/i386/acos.s
@@ -1,3 +1,5 @@
+# use acos(x) = atan2(fabs(sqrt((1-x)*(1+x))), x)
+
.global acosf
.type acosf,@function
acosf:
@@ -14,17 +16,13 @@ acosl:
.type acos,@function
acos:
fldl 4(%esp)
-1: fld1
- fld %st(1)
+1: fld %st(0)
fld1
- fsubp
- fsqrt
- fxch %st(2)
- faddp
+ fsub %st(0),%st(1)
+ fadd %st(2)
+ fmulp
fsqrt
+ fabs # fix sign of zero (matters in downward rounding mode)
+ fxch %st(1)
fpatan
- fld1
- fld1
- faddp
- fmulp
ret
diff --git a/src/math/x86_64/acosl.s b/src/math/x86_64/acosl.s
index db68d2de..88e01b49 100644
--- a/src/math/x86_64/acosl.s
+++ b/src/math/x86_64/acosl.s
@@ -1,18 +1,16 @@
+# see ../i386/acos.s
+
.global acosl
.type acosl,@function
acosl:
fldt 8(%rsp)
+1: fld %st(0)
fld1
- fld %st(1)
- fld1
- fsubp
- fsqrt
- fxch %st(2)
- faddp
+ fsub %st(0),%st(1)
+ fadd %st(2)
+ fmulp
fsqrt
+ fabs
+ fxch %st(1)
fpatan
- fld1
- fld1
- faddp
- fmulp
ret