diff options
author | Szabolcs Nagy <nsz@port70.net> | 2017-03-19 05:26:45 +0100 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2017-03-21 12:39:18 -0400 |
commit | 54807d47acecab778498ced88ce8f62bfa16e379 (patch) | |
tree | 2203cc44d6061f092dd104ef6f89d4f2ca9fa1d7 /src/math/aarch64 | |
parent | b6e1fe0d5e78dac647e85d49c2d537bb071ba49e (diff) | |
download | musl-54807d47acecab778498ced88ce8f62bfa16e379.tar.gz musl-54807d47acecab778498ced88ce8f62bfa16e379.tar.bz2 musl-54807d47acecab778498ced88ce8f62bfa16e379.tar.xz musl-54807d47acecab778498ced88ce8f62bfa16e379.zip |
aarch64: add single instruction math functions
this should increase performance and reduce code size on aarch64.
the compiled code was compared against the code generated when using
__builtin_* instead of inline asm, with gcc-6.2.0.
lrint is two instructions.
c with inline asm is used because it is safer than a pure asm
implementation; this prevents ll{rint,round} from being an alias
of l{rint,round} (because the types don't match) and depends
on gcc style inline asm support.
ceil, floor, round, trunc can either raise inexact on finite
non-integer inputs or not raise any exceptions. the new
implementation does not raise exceptions while the generic
c code does.
on aarch64, the underflow exception is signaled before rounding
(ieee 754 allows both before and after rounding, but it must be
consistent). the generic fma c code signals it after rounding,
so using a single instruction fixes a slight conformance issue too.
Diffstat (limited to 'src/math/aarch64')
34 files changed, 226 insertions, 24 deletions
diff --git a/src/math/aarch64/ceil.c b/src/math/aarch64/ceil.c new file mode 100644 index 00000000..ac80c1dc --- /dev/null +++ b/src/math/aarch64/ceil.c @@ -0,0 +1,7 @@ +#include <math.h> + +double ceil(double x) +{ + __asm__ ("frintp %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/ceilf.c b/src/math/aarch64/ceilf.c new file mode 100644 index 00000000..1ef1e9c8 --- /dev/null +++ b/src/math/aarch64/ceilf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float ceilf(float x) +{ + __asm__ ("frintp %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fabs.c b/src/math/aarch64/fabs.c new file mode 100644 index 00000000..5c3ecaf4 --- /dev/null +++ b/src/math/aarch64/fabs.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fabs(double x) +{ + __asm__ ("fabs %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fabs.s b/src/math/aarch64/fabs.s deleted file mode 100644 index 8c04d091..00000000 --- a/src/math/aarch64/fabs.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global fabs -.type fabs,%function -fabs: - fabs d0, d0 - ret diff --git a/src/math/aarch64/fabsf.c b/src/math/aarch64/fabsf.c new file mode 100644 index 00000000..7fde9817 --- /dev/null +++ b/src/math/aarch64/fabsf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fabsf(float x) +{ + __asm__ ("fabs %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fabsf.s b/src/math/aarch64/fabsf.s deleted file mode 100644 index 6e96dd43..00000000 --- a/src/math/aarch64/fabsf.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global fabsf -.type fabsf,%function -fabsf: - fabs s0, s0 - ret diff --git a/src/math/aarch64/floor.c b/src/math/aarch64/floor.c new file mode 100644 index 00000000..50ffdb28 --- /dev/null +++ b/src/math/aarch64/floor.c @@ -0,0 +1,7 @@ +#include <math.h> + +double floor(double x) +{ + __asm__ ("frintm %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/floorf.c b/src/math/aarch64/floorf.c new file mode 100644 index 
00000000..8d007e9f --- /dev/null +++ b/src/math/aarch64/floorf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float floorf(float x) +{ + __asm__ ("frintm %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fma.c b/src/math/aarch64/fma.c new file mode 100644 index 00000000..2450ea7e --- /dev/null +++ b/src/math/aarch64/fma.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fma(double x, double y, double z) +{ + __asm__ ("fmadd %d0, %d1, %d2, %d3" : "=w"(x) : "w"(x), "w"(y), "w"(z)); + return x; +} diff --git a/src/math/aarch64/fmaf.c b/src/math/aarch64/fmaf.c new file mode 100644 index 00000000..9a147213 --- /dev/null +++ b/src/math/aarch64/fmaf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fmaf(float x, float y, float z) +{ + __asm__ ("fmadd %s0, %s1, %s2, %s3" : "=w"(x) : "w"(x), "w"(y), "w"(z)); + return x; +} diff --git a/src/math/aarch64/fmax.c b/src/math/aarch64/fmax.c new file mode 100644 index 00000000..86dcb3b4 --- /dev/null +++ b/src/math/aarch64/fmax.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fmax(double x, double y) +{ + __asm__ ("fmaxnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/fmaxf.c b/src/math/aarch64/fmaxf.c new file mode 100644 index 00000000..ee5eac2d --- /dev/null +++ b/src/math/aarch64/fmaxf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fmaxf(float x, float y) +{ + __asm__ ("fmaxnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/fmin.c b/src/math/aarch64/fmin.c new file mode 100644 index 00000000..f1e99808 --- /dev/null +++ b/src/math/aarch64/fmin.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fmin(double x, double y) +{ + __asm__ ("fminnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/fminf.c b/src/math/aarch64/fminf.c new file mode 100644 index 00000000..80468f67 --- /dev/null +++ b/src/math/aarch64/fminf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fminf(float x, float y) +{ + __asm__ ("fminnm 
%s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/llrint.c b/src/math/aarch64/llrint.c new file mode 100644 index 00000000..a9e07a93 --- /dev/null +++ b/src/math/aarch64/llrint.c @@ -0,0 +1,10 @@ +#include <math.h> + +long long llrint(double x) +{ + long long n; + __asm__ ( + "frintx %d1, %d1\n" + "fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/aarch64/llrintf.c b/src/math/aarch64/llrintf.c new file mode 100644 index 00000000..12b6804f --- /dev/null +++ b/src/math/aarch64/llrintf.c @@ -0,0 +1,10 @@ +#include <math.h> + +long long llrintf(float x) +{ + long long n; + __asm__ ( + "frintx %s1, %s1\n" + "fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/aarch64/llround.c b/src/math/aarch64/llround.c new file mode 100644 index 00000000..e09ddd48 --- /dev/null +++ b/src/math/aarch64/llround.c @@ -0,0 +1,8 @@ +#include <math.h> + +long long llround(double x) +{ + long long n; + __asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/llroundf.c b/src/math/aarch64/llroundf.c new file mode 100644 index 00000000..16699598 --- /dev/null +++ b/src/math/aarch64/llroundf.c @@ -0,0 +1,8 @@ +#include <math.h> + +long long llroundf(float x) +{ + long long n; + __asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/lrint.c b/src/math/aarch64/lrint.c new file mode 100644 index 00000000..cb7785ad --- /dev/null +++ b/src/math/aarch64/lrint.c @@ -0,0 +1,10 @@ +#include <math.h> + +long lrint(double x) +{ + long n; + __asm__ ( + "frintx %d1, %d1\n" + "fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/aarch64/lrintf.c b/src/math/aarch64/lrintf.c new file mode 100644 index 00000000..4d750d69 --- /dev/null +++ b/src/math/aarch64/lrintf.c @@ -0,0 +1,10 @@ +#include <math.h> + +long lrintf(float x) +{ + long n; + __asm__ ( + "frintx %s1, %s1\n" + "fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x)); + return n; 
+} diff --git a/src/math/aarch64/lround.c b/src/math/aarch64/lround.c new file mode 100644 index 00000000..85656c78 --- /dev/null +++ b/src/math/aarch64/lround.c @@ -0,0 +1,8 @@ +#include <math.h> + +long lround(double x) +{ + long n; + __asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/lroundf.c b/src/math/aarch64/lroundf.c new file mode 100644 index 00000000..32e51f3c --- /dev/null +++ b/src/math/aarch64/lroundf.c @@ -0,0 +1,8 @@ +#include <math.h> + +long lroundf(float x) +{ + long n; + __asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/nearbyint.c b/src/math/aarch64/nearbyint.c new file mode 100644 index 00000000..9c3fdb44 --- /dev/null +++ b/src/math/aarch64/nearbyint.c @@ -0,0 +1,7 @@ +#include <math.h> + +double nearbyint(double x) +{ + __asm__ ("frinti %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/nearbyintf.c b/src/math/aarch64/nearbyintf.c new file mode 100644 index 00000000..8e7f61df --- /dev/null +++ b/src/math/aarch64/nearbyintf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float nearbyintf(float x) +{ + __asm__ ("frinti %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/rint.c b/src/math/aarch64/rint.c new file mode 100644 index 00000000..45b194b5 --- /dev/null +++ b/src/math/aarch64/rint.c @@ -0,0 +1,7 @@ +#include <math.h> + +double rint(double x) +{ + __asm__ ("frintx %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/rintf.c b/src/math/aarch64/rintf.c new file mode 100644 index 00000000..1ae7dd25 --- /dev/null +++ b/src/math/aarch64/rintf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float rintf(float x) +{ + __asm__ ("frintx %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/round.c b/src/math/aarch64/round.c new file mode 100644 index 00000000..897a84cc --- /dev/null +++ b/src/math/aarch64/round.c @@ -0,0 +1,7 @@ +#include <math.h> + +double round(double x) +{ + __asm__ 
("frinta %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/roundf.c b/src/math/aarch64/roundf.c new file mode 100644 index 00000000..91637eaa --- /dev/null +++ b/src/math/aarch64/roundf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float roundf(float x) +{ + __asm__ ("frinta %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/sqrt.c b/src/math/aarch64/sqrt.c new file mode 100644 index 00000000..fe93c3e6 --- /dev/null +++ b/src/math/aarch64/sqrt.c @@ -0,0 +1,7 @@ +#include <math.h> + +double sqrt(double x) +{ + __asm__ ("fsqrt %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/sqrt.s b/src/math/aarch64/sqrt.s deleted file mode 100644 index 1917e18d..00000000 --- a/src/math/aarch64/sqrt.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global sqrt -.type sqrt,%function -sqrt: - fsqrt d0, d0 - ret diff --git a/src/math/aarch64/sqrtf.c b/src/math/aarch64/sqrtf.c new file mode 100644 index 00000000..275c7f39 --- /dev/null +++ b/src/math/aarch64/sqrtf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float sqrtf(float x) +{ + __asm__ ("fsqrt %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/sqrtf.s b/src/math/aarch64/sqrtf.s deleted file mode 100644 index 1639497b..00000000 --- a/src/math/aarch64/sqrtf.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global sqrtf -.type sqrtf,%function -sqrtf: - fsqrt s0, s0 - ret diff --git a/src/math/aarch64/trunc.c b/src/math/aarch64/trunc.c new file mode 100644 index 00000000..e592147a --- /dev/null +++ b/src/math/aarch64/trunc.c @@ -0,0 +1,7 @@ +#include <math.h> + +double trunc(double x) +{ + __asm__ ("frintz %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/truncf.c b/src/math/aarch64/truncf.c new file mode 100644 index 00000000..20ef30f1 --- /dev/null +++ b/src/math/aarch64/truncf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float truncf(float x) +{ + __asm__ ("frintz %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} |