diff options
author | Szabolcs Nagy <nsz@port70.net> | 2018-09-22 18:47:27 +0000 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2018-10-15 14:42:46 -0400 |
commit | 7396ef0a05b834bf92c4f268a3336c0bc10c3593 (patch) | |
tree | 185691544913e852ac64c13ba8d0a9f7cfe9530f /src | |
parent | 7c5f3bb955123ba65bbdedee0e4499ef78a5747c (diff) | |
download | musl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.tar.gz musl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.tar.bz2 musl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.tar.xz musl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.zip |
arm: add single instruction fma
vfma is available in the vfpv4 fpu and above. The ACLE standard feature
test for double precision hardware fma support is

    __ARM_FEATURE_FMA && __ARM_FP&8

(the single precision variant tests `__ARM_FP&4` instead). We need further
checks to work around clang bugs (fixed in clang >=7.0):

    && !__SOFTFP__

because __ARM_FP is defined even with -mfloat-abi=soft, and

    && !BROKEN_VFP_ASM

to disable the single precision code when inline asm handling is broken.
For runtime selection the HWCAP_ARM_VFPv4 hwcap flag can be used, but
that requires further work.
Diffstat (limited to 'src')
-rw-r--r-- | src/math/arm/fma.c | 15 | ||||
-rw-r--r-- | src/math/arm/fmaf.c | 15 |
2 files changed, 30 insertions, 0 deletions
```diff
diff --git a/src/math/arm/fma.c b/src/math/arm/fma.c
new file mode 100644
index 00000000..2a9b8efa
--- /dev/null
+++ b/src/math/arm/fma.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __ARM_FEATURE_FMA && __ARM_FP&8 && !__SOFTFP__
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("vfma.f64 %P0, %P1, %P2" : "+w"(z) : "w"(x), "w"(y));
+	return z;
+}
+
+#else
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/arm/fmaf.c b/src/math/arm/fmaf.c
new file mode 100644
index 00000000..a1793d27
--- /dev/null
+++ b/src/math/arm/fmaf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __ARM_FEATURE_FMA && __ARM_FP&4 && !__SOFTFP__ && !BROKEN_VFP_ASM
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("vfma.f32 %0, %1, %2" : "+t"(z) : "t"(x), "t"(y));
+	return z;
+}
+
+#else
+
+#include "../fmaf.c"
+
+#endif
```