summaryrefslogtreecommitdiff
path: root/src/math
diff options
context:
space:
mode:
authorSzabolcs Nagy <nsz@port70.net>2018-09-22 18:47:27 +0000
committerRich Felker <dalias@aerifal.cx>2018-10-15 14:42:46 -0400
commit7396ef0a05b834bf92c4f268a3336c0bc10c3593 (patch)
tree185691544913e852ac64c13ba8d0a9f7cfe9530f /src/math
parent7c5f3bb955123ba65bbdedee0e4499ef78a5747c (diff)
downloadmusl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.tar.gz
musl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.tar.bz2
musl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.tar.xz
musl-7396ef0a05b834bf92c4f268a3336c0bc10c3593.zip
arm: add single instruction fma
vfma is available in the vfpv4 fpu and above. The ACLE standard feature test for double precision hardware fma support is "__ARM_FEATURE_FMA && __ARM_FP&8". We need further checks to work around clang bugs (fixed in clang >=7.0): "&& !__SOFTFP__", because __ARM_FP is defined even with -mfloat-abi=soft, and "&& !BROKEN_VFP_ASM", to disable the single precision code when inline asm handling is broken. For runtime selection the HWCAP_ARM_VFPv4 hwcap flag can be used, but that requires further work.
Diffstat (limited to 'src/math')
-rw-r--r--src/math/arm/fma.c15
-rw-r--r--src/math/arm/fmaf.c15
2 files changed, 30 insertions, 0 deletions
diff --git a/src/math/arm/fma.c b/src/math/arm/fma.c
new file mode 100644
index 00000000..2a9b8efa
--- /dev/null
+++ b/src/math/arm/fma.c
@@ -0,0 +1,15 @@
+#include <math.h>
+/* ARM fma(): use one vfma.f64 instruction when the compile-time feature test below passes. */
+#if __ARM_FEATURE_FMA && __ARM_FP&8 && !__SOFTFP__
+/* Computes x*y + z with vfma.f64; z is the read-write ("+w") accumulator operand and is returned. */
+double fma(double x, double y, double z)
+{
+ __asm__ ("vfma.f64 %P0, %P1, %P2" : "+w"(z) : "w"(x), "w"(y)); /* operand 0 = z (in/out), 1 = x, 2 = y */
+ return z;
+}
+/* NOTE(review): __ARM_FP&8 is presumably the ACLE double-precision bit; !__SOFTFP__ excludes soft-float builds. */
+#else
+/* Feature test failed: build the fma.c from the parent directory instead. */
+#include "../fma.c"
+
+#endif
diff --git a/src/math/arm/fmaf.c b/src/math/arm/fmaf.c
new file mode 100644
index 00000000..a1793d27
--- /dev/null
+++ b/src/math/arm/fmaf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+/* ARM fmaf(): use one vfma.f32 instruction when the compile-time feature test below passes. */
+#if __ARM_FEATURE_FMA && __ARM_FP&4 && !__SOFTFP__ && !BROKEN_VFP_ASM
+/* Computes x*y + z with vfma.f32; z is the read-write ("+t") accumulator operand and is returned. */
+float fmaf(float x, float y, float z)
+{
+ __asm__ ("vfma.f32 %0, %1, %2" : "+t"(z) : "t"(x), "t"(y)); /* operand 0 = z (in/out), 1 = x, 2 = y */
+ return z;
+}
+/* NOTE(review): __ARM_FP&4 is presumably the ACLE single-precision bit; BROKEN_VFP_ASM is defined outside this file. */
+#else
+/* Feature test failed: build the fmaf.c from the parent directory instead. */
+#include "../fmaf.c"
+
+#endif