From 8c245bf2725f3171ee8f58dab7169e5d682de026 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy
Date: Wed, 5 Nov 2014 21:40:29 +0100
Subject: math: fix x86_64 and x32 asm not to use sahf instruction

Some early x86_64 cpus (released before 2006) did not support sahf/lahf
instructions so they should be avoided (intel manual says they are only
supported if CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1).

The workaround simplifies exp2l and expm1l because fucomip can be
used instead of the fucomp;fnstsw;sahf sequence copied from i386.

In fmodl and remainderl sahf is replaced by a simple bit test.

(cherry picked from commit a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e)
---
 src/math/x86_64/exp2l.s      | 13 +++----------
 src/math/x86_64/fmodl.s      |  4 ++--
 src/math/x86_64/remainderl.s |  4 ++--
 3 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'src/math/x86_64')

diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
index 0d6cd563..0e9bdf9f 100644
--- a/src/math/x86_64/exp2l.s
+++ b/src/math/x86_64/exp2l.s
@@ -6,9 +6,7 @@ expm1l:
 	fmulp
 	movl $0xc2820000,-4(%rsp)
 	flds -4(%rsp)
-	fucomp %st(1)
-	fnstsw %ax
-	sahf
+	fucomip %st(1)
 	fld1
 	jb 1f
 		# x*log2e <= -65, return -1 without underflow
@@ -17,11 +15,8 @@ expm1l:
 	ret
 1:	fld %st(1)
 	fabs
-	fucom %st(1)
-	fnstsw %ax
+	fucomip %st(1)
 	fstp %st(0)
-	fstp %st(0)
-	sahf
 	ja 1f
 	f2xm1
 	ret
@@ -53,9 +48,7 @@ exp2l:
 	fld %st(1)
 	fsub %st(1)
 	faddp
-	fucomp %st(1)
-	fnstsw
-	sahf
+	fucomip %st(1)
 	je 2f             # x - 0x1p63 + 0x1p63 == x
 	movl $1,(%rsp)
 	flds (%rsp)       # 0x1p-149
diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s
index ca81e60c..cd8d2b7c 100644
--- a/src/math/x86_64/fmodl.s
+++ b/src/math/x86_64/fmodl.s
@@ -5,7 +5,7 @@ fmodl:
 	fldt 8(%rsp)
 1:	fprem
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s
index 75c12374..2c337cf5 100644
--- a/src/math/x86_64/remainderl.s
+++ b/src/math/x86_64/remainderl.s
@@ -5,7 +5,7 @@ remainderl:
 	fldt 8(%rsp)
 1:	fprem1
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
-- 
cgit v1.2.3-70-g09d2