From ebc10fa17634a6ddb87a3aedd71b7d9617d12c19 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy
Date: Sat, 17 Aug 2013 02:40:44 +0000
Subject: add sse fenv support on i386 through hwcap

the sse and x87 rounding modes should be always the same,
the visible exception flags are the bitwise or of the two
fenv states (so it's enough to query the rounding mode or
raise exceptions on one fenv)
---
Review note: in the fesetround hunk the branch following the __hwcap
test is `jz 1f`, matching every other function touched by this patch.
The earlier revision had an unconditional `jmp 1f` there, which always
skipped the stmxcsr/ldmxcsr sequence, so the mxcsr rounding-control
field was never written and could disagree with the x87 control word.

 src/fenv/i386/fenv.s   | 67 +++++++++++++++++++++++++++++++++++++++++++++-----
 src/fenv/x86_64/fenv.s |  3 ---
 2 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 471d2af8..9bba40a5 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:
 	mov 4(%esp),%ecx
 	not %ecx
-	test $0x3f,%ecx
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	and %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 1:	fnclex
 	xor %eax,%eax
 	ret
 2:	fnstsw %ax
+	# TODO: only load/store fenv if exceptions arent clear yet
 	and %ecx,%eax
 	jz 1b
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
-	pop %ecx
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr (%esp)
+	shl $3,%ch
+	andb $0x9f,1(%esp)
+	or %ch,1(%esp)
+	ldmxcsr (%esp)
+1:	pop %ecx
 	ret
 
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	fnstenv (%ecx)
-	ret
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	push %eax
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx
+	or %edx,4(%ecx)
+1:	ret
 
 .global fesetenv
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	jz 1f
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx
+	jmp 2f
 1:	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	fldenv (%esp)
 	add $28,%esp
-	ret
+	# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx
+	shl $3,%ecx
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	ret
 
 .global fetestexcept
 .type fetestexcept,@function
 fetestexcept:
 	mov 4(%esp),%ecx
 	fnstsw %ax
-	and %ecx,%eax
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	or 4(%esp),%eax
+1:	and %ecx,%eax
 	ret
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index 443e35a2..c48dade3 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -28,9 +28,6 @@ feraiseexcept:
 	stmxcsr -8(%rsp)
 	or %edi,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	fnstenv -32(%rsp)
-	or %edi,-28(%rsp)
-	fldenv -32(%rsp)
 	xor %eax,%eax
 	ret
 
-- 
cgit v1.2.3-70-g09d2