summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSzabolcs Nagy <nsz@port70.net>2013-08-18 15:34:07 +0000
committerSzabolcs Nagy <nsz@port70.net>2013-08-18 16:27:21 +0000
commitd8764bf84022397ff9d22310f78fcd78d801e2bf (patch)
tree289574d06451582fd67b5cde646cd6bfd368fa66
parentbaba2630c9cf778b41e218dfa980fcfdbec808e6 (diff)
downloadmusl-d8764bf84022397ff9d22310f78fcd78d801e2bf.tar.gz
musl-d8764bf84022397ff9d22310f78fcd78d801e2bf.tar.bz2
musl-d8764bf84022397ff9d22310f78fcd78d801e2bf.tar.xz
musl-d8764bf84022397ff9d22310f78fcd78d801e2bf.zip
optimize x86 feclearexcept: only use save/restore x87 fenv if needed
The x87 exception summary (ES) and stack fault (SF) flags may be spuriously cleared by feclearexcept when it uses the fnclex instruction, but these flags are not observable through libc, so maintaining their state is not critical.
-rw-r--r--src/fenv/i386/fenv.s39
-rw-r--r--src/fenv/x86_64/fenv.s26
2 files changed, 38 insertions, 27 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 9bba40a5..a8540add 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -4,26 +4,41 @@
.type feclearexcept,@function
feclearexcept:
mov 4(%esp),%ecx
- not %ecx
+ fnstsw %ax
# consider sse fenv as well if the cpu has XMM capability
call 1f
1: addl $__hwcap-1b,(%esp)
pop %edx
testl $0x02000000,(%edx)
+ jz 2f
+ # maintain exceptions in the sse mxcsr, clear x87 exceptions
+ test %eax,%ecx
jz 1f
- stmxcsr 4(%esp)
- and %ecx,4(%esp)
- ldmxcsr 4(%esp)
-1: test $0x3f,%ecx
- jnz 2f
-1: fnclex
- xor %eax,%eax
+ fnclex
+1: push %edx
+ stmxcsr (%esp)
+ pop %edx
+ and $0x3f,%eax
+ or %eax,%edx
+ test %edx,%ecx
+ jz 1f
+ not %ecx
+ and %ecx,%edx
+ push %edx
+ ldmxcsr (%esp)
+ pop %edx
+1: xor %eax,%eax
ret
-2: fnstsw %ax
- # TODO: only load/store fenv if exceptions arent clear yet
- and %ecx,%eax
+ # only do the expensive x87 fenv load/store when needed
+2: test %eax,%ecx
jz 1b
- sub $32,%esp
+ not %ecx
+ and %ecx,%eax
+ test $0x3f,%eax
+ jz 1f
+ fnclex
+ jmp 1b
+1: sub $32,%esp
fnstenv (%esp)
mov %al,4(%esp)
fldenv (%esp)
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index c48dade3..dda6b61a 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -1,25 +1,21 @@
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
+ # maintain exceptions in the sse mxcsr, clear x87 exceptions
mov %edi,%ecx
+ fnstsw %ax
+ test %eax,%ecx
+ jz 1f
+ fnclex
+1: stmxcsr -8(%rsp)
+ and $0x3f,%eax
+ or %eax,-8(%rsp)
+ test %ecx,-8(%rsp)
+ jz 1f
not %ecx
- stmxcsr -8(%rsp)
and %ecx,-8(%rsp)
ldmxcsr -8(%rsp)
- test $0x3f,%ecx
- jnz 2f
-1: fnclex
- xor %eax,%eax
- ret
-2: fnstsw %ax
- and %ecx,%eax
- jz 1b
- sub $32,%rsp
- fnstenv (%rsp)
- mov %al,4(%rsp)
- fldenv (%rsp)
- add $32,%rsp
- xor %eax,%eax
+1: xor %eax,%eax
ret
.global feraiseexcept