From 5a9c8c05a5a0cdced4122589184fd795b761bb4a Mon Sep 17 00:00:00 2001
From: Rich Felker
Date: Sun, 16 Aug 2015 18:15:18 +0000
Subject: mitigate performance regression in libc-internal locks on x86_64

commit 3c43c0761e1725fd5f89a9c028cbf43250abb913 fixed missing
synchronization in the atomic store operation for i386 and x86_64, but
opted to use mfence for the barrier on x86_64 where it's always
available. however, in practice mfence is significantly slower than
the barrier approach used on i386 (a nop-like lock orl operation).
this commit changes x86_64 (and x32) to use the faster barrier.
---
 arch/x86_64/atomic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/atomic.h b/arch/x86_64/atomic.h
index 2ab1f7a2..7690183d 100644
--- a/arch/x86_64/atomic.h
+++ b/arch/x86_64/atomic.h
@@ -83,7 +83,7 @@ static inline void a_dec(volatile int *x)

 static inline void a_store(volatile int *p, int x)
 {
-	__asm__( "mov %1, %0 ; mfence" : "=m"(*p) : "r"(x) : "memory" );
+	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
 }

 static inline void a_spin()
-- 
cgit v1.2.3-70-g09d2