summaryrefslogtreecommitdiff
path: root/arch/sh/atomic_arch.h
diff options
context:
space:
mode:
author: Rich Felker <dalias@aerifal.cx> 2016-01-21 19:28:15 +0000
committer: Rich Felker <dalias@aerifal.cx> 2016-01-21 19:43:04 +0000
commit: 61b1e75f7d8004461f2e18f171c26c2f545eed32 (patch)
tree: 7a88f8d51ee3f049745b147b7f36fc8ee22e0e62 /arch/sh/atomic_arch.h
parent: 1315596b510189b5159e742110b504177bdd4932 (diff)
downloadmusl-61b1e75f7d8004461f2e18f171c26c2f545eed32.tar.gz
musl-61b1e75f7d8004461f2e18f171c26c2f545eed32.tar.bz2
musl-61b1e75f7d8004461f2e18f171c26c2f545eed32.tar.xz
musl-61b1e75f7d8004461f2e18f171c26c2f545eed32.zip
overhaul sh atomics for new atomics framework, add j-core cas.l backend
sh needs runtime-selected atomic backends since there are a number of supported models that use non-forwards-compatible (non-smp-compatible) atomic mechanisms. previously, the code paths for this were highly inefficient since they involved C function calls with multiple branches in the callee and heavy spills in the caller. the new code calls the runtime-selected asm fragment from inline asm with extremely minimal clobbers, rather than using a function call. for the sh4a case where the atomic mechanism is known and there is no forward-compatibility issue, the movli.l and movco.l instructions are provided as a_ll and a_sc, allowing the new shared atomic.h to generate efficient inline versions of all the basic atomic operations without needing a cas loop.
Diffstat (limited to 'arch/sh/atomic_arch.h')
-rw-r--r--arch/sh/atomic_arch.h110
1 files changed, 30 insertions, 80 deletions
diff --git a/arch/sh/atomic_arch.h b/arch/sh/atomic_arch.h
index 2ac77246..74444d5d 100644
--- a/arch/sh/atomic_arch.h
+++ b/arch/sh/atomic_arch.h
@@ -1,96 +1,46 @@
-#define LLSC_CLOBBERS "r0", "t", "memory"
-#define LLSC_START(mem) "synco\n" \
- "0: movli.l @" mem ", r0\n"
-#define LLSC_END(mem) \
- "1: movco.l r0, @" mem "\n" \
- " bf 0b\n" \
- " synco\n"
+#if defined(__SH4A__)
-static inline int __sh_cas_llsc(volatile int *p, int t, int s)
+#define a_ll a_ll
+static inline int a_ll(volatile int *p)
{
- int old;
- __asm__ __volatile__(
- LLSC_START("%1")
- " mov r0, %0\n"
- " cmp/eq %0, %2\n"
- " bf 1f\n"
- " mov %3, r0\n"
- LLSC_END("%1")
- : "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
- return old;
+ int v;
+ __asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p));
+ return v;
}
-static inline int __sh_swap_llsc(volatile int *x, int v)
+#define a_sc a_sc
+static inline int a_sc(volatile int *p, int v)
{
- int old;
- __asm__ __volatile__(
- LLSC_START("%1")
- " mov r0, %0\n"
- " mov %2, r0\n"
- LLSC_END("%1")
- : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
- return old;
+ int r;
+ __asm__ __volatile__ (
+ "movco.l %2, @%3 ; movt %0"
+ : "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc");
+ return r;
}
-static inline int __sh_fetch_add_llsc(volatile int *x, int v)
+#define a_barrier a_barrier
+static inline void a_barrier()
{
- int old;
- __asm__ __volatile__(
- LLSC_START("%1")
- " mov r0, %0\n"
- " add %2, r0\n"
- LLSC_END("%1")
- : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
- return old;
+ __asm__ __volatile__ ("synco" : : "memory");
}
-static inline void __sh_store_llsc(volatile int *p, int x)
-{
- __asm__ __volatile__(
- " synco\n"
- " mov.l %1, @%0\n"
- " synco\n"
- : : "r"(p), "r"(x) : "memory");
-}
+#define a_pre_llsc a_barrier
+#define a_post_llsc a_barrier
-static inline void __sh_and_llsc(volatile int *x, int v)
-{
- __asm__ __volatile__(
- LLSC_START("%0")
- " and %1, r0\n"
- LLSC_END("%0")
- : : "r"(x), "r"(v) : LLSC_CLOBBERS);
-}
+#else
-static inline void __sh_or_llsc(volatile int *x, int v)
+#define a_cas a_cas
+__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr;
+static inline int a_cas(volatile int *p, int t, int s)
{
- __asm__ __volatile__(
- LLSC_START("%0")
- " or %1, r0\n"
- LLSC_END("%0")
- : : "r"(x), "r"(v) : LLSC_CLOBBERS);
+ register int r1 __asm__("r1");
+ register int r2 __asm__("r2") = t;
+ register int r3 __asm__("r3") = s;
+ __asm__ __volatile__ (
+ "jsr @%4 ; nop"
+ : "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr)
+ : "memory", "pr", "cc");
+ return r3;
}
-#ifdef __SH4A__
-#define a_cas(p,t,s) __sh_cas_llsc(p,t,s)
-#define a_swap(x,v) __sh_swap_llsc(x,v)
-#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v)
-#define a_store(x,v) __sh_store_llsc(x, v)
-#define a_and(x,v) __sh_and_llsc(x, v)
-#define a_or(x,v) __sh_or_llsc(x, v)
-#else
-
-int __sh_cas(volatile int *, int, int);
-int __sh_swap(volatile int *, int);
-int __sh_fetch_add(volatile int *, int);
-void __sh_store(volatile int *, int);
-void __sh_and(volatile int *, int);
-void __sh_or(volatile int *, int);
-
-#define a_cas(p,t,s) __sh_cas(p,t,s)
-#define a_swap(x,v) __sh_swap(x,v)
-#define a_fetch_add(x,v) __sh_fetch_add(x, v)
-#define a_store(x,v) __sh_store(x, v)
-#define a_and(x,v) __sh_and(x, v)
-#define a_or(x,v) __sh_or(x, v)
#endif