diff options
Diffstat (limited to 'experimental/firefox-esr/fix-arm-atomics-grsec.patch')
-rw-r--r-- | experimental/firefox-esr/fix-arm-atomics-grsec.patch | 306 |
1 files changed, 306 insertions, 0 deletions
diff --git a/experimental/firefox-esr/fix-arm-atomics-grsec.patch b/experimental/firefox-esr/fix-arm-atomics-grsec.patch new file mode 100644 index 000000000..0eb58f093 --- /dev/null +++ b/experimental/firefox-esr/fix-arm-atomics-grsec.patch @@ -0,0 +1,306 @@ +--- mozilla-release/ipc/chromium/src/base/atomicops_internals_arm_gcc.h.orig ++++ mozilla-release/ipc/chromium/src/base/atomicops_internals_arm_gcc.h +@@ -12,43 +35,194 @@ + namespace base { + namespace subtle { + +-// 0xffff0fc0 is the hard coded address of a function provided by +-// the kernel which implements an atomic compare-exchange. On older +-// ARM architecture revisions (pre-v6) this may be implemented using +-// a syscall. This address is stable, and in active use (hard coded) +-// by at least glibc-2.7 and the Android C library. +-typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value, +- Atomic32 new_value, +- volatile Atomic32* ptr); +-LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) = +- (LinuxKernelCmpxchgFunc) 0xffff0fc0; ++// Memory barriers on ARM are funky, but the kernel is here to help: ++// ++// * ARMv5 didn't support SMP, there is no memory barrier instruction at ++// all on this architecture, or when targeting its machine code. ++// ++// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by ++// writing a random value to a very specific coprocessor register. ++// ++// * On ARMv7, the "dmb" instruction is used to perform a full memory ++// barrier (though writing to the co-processor will still work). ++// However, on single core devices (e.g. Nexus One, or Nexus S), ++// this instruction will take up to 200 ns, which is huge, even though ++// it's completely un-needed on these devices. ++// ++// * There is no easy way to determine at runtime if the device is ++// single or multi-core. 
However, the kernel provides a useful helper ++// function at a fixed memory address (0xffff0fa0), which will always ++// perform a memory barrier in the most efficient way. I.e. on single ++// core devices, this is an empty function that exits immediately. ++// On multi-core devices, it implements a full memory barrier. ++// ++// * This source could be compiled to ARMv5 machine code that runs on a ++// multi-core ARMv6 or ARMv7 device. In this case, memory barriers ++// are needed for correct execution. Always call the kernel helper, even ++// when targeting ARMv5TE. ++// + +-typedef void (*LinuxKernelMemoryBarrierFunc)(void); +-LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) = +- (LinuxKernelMemoryBarrierFunc) 0xffff0fa0; ++inline void MemoryBarrier() { ++#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ ++ defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) ++ __asm__ __volatile__("dmb ish" ::: "memory"); ++#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ ++ defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ ++ defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ++ __asm__ __volatile__("mcr p15,0,r0,c7,c10,5" ::: "memory"); ++#elif defined(__linux__) || defined(__ANDROID__) ++ // Note: This is a function call, which is also an implicit compiler barrier. ++ typedef void (*KernelMemoryBarrierFunc)(); ++ ((KernelMemoryBarrierFunc)0xffff0fa0)(); ++#error MemoryBarrier() is not implemented on this platform. ++#endif ++} + ++// An ARM toolchain would only define one of these depending on which ++// variant of the target architecture is being used. This tests against ++// any known ARMv6 or ARMv7 variant, where it is possible to directly ++// use ldrex/strex instructions to implement fast atomic operations. 
++#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ ++ defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \ ++ defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ ++ defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ ++ defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) + + inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { +- Atomic32 prev_value = *ptr; ++ Atomic32 prev_value; ++ int reloop; + do { +- if (!pLinuxKernelCmpxchg(old_value, new_value, +- const_cast<Atomic32*>(ptr))) { +- return old_value; +- } +- prev_value = *ptr; +- } while (prev_value == old_value); ++ // The following is equivalent to: ++ // ++ // prev_value = LDREX(ptr) ++ // reloop = 0 ++ // if (prev_value == old_value) ++ // reloop = STREX(ptr, new_value) ++ __asm__ __volatile__(" ldrex %0, [%3]\n" ++ " mov %1, #0\n" ++ " cmp %0, %4\n" ++#ifdef __thumb2__ ++ " it eq\n" ++#endif ++ " strexeq %1, %5, [%3]\n" ++ : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr) ++ : "r"(ptr), "r"(old_value), "r"(new_value) ++ : "cc", "memory"); ++ } while (reloop != 0); + return prev_value; + } + ++inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, ++ Atomic32 old_value, ++ Atomic32 new_value) { ++ Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value); ++ MemoryBarrier(); ++ return result; ++} ++ ++inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, ++ Atomic32 old_value, ++ Atomic32 new_value) { ++ MemoryBarrier(); ++ return NoBarrier_CompareAndSwap(ptr, old_value, new_value); ++} ++ ++inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, ++ Atomic32 increment) { ++ Atomic32 value; ++ int reloop; ++ do { ++ // Equivalent to: ++ // ++ // value = LDREX(ptr) ++ // value += increment ++ // reloop = STREX(ptr, value) ++ // ++ __asm__ __volatile__(" ldrex %0, [%3]\n" ++ " add %0, %0, %4\n" ++ " strex %1, %0, [%3]\n" ++ : "=&r"(value), "=&r"(reloop), "+m"(*ptr) ++ : "r"(ptr), 
"r"(increment) ++ : "cc", "memory"); ++ } while (reloop); ++ return value; ++} ++ ++inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, ++ Atomic32 increment) { ++ // TODO(digit): Investigate if it's possible to implement this with ++ // a single MemoryBarrier() operation between the LDREX and STREX. ++ // See http://crbug.com/246514 ++ MemoryBarrier(); ++ Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment); ++ MemoryBarrier(); ++ return result; ++} ++ + inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value; ++ int reloop; + do { ++ // old_value = LDREX(ptr) ++ // reloop = STREX(ptr, new_value) ++ __asm__ __volatile__(" ldrex %0, [%3]\n" ++ " strex %1, %4, [%3]\n" ++ : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr) ++ : "r"(ptr), "r"(new_value) ++ : "cc", "memory"); ++ } while (reloop != 0); ++ return old_value; ++} ++ ++// This tests against any known ARMv5 variant. ++#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \ ++ defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) ++ ++// The kernel also provides a helper function to perform an atomic ++// compare-and-swap operation at the hard-wired address 0xffff0fc0. ++// On ARMv5, this is implemented by a special code path that the kernel ++// detects and treats specially when thread pre-emption happens. ++// On ARMv6 and higher, it uses LDREX/STREX instructions instead. ++// ++// Note that this always performs a full memory barrier, so there is no ++// need to add calls to MemoryBarrier() before or after it. It also ++// returns 0 on success, and non-zero on failure. ++// ++// Available and reliable since Linux 2.6.24. Both Android and ChromeOS ++// use newer kernel revisions, so this should not be a concern. 
++namespace { ++ ++inline int LinuxKernelCmpxchg(Atomic32 old_value, ++ Atomic32 new_value, ++ volatile Atomic32* ptr) { ++ typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*); ++ return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr); ++} ++ ++} // namespace ++ ++inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, ++ Atomic32 old_value, ++ Atomic32 new_value) { ++ Atomic32 prev_value; ++ for (;;) { ++ prev_value = *ptr; ++ if (prev_value != old_value) ++ return prev_value; ++ if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) ++ return old_value; ++ } ++} ++ ++inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, ++ Atomic32 new_value) { ++ Atomic32 old_value; ++ do { + old_value = *ptr; +- } while (pLinuxKernelCmpxchg(old_value, new_value, +- const_cast<Atomic32*>(ptr))); ++ } while (LinuxKernelCmpxchg(old_value, new_value, ptr)); + return old_value; + } + +@@ -63,36 +237,57 @@ + // Atomic exchange the old value with an incremented one. + Atomic32 old_value = *ptr; + Atomic32 new_value = old_value + increment; +- if (pLinuxKernelCmpxchg(old_value, new_value, +- const_cast<Atomic32*>(ptr)) == 0) { ++ if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) { + // The exchange took place as expected. + return new_value; + } + // Otherwise, *ptr changed mid-loop and we need to retry. + } +- + } + + inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { +- return NoBarrier_CompareAndSwap(ptr, old_value, new_value); ++ Atomic32 prev_value; ++ for (;;) { ++ prev_value = *ptr; ++ if (prev_value != old_value) { ++ // Always ensure acquire semantics. 
++ MemoryBarrier(); ++ return prev_value; ++ } ++ if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) ++ return old_value; ++ } + } + + inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { +- return NoBarrier_CompareAndSwap(ptr, old_value, new_value); ++ // This could be implemented as: ++ // MemoryBarrier(); ++ // return NoBarrier_CompareAndSwap(); ++ // ++ // But would use 3 barriers per successful CAS. To save performance, ++ // use Acquire_CompareAndSwap(). Its implementation guarantees that: ++ // - A successful swap uses only 2 barriers (in the kernel helper). ++ // - An early return due to (prev_value != old_value) performs ++ // a memory barrier with no store, which is equivalent to the ++ // generic implementation above. ++ return Acquire_CompareAndSwap(ptr, old_value, new_value); + } + ++#else ++# error "Your CPU's ARM architecture is not supported yet" ++#endif ++ ++// NOTE: Atomicity of the following load and store operations is only ++// guaranteed in case of 32-bit alignment of |ptr| values. ++ + inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + } + +-inline void MemoryBarrier() { +- pLinuxKernelMemoryBarrier(); +-} +- + inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +@@ -103,9 +298,7 @@ + *ptr = value; + } + +-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { +- return *ptr; +-} ++inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; } + + inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; +@@ -118,7 +311,6 @@ + return *ptr; + } + +-} // namespace base::subtle +-} // namespace base ++} } // namespace base::subtle + + #endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ |