From ba18c1ecc6a18203ad8496791154af86f706f632 Mon Sep 17 00:00:00 2001
From: Rich Felker
Date: Fri, 22 Feb 2019 02:56:10 -0500
Subject: add membarrier syscall wrapper, refactor dynamic tls install to use it

the motivation for this change is twofold. first, it gets the fallback
logic out of the dynamic linker, improving code readability and
organization. second, it provides application code that wants to use
the membarrier syscall, which depends on preregistration of intent
before the process becomes multithreaded unless unbounded latency is
acceptable, with a symbol that, when linked, ensures that this
registration happens.
---
 src/include/sys/membarrier.h |  9 ++++++
 src/internal/pthread_impl.h  |  2 +-
 src/linux/membarrier.c       | 76 ++++++++++++++++++++++++++++++++++++++++++++
 src/thread/pthread_create.c  |  4 +--
 4 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 src/include/sys/membarrier.h
 create mode 100644 src/linux/membarrier.c

(limited to 'src')

diff --git a/src/include/sys/membarrier.h b/src/include/sys/membarrier.h
new file mode 100644
index 00000000..3654491c
--- /dev/null
+++ b/src/include/sys/membarrier.h
@@ -0,0 +1,9 @@
+#ifndef SYS_MEMBARRIER_H
+#define SYS_MEMBARRIER_H
+
+#include "../../../include/sys/membarrier.h"
+#include <features.h>
+
+hidden int __membarrier(int, int);
+
+#endif
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index de089967..9b001421 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -130,7 +130,7 @@ hidden int __init_tp(void *);
 hidden void *__copy_tls(unsigned char *);
 hidden void __reset_tls();
 
-hidden void __dl_prepare_for_threads(void);
+hidden void __membarrier_init(void);
 hidden void __dl_thread_cleanup(void);
 hidden void __testcancel();
 hidden void __do_cleanup_push(struct __ptcb *);
diff --git a/src/linux/membarrier.c b/src/linux/membarrier.c
new file mode 100644
index 00000000..26d143e7
--- /dev/null
+++ b/src/linux/membarrier.c
@@ -0,0 +1,76 @@
+#include <sys/membarrier.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <string.h>
+#include "pthread_impl.h"
+#include "syscall.h"
+
+static void dummy_0(void)
+{
+}
+
+static void dummy_1(pthread_t t)
+{
+}
+
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
+weak_alias(dummy_1, __tl_sync);
+
+static sem_t barrier_sem;
+
+static void bcast_barrier(int s)
+{
+	sem_post(&barrier_sem);
+}
+
+int __membarrier(int cmd, int flags)
+{
+	int r = __syscall(SYS_membarrier, cmd, flags);
+	/* Emulate the private expedited command, which is needed by the
+	 * dynamic linker for installation of dynamic TLS, for older
+	 * kernels that lack the syscall. Unlike the syscall, this only
+	 * synchronizes with threads of the process, not other processes
+	 * sharing the VM, but such sharing is not a supported usage
+	 * anyway. */
+	if (r && cmd == MEMBARRIER_CMD_PRIVATE_EXPEDITED && !flags) {
+		pthread_t self=__pthread_self(), td;
+		sigset_t set;
+		__block_app_sigs(&set);
+		__tl_lock();
+		sem_init(&barrier_sem, 0, 0);
+		struct sigaction sa = {
+			.sa_flags = SA_RESTART,
+			.sa_handler = bcast_barrier
+		};
+		memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+		__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		for (td=self->next; td!=self; td=td->next)
+			__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+		for (td=self->next; td!=self; td=td->next)
+			sem_wait(&barrier_sem);
+		sa.sa_handler = SIG_IGN;
+		__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		sem_destroy(&barrier_sem);
+		__tl_unlock();
+		__restore_sigs(&set);
+		return 0;
+	}
+	return __syscall_ret(r);
+}
+
+void __membarrier_init(void)
+{
+	/* If membarrier is linked, attempt to pre-register to be able to use
+	 * the private expedited command before the process becomes multi-
+	 * threaded, since registering later has bad, potentially unbounded
+	 * latency. This syscall should be essentially free, and it's arguably
+	 * a mistake in the API design that registration was even required.
+	 * For other commands, registration may impose some cost, so it's left
+	 * to the application to do so if desired. Unfortunately this means
+	 * library code initialized after the process becomes multi-threaded
+	 * cannot use these features without accepting registration latency. */
+	__syscall(SYS_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0);
+}
+
+weak_alias(__membarrier, membarrier);
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 54c03554..7d4dc2ed 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -15,7 +15,7 @@ weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 weak_alias(dummy_0, __do_orphaned_stdio_locks);
 weak_alias(dummy_0, __dl_thread_cleanup);
-weak_alias(dummy_0, __dl_prepare_for_threads);
+weak_alias(dummy_0, __membarrier_init);
 
 static int tl_lock_count;
 static int tl_lock_waiters;
@@ -246,7 +246,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 		init_file_lock(__stderr_used);
 		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
 		self->tsd = (void **)__pthread_tsd_main;
-		__dl_prepare_for_threads();
+		__membarrier_init();
 		libc.threaded = 1;
 	}
 	if (attrp && !c11) attr = *attrp;
-- 
cgit v1.2.3-70-g09d2