diff options
-rw-r--r-- | ldso/dynlink.c | 121 | ||||
-rw-r--r-- | src/internal/pthread_impl.h | 1 | ||||
-rw-r--r-- | src/ldso/aarch64/tlsdesc.s | 59 | ||||
-rw-r--r-- | src/ldso/arm/tlsdesc.S | 19 | ||||
-rw-r--r-- | src/ldso/i386/tlsdesc.s | 8 | ||||
-rw-r--r-- | src/ldso/x86_64/tlsdesc.s | 21 | ||||
-rw-r--r-- | src/thread/__tls_get_addr.c | 7 | ||||
-rw-r--r-- | src/thread/i386/tls.s | 8 | ||||
-rw-r--r-- | src/thread/pthread_create.c | 2 |
9 files changed, 86 insertions, 160 deletions
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index ec921dfd..9e2adb21 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -17,6 +17,7 @@
 #include <pthread.h>
 #include <ctype.h>
 #include <dlfcn.h>
+#include <semaphore.h>
 #include "pthread_impl.h"
 #include "libc.h"
 #include "dynlink.h"
@@ -1338,48 +1339,6 @@ void __init_tls(size_t *auxv)
 {
 }
 
-hidden void *__tls_get_new(tls_mod_off_t *v)
-{
-	pthread_t self = __pthread_self();
-
-	/* Block signals to make accessing new TLS async-signal-safe */
-	sigset_t set;
-	__block_all_sigs(&set);
-	if (v[0] <= self->dtv[0]) {
-		__restore_sigs(&set);
-		return (void *)(self->dtv[v[0]] + v[1]);
-	}
-
-	/* This is safe without any locks held because, if the caller
-	 * is able to request the Nth entry of the DTV, the DSO list
-	 * must be valid at least that far out and it was synchronized
-	 * at program startup or by an already-completed call to dlopen. */
-	struct dso *p;
-	for (p=head; p->tls_id != v[0]; p=p->next);
-
-	/* Get new DTV space from new DSO */
-	uintptr_t *newdtv = p->new_dtv +
-		(v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
-	memcpy(newdtv, self->dtv, (self->dtv[0]+1) * sizeof(uintptr_t));
-	newdtv[0] = v[0];
-	self->dtv = self->dtv_copy = newdtv;
-
-	/* Get new TLS memory from all new DSOs up to the requested one */
-	unsigned char *mem;
-	for (p=head; ; p=p->next) {
-		if (!p->tls_id || self->dtv[p->tls_id]) continue;
-		mem = p->new_tls + (p->tls.size + p->tls.align)
-			* a_fetch_add(&p->new_tls_idx,1);
-		mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
-			& (p->tls.align-1);
-		self->dtv[p->tls_id] = (uintptr_t)mem + DTP_OFFSET;
-		memcpy(mem, p->tls.image, p->tls.len);
-		if (p->tls_id == v[0]) break;
-	}
-	__restore_sigs(&set);
-	return mem + v[1] + DTP_OFFSET;
-}
-
 static void update_tls_size()
 {
 	libc.tls_cnt = tls_cnt;
@@ -1392,6 +1351,82 @@ static void update_tls_size()
 	tls_align);
 }
 
+void __dl_prepare_for_threads(void)
+{
+	/* MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED */
+	__syscall(SYS_membarrier, 1<<4, 0);
+}
+
+static sem_t barrier_sem;
+static void bcast_barrier(int s)
+{
+	sem_post(&barrier_sem);
+}
+
+static void install_new_tls(void)
+{
+	sigset_t set;
+	pthread_t self = __pthread_self(), td;
+	uintptr_t (*newdtv)[tls_cnt+1] = (void *)tail->new_dtv;
+	struct dso *p;
+	size_t i, j;
+	size_t old_cnt = self->dtv[0];
+
+	__block_app_sigs(&set);
+	__tl_lock();
+	/* Copy existing dtv contents from all existing threads. */
+	for (i=0, td=self; !i || td!=self; i++, td=td->next) {
+		memcpy(newdtv+i, td->dtv,
+			(old_cnt+1)*sizeof(uintptr_t));
+		newdtv[i][0] = tls_cnt;
+	}
+	/* Install new dtls into the enlarged, uninstalled dtv copies. */
+	for (p=head; ; p=p->next) {
+		if (!p->tls_id || self->dtv[p->tls_id]) continue;
+		unsigned char *mem = p->new_tls;
+		for (j=0; j<i; j++) {
+			unsigned char *new = mem;
+			new += ((uintptr_t)p->tls.image - (uintptr_t)mem)
+				& (p->tls.align-1);
+			memcpy(new, p->tls.image, p->tls.len);
+			newdtv[j][p->tls_id] =
+				(uintptr_t)new + DTP_OFFSET;
+			mem += p->tls.size + p->tls.align;
+		}
+		if (p->tls_id == tls_cnt) break;
+	}
+
+	/* Broadcast barrier to ensure the contents of the new dtv are
+	 * visible if the new dtv pointer is. Use SYS_membarrier if it
+	 * works, otherwise emulate with a signal sent to every thread. */
+
+	/* MEMBARRIER_CMD_PRIVATE_EXPEDITED */
+	if (__syscall(SYS_membarrier, 1<<3, 0)) {
+		sem_init(&barrier_sem, 0, 0);
+		struct sigaction sa = {
+			.sa_flags = SA_RESTART,
+			.sa_handler = bcast_barrier
+		};
+		memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+		__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		for (td=self->next; td!=self; td=td->next)
+			__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+		for (td=self->next; td!=self; td=td->next)
+			sem_wait(&barrier_sem);
+		sa.sa_handler = SIG_IGN;
+		__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		sem_destroy(&barrier_sem);
+	}
+
+	/* Install new dtv for each thread. */
+	for (j=0, td=self; !j || td!=self; j++, td=td->next) {
+		td->dtv = td->dtv_copy = newdtv[j];
+	}
+
+	__tl_unlock();
+	__restore_sigs(&set);
+}
+
 /* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
  * following stage 2 and stage 3 functions via primitive symbolic lookup
  * since it does not have access to their addresses to begin with. */
@@ -1864,6 +1899,8 @@ void *dlopen(const char *file, int mode)
 	redo_lazy_relocs();
 
 	update_tls_size();
+	if (tls_cnt != orig_tls_cnt)
+		install_new_tls();
 	_dl_debug_state();
 	orig_tail = tail;
 end:
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index d5d969ec..de089967 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -130,6 +130,7 @@ hidden int __init_tp(void *);
 hidden void *__copy_tls(unsigned char *);
 hidden void __reset_tls();
 
+hidden void __dl_prepare_for_threads(void);
 hidden void __dl_thread_cleanup(void);
 hidden void __testcancel();
 hidden void __do_cleanup_push(struct __ptcb *);
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index 8e4004d7..c91baa45 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -29,67 +29,10 @@ __tlsdesc_dynamic:
 	ldr x0,[x0,#8]        // p
 	ldr x2,[x0]           // p->modidx
 	ldr x3,[x1,#-8]       // dtv
-	ldr x4,[x3]           // dtv[0]
-	cmp x2,x4
-	b.hi 1f
 	ldr x2,[x3,x2,lsl #3] // dtv[p->modidx]
 	ldr x0,[x0,#8]        // p->off
 	add x0,x0,x2
-2:	sub x0,x0,x1
+	sub x0,x0,x1
 	ldp x3,x4,[sp,#16]
 	ldp x1,x2,[sp],#32
 	ret
-
-	// save all registers __tls_get_new may clobber
-	// update sp in two steps because offset must be in [-512,509]
-1:	stp x29,x30,[sp,#-160]!
-	stp x5,x6,[sp,#16]
-	stp x7,x8,[sp,#32]
-	stp x9,x10,[sp,#48]
-	stp x11,x12,[sp,#64]
-	stp x13,x14,[sp,#80]
-	stp x15,x16,[sp,#96]
-	stp x17,x18,[sp,#112]
-	stp q0,q1,[sp,#128]
-	stp q2,q3,[sp,#-480]!
-	stp q4,q5,[sp,#32]
-	stp q6,q7,[sp,#64]
-	stp q8,q9,[sp,#96]
-	stp q10,q11,[sp,#128]
-	stp q12,q13,[sp,#160]
-	stp q14,q15,[sp,#192]
-	stp q16,q17,[sp,#224]
-	stp q18,q19,[sp,#256]
-	stp q20,q21,[sp,#288]
-	stp q22,q23,[sp,#320]
-	stp q24,q25,[sp,#352]
-	stp q26,q27,[sp,#384]
-	stp q28,q29,[sp,#416]
-	stp q30,q31,[sp,#448]
-	bl __tls_get_new
-	mrs x1,tpidr_el0
-	ldp q4,q5,[sp,#32]
-	ldp q6,q7,[sp,#64]
-	ldp q8,q9,[sp,#96]
-	ldp q10,q11,[sp,#128]
-	ldp q12,q13,[sp,#160]
-	ldp q14,q15,[sp,#192]
-	ldp q16,q17,[sp,#224]
-	ldp q18,q19,[sp,#256]
-	ldp q20,q21,[sp,#288]
-	ldp q22,q23,[sp,#320]
-	ldp q24,q25,[sp,#352]
-	ldp q26,q27,[sp,#384]
-	ldp q28,q29,[sp,#416]
-	ldp q30,q31,[sp,#448]
-	ldp q2,q3,[sp],#480
-	ldp x5,x6,[sp,#16]
-	ldp x7,x8,[sp,#32]
-	ldp x9,x10,[sp,#48]
-	ldp x11,x12,[sp,#64]
-	ldp x13,x14,[sp,#80]
-	ldp x15,x16,[sp,#96]
-	ldp x17,x18,[sp,#112]
-	ldp q0,q1,[sp,#128]
-	ldp x29,x30,[sp],#160
-	b 2b
diff --git a/src/ldso/arm/tlsdesc.S b/src/ldso/arm/tlsdesc.S
index 4e67c3e2..455eac1d 100644
--- a/src/ldso/arm/tlsdesc.S
+++ b/src/ldso/arm/tlsdesc.S
@@ -35,13 +35,9 @@ __tlsdesc_dynamic:
 #endif
 #endif
 	ldr r3,[r0,#-4] // r3 = dtv
-	ldr ip,[r3]     // ip = dtv slot count
-	cmp r1,ip
-	bhi 3f
 	ldr ip,[r3,r1,LSL #2]
 	sub r0,ip,r0
 	add r0,r0,r2    // r0 = r3[r1]-r0+r2
-4:
 #if __ARM_ARCH >= 5
 	pop {r2,r3,ip,pc}
 #else
@@ -49,21 +45,6 @@ __tlsdesc_dynamic:
 	bx lr
 #endif
 
-3:
-#if __ARM_PCS_VFP || !__SOFTFP__
-	.fpu vfp
-	vpush {d0-d7}
-#endif
-	push {r0-r3}
-	add r0,sp,#4
-	bl __tls_get_new
-	pop {r1-r3,ip}
-#if __ARM_PCS_VFP || !__SOFTFP__
-	vpop {d0-d7}
-#endif
-	sub r0,r0,r1    // r0 = retval-tp
-	b 4b
-
 #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
  || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 #else
diff --git a/src/ldso/i386/tlsdesc.s b/src/ldso/i386/tlsdesc.s
index 4a553bce..a5c0100c 100644
--- a/src/ldso/i386/tlsdesc.s
+++ b/src/ldso/i386/tlsdesc.s
@@ -17,15 +17,9 @@ __tlsdesc_dynamic:
 	mov %gs:4,%edx
 	push %ecx
 	mov (%eax),%ecx
-	cmp %ecx,(%edx)
-	jc 1f
 	mov 4(%eax),%eax
 	add (%edx,%ecx,4),%eax
-2:	pop %ecx
+	pop %ecx
 	sub %gs:0,%eax
 	pop %edx
 	ret
-1:	push %eax
-	call __tls_get_new
-	pop %ecx
-	jmp 2b
diff --git a/src/ldso/x86_64/tlsdesc.s b/src/ldso/x86_64/tlsdesc.s
index 8238c3eb..0151d15c 100644
--- a/src/ldso/x86_64/tlsdesc.s
+++ b/src/ldso/x86_64/tlsdesc.s
@@ -17,28 +17,9 @@ __tlsdesc_dynamic:
 	mov %fs:8,%rdx
 	push %rcx
 	mov (%rax),%rcx
-	cmp %rcx,(%rdx)
-	jc 1f
 	mov 8(%rax),%rax
 	add (%rdx,%rcx,8),%rax
-2:	pop %rcx
+	pop %rcx
 	sub %fs:0,%rax
 	pop %rdx
 	ret
-1:	push %rdi
-	push %rdi
-	push %rsi
-	push %r8
-	push %r9
-	push %r10
-	push %r11
-	mov %rax,%rdi
-	call __tls_get_new
-	pop %r11
-	pop %r10
-	pop %r9
-	pop %r8
-	pop %rsi
-	pop %rdi
-	pop %rdi
-	jmp 2b
diff --git a/src/thread/__tls_get_addr.c b/src/thread/__tls_get_addr.c
index d7afdabd..19524fe0 100644
--- a/src/thread/__tls_get_addr.c
+++ b/src/thread/__tls_get_addr.c
@@ -1,12 +1,7 @@
-#include <stddef.h>
 #include "pthread_impl.h"
 
 void *__tls_get_addr(tls_mod_off_t *v)
 {
 	pthread_t self = __pthread_self();
-	if (v[0] <= self->dtv[0])
-		return (void *)(self->dtv[v[0]] + v[1]);
-	return __tls_get_new(v);
+	return (void *)(self->dtv[v[0]] + v[1]);
 }
-
-weak_alias(__tls_get_addr, __tls_get_new);
diff --git a/src/thread/i386/tls.s b/src/thread/i386/tls.s
index 76d5d462..6e4c4cb9 100644
--- a/src/thread/i386/tls.s
+++ b/src/thread/i386/tls.s
@@ -4,14 +4,6 @@
 ___tls_get_addr:
 	mov %gs:4,%edx
 	mov (%eax),%ecx
-	cmp %ecx,(%edx)
-	jc 1f
 	mov 4(%eax),%eax
 	add (%edx,%ecx,4),%eax
 	ret
-1:	push %eax
-.weak __tls_get_new
-.hidden __tls_get_new
-	call __tls_get_new
-	pop %edx
-	ret
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index cec82157..0142b347 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -15,6 +15,7 @@ weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 weak_alias(dummy_0, __do_orphaned_stdio_locks);
 weak_alias(dummy_0, __dl_thread_cleanup);
+weak_alias(dummy_0, __dl_prepare_for_threads);
 
 void __tl_lock(void)
 {
@@ -235,6 +236,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 		init_file_lock(__stderr_used);
 		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
 		self->tsd = (void **)__pthread_tsd_main;
+		__dl_prepare_for_threads();
 		libc.threaded = 1;
 	}
 	if (attrp && !c11) attr = *attrp;