summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2015-11-12 15:50:26 -0500
committer Rich Felker <dalias@aerifal.cx> 2015-11-12 16:07:00 -0500
commit d56460c939c94a6c547abe8238f442b8de10bfbd (patch)
tree aaba49b1834dfdaafabb371d39bf4e1111935d18
parent ad1cd43a86645ba2d4f7c8747240452a349d6bc1 (diff)
downloadmusl-d56460c939c94a6c547abe8238f442b8de10bfbd.tar.gz
musl-d56460c939c94a6c547abe8238f442b8de10bfbd.tar.bz2
musl-d56460c939c94a6c547abe8238f442b8de10bfbd.tar.xz
musl-d56460c939c94a6c547abe8238f442b8de10bfbd.zip
unify static and dynamic linked implementations of thread-local storage
this both allows removal of some of the main remaining uses of the SHARED macro and clears one obstacle to static-linked dlopen support, which may be added at some point in the future. specialized single-TLS-module versions of __copy_tls and __reset_tls are removed and replaced with code adapted from their dynamic-linked versions, capable of operating on a whole chain of TLS modules, and use of the dynamic linker's DSO chain (which contains large struct dso objects) by these functions is replaced with a new chain of struct tls_module objects containing only the information needed for implementing TLS. this may also yield some performance benefit initializing TLS for a new thread when a large number of modules without TLS have been loaded, since there is no need to walk structures for modules without TLS.
-rw-r--r-- src/env/__init_tls.c 71
-rw-r--r-- src/env/__reset_tls.c 23
-rw-r--r-- src/internal/libc.h 9
-rw-r--r-- src/ldso/dynlink.c 140
4 files changed, 111 insertions, 132 deletions
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index 73551e6c..0107a545 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -8,9 +8,6 @@
#include "atomic.h"
#include "syscall.h"
-#ifndef SHARED
-static
-#endif
int __init_tp(void *p)
{
pthread_t td = p;
@@ -24,8 +21,6 @@ int __init_tp(void *p)
return 0;
}
-#ifndef SHARED
-
static struct builtin_tls {
char c;
struct pthread pt;
@@ -33,33 +28,40 @@ static struct builtin_tls {
} builtin_tls[1];
#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
-struct tls_image {
- void *image;
- size_t len, size, align;
-} __static_tls;
-
-#define T __static_tls
+static struct tls_module main_tls;
void *__copy_tls(unsigned char *mem)
{
pthread_t td;
- if (!T.image) return mem;
- void **dtv = (void *)mem;
- dtv[0] = (void *)1;
+ struct tls_module *p;
+ size_t i;
+ void **dtv;
+
#ifdef TLS_ABOVE_TP
- mem += sizeof(void *) * 2;
- mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1);
+ dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1);
+
+ mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1);
td = (pthread_t)mem;
mem += sizeof(struct pthread);
+
+ for (i=1, p=libc.tls_head; p; i++, p=p->next) {
+ dtv[i] = mem + p->offset;
+ memcpy(dtv[i], p->image, p->len);
+ }
#else
+ dtv = (void **)mem;
+
mem += libc.tls_size - sizeof(struct pthread);
- mem -= (uintptr_t)mem & (T.align-1);
+ mem -= (uintptr_t)mem & (libc.tls_align-1);
td = (pthread_t)mem;
- mem -= T.size;
+
+ for (i=1, p=libc.tls_head; p; i++, p=p->next) {
+ dtv[i] = mem - p->offset;
+ memcpy(dtv[i], p->image, p->len);
+ }
#endif
+ dtv[0] = (void *)libc.tls_cnt;
td->dtv = td->dtv_copy = dtv;
- dtv[1] = mem;
- memcpy(mem, T.image, T.len);
return td;
}
@@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr;
typedef Elf64_Phdr Phdr;
#endif
-void __init_tls(size_t *aux)
+static void static_init_tls(size_t *aux)
{
unsigned char *p;
size_t n;
@@ -86,16 +88,24 @@ void __init_tls(size_t *aux)
}
if (tls_phdr) {
- T.image = (void *)(base + tls_phdr->p_vaddr);
- T.len = tls_phdr->p_filesz;
- T.size = tls_phdr->p_memsz;
- T.align = tls_phdr->p_align;
+ main_tls.image = (void *)(base + tls_phdr->p_vaddr);
+ main_tls.len = tls_phdr->p_filesz;
+ main_tls.size = tls_phdr->p_memsz;
+ main_tls.align = tls_phdr->p_align;
+ libc.tls_cnt = 1;
+ libc.tls_head = &main_tls;
}
- T.size += (-T.size - (uintptr_t)T.image) & (T.align-1);
- if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN;
+ main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
+ & (main_tls.align-1);
+ if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
+#ifndef TLS_ABOVE_TP
+ main_tls.offset = main_tls.size;
+#endif
- libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread)
+ libc.tls_align = main_tls.align;
+ libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
+ + main_tls.size + main_tls.align
+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
if (libc.tls_size > sizeof builtin_tls) {
@@ -117,6 +127,5 @@ void __init_tls(size_t *aux)
if (__init_tp(__copy_tls(mem)) < 0)
a_crash();
}
-#else
-void __init_tls(size_t *auxv) { }
-#endif
+
+weak_alias(static_init_tls, __init_tls);
diff --git a/src/env/__reset_tls.c b/src/env/__reset_tls.c
index bd61f311..677e57f5 100644
--- a/src/env/__reset_tls.c
+++ b/src/env/__reset_tls.c
@@ -1,21 +1,16 @@
-#ifndef SHARED
-
#include <string.h>
#include "pthread_impl.h"
-
-extern struct tls_image {
- void *image;
- size_t len, size, align;
-} __static_tls;
-
-#define T __static_tls
+#include "libc.h"
void __reset_tls()
{
- if (!T.size) return;
pthread_t self = __pthread_self();
- memcpy(self->dtv[1], T.image, T.len);
- memset((char *)self->dtv[1]+T.len, 0, T.size-T.len);
+ struct tls_module *p;
+ size_t i, n = (size_t)self->dtv[0];
+ if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) {
+ if (!self->dtv[i]) continue;
+ memcpy(self->dtv[i], p->image, p->len);
+ memset((char *)self->dtv[i]+p->len, 0,
+ p->size - p->len);
+ }
}
-
-#endif
diff --git a/src/internal/libc.h b/src/internal/libc.h
index 98c7535a..5e145183 100644
--- a/src/internal/libc.h
+++ b/src/internal/libc.h
@@ -11,13 +11,20 @@ struct __locale_struct {
const struct __locale_map *volatile cat[6];
};
+struct tls_module {
+ struct tls_module *next;
+ void *image;
+ size_t len, size, align, offset;
+};
+
struct __libc {
int can_do_threads;
int threaded;
int secure;
volatile int threads_minus_1;
size_t *auxv;
- size_t tls_size;
+ struct tls_module *tls_head;
+ size_t tls_size, tls_align, tls_cnt;
size_t page_size;
struct __locale_struct global_locale;
};
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index ac755d94..0326baf0 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -70,8 +70,8 @@ struct dso {
char kernel_mapped;
struct dso **deps, *needed_by;
char *rpath_orig, *rpath;
- void *tls_image;
- size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
+ struct tls_module tls;
+ size_t tls_id;
size_t relro_start, relro_end;
void **new_dtv;
unsigned char *new_tls;
@@ -99,6 +99,7 @@ struct symdef {
int __init_tp(void *);
void __init_libc(char **, char *);
+void *__copy_tls(unsigned char *);
const char *__libc_get_version(void);
@@ -123,6 +124,7 @@ static int noload;
static jmp_buf *rtld_fail;
static pthread_rwlock_t lock;
static struct debug debug;
+static struct tls_module *tls_tail;
static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
static size_t static_tls_cnt;
static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
@@ -397,14 +399,14 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
break;
#ifdef TLS_ABOVE_TP
case REL_TPOFF:
- *reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend;
+ *reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend;
break;
#else
case REL_TPOFF:
- *reloc_addr = tls_val - def.dso->tls_offset + addend;
+ *reloc_addr = tls_val - def.dso->tls.offset + addend;
break;
case REL_TPOFF_NEG:
- *reloc_addr = def.dso->tls_offset - tls_val + addend;
+ *reloc_addr = def.dso->tls.offset - tls_val + addend;
break;
#endif
case REL_TLSDESC:
@@ -426,10 +428,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
} else {
reloc_addr[0] = (size_t)__tlsdesc_static;
#ifdef TLS_ABOVE_TP
- reloc_addr[1] = tls_val + def.dso->tls_offset
+ reloc_addr[1] = tls_val + def.dso->tls.offset
+ TPOFF_K + addend;
#else
- reloc_addr[1] = tls_val - def.dso->tls_offset
+ reloc_addr[1] = tls_val - def.dso->tls.offset
+ addend;
#endif
}
@@ -567,9 +569,9 @@ static void *map_library(int fd, struct dso *dso)
dyn = ph->p_vaddr;
} else if (ph->p_type == PT_TLS) {
tls_image = ph->p_vaddr;
- dso->tls_align = ph->p_align;
- dso->tls_len = ph->p_filesz;
- dso->tls_size = ph->p_memsz;
+ dso->tls.align = ph->p_align;
+ dso->tls.len = ph->p_filesz;
+ dso->tls.size = ph->p_memsz;
} else if (ph->p_type == PT_GNU_RELRO) {
dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
@@ -694,7 +696,7 @@ static void *map_library(int fd, struct dso *dso)
done_mapping:
dso->base = base;
dso->dynv = laddr(dso, dyn);
- if (dso->tls_size) dso->tls_image = laddr(dso, tls_image);
+ if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
if (!runtime) reclaim_gaps(dso);
free(allocated_buf);
return map;
@@ -1011,8 +1013,8 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
* extended DTV capable of storing an additional slot for
* the newly-loaded DSO. */
alloc_size = sizeof *p + strlen(pathname) + 1;
- if (runtime && temp_dso.tls_image) {
- size_t per_th = temp_dso.tls_size + temp_dso.tls_align
+ if (runtime && temp_dso.tls.image) {
+ size_t per_th = temp_dso.tls.size + temp_dso.tls.align
+ sizeof(void *) * (tls_cnt+3);
n_th = libc.threads_minus_1 + 1;
if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
@@ -1033,22 +1035,25 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
strcpy(p->name, pathname);
/* Add a shortname only if name arg was not an explicit pathname. */
if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
- if (p->tls_image) {
+ if (p->tls.image) {
p->tls_id = ++tls_cnt;
- tls_align = MAXP2(tls_align, p->tls_align);
+ tls_align = MAXP2(tls_align, p->tls.align);
#ifdef TLS_ABOVE_TP
- p->tls_offset = tls_offset + ( (tls_align-1) &
- -(tls_offset + (uintptr_t)p->tls_image) );
- tls_offset += p->tls_size;
+ p->tls.offset = tls_offset + ( (tls_align-1) &
+ -(tls_offset + (uintptr_t)p->tls.image) );
+ tls_offset += p->tls.size;
#else
- tls_offset += p->tls_size + p->tls_align - 1;
- tls_offset -= (tls_offset + (uintptr_t)p->tls_image)
- & (p->tls_align-1);
- p->tls_offset = tls_offset;
+ tls_offset += p->tls.size + p->tls.align - 1;
+ tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
+ & (p->tls.align-1);
+ p->tls.offset = tls_offset;
#endif
p->new_dtv = (void *)(-sizeof(size_t) &
(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
+ if (tls_tail) tls_tail->next = &p->tls;
+ else libc.tls_head = &p->tls;
+ tls_tail = &p->tls;
}
tail->next = p;
@@ -1238,53 +1243,8 @@ static void dl_debug_state(void)
weak_alias(dl_debug_state, _dl_debug_state);
-void __reset_tls()
+void __init_tls(size_t *auxv)
{
- pthread_t self = __pthread_self();
- struct dso *p;
- for (p=head; p; p=p->next) {
- if (!p->tls_id || !self->dtv[p->tls_id]) continue;
- memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len);
- memset((char *)self->dtv[p->tls_id]+p->tls_len, 0,
- p->tls_size - p->tls_len);
- if (p->tls_id == (size_t)self->dtv[0]) break;
- }
-}
-
-void *__copy_tls(unsigned char *mem)
-{
- pthread_t td;
- struct dso *p;
- void **dtv;
-
-#ifdef TLS_ABOVE_TP
- dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1);
-
- mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1);
- td = (pthread_t)mem;
- mem += sizeof(struct pthread);
-
- for (p=head; p; p=p->next) {
- if (!p->tls_id) continue;
- dtv[p->tls_id] = mem + p->tls_offset;
- memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
- }
-#else
- dtv = (void **)mem;
-
- mem += libc.tls_size - sizeof(struct pthread);
- mem -= (uintptr_t)mem & (tls_align-1);
- td = (pthread_t)mem;
-
- for (p=head; p; p=p->next) {
- if (!p->tls_id) continue;
- dtv[p->tls_id] = mem - p->tls_offset;
- memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
- }
-#endif
- dtv[0] = (void *)tls_cnt;
- td->dtv = td->dtv_copy = dtv;
- return td;
}
__attribute__((__visibility__("hidden")))
@@ -1321,12 +1281,12 @@ void *__tls_get_new(size_t *v)
unsigned char *mem;
for (p=head; ; p=p->next) {
if (!p->tls_id || self->dtv[p->tls_id]) continue;
- mem = p->new_tls + (p->tls_size + p->tls_align)
+ mem = p->new_tls + (p->tls.size + p->tls.align)
* a_fetch_add(&p->new_tls_idx,1);
- mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
- & (p->tls_align-1);
+ mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
+ & (p->tls.align-1);
self->dtv[p->tls_id] = mem;
- memcpy(mem, p->tls_image, p->tls_len);
+ memcpy(mem, p->tls.image, p->tls.len);
if (p->tls_id == v[0]) break;
}
__restore_sigs(&set);
@@ -1335,6 +1295,8 @@ void *__tls_get_new(size_t *v)
static void update_tls_size()
{
+ libc.tls_cnt = tls_cnt;
+ libc.tls_align = tls_align;
libc.tls_size = ALIGN(
(1+tls_cnt) * sizeof(void *) +
tls_offset +
@@ -1445,6 +1407,7 @@ _Noreturn void __dls3(size_t *sp)
* use during dynamic linking. If possible it will also serve as the
* thread pointer at runtime. */
libc.tls_size = sizeof builtin_tls;
+ libc.tls_align = tls_align;
if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
a_crash();
}
@@ -1472,13 +1435,13 @@ _Noreturn void __dls3(size_t *sp)
interp_off = (size_t)phdr->p_vaddr;
else if (phdr->p_type == PT_TLS) {
tls_image = phdr->p_vaddr;
- app.tls_len = phdr->p_filesz;
- app.tls_size = phdr->p_memsz;
- app.tls_align = phdr->p_align;
+ app.tls.len = phdr->p_filesz;
+ app.tls.size = phdr->p_memsz;
+ app.tls.align = phdr->p_align;
}
}
if (DL_FDPIC) app.loadmap = app_loadmap;
- if (app.tls_size) app.tls_image = laddr(&app, tls_image);
+ if (app.tls.size) app.tls.image = laddr(&app, tls_image);
if (interp_off) ldso.name = laddr(&app, interp_off);
if ((aux[0] & (1UL<<AT_EXECFN))
&& strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
@@ -1547,19 +1510,20 @@ _Noreturn void __dls3(size_t *sp)
dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
}
}
- if (app.tls_size) {
+ if (app.tls.size) {
+ libc.tls_head = &app.tls;
app.tls_id = tls_cnt = 1;
#ifdef TLS_ABOVE_TP
- app.tls_offset = 0;
- tls_offset = app.tls_size
- + ( -((uintptr_t)app.tls_image + app.tls_size)
- & (app.tls_align-1) );
+ app.tls.offset = 0;
+ tls_offset = app.tls.size
+ + ( -((uintptr_t)app.tls.image + app.tls.size)
+ & (app.tls.align-1) );
#else
- tls_offset = app.tls_offset = app.tls_size
- + ( -((uintptr_t)app.tls_image + app.tls_size)
- & (app.tls_align-1) );
+ tls_offset = app.tls.offset = app.tls.size
+ + ( -((uintptr_t)app.tls.image + app.tls.size)
+ & (app.tls.align-1) );
#endif
- tls_align = MAXP2(tls_align, app.tls_align);
+ tls_align = MAXP2(tls_align, app.tls.align);
}
app.global = 1;
decode_dyn(&app);
@@ -1668,6 +1632,7 @@ _Noreturn void __dls3(size_t *sp)
void *dlopen(const char *file, int mode)
{
struct dso *volatile p, *orig_tail, *next;
+ struct tls_module *orig_tls_tail;
size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
size_t i;
int cs;
@@ -1680,6 +1645,7 @@ void *dlopen(const char *file, int mode)
__inhibit_ptc();
p = 0;
+ orig_tls_tail = tls_tail;
orig_tls_cnt = tls_cnt;
orig_tls_offset = tls_offset;
orig_tls_align = tls_align;
@@ -1706,6 +1672,8 @@ void *dlopen(const char *file, int mode)
unmap_library(p);
free(p);
}
+ if (!orig_tls_tail) libc.tls_head = 0;
+ tls_tail = orig_tls_tail;
tls_cnt = orig_tls_cnt;
tls_offset = orig_tls_offset;
tls_align = orig_tls_align;
@@ -1922,7 +1890,7 @@ int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void
info.dlpi_adds = gencnt;
info.dlpi_subs = 0;
info.dlpi_tls_modid = current->tls_id;
- info.dlpi_tls_data = current->tls_image;
+ info.dlpi_tls_data = current->tls.image;
ret = (callback)(&info, sizeof (info), data);