summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRich Felker <dalias@aerifal.cx>2019-04-10 17:10:36 -0400
committerRich Felker <dalias@aerifal.cx>2019-04-10 17:10:36 -0400
commit22e5bbd0deadcbd767864bd714e890b70e1fe1df (patch)
tree4482da4b24c0e08b29769dad08bf800207db1227
parente97681d6f2c44bf5fa9ecdd30607cb63c780062e (diff)
downloadmusl-22e5bbd0deadcbd767864bd714e890b70e1fe1df.tar.gz
musl-22e5bbd0deadcbd767864bd714e890b70e1fe1df.tar.bz2
musl-22e5bbd0deadcbd767864bd714e890b70e1fe1df.tar.xz
musl-22e5bbd0deadcbd767864bd714e890b70e1fe1df.zip
overhaul i386 syscall mechanism not to depend on external asm source
this is the first part of a series of patches intended to make __syscall fully self-contained in the object file produced using syscall.h, which will make it possible for crt1 code to perform syscalls. the (confusingly named) i386 __vsyscall mechanism, which this commit removes, was introduced before the presence of a valid thread pointer was mandatory; back then the thread pointer was set up lazily only if threads were used. the intent was to be able to perform syscalls using the kernel's fast entry point in the VDSO, which can use the sysenter (Intel) or syscall (AMD) instruction instead of int $128, but without inlining an access to the __sysinfo global at the point of each syscall, which would incur a significant size cost from PIC setup everywhere. the mechanism also shuffled registers/calling convention around to avoid spills of call-saved registers, and to avoid allocating ebx or ebp via asm constraints, since there are plenty of broken-but-supported compiler versions which are incapable of allocating ebx with -fPIC or ebp with -fno-omit-frame-pointer. the new mechanism preserves the properties of avoiding spills and avoiding allocation of ebx/ebp in constraints, but does it inline, using some fairly simple register shuffling, and uses a field of the thread structure rather than global data for the vdso-provided syscall code address. for now, the external __syscall function is refactored not to use the old __vsyscall so it can be kept, but the intent is to remove it too.
-rw-r--r--arch/i386/syscall_arch.h29
-rw-r--r--ldso/dynlink.c3
-rw-r--r--src/env/__init_tls.c2
-rw-r--r--src/env/__libc_start_main.c2
-rw-r--r--src/internal/defsysinfo.c3
-rw-r--r--src/internal/i386/defsysinfo.s9
-rw-r--r--src/internal/i386/syscall.s81
-rw-r--r--src/internal/libc.c1
-rw-r--r--src/thread/pthread_create.c1
9 files changed, 51 insertions, 80 deletions
diff --git a/arch/i386/syscall_arch.h b/arch/i386/syscall_arch.h
index 4c9d874a..8fe35424 100644
--- a/arch/i386/syscall_arch.h
+++ b/arch/i386/syscall_arch.h
@@ -3,52 +3,63 @@
((union { long long ll; long l[2]; }){ .ll = x }).l[1]
#define __SYSCALL_LL_O(x) __SYSCALL_LL_E((x))
+#if SYSCALL_NO_TLS
+#define SYSCALL_INSNS "int $128"
+#else
+#define SYSCALL_INSNS "call *%%gs:16"
+#endif
+
+#define SYSCALL_INSNS_12 "xchg %%ebx,%%edx ; " SYSCALL_INSNS " ; xchg %%ebx,%%edx"
+#define SYSCALL_INSNS_34 "xchg %%ebx,%%edi ; " SYSCALL_INSNS " ; xchg %%ebx,%%edi"
+
static inline long __syscall0(long n)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS : "=a"(__ret) : "a"(n) : "memory");
return __ret;
}
static inline long __syscall1(long n, long a1)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS_12 : "=a"(__ret) : "a"(n), "d"(a1) : "memory");
return __ret;
}
static inline long __syscall2(long n, long a1, long a2)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS_12 : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
return __ret;
}
static inline long __syscall3(long n, long a1, long a2, long a3)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS_34 : "=a"(__ret) : "a"(n), "D"(a1), "c"(a2), "d"(a3) : "memory");
return __ret;
}
static inline long __syscall4(long n, long a1, long a2, long a3, long a4)
{
unsigned long __ret;
- __asm__ __volatile__ (".hidden __vsyscall ; call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4) : "memory");
+ __asm__ __volatile__ (SYSCALL_INSNS_34 : "=a"(__ret) : "a"(n), "D"(a1), "c"(a2), "d"(a3), "S"(a4) : "memory");
return __ret;
}
static inline long __syscall5(long n, long a1, long a2, long a3, long a4, long a5)
{
- unsigned long __ret;
- __asm__ __volatile__ ("push %6 ; .hidden __vsyscall ; call __vsyscall ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"(a5) : "memory");
+ unsigned long __ret, __tmp;
+ __asm__ __volatile__ ("mov %%ebx,%1 ; mov %3,%%ebx ; " SYSCALL_INSNS " ; mov %1,%%ebx"
+ : "=a"(__ret), "=m"(__tmp) : "a"(n), "g"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
return __ret;
}
static inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
{
- unsigned long __ret;
- __asm__ __volatile__ ("push %6 ; .hidden __vsyscall6 ; call __vsyscall6 ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"(0+(long[]){a5, a6}) : "memory");
+ unsigned long __ret, __tmp1, __tmp2;
+ __asm__ __volatile__ ("mov %%ebx,%1 ; mov %%ebp,%2 ; mov %4,%%ebx ; mov %9,%%ebp ; " SYSCALL_INSNS " ; mov %2,%%ebp ; mov %1,%%ebx"
+ : "=a"(__ret), "=m"(__tmp1), "=m"(__tmp2) : "a"(n), "g"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5), "g"(a6) : "memory");
return __ret;
}
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index 7cb66db9..42a5470d 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -1,4 +1,5 @@
#define _GNU_SOURCE
+#define SYSCALL_NO_TLS 1
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
@@ -1685,6 +1686,8 @@ _Noreturn void __dls3(size_t *sp)
libc.auxv = auxv = (void *)(argv+i+1);
decode_vec(auxv, aux, AUX_CNT);
__hwcap = aux[AT_HWCAP];
+ search_vec(auxv, &__sysinfo, AT_SYSINFO);
+ __pthread_self()->sysinfo = __sysinfo;
libc.page_size = aux[AT_PAGESZ];
libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index f1874f2a..5f12500c 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -1,3 +1,4 @@
+#define SYSCALL_NO_TLS 1
#include <elf.h>
#include <limits.h>
#include <sys/mman.h>
@@ -21,6 +22,7 @@ int __init_tp(void *p)
td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock);
td->locale = &libc.global_locale;
td->robust_list.head = &td->robust_list.head;
+ td->sysinfo = __sysinfo;
td->next = td->prev = td;
return 0;
}
diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c
index 7c95f822..8fbe5262 100644
--- a/src/env/__libc_start_main.c
+++ b/src/env/__libc_start_main.c
@@ -28,7 +28,7 @@ void __init_libc(char **envp, char *pn)
libc.auxv = auxv = (void *)(envp+i+1);
for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT) aux[auxv[i]] = auxv[i+1];
__hwcap = aux[AT_HWCAP];
- __sysinfo = aux[AT_SYSINFO];
+ if (aux[AT_SYSINFO]) __sysinfo = aux[AT_SYSINFO];
libc.page_size = aux[AT_PAGESZ];
if (!pn) pn = (void*)aux[AT_EXECFN];
diff --git a/src/internal/defsysinfo.c b/src/internal/defsysinfo.c
new file mode 100644
index 00000000..6d4117db
--- /dev/null
+++ b/src/internal/defsysinfo.c
@@ -0,0 +1,3 @@
+#include "libc.h"
+
+size_t __sysinfo;
diff --git a/src/internal/i386/defsysinfo.s b/src/internal/i386/defsysinfo.s
new file mode 100644
index 00000000..f1b5b0f2
--- /dev/null
+++ b/src/internal/i386/defsysinfo.s
@@ -0,0 +1,9 @@
+1: int $128
+ ret
+
+.data
+.align 4
+.hidden __sysinfo
+.global __sysinfo
+__sysinfo:
+ .long 1b
diff --git a/src/internal/i386/syscall.s b/src/internal/i386/syscall.s
index 0ebf2218..004ddfef 100644
--- a/src/internal/i386/syscall.s
+++ b/src/internal/i386/syscall.s
@@ -1,78 +1,21 @@
-.hidden __sysinfo
-
-# The calling convention for __vsyscall has the syscall number
-# and 5 args arriving as: eax, edx, ecx, edi, esi, 4(%esp).
-# This ensures that the inline asm in the C code never has to touch
-# ebx or ebp (which are unavailable in PIC and frame-pointer-using
-# code, respectively), and optimizes for size/simplicity in the caller.
-
-.global __vsyscall
-.hidden __vsyscall
-.type __vsyscall,@function
-__vsyscall:
- push %edi
- push %ebx
- mov %edx,%ebx
- mov %edi,%edx
- mov 12(%esp),%edi
- push %eax
- call 1f
-2: mov %ebx,%edx
- pop %ebx
- pop %ebx
- pop %edi
- ret
-
-1: mov (%esp),%eax
- add $[__sysinfo-2b],%eax
- mov (%eax),%eax
- test %eax,%eax
- jz 1f
- push %eax
- mov 8(%esp),%eax
- ret # tail call to kernel vsyscall entry
-1: mov 4(%esp),%eax
- int $128
- ret
-
-# The __vsyscall6 entry point is used only for 6-argument syscalls.
-# Instead of passing the 5th argument on the stack, a pointer to the
-# 5th and 6th arguments is passed. This is ugly, but there are no
-# register constraints the inline asm could use that would make it
-# possible to pass two arguments on the stack.
-
-.global __vsyscall6
-.hidden __vsyscall6
-.type __vsyscall6,@function
-__vsyscall6:
- push %ebp
- push %eax
- mov 12(%esp), %ebp
- mov (%ebp), %eax
- mov 4(%ebp), %ebp
- push %eax
- mov 4(%esp),%eax
- call __vsyscall
- pop %ebp
- pop %ebp
- pop %ebp
- ret
-
.global __syscall
.hidden __syscall
.type __syscall,@function
__syscall:
- lea 24(%esp),%eax
+ push %ebx
push %esi
push %edi
- push %eax
- mov 16(%esp),%eax
- mov 20(%esp),%edx
- mov 24(%esp),%ecx
- mov 28(%esp),%edi
- mov 32(%esp),%esi
- call __vsyscall6
- pop %edi
+ push %ebp
+ mov 20(%esp),%eax
+ mov 24(%esp),%ebx
+ mov 28(%esp),%ecx
+ mov 32(%esp),%edx
+ mov 36(%esp),%esi
+ mov 40(%esp),%edi
+ mov 44(%esp),%ebp
+ call *%gs:16
+ pop %ebp
pop %edi
pop %esi
+ pop %ebx
ret
diff --git a/src/internal/libc.c b/src/internal/libc.c
index 2e10942d..cb051810 100644
--- a/src/internal/libc.c
+++ b/src/internal/libc.c
@@ -3,7 +3,6 @@
struct __libc __libc;
size_t __hwcap;
-size_t __sysinfo;
char *__progname=0, *__progname_full=0;
weak_alias(__progname, program_invocation_short_name);
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 7d4dc2ed..ebf61ded 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -315,6 +315,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
}
new->robust_list.head = &new->robust_list.head;
new->CANARY = self->CANARY;
+ new->sysinfo = self->sysinfo;
/* Setup argument structure for the new thread on its stack.
* It's safe to access from the caller only until the thread