From 01ef3dd9c5fa7a56aa370f244dd08e05c73010f5 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 10 Mar 2015 21:18:41 +0000 Subject: add aarch64 port This adds complete aarch64 target support including bigendian subarch. Some of the long double math functions are known to be broken; otherwise, interfaces should be fully functional, but at this point consider this port experimental. Initial work on this port was done by Sireesh Tripurari and Kevin Bortis. --- src/fenv/aarch64/fenv.s | 67 +++++++++++++++++++++++++ src/internal/aarch64/syscall.s | 13 +++++ src/ldso/aarch64/dlsym.s | 5 ++ src/ldso/aarch64/start.s | 18 +++++++ src/ldso/aarch64/tlsdesc.s | 92 ++++++++++++++++++++++++++++++++++ src/math/aarch64/fabs.s | 6 +++ src/math/aarch64/fabsf.s | 6 +++ src/math/aarch64/sqrt.s | 6 +++ src/math/aarch64/sqrtf.s | 6 +++ src/setjmp/aarch64/longjmp.s | 24 +++++++++ src/setjmp/aarch64/setjmp.s | 24 +++++++++ src/signal/aarch64/restore.s | 8 +++ src/signal/aarch64/sigsetjmp.s | 19 +++++++ src/thread/aarch64/__set_thread_area.s | 6 +++ src/thread/aarch64/__unmapself.s | 7 +++ src/thread/aarch64/clone.s | 29 +++++++++++ src/thread/aarch64/syscall_cp.s | 27 ++++++++++ 17 files changed, 363 insertions(+) create mode 100644 src/fenv/aarch64/fenv.s create mode 100644 src/internal/aarch64/syscall.s create mode 100644 src/ldso/aarch64/dlsym.s create mode 100644 src/ldso/aarch64/start.s create mode 100644 src/ldso/aarch64/tlsdesc.s create mode 100644 src/math/aarch64/fabs.s create mode 100644 src/math/aarch64/fabsf.s create mode 100644 src/math/aarch64/sqrt.s create mode 100644 src/math/aarch64/sqrtf.s create mode 100644 src/setjmp/aarch64/longjmp.s create mode 100644 src/setjmp/aarch64/setjmp.s create mode 100644 src/signal/aarch64/restore.s create mode 100644 src/signal/aarch64/sigsetjmp.s create mode 100644 src/thread/aarch64/__set_thread_area.s create mode 100644 src/thread/aarch64/__unmapself.s create mode 100644 src/thread/aarch64/clone.s create mode 100644 
src/thread/aarch64/syscall_cp.s
(limited to 'src')
diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
new file mode 100644
index 00000000..f0db5d9d
--- /dev/null
+++ b/src/fenv/aarch64/fenv.s
@@ -0,0 +1,67 @@
+.global fegetround
+.type fegetround,%function
+fegetround:	// returns FPCR & 0xc00000 (bits 22-23, the rounding-mode bits) in w0
+	mrs x0, fpcr
+	and w0, w0, #0xc00000	// keep only the rounding-mode bits
+	ret
+
+.global __fesetround
+.type __fesetround,%function
+__fesetround:	// in: w0 = new rounding-mode bits; out: w0 = 0
+	mrs x1, fpcr
+	bic w1, w1, #0xc00000	// clear the current rounding-mode bits
+	orr w1, w1, w0	// w0 is merged unmasked; presumably the caller validates it - TODO confirm
+	msr fpcr, x1
+	mov w0, #0	// always reports success
+	ret
+
+.global fetestexcept
+.type fetestexcept,%function
+fetestexcept:	// in: w0 = exception mask; out: w0 = mask & 0x1f & FPSR
+	and w0, w0, #0x1f	// only the low five flag bits are considered
+	mrs x1, fpsr
+	and w0, w0, w1	// requested flags that are currently set
+	ret
+
+.global feclearexcept
+.type feclearexcept,%function
+feclearexcept:	// in: w0 = exception mask; clears those FPSR bits; out: w0 = 0
+	and w0, w0, #0x1f	// only the low five flag bits are considered
+	mrs x1, fpsr
+	bic w1, w1, w0	// drop the requested flags
+	msr fpsr, x1
+	mov w0, #0	// always reports success
+	ret
+
+.global feraiseexcept
+.type feraiseexcept,%function
+feraiseexcept:	// in: w0 = exception mask; sets those FPSR bits; out: w0 = 0
+	and w0, w0, #0x1f	// only the low five flag bits are considered
+	mrs x1, fpsr
+	orr w1, w1, w0	// flags are set by writing the status register directly
+	msr fpsr, x1
+	mov w0, #0	// always reports success
+	ret
+
+.global fegetenv
+.type fegetenv,%function
+fegetenv:	// in: x0 = destination buffer; out: w0 = 0
+	mrs x1, fpcr
+	mrs x2, fpsr
+	stp w1, w2, [x0]	// two 32-bit words: FPCR at [x0], FPSR at [x0+4]
+	mov w0, #0	// always reports success
+	ret
+
+// TODO preserve some bits
+.global fesetenv
+.type fesetenv,%function
+fesetenv:	// in: x0 = saved environment, or -1 to install all-zero FPCR/FPSR; out: w0 = 0
+	mov x1, #0	// values used when x0 == -1
+	mov x2, #0
+	cmn x0, #1	// sets Z when x0 == -1 (presumably FE_DFL_ENV - TODO confirm)
+	b.eq 1f	// -1: skip the load and write the zeros
+	ldp w1, w2, [x0]	// same layout fegetenv writes: FPCR word, then FPSR word
+1:	msr fpcr, x1
+	msr fpsr, x2
+	mov w0, #0	// always reports success
+	ret
diff --git a/src/internal/aarch64/syscall.s b/src/internal/aarch64/syscall.s
new file mode 100644
index 00000000..48fac924
--- /dev/null
+++ b/src/internal/aarch64/syscall.s
@@ -0,0 +1,13 @@
+.global __syscall
+.type __syscall,%function
+__syscall:	// in: w0 = syscall number, x1-x7 = arguments; out: x0 = value left by svc
+	uxtw x8,w0	// syscall number goes in x8, zero-extended from 32 bits
+	mov x0,x1	// shift every argument down one register; the order matters,
+	mov x1,x2	// each source is read before it is overwritten
+	mov x2,x3
+	mov x3,x4
+	mov x4,x5
+	mov x5,x6
+	mov x6,x7
+	svc 0	// enter the kernel; x0 is returned to the caller unmodified
+	ret
diff --git a/src/ldso/aarch64/dlsym.s b/src/ldso/aarch64/dlsym.s
new file mode 100644
index 00000000..be2dce52
--- /dev/null
+++ b/src/ldso/aarch64/dlsym.s
@@ -0,0 +1,5 @@
+.global dlsym
+.type dlsym,%function
+dlsym:	// forwards x0,x1 unchanged to __dlsym and adds the caller's return address
+	mov x2,x30	// third argument = link register, i.e. an address inside our caller
+	b __dlsym	// tail call: x30 is untouched, so __dlsym returns to our caller
diff --git a/src/ldso/aarch64/start.s b/src/ldso/aarch64/start.s
new file mode 100644
index 00000000..41d1d1e2
--- /dev/null
+++ 
b/src/ldso/aarch64/start.s
@@ -0,0 +1,18 @@
+.global _dlstart
+_dlstart:	// entry stub: run __dynlink, drop leading -1 slots after [sp], call the address it returned
+	ldr x0,[sp]	// first argument: the word at the top of the initial stack (presumably argc)
+	add x1,sp,#8	// second argument: address of the slot after it (presumably argv)
+	bl __dynlink	// x0 = address that is called at the end of this stub
+	mov x1,sp
+	ldr x2,[x1],#8	// x2 = count word, x1 -> first slot after it
+1:	sub x2,x2,1	// one fewer entry for every slot consumed
+	ldr x3,[x1],#8
+	cmn x3,#1	// slot == -1 ?
+	b.eq 1b	// yes: skip it (presumably entries __dynlink marked as consumed - TODO confirm)
+	add x2,x2,1	// undo the decrement made for the slot that is kept
+	str x3,[x1,#-8]!	// step x1 back onto the kept slot (rewrites the same value)
+	str x2,[x1,#-8]!	// place the adjusted count directly below it
+	mov sp,x1	// new stack top: adjusted count followed by the remaining slots
+	mov x1,x0
+	mov x0,#0
+	blr x1	// call the address returned by __dynlink with x0 = 0; no code follows
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
new file mode 100644
index 00000000..32064bd7
--- /dev/null
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -0,0 +1,92 @@
+// long __tlsdesc_static(long *a)
+// {
+// 	return a[1];
+// }
+.global __tlsdesc_static
+.type __tlsdesc_static,@function
+__tlsdesc_static:	// in: x0 = descriptor; out: x0 = second word of the descriptor
+	ldr x0,[x0,#8]
+	ret
+
+// long __tlsdesc_dynamic(long *a)
+// {
+// 	struct {size_t modidx,off;} *p = (void*)a[1];
+// 	size_t *dtv = *(size_t**)(tp + 16 - 8);
+// 	if (p->modidx <= dtv[0])
+// 		return dtv[p->modidx] + p->off - tp;
+// 	return __tls_get_addr(p) - tp;
+// }
+.global __tlsdesc_dynamic
+.type __tlsdesc_dynamic,@function
+__tlsdesc_dynamic:	// only x0 is changed on return: every other register used here is saved and restored
+	stp x1,x2,[sp,#-32]!	// 32-byte frame holding the four scratch registers x1-x4
+	stp x3,x4,[sp,#16]
+	mrs x1,tpidr_el0 // tp
+	ldr x0,[x0,#8] // p
+	ldr x2,[x0] // p->modidx
+	add x3,x1,#8
+	ldr x3,[x3] // dtv
+	ldr x4,[x3] // dtv[0]
+	cmp x2,x4
+	b.hi 1f	// modidx > dtv[0] (unsigned): take the __tls_get_addr path
+	ldr x2,[x3,x2,lsl #3] // dtv[p->modidx]
+	ldr x0,[x0,#8] // p->off
+	add x0,x0,x2
+2:	sub x0,x0,x1	// result is returned relative to tp
+	ldp x3,x4,[sp,#16]
+	ldp x1,x2,[sp],#32
+	ret
+
+	// save all registers __tls_get_addr may clobber
+	// ugly because addr offset must be in [-512,509] (NOTE(review): 64-bit stp/ldp reach is [-512,504] - confirm)
+1:	stp x29,x30,[sp,#-160]!	// first frame, 160 bytes: x29,x30, x5-x18, q0,q1
+	stp x5,x6,[sp,#16]
+	stp x7,x8,[sp,#32]
+	stp x9,x10,[sp,#48]
+	stp x11,x12,[sp,#64]
+	stp x13,x14,[sp,#80]
+	stp x15,x16,[sp,#96]
+	stp x17,x18,[sp,#112]
+	stp q0,q1,[sp,#128]
+	stp q2,q3,[sp,#-480]!	// second frame, 480 bytes: q2-q31, 32 bytes per pair
+	stp q4,q5,[sp,#32]
+	stp q6,q7,[sp,#64]
+	stp q8,q9,[sp,#96]
+	stp q10,q11,[sp,#128]
+	stp q12,q13,[sp,#160]
+	stp q14,q15,[sp,#192]
+	stp q16,q17,[sp,#224]
+	stp q18,q19,[sp,#256]
+	stp q20,q21,[sp,#288]
+	stp q22,q23,[sp,#320]
+	stp q24,q25,[sp,#352]
+	stp q26,q27,[sp,#384]
+	stp q28,q29,[sp,#416]
+	stp q30,q31,[sp,#448]
+	bl __tls_get_addr	// x0 still holds p here
+	mrs x1,tpidr_el0	// reload tp: x1 is not among the registers restored below
+	ldp q4,q5,[sp,#32]
+	ldp q6,q7,[sp,#64]
+	ldp q8,q9,[sp,#96]
+	ldp q10,q11,[sp,#128]
+	ldp q12,q13,[sp,#160]
+	ldp q14,q15,[sp,#192]
+	ldp q16,q17,[sp,#224]
+	ldp q18,q19,[sp,#256]
+	ldp q20,q21,[sp,#288]
+	ldp q22,q23,[sp,#320]
+	ldp q24,q25,[sp,#352]
+	ldp q26,q27,[sp,#384]
+	ldp q28,q29,[sp,#416]
+	ldp q30,q31,[sp,#448]
+	ldp q2,q3,[sp],#480	// pop the second frame
+	ldp x5,x6,[sp,#16]
+	ldp x7,x8,[sp,#32]
+	ldp x9,x10,[sp,#48]
+	ldp x11,x12,[sp,#64]
+	ldp x13,x14,[sp,#80]
+	ldp x15,x16,[sp,#96]
+	ldp x17,x18,[sp,#112]
+	ldp q0,q1,[sp,#128]
+	ldp x29,x30,[sp],#160	// pop the first frame
+	b 2b	// rejoin the fast path: subtract tp, restore x1-x4, return
diff --git a/src/math/aarch64/fabs.s b/src/math/aarch64/fabs.s
new file mode 100644
index 00000000..8c04d091
--- /dev/null
+++ b/src/math/aarch64/fabs.s
@@ -0,0 +1,6 @@
+.text
+.global fabs
+.type fabs,%function
+fabs:	// in/out: d0; single-instruction absolute value (double precision)
+	fabs d0, d0
+	ret
diff --git a/src/math/aarch64/fabsf.s b/src/math/aarch64/fabsf.s
new file mode 100644
index 00000000..6e96dd43
--- /dev/null
+++ b/src/math/aarch64/fabsf.s
@@ -0,0 +1,6 @@
+.text
+.global fabsf
+.type fabsf,%function
+fabsf:	// in/out: s0; single-instruction absolute value (single precision)
+	fabs s0, s0
+	ret
diff --git a/src/math/aarch64/sqrt.s b/src/math/aarch64/sqrt.s
new file mode 100644
index 00000000..1917e18d
--- /dev/null
+++ b/src/math/aarch64/sqrt.s
@@ -0,0 +1,6 @@
+.text
+.global sqrt
+.type sqrt,%function
+sqrt:	// in/out: d0; single-instruction square root (double precision)
+	fsqrt d0, d0
+	ret
diff --git a/src/math/aarch64/sqrtf.s b/src/math/aarch64/sqrtf.s
new file mode 100644
index 00000000..1639497b
--- /dev/null
+++ b/src/math/aarch64/sqrtf.s
@@ -0,0 +1,6 @@
+.text
+.global sqrtf
+.type sqrtf,%function
+sqrtf:	// in/out: s0; single-instruction square root (single precision)
+	fsqrt s0, s0
+	ret
diff --git a/src/setjmp/aarch64/longjmp.s b/src/setjmp/aarch64/longjmp.s
new file mode 100644
index 
00000000..7c4655fa
--- /dev/null
+++ b/src/setjmp/aarch64/longjmp.s
@@ -0,0 +1,24 @@
+.global _longjmp
+.global longjmp
+.type _longjmp,%function
+.type longjmp,%function
+_longjmp:	// in: x0 = jmp_buf written by setjmp, w1 = int val; resumes at the saved x30
+longjmp:
+	// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
+	ldp x19, x20, [x0,#0]
+	ldp x21, x22, [x0,#16]
+	ldp x23, x24, [x0,#32]
+	ldp x25, x26, [x0,#48]
+	ldp x27, x28, [x0,#64]
+	ldp x29, x30, [x0,#80]
+	ldr x2, [x0,#104]	// saved stack pointer travels through x2
+	mov sp, x2
+	ldp d8 , d9, [x0,#112]
+	ldp d10, d11, [x0,#128]
+	ldp d12, d13, [x0,#144]
+	ldp d14, d15, [x0,#160]
+
+	// val is a 32-bit int: bits 32-63 of x1 are unspecified, so test w1 only
+	cmp w1, #0
+	csinc w0, w1, wzr, ne	// w0 = val ? val : 1 (setjmp must not appear to return 0)
+	br x30	// continue at setjmp's saved return address
diff --git a/src/setjmp/aarch64/setjmp.s b/src/setjmp/aarch64/setjmp.s
new file mode 100644
index 00000000..f49288aa
--- /dev/null
+++ b/src/setjmp/aarch64/setjmp.s
@@ -0,0 +1,24 @@
+.global __setjmp
+.global _setjmp
+.global setjmp
+.type __setjmp,@function
+.type _setjmp,@function
+.type setjmp,@function
+__setjmp:	// in: x0 = jmp_buf; out: x0 = 0 on this direct return
+_setjmp:
+setjmp:
+	// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
+	stp x19, x20, [x0,#0]
+	stp x21, x22, [x0,#16]
+	stp x23, x24, [x0,#32]
+	stp x25, x26, [x0,#48]
+	stp x27, x28, [x0,#64]
+	stp x29, x30, [x0,#80]	// x30 is the address longjmp later branches to
+	mov x2, sp	// sp is copied through x2 before being stored
+	str x2, [x0,#104]	// the slot at #96 is left unused
+	stp d8, d9, [x0,#112]
+	stp d10, d11, [x0,#128]
+	stp d12, d13, [x0,#144]
+	stp d14, d15, [x0,#160]
+	mov x0, #0
+	ret
diff --git a/src/signal/aarch64/restore.s b/src/signal/aarch64/restore.s
new file mode 100644
index 00000000..d3d0243d
--- /dev/null
+++ b/src/signal/aarch64/restore.s
@@ -0,0 +1,8 @@
+.global __restore
+.type __restore,%function
+__restore:	// both names label the same two instructions
+.global __restore_rt
+.type __restore_rt,%function
+__restore_rt:
+	mov x8,#139 // SYS_rt_sigreturn
+	svc 0	// no code follows: presumably the kernel resumes the interrupted context
diff --git a/src/signal/aarch64/sigsetjmp.s b/src/signal/aarch64/sigsetjmp.s
new file mode 100644
index 00000000..e0f83f06
--- /dev/null
+++ b/src/signal/aarch64/sigsetjmp.s
@@ -0,0 +1,19 @@
+.global sigsetjmp
+.global __sigsetjmp
+.type sigsetjmp,%function
+.type __sigsetjmp,%function
+sigsetjmp:	// in: x0 = sigjmp_buf, w1 = int savemask; finishes by tail-calling setjmp
+__sigsetjmp:
+	uxtw x1,w1	// savemask is a 32-bit int: discard the unspecified upper half of x1
+	str x1,[x0,#176]	// 64-bit "mask was saved" flag stored after the register area
+	cbz x1,setjmp	// savemask == 0: nothing more to record
+	// TODO errno?
+	// sigprocmask(SIG_SETMASK, 0, (sigset_t*)buf->__ss);
+	stp x0,x30,[sp,#-16]!	// keep buf and our return address across the call
+	add x2,x0,#184	// third argument: where the current mask is written
+	mov x1,#0	// second argument: null set, so the mask is only read
+	mov x0,#2	// first argument: the value the comment above calls SIG_SETMASK
+	bl sigprocmask
+	ldp x0,x30,[sp],#16	// buf back in x0, original x30 restored for setjmp
+
+	b setjmp	// tail call: setjmp records the registers and returns 0 to our caller
diff --git a/src/thread/aarch64/__set_thread_area.s b/src/thread/aarch64/__set_thread_area.s
new file mode 100644
index 00000000..97a80acc
--- /dev/null
+++ b/src/thread/aarch64/__set_thread_area.s
@@ -0,0 +1,6 @@
+.global __set_thread_area
+.type __set_thread_area,@function
+__set_thread_area:	// in: x0 = new thread pointer; out: w0 = 0
+	msr tpidr_el0,x0	// same register the TLS descriptor code reads as tp
+	mov w0,#0	// always reports success
+	ret
diff --git a/src/thread/aarch64/__unmapself.s b/src/thread/aarch64/__unmapself.s
new file mode 100644
index 00000000..2c5d254f
--- /dev/null
+++ b/src/thread/aarch64/__unmapself.s
@@ -0,0 +1,7 @@
+.global __unmapself
+.type __unmapself,%function
+__unmapself:	// in: x0,x1 passed straight to munmap; does not return
+	mov x8,#215 // SYS_munmap
+	svc 0	// no stack access after this point, only registers are used
+	mov x8,#93 // SYS_exit
+	svc 0	// x0 still holds whatever munmap returned
diff --git a/src/thread/aarch64/clone.s b/src/thread/aarch64/clone.s
new file mode 100644
index 00000000..50af913c
--- /dev/null
+++ b/src/thread/aarch64/clone.s
@@ -0,0 +1,29 @@
+// __clone(func, stack, flags, arg, ptid, tls, ctid)
+//         x0,   x1,    w2,    x3,  x4,   x5,  x6
+
+// syscall(SYS_clone, flags, stack, ptid, tls, ctid)
+//         x8,        x0,    x1,    x2,   x3,  x4
+
+.global __clone
+.type __clone,%function
+__clone:	// parent: returns the raw syscall result; child: runs func(arg), then exits
+	// align stack and save func,arg
+	and x1,x1,#-16	// round the child stack down to a 16-byte boundary
+	stp x0,x3,[x1,#-16]!	// func and arg are parked on the child's stack
+
+	// syscall
+	uxtw x0,w2	// flags is 32-bit: zero-extend it into the first syscall argument
+	mov x2,x4
+	mov x3,x5
+	mov x4,x6
+	mov x8,#220 // SYS_clone
+	svc #0
+
+	cbz x0,1f	// zero result: this is the child
+	// parent
+	ret
+	// child
+1:	ldp x1,x0,[sp],#16	// sp is the new stack: x1 = func, x0 = arg
+	blr x1
+	mov x8,#93 // SYS_exit
+	svc #0	// exit with func's return value still in x0
diff --git a/src/thread/aarch64/syscall_cp.s b/src/thread/aarch64/syscall_cp.s
new file mode 100644
index 00000000..6302a0bd
--- /dev/null
+++ b/src/thread/aarch64/syscall_cp.s
@@ -0,0 +1,27 @@
+// __syscall_cp_asm(&self->cancel, nr, u, v, w, x, y, z)
+//                  x0             x1  x2 x3 x4 x5 x6 x7
+
+// syscall(nr, u, v, w, x, y, z)
+//         x8  x0 x1 x2 x3 x4 x5
+
+.global __syscall_cp_asm
+.type __syscall_cp_asm,%function
+__syscall_cp_asm:	// syscall wrapper that first checks the 32-bit cancel flag at [x0]
+.global __cp_begin
+__cp_begin:	// exported label: start of the check-then-syscall window
+	ldr w0,[x0]	// w0 = current value of the cancel flag
+	cbnz w0,1f	// non-zero: do not enter the kernel, go to __cancel
+	mov x8,x1	// syscall number
+	mov x0,x2	// shift the six arguments into x0-x5
+	mov x1,x3
+	mov x2,x4
+	mov x3,x5
+	mov x4,x6
+	mov x5,x7
+	svc 0
+.global __cp_end
+__cp_end:	// exported label: first instruction after the svc
+	ret
+
+	// cbnz might not be able to jump far enough
+1:	b __cancel	// tail branch; x30 is unchanged, so __cancel returns to our caller
-- cgit v1.2.3-70-g09d2