From 1278e5015f6925d86274d6363c4cedf2ce47bfcb Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Tue, 14 May 2019 14:07:50 +0200
Subject: [PATCH] [PATCH] Ensure ELFv2 is supported on PPC64

---
 coregrind/m_dispatch/dispatch-ppc64be-linux.S | 101 ++++++++++++++++--
 coregrind/m_initimg/initimg-linux.c           |   3 +-
 coregrind/m_libcsetjmp.c                      |   7 +-
 coregrind/m_machine.c                         |   3 +-
 coregrind/m_main.c                            |  21 ++--
 coregrind/m_sigframe/sigframe-ppc64-linux.c   |   3 +-
 coregrind/m_signals.c                         |  11 +-
 coregrind/m_stacktrace.c                      |  12 ++-
 coregrind/m_syscall.c                         |   9 +-
 coregrind/m_syswrap/syscall-ppc64be-linux.S   |  19 +++-
 coregrind/m_syswrap/syswrap-ppc64-linux.c     |   9 +-
 coregrind/m_trampoline.S                      |   4 +-
 coregrind/m_translate.c                       |  26 ++---
 coregrind/m_ume/elf.c                         |   8 +-
 coregrind/pub_core_machine.h                  |  12 ++-
 coregrind/vg_preloaded.c                      |   3 +-
 include/valgrind.h                            |  16 ++-
 memcheck/mc_leakcheck.c                       |   2 +-
 memcheck/mc_machine.c                         |   3 +-
 19 files changed, 205 insertions(+), 67 deletions(-)

diff --git a/coregrind/m_dispatch/dispatch-ppc64be-linux.S b/coregrind/m_dispatch/dispatch-ppc64be-linux.S
index c5592d4..292b236 100644
--- a/coregrind/m_dispatch/dispatch-ppc64be-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc64be-linux.S
@@ -88,14 +88,26 @@ void VG_(disp_run_translations)( UWord* two_words,
 .section ".text"
 .align   2
 .globl   VG_(disp_run_translations)
+#if _CALL_ELF == 2
+.type VG_(disp_run_translations),@function
+VG_(disp_run_translations):
+.type    .VG_(disp_run_translations),@function
+#else
 .section ".opd","aw"
 .align   3
 VG_(disp_run_translations):
 .quad    .VG_(disp_run_translations),.TOC.@tocbase,0
 .previous
 .type    .VG_(disp_run_translations),@function
+#endif
 .globl   .VG_(disp_run_translations)
 .VG_(disp_run_translations):
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_run_translations), .-VG_(disp_run_translations)
+#endif
+
 	/* r3 holds two_words */
 	/* r4 holds guest_state */
         /* r5 holds host_addr */
@@ -244,6 +256,11 @@ VG_(disp_run_translations):
 
         /* Set up the guest state ptr */
         mr      31,4      /* r31 (generated code gsp) = r4 */
+#if  _CALL_ELF == 2
+/*  for the LE ABI need to setup r2 and r12 */
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+#endif
 
         /* and jump into the code cache.  Chained translations in
            the code cache run, until for whatever reason, they can't
@@ -398,7 +415,9 @@ VG_(disp_run_translations):
         mtlr    0
         addi    1,1,624   /* stack_size */
         blr
-
+#if _CALL_ELF == 2
+	.size VG_(disp_run_translations),.-VG_(disp_run_translations)
+#endif
 
 /*----------------------------------------------------*/
 /*--- Continuation points                          ---*/
@@ -408,14 +427,24 @@ VG_(disp_run_translations):
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_chain_me_to_slowEP)
-        .section ".opd","aw"
+#if  _CALL_ELF == 2
+        .type VG_(disp_cp_chain_me_to_slowEP),@function
+	VG_(disp_cp_chain_me_to_slowEP):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_chain_me_to_slowEP):
         .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
         .previous
+#endif
         .type    .VG_(disp_cp_chain_me_to_slowEP),@function
         .globl   .VG_(disp_cp_chain_me_to_slowEP)
 .VG_(disp_cp_chain_me_to_slowEP):
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_chain_me_to_slowEP), .-VG_(disp_cp_chain_me_to_slowEP)
+#endif
         /* We got called.  The return address indicates
            where the patching needs to happen.  Collect
            the return address and, exit back to C land,
@@ -428,20 +457,33 @@ VG_(disp_cp_chain_me_to_slowEP):
         */
         subi 7,7,20+4+4
         b    .postamble
+#if  _CALL_ELF == 2
+        .size VG_(disp_cp_chain_me_to_slowEP),.-VG_(disp_cp_chain_me_to_slowEP)
+#endif
 
 /* ------ Chain me to fast entry point ------ */
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_chain_me_to_fastEP)
-        .section ".opd","aw"
+#if  _CALL_ELF == 2
+        .type VG_(disp_cp_chain_me_to_fastEP),@function
+VG_(disp_cp_chain_me_to_fastEP):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_chain_me_to_fastEP):
         .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
         .previous
+#endif
         .type    .VG_(disp_cp_chain_me_to_fastEP),@function
         .globl   .VG_(disp_cp_chain_me_to_fastEP)
 .VG_(disp_cp_chain_me_to_fastEP):
-        /* We got called.  The return address indicates
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_chain_me_to_fastEP), .-VG_(disp_cp_chain_me_to_fastEP)
+#endif
+	/* We got called.  The return address indicates
            where the patching needs to happen.  Collect
            the return address and, exit back to C land,
            handing the caller the pair (Chain_me_S, RA) */
@@ -453,19 +495,32 @@ VG_(disp_cp_chain_me_to_fastEP):
         */
         subi 7,7,20+4+4
         b    .postamble
+#if _CALL_ELF == 2
+        .size VG_(disp_cp_chain_me_to_fastEP),.-VG_(disp_cp_chain_me_to_fastEP)
+#endif
 
 /* ------ Indirect but boring jump ------ */
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_xindir)
+#if _CALL_ELF == 2
+        .type VG_(disp_cp_xindir),@function
+VG_(disp_cp_xindir):
+#else
         .section ".opd","aw"
         .align   3
 VG_(disp_cp_xindir):
         .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
         .previous
+#endif
         .type    .VG_(disp_cp_xindir),@function
         .globl   .VG_(disp_cp_xindir)
 .VG_(disp_cp_xindir):
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_xindir), .-VG_(disp_cp_xindir)
+#endif
         /* Where are we going? */
         ld    20, OFFSET_ppc64_CIA(31)
 
@@ -584,44 +639,72 @@ VG_(disp_cp_xindir):
         li    7,0
         b     .postamble
 	/*NOTREACHED*/
+#if _CALL_ELF == 2
+        .size VG_(disp_cp_xindir),.-VG_(disp_cp_xindir)
+#endif
 
 /* ------ Assisted jump ------ */
 .section ".text"
         .align   2
         .globl   VG_(disp_cp_xassisted)
-        .section ".opd","aw"
+#if _CALL_ELF == 2
+        .type VG_(disp_cp_xassisted),@function
+VG_(disp_cp_xassisted):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_xassisted):
         .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
         .previous
-        .type    .VG_(disp_cp_xassisted),@function
+#endif
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_xassisted), .-VG_(disp_cp_xassisted)
+#endif
+	.type    .VG_(disp_cp_xassisted),@function
         .globl   .VG_(disp_cp_xassisted)
 .VG_(disp_cp_xassisted):
         /* r31 contains the TRC */
         mr      6,31
         li      7,0
         b       .postamble
+#if _CALL_ELF == 2
+        .size VG_(disp_cp_xassisted),.-VG_(disp_cp_xassisted)
+#endif
 
 /* ------ Event check failed ------ */
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_evcheck_fail)
-        .section ".opd","aw"
+#if _CALL_ELF == 2
+        .type VG_(disp_cp_evcheck_fail),@function
+VG_(disp_cp_evcheck_fail):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_evcheck_fail):
         .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
         .previous
+#endif
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_evcheck_fail), .-VG_(disp_cp_evcheck_fail)
+#endif
         .type    .VG_(disp_cp_evcheck_fail),@function
         .globl   .VG_(disp_cp_evcheck_fail)
 .VG_(disp_cp_evcheck_fail):
         li      6,VG_TRC_INNER_COUNTERZERO
         li      7,0
         b       .postamble
+#if  _CALL_ELF == 2
+       .size VG_(disp_cp_evcheck_fail),.-VG_(disp_cp_evcheck_fail)
+#endif
 
-        
 .size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
 
-#endif // defined(VGP_ppc64be_linux)
+#endif // defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
 
 /* Let the linker know we don't need an executable stack */
 MARK_STACK_NO_EXEC
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index 8a7f0d0..6891641 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -1117,7 +1117,8 @@ void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii )
    arch->vex.guest_GPR1 = iifii.initial_client_SP;
    arch->vex.guest_GPR2 = iifii.initial_client_TOC;
    arch->vex.guest_CIA  = iifii.initial_client_IP;
-#if defined(VGP_ppc64le_linux)
+#if (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+    && !defined(VG_PLAT_USES_PPCTOC)
    arch->vex.guest_GPR12 = iifii.initial_client_IP;
 #endif
 
diff --git a/coregrind/m_libcsetjmp.c b/coregrind/m_libcsetjmp.c
index 85ffc12..51273a0 100644
--- a/coregrind/m_libcsetjmp.c
+++ b/coregrind/m_libcsetjmp.c
@@ -35,6 +35,7 @@
 
 #include "pub_core_basics.h"
 #include "pub_core_libcsetjmp.h"    /* self */
+#include "pub_core_machine.h"       /* VG_PLAT_USES_PPCTOC */
 
 /* See include/pub_tool_libcsetjmp.h for background and rationale. */
 
@@ -151,7 +152,8 @@ __asm__(
 
 /* ------------ ppc64-linux ------------ */
 
-#if defined(VGP_ppc64be_linux)
+#if (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+    && defined(VG_PLAT_USES_PPCTOC)
 
 __asm__(
 ".section \".toc\",\"aw\""          "\n"
@@ -270,7 +272,8 @@ __asm__(
 ".previous"  "\n"
 );
 
-#elif defined(VGP_ppc64le_linux)
+#elif (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && !defined(VG_PLAT_USES_PPCTOC)
 __asm__(
 ".section \".toc\",\"aw\""          "\n"
 
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index df842aa..de37070 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -2073,8 +2073,9 @@ void* VG_(fnptr_to_fnentry)( void* f )
       || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
       || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
       || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
       || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
-      || defined(VGP_nanomips_linux)
+      || defined(VGP_nanomips_linux) \
+      || (defined(VGP_ppc64be_linux) && !defined(VG_PLAT_USES_PPCTOC))
    return f;
 #  elif defined(VGP_ppc64be_linux)
    /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 21df679..855c933 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -2303,7 +2303,7 @@ static void final_tidyup(ThreadId tid)
       return; /* won't do it */
    }
 
-#  if defined(VGP_ppc64be_linux)
+#  if defined(VGP_ppc64be_linux) && defined(VG_PLAT_USES_PPCTOC)
    Addr r2 = VG_(get_tocptr)(VG_(current_DiEpoch)(),
                              freeres_wrapper);
    if (r2 == 0) {
@@ -2336,13 +2336,15 @@ static void final_tidyup(ThreadId tid)
       appropriately. */
    VG_(set_IP)(tid, freeres_wrapper);
 
-#  if defined(VGP_ppc64be_linux)
+#  if (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && defined(VG_PLAT_USES_PPCTOC)
    VG_(threads)[tid].arch.vex.guest_GPR2 = r2;
    VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
             offsetof(VexGuestPPC64State, guest_GPR2),
             sizeof(VG_(threads)[tid].arch.vex.guest_GPR2));
-#  elif  defined(VGP_ppc64le_linux)
-   /* setting GPR2 but not really needed, GPR12 is needed */
+#  elif  (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+         && !defined(VG_PLAT_USES_PPCTOC)
+  /* setting GPR2 but not really needed, GPR12 is needed */
    VG_(threads)[tid].arch.vex.guest_GPR2  = freeres_wrapper;
    VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
             offsetof(VexGuestPPC64State, guest_GPR2),
@@ -2657,9 +2659,10 @@ asm("\n"
     "\ttrap\n"
     ".previous\n"
 );
-#elif defined(VGP_ppc64be_linux)
+#elif (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && defined(VG_PLAT_USES_PPCTOC)
 asm("\n"
-    /* PPC64 ELF ABI says '_start' points to a function descriptor.
+    /* PPC64 ELFv1 ABI says '_start' points to a function descriptor.
        So we must have one, and that is what goes into the .opd section. */
     "\t.align 2\n"
     "\t.global _start\n"
@@ -2703,9 +2706,9 @@ asm("\n"
     "\tnop\n"
     "\ttrap\n"
 );
-#elif defined(VGP_ppc64le_linux)
-/* Little Endian uses ELF version 2 but in the future may also
- * support other ELF versions.
+#elif (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && !defined(VG_PLAT_USES_PPCTOC)
+/* ELF version 2 does not use function descriptors.
  */
 asm("\n"
     "\t.align 2\n"
diff --git a/coregrind/m_sigframe/sigframe-ppc64-linux.c b/coregrind/m_sigframe/sigframe-ppc64-linux.c
index b16606c..cc65783 100644
--- a/coregrind/m_sigframe/sigframe-ppc64-linux.c
+++ b/coregrind/m_sigframe/sigframe-ppc64-linux.c
@@ -263,7 +263,8 @@ void VG_(sigframe_create)( ThreadId tid,
 
    /* Handler is in fact a standard ppc64-linux function descriptor, 
       so extract the function entry point and also the toc ptr to use. */
-#if defined(VGP_ppc64be_linux)
+#if (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+    && defined(VG_PLAT_USES_PPCTOC)
    SET_SIGNAL_GPR(tid, 2, (Addr) ((ULong*)handler)[1]);
    tst->arch.vex.guest_CIA = (Addr) ((ULong*)handler)[0];
 #else
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c
index 7591eb3..51fce79 100644
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -897,7 +897,9 @@ extern void my_sigreturn(void);
    "	sc\n" \
    ".previous\n"
 
-#elif defined(VGP_ppc64be_linux)
+#elif (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && defined(VG_PLAT_USES_PPCTOC)
+/* ELF version 1.  Used in some legacy environments on LE, and most BE. */
 #  define _MY_SIGRETURN(name) \
    ".align   2\n" \
    ".globl   my_sigreturn\n" \
@@ -912,9 +914,10 @@ extern void my_sigreturn(void);
    "	li	0, " #name "\n" \
    "	sc\n"
 
-#elif defined(VGP_ppc64le_linux)
-/* Little Endian supports ELF version 2.  In the future, it may
- * support other versions.
+#elif (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && !defined(VG_PLAT_USES_PPCTOC)
+/* ELF version 2.  In the future, PPC64 may support other versions.  Used by
+ * most LE environments and any BE environment running musl libc.
  */
 #  define _MY_SIGRETURN(name) \
    ".align   2\n" \
diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c
index b3ac89f..5ff90ef 100644
--- a/coregrind/m_stacktrace.c
+++ b/coregrind/m_stacktrace.c
@@ -726,7 +726,8 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                                Addr fp_max_orig )
 {
    Bool  lr_is_first_RA = False;
-#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
+#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+      || defined(VGP_ppc64le_linux)
    Word redir_stack_size = 0;
    Word redirs_used      = 0;
 #  endif
@@ -787,7 +788,8 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
    redirs_used      = 0;
 #  endif
 
-#  if defined(VG_PLAT_USES_PPCTOC) || defined (VGP_ppc64le_linux)
+#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+      || defined(VGP_ppc64le_linux)
    /* Deal with bogus LR values caused by function
       interception/wrapping on ppc-TOC platforms; see comment on
       similar code a few lines further down. */
@@ -839,7 +841,8 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
         /* On ppc64-linux (ppc64-elf, really), the lr save
            slot is 2 words back from sp, whereas on ppc32-elf(?) it's
            only one word back. */
-#        if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
+#        if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+            || defined(VGP_ppc64le_linux)
          const Int lr_offset = 2;
 #        else
          const Int lr_offset = 1;
@@ -858,7 +861,8 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
             else
                ip = (((UWord*)fp)[lr_offset]);
 
-#           if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
+#           if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+               || defined(VGP_ppc64le_linux)
             /* Nasty hack to do with function replacement/wrapping on
                ppc64-linux.  If LR points to our magic return stub,
                then we are in a wrapped or intercepted function, in
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c
index 5948cec..c1cdfab 100644
--- a/coregrind/m_syscall.c
+++ b/coregrind/m_syscall.c
@@ -470,7 +470,8 @@ asm(
 ".previous\n"
 );
 
-#elif defined(VGP_ppc64be_linux)
+#elif (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && defined(VG_PLAT_USES_PPCTOC)
 /* Due to the need to return 65 bits of result, this is completely
    different from the ppc32 case.  The single arg register points to a
    7-word block containing the syscall # and the 6 args.  The syscall
@@ -506,15 +507,15 @@ asm(
 "        blr\n"
 );
 
-#elif defined(VGP_ppc64le_linux)
+#elif (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+      && !defined(VG_PLAT_USES_PPCTOC)
 /* Due to the need to return 65 bits of result, this is completely
    different from the ppc32 case.  The single arg register points to a
    7-word block containing the syscall # and the 6 args.  The syscall
    result proper is put in [0] of the block, and %cr0.so is in the
    bottom bit of [1]. */
 extern void do_syscall_WRK ( ULong* argblock );
-/* Little Endian supports ELF version 2.  In the future, it may support
- * other versions as well.
+/* ELF version 2.  In the future, PPC64 may support other versions as well.
  */
 asm(
 ".align   2\n"
diff --git a/coregrind/m_syswrap/syscall-ppc64be-linux.S b/coregrind/m_syswrap/syscall-ppc64be-linux.S
index 16e9ced..db0d8b4 100644
--- a/coregrind/m_syswrap/syscall-ppc64be-linux.S
+++ b/coregrind/m_syswrap/syscall-ppc64be-linux.S
@@ -76,11 +76,24 @@
 
 .align 2
 .globl ML_(do_syscall_for_client_WRK)
+#if _CALL_ELF == 2
+.type .ML_(do_syscall_for_client_WRK),@function
+ML_(do_syscall_for_client_WRK):
+0:      addis         2,12,.TOC.-0b@ha
+        addi          2,2,.TOC.-0b@l
+        .localentry   ML_(do_syscall_for_client_WRK), .-ML_(do_syscall_for_client_WRK)
+#else
 .section ".opd","aw"
 .align 3
-ML_(do_syscall_for_client_WRK):	
+ML_(do_syscall_for_client_WRK):
 .quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
 .previous
+#endif
+#if _CALL_ELF == 2
+0:      addis        2,12,.TOC.-0b@ha
+        addi         2,2,.TOC.-0b@l
+        .localentry  ML_(do_syscall_for_client_WRK), .-ML_(do_syscall_for_client_WRK)
+#endif
 .type .ML_(do_syscall_for_client_WRK),@function
 .globl .ML_(do_syscall_for_client_WRK)
 .ML_(do_syscall_for_client_WRK):
@@ -145,7 +158,9 @@ ML_(do_syscall_for_client_WRK):
 	/* failure: return 0x8000 | error code */
 7:	ori	3,3,0x8000	/* FAILURE -- ensure return value is nonzero */
         b       5b
-
+#if _CALL_ELF == 2
+        .size .ML_(do_syscall_for_client_WRK),.-.ML_(do_syscall_for_client_WRK)
+#endif
 .section .rodata
 /* export the ranges so that
    VG_(fixup_guest_state_after_syscall_interrupted) can do the
diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c
index eada099..7186fe3 100644
--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
@@ -41,6 +41,7 @@
 #include "pub_core_libcprint.h"
 #include "pub_core_libcproc.h"
 #include "pub_core_libcsignal.h"
+#include "pub_core_machine.h"
 #include "pub_core_options.h"
 #include "pub_core_scheduler.h"
 #include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
@@ -76,7 +77,8 @@ void ML_(call_on_new_stack_0_1) ( Addr stack,
    address, the second word is the TOC ptr (r2), and the third word is
    the static chain value. */
 asm(
-#if defined(VGP_ppc64be_linux)
+#if (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+    && defined(VG_PLAT_USES_PPCTOC)
 "   .align   2\n"
 "   .globl   vgModuleLocal_call_on_new_stack_0_1\n"
 "   .section \".opd\",\"aw\"\n"
@@ -126,7 +128,7 @@ asm(
 "   bctr\n\t"              // jump to dst
 "   trap\n"                // should never get here
 #else
-//  ppc64le_linux
+//  ppc64 ELFv2 Linux
 "   .align   2\n"
 "   .globl   vgModuleLocal_call_on_new_stack_0_1\n"
 "vgModuleLocal_call_on_new_stack_0_1:\n"
@@ -211,7 +213,8 @@ asm(
 
 // See priv_syswrap-linux.h for arg profile.
 asm(
-#if defined(VGP_ppc64be_linux)
+#if (defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)) \
+    && defined(VG_PLAT_USES_PPCTOC)
 "   .align   2\n"
 "   .globl   do_syscall_clone_ppc64_linux\n"
 "   .section \".opd\",\"aw\"\n"
diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S
index 0488b54..d00916a 100644
--- a/coregrind/m_trampoline.S
+++ b/coregrind/m_trampoline.S
@@ -469,11 +469,11 @@ VG_(ppctoc_magic_redirect_return_stub):
 	.align 2
 	.globl VG_(ppc64_linux_REDIR_FOR_strlen)
 #if !defined VGP_ppc64be_linux || _CALL_ELF == 2
-        /* Little Endian uses ELF version 2 */
+        /* ELF version 2 */
         .type VG_(ppc64_linux_REDIR_FOR_strlen),@function
 VG_(ppc64_linux_REDIR_FOR_strlen):
 #else
-        /* Big Endian uses ELF version 1 */
+        /* ELF version 1 */
 	.section        ".opd","aw"
 	.align 3
 VG_(ppc64_linux_REDIR_FOR_strlen):
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index 3602a46..6dd2845 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -1006,7 +1006,8 @@ static IRExpr* mkU32 ( UInt n ) {
    return IRExpr_Const(IRConst_U32(n));
 }
 
-#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
+#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+    || defined(VGP_ppc64le_linux)
 static IRExpr* mkU8 ( UChar n ) {
    return IRExpr_Const(IRConst_U8(n));
 }
@@ -1234,7 +1235,8 @@ static void gen_push_and_set_LR_R2 ( IRSB* bb, Addr new_R2_value )
 }
 #endif
 
-#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
+#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+    || defined(VGP_ppc64le_linux)
 
 static void gen_pop_R2_LR_then_bLR ( IRSB* bb )
 {
@@ -1263,7 +1265,8 @@ static void gen_pop_R2_LR_then_bLR ( IRSB* bb )
 }
 #endif
 
-#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
+#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+    || defined(VGP_ppc64le_linux)
 
 static
 Bool mk_preamble__ppctoc_magic_return_stub ( void* closureV, IRSB* bb )
@@ -1285,7 +1288,7 @@ Bool mk_preamble__ppctoc_magic_return_stub ( void* closureV, IRSB* bb )
 }
 #endif
 
-#if defined(VGP_ppc64le_linux)
+#if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
 /* Generate code to push LR and R2 onto this thread's redir stack.
    Need to save R2 in case we redirect to a global entry point.  The
    value of R2 is not preserved when entering the global entry point.
@@ -1366,9 +1369,7 @@ Bool mk_preamble__set_NRADDR_to_zero ( void* closureV, IRSB* bb )
      gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( VG_(current_DiEpoch)(),
                                                    closure->readdr ) );
    }
-#  endif
-
-#if defined(VGP_ppc64le_linux)
+#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
    VgCallbackClosure* closure = (VgCallbackClosure*)closureV;
    Int offB_GPR12 = offsetof(VexGuestArchState, guest_GPR12);
    addStmtToIRSB(bb, IRStmt_Put(offB_GPR12, mkU64(closure->readdr)));
@@ -1424,8 +1425,7 @@ Bool mk_preamble__set_NRADDR_to_nraddr ( void* closureV, IRSB* bb )
    );
    gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( VG_(current_DiEpoch)(),
                                                  closure->readdr ) );
-#  endif
-#if defined(VGP_ppc64le_linux)
+#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
    /* This saves the r2 before leaving the function.  We need to move
     * guest_NRADDR_GPR2 back to R2 on return.
     */
@@ -1648,7 +1648,8 @@ Bool VG_(translate) ( ThreadId tid,
       preamble_fn = mk_preamble__set_NRADDR_to_nraddr;
 
    /* LE we setup the LR */
-#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
+#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64be_linux) \
+      || defined(VGP_ppc64le_linux)
    if (nraddr == (Addr)&VG_(ppctoc_magic_redirect_return_stub)) {
       /* If entering the special return stub, this means a wrapped or
          redirected function is returning.  Make this translation one
@@ -1692,13 +1693,14 @@ Bool VG_(translate) ( ThreadId tid,
    vex_abiinfo.guest_ppc_zap_RZ_at_bl         = NULL;
 #  endif
 
-#  if defined(VGP_ppc64be_linux)
+#  if defined(VGP_ppc64be_linux) && defined(VG_PLAT_USES_PPCTOC)
    vex_abiinfo.guest_ppc_zap_RZ_at_blr        = True;
    vex_abiinfo.guest_ppc_zap_RZ_at_bl         = const_True;
    vex_abiinfo.host_ppc_calls_use_fndescrs    = True;
 #  endif
 
-#  if defined(VGP_ppc64le_linux)
+#  if (defined(VGP_ppc64be_linux) && !defined(VG_PLAT_USES_PPCTOC)) \
+      || defined(VGP_ppc64le_linux)
    vex_abiinfo.guest_ppc_zap_RZ_at_blr        = True;
    vex_abiinfo.guest_ppc_zap_RZ_at_bl         = const_True;
    vex_abiinfo.host_ppc_calls_use_fndescrs    = False;
diff --git a/coregrind/m_ume/elf.c b/coregrind/m_ume/elf.c
index 21eb52b..f1e6b47 100644
--- a/coregrind/m_ume/elf.c
+++ b/coregrind/m_ume/elf.c
@@ -847,8 +847,8 @@ Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
    info->exe_base = minaddr + ebase;
    info->exe_end  = maxaddr + ebase;
 
-#if defined(VGP_ppc64be_linux)
-   /* On PPC64BE, ELF ver 1, a func ptr is represented by a TOC entry ptr.
+#if defined(VGP_ppc64be_linux) && defined(VG_PLAT_USES_PPCTOC)
+   /* On PPC64, ELF ver 1, a func ptr is represented by a TOC entry ptr.
       This TOC entry contains three words; the first word is the function
       address, the second word is the TOC ptr (r2), and the third word
       is the static chain value. */
@@ -856,8 +856,8 @@ Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
    info->init_toc = ((ULong*)entry)[1];
    info->init_ip  += info->interp_offset;
    info->init_toc += info->interp_offset;
-#elif defined(VGP_ppc64le_linux)
-   /* On PPC64LE, ELF ver 2. API doesn't use a func ptr */
+#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
+   /* On PPC64, ELF ver 2. API doesn't use a func ptr */
    info->init_ip  = (Addr)entry;
    info->init_toc = 0; /* meaningless on this platform */
 #else
diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h
index d6af843..400148d 100644
--- a/coregrind/pub_core_machine.h
+++ b/coregrind/pub_core_machine.h
@@ -60,12 +60,20 @@
 #  define VG_ELF_DATA2XXX     ELFDATA2MSB
 #  define VG_ELF_MACHINE      EM_PPC64
 #  define VG_ELF_CLASS        ELFCLASS64
-#  define VG_PLAT_USES_PPCTOC 1
+#  if !defined(_CALL_ELF) || _CALL_ELF == 1
+#    define VG_PLAT_USES_PPCTOC 1
+#  else
+#    undef VG_PLAT_USES_PPCTOC
+#  endif
 #elif defined(VGP_ppc64le_linux)
 #  define VG_ELF_DATA2XXX     ELFDATA2LSB
 #  define VG_ELF_MACHINE      EM_PPC64
 #  define VG_ELF_CLASS        ELFCLASS64
-#  undef VG_PLAT_USES_PPCTOC
+#  if defined(_CALL_ELF) && _CALL_ELF == 1
+#    define VG_PLAT_USES_PPCTOC 1
+#  else  // assume ELFv2 by default if no ABI is specified
+#    undef VG_PLAT_USES_PPCTOC
+#  endif
 #elif defined(VGP_arm_linux)
 #  define VG_ELF_DATA2XXX     ELFDATA2LSB
 #  define VG_ELF_MACHINE      EM_ARM
diff --git a/coregrind/vg_preloaded.c b/coregrind/vg_preloaded.c
index ad03343..f300fbe 100644
--- a/coregrind/vg_preloaded.c
+++ b/coregrind/vg_preloaded.c
@@ -45,6 +45,7 @@
 #include "pub_core_basics.h"
 #include "pub_core_clreq.h"
 #include "pub_core_debuginfo.h"  // Needed for pub_core_redir.h
+#include "pub_core_machine.h"    // For VG_PLAT_USES_PPCTOC
 #include "pub_core_redir.h"      // For VG_NOTIFY_ON_LOAD
 
 #if defined(VGO_linux) || defined(VGO_solaris)
@@ -103,7 +104,7 @@ void * VG_NOTIFY_ON_LOAD(ifunc_wrapper) (void)
     VALGRIND_GET_ORIG_FN(fn);
     CALL_FN_W_v(result, fn);
 
-#if defined(VGP_ppc64be_linux)
+#if defined(VGP_ppc64be_linux) && defined(VG_PLAT_USES_PPCTOC)
    /* ppc64be uses function descriptors, so get the actual function entry
       address for the client request, but return the function descriptor
       from this function. 
diff --git a/include/valgrind.h b/include/valgrind.h
index cc8c2b8..3d30e7f 100644
--- a/include/valgrind.h
+++ b/include/valgrind.h
@@ -143,12 +143,20 @@
 #  define PLAT_amd64_linux 1
 #elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__)
 #  define PLAT_ppc32_linux 1
-#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF != 2
-/* Big Endian uses ELF version 1 */
+#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && defined(__BIG_ENDIAN__)
 #  define PLAT_ppc64be_linux 1
-#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF == 2
-/* Little Endian uses ELF version 2 */
+#  if _CALL_ELF == 2
+#    define PLAT_ppc64_linux_abi2
+#  else
+#    define PLAT_ppc64_linux_abi1
+#  endif
+#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && !defined(__BIG_ENDIAN__)
 #  define PLAT_ppc64le_linux 1
+#  if _CALL_ELF == 2
+#    define PLAT_ppc64_linux_abi2
+#  else
+#    define PLAT_ppc64_linux_abi1
+#  endif
 #elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
 #  define PLAT_arm_linux 1
 #elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__)
diff --git a/memcheck/mc_leakcheck.c b/memcheck/mc_leakcheck.c
index 7822444..c239f5b 100644
--- a/memcheck/mc_leakcheck.c
+++ b/memcheck/mc_leakcheck.c
@@ -653,7 +653,7 @@ static Bool aligned_ptr_above_page0_is_vtable_addr(Addr ptr)
       if (pot_fn == 0)
          continue; // NULL fn pointer. Seems it can happen in vtable.
       seg = VG_(am_find_nsegment) (pot_fn);
-#if defined(VGA_ppc64be)
+#if defined(VGA_ppc64be) && (!defined(_CALL_ELF) || _CALL_ELF == 1)
       // ppc64BE uses a thunk table (function descriptors), so we have one
       // more level of indirection to follow.
       if (seg == NULL
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index 4ce746e..e03ab33 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -132,7 +132,8 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
             return GOF(GPRn);
          by testing ox instead of o, and setting ox back 4 bytes when sz == 4.
       */
-#     if defined(VGA_ppc64le)
+#     if (defined(VGA_ppc64be) && (defined(_CALL_ELF) && _CALL_ELF == 2)) \
+       || defined(VGA_ppc64le)
       Int ox = o;
 #     else
       Int ox = sz == 8 ? o : (o - 4);
-- 
2.21.0