elf: relocate PT_INTERP (#42318)

Relocation of `PT_INTERP` in ELF files already happens to work from long to short path, thanks to generic binary relocation (i.e. find and replace). This PR improves it: 1. Adds logic to grow `PT_INTERP` strings through patchelf (which is only useful if the interpreter and rpath paths are the _only_ paths in the binary that need to be relocated) 2. Makes shrinking `PT_INTERP` cleaner. Before this PR when you would use Spack-built glibc as link dep, and relocate executables using its dynamic linker, you'd end up with ``` $ file exe exe: ELF 64-bit LSD pie executable, ..., interpreter /////////////////////////////////////////////////path/to/glibc/lib/ld-linux.so ``` With this PR you get something sensible: ``` $ file exe exe: ELF 64-bit LSD pie executable, ..., interpreter /path/to/glibc/lib/ld-linux.so ``` When Spack cannot modify the interpreter or rpath strings in-place, it errors out without modifying the file, and leaves both tasks to patchelf instead. Also add type hints to `elf.py`.
author: Harmen Stoppels <me@harmenstoppels.nl> 2024-01-30 22:36:49 +0100
committer: GitHub <noreply@github.com> 2024-01-30 22:36:49 +0100
commit: 28eea2994f04ec73f647cf636de433fbc9317c4c (patch)
tree: 177b258cbb2b0a855cdb901c2f3c1b02b2f82515 /lib
parent: 6d55caabe88f00aff7ce82e07f1de5b8f136e1e3 (diff)
download: spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.gz
spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.bz2
spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.xz
spack-28eea2994f04ec73f647cf636de433fbc9317c4c.zip
7 files changed, 306 insertions, 208 deletions
diff --git a/lib/spack/spack/bootstrap/core.py b/lib/spack/spack/bootstrap/core.py
index 6ff18db843..34aff29b55 100644
--- a/lib/spack/spack/bootstrap/core.py
+++ b/lib/spack/spack/bootstrap/core.py
@@ -542,7 +542,7 @@ def verify_patchelf(patchelf: "spack.util.executable.Executable") -> bool:
     return version >= spack.version.Version("0.13.1")
 
 
-def ensure_patchelf_in_path_or_raise() -> None:
+def ensure_patchelf_in_path_or_raise() -> spack.util.executable.Executable:
     """Ensure patchelf is in the PATH or raise."""
     # The old concretizer is not smart and we're doing its job: if the latest patchelf
     # does not concretize because the compiler doesn't support C++17, we try to
diff --git a/lib/spack/spack/hooks/drop_redundant_rpaths.py b/lib/spack/spack/hooks/drop_redundant_rpaths.py
index 0c43fc1b5a..2577ce18f4 100644
--- a/lib/spack/spack/hooks/drop_redundant_rpaths.py
+++ b/lib/spack/spack/hooks/drop_redundant_rpaths.py
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 import os
-from typing import IO, Optional, Tuple
+from typing import BinaryIO, Optional, Tuple
 
 import llnl.util.tty as tty
 from llnl.util.filesystem import BaseDirectoryVisitor, visit_directory_tree
@@ -18,7 +18,7 @@ def should_keep(path: bytes) -> bool:
     return path.startswith(b"$") or (os.path.isabs(path) and os.path.lexists(path))
 
 
-def _drop_redundant_rpaths(f: IO) -> Optional[Tuple[bytes, bytes]]:
+def _drop_redundant_rpaths(f: BinaryIO) -> Optional[Tuple[bytes, bytes]]:
     """Drop redundant entries from rpath.
 
     Args:
diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py
index 6fcb4e98bc..0bbbba7ef0 100644
--- a/lib/spack/spack/relocate.py
+++ b/lib/spack/spack/relocate.py
@@ -7,6 +7,7 @@ import itertools
 import os
 import re
 from collections import OrderedDict
+from typing import List, Optional
 
 import macholib.mach_o
 import macholib.MachO
@@ -47,7 +48,7 @@ class InstallRootStringError(spack.error.SpackError):
 
 
 @memoized
-def _patchelf():
+def _patchelf() -> Optional[executable.Executable]:
     """Return the full path to the patchelf binary, if available, else None."""
     import spack.bootstrap
 
@@ -55,9 +56,7 @@ def _patchelf():
         return None
 
     with spack.bootstrap.ensure_bootstrap_configuration():
-        patchelf = spack.bootstrap.ensure_patchelf_in_path_or_raise()
-
-    return patchelf.path
+        return spack.bootstrap.ensure_patchelf_in_path_or_raise()
 
 
 def _elf_rpaths_for(path):
@@ -340,31 +339,34 @@ def macholib_get_paths(cur_path):
     return (rpaths, deps, ident)
 
 
-def _set_elf_rpaths(target, rpaths):
-    """Replace the original RPATH of the target with the paths passed
-    as arguments.
+def _set_elf_rpaths_and_interpreter(
+    target: str, rpaths: List[str], interpreter: Optional[str] = None
+) -> Optional[str]:
+    """Replace the original RPATH of the target with the paths passed as arguments.
 
     Args:
         target: target executable. Must be an ELF object.
         rpaths: paths to be set in the RPATH
+        interpreter: optionally set the interpreter
 
     Returns:
-        A string concatenating the stdout and stderr of the call
-        to ``patchelf`` if it was invoked
+        A string concatenating the stdout and stderr of the call to ``patchelf`` if it was invoked
     """
     # Join the paths using ':' as a separator
     rpaths_str = ":".join(rpaths)
 
-    patchelf, output = executable.Executable(_patchelf()), None
     try:
+        # TODO: error handling is not great here?
         # TODO: revisit the use of --force-rpath as it might be conditional
         # TODO: if we want to support setting RUNPATH from binary packages
-        patchelf_args = ["--force-rpath", "--set-rpath", rpaths_str, target]
-        output = patchelf(*patchelf_args, output=str, error=str)
+        args = ["--force-rpath", "--set-rpath", rpaths_str]
+        if interpreter:
+            args.extend(["--set-interpreter", interpreter])
+        args.append(target)
+        return _patchelf()(*args, output=str, error=str)
     except executable.ProcessError as e:
-        msg = "patchelf --force-rpath --set-rpath {0} failed with error {1}"
-        tty.warn(msg.format(target, e))
-    return output
+        tty.warn(str(e))
+        return None
 
 
 def needs_binary_relocation(m_type, m_subtype):
@@ -501,10 +503,12 @@ def new_relocate_elf_binaries(binaries, prefix_to_prefix):
 
     for path in binaries:
         try:
-            elf.replace_rpath_in_place_or_raise(path, prefix_to_prefix)
-        except elf.ElfDynamicSectionUpdateFailed as e:
-            # Fall back to the old `patchelf --set-rpath` method.
-            _set_elf_rpaths(path, e.new.decode("utf-8").split(":"))
+            elf.substitute_rpath_and_pt_interp_in_place_or_raise(path, prefix_to_prefix)
+        except elf.ElfCStringUpdatesFailed as e:
+            # Fall back to `patchelf --set-rpath ... --set-interpreter ...`
+            rpaths = e.rpath.new_value.decode("utf-8").split(":") if e.rpath else []
+            interpreter = e.pt_interp.new_value.decode("utf-8") if e.pt_interp else None
+            _set_elf_rpaths_and_interpreter(path, rpaths=rpaths, interpreter=interpreter)
 
 
 def relocate_elf_binaries(
@@ -546,10 +550,10 @@ def relocate_elf_binaries(
             new_rpaths = _make_relative(new_binary, new_root, new_norm_rpaths)
             # check to see if relative rpaths are changed before rewriting
             if sorted(new_rpaths) != sorted(orig_rpaths):
-                _set_elf_rpaths(new_binary, new_rpaths)
+                _set_elf_rpaths_and_interpreter(new_binary, new_rpaths)
         else:
             new_rpaths = _transform_rpaths(orig_rpaths, orig_root, new_prefixes)
-            _set_elf_rpaths(new_binary, new_rpaths)
+            _set_elf_rpaths_and_interpreter(new_binary, new_rpaths)
 
 
 def make_link_relative(new_links, orig_links):
@@ -596,7 +600,7 @@ def make_elf_binaries_relative(new_binaries, orig_binaries, orig_layout_root):
         orig_rpaths = _elf_rpaths_for(new_binary)
         if orig_rpaths:
             new_rpaths = _make_relative(orig_binary, orig_layout_root, orig_rpaths)
-            _set_elf_rpaths(new_binary, new_rpaths)
+            _set_elf_rpaths_and_interpreter(new_binary, new_rpaths)
 
 
 def warn_if_link_cant_be_relocated(link, target):
diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py
index 1f6e054232..0fa9b5f127 100644
--- a/lib/spack/spack/test/conftest.py
+++ b/lib/spack/spack/test/conftest.py
@@ -1851,7 +1851,7 @@ def binary_with_rpaths(prefix_tmpdir):
     paths are encoded with `$ORIGIN` prepended.
     """
 
-    def _factory(rpaths, message="Hello world!"):
+    def _factory(rpaths, message="Hello world!", dynamic_linker="/lib64/ld-linux.so.2"):
         source = prefix_tmpdir.join("main.c")
         source.write(
             """
@@ -1867,10 +1867,10 @@ def binary_with_rpaths(prefix_tmpdir):
         executable = source.dirpath("main.x")
         # Encode relative RPATHs using `$ORIGIN` as the root prefix
         rpaths = [x if os.path.isabs(x) else os.path.join("$ORIGIN", x) for x in rpaths]
-        rpath_str = ":".join(rpaths)
         opts = [
             "-Wl,--disable-new-dtags",
-            "-Wl,-rpath={0}".format(rpath_str),
+            f"-Wl,-rpath={':'.join(rpaths)}",
+            f"-Wl,--dynamic-linker,{dynamic_linker}",
             str(source),
             "-o",
             str(executable),
diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py
index 6c8c0b1594..0ed6af59e2 100644
--- a/lib/spack/spack/test/relocate.py
+++ b/lib/spack/spack/test/relocate.py
@@ -47,14 +47,6 @@ def text_in_bin(text, binary):
 
 
 @pytest.fixture()
-def mock_patchelf(tmpdir, mock_executable):
-    def _factory(output):
-        return mock_executable("patchelf", output=output)
-
-    return _factory
-
-
-@pytest.fixture()
 def make_dylib(tmpdir_factory):
     """Create a shared library with unfriendly qualities.
 
diff --git a/lib/spack/spack/test/util/elf.py b/lib/spack/spack/test/util/elf.py
index b337c00073..a77d7ab637 100644
--- a/lib/spack/spack/test/util/elf.py
+++ b/lib/spack/spack/test/util/elf.py
@@ -5,7 +5,6 @@
 
 
 import io
-from collections import OrderedDict
 
 import pytest
 
@@ -137,45 +136,46 @@ def test_only_header():
 
 @pytest.mark.requires_executables("gcc")
 @skip_unless_linux
-def test_elf_get_and_replace_rpaths(binary_with_rpaths):
-    long_rpaths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"]
-    executable = str(binary_with_rpaths(rpaths=long_rpaths))
+def test_elf_get_and_replace_rpaths_and_pt_interp(binary_with_rpaths):
+    long_paths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"]
+    executable = str(
+        binary_with_rpaths(rpaths=long_paths, dynamic_linker="/very/long/prefix-b/lib/ld.so")
+    )
 
     # Before
-    assert elf.get_rpaths(executable) == long_rpaths
-
-    replacements = OrderedDict(
-        [
-            (b"/very/long/prefix-a", b"/short-a"),
-            (b"/very/long/prefix-b", b"/short-b"),
-            (b"/very/long", b"/dont"),
-        ]
-    )
+    assert elf.get_rpaths(executable) == long_paths
+
+    replacements = {
+        b"/very/long/prefix-a": b"/short-a",
+        b"/very/long/prefix-b": b"/short-b",
+        b"/very/long": b"/dont",
+    }
 
     # Replace once: should modify the file.
-    assert elf.replace_rpath_in_place_or_raise(executable, replacements)
+    assert elf.substitute_rpath_and_pt_interp_in_place_or_raise(executable, replacements)
 
     # Replace twice: nothing to be done.
-    assert not elf.replace_rpath_in_place_or_raise(executable, replacements)
+    assert not elf.substitute_rpath_and_pt_interp_in_place_or_raise(executable, replacements)
 
     # Verify the rpaths were modified correctly
     assert elf.get_rpaths(executable) == ["/short-a/x", "/short-b/y"]
+    assert elf.get_interpreter(executable) == "/short-b/lib/ld.so"
 
     # Going back to long rpaths should fail, since we've added trailing \0
     # bytes, and replacement can't assume it can write back in repeated null
     # bytes -- it may correspond to zero-length strings for example.
-    with pytest.raises(
-        elf.ElfDynamicSectionUpdateFailed,
-        match="New rpath /very/long/prefix-a/x:/very/long/prefix-b/y is "
-        "longer than old rpath /short-a/x:/short-b/y",
-    ):
-        elf.replace_rpath_in_place_or_raise(
-            executable,
-            OrderedDict(
-                [(b"/short-a", b"/very/long/prefix-a"), (b"/short-b", b"/very/long/prefix-b")]
-            ),
+    with pytest.raises(elf.ElfCStringUpdatesFailed) as info:
+        elf.substitute_rpath_and_pt_interp_in_place_or_raise(
+            executable, {b"/short-a": b"/very/long/prefix-a", b"/short-b": b"/very/long/prefix-b"}
         )
 
+    assert info.value.rpath is not None
+    assert info.value.pt_interp is not None
+    assert info.value.rpath.old_value == b"/short-a/x:/short-b/y"
+    assert info.value.rpath.new_value == b"/very/long/prefix-a/x:/very/long/prefix-b/y"
+    assert info.value.pt_interp.old_value == b"/short-b/lib/ld.so"
+    assert info.value.pt_interp.new_value == b"/very/long/prefix-b/lib/ld.so"
+
 
 @pytest.mark.requires_executables("gcc")
 @skip_unless_linux
diff --git a/lib/spack/spack/util/elf.py b/lib/spack/spack/util/elf.py
index d04d7ca16c..6047c2f4da 100644
--- a/lib/spack/spack/util/elf.py
+++ b/lib/spack/spack/util/elf.py
@@ -6,53 +6,59 @@
 import bisect
 import re
 import struct
-from collections import namedtuple
 from struct import calcsize, unpack, unpack_from
-
-ElfHeader = namedtuple(
-    "ElfHeader",
-    [
-        "e_type",
-        "e_machine",
-        "e_version",
-        "e_entry",
-        "e_phoff",
-        "e_shoff",
-        "e_flags",
-        "e_ehsize",
-        "e_phentsize",
-        "e_phnum",
-        "e_shentsize",
-        "e_shnum",
-        "e_shstrndx",
-    ],
-)
-
-SectionHeader = namedtuple(
-    "SectionHeader",
-    [
-        "sh_name",
-        "sh_type",
-        "sh_flags",
-        "sh_addr",
-        "sh_offset",
-        "sh_size",
-        "sh_link",
-        "sh_info",
-        "sh_addralign",
-        "sh_entsize",
-    ],
-)
-
-ProgramHeader32 = namedtuple(
-    "ProgramHeader32",
-    ["p_type", "p_offset", "p_vaddr", "p_paddr", "p_filesz", "p_memsz", "p_flags", "p_align"],
-)
-
-ProgramHeader64 = namedtuple(
-    "ProgramHeader64",
-    ["p_type", "p_flags", "p_offset", "p_vaddr", "p_paddr", "p_filesz", "p_memsz", "p_align"],
-)
+from typing import BinaryIO, Dict, List, NamedTuple, Optional, Pattern, Tuple
+
+
+class ElfHeader(NamedTuple):
+    e_type: int
+    e_machine: int
+    e_version: int
+    e_entry: int
+    e_phoff: int
+    e_shoff: int
+    e_flags: int
+    e_ehsize: int
+    e_phentsize: int
+    e_phnum: int
+    e_shentsize: int
+    e_shnum: int
+    e_shstrndx: int
+
+
+class SectionHeader(NamedTuple):
+    sh_name: int
+    sh_type: int
+    sh_flags: int
+    sh_addr: int
+    sh_offset: int
+    sh_size: int
+    sh_link: int
+    sh_info: int
+    sh_addralign: int
+    sh_entsize: int
+
+
+class ProgramHeader32(NamedTuple):
+    p_type: int
+    p_offset: int
+    p_vaddr: int
+    p_paddr: int
+    p_filesz: int
+    p_memsz: int
+    p_flags: int
+    p_align: int
+
+
+class ProgramHeader64(NamedTuple):
+    p_type: int
+    p_flags: int
+    p_offset: int
+    p_vaddr: int
+    p_paddr: int
+    p_filesz: int
+    p_memsz: int
+    p_align: int
 
 
 class ELF_CONSTANTS:
@@ -78,6 +84,31 @@ class ELF_CONSTANTS:
 class ElfFile:
     """Parsed ELF file."""
 
+    is_64_bit: bool
+    is_little_endian: bool
+    byte_order: str
+    elf_hdr: ElfHeader
+    pt_load: List[Tuple[int, int]]
+    has_pt_interp: bool
+    pt_interp_p_offset: int
+    pt_interp_p_filesz: int
+    pt_interp_str: bytes
+    has_pt_dynamic: bool
+    pt_dynamic_p_offset: int
+    pt_dynamic_p_filesz: int
+    pt_dynamic_strtab_offset: int
+    has_rpath: bool
+    dt_rpath_offset: int
+    dt_rpath_str: bytes
+    rpath_strtab_offset: int
+    is_runpath: bool
+    has_needed: bool
+    dt_needed_strtab_offsets: List[int]
+    dt_needed_strs: List[bytes]
+    has_soname: bool
+    dt_soname_strtab_offset: int
+    dt_soname_str: bytes
+
     __slots__ = [
         "is_64_bit",
         "is_little_endian",
@@ -120,13 +151,13 @@ class ElfFile:
         self.has_pt_interp = False
 
 
-def parse_c_string(byte_string, start=0):
+def parse_c_string(byte_string: bytes, start: int = 0) -> bytes:
     """
     Retrieve a C-string at a given offset in a byte string
 
     Arguments:
-        byte_string (bytes): String
-        start (int): Offset into the string
+        byte_string: String
+        start: Offset into the string
 
     Returns:
         bytes: A copy of the C-string excluding the terminating null byte
@@ -137,15 +168,15 @@ def parse_c_string(byte_string, start=0):
     return byte_string[start:str_end]
 
 
-def read_exactly(f, num_bytes, msg):
+def read_exactly(f: BinaryIO, num_bytes: int, msg: str) -> bytes:
     """
     Read exactly num_bytes at the current offset, otherwise raise
     a parsing error with the given error message.
 
     Arguments:
         f: file handle
-        num_bytes (int): Number of bytes to read
-        msg (str): Error to show when bytes cannot be read
+        num_bytes: Number of bytes to read
+        msg: Error to show when bytes cannot be read
 
     Returns:
         bytes: the ``num_bytes`` bytes that were read.
@@ -156,19 +187,18 @@ def read_exactly(f, num_bytes, msg):
     return data
 
 
-def parse_program_headers(f, elf):
+def parse_program_headers(f: BinaryIO, elf: ElfFile) -> None:
     """
     Parse program headers
 
     Arguments:
         f: file handle
-        elf (ElfFile): ELF file parser data
+        elf: ELF file parser data
     """
     # Forward to the program header
     f.seek(elf.elf_hdr.e_phoff)
 
     # Here we have to make a mapping from virtual address to offset in the file.
-    ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32
     ph_fmt = elf.byte_order + ("LLQQQQQQ" if elf.is_64_bit else "LLLLLLLL")
     ph_size = calcsize(ph_fmt)
     ph_num = elf.elf_hdr.e_phnum
@@ -176,28 +206,31 @@ def parse_program_headers(f, elf):
     # Read all program headers in one go
     data = read_exactly(f, ph_num * ph_size, "Malformed program header")
 
+    ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32
+
     for i in range(ph_num):
-        ph = ProgramHeader._make(unpack_from(ph_fmt, data, i * ph_size))
+        # mypy currently does not understand the union of two named tuples with equal fields
+        ph = ProgramHeader(*unpack_from(ph_fmt, data, i * ph_size))
 
         # Skip segments of size 0; we don't distinguish between missing segment and
         # empty segments. I've see an empty PT_DYNAMIC section for an ELF file that
         # contained debug data.
-        if ph.p_filesz == 0:
+        if ph.p_filesz == 0:  # type: ignore
             continue
 
         # For PT_LOAD entries: Save offsets and virtual addrs of the loaded ELF segments
         # This way we can map offsets by virtual address to offsets in the file.
-        if ph.p_type == ELF_CONSTANTS.PT_LOAD:
-            elf.pt_load.append((ph.p_offset, ph.p_vaddr))
+        if ph.p_type == ELF_CONSTANTS.PT_LOAD:  # type: ignore
+            elf.pt_load.append((ph.p_offset, ph.p_vaddr))  # type: ignore
 
-        elif ph.p_type == ELF_CONSTANTS.PT_INTERP:
-            elf.pt_interp_p_offset = ph.p_offset
-            elf.pt_interp_p_filesz = ph.p_filesz
+        elif ph.p_type == ELF_CONSTANTS.PT_INTERP:  # type: ignore
+            elf.pt_interp_p_offset = ph.p_offset  # type: ignore
+            elf.pt_interp_p_filesz = ph.p_filesz  # type: ignore
             elf.has_pt_interp = True
 
-        elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC:
-            elf.pt_dynamic_p_offset = ph.p_offset
-            elf.pt_dynamic_p_filesz = ph.p_filesz
+        elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC:  # type: ignore
+            elf.pt_dynamic_p_offset = ph.p_offset  # type: ignore
+            elf.pt_dynamic_p_filesz = ph.p_filesz  # type: ignore
             elf.has_pt_dynamic = True
 
     # The linker sorts PT_LOAD segments by vaddr, but let's do it just to be sure, since
@@ -205,27 +238,27 @@ def parse_program_headers(f, elf):
     elf.pt_load.sort(key=lambda x: x[1])
 
 
-def parse_pt_interp(f, elf):
+def parse_pt_interp(f: BinaryIO, elf: ElfFile) -> None:
     """
     Parse the interpreter (i.e. absolute path to the dynamic linker)
 
     Arguments:
         f: file handle
-        elf (ElfFile): ELF file parser data
+        elf: ELF file parser data
     """
     f.seek(elf.pt_interp_p_offset)
     data = read_exactly(f, elf.pt_interp_p_filesz, "Malformed PT_INTERP entry")
     elf.pt_interp_str = parse_c_string(data)
 
 
-def find_strtab_size_at_offset(f, elf, offset):
+def find_strtab_size_at_offset(f: BinaryIO, elf: ElfFile, offset: int) -> int:
     """
     Retrieve the size of a string table section at a particular known offset
 
     Arguments:
         f: file handle
-        elf (ElfFile): ELF file parser data
-        offset (int): offset of the section in the file (i.e. ``sh_offset``)
+        elf: ELF file parser data
+        offset: offset of the section in the file (i.e. ``sh_offset``)
 
     Returns:
         int: the size of the string table in bytes
@@ -235,50 +268,49 @@ def find_strtab_size_at_offset(f, elf, offset):
     f.seek(elf.elf_hdr.e_shoff)
     for _ in range(elf.elf_hdr.e_shnum):
         data = read_exactly(f, section_hdr_size, "Malformed section header")
-        sh = SectionHeader._make(unpack(section_hdr_fmt, data))
+        sh = SectionHeader(*unpack(section_hdr_fmt, data))
         if sh.sh_type == ELF_CONSTANTS.SHT_STRTAB and sh.sh_offset == offset:
             return sh.sh_size
 
     raise ElfParsingError("Could not determine strtab size")
 
 
-def retrieve_strtab(f, elf, offset):
+def retrieve_strtab(f: BinaryIO, elf: ElfFile, offset: int) -> bytes:
     """
     Read a full string table at the given offset, which
     requires looking it up in the section headers.
 
     Arguments:
-        elf (ElfFile): ELF file parser data
-        vaddr (int): virtual address
+        elf: ELF file parser data
+        vaddr: virtual address
 
-    Returns:
-        bytes: file offset
+    Returns: file offset
     """
     size = find_strtab_size_at_offset(f, elf, offset)
     f.seek(offset)
     return read_exactly(f, size, "Could not read string table")
 
 
-def vaddr_to_offset(elf, vaddr):
+def vaddr_to_offset(elf: ElfFile, vaddr: int) -> int:
     """
     Given a virtual address, find the corresponding offset in the ELF file itself.
 
     Arguments:
-        elf (ElfFile): ELF file parser data
-        vaddr (int): virtual address
+        elf: ELF file parser data
+        vaddr: virtual address
     """
     idx = bisect.bisect_right([p_vaddr for (p_offset, p_vaddr) in elf.pt_load], vaddr) - 1
     p_offset, p_vaddr = elf.pt_load[idx]
     return p_offset - p_vaddr + vaddr
 
 
-def parse_pt_dynamic(f, elf):
+def parse_pt_dynamic(f: BinaryIO, elf: ElfFile) -> None:
     """
     Parse the dynamic section of an ELF file
 
     Arguments:
         f: file handle
-        elf (ElfFile): ELF file parse data
+        elf: ELF file parse data
     """
     dynamic_array_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL")
     dynamic_array_size = calcsize(dynamic_array_fmt)
@@ -347,7 +379,7 @@ def parse_pt_dynamic(f, elf):
         elf.dt_rpath_str = parse_c_string(string_table, elf.rpath_strtab_offset)
 
 
-def parse_header(f, elf):
+def parse_header(f: BinaryIO, elf: ElfFile) -> None:
     # Read the 32/64 bit class independent part of the header and validate
     e_ident = f.read(16)
 
@@ -374,10 +406,12 @@ def parse_header(f, elf):
     elf_header_fmt = elf.byte_order + ("HHLQQQLHHHHHH" if elf.is_64_bit else "HHLLLLLHHHHHH")
     hdr_size = calcsize(elf_header_fmt)
     data = read_exactly(f, hdr_size, "ELF header malformed")
-    elf.elf_hdr = ElfHeader._make(unpack(elf_header_fmt, data))
+    elf.elf_hdr = ElfHeader(*unpack(elf_header_fmt, data))
 
 
-def _do_parse_elf(f, interpreter=True, dynamic_section=True, only_header=False):
+def _do_parse_elf(
+    f: BinaryIO, interpreter: bool = True, dynamic_section: bool = True, only_header: bool = False
+) -> ElfFile:
     # We don't (yet?) allow parsing ELF files at a nonzero offset, we just
     # jump to absolute offsets as they are specified in the ELF file.
     if f.tell() != 0:
@@ -406,7 +440,12 @@ def _do_parse_elf(f, interpreter=True, dynamic_section=True, only_header=False):
     return elf
 
 
-def parse_elf(f, interpreter=False, dynamic_section=False, only_header=False):
+def parse_elf(
+    f: BinaryIO,
+    interpreter: bool = False,
+    dynamic_section: bool = False,
+    only_header: bool = False,
+) -> ElfFile:
     """Given a file handle f for an ELF file opened in binary mode, return an ElfFile
     object that is stores data about rpaths"""
     try:
@@ -417,28 +456,30 @@ def parse_elf(f, interpreter=False, dynamic_section=False, only_header=False):
         raise ElfParsingError("Malformed ELF file")
 
 
-def get_rpaths(path):
-    """Returns list of rpaths of the given file as UTF-8 strings, or None if the file
-    does not have any rpaths."""
+def get_rpaths(path: str) -> Optional[List[str]]:
+    """Returns list of rpaths of the given file as UTF-8 strings, or None if not set."""
     try:
         with open(path, "rb") as f:
             elf = parse_elf(f, interpreter=False, dynamic_section=True)
+            return elf.dt_rpath_str.decode("utf-8").split(":") if elf.has_rpath else None
     except ElfParsingError:
         return None
 
-    if not elf.has_rpath:
-        return None
 
-    # If it does, split the string in components
-    rpath = elf.dt_rpath_str
-    rpath = rpath.decode("utf-8")
-    return rpath.split(":")
+def get_interpreter(path: str) -> Optional[str]:
+    """Returns the interpreter of the given file as UTF-8 string, or None if not set."""
+    try:
+        with open(path, "rb") as f:
+            elf = parse_elf(f, interpreter=True, dynamic_section=False)
+            return elf.pt_interp_str.decode("utf-8") if elf.has_pt_interp else None
+    except ElfParsingError:
+        return None
 
 
-def delete_rpath(path):
-    """Modifies a binary to remove the rpath. It zeros out the rpath string
-    and also drops the DT_R(UN)PATH entry from the dynamic section, so it doesn't
-    show up in 'readelf -d file', nor in 'strings file'."""
+def delete_rpath(path: str) -> None:
+    """Modifies a binary to remove the rpath. It zeros out the rpath string and also drops the
+    DT_R(UN)PATH entry from the dynamic section, so it doesn't show up in 'readelf -d file', nor
+    in 'strings file'."""
     with open(path, "rb+") as f:
         elf = parse_elf(f, interpreter=False, dynamic_section=True)
 
@@ -476,75 +517,136 @@ def delete_rpath(path):
             old_offset += dynamic_array_size
 
 
-def replace_rpath_in_place_or_raise(path, substitutions):
-    regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys()))
+class CStringType:
+    PT_INTERP = 1
+    RPATH = 2
 
-    try:
-        with open(path, "rb+") as f:
-            elf = parse_elf(f, interpreter=False, dynamic_section=True)
 
-            # If there's no RPATH, then there's no need to replace anything.
-            if not elf.has_rpath:
-                return False
+class UpdateCStringAction:
+    def __init__(self, old_value: bytes, new_value: bytes, offset: int):
+        self.old_value = old_value
+        self.new_value = new_value
+        self.offset = offset
 
-            # Get the non-empty rpaths. Sometimes there's a bunch of trailing
-            # colons ::::: used for padding, we don't add them back to make it
-            # more likely that the string doesn't grow.
-            rpaths = list(filter(len, elf.dt_rpath_str.split(b":")))
+    @property
+    def inplace(self) -> bool:
+        return len(self.new_value) <= len(self.old_value)
 
-            num_rpaths = len(rpaths)
+    def apply(self, f: BinaryIO) -> None:
+        assert self.inplace
 
-            if num_rpaths == 0:
-                return False
+        f.seek(self.offset)
+        f.write(self.new_value)
 
-            changed = False
-            for i in range(num_rpaths):
-                old_rpath = rpaths[i]
-                match = regex.match(old_rpath)
-                if match:
-                    changed = True
-                    rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :]
+        # We zero out the bits we shortened because (a) it should be a
+        # C-string and (b) it's nice not to have spurious parts of old
+        # paths in the output of `strings file`. Note that we're all
+        # good when pad == 0; the original terminating null is used.
+        f.write(b"\x00" * (len(self.old_value) - len(self.new_value)))
 
-            # Nothing to replace!
-            if not changed:
-                return False
 
-            new_rpath_string = b":".join(rpaths)
+def _get_rpath_substitution(
+    elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes]
+) -> Optional[UpdateCStringAction]:
+    """Make rpath substitutions in-place."""
+    # If there's no RPATH, then there's no need to replace anything.
+    if not elf.has_rpath:
+        return None
+
+    # Get the non-empty rpaths. Sometimes there's a bunch of trailing
+    # colons ::::: used for padding, we don't add them back to make it
+    # more likely that the string doesn't grow.
+    rpaths = list(filter(len, elf.dt_rpath_str.split(b":")))
 
-            pad = len(elf.dt_rpath_str) - len(new_rpath_string)
+    num_rpaths = len(rpaths)
 
-            if pad < 0:
-                raise ElfDynamicSectionUpdateFailed(elf.dt_rpath_str, new_rpath_string)
+    if num_rpaths == 0:
+        return None
+
+    changed = False
+    for i in range(num_rpaths):
+        old_rpath = rpaths[i]
+        match = regex.match(old_rpath)
+        if match:
+            changed = True
+            rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :]
+
+    # Nothing to replace!
+    if not changed:
+        return None
+
+    return UpdateCStringAction(
+        old_value=elf.dt_rpath_str,
+        new_value=b":".join(rpaths),
+        # The rpath is at a given offset in the string table used by the dynamic section.
+        offset=elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset,
+    )
+
+
+def _get_pt_interp_substitution(
+    elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes]
+) -> Optional[UpdateCStringAction]:
+    """Make interpreter substitutions in-place."""
+    if not elf.has_pt_interp:
+        return None
+
+    match = regex.match(elf.pt_interp_str)
+    if not match:
+        return None
+
+    return UpdateCStringAction(
+        old_value=elf.pt_interp_str,
+        new_value=substitutions[match.group()] + elf.pt_interp_str[match.end() :],
+        offset=elf.pt_interp_p_offset,
+    )
 
-            # We zero out the bits we shortened because (a) it should be a
-            # C-string and (b) it's nice not to have spurious parts of old
-            # paths in the output of `strings file`. Note that we're all
-            # good when pad == 0; the original terminating null is used.
-            new_rpath_string += b"\x00" * pad
 
-            # The rpath is at a given offset in the string table used by the
-            # dynamic section.
-            rpath_offset = elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset
+def substitute_rpath_and_pt_interp_in_place_or_raise(
+    path: str, substitutions: Dict[bytes, bytes]
+) -> bool:
+    """Returns true if the rpath and interpreter were modified, false if there was nothing to do.
+    Raises ElfCStringUpdatesFailed if the ELF file cannot be updated in-place. This exception
+    contains a list of actions to perform with other tools. The file is left untouched in this
+    case."""
+    regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys()))
+
+    try:
+        with open(path, "rb+") as f:
+            elf = parse_elf(f, interpreter=True, dynamic_section=True)
+
+            # Get the actions to perform.
+            rpath = _get_rpath_substitution(elf, regex, substitutions)
+            pt_interp = _get_pt_interp_substitution(elf, regex, substitutions)
+
+            # Nothing to do.
+            if not rpath and not pt_interp:
+                return False
+
+            # If we can't update in-place, leave it to other tools, don't do partial updates.
+            if rpath and not rpath.inplace or pt_interp and not pt_interp.inplace:
+                raise ElfCStringUpdatesFailed(rpath, pt_interp)
+
+            # Otherwise, apply the updates.
+            if rpath:
+                rpath.apply(f)
+
+            if pt_interp:
+                pt_interp.apply(f)
 
-            f.seek(rpath_offset)
-            f.write(new_rpath_string)
             return True
 
     except ElfParsingError:
-        # This just means the file wasnt an elf file, so there's no point
+        # This just means the file wasn't an elf file, so there's no point
         # in updating its rpath anyways; ignore this problem.
         return False
 
 
-class ElfDynamicSectionUpdateFailed(Exception):
-    def __init__(self, old, new):
-        self.old = old
-        self.new = new
-        super().__init__(
-            "New rpath {} is longer than old rpath {}".format(
-                new.decode("utf-8"), old.decode("utf-8")
-            )
-        )
+class ElfCStringUpdatesFailed(Exception):
+    def __init__(
+        self, rpath: Optional[UpdateCStringAction], pt_interp: Optional[UpdateCStringAction]
+    ):
+        self.rpath = rpath
+        self.pt_interp = pt_interp
 
 
 class ElfParsingError(Exception):
author	Harmen Stoppels <me@harmenstoppels.nl>	2024-01-30 22:36:49 +0100
committer	GitHub <noreply@github.com>	2024-01-30 22:36:49 +0100
commit	28eea2994f04ec73f647cf636de433fbc9317c4c (patch)
tree	177b258cbb2b0a855cdb901c2f3c1b02b2f82515 /lib
parent	6d55caabe88f00aff7ce82e07f1de5b8f136e1e3 (diff)
download	spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.gz spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.bz2 spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.xz spack-28eea2994f04ec73f647cf636de433fbc9317c4c.zip