diff options
author | Harmen Stoppels <me@harmenstoppels.nl> | 2024-01-30 22:36:49 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-30 22:36:49 +0100 |
commit | 28eea2994f04ec73f647cf636de433fbc9317c4c (patch) | |
tree | 177b258cbb2b0a855cdb901c2f3c1b02b2f82515 /lib | |
parent | 6d55caabe88f00aff7ce82e07f1de5b8f136e1e3 (diff) | |
download | spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.gz spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.bz2 spack-28eea2994f04ec73f647cf636de433fbc9317c4c.tar.xz spack-28eea2994f04ec73f647cf636de433fbc9317c4c.zip |
elf: relocate PT_INTERP (#42318)
Relocation of `PT_INTERP` in ELF files already happens to work when going from a longer to a shorter path, thanks to generic binary relocation (i.e. find and replace). This PR improves it:
1. Adds logic to grow `PT_INTERP` strings through patchelf (which is only useful if the interpreter and rpath paths are the _only_ paths in the binary that need to be relocated)
2. Makes shrinking `PT_INTERP` cleaner. Before this PR, when you used a Spack-built glibc as a link dependency and relocated
executables that use its dynamic linker, you'd end up with
```
$ file exe
exe: ELF 64-bit LSB pie executable, ..., interpreter /////////////////////////////////////////////////path/to/glibc/lib/ld-linux.so
```
With this PR you get something sensible:
```
$ file exe
exe: ELF 64-bit LSB pie executable, ..., interpreter /path/to/glibc/lib/ld-linux.so
```
When Spack cannot modify the interpreter or rpath strings in-place, it errors out without modifying the file, and leaves both tasks to patchelf instead.
Also add type hints to `elf.py`.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/bootstrap/core.py | 2 | ||||
-rw-r--r-- | lib/spack/spack/hooks/drop_redundant_rpaths.py | 4 | ||||
-rw-r--r-- | lib/spack/spack/relocate.py | 48 | ||||
-rw-r--r-- | lib/spack/spack/test/conftest.py | 6 | ||||
-rw-r--r-- | lib/spack/spack/test/relocate.py | 8 | ||||
-rw-r--r-- | lib/spack/spack/test/util/elf.py | 50 | ||||
-rw-r--r-- | lib/spack/spack/util/elf.py | 396 |
7 files changed, 306 insertions, 208 deletions
diff --git a/lib/spack/spack/bootstrap/core.py b/lib/spack/spack/bootstrap/core.py index 6ff18db843..34aff29b55 100644 --- a/lib/spack/spack/bootstrap/core.py +++ b/lib/spack/spack/bootstrap/core.py @@ -542,7 +542,7 @@ def verify_patchelf(patchelf: "spack.util.executable.Executable") -> bool: return version >= spack.version.Version("0.13.1") -def ensure_patchelf_in_path_or_raise() -> None: +def ensure_patchelf_in_path_or_raise() -> spack.util.executable.Executable: """Ensure patchelf is in the PATH or raise.""" # The old concretizer is not smart and we're doing its job: if the latest patchelf # does not concretize because the compiler doesn't support C++17, we try to diff --git a/lib/spack/spack/hooks/drop_redundant_rpaths.py b/lib/spack/spack/hooks/drop_redundant_rpaths.py index 0c43fc1b5a..2577ce18f4 100644 --- a/lib/spack/spack/hooks/drop_redundant_rpaths.py +++ b/lib/spack/spack/hooks/drop_redundant_rpaths.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import os -from typing import IO, Optional, Tuple +from typing import BinaryIO, Optional, Tuple import llnl.util.tty as tty from llnl.util.filesystem import BaseDirectoryVisitor, visit_directory_tree @@ -18,7 +18,7 @@ def should_keep(path: bytes) -> bool: return path.startswith(b"$") or (os.path.isabs(path) and os.path.lexists(path)) -def _drop_redundant_rpaths(f: IO) -> Optional[Tuple[bytes, bytes]]: +def _drop_redundant_rpaths(f: BinaryIO) -> Optional[Tuple[bytes, bytes]]: """Drop redundant entries from rpath. 
Args: diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py index 6fcb4e98bc..0bbbba7ef0 100644 --- a/lib/spack/spack/relocate.py +++ b/lib/spack/spack/relocate.py @@ -7,6 +7,7 @@ import itertools import os import re from collections import OrderedDict +from typing import List, Optional import macholib.mach_o import macholib.MachO @@ -47,7 +48,7 @@ class InstallRootStringError(spack.error.SpackError): @memoized -def _patchelf(): +def _patchelf() -> Optional[executable.Executable]: """Return the full path to the patchelf binary, if available, else None.""" import spack.bootstrap @@ -55,9 +56,7 @@ def _patchelf(): return None with spack.bootstrap.ensure_bootstrap_configuration(): - patchelf = spack.bootstrap.ensure_patchelf_in_path_or_raise() - - return patchelf.path + return spack.bootstrap.ensure_patchelf_in_path_or_raise() def _elf_rpaths_for(path): @@ -340,31 +339,34 @@ def macholib_get_paths(cur_path): return (rpaths, deps, ident) -def _set_elf_rpaths(target, rpaths): - """Replace the original RPATH of the target with the paths passed - as arguments. +def _set_elf_rpaths_and_interpreter( + target: str, rpaths: List[str], interpreter: Optional[str] = None +) -> Optional[str]: + """Replace the original RPATH of the target with the paths passed as arguments. Args: target: target executable. Must be an ELF object. rpaths: paths to be set in the RPATH + interpreter: optionally set the interpreter Returns: - A string concatenating the stdout and stderr of the call - to ``patchelf`` if it was invoked + A string concatenating the stdout and stderr of the call to ``patchelf`` if it was invoked """ # Join the paths using ':' as a separator rpaths_str = ":".join(rpaths) - patchelf, output = executable.Executable(_patchelf()), None try: + # TODO: error handling is not great here? 
# TODO: revisit the use of --force-rpath as it might be conditional # TODO: if we want to support setting RUNPATH from binary packages - patchelf_args = ["--force-rpath", "--set-rpath", rpaths_str, target] - output = patchelf(*patchelf_args, output=str, error=str) + args = ["--force-rpath", "--set-rpath", rpaths_str] + if interpreter: + args.extend(["--set-interpreter", interpreter]) + args.append(target) + return _patchelf()(*args, output=str, error=str) except executable.ProcessError as e: - msg = "patchelf --force-rpath --set-rpath {0} failed with error {1}" - tty.warn(msg.format(target, e)) - return output + tty.warn(str(e)) + return None def needs_binary_relocation(m_type, m_subtype): @@ -501,10 +503,12 @@ def new_relocate_elf_binaries(binaries, prefix_to_prefix): for path in binaries: try: - elf.replace_rpath_in_place_or_raise(path, prefix_to_prefix) - except elf.ElfDynamicSectionUpdateFailed as e: - # Fall back to the old `patchelf --set-rpath` method. - _set_elf_rpaths(path, e.new.decode("utf-8").split(":")) + elf.substitute_rpath_and_pt_interp_in_place_or_raise(path, prefix_to_prefix) + except elf.ElfCStringUpdatesFailed as e: + # Fall back to `patchelf --set-rpath ... 
--set-interpreter ...` + rpaths = e.rpath.new_value.decode("utf-8").split(":") if e.rpath else [] + interpreter = e.pt_interp.new_value.decode("utf-8") if e.pt_interp else None + _set_elf_rpaths_and_interpreter(path, rpaths=rpaths, interpreter=interpreter) def relocate_elf_binaries( @@ -546,10 +550,10 @@ def relocate_elf_binaries( new_rpaths = _make_relative(new_binary, new_root, new_norm_rpaths) # check to see if relative rpaths are changed before rewriting if sorted(new_rpaths) != sorted(orig_rpaths): - _set_elf_rpaths(new_binary, new_rpaths) + _set_elf_rpaths_and_interpreter(new_binary, new_rpaths) else: new_rpaths = _transform_rpaths(orig_rpaths, orig_root, new_prefixes) - _set_elf_rpaths(new_binary, new_rpaths) + _set_elf_rpaths_and_interpreter(new_binary, new_rpaths) def make_link_relative(new_links, orig_links): @@ -596,7 +600,7 @@ def make_elf_binaries_relative(new_binaries, orig_binaries, orig_layout_root): orig_rpaths = _elf_rpaths_for(new_binary) if orig_rpaths: new_rpaths = _make_relative(orig_binary, orig_layout_root, orig_rpaths) - _set_elf_rpaths(new_binary, new_rpaths) + _set_elf_rpaths_and_interpreter(new_binary, new_rpaths) def warn_if_link_cant_be_relocated(link, target): diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index 1f6e054232..0fa9b5f127 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -1851,7 +1851,7 @@ def binary_with_rpaths(prefix_tmpdir): paths are encoded with `$ORIGIN` prepended. 
""" - def _factory(rpaths, message="Hello world!"): + def _factory(rpaths, message="Hello world!", dynamic_linker="/lib64/ld-linux.so.2"): source = prefix_tmpdir.join("main.c") source.write( """ @@ -1867,10 +1867,10 @@ def binary_with_rpaths(prefix_tmpdir): executable = source.dirpath("main.x") # Encode relative RPATHs using `$ORIGIN` as the root prefix rpaths = [x if os.path.isabs(x) else os.path.join("$ORIGIN", x) for x in rpaths] - rpath_str = ":".join(rpaths) opts = [ "-Wl,--disable-new-dtags", - "-Wl,-rpath={0}".format(rpath_str), + f"-Wl,-rpath={':'.join(rpaths)}", + f"-Wl,--dynamic-linker,{dynamic_linker}", str(source), "-o", str(executable), diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py index 6c8c0b1594..0ed6af59e2 100644 --- a/lib/spack/spack/test/relocate.py +++ b/lib/spack/spack/test/relocate.py @@ -47,14 +47,6 @@ def text_in_bin(text, binary): @pytest.fixture() -def mock_patchelf(tmpdir, mock_executable): - def _factory(output): - return mock_executable("patchelf", output=output) - - return _factory - - -@pytest.fixture() def make_dylib(tmpdir_factory): """Create a shared library with unfriendly qualities. 
diff --git a/lib/spack/spack/test/util/elf.py b/lib/spack/spack/test/util/elf.py index b337c00073..a77d7ab637 100644 --- a/lib/spack/spack/test/util/elf.py +++ b/lib/spack/spack/test/util/elf.py @@ -5,7 +5,6 @@ import io -from collections import OrderedDict import pytest @@ -137,45 +136,46 @@ def test_only_header(): @pytest.mark.requires_executables("gcc") @skip_unless_linux -def test_elf_get_and_replace_rpaths(binary_with_rpaths): - long_rpaths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"] - executable = str(binary_with_rpaths(rpaths=long_rpaths)) +def test_elf_get_and_replace_rpaths_and_pt_interp(binary_with_rpaths): + long_paths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"] + executable = str( + binary_with_rpaths(rpaths=long_paths, dynamic_linker="/very/long/prefix-b/lib/ld.so") + ) # Before - assert elf.get_rpaths(executable) == long_rpaths - - replacements = OrderedDict( - [ - (b"/very/long/prefix-a", b"/short-a"), - (b"/very/long/prefix-b", b"/short-b"), - (b"/very/long", b"/dont"), - ] - ) + assert elf.get_rpaths(executable) == long_paths + + replacements = { + b"/very/long/prefix-a": b"/short-a", + b"/very/long/prefix-b": b"/short-b", + b"/very/long": b"/dont", + } # Replace once: should modify the file. - assert elf.replace_rpath_in_place_or_raise(executable, replacements) + assert elf.substitute_rpath_and_pt_interp_in_place_or_raise(executable, replacements) # Replace twice: nothing to be done. - assert not elf.replace_rpath_in_place_or_raise(executable, replacements) + assert not elf.substitute_rpath_and_pt_interp_in_place_or_raise(executable, replacements) # Verify the rpaths were modified correctly assert elf.get_rpaths(executable) == ["/short-a/x", "/short-b/y"] + assert elf.get_interpreter(executable) == "/short-b/lib/ld.so" # Going back to long rpaths should fail, since we've added trailing \0 # bytes, and replacement can't assume it can write back in repeated null # bytes -- it may correspond to zero-length strings for example. 
- with pytest.raises( - elf.ElfDynamicSectionUpdateFailed, - match="New rpath /very/long/prefix-a/x:/very/long/prefix-b/y is " - "longer than old rpath /short-a/x:/short-b/y", - ): - elf.replace_rpath_in_place_or_raise( - executable, - OrderedDict( - [(b"/short-a", b"/very/long/prefix-a"), (b"/short-b", b"/very/long/prefix-b")] - ), + with pytest.raises(elf.ElfCStringUpdatesFailed) as info: + elf.substitute_rpath_and_pt_interp_in_place_or_raise( + executable, {b"/short-a": b"/very/long/prefix-a", b"/short-b": b"/very/long/prefix-b"} ) + assert info.value.rpath is not None + assert info.value.pt_interp is not None + assert info.value.rpath.old_value == b"/short-a/x:/short-b/y" + assert info.value.rpath.new_value == b"/very/long/prefix-a/x:/very/long/prefix-b/y" + assert info.value.pt_interp.old_value == b"/short-b/lib/ld.so" + assert info.value.pt_interp.new_value == b"/very/long/prefix-b/lib/ld.so" + @pytest.mark.requires_executables("gcc") @skip_unless_linux diff --git a/lib/spack/spack/util/elf.py b/lib/spack/spack/util/elf.py index d04d7ca16c..6047c2f4da 100644 --- a/lib/spack/spack/util/elf.py +++ b/lib/spack/spack/util/elf.py @@ -6,53 +6,59 @@ import bisect import re import struct -from collections import namedtuple from struct import calcsize, unpack, unpack_from - -ElfHeader = namedtuple( - "ElfHeader", - [ - "e_type", - "e_machine", - "e_version", - "e_entry", - "e_phoff", - "e_shoff", - "e_flags", - "e_ehsize", - "e_phentsize", - "e_phnum", - "e_shentsize", - "e_shnum", - "e_shstrndx", - ], -) - -SectionHeader = namedtuple( - "SectionHeader", - [ - "sh_name", - "sh_type", - "sh_flags", - "sh_addr", - "sh_offset", - "sh_size", - "sh_link", - "sh_info", - "sh_addralign", - "sh_entsize", - ], -) - -ProgramHeader32 = namedtuple( - "ProgramHeader32", - ["p_type", "p_offset", "p_vaddr", "p_paddr", "p_filesz", "p_memsz", "p_flags", "p_align"], -) - -ProgramHeader64 = namedtuple( - "ProgramHeader64", - ["p_type", "p_flags", "p_offset", "p_vaddr", "p_paddr", 
"p_filesz", "p_memsz", "p_align"], -) +from typing import BinaryIO, Dict, List, NamedTuple, Optional, Pattern, Tuple + + +class ElfHeader(NamedTuple): + e_type: int + e_machine: int + e_version: int + e_entry: int + e_phoff: int + e_shoff: int + e_flags: int + e_ehsize: int + e_phentsize: int + e_phnum: int + e_shentsize: int + e_shnum: int + e_shstrndx: int + + +class SectionHeader(NamedTuple): + sh_name: int + sh_type: int + sh_flags: int + sh_addr: int + sh_offset: int + sh_size: int + sh_link: int + sh_info: int + sh_addralign: int + sh_entsize: int + + +class ProgramHeader32(NamedTuple): + p_type: int + p_offset: int + p_vaddr: int + p_paddr: int + p_filesz: int + p_memsz: int + p_flags: int + p_align: int + + +class ProgramHeader64(NamedTuple): + p_type: int + p_flags: int + p_offset: int + p_vaddr: int + p_paddr: int + p_filesz: int + p_memsz: int + p_align: int class ELF_CONSTANTS: @@ -78,6 +84,31 @@ class ELF_CONSTANTS: class ElfFile: """Parsed ELF file.""" + is_64_bit: bool + is_little_endian: bool + byte_order: str + elf_hdr: ElfHeader + pt_load: List[Tuple[int, int]] + has_pt_interp: bool + pt_interp_p_offset: int + pt_interp_p_filesz: int + pt_interp_str: bytes + has_pt_dynamic: bool + pt_dynamic_p_offset: int + pt_dynamic_p_filesz: int + pt_dynamic_strtab_offset: int + has_rpath: bool + dt_rpath_offset: int + dt_rpath_str: bytes + rpath_strtab_offset: int + is_runpath: bool + has_needed: bool + dt_needed_strtab_offsets: List[int] + dt_needed_strs: List[bytes] + has_soname: bool + dt_soname_strtab_offset: int + dt_soname_str: bytes + __slots__ = [ "is_64_bit", "is_little_endian", @@ -120,13 +151,13 @@ class ElfFile: self.has_pt_interp = False -def parse_c_string(byte_string, start=0): +def parse_c_string(byte_string: bytes, start: int = 0) -> bytes: """ Retrieve a C-string at a given offset in a byte string Arguments: - byte_string (bytes): String - start (int): Offset into the string + byte_string: String + start: Offset into the string Returns: 
bytes: A copy of the C-string excluding the terminating null byte @@ -137,15 +168,15 @@ def parse_c_string(byte_string, start=0): return byte_string[start:str_end] -def read_exactly(f, num_bytes, msg): +def read_exactly(f: BinaryIO, num_bytes: int, msg: str) -> bytes: """ Read exactly num_bytes at the current offset, otherwise raise a parsing error with the given error message. Arguments: f: file handle - num_bytes (int): Number of bytes to read - msg (str): Error to show when bytes cannot be read + num_bytes: Number of bytes to read + msg: Error to show when bytes cannot be read Returns: bytes: the ``num_bytes`` bytes that were read. @@ -156,19 +187,18 @@ def read_exactly(f, num_bytes, msg): return data -def parse_program_headers(f, elf): +def parse_program_headers(f: BinaryIO, elf: ElfFile) -> None: """ Parse program headers Arguments: f: file handle - elf (ElfFile): ELF file parser data + elf: ELF file parser data """ # Forward to the program header f.seek(elf.elf_hdr.e_phoff) # Here we have to make a mapping from virtual address to offset in the file. - ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32 ph_fmt = elf.byte_order + ("LLQQQQQQ" if elf.is_64_bit else "LLLLLLLL") ph_size = calcsize(ph_fmt) ph_num = elf.elf_hdr.e_phnum @@ -176,28 +206,31 @@ def parse_program_headers(f, elf): # Read all program headers in one go data = read_exactly(f, ph_num * ph_size, "Malformed program header") + ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32 + for i in range(ph_num): - ph = ProgramHeader._make(unpack_from(ph_fmt, data, i * ph_size)) + # mypy currently does not understand the union of two named tuples with equal fields + ph = ProgramHeader(*unpack_from(ph_fmt, data, i * ph_size)) # Skip segments of size 0; we don't distinguish between missing segment and # empty segments. I've see an empty PT_DYNAMIC section for an ELF file that # contained debug data. 
- if ph.p_filesz == 0: + if ph.p_filesz == 0: # type: ignore continue # For PT_LOAD entries: Save offsets and virtual addrs of the loaded ELF segments # This way we can map offsets by virtual address to offsets in the file. - if ph.p_type == ELF_CONSTANTS.PT_LOAD: - elf.pt_load.append((ph.p_offset, ph.p_vaddr)) + if ph.p_type == ELF_CONSTANTS.PT_LOAD: # type: ignore + elf.pt_load.append((ph.p_offset, ph.p_vaddr)) # type: ignore - elif ph.p_type == ELF_CONSTANTS.PT_INTERP: - elf.pt_interp_p_offset = ph.p_offset - elf.pt_interp_p_filesz = ph.p_filesz + elif ph.p_type == ELF_CONSTANTS.PT_INTERP: # type: ignore + elf.pt_interp_p_offset = ph.p_offset # type: ignore + elf.pt_interp_p_filesz = ph.p_filesz # type: ignore elf.has_pt_interp = True - elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC: - elf.pt_dynamic_p_offset = ph.p_offset - elf.pt_dynamic_p_filesz = ph.p_filesz + elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC: # type: ignore + elf.pt_dynamic_p_offset = ph.p_offset # type: ignore + elf.pt_dynamic_p_filesz = ph.p_filesz # type: ignore elf.has_pt_dynamic = True # The linker sorts PT_LOAD segments by vaddr, but let's do it just to be sure, since @@ -205,27 +238,27 @@ def parse_program_headers(f, elf): elf.pt_load.sort(key=lambda x: x[1]) -def parse_pt_interp(f, elf): +def parse_pt_interp(f: BinaryIO, elf: ElfFile) -> None: """ Parse the interpreter (i.e. absolute path to the dynamic linker) Arguments: f: file handle - elf (ElfFile): ELF file parser data + elf: ELF file parser data """ f.seek(elf.pt_interp_p_offset) data = read_exactly(f, elf.pt_interp_p_filesz, "Malformed PT_INTERP entry") elf.pt_interp_str = parse_c_string(data) -def find_strtab_size_at_offset(f, elf, offset): +def find_strtab_size_at_offset(f: BinaryIO, elf: ElfFile, offset: int) -> int: """ Retrieve the size of a string table section at a particular known offset Arguments: f: file handle - elf (ElfFile): ELF file parser data - offset (int): offset of the section in the file (i.e. 
``sh_offset``) + elf: ELF file parser data + offset: offset of the section in the file (i.e. ``sh_offset``) Returns: int: the size of the string table in bytes @@ -235,50 +268,49 @@ def find_strtab_size_at_offset(f, elf, offset): f.seek(elf.elf_hdr.e_shoff) for _ in range(elf.elf_hdr.e_shnum): data = read_exactly(f, section_hdr_size, "Malformed section header") - sh = SectionHeader._make(unpack(section_hdr_fmt, data)) + sh = SectionHeader(*unpack(section_hdr_fmt, data)) if sh.sh_type == ELF_CONSTANTS.SHT_STRTAB and sh.sh_offset == offset: return sh.sh_size raise ElfParsingError("Could not determine strtab size") -def retrieve_strtab(f, elf, offset): +def retrieve_strtab(f: BinaryIO, elf: ElfFile, offset: int) -> bytes: """ Read a full string table at the given offset, which requires looking it up in the section headers. Arguments: - elf (ElfFile): ELF file parser data - vaddr (int): virtual address + elf: ELF file parser data + vaddr: virtual address - Returns: - bytes: file offset + Returns: file offset """ size = find_strtab_size_at_offset(f, elf, offset) f.seek(offset) return read_exactly(f, size, "Could not read string table") -def vaddr_to_offset(elf, vaddr): +def vaddr_to_offset(elf: ElfFile, vaddr: int) -> int: """ Given a virtual address, find the corresponding offset in the ELF file itself. 
Arguments: - elf (ElfFile): ELF file parser data - vaddr (int): virtual address + elf: ELF file parser data + vaddr: virtual address """ idx = bisect.bisect_right([p_vaddr for (p_offset, p_vaddr) in elf.pt_load], vaddr) - 1 p_offset, p_vaddr = elf.pt_load[idx] return p_offset - p_vaddr + vaddr -def parse_pt_dynamic(f, elf): +def parse_pt_dynamic(f: BinaryIO, elf: ElfFile) -> None: """ Parse the dynamic section of an ELF file Arguments: f: file handle - elf (ElfFile): ELF file parse data + elf: ELF file parse data """ dynamic_array_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL") dynamic_array_size = calcsize(dynamic_array_fmt) @@ -347,7 +379,7 @@ def parse_pt_dynamic(f, elf): elf.dt_rpath_str = parse_c_string(string_table, elf.rpath_strtab_offset) -def parse_header(f, elf): +def parse_header(f: BinaryIO, elf: ElfFile) -> None: # Read the 32/64 bit class independent part of the header and validate e_ident = f.read(16) @@ -374,10 +406,12 @@ def parse_header(f, elf): elf_header_fmt = elf.byte_order + ("HHLQQQLHHHHHH" if elf.is_64_bit else "HHLLLLLHHHHHH") hdr_size = calcsize(elf_header_fmt) data = read_exactly(f, hdr_size, "ELF header malformed") - elf.elf_hdr = ElfHeader._make(unpack(elf_header_fmt, data)) + elf.elf_hdr = ElfHeader(*unpack(elf_header_fmt, data)) -def _do_parse_elf(f, interpreter=True, dynamic_section=True, only_header=False): +def _do_parse_elf( + f: BinaryIO, interpreter: bool = True, dynamic_section: bool = True, only_header: bool = False +) -> ElfFile: # We don't (yet?) allow parsing ELF files at a nonzero offset, we just # jump to absolute offsets as they are specified in the ELF file. 
if f.tell() != 0: @@ -406,7 +440,12 @@ def _do_parse_elf(f, interpreter=True, dynamic_section=True, only_header=False): return elf -def parse_elf(f, interpreter=False, dynamic_section=False, only_header=False): +def parse_elf( + f: BinaryIO, + interpreter: bool = False, + dynamic_section: bool = False, + only_header: bool = False, +) -> ElfFile: """Given a file handle f for an ELF file opened in binary mode, return an ElfFile object that is stores data about rpaths""" try: @@ -417,28 +456,30 @@ def parse_elf(f, interpreter=False, dynamic_section=False, only_header=False): raise ElfParsingError("Malformed ELF file") -def get_rpaths(path): - """Returns list of rpaths of the given file as UTF-8 strings, or None if the file - does not have any rpaths.""" +def get_rpaths(path: str) -> Optional[List[str]]: + """Returns list of rpaths of the given file as UTF-8 strings, or None if not set.""" try: with open(path, "rb") as f: elf = parse_elf(f, interpreter=False, dynamic_section=True) + return elf.dt_rpath_str.decode("utf-8").split(":") if elf.has_rpath else None except ElfParsingError: return None - if not elf.has_rpath: - return None - # If it does, split the string in components - rpath = elf.dt_rpath_str - rpath = rpath.decode("utf-8") - return rpath.split(":") +def get_interpreter(path: str) -> Optional[str]: + """Returns the interpreter of the given file as UTF-8 string, or None if not set.""" + try: + with open(path, "rb") as f: + elf = parse_elf(f, interpreter=True, dynamic_section=False) + return elf.pt_interp_str.decode("utf-8") if elf.has_pt_interp else None + except ElfParsingError: + return None -def delete_rpath(path): - """Modifies a binary to remove the rpath. It zeros out the rpath string - and also drops the DT_R(UN)PATH entry from the dynamic section, so it doesn't - show up in 'readelf -d file', nor in 'strings file'.""" +def delete_rpath(path: str) -> None: + """Modifies a binary to remove the rpath. 
It zeros out the rpath string and also drops the + DT_R(UN)PATH entry from the dynamic section, so it doesn't show up in 'readelf -d file', nor + in 'strings file'.""" with open(path, "rb+") as f: elf = parse_elf(f, interpreter=False, dynamic_section=True) @@ -476,75 +517,136 @@ def delete_rpath(path): old_offset += dynamic_array_size -def replace_rpath_in_place_or_raise(path, substitutions): - regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys())) +class CStringType: + PT_INTERP = 1 + RPATH = 2 - try: - with open(path, "rb+") as f: - elf = parse_elf(f, interpreter=False, dynamic_section=True) - # If there's no RPATH, then there's no need to replace anything. - if not elf.has_rpath: - return False +class UpdateCStringAction: + def __init__(self, old_value: bytes, new_value: bytes, offset: int): + self.old_value = old_value + self.new_value = new_value + self.offset = offset - # Get the non-empty rpaths. Sometimes there's a bunch of trailing - # colons ::::: used for padding, we don't add them back to make it - # more likely that the string doesn't grow. - rpaths = list(filter(len, elf.dt_rpath_str.split(b":"))) + @property + def inplace(self) -> bool: + return len(self.new_value) <= len(self.old_value) - num_rpaths = len(rpaths) + def apply(self, f: BinaryIO) -> None: + assert self.inplace - if num_rpaths == 0: - return False + f.seek(self.offset) + f.write(self.new_value) - changed = False - for i in range(num_rpaths): - old_rpath = rpaths[i] - match = regex.match(old_rpath) - if match: - changed = True - rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :] + # We zero out the bits we shortened because (a) it should be a + # C-string and (b) it's nice not to have spurious parts of old + # paths in the output of `strings file`. Note that we're all + # good when pad == 0; the original terminating null is used. + f.write(b"\x00" * (len(self.old_value) - len(self.new_value))) - # Nothing to replace! 
- if not changed: - return False - new_rpath_string = b":".join(rpaths) +def _get_rpath_substitution( + elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes] +) -> Optional[UpdateCStringAction]: + """Make rpath substitutions in-place.""" + # If there's no RPATH, then there's no need to replace anything. + if not elf.has_rpath: + return None + + # Get the non-empty rpaths. Sometimes there's a bunch of trailing + # colons ::::: used for padding, we don't add them back to make it + # more likely that the string doesn't grow. + rpaths = list(filter(len, elf.dt_rpath_str.split(b":"))) - pad = len(elf.dt_rpath_str) - len(new_rpath_string) + num_rpaths = len(rpaths) - if pad < 0: - raise ElfDynamicSectionUpdateFailed(elf.dt_rpath_str, new_rpath_string) + if num_rpaths == 0: + return None + + changed = False + for i in range(num_rpaths): + old_rpath = rpaths[i] + match = regex.match(old_rpath) + if match: + changed = True + rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :] + + # Nothing to replace! + if not changed: + return None + + return UpdateCStringAction( + old_value=elf.dt_rpath_str, + new_value=b":".join(rpaths), + # The rpath is at a given offset in the string table used by the dynamic section. + offset=elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset, + ) + + +def _get_pt_interp_substitution( + elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes] +) -> Optional[UpdateCStringAction]: + """Make interpreter substitutions in-place.""" + if not elf.has_pt_interp: + return None + + match = regex.match(elf.pt_interp_str) + if not match: + return None + + return UpdateCStringAction( + old_value=elf.pt_interp_str, + new_value=substitutions[match.group()] + elf.pt_interp_str[match.end() :], + offset=elf.pt_interp_p_offset, + ) - # We zero out the bits we shortened because (a) it should be a - # C-string and (b) it's nice not to have spurious parts of old - # paths in the output of `strings file`. 
Note that we're all - # good when pad == 0; the original terminating null is used. - new_rpath_string += b"\x00" * pad - # The rpath is at a given offset in the string table used by the - # dynamic section. - rpath_offset = elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset +def substitute_rpath_and_pt_interp_in_place_or_raise( + path: str, substitutions: Dict[bytes, bytes] +) -> bool: + """Returns true if the rpath and interpreter were modified, false if there was nothing to do. + Raises ElfCStringUpdatesFailed if the ELF file cannot be updated in-place. This exception + contains a list of actions to perform with other tools. The file is left untouched in this + case.""" + regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys())) + + try: + with open(path, "rb+") as f: + elf = parse_elf(f, interpreter=True, dynamic_section=True) + + # Get the actions to perform. + rpath = _get_rpath_substitution(elf, regex, substitutions) + pt_interp = _get_pt_interp_substitution(elf, regex, substitutions) + + # Nothing to do. + if not rpath and not pt_interp: + return False + + # If we can't update in-place, leave it to other tools, don't do partial updates. + if rpath and not rpath.inplace or pt_interp and not pt_interp.inplace: + raise ElfCStringUpdatesFailed(rpath, pt_interp) + + # Otherwise, apply the updates. + if rpath: + rpath.apply(f) + + if pt_interp: + pt_interp.apply(f) - f.seek(rpath_offset) - f.write(new_rpath_string) return True except ElfParsingError: - # This just means the file wasnt an elf file, so there's no point + # This just means the file wasn't an elf file, so there's no point # in updating its rpath anyways; ignore this problem. 
return False -class ElfDynamicSectionUpdateFailed(Exception): - def __init__(self, old, new): - self.old = old - self.new = new - super().__init__( - "New rpath {} is longer than old rpath {}".format( - new.decode("utf-8"), old.decode("utf-8") - ) - ) +class ElfCStringUpdatesFailed(Exception): + def __init__( + self, rpath: Optional[UpdateCStringAction], pt_interp: Optional[UpdateCStringAction] + ): + self.rpath = rpath + self.pt_interp = pt_interp class ElfParsingError(Exception): |