diff options
author | Harmen Stoppels <harmenstoppels@gmail.com> | 2022-11-01 20:42:06 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-01 19:42:06 +0000 |
commit | 230e96fbb8d3e24082ce4f7571f05981be5c148b (patch) | |
tree | 56c032cacb50215f6d9d31f11c2aa1d7e62d4439 /lib | |
parent | 6b3ea9463097524dfc1b329b0cd9af6d935a02e7 (diff) | |
download | spack-230e96fbb8d3e24082ce4f7571f05981be5c148b.tar.gz spack-230e96fbb8d3e24082ce4f7571f05981be5c148b.tar.bz2 spack-230e96fbb8d3e24082ce4f7571f05981be5c148b.tar.xz spack-230e96fbb8d3e24082ce4f7571f05981be5c148b.zip |
Add elf parsing utility function (#33628)
Introduces `spack.util.elf.parse_elf(file_handle)`
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/test/conftest.py | 36 | ||||
-rw-r--r-- | lib/spack/spack/test/relocate.py | 56 | ||||
-rw-r--r-- | lib/spack/spack/test/util/elf.py | 130 | ||||
-rw-r--r-- | lib/spack/spack/util/elf.py | 459 |
4 files changed, 635 insertions, 46 deletions
diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index 9ae346c720..1f3c17916c 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -1789,3 +1789,39 @@ def mock_spider_configs(mock_config_data, monkeypatch): @pytest.fixture(scope="function") def mock_tty_stdout(monkeypatch): monkeypatch.setattr(sys.stdout, "isatty", lambda: True) + + +@pytest.fixture() +def binary_with_rpaths(tmpdir): + """Factory fixture that compiles an ELF binary setting its RPATH. Relative + paths are encoded with `$ORIGIN` prepended. + """ + + def _factory(rpaths, message="Hello world!"): + source = tmpdir.join("main.c") + source.write( + """ + #include <stdio.h> + int main(){{ + printf("{0}"); + }} + """.format( + message + ) + ) + gcc = spack.util.executable.which("gcc") + executable = source.dirpath("main.x") + # Encode relative RPATHs using `$ORIGIN` as the root prefix + rpaths = [x if os.path.isabs(x) else os.path.join("$ORIGIN", x) for x in rpaths] + rpath_str = ":".join(rpaths) + opts = [ + "-Wl,--disable-new-dtags", + "-Wl,-rpath={0}".format(rpath_str), + str(source), + "-o", + str(executable), + ] + gcc(*opts) + return executable + + return _factory diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py index 4c9dd8be66..99dd7a460c 100644 --- a/lib/spack/spack/test/relocate.py +++ b/lib/spack/spack/test/relocate.py @@ -83,42 +83,6 @@ def mock_patchelf(tmpdir, mock_executable): @pytest.fixture() -def hello_world(tmpdir): - """Factory fixture that compiles an ELF binary setting its RPATH. Relative - paths are encoded with `$ORIGIN` prepended. 
- """ - - def _factory(rpaths, message="Hello world!"): - source = tmpdir.join("main.c") - source.write( - """ - #include <stdio.h> - int main(){{ - printf("{0}"); - }} - """.format( - message - ) - ) - gcc = spack.util.executable.which("gcc") - executable = source.dirpath("main.x") - # Encode relative RPATHs using `$ORIGIN` as the root prefix - rpaths = [x if os.path.isabs(x) else os.path.join("$ORIGIN", x) for x in rpaths] - rpath_str = ":".join(rpaths) - opts = [ - "-Wl,--disable-new-dtags", - "-Wl,-rpath={0}".format(rpath_str), - str(source), - "-o", - str(executable), - ] - gcc(*opts) - return executable - - return _factory - - -@pytest.fixture() def make_dylib(tmpdir_factory): """Create a shared library with unfriendly qualities. @@ -315,9 +279,9 @@ def test_set_elf_rpaths_warning(mock_patchelf): @pytest.mark.requires_executables("patchelf", "strings", "file", "gcc") @skip_unless_linux -def test_replace_prefix_bin(hello_world): +def test_replace_prefix_bin(binary_with_rpaths): # Compile an "Hello world!" 
executable and set RPATHs - executable = hello_world(rpaths=["/usr/lib", "/usr/lib64"]) + executable = binary_with_rpaths(rpaths=["/usr/lib", "/usr/lib64"]) # Relocate the RPATHs spack.relocate._replace_prefix_bin(str(executable), {b"/usr": b"/foo"}) @@ -328,9 +292,9 @@ def test_replace_prefix_bin(hello_world): @pytest.mark.requires_executables("patchelf", "strings", "file", "gcc") @skip_unless_linux -def test_relocate_elf_binaries_absolute_paths(hello_world, copy_binary, tmpdir): +def test_relocate_elf_binaries_absolute_paths(binary_with_rpaths, copy_binary, tmpdir): # Create an executable, set some RPATHs, copy it to another location - orig_binary = hello_world(rpaths=[str(tmpdir.mkdir("lib")), "/usr/lib64"]) + orig_binary = binary_with_rpaths(rpaths=[str(tmpdir.mkdir("lib")), "/usr/lib64"]) new_binary = copy_binary(orig_binary) spack.relocate.relocate_elf_binaries( @@ -350,9 +314,9 @@ def test_relocate_elf_binaries_absolute_paths(hello_world, copy_binary, tmpdir): @pytest.mark.requires_executables("patchelf", "strings", "file", "gcc") @skip_unless_linux -def test_relocate_elf_binaries_relative_paths(hello_world, copy_binary): +def test_relocate_elf_binaries_relative_paths(binary_with_rpaths, copy_binary): # Create an executable, set some RPATHs, copy it to another location - orig_binary = hello_world(rpaths=["lib", "lib64", "/opt/local/lib"]) + orig_binary = binary_with_rpaths(rpaths=["lib", "lib64", "/opt/local/lib"]) new_binary = copy_binary(orig_binary) spack.relocate.relocate_elf_binaries( @@ -371,8 +335,8 @@ def test_relocate_elf_binaries_relative_paths(hello_world, copy_binary): @pytest.mark.requires_executables("patchelf", "strings", "file", "gcc") @skip_unless_linux -def test_make_elf_binaries_relative(hello_world, copy_binary, tmpdir): - orig_binary = hello_world( +def test_make_elf_binaries_relative(binary_with_rpaths, copy_binary, tmpdir): + orig_binary = binary_with_rpaths( rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), 
"/opt/local/lib"] ) new_binary = copy_binary(orig_binary) @@ -393,8 +357,8 @@ def test_raise_if_not_relocatable(monkeypatch): @pytest.mark.requires_executables("patchelf", "strings", "file", "gcc") @skip_unless_linux -def test_relocate_text_bin(hello_world, copy_binary, tmpdir): - orig_binary = hello_world( +def test_relocate_text_bin(binary_with_rpaths, copy_binary, tmpdir): + orig_binary = binary_with_rpaths( rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), "/opt/local/lib"], message=str(tmpdir), ) diff --git a/lib/spack/spack/test/util/elf.py b/lib/spack/spack/test/util/elf.py new file mode 100644 index 0000000000..cf6b3aeb27 --- /dev/null +++ b/lib/spack/spack/test/util/elf.py @@ -0,0 +1,130 @@ +# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + + +import io + +import pytest + +import llnl.util.filesystem as fs + +import spack.platforms +import spack.util.elf as elf +import spack.util.executable + + +# note that our elf parser is platform independent... but I guess creating an elf file +# is slightly more difficult with system tools on non-linux. +def skip_unless_linux(f): + return pytest.mark.skipif( + str(spack.platforms.real_host()) != "linux", + reason="implementation currently requires linux", + )(f) + + +@pytest.mark.requires_executables("gcc") +@skip_unless_linux +def test_elf_get_rpaths(binary_with_rpaths): + # Compile an "Hello world!" 
executable and set RPATHs + long_rpaths = ["/very/long/prefix/x", "/very/long/prefix/y"] + executable = str(binary_with_rpaths(rpaths=long_rpaths)) + assert elf.get_rpaths(executable) == long_rpaths + + +@pytest.mark.requires_executables("gcc") +@skip_unless_linux +@pytest.mark.parametrize( + "linker_flag,is_runpath", + [ + ("-Wl,--disable-new-dtags", False), + ("-Wl,--enable-new-dtags", True), + ], +) +def test_elf_parsing_shared_linking(linker_flag, is_runpath, tmpdir): + gcc = spack.util.executable.which("gcc") + + with fs.working_dir(str(tmpdir)): + # Create a library to link to so we can force a dynamic section in an ELF file + with open("foo.c", "w") as f: + f.write("int foo(){return 0;}") + with open("bar.c", "w") as f: + f.write("int foo(); int _start(){return foo();}") + + # Create library and executable linking to it. + gcc("-shared", "-o", "libfoo.so", "-Wl,-soname,libfoo.so.1", "-nostdlib", "foo.c") + gcc( + "-o", + "bar", + linker_flag, + "-Wl,-rpath,/first", + "-Wl,-rpath,/second", + "-Wl,--no-as-needed", + "-nostdlib", + "libfoo.so", + "bar.c", + "-o", + "bar", + ) + + with open("libfoo.so", "rb") as f: + foo_parsed = elf.parse_elf(f, interpreter=True, dynamic_section=True) + + assert not foo_parsed.has_pt_interp + assert foo_parsed.has_pt_dynamic + assert not foo_parsed.has_rpath + assert not foo_parsed.has_needed + assert foo_parsed.has_soname + assert foo_parsed.dt_soname_str == b"libfoo.so.1" + + with open("bar", "rb") as f: + bar_parsed = elf.parse_elf(f, interpreter=True, dynamic_section=True) + + assert bar_parsed.has_pt_interp + assert bar_parsed.has_pt_dynamic + assert bar_parsed.has_rpath + assert bar_parsed.has_needed + assert not bar_parsed.has_soname + assert bar_parsed.dt_rpath_str == b"/first:/second" + assert bar_parsed.dt_needed_strs == [b"libfoo.so.1"] + + +def test_broken_elf(): + # No elf magic + with pytest.raises(elf.ElfParsingError, match="Not an ELF file"): + elf.parse_elf(io.BytesIO(b"x")) + + # Incomplete ELF header + with 
pytest.raises(elf.ElfParsingError, match="Not an ELF file"): + elf.parse_elf(io.BytesIO(b"\x7fELF")) + + # Invalid class + with pytest.raises(elf.ElfParsingError, match="Invalid class"): + elf.parse_elf(io.BytesIO(b"\x7fELF\x09\x01" + b"\x00" * 10)) + + # Invalid data type + with pytest.raises(elf.ElfParsingError, match="Invalid data type"): + elf.parse_elf(io.BytesIO(b"\x7fELF\x01\x09" + b"\x00" * 10)) + + # 64-bit needs at least 64 bytes of header; this is only 56 bytes + with pytest.raises(elf.ElfParsingError, match="ELF header malformed"): + elf.parse_elf(io.BytesIO(b"\x7fELF\x02\x01" + b"\x00" * 50)) + + # 32-bit needs at least 52 bytes of header; this is only 46 bytes + with pytest.raises(elf.ElfParsingError, match="ELF header malformed"): + elf.parse_elf(io.BytesIO(b"\x7fELF\x01\x01" + b"\x00" * 40)) + + # Not a ET_DYN/ET_EXEC on a 32-bit LE ELF + with pytest.raises(elf.ElfParsingError, match="Not an ET_DYN or ET_EXEC"): + elf.parse_elf(io.BytesIO(b"\x7fELF\x01\x01" + (b"\x00" * 10) + b"\x09" + (b"\x00" * 35))) + + +def test_parser_doesnt_deal_with_nonzero_offset(): + # Currently we don't have logic to parse ELF files at nonzero offsets in a file + # This could be useful when e.g. modifying an ELF file inside a tarball or so, + # but currently we cannot. + elf_at_offset_one = io.BytesIO(b"\x00\x7fELF\x01\x01" + b"\x00" * 10) + elf_at_offset_one.read(1) + with pytest.raises(elf.ElfParsingError, match="Cannot parse at a nonzero offset"): + elf.parse_elf(elf_at_offset_one) diff --git a/lib/spack/spack/util/elf.py b/lib/spack/spack/util/elf.py new file mode 100644 index 0000000000..bb9bfbb22d --- /dev/null +++ b/lib/spack/spack/util/elf.py @@ -0,0 +1,459 @@ +# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import bisect +import struct +import sys +from collections import namedtuple +from struct import calcsize, unpack, unpack_from + +ElfHeader = namedtuple( + "ElfHeader", + [ + "e_type", + "e_machine", + "e_version", + "e_entry", + "e_phoff", + "e_shoff", + "e_flags", + "e_ehsize", + "e_phentsize", + "e_phnum", + "e_shentsize", + "e_shnum", + "e_shstrndx", + ], +) + +SectionHeader = namedtuple( + "SectionHeader", + [ + "sh_name", + "sh_type", + "sh_flags", + "sh_addr", + "sh_offset", + "sh_size", + "sh_link", + "sh_info", + "sh_addralign", + "sh_entsize", + ], +) + +ProgramHeader32 = namedtuple( + "ProgramHeader32", + [ + "p_type", + "p_offset", + "p_vaddr", + "p_paddr", + "p_filesz", + "p_memsz", + "p_flags", + "p_align", + ], +) + +ProgramHeader64 = namedtuple( + "ProgramHeader64", + [ + "p_type", + "p_flags", + "p_offset", + "p_vaddr", + "p_paddr", + "p_filesz", + "p_memsz", + "p_align", + ], +) + + +class ELF_CONSTANTS: + MAGIC = b"\x7fELF" + CLASS32 = 1 + CLASS64 = 2 + DATA2LSB = 1 + DATA2MSB = 2 + ET_EXEC = 2 + ET_DYN = 3 + PT_LOAD = 1 + PT_DYNAMIC = 2 + PT_INTERP = 3 + DT_NULL = 0 + DT_NEEDED = 1 + DT_STRTAB = 5 + DT_SONAME = 14 + DT_RPATH = 15 + DT_RUNPATH = 29 + SHT_STRTAB = 3 + + +def get_byte_at(byte_array, idx): + if sys.version_info[0] < 3: + return ord(byte_array[idx]) + return byte_array[idx] + + +class ElfParsingError(Exception): + pass + + +class ElfFile(object): + """Parsed ELF file.""" + + __slots__ = [ + "is_64_bit", + "is_little_endian", + "byte_order", + "elf_hdr", + "pt_load", + # pt_interp + "has_pt_interp", + "pt_interp_p_offset", + "pt_interp_p_filesz", + "pt_interp_str", + # pt_dynamic + "has_pt_dynamic", + "pt_dynamic_p_offset", + "pt_dynamic_p_filesz", + # rpath + "has_rpath", + "dt_rpath_offset", + "dt_rpath_str", + "rpath_strtab_offset", + "is_runpath", + # dt needed + "has_needed", + "dt_needed_strtab_offsets", + "dt_needed_strs", + # dt soname + "has_soname", + 
"dt_soname_strtab_offset", + "dt_soname_str", + ] + + def __init__(self): + self.dt_needed_strtab_offsets = [] + self.has_soname = False + self.has_rpath = False + self.has_needed = False + self.pt_load = [] + self.has_pt_dynamic = False + self.has_pt_interp = False + + +def parse_c_string(byte_string, start=0): + """ + Retrieve a C-string at a given offset in a byte string + + Arguments: + byte_string (bytes): String + start (int): Offset into the string + + Returns: + bytes: A copy of the C-string excluding the terminating null byte + """ + str_end = byte_string.find(b"\0", start) + if str_end == -1: + raise ElfParsingError("C-string is not null terminated") + return byte_string[start:str_end] + + +def read_exactly(f, num_bytes, msg): + """ + Read exactly num_bytes at the current offset, otherwise raise + a parsing error with the given error message. + + Arguments: + f: file handle + num_bytes (int): Number of bytes to read + msg (str): Error to show when bytes cannot be read + + Returns: + bytes: the ``num_bytes`` bytes that were read. + """ + data = f.read(num_bytes) + if len(data) != num_bytes: + raise ElfParsingError(msg) + return data + + +def parse_program_headers(f, elf): + """ + Parse program headers + + Arguments: + f: file handle + elf (ElfFile): ELF file parser data + """ + # Forward to the program header + f.seek(elf.elf_hdr.e_phoff) + + # Here we have to make a mapping from virtual address to offset in the file. + ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32 + ph_fmt = elf.byte_order + ("LLQQQQQQ" if elf.is_64_bit else "LLLLLLLL") + ph_size = calcsize(ph_fmt) + ph_num = elf.elf_hdr.e_phnum + + # Read all program headers in one go + data = read_exactly(f, ph_num * ph_size, "Malformed program header") + + for i in range(ph_num): + ph = ProgramHeader._make(unpack_from(ph_fmt, data, i * ph_size)) + + # Skip segments of size 0; we don't distinguish between missing segment and + # empty segments. 
I've seen an empty PT_DYNAMIC section for an ELF file that + # contained debug data. + if ph.p_filesz == 0: + continue + + # For PT_LOAD entries: Save offsets and virtual addrs of the loaded ELF segments + # This way we can map offsets by virtual address to offsets in the file. + if ph.p_type == ELF_CONSTANTS.PT_LOAD: + elf.pt_load.append((ph.p_offset, ph.p_vaddr)) + + elif ph.p_type == ELF_CONSTANTS.PT_INTERP: + elf.pt_interp_p_offset = ph.p_offset + elf.pt_interp_p_filesz = ph.p_filesz + elf.has_pt_interp = True + + elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC: + elf.pt_dynamic_p_offset = ph.p_offset + elf.pt_dynamic_p_filesz = ph.p_filesz + elf.has_pt_dynamic = True + + # The linker sorts PT_LOAD segments by vaddr, but let's do it just to be sure, since + # patchelf for example has a flag to leave them in an arbitrary order. + elf.pt_load.sort(key=lambda x: x[1]) + + +def parse_pt_interp(f, elf): + """ + Parse the interpreter (i.e. absolute path to the dynamic linker) + + Arguments: + f: file handle + elf (ElfFile): ELF file parser data + """ + f.seek(elf.pt_interp_p_offset) + data = read_exactly(f, elf.pt_interp_p_filesz, "Malformed PT_INTERP entry") + elf.pt_interp_str = parse_c_string(data) + + +def find_strtab_size_at_offset(f, elf, offset): + """ + Retrieve the size of a string table section at a particular known offset + + Arguments: + f: file handle + elf (ElfFile): ELF file parser data + offset (int): offset of the section in the file (i.e. 
``sh_offset``) + + Returns: + int: the size of the string table in bytes + """ + section_hdr_fmt = elf.byte_order + ("LLQQQQLLQQ" if elf.is_64_bit else "LLLLLLLLLL") + section_hdr_size = calcsize(section_hdr_fmt) + f.seek(elf.elf_hdr.e_shoff) + for _ in range(elf.elf_hdr.e_shnum): + data = read_exactly(f, section_hdr_size, "Malformed section header") + sh = SectionHeader._make(unpack(section_hdr_fmt, data)) + if sh.sh_type == ELF_CONSTANTS.SHT_STRTAB and sh.sh_offset == offset: + return sh.sh_size + + raise ElfParsingError("Could not determine strtab size") + + +def retrieve_strtab(f, elf, offset): + """ + Read a full string table at the given offset, which + requires looking it up in the section headers. + + Arguments: + elf (ElfFile): ELF file parser data + offset (int): offset of the string table in the file + + Returns: + bytes: the contents of the string table + """ + size = find_strtab_size_at_offset(f, elf, offset) + f.seek(offset) + return read_exactly(f, size, "Could not read string table") + + +def vaddr_to_offset(elf, vaddr): + """ + Given a virtual address, find the corresponding offset in the ELF file itself. + + Arguments: + elf (ElfFile): ELF file parser data + vaddr (int): virtual address + """ + idx = bisect.bisect_right([p_vaddr for (p_offset, p_vaddr) in elf.pt_load], vaddr) - 1 + p_offset, p_vaddr = elf.pt_load[idx] + return p_offset - p_vaddr + vaddr + + +def parse_pt_dynamic(f, elf): + """ + Parse the dynamic section of an ELF file + + Arguments: + f: file handle + elf (ElfFile): ELF file parse data + """ + dynamic_array_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL") + dynamic_array_size = calcsize(dynamic_array_fmt) + + current_offset = elf.pt_dynamic_p_offset + count_rpath = 0 + count_runpath = 0 + count_strtab = 0 + + f.seek(elf.pt_dynamic_p_offset) + + # In case of broken ELF files, don't read beyond the advertized size. 
+ for _ in range(elf.pt_dynamic_p_filesz // dynamic_array_size): + data = read_exactly(f, dynamic_array_size, "Malformed dynamic array entry") + tag, val = unpack(dynamic_array_fmt, data) + if tag == ELF_CONSTANTS.DT_NULL: + break + elif tag == ELF_CONSTANTS.DT_RPATH: + count_rpath += 1 + elf.rpath_strtab_offset = val + elf.dt_rpath_offset = current_offset + elf.is_runpath = False + elf.has_rpath = True + elif tag == ELF_CONSTANTS.DT_RUNPATH: + count_runpath += 1 + elf.rpath_strtab_offset = val + elf.dt_rpath_offset = current_offset + elf.is_runpath = True + elf.has_rpath = True + elif tag == ELF_CONSTANTS.DT_STRTAB: + count_strtab += 1 + strtab_vaddr = val + elif tag == ELF_CONSTANTS.DT_NEEDED: + elf.has_needed = True + elf.dt_needed_strtab_offsets.append(val) + elif tag == ELF_CONSTANTS.DT_SONAME: + elf.has_soname = True + elf.dt_soname_strtab_offset = val + current_offset += dynamic_array_size + + # No rpath/runpath, that happens. + if count_rpath == count_runpath == 0: + elf.has_rpath = False + elif count_rpath + count_runpath != 1: + raise ElfParsingError("Could not find a unique rpath/runpath.") + + if count_strtab != 1: + raise ElfParsingError("Could not find a unique strtab of for the dynamic section strings") + + # Nothing to retrieve, so don't bother getting the string table. + if not (elf.has_rpath or elf.has_soname or elf.has_needed): + return + + string_table = retrieve_strtab(f, elf, vaddr_to_offset(elf, strtab_vaddr)) + + if elf.has_needed: + elf.dt_needed_strs = list( + parse_c_string(string_table, offset) for offset in elf.dt_needed_strtab_offsets + ) + + if elf.has_soname: + elf.dt_soname_str = parse_c_string(string_table, elf.dt_soname_strtab_offset) + + if elf.has_rpath: + elf.dt_rpath_str = parse_c_string(string_table, elf.rpath_strtab_offset) + + +def parse_header(f, elf): + # Read the 32/64 bit class independent part of the header and validate + e_ident = f.read(16) + + # Require ELF magic bytes. 
+ if len(e_ident) != 16 or e_ident[:4] != ELF_CONSTANTS.MAGIC: + raise ElfParsingError("Not an ELF file") + + # Defensively require a valid class and data. + e_ident_class, e_ident_data = get_byte_at(e_ident, 4), get_byte_at(e_ident, 5) + + if e_ident_class not in (ELF_CONSTANTS.CLASS32, ELF_CONSTANTS.CLASS64): + raise ElfParsingError("Invalid class found") + + if e_ident_data not in (ELF_CONSTANTS.DATA2LSB, ELF_CONSTANTS.DATA2MSB): + raise ElfParsingError("Invalid data type") + + elf.is_64_bit = e_ident_class == ELF_CONSTANTS.CLASS64 + elf.is_little_endian = e_ident_data == ELF_CONSTANTS.DATA2LSB + + # Set up byte order and types for unpacking + elf.byte_order = "<" if elf.is_little_endian else ">" + + # Parse the rest of the header + elf_header_fmt = elf.byte_order + ("HHLQQQLHHHHHH" if elf.is_64_bit else "HHLLLLLHHHHHH") + hdr_size = calcsize(elf_header_fmt) + data = read_exactly(f, hdr_size, "ELF header malformed") + elf.elf_hdr = ElfHeader._make(unpack(elf_header_fmt, data)) + + +def _do_parse_elf(f, interpreter=True, dynamic_section=True): + # We don't (yet?) allow parsing ELF files at a nonzero offset, we just + # jump to absolute offsets as they are specified in the ELF file. + if f.tell() != 0: + raise ElfParsingError("Cannot parse at a nonzero offset") + + elf = ElfFile() + parse_header(f, elf) + + # We don't handle anything but executables and shared libraries now. + if elf.elf_hdr.e_type not in (ELF_CONSTANTS.ET_EXEC, ELF_CONSTANTS.ET_DYN): + raise ElfParsingError("Not an ET_DYN or ET_EXEC type") + + parse_program_headers(f, elf) + + # Parse PT_INTERP section + if interpreter and elf.has_pt_interp: + parse_pt_interp(f, elf) + + # Parse PT_DYNAMIC section. 
+ if dynamic_section and elf.has_pt_dynamic and len(elf.pt_load) > 0: + parse_pt_dynamic(f, elf) + + return elf + + +def parse_elf(f, interpreter=False, dynamic_section=False): + """Given a file handle f for an ELF file opened in binary mode, return an ElfFile + object that stores data about rpaths""" + try: + return _do_parse_elf(f, interpreter, dynamic_section) + except (DeprecationWarning, struct.error): + # According to the docs old versions of Python can throw DeprecationWarning + # instead of struct.error. + raise ElfParsingError("Malformed ELF file") + + +def get_rpaths(path): + """Returns list of rpaths of the given file as UTF-8 strings, or None if the file + does not have any rpaths.""" + try: + with open(path, "rb") as f: + elf = parse_elf(f, interpreter=False, dynamic_section=True) + except ElfParsingError: + return None + + if not elf.has_rpath: + return None + + # If it does, split the string in components + rpath = elf.dt_rpath_str + if sys.version_info[0] >= 3: + rpath = rpath.decode("utf-8") + return rpath.split(":") |