summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2022-11-01 20:42:06 +0100
committerGitHub <noreply@github.com>2022-11-01 19:42:06 +0000
commit230e96fbb8d3e24082ce4f7571f05981be5c148b (patch)
tree56c032cacb50215f6d9d31f11c2aa1d7e62d4439 /lib
parent6b3ea9463097524dfc1b329b0cd9af6d935a02e7 (diff)
downloadspack-230e96fbb8d3e24082ce4f7571f05981be5c148b.tar.gz
spack-230e96fbb8d3e24082ce4f7571f05981be5c148b.tar.bz2
spack-230e96fbb8d3e24082ce4f7571f05981be5c148b.tar.xz
spack-230e96fbb8d3e24082ce4f7571f05981be5c148b.zip
Add elf parsing utility function (#33628)
Introduces `spack.util.elf.parse_elf(file_handle)`
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/test/conftest.py36
-rw-r--r--lib/spack/spack/test/relocate.py56
-rw-r--r--lib/spack/spack/test/util/elf.py130
-rw-r--r--lib/spack/spack/util/elf.py459
4 files changed, 635 insertions, 46 deletions
diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py
index 9ae346c720..1f3c17916c 100644
--- a/lib/spack/spack/test/conftest.py
+++ b/lib/spack/spack/test/conftest.py
@@ -1789,3 +1789,39 @@ def mock_spider_configs(mock_config_data, monkeypatch):
@pytest.fixture(scope="function")
def mock_tty_stdout(monkeypatch):
monkeypatch.setattr(sys.stdout, "isatty", lambda: True)
+
+
+@pytest.fixture()
+def binary_with_rpaths(tmpdir):
+ """Factory fixture that compiles an ELF binary setting its RPATH. Relative
+ paths are encoded with `$ORIGIN` prepended.
+ """
+
+ def _factory(rpaths, message="Hello world!"):
+ source = tmpdir.join("main.c")
+ source.write(
+ """
+ #include <stdio.h>
+ int main(){{
+ printf("{0}");
+ }}
+ """.format(
+ message
+ )
+ )
+ gcc = spack.util.executable.which("gcc")
+ executable = source.dirpath("main.x")
+ # Encode relative RPATHs using `$ORIGIN` as the root prefix
+ rpaths = [x if os.path.isabs(x) else os.path.join("$ORIGIN", x) for x in rpaths]
+ rpath_str = ":".join(rpaths)
+ opts = [
+ "-Wl,--disable-new-dtags",
+ "-Wl,-rpath={0}".format(rpath_str),
+ str(source),
+ "-o",
+ str(executable),
+ ]
+ gcc(*opts)
+ return executable
+
+ return _factory
diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py
index 4c9dd8be66..99dd7a460c 100644
--- a/lib/spack/spack/test/relocate.py
+++ b/lib/spack/spack/test/relocate.py
@@ -83,42 +83,6 @@ def mock_patchelf(tmpdir, mock_executable):
@pytest.fixture()
-def hello_world(tmpdir):
- """Factory fixture that compiles an ELF binary setting its RPATH. Relative
- paths are encoded with `$ORIGIN` prepended.
- """
-
- def _factory(rpaths, message="Hello world!"):
- source = tmpdir.join("main.c")
- source.write(
- """
- #include <stdio.h>
- int main(){{
- printf("{0}");
- }}
- """.format(
- message
- )
- )
- gcc = spack.util.executable.which("gcc")
- executable = source.dirpath("main.x")
- # Encode relative RPATHs using `$ORIGIN` as the root prefix
- rpaths = [x if os.path.isabs(x) else os.path.join("$ORIGIN", x) for x in rpaths]
- rpath_str = ":".join(rpaths)
- opts = [
- "-Wl,--disable-new-dtags",
- "-Wl,-rpath={0}".format(rpath_str),
- str(source),
- "-o",
- str(executable),
- ]
- gcc(*opts)
- return executable
-
- return _factory
-
-
-@pytest.fixture()
def make_dylib(tmpdir_factory):
"""Create a shared library with unfriendly qualities.
@@ -315,9 +279,9 @@ def test_set_elf_rpaths_warning(mock_patchelf):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
-def test_replace_prefix_bin(hello_world):
+def test_replace_prefix_bin(binary_with_rpaths):
# Compile an "Hello world!" executable and set RPATHs
- executable = hello_world(rpaths=["/usr/lib", "/usr/lib64"])
+ executable = binary_with_rpaths(rpaths=["/usr/lib", "/usr/lib64"])
# Relocate the RPATHs
spack.relocate._replace_prefix_bin(str(executable), {b"/usr": b"/foo"})
@@ -328,9 +292,9 @@ def test_replace_prefix_bin(hello_world):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
-def test_relocate_elf_binaries_absolute_paths(hello_world, copy_binary, tmpdir):
+def test_relocate_elf_binaries_absolute_paths(binary_with_rpaths, copy_binary, tmpdir):
# Create an executable, set some RPATHs, copy it to another location
- orig_binary = hello_world(rpaths=[str(tmpdir.mkdir("lib")), "/usr/lib64"])
+ orig_binary = binary_with_rpaths(rpaths=[str(tmpdir.mkdir("lib")), "/usr/lib64"])
new_binary = copy_binary(orig_binary)
spack.relocate.relocate_elf_binaries(
@@ -350,9 +314,9 @@ def test_relocate_elf_binaries_absolute_paths(hello_world, copy_binary, tmpdir):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
-def test_relocate_elf_binaries_relative_paths(hello_world, copy_binary):
+def test_relocate_elf_binaries_relative_paths(binary_with_rpaths, copy_binary):
# Create an executable, set some RPATHs, copy it to another location
- orig_binary = hello_world(rpaths=["lib", "lib64", "/opt/local/lib"])
+ orig_binary = binary_with_rpaths(rpaths=["lib", "lib64", "/opt/local/lib"])
new_binary = copy_binary(orig_binary)
spack.relocate.relocate_elf_binaries(
@@ -371,8 +335,8 @@ def test_relocate_elf_binaries_relative_paths(hello_world, copy_binary):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
-def test_make_elf_binaries_relative(hello_world, copy_binary, tmpdir):
- orig_binary = hello_world(
+def test_make_elf_binaries_relative(binary_with_rpaths, copy_binary, tmpdir):
+ orig_binary = binary_with_rpaths(
rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), "/opt/local/lib"]
)
new_binary = copy_binary(orig_binary)
@@ -393,8 +357,8 @@ def test_raise_if_not_relocatable(monkeypatch):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
-def test_relocate_text_bin(hello_world, copy_binary, tmpdir):
- orig_binary = hello_world(
+def test_relocate_text_bin(binary_with_rpaths, copy_binary, tmpdir):
+ orig_binary = binary_with_rpaths(
rpaths=[str(tmpdir.mkdir("lib")), str(tmpdir.mkdir("lib64")), "/opt/local/lib"],
message=str(tmpdir),
)
diff --git a/lib/spack/spack/test/util/elf.py b/lib/spack/spack/test/util/elf.py
new file mode 100644
index 0000000000..cf6b3aeb27
--- /dev/null
+++ b/lib/spack/spack/test/util/elf.py
@@ -0,0 +1,130 @@
+# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+
+import io
+
+import pytest
+
+import llnl.util.filesystem as fs
+
+import spack.platforms
+import spack.util.elf as elf
+import spack.util.executable
+
+
+# Note that our ELF parser is platform independent, but creating an ELF file with
+# system tools is more difficult on non-Linux platforms, so tests that compile are Linux-only.
+def skip_unless_linux(f):
+ return pytest.mark.skipif(
+ str(spack.platforms.real_host()) != "linux",
+ reason="implementation currently requires linux",
+ )(f)
+
+
+@pytest.mark.requires_executables("gcc")
+@skip_unless_linux
+def test_elf_get_rpaths(binary_with_rpaths):
+ # Compile a "Hello world!" executable and set RPATHs
+ long_rpaths = ["/very/long/prefix/x", "/very/long/prefix/y"]
+ executable = str(binary_with_rpaths(rpaths=long_rpaths))
+ assert elf.get_rpaths(executable) == long_rpaths
+
+
+@pytest.mark.requires_executables("gcc")
+@skip_unless_linux
+@pytest.mark.parametrize(
+ "linker_flag,is_runpath",
+ [
+ ("-Wl,--disable-new-dtags", False),
+ ("-Wl,--enable-new-dtags", True),
+ ],
+)
+def test_elf_parsing_shared_linking(linker_flag, is_runpath, tmpdir):
+ gcc = spack.util.executable.which("gcc")
+
+ with fs.working_dir(str(tmpdir)):
+ # Create a library to link to so we can force a dynamic section in an ELF file
+ with open("foo.c", "w") as f:
+ f.write("int foo(){return 0;}")
+ with open("bar.c", "w") as f:
+ f.write("int foo(); int _start(){return foo();}")
+
+ # Create library and executable linking to it.
+ gcc("-shared", "-o", "libfoo.so", "-Wl,-soname,libfoo.so.1", "-nostdlib", "foo.c")
+ gcc(
+ "-o",
+ "bar",
+ linker_flag,
+ "-Wl,-rpath,/first",
+ "-Wl,-rpath,/second",
+ "-Wl,--no-as-needed",
+ "-nostdlib",
+ "libfoo.so",
+ "bar.c",
+ "-o",
+ "bar",
+ )
+
+ with open("libfoo.so", "rb") as f:
+ foo_parsed = elf.parse_elf(f, interpreter=True, dynamic_section=True)
+
+ assert not foo_parsed.has_pt_interp
+ assert foo_parsed.has_pt_dynamic
+ assert not foo_parsed.has_rpath
+ assert not foo_parsed.has_needed
+ assert foo_parsed.has_soname
+ assert foo_parsed.dt_soname_str == b"libfoo.so.1"
+
+ with open("bar", "rb") as f:
+ bar_parsed = elf.parse_elf(f, interpreter=True, dynamic_section=True)
+
+ assert bar_parsed.has_pt_interp
+ assert bar_parsed.has_pt_dynamic
+ assert bar_parsed.has_rpath
+ assert bar_parsed.has_needed
+ assert not bar_parsed.has_soname
+ assert bar_parsed.dt_rpath_str == b"/first:/second"
+ assert bar_parsed.dt_needed_strs == [b"libfoo.so.1"]
+
+
+def test_broken_elf():
+ # No elf magic
+ with pytest.raises(elf.ElfParsingError, match="Not an ELF file"):
+ elf.parse_elf(io.BytesIO(b"x"))
+
+ # Incomplete ELF header
+ with pytest.raises(elf.ElfParsingError, match="Not an ELF file"):
+ elf.parse_elf(io.BytesIO(b"\x7fELF"))
+
+ # Invalid class
+ with pytest.raises(elf.ElfParsingError, match="Invalid class"):
+ elf.parse_elf(io.BytesIO(b"\x7fELF\x09\x01" + b"\x00" * 10))
+
+ # Invalid data type
+ with pytest.raises(elf.ElfParsingError, match="Invalid data type"):
+ elf.parse_elf(io.BytesIO(b"\x7fELF\x01\x09" + b"\x00" * 10))
+
+ # 64-bit needs at least 64 bytes of header; this is only 56 bytes
+ with pytest.raises(elf.ElfParsingError, match="ELF header malformed"):
+ elf.parse_elf(io.BytesIO(b"\x7fELF\x02\x01" + b"\x00" * 50))
+
+ # 32-bit needs at least 52 bytes of header; this is only 46 bytes
+ with pytest.raises(elf.ElfParsingError, match="ELF header malformed"):
+ elf.parse_elf(io.BytesIO(b"\x7fELF\x01\x01" + b"\x00" * 40))
+
+ # Not a ET_DYN/ET_EXEC on a 32-bit LE ELF
+ with pytest.raises(elf.ElfParsingError, match="Not an ET_DYN or ET_EXEC"):
+ elf.parse_elf(io.BytesIO(b"\x7fELF\x01\x01" + (b"\x00" * 10) + b"\x09" + (b"\x00" * 35)))
+
+
+def test_parser_doesnt_deal_with_nonzero_offset():
+ # Currently we don't have logic to parse ELF files at nonzero offsets in a file
+ # This could be useful when e.g. modifying an ELF file inside a tarball or so,
+ # but currently we cannot.
+ elf_at_offset_one = io.BytesIO(b"\x00\x7fELF\x01\x01" + b"\x00" * 10)
+ elf_at_offset_one.read(1)
+ with pytest.raises(elf.ElfParsingError, match="Cannot parse at a nonzero offset"):
+ elf.parse_elf(elf_at_offset_one)
diff --git a/lib/spack/spack/util/elf.py b/lib/spack/spack/util/elf.py
new file mode 100644
index 0000000000..bb9bfbb22d
--- /dev/null
+++ b/lib/spack/spack/util/elf.py
@@ -0,0 +1,459 @@
+# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import bisect
+import struct
+import sys
+from collections import namedtuple
+from struct import calcsize, unpack, unpack_from
+
+ElfHeader = namedtuple(
+ "ElfHeader",
+ [
+ "e_type",
+ "e_machine",
+ "e_version",
+ "e_entry",
+ "e_phoff",
+ "e_shoff",
+ "e_flags",
+ "e_ehsize",
+ "e_phentsize",
+ "e_phnum",
+ "e_shentsize",
+ "e_shnum",
+ "e_shstrndx",
+ ],
+)
+
+SectionHeader = namedtuple(
+ "SectionHeader",
+ [
+ "sh_name",
+ "sh_type",
+ "sh_flags",
+ "sh_addr",
+ "sh_offset",
+ "sh_size",
+ "sh_link",
+ "sh_info",
+ "sh_addralign",
+ "sh_entsize",
+ ],
+)
+
+ProgramHeader32 = namedtuple(
+ "ProgramHeader32",
+ [
+ "p_type",
+ "p_offset",
+ "p_vaddr",
+ "p_paddr",
+ "p_filesz",
+ "p_memsz",
+ "p_flags",
+ "p_align",
+ ],
+)
+
+ProgramHeader64 = namedtuple(
+ "ProgramHeader64",
+ [
+ "p_type",
+ "p_flags",
+ "p_offset",
+ "p_vaddr",
+ "p_paddr",
+ "p_filesz",
+ "p_memsz",
+ "p_align",
+ ],
+)
+
+
+class ELF_CONSTANTS:
+ MAGIC = b"\x7fELF"
+ CLASS32 = 1
+ CLASS64 = 2
+ DATA2LSB = 1
+ DATA2MSB = 2
+ ET_EXEC = 2
+ ET_DYN = 3
+ PT_LOAD = 1
+ PT_DYNAMIC = 2
+ PT_INTERP = 3
+ DT_NULL = 0
+ DT_NEEDED = 1
+ DT_STRTAB = 5
+ DT_SONAME = 14
+ DT_RPATH = 15
+ DT_RUNPATH = 29
+ SHT_STRTAB = 3
+
+
+def get_byte_at(byte_array, idx):
+ if sys.version_info[0] < 3:
+ return ord(byte_array[idx])
+ return byte_array[idx]
+
+
+class ElfParsingError(Exception):
+ pass
+
+
+class ElfFile(object):
+ """Parsed ELF file."""
+
+ __slots__ = [
+ "is_64_bit",
+ "is_little_endian",
+ "byte_order",
+ "elf_hdr",
+ "pt_load",
+ # pt_interp
+ "has_pt_interp",
+ "pt_interp_p_offset",
+ "pt_interp_p_filesz",
+ "pt_interp_str",
+ # pt_dynamic
+ "has_pt_dynamic",
+ "pt_dynamic_p_offset",
+ "pt_dynamic_p_filesz",
+ # rpath
+ "has_rpath",
+ "dt_rpath_offset",
+ "dt_rpath_str",
+ "rpath_strtab_offset",
+ "is_runpath",
+ # dt needed
+ "has_needed",
+ "dt_needed_strtab_offsets",
+ "dt_needed_strs",
+ # dt soname
+ "has_soname",
+ "dt_soname_strtab_offset",
+ "dt_soname_str",
+ ]
+
+ def __init__(self):
+ self.dt_needed_strtab_offsets = []
+ self.has_soname = False
+ self.has_rpath = False
+ self.has_needed = False
+ self.pt_load = []
+ self.has_pt_dynamic = False
+ self.has_pt_interp = False
+
+
+def parse_c_string(byte_string, start=0):
+ """
+ Retrieve a C-string at a given offset in a byte string
+
+ Arguments:
+ byte_string (bytes): String
+ start (int): Offset into the string
+
+ Returns:
+ bytes: A copy of the C-string excluding the terminating null byte
+ """
+ str_end = byte_string.find(b"\0", start)
+ if str_end == -1:
+ raise ElfParsingError("C-string is not null terminated")
+ return byte_string[start:str_end]
+
+
+def read_exactly(f, num_bytes, msg):
+ """
+ Read exactly num_bytes at the current offset, otherwise raise
+ a parsing error with the given error message.
+
+ Arguments:
+ f: file handle
+ num_bytes (int): Number of bytes to read
+ msg (str): Error to show when bytes cannot be read
+
+ Returns:
+ bytes: the ``num_bytes`` bytes that were read.
+ """
+ data = f.read(num_bytes)
+ if len(data) != num_bytes:
+ raise ElfParsingError(msg)
+ return data
+
+
+def parse_program_headers(f, elf):
+ """
+ Parse program headers
+
+ Arguments:
+ f: file handle
+ elf (ElfFile): ELF file parser data
+ """
+ # Forward to the program header
+ f.seek(elf.elf_hdr.e_phoff)
+
+ # Here we have to make a mapping from virtual address to offset in the file.
+ ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32
+ ph_fmt = elf.byte_order + ("LLQQQQQQ" if elf.is_64_bit else "LLLLLLLL")
+ ph_size = calcsize(ph_fmt)
+ ph_num = elf.elf_hdr.e_phnum
+
+ # Read all program headers in one go
+ data = read_exactly(f, ph_num * ph_size, "Malformed program header")
+
+ for i in range(ph_num):
+ ph = ProgramHeader._make(unpack_from(ph_fmt, data, i * ph_size))
+
+ # Skip segments of size 0; we don't distinguish between missing and empty
+ # segments. I've seen an empty PT_DYNAMIC segment for an ELF file that
+ # contained debug data.
+ if ph.p_filesz == 0:
+ continue
+
+ # For PT_LOAD entries: Save offsets and virtual addrs of the loaded ELF segments
+ # This way we can map offsets by virtual address to offsets in the file.
+ if ph.p_type == ELF_CONSTANTS.PT_LOAD:
+ elf.pt_load.append((ph.p_offset, ph.p_vaddr))
+
+ elif ph.p_type == ELF_CONSTANTS.PT_INTERP:
+ elf.pt_interp_p_offset = ph.p_offset
+ elf.pt_interp_p_filesz = ph.p_filesz
+ elf.has_pt_interp = True
+
+ elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC:
+ elf.pt_dynamic_p_offset = ph.p_offset
+ elf.pt_dynamic_p_filesz = ph.p_filesz
+ elf.has_pt_dynamic = True
+
+ # The linker sorts PT_LOAD segments by vaddr, but let's do it just to be sure, since
+ # patchelf for example has a flag to leave them in an arbitrary order.
+ elf.pt_load.sort(key=lambda x: x[1])
+
+
+def parse_pt_interp(f, elf):
+ """
+ Parse the interpreter (i.e. absolute path to the dynamic linker)
+
+ Arguments:
+ f: file handle
+ elf (ElfFile): ELF file parser data
+ """
+ f.seek(elf.pt_interp_p_offset)
+ data = read_exactly(f, elf.pt_interp_p_filesz, "Malformed PT_INTERP entry")
+ elf.pt_interp_str = parse_c_string(data)
+
+
+def find_strtab_size_at_offset(f, elf, offset):
+ """
+ Retrieve the size of a string table section at a particular known offset
+
+ Arguments:
+ f: file handle
+ elf (ElfFile): ELF file parser data
+ offset (int): offset of the section in the file (i.e. ``sh_offset``)
+
+ Returns:
+ int: the size of the string table in bytes
+ """
+ section_hdr_fmt = elf.byte_order + ("LLQQQQLLQQ" if elf.is_64_bit else "LLLLLLLLLL")
+ section_hdr_size = calcsize(section_hdr_fmt)
+ f.seek(elf.elf_hdr.e_shoff)
+ for _ in range(elf.elf_hdr.e_shnum):
+ data = read_exactly(f, section_hdr_size, "Malformed section header")
+ sh = SectionHeader._make(unpack(section_hdr_fmt, data))
+ if sh.sh_type == ELF_CONSTANTS.SHT_STRTAB and sh.sh_offset == offset:
+ return sh.sh_size
+
+ raise ElfParsingError("Could not determine strtab size")
+
+
+def retrieve_strtab(f, elf, offset):
+ """
+ Read a full string table at the given offset; its size is looked up in the section headers.
+
+ Arguments:
+ f: file handle
+ elf (ElfFile): ELF file parser data
+ offset (int): offset of the string table in the file (i.e. ``sh_offset``)
+
+ Returns:
+ bytes: the contents of the string table
+ """
+ size = find_strtab_size_at_offset(f, elf, offset)
+ f.seek(offset)
+ return read_exactly(f, size, "Could not read string table")
+
+
+def vaddr_to_offset(elf, vaddr):
+ """
+ Given a virtual address, find the corresponding offset in the ELF file itself.
+
+ Arguments:
+ elf (ElfFile): ELF file parser data
+ vaddr (int): virtual address
+ """
+ idx = bisect.bisect_right([p_vaddr for (p_offset, p_vaddr) in elf.pt_load], vaddr) - 1
+ p_offset, p_vaddr = elf.pt_load[idx]
+ return p_offset - p_vaddr + vaddr
+
+
+def parse_pt_dynamic(f, elf):
+ """
+ Parse the dynamic section of an ELF file
+
+ Arguments:
+ f: file handle
+ elf (ElfFile): ELF file parser data
+ """
+ dynamic_array_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL")
+ dynamic_array_size = calcsize(dynamic_array_fmt)
+
+ current_offset = elf.pt_dynamic_p_offset
+ count_rpath = 0
+ count_runpath = 0
+ count_strtab = 0
+
+ f.seek(elf.pt_dynamic_p_offset)
+
+ # In case of broken ELF files, don't read beyond the advertized size.
+ for _ in range(elf.pt_dynamic_p_filesz // dynamic_array_size):
+ data = read_exactly(f, dynamic_array_size, "Malformed dynamic array entry")
+ tag, val = unpack(dynamic_array_fmt, data)
+ if tag == ELF_CONSTANTS.DT_NULL:
+ break
+ elif tag == ELF_CONSTANTS.DT_RPATH:
+ count_rpath += 1
+ elf.rpath_strtab_offset = val
+ elf.dt_rpath_offset = current_offset
+ elf.is_runpath = False
+ elf.has_rpath = True
+ elif tag == ELF_CONSTANTS.DT_RUNPATH:
+ count_runpath += 1
+ elf.rpath_strtab_offset = val
+ elf.dt_rpath_offset = current_offset
+ elf.is_runpath = True
+ elf.has_rpath = True
+ elif tag == ELF_CONSTANTS.DT_STRTAB:
+ count_strtab += 1
+ strtab_vaddr = val
+ elif tag == ELF_CONSTANTS.DT_NEEDED:
+ elf.has_needed = True
+ elf.dt_needed_strtab_offsets.append(val)
+ elif tag == ELF_CONSTANTS.DT_SONAME:
+ elf.has_soname = True
+ elf.dt_soname_strtab_offset = val
+ current_offset += dynamic_array_size
+
+ # No rpath/runpath, that happens.
+ if count_rpath == count_runpath == 0:
+ elf.has_rpath = False
+ elif count_rpath + count_runpath != 1:
+ raise ElfParsingError("Could not find a unique rpath/runpath.")
+
+ if count_strtab != 1:
+ raise ElfParsingError("Could not find a unique strtab of for the dynamic section strings")
+
+ # Nothing to retrieve, so don't bother getting the string table.
+ if not (elf.has_rpath or elf.has_soname or elf.has_needed):
+ return
+
+ string_table = retrieve_strtab(f, elf, vaddr_to_offset(elf, strtab_vaddr))
+
+ if elf.has_needed:
+ elf.dt_needed_strs = list(
+ parse_c_string(string_table, offset) for offset in elf.dt_needed_strtab_offsets
+ )
+
+ if elf.has_soname:
+ elf.dt_soname_str = parse_c_string(string_table, elf.dt_soname_strtab_offset)
+
+ if elf.has_rpath:
+ elf.dt_rpath_str = parse_c_string(string_table, elf.rpath_strtab_offset)
+
+
+def parse_header(f, elf):
+ # Read the 32/64 bit class independent part of the header and validate
+ e_ident = f.read(16)
+
+ # Require ELF magic bytes.
+ if len(e_ident) != 16 or e_ident[:4] != ELF_CONSTANTS.MAGIC:
+ raise ElfParsingError("Not an ELF file")
+
+ # Defensively require a valid class and data.
+ e_ident_class, e_ident_data = get_byte_at(e_ident, 4), get_byte_at(e_ident, 5)
+
+ if e_ident_class not in (ELF_CONSTANTS.CLASS32, ELF_CONSTANTS.CLASS64):
+ raise ElfParsingError("Invalid class found")
+
+ if e_ident_data not in (ELF_CONSTANTS.DATA2LSB, ELF_CONSTANTS.DATA2MSB):
+ raise ElfParsingError("Invalid data type")
+
+ elf.is_64_bit = e_ident_class == ELF_CONSTANTS.CLASS64
+ elf.is_little_endian = e_ident_data == ELF_CONSTANTS.DATA2LSB
+
+ # Set up byte order and types for unpacking
+ elf.byte_order = "<" if elf.is_little_endian else ">"
+
+ # Parse the rest of the header
+ elf_header_fmt = elf.byte_order + ("HHLQQQLHHHHHH" if elf.is_64_bit else "HHLLLLLHHHHHH")
+ hdr_size = calcsize(elf_header_fmt)
+ data = read_exactly(f, hdr_size, "ELF header malformed")
+ elf.elf_hdr = ElfHeader._make(unpack(elf_header_fmt, data))
+
+
+def _do_parse_elf(f, interpreter=True, dynamic_section=True):
+ # We don't (yet?) allow parsing ELF files at a nonzero offset, we just
+ # jump to absolute offsets as they are specified in the ELF file.
+ if f.tell() != 0:
+ raise ElfParsingError("Cannot parse at a nonzero offset")
+
+ elf = ElfFile()
+ parse_header(f, elf)
+
+ # We don't handle anything but executables and shared libraries now.
+ if elf.elf_hdr.e_type not in (ELF_CONSTANTS.ET_EXEC, ELF_CONSTANTS.ET_DYN):
+ raise ElfParsingError("Not an ET_DYN or ET_EXEC type")
+
+ parse_program_headers(f, elf)
+
+ # Parse PT_INTERP section
+ if interpreter and elf.has_pt_interp:
+ parse_pt_interp(f, elf)
+
+ # Parse PT_DYNAMIC section.
+ if dynamic_section and elf.has_pt_dynamic and len(elf.pt_load) > 0:
+ parse_pt_dynamic(f, elf)
+
+ return elf
+
+
+def parse_elf(f, interpreter=False, dynamic_section=False):
+ """Given a file handle f for an ELF file opened in binary mode, return an ElfFile
+ object that stores data about rpaths"""
+ try:
+ return _do_parse_elf(f, interpreter, dynamic_section)
+ except (DeprecationWarning, struct.error):
+ # According to the docs old versions of Python can throw DeprecationWarning
+ # instead of struct.error.
+ raise ElfParsingError("Malformed ELF file")
+
+
+def get_rpaths(path):
+ """Returns list of rpaths of the given file as UTF-8 strings, or None if the file
+ cannot be parsed as an ELF file or does not have any rpaths."""
+ try:
+ with open(path, "rb") as f:
+ elf = parse_elf(f, interpreter=False, dynamic_section=True)
+ except ElfParsingError:
+ return None
+
+ if not elf.has_rpath:
+ return None
+
+ # If it does, split the string into its components
+ rpath = elf.dt_rpath_str
+ if sys.version_info[0] >= 3:
+ rpath = rpath.decode("utf-8")
+ return rpath.split(":")