diff options
author | Harmen Stoppels <harmenstoppels@gmail.com> | 2022-11-04 10:30:53 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-04 02:30:53 -0700 |
commit | 1d4919924de9743f7ec6f47616d2c67d3074648b (patch) | |
tree | a206d8a52f1f0057af7677c9d91bf34102239c7a | |
parent | b1f896e6c75560cdb4f315cd8e68a653d085db63 (diff) | |
download | spack-1d4919924de9743f7ec6f47616d2c67d3074648b.tar.gz spack-1d4919924de9743f7ec6f47616d2c67d3074648b.tar.bz2 spack-1d4919924de9743f7ec6f47616d2c67d3074648b.tar.xz spack-1d4919924de9743f7ec6f47616d2c67d3074648b.zip |
Add in-place RPATH replacement (#27610)
Whenever the rpath string actually _grows_, it falls back to patchelf,
when it stays the same length or gets shorter, we update it in-place,
padded with null bytes.
This PR only deals with absolute -> absolute rpath replacement. We don't
use `_build_tarball(relative=True)` in our CI. If `relative` then it falls
back to the old replacement code.
With this PR, relocation time goes down significantly, likely because patchelf
does some odd things with mmap, causing lots of overhead. Example:
- `binutils`: 700MB installed, goes from `1.91s` to `0.57s`, or `3.4x` faster.
Relocation time: 27% -> 10% of total install time
- `llvm`: 6.8GB installed, goes from `28.56s` to `5.38`, or `5.3x` faster.
Relocation time: 44% -> 13% of total install time
The bottleneck is now decompression.
Note: I'm somewhat confused about the "relative rpath" code paths. Right
now this PR only deals with absolute -> absolute replacement. As far as
I understand, if you embrace relative rpaths when uploading to the
buildcache, the whole point is you _don't_ want to patch rpaths on
install? So it seems fine to not expand `$ORIGIN` again imho.
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 6 | ||||
-rw-r--r-- | lib/spack/spack/relocate.py | 43 | ||||
-rw-r--r-- | lib/spack/spack/test/relocate.py | 18 | ||||
-rw-r--r-- | lib/spack/spack/test/util/elf.py | 55 | ||||
-rw-r--r-- | lib/spack/spack/util/elf.py | 85 |
5 files changed, 155 insertions, 52 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 3069e234b2..3de0bcf9db 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -1634,7 +1634,11 @@ def relocate_package(spec, allow_root): old_prefix, new_prefix, ) - if "elf" in platform.binary_formats: + elif "elf" in platform.binary_formats and not rel: + # The new ELF dynamic section relocation logic only handles absolute to + # absolute relocation. + relocate.new_relocate_elf_binaries(files_to_relocate, prefix_to_prefix_bin) + elif "elf" in platform.binary_formats and rel: relocate.relocate_elf_binaries( files_to_relocate, old_layout_root, diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py index 8a5e77e22e..b8c47b5864 100644 --- a/lib/spack/spack/relocate.py +++ b/lib/spack/spack/relocate.py @@ -7,6 +7,7 @@ import multiprocessing.pool import os import re import shutil +from collections import OrderedDict import macholib.mach_o import macholib.MachO @@ -21,6 +22,7 @@ import spack.bootstrap import spack.platforms import spack.repo import spack.spec +import spack.util.elf as elf import spack.util.executable as executable is_macos = str(spack.platforms.real_host()) == "darwin" @@ -93,27 +95,15 @@ def _patchelf(): def _elf_rpaths_for(path): """Return the RPATHs for an executable or a library. - The RPATHs are obtained by ``patchelf --print-rpath PATH``. - Args: path (str): full path to the executable or library Return: - RPATHs as a list of strings. + RPATHs as a list of strings. Returns an empty array + on ELF parsing errors, or when the ELF file simply + has no rpaths. """ - # If we're relocating patchelf itself, use it - patchelf_path = path if path.endswith("/bin/patchelf") else _patchelf() - patchelf = executable.Executable(patchelf_path) - - output = "" - try: - output = patchelf("--print-rpath", path, output=str, error=str) - output = output.strip("\n") - except executable.ProcessError as e: - msg = "patchelf --print-rpath {0} produced an error [{1}]" - tty.warn(msg.format(path, str(e))) - - return output.split(":") if output else [] + return elf.get_rpaths(path) or [] def _make_relative(reference_file, path_root, paths): @@ -384,15 +374,13 @@ def _set_elf_rpaths(target, rpaths): """Replace the original RPATH of the target with the paths passed as arguments. - This function uses ``patchelf`` to set RPATHs. - Args: target: target executable. Must be an ELF object. rpaths: paths to be set in the RPATH Returns: A string concatenating the stdout and stderr of the call - to ``patchelf`` + to ``patchelf`` if it was invoked """ # Join the paths using ':' as a separator rpaths_str = ":".join(rpaths) @@ -595,6 +583,23 @@ def _transform_rpaths(orig_rpaths, orig_root, new_prefixes): return new_rpaths +def new_relocate_elf_binaries(binaries, prefix_to_prefix): + """Take a list of binaries, and an ordered dictionary of + prefix to prefix mapping, and update the rpaths accordingly.""" + + # Transform to binary string + prefix_to_prefix = OrderedDict( + (k.encode("utf-8"), v.encode("utf-8")) for (k, v) in prefix_to_prefix.items() + ) + + for path in binaries: + try: + elf.replace_rpath_in_place_or_raise(path, prefix_to_prefix) + except elf.ElfDynamicSectionUpdateFailed as e: + # Fall back to the old `patchelf --set-rpath` method. + _set_elf_rpaths(path, e.new.decode("utf-8").split(":")) + + def relocate_elf_binaries( binaries, orig_root, new_root, new_prefixes, rel, orig_prefix, new_prefix ): diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py index 99dd7a460c..da6d800e0a 100644 --- a/lib/spack/spack/test/relocate.py +++ b/lib/spack/spack/test/relocate.py @@ -193,24 +193,6 @@ def test_file_is_relocatable_errors(tmpdir): @pytest.mark.parametrize( - "patchelf_behavior,expected", - [ - ("echo ", []), - ("echo /opt/foo/lib:/opt/foo/lib64", ["/opt/foo/lib", "/opt/foo/lib64"]), - ("exit 1", []), - ], -) -def test_existing_rpaths(patchelf_behavior, expected, mock_patchelf): - # Here we are mocking an executable that is always called "patchelf" - # because that will skip the part where we try to build patchelf - # by ourselves. The executable will output some rpaths like - # `patchelf --print-rpath` would. - path = mock_patchelf(patchelf_behavior) - rpaths = spack.relocate._elf_rpaths_for(path) - assert rpaths == expected - - -@pytest.mark.parametrize( "start_path,path_root,paths,expected", [ ( diff --git a/lib/spack/spack/test/util/elf.py b/lib/spack/spack/test/util/elf.py index cf6b3aeb27..b57477f54b 100644 --- a/lib/spack/spack/test/util/elf.py +++ b/lib/spack/spack/test/util/elf.py @@ -5,6 +5,7 @@ import io +from collections import OrderedDict import pytest @@ -26,15 +27,6 @@ def skip_unless_linux(f): @pytest.mark.requires_executables("gcc") @skip_unless_linux -def test_elf_get_rpaths(binary_with_rpaths): - # Compile an "Hello world!" executable and set RPATHs - long_rpaths = ["/very/long/prefix/x", "/very/long/prefix/y"] - executable = str(binary_with_rpaths(rpaths=long_rpaths)) - assert elf.get_rpaths(executable) == long_rpaths - - -@pytest.mark.requires_executables("gcc") -@skip_unless_linux @pytest.mark.parametrize( "linker_flag,is_runpath", [ @@ -128,3 +120,48 @@ def test_parser_doesnt_deal_with_nonzero_offset(): elf_at_offset_one.read(1) with pytest.raises(elf.ElfParsingError, match="Cannot parse at a nonzero offset"): elf.parse_elf(elf_at_offset_one) + + +@pytest.mark.requires_executables("gcc") +@skip_unless_linux +def test_elf_get_and_replace_rpaths(binary_with_rpaths): + long_rpaths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"] + executable = str(binary_with_rpaths(rpaths=long_rpaths)) + + # Before + assert elf.get_rpaths(executable) == long_rpaths + + replacements = OrderedDict( + [ + (b"/very/long/prefix-a", b"/short-a"), + (b"/very/long/prefix-b", b"/short-b"), + (b"/very/long", b"/dont"), + ] + ) + + # Replace once: should modify the file. + assert elf.replace_rpath_in_place_or_raise(executable, replacements) + + # Replace twice: nothing to be done. + assert not elf.replace_rpath_in_place_or_raise(executable, replacements) + + # Verify the rpaths were modified correctly + assert elf.get_rpaths(executable) == ["/short-a/x", "/short-b/y"] + + # Going back to long rpaths should fail, since we've added trailing \0 + # bytes, and replacement can't assume it can write back in repeated null + # bytes -- it may correspond to zero-length strings for example. + with pytest.raises( + elf.ElfDynamicSectionUpdateFailed, + match="New rpath /very/long/prefix-a/x:/very/long/prefix-b/y is " + "longer than old rpath /short-a/x:/short-b/y", + ): + elf.replace_rpath_in_place_or_raise( + executable, + OrderedDict( + [ + (b"/short-a", b"/very/long/prefix-a"), + (b"/short-b", b"/very/long/prefix-b"), + ] + ), + ) diff --git a/lib/spack/spack/util/elf.py b/lib/spack/spack/util/elf.py index bb9bfbb22d..0b2e5a4e71 100644 --- a/lib/spack/spack/util/elf.py +++ b/lib/spack/spack/util/elf.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import bisect +import re import struct import sys from collections import namedtuple @@ -99,10 +100,6 @@ def get_byte_at(byte_array, idx): return byte_array[idx] -class ElfParsingError(Exception): - pass - - class ElfFile(object): """Parsed ELF file.""" @@ -121,6 +118,7 @@ class ElfFile(object): "has_pt_dynamic", "pt_dynamic_p_offset", "pt_dynamic_p_filesz", + "pt_dynamic_strtab_offset", # string table for dynamic section # rpath "has_rpath", "dt_rpath_offset", @@ -359,7 +357,8 @@ def parse_pt_dynamic(f, elf): if not (elf.has_rpath or elf.has_soname or elf.has_needed): return - string_table = retrieve_strtab(f, elf, vaddr_to_offset(elf, strtab_vaddr)) + elf.pt_dynamic_strtab_offset = vaddr_to_offset(elf, strtab_vaddr) + string_table = retrieve_strtab(f, elf, elf.pt_dynamic_strtab_offset) if elf.has_needed: elf.dt_needed_strs = list( @@ -457,3 +456,79 @@ def get_rpaths(path): if sys.version_info[0] >= 3: rpath = rpath.decode("utf-8") return rpath.split(":") + + +def replace_rpath_in_place_or_raise(path, substitutions): + regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys())) + + try: + with open(path, "rb+") as f: + elf = parse_elf(f, interpreter=False, dynamic_section=True) + + # If there's no RPATH, then there's no need to replace anything. + if not elf.has_rpath: + return False + + # Get the non-empty rpaths. Sometimes there's a bunch of trailing + # colons ::::: used for padding, we don't add them back to make it + # more likely that the string doesn't grow. + rpaths = list(filter(len, elf.dt_rpath_str.split(b":"))) + + num_rpaths = len(rpaths) + + if num_rpaths == 0: + return False + + changed = False + for i in range(num_rpaths): + old_rpath = rpaths[i] + match = regex.match(old_rpath) + if match: + changed = True + rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :] + + # Nothing to replace! + if not changed: + return False + + new_rpath_string = b":".join(rpaths) + + pad = len(elf.dt_rpath_str) - len(new_rpath_string) + + if pad < 0: + raise ElfDynamicSectionUpdateFailed(elf.dt_rpath_str, new_rpath_string) + + # We zero out the bits we shortened because (a) it should be a + # C-string and (b) it's nice not to have spurious parts of old + # paths in the output of `strings file`. Note that we're all + # good when pad == 0; the original terminating null is used. + new_rpath_string += b"\x00" * pad + + # The rpath is at a given offset in the string table used by the + # dynamic section. + rpath_offset = elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset + + f.seek(rpath_offset) + f.write(new_rpath_string) + return True + + except ElfParsingError: + # This just means the file wasnt an elf file, so there's no point + # in updating its rpath anyways; ignore this problem. + return False + + +class ElfDynamicSectionUpdateFailed(Exception): + def __init__(self, old, new): + self.old = old + self.new = new + super(ElfDynamicSectionUpdateFailed, self).__init__( + "New rpath {} is longer than old rpath {}".format( + new.decode("utf-8"), + old.decode("utf-8"), + ) + ) + + +class ElfParsingError(Exception): + pass |