summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2022-11-04 10:30:53 +0100
committerGitHub <noreply@github.com>2022-11-04 02:30:53 -0700
commit1d4919924de9743f7ec6f47616d2c67d3074648b (patch)
treea206d8a52f1f0057af7677c9d91bf34102239c7a
parentb1f896e6c75560cdb4f315cd8e68a653d085db63 (diff)
downloadspack-1d4919924de9743f7ec6f47616d2c67d3074648b.tar.gz
spack-1d4919924de9743f7ec6f47616d2c67d3074648b.tar.bz2
spack-1d4919924de9743f7ec6f47616d2c67d3074648b.tar.xz
spack-1d4919924de9743f7ec6f47616d2c67d3074648b.zip
Add in-place RPATH replacement (#27610)
Whenever the rpath string actually _grows_, it falls back to patchelf, when it stays the same length or gets shorter, we update it in-place, padded with null bytes. This PR only deals with absolute -> absolute rpath replacement. We don't use `_build_tarball(relative=True)` in our CI. If `relative` then it falls back to the old replacement code. With this PR, relocation time goes down significantly, likely because patchelf does some odd things with mmap, causing lots of overhead. Example: - `binutils`: 700MB installed, goes from `1.91s` to `0.57s`, or `3.4x` faster. Relocation time: 27% -> 10% of total install time - `llvm`: 6.8GB installed, goes from `28.56s` to `5.38`, or `5.3x` faster. Relocation time: 44% -> 13% of total install time The bottleneck is now decompression. Note: I'm somewhat confused about the "relative rpath" code paths. Right now this PR only deals with absolute -> absolute replacement. As far as I understand, if you embrace relative rpaths when uploading to the buildcache, the whole point is you _don't_ want to patch rpaths on install? So it seems fine to not expand `$ORIGIN` again imho.
-rw-r--r--lib/spack/spack/binary_distribution.py6
-rw-r--r--lib/spack/spack/relocate.py43
-rw-r--r--lib/spack/spack/test/relocate.py18
-rw-r--r--lib/spack/spack/test/util/elf.py55
-rw-r--r--lib/spack/spack/util/elf.py85
5 files changed, 155 insertions, 52 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 3069e234b2..3de0bcf9db 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -1634,7 +1634,11 @@ def relocate_package(spec, allow_root):
old_prefix,
new_prefix,
)
- if "elf" in platform.binary_formats:
+ elif "elf" in platform.binary_formats and not rel:
+ # The new ELF dynamic section relocation logic only handles absolute to
+ # absolute relocation.
+ relocate.new_relocate_elf_binaries(files_to_relocate, prefix_to_prefix_bin)
+ elif "elf" in platform.binary_formats and rel:
relocate.relocate_elf_binaries(
files_to_relocate,
old_layout_root,
diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py
index 8a5e77e22e..b8c47b5864 100644
--- a/lib/spack/spack/relocate.py
+++ b/lib/spack/spack/relocate.py
@@ -7,6 +7,7 @@ import multiprocessing.pool
import os
import re
import shutil
+from collections import OrderedDict
import macholib.mach_o
import macholib.MachO
@@ -21,6 +22,7 @@ import spack.bootstrap
import spack.platforms
import spack.repo
import spack.spec
+import spack.util.elf as elf
import spack.util.executable as executable
is_macos = str(spack.platforms.real_host()) == "darwin"
@@ -93,27 +95,15 @@ def _patchelf():
def _elf_rpaths_for(path):
"""Return the RPATHs for an executable or a library.
- The RPATHs are obtained by ``patchelf --print-rpath PATH``.
-
Args:
path (str): full path to the executable or library
Return:
- RPATHs as a list of strings.
+ RPATHs as a list of strings. Returns an empty array
+ on ELF parsing errors, or when the ELF file simply
+ has no rpaths.
"""
- # If we're relocating patchelf itself, use it
- patchelf_path = path if path.endswith("/bin/patchelf") else _patchelf()
- patchelf = executable.Executable(patchelf_path)
-
- output = ""
- try:
- output = patchelf("--print-rpath", path, output=str, error=str)
- output = output.strip("\n")
- except executable.ProcessError as e:
- msg = "patchelf --print-rpath {0} produced an error [{1}]"
- tty.warn(msg.format(path, str(e)))
-
- return output.split(":") if output else []
+ return elf.get_rpaths(path) or []
def _make_relative(reference_file, path_root, paths):
@@ -384,15 +374,13 @@ def _set_elf_rpaths(target, rpaths):
"""Replace the original RPATH of the target with the paths passed
as arguments.
- This function uses ``patchelf`` to set RPATHs.
-
Args:
target: target executable. Must be an ELF object.
rpaths: paths to be set in the RPATH
Returns:
A string concatenating the stdout and stderr of the call
- to ``patchelf``
+ to ``patchelf`` if it was invoked
"""
# Join the paths using ':' as a separator
rpaths_str = ":".join(rpaths)
@@ -595,6 +583,23 @@ def _transform_rpaths(orig_rpaths, orig_root, new_prefixes):
return new_rpaths
+def new_relocate_elf_binaries(binaries, prefix_to_prefix):
+ """Take a list of binaries, and an ordered dictionary of
+ prefix to prefix mapping, and update the rpaths accordingly."""
+
+ # Transform to binary string
+ prefix_to_prefix = OrderedDict(
+ (k.encode("utf-8"), v.encode("utf-8")) for (k, v) in prefix_to_prefix.items()
+ )
+
+ for path in binaries:
+ try:
+ elf.replace_rpath_in_place_or_raise(path, prefix_to_prefix)
+ except elf.ElfDynamicSectionUpdateFailed as e:
+ # Fall back to the old `patchelf --set-rpath` method.
+ _set_elf_rpaths(path, e.new.decode("utf-8").split(":"))
+
+
def relocate_elf_binaries(
binaries, orig_root, new_root, new_prefixes, rel, orig_prefix, new_prefix
):
diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py
index 99dd7a460c..da6d800e0a 100644
--- a/lib/spack/spack/test/relocate.py
+++ b/lib/spack/spack/test/relocate.py
@@ -193,24 +193,6 @@ def test_file_is_relocatable_errors(tmpdir):
@pytest.mark.parametrize(
- "patchelf_behavior,expected",
- [
- ("echo ", []),
- ("echo /opt/foo/lib:/opt/foo/lib64", ["/opt/foo/lib", "/opt/foo/lib64"]),
- ("exit 1", []),
- ],
-)
-def test_existing_rpaths(patchelf_behavior, expected, mock_patchelf):
- # Here we are mocking an executable that is always called "patchelf"
- # because that will skip the part where we try to build patchelf
- # by ourselves. The executable will output some rpaths like
- # `patchelf --print-rpath` would.
- path = mock_patchelf(patchelf_behavior)
- rpaths = spack.relocate._elf_rpaths_for(path)
- assert rpaths == expected
-
-
-@pytest.mark.parametrize(
"start_path,path_root,paths,expected",
[
(
diff --git a/lib/spack/spack/test/util/elf.py b/lib/spack/spack/test/util/elf.py
index cf6b3aeb27..b57477f54b 100644
--- a/lib/spack/spack/test/util/elf.py
+++ b/lib/spack/spack/test/util/elf.py
@@ -5,6 +5,7 @@
import io
+from collections import OrderedDict
import pytest
@@ -26,15 +27,6 @@ def skip_unless_linux(f):
@pytest.mark.requires_executables("gcc")
@skip_unless_linux
-def test_elf_get_rpaths(binary_with_rpaths):
- # Compile an "Hello world!" executable and set RPATHs
- long_rpaths = ["/very/long/prefix/x", "/very/long/prefix/y"]
- executable = str(binary_with_rpaths(rpaths=long_rpaths))
- assert elf.get_rpaths(executable) == long_rpaths
-
-
-@pytest.mark.requires_executables("gcc")
-@skip_unless_linux
@pytest.mark.parametrize(
"linker_flag,is_runpath",
[
@@ -128,3 +120,48 @@ def test_parser_doesnt_deal_with_nonzero_offset():
elf_at_offset_one.read(1)
with pytest.raises(elf.ElfParsingError, match="Cannot parse at a nonzero offset"):
elf.parse_elf(elf_at_offset_one)
+
+
+@pytest.mark.requires_executables("gcc")
+@skip_unless_linux
+def test_elf_get_and_replace_rpaths(binary_with_rpaths):
+ long_rpaths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"]
+ executable = str(binary_with_rpaths(rpaths=long_rpaths))
+
+ # Before
+ assert elf.get_rpaths(executable) == long_rpaths
+
+ replacements = OrderedDict(
+ [
+ (b"/very/long/prefix-a", b"/short-a"),
+ (b"/very/long/prefix-b", b"/short-b"),
+ (b"/very/long", b"/dont"),
+ ]
+ )
+
+ # Replace once: should modify the file.
+ assert elf.replace_rpath_in_place_or_raise(executable, replacements)
+
+ # Replace twice: nothing to be done.
+ assert not elf.replace_rpath_in_place_or_raise(executable, replacements)
+
+ # Verify the rpaths were modified correctly
+ assert elf.get_rpaths(executable) == ["/short-a/x", "/short-b/y"]
+
+ # Going back to long rpaths should fail, since we've added trailing \0
+ # bytes, and replacement can't assume it can write back in repeated null
+ # bytes -- it may correspond to zero-length strings for example.
+ with pytest.raises(
+ elf.ElfDynamicSectionUpdateFailed,
+ match="New rpath /very/long/prefix-a/x:/very/long/prefix-b/y is "
+ "longer than old rpath /short-a/x:/short-b/y",
+ ):
+ elf.replace_rpath_in_place_or_raise(
+ executable,
+ OrderedDict(
+ [
+ (b"/short-a", b"/very/long/prefix-a"),
+ (b"/short-b", b"/very/long/prefix-b"),
+ ]
+ ),
+ )
diff --git a/lib/spack/spack/util/elf.py b/lib/spack/spack/util/elf.py
index bb9bfbb22d..0b2e5a4e71 100644
--- a/lib/spack/spack/util/elf.py
+++ b/lib/spack/spack/util/elf.py
@@ -4,6 +4,7 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import bisect
+import re
import struct
import sys
from collections import namedtuple
@@ -99,10 +100,6 @@ def get_byte_at(byte_array, idx):
return byte_array[idx]
-class ElfParsingError(Exception):
- pass
-
-
class ElfFile(object):
"""Parsed ELF file."""
@@ -121,6 +118,7 @@ class ElfFile(object):
"has_pt_dynamic",
"pt_dynamic_p_offset",
"pt_dynamic_p_filesz",
+ "pt_dynamic_strtab_offset", # string table for dynamic section
# rpath
"has_rpath",
"dt_rpath_offset",
@@ -359,7 +357,8 @@ def parse_pt_dynamic(f, elf):
if not (elf.has_rpath or elf.has_soname or elf.has_needed):
return
- string_table = retrieve_strtab(f, elf, vaddr_to_offset(elf, strtab_vaddr))
+ elf.pt_dynamic_strtab_offset = vaddr_to_offset(elf, strtab_vaddr)
+ string_table = retrieve_strtab(f, elf, elf.pt_dynamic_strtab_offset)
if elf.has_needed:
elf.dt_needed_strs = list(
@@ -457,3 +456,79 @@ def get_rpaths(path):
if sys.version_info[0] >= 3:
rpath = rpath.decode("utf-8")
return rpath.split(":")
+
+
+def replace_rpath_in_place_or_raise(path, substitutions):
+ regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys()))
+
+ try:
+ with open(path, "rb+") as f:
+ elf = parse_elf(f, interpreter=False, dynamic_section=True)
+
+ # If there's no RPATH, then there's no need to replace anything.
+ if not elf.has_rpath:
+ return False
+
+ # Get the non-empty rpaths. Sometimes there's a bunch of trailing
+ # colons ::::: used for padding, we don't add them back to make it
+ # more likely that the string doesn't grow.
+ rpaths = list(filter(len, elf.dt_rpath_str.split(b":")))
+
+ num_rpaths = len(rpaths)
+
+ if num_rpaths == 0:
+ return False
+
+ changed = False
+ for i in range(num_rpaths):
+ old_rpath = rpaths[i]
+ match = regex.match(old_rpath)
+ if match:
+ changed = True
+ rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :]
+
+ # Nothing to replace!
+ if not changed:
+ return False
+
+ new_rpath_string = b":".join(rpaths)
+
+ pad = len(elf.dt_rpath_str) - len(new_rpath_string)
+
+ if pad < 0:
+ raise ElfDynamicSectionUpdateFailed(elf.dt_rpath_str, new_rpath_string)
+
+ # We zero out the bits we shortened because (a) it should be a
+ # C-string and (b) it's nice not to have spurious parts of old
+ # paths in the output of `strings file`. Note that we're all
+ # good when pad == 0; the original terminating null is used.
+ new_rpath_string += b"\x00" * pad
+
+ # The rpath is at a given offset in the string table used by the
+ # dynamic section.
+ rpath_offset = elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset
+
+ f.seek(rpath_offset)
+ f.write(new_rpath_string)
+ return True
+
+ except ElfParsingError:
+ # This just means the file wasnt an elf file, so there's no point
+ # in updating its rpath anyways; ignore this problem.
+ return False
+
+
+class ElfDynamicSectionUpdateFailed(Exception):
+ def __init__(self, old, new):
+ self.old = old
+ self.new = new
+ super(ElfDynamicSectionUpdateFailed, self).__init__(
+ "New rpath {} is longer than old rpath {}".format(
+ new.decode("utf-8"),
+ old.decode("utf-8"),
+ )
+ )
+
+
+class ElfParsingError(Exception):
+ pass