summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2022-10-31 10:08:16 +0100
committerGitHub <noreply@github.com>2022-10-31 10:08:16 +0100
commit616d5a89d424391d51becf9b89953199c1f622de (patch)
tree57e3e960aece2bd4659340fa7f2407d80ecbd846 /lib
parentfb4be98f263be83591af09fb284405314370a564 (diff)
downloadspack-616d5a89d424391d51becf9b89953199c1f622de.tar.gz
spack-616d5a89d424391d51becf9b89953199c1f622de.tar.bz2
spack-616d5a89d424391d51becf9b89953199c1f622de.tar.xz
spack-616d5a89d424391d51becf9b89953199c1f622de.zip
_replace_prefix_bin performance improvements (#33590)
- single pass over the binary data matching all prefixes - collect offsets and replacement strings - do in-place updates with `fseek` / `fwrite`, since typically our replacement touch O(few bytes) while the file is O(many megabytes) - be nice: leave the file untouched if some string can't be replaced
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/relocate.py47
-rw-r--r--lib/spack/spack/test/relocate.py30
2 files changed, 51 insertions, 26 deletions
diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py
index 0d7525eb05..dca55af17e 100644
--- a/lib/spack/spack/relocate.py
+++ b/lib/spack/spack/relocate.py
@@ -467,40 +467,35 @@ def _replace_prefix_bin(filename, byte_prefixes):
"""Replace all the occurrences of the old install prefix with a
new install prefix in binary files.
- The new install prefix is prefixed with ``os.sep`` until the
+ The new install prefix is prefixed with ``b'/'`` until the
lengths of the prefixes are the same.
Args:
filename (str): target binary file
byte_prefixes (OrderedDict): OrderedDictionary where the keys are
- precompiled regex of the old prefixes and the values are the new
- prefixes (uft-8 encoded)
+ binary strings of the old prefixes and the values are the new
+ binary prefixes
"""
+ all_prefixes = re.compile(b"|".join(re.escape(prefix) for prefix in byte_prefixes.keys()))
+
+ def padded_replacement(old):
+ new = byte_prefixes[old]
+ pad = len(old) - len(new)
+ if pad < 0:
+ raise BinaryTextReplaceError(old, new)
+ return new + b"/" * pad
with open(filename, "rb+") as f:
- data = f.read()
- f.seek(0)
- for orig_bytes, new_bytes in byte_prefixes.items():
- original_data_len = len(data)
- # Skip this hassle if not found
- if orig_bytes not in data:
- continue
- # We only care about this problem if we are about to replace
- length_compatible = len(new_bytes) <= len(orig_bytes)
- if not length_compatible:
- tty.debug("Binary failing to relocate is %s" % filename)
- raise BinaryTextReplaceError(orig_bytes, new_bytes)
- pad_length = len(orig_bytes) - len(new_bytes)
- padding = os.sep * pad_length
- padding = padding.encode("utf-8")
- data = data.replace(orig_bytes, new_bytes + padding)
- # Really needs to be the same length
- if not len(data) == original_data_len:
- print("Length of pad:", pad_length, "should be", len(padding))
- print(new_bytes, "was to replace", orig_bytes)
- raise BinaryStringReplacementError(filename, original_data_len, len(data))
- f.write(data)
- f.truncate()
+ # Register what replacement string to put on what offsets in the file.
+ replacements_at_offset = [
+ (padded_replacement(m.group(0)), m.start())
+ for m in re.finditer(all_prefixes, f.read())
+ ]
+
+ # Apply the replacements
+ for replacement, offset in replacements_at_offset:
+ f.seek(offset)
+ f.write(replacement)
def relocate_macho_binaries(
diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py
index 8e8819ca48..4c9dd8be66 100644
--- a/lib/spack/spack/test/relocate.py
+++ b/lib/spack/spack/test/relocate.py
@@ -7,6 +7,7 @@ import os.path
import re
import shutil
import sys
+from collections import OrderedDict
import pytest
@@ -501,3 +502,32 @@ def test_utf8_paths_to_single_binary_regex():
# Match "unsafe" dir name
string = b"don't match /safe/a/path but do match /safe/[a-z]/file"
assert regex.search(string).group(0) == b"/safe/[a-z]/file"
+
+
+def test_ordered_replacement(tmpdir):
+ # This tests whether binary text replacement respects order, so that
+ # a long package prefix is replaced before a shorter sub-prefix like
+ # the root of the spack store (as a fallback).
+ def replace_and_expect(prefix_map, before, after):
+ file = str(tmpdir.join("file"))
+ with open(file, "wb") as f:
+ f.write(before)
+ spack.relocate._replace_prefix_bin(file, prefix_map)
+ with open(file, "rb") as f:
+ assert f.read() == after
+
+ replace_and_expect(
+ OrderedDict(
+ [(b"/old-spack/opt/specific-package", b"/first"), (b"/old-spack/opt", b"/second")]
+ ),
+ b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
+ b"Binary with /first///////////////////////// and /second///////",
+ )
+
+ replace_and_expect(
+ OrderedDict(
+ [(b"/old-spack/opt", b"/second"), (b"/old-spack/opt/specific-package", b"/first")]
+ ),
+ b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
+ b"Binary with /second////////specific-package and /second///////",
+ )