From 616d5a89d424391d51becf9b89953199c1f622de Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Mon, 31 Oct 2022 10:08:16 +0100 Subject: _replace_prefix_bin performance improvements (#33590) - single pass over the binary data matching all prefixes - collect offsets and replacement strings - do in-place updates with `fseek` / `fwrite`, since typically our replacement touch O(few bytes) while the file is O(many megabytes) - be nice: leave the file untouched if some string can't be replaced --- lib/spack/spack/relocate.py | 47 ++++++++++++++++++---------------------- lib/spack/spack/test/relocate.py | 30 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py index 0d7525eb05..dca55af17e 100644 --- a/lib/spack/spack/relocate.py +++ b/lib/spack/spack/relocate.py @@ -467,40 +467,35 @@ def _replace_prefix_bin(filename, byte_prefixes): """Replace all the occurrences of the old install prefix with a new install prefix in binary files. - The new install prefix is prefixed with ``os.sep`` until the + The new install prefix is prefixed with ``b'/'`` until the lengths of the prefixes are the same. Args: filename (str): target binary file byte_prefixes (OrderedDict): OrderedDictionary where the keys are - precompiled regex of the old prefixes and the values are the new - prefixes (uft-8 encoded) + binary strings of the old prefixes and the values are the new + binary prefixes """ + all_prefixes = re.compile(b"|".join(re.escape(prefix) for prefix in byte_prefixes.keys())) + + def padded_replacement(old): + new = byte_prefixes[old] + pad = len(old) - len(new) + if pad < 0: + raise BinaryTextReplaceError(old, new) + return new + b"/" * pad with open(filename, "rb+") as f: - data = f.read() - f.seek(0) - for orig_bytes, new_bytes in byte_prefixes.items(): - original_data_len = len(data) - # Skip this hassle if not found - if orig_bytes not in data: - continue - # We only care about this problem if we are about to replace - length_compatible = len(new_bytes) <= len(orig_bytes) - if not length_compatible: - tty.debug("Binary failing to relocate is %s" % filename) - raise BinaryTextReplaceError(orig_bytes, new_bytes) - pad_length = len(orig_bytes) - len(new_bytes) - padding = os.sep * pad_length - padding = padding.encode("utf-8") - data = data.replace(orig_bytes, new_bytes + padding) - # Really needs to be the same length - if not len(data) == original_data_len: - print("Length of pad:", pad_length, "should be", len(padding)) - print(new_bytes, "was to replace", orig_bytes) - raise BinaryStringReplacementError(filename, original_data_len, len(data)) - f.write(data) - f.truncate() + # Register what replacement string to put on what offsets in the file. + replacements_at_offset = [ + (padded_replacement(m.group(0)), m.start()) + for m in re.finditer(all_prefixes, f.read()) + ] + + # Apply the replacements + for replacement, offset in replacements_at_offset: + f.seek(offset) + f.write(replacement) def relocate_macho_binaries( diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py index 8e8819ca48..4c9dd8be66 100644 --- a/lib/spack/spack/test/relocate.py +++ b/lib/spack/spack/test/relocate.py @@ -7,6 +7,7 @@ import os.path import re import shutil import sys +from collections import OrderedDict import pytest @@ -501,3 +502,32 @@ def test_utf8_paths_to_single_binary_regex(): # Match "unsafe" dir name string = b"don't match /safe/a/path but do match /safe/[a-z]/file" assert regex.search(string).group(0) == b"/safe/[a-z]/file" + + +def test_ordered_replacement(tmpdir): + # This tests whether binary text replacement respects order, so that + # a long package prefix is replaced before a shorter sub-prefix like + # the root of the spack store (as a fallback). + def replace_and_expect(prefix_map, before, after): + file = str(tmpdir.join("file")) + with open(file, "wb") as f: + f.write(before) + spack.relocate._replace_prefix_bin(file, prefix_map) + with open(file, "rb") as f: + assert f.read() == after + + replace_and_expect( + OrderedDict( + [(b"/old-spack/opt/specific-package", b"/first"), (b"/old-spack/opt", b"/second")] + ), + b"Binary with /old-spack/opt/specific-package and /old-spack/opt", + b"Binary with /first///////////////////////// and /second///////", + ) + + replace_and_expect( + OrderedDict( + [(b"/old-spack/opt", b"/second"), (b"/old-spack/opt/specific-package", b"/first")] + ), + b"Binary with /old-spack/opt/specific-package and /old-spack/opt", + b"Binary with /second////////specific-package and /second///////", + ) -- cgit v1.2.3-60-g2f50