From b92bdf8c7202429d785e8c867bd83d92df1f3f4a Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Wed, 26 Oct 2022 10:41:31 +0200 Subject: Relocation regex single pass (#33496) Instead of looping over multiple regexes and the entire text file contents, create a giant regex with all literal prefixes and do a single pass over files to detect prefixes. Not only is a single pass faster, it's also likely that the regex is compiled better, given that most prefixes share a common ... prefix. --- lib/spack/spack/binary_distribution.py | 20 ++++++++------------ lib/spack/spack/relocate.py | 8 ++++++-- lib/spack/spack/test/relocate.py | 29 ++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index d22572d8a4..7c62c58cb1 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -42,7 +42,7 @@ import spack.util.spack_yaml as syaml import spack.util.url as url_util import spack.util.web as web_util from spack.caches import misc_cache_location -from spack.relocate import utf8_path_to_binary_regex +from spack.relocate import utf8_paths_to_single_binary_regex from spack.spec import Spec from spack.stage import Stage @@ -694,13 +694,10 @@ class BuildManifestVisitor(BaseDirectoryVisitor): return False -def file_matches_any_binary_regex(path, regexes): +def file_matches(path, regex): with open(path, "rb") as f: contents = f.read() - for regex in regexes: - if regex.search(contents): - return True - return False + return bool(regex.search(contents)) def get_buildfile_manifest(spec): @@ -734,8 +731,8 @@ def get_buildfile_manifest(spec): prefixes.append(spack.hooks.sbang.sbang_install_path()) prefixes.append(str(spack.store.layout.root)) - # Create a list regexes matching collected prefixes - compiled_prefixes = [utf8_path_to_binary_regex(prefix) for prefix in prefixes] + # Create a giant regex that matches all prefixes + regex = utf8_paths_to_single_binary_regex(prefixes) # Symlinks. @@ -767,10 +764,9 @@ def get_buildfile_manifest(spec): data["binary_to_relocate_fullpath"].append(abs_path) continue - elif relocate.needs_text_relocation(m_type, m_subtype): - if file_matches_any_binary_regex(abs_path, compiled_prefixes): - data["text_to_relocate"].append(rel_path) - continue + elif relocate.needs_text_relocation(m_type, m_subtype) and file_matches(abs_path, regex): + data["text_to_relocate"].append(rel_path) + continue data["other"].append(abs_path) diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py index 667e8de4e0..0d7525eb05 100644 --- a/lib/spack/spack/relocate.py +++ b/lib/spack/spack/relocate.py @@ -747,9 +747,13 @@ def relocate_links(links, orig_layout_root, orig_install_prefix, new_install_pre def utf8_path_to_binary_regex(prefix): """Create a (binary) regex that matches the input path in utf8""" prefix_bytes = re.escape(prefix).encode("utf-8") - prefix_rexp = re.compile(b"(?