summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2022-10-26 10:41:31 +0200
committerGitHub <noreply@github.com>2022-10-26 10:41:31 +0200
commitb92bdf8c7202429d785e8c867bd83d92df1f3f4a (patch)
tree3973c91c562708f849adedafe4074d020121c8a7
parenta2520e80c0b2bee62cc4d57090ded2422b96263a (diff)
downloadspack-b92bdf8c7202429d785e8c867bd83d92df1f3f4a.tar.gz
spack-b92bdf8c7202429d785e8c867bd83d92df1f3f4a.tar.bz2
spack-b92bdf8c7202429d785e8c867bd83d92df1f3f4a.tar.xz
spack-b92bdf8c7202429d785e8c867bd83d92df1f3f4a.zip
Relocation regex single pass (#33496)
Instead of looping over multiple regexes and the entire text file contents, create a giant regex with all literal prefixes and do a single pass over files to detect prefixes. Not only is a single pass faster, it's also likely that the regex is compiled better, given that most prefixes share a common ... prefix.
-rw-r--r--lib/spack/spack/binary_distribution.py20
-rw-r--r--lib/spack/spack/relocate.py8
-rw-r--r--lib/spack/spack/test/relocate.py29
3 files changed, 36 insertions, 21 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index d22572d8a4..7c62c58cb1 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -42,7 +42,7 @@ import spack.util.spack_yaml as syaml
import spack.util.url as url_util
import spack.util.web as web_util
from spack.caches import misc_cache_location
-from spack.relocate import utf8_path_to_binary_regex
+from spack.relocate import utf8_paths_to_single_binary_regex
from spack.spec import Spec
from spack.stage import Stage
@@ -694,13 +694,10 @@ class BuildManifestVisitor(BaseDirectoryVisitor):
return False
-def file_matches_any_binary_regex(path, regexes):
+def file_matches(path, regex):
with open(path, "rb") as f:
contents = f.read()
- for regex in regexes:
- if regex.search(contents):
- return True
- return False
+ return bool(regex.search(contents))
def get_buildfile_manifest(spec):
@@ -734,8 +731,8 @@ def get_buildfile_manifest(spec):
prefixes.append(spack.hooks.sbang.sbang_install_path())
prefixes.append(str(spack.store.layout.root))
- # Create a list regexes matching collected prefixes
- compiled_prefixes = [utf8_path_to_binary_regex(prefix) for prefix in prefixes]
+ # Create a giant regex that matches all prefixes
+ regex = utf8_paths_to_single_binary_regex(prefixes)
# Symlinks.
@@ -767,10 +764,9 @@ def get_buildfile_manifest(spec):
data["binary_to_relocate_fullpath"].append(abs_path)
continue
- elif relocate.needs_text_relocation(m_type, m_subtype):
- if file_matches_any_binary_regex(abs_path, compiled_prefixes):
- data["text_to_relocate"].append(rel_path)
- continue
+ elif relocate.needs_text_relocation(m_type, m_subtype) and file_matches(abs_path, regex):
+ data["text_to_relocate"].append(rel_path)
+ continue
data["other"].append(abs_path)
diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py
index 667e8de4e0..0d7525eb05 100644
--- a/lib/spack/spack/relocate.py
+++ b/lib/spack/spack/relocate.py
@@ -747,9 +747,13 @@ def relocate_links(links, orig_layout_root, orig_install_prefix, new_install_pre
def utf8_path_to_binary_regex(prefix):
"""Create a (binary) regex that matches the input path in utf8"""
prefix_bytes = re.escape(prefix).encode("utf-8")
- prefix_rexp = re.compile(b"(?<![\\w\\-_/])([\\w\\-_]*?)%s([\\w\\-_/]*)" % prefix_bytes)
+ return re.compile(b"(?<![\\w\\-_/])([\\w\\-_]*?)%s([\\w\\-_/]*)" % prefix_bytes)
- return prefix_rexp
+
+def utf8_paths_to_single_binary_regex(prefixes):
+ """Create a (binary) regex that matches any input path in utf8"""
+ all_prefixes = b"|".join(re.escape(prefix).encode("utf-8") for prefix in prefixes)
+ return re.compile(b"(?<![\\w\\-_/])([\\w\\-_]*?)(%s)([\\w\\-_/]*)" % all_prefixes)
def unsafe_relocate_text(files, prefixes, concurrency=32):
diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py
index 42e392c1ba..8e8819ca48 100644
--- a/lib/spack/spack/test/relocate.py
+++ b/lib/spack/spack/test/relocate.py
@@ -20,7 +20,7 @@ import spack.spec
import spack.store
import spack.tengine
import spack.util.executable
-from spack.relocate import utf8_path_to_binary_regex
+from spack.relocate import utf8_path_to_binary_regex, utf8_paths_to_single_binary_regex
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="Tests fail on Windows")
@@ -480,9 +480,24 @@ def test_fixup_macos_rpaths(make_dylib, make_object_file):
def test_text_relocation_regex_is_safe():
- assert (
- utf8_path_to_binary_regex("/[a-z]/")
- .search(b"This does not match /a/, but this does: /[a-z]/.")
- .group(0)
- == b"/[a-z]/"
- )
+ # Test whether prefix regex is properly escaped
+ string = b"This does not match /a/, but this does: /[a-z]/."
+ assert utf8_path_to_binary_regex("/[a-z]/").search(string).group(0) == b"/[a-z]/"
+
+
+def test_utf8_paths_to_single_binary_regex():
+ regex = utf8_paths_to_single_binary_regex(["/first/path", "/second/path", "/safe/[a-z]"])
+ # Match nothing
+ assert not regex.search(b"text /neither/first/path text /the/second/path text")
+
+ # Match first
+ string = b"contains both /first/path/subdir and /second/path/sub"
+ assert regex.search(string).group(0) == b"/first/path/subdir"
+
+ # Match second
+ string = b"contains both /not/first/path/subdir but /second/path/subdir"
+ assert regex.search(string).group(0) == b"/second/path/subdir"
+
+ # Match "unsafe" dir name
+ string = b"don't match /safe/a/path but do match /safe/[a-z]/file"
+ assert regex.search(string).group(0) == b"/safe/[a-z]/file"