From 9cde25b39e8ca20e262dca2d8f145770db284b73 Mon Sep 17 00:00:00 2001 From: Peter Scheibel Date: Tue, 17 Oct 2023 11:33:59 -0700 Subject: Allow / in GitVersion (#39398) This commit allows version specifiers to refer to git branches that contain forward slashes. For example, the following is valid syntax now: pkg@git.releases/1.0 It also adds a new method `Spec.format_path(fmt)` which is like `Spec.format`, but also maps unsafe characters to `_` after interpolation. The difference is as follows: >>> Spec("pkg@git.releases/1.0").format("{name}/{version}") 'pkg/git.releases/1.0' >>> Spec("pkg@git.releases/1.0").format_path("{name}/{version}") 'pkg/git.releases_1.0' The `format_path` method is used in all projections. Notice that this method also maps `=` to `_` >>> Spec("pkg@git.main=1.0").format_path("{name}/{version}") 'pkg/git.main_1.0' which should avoid syntax issues when `Spec.prefix` is literally copied into a Makefile as sometimes happens in AutotoolsPackage or MakefilePackage --- lib/spack/llnl/util/filesystem.py | 31 +++++++++++++ lib/spack/spack/binary_distribution.py | 12 ++--- lib/spack/spack/cmd/ci.py | 4 +- lib/spack/spack/cmd/install.py | 3 +- lib/spack/spack/directory_layout.py | 2 +- lib/spack/spack/filesystem_view.py | 6 +-- lib/spack/spack/install_test.py | 2 +- lib/spack/spack/modules/common.py | 2 +- lib/spack/spack/modules/lmod.py | 9 ++-- lib/spack/spack/package_base.py | 7 ++- lib/spack/spack/parser.py | 13 ++++-- lib/spack/spack/spec.py | 41 +++++++++++++++++ lib/spack/spack/stage.py | 2 +- lib/spack/spack/test/spec_semantics.py | 80 ++++++++++++++++++++++++++++++++++ lib/spack/spack/test/spec_syntax.py | 8 ++++ lib/spack/spack/test/versions.py | 19 ++++++++ 16 files changed, 214 insertions(+), 27 deletions(-) diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py index 8f4217049d..47c66248b5 100644 --- a/lib/spack/llnl/util/filesystem.py +++ b/lib/spack/llnl/util/filesystem.py @@ -156,6 +156,37 @@ if sys.version_info < (3, 7, 4): shutil.copystat = copystat +def polite_path(components: Iterable[str]): + """ + Given a list of strings which are intended to be path components, + generate a path, and format each component to avoid generating extra + path entries. + + For example all "/", "\", and ":" characters will be replaced with + "_". Other characters like "=" will also be replaced. + """ + return os.path.join(*[polite_filename(x) for x in components]) + + +@memoized +def _polite_antipattern(): + # A regex of all the characters we don't want in a filename + return re.compile(r"[^A-Za-z0-9_.-]") + + +def polite_filename(filename: str) -> str: + """ + Replace generally problematic filename characters with underscores. + + This differs from sanitize_filename in that it is more aggressive in + changing characters in the name. For example it removes "=" which can + confuse path parsing in external tools. + """ + # This character set applies for both Windows and Linux. It does not + # account for reserved filenames in Windows. + return _polite_antipattern().sub("_", filename) + + def getuid(): if sys.platform == "win32": import ctypes diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 5559e89820..7484fee097 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -797,11 +797,7 @@ def tarball_directory_name(spec): Return name of the tarball directory according to the convention -//-/ """ - return os.path.join( - str(spec.architecture), - f"{spec.compiler.name}-{spec.compiler.version}", - f"{spec.name}-{spec.version}", - ) + return spec.format_path("{architecture}/{compiler.name}-{compiler.version}/{name}-{version}") def tarball_name(spec, ext): @@ -809,10 +805,10 @@ def tarball_name(spec, ext): Return the name of the tarfile according to the convention --- """ - return ( - f"{spec.architecture}-{spec.compiler.name}-{spec.compiler.version}-" - f"{spec.name}-{spec.version}-{spec.dag_hash()}{ext}" + spec_formatted = spec.format_path( + "{architecture}-{compiler.name}-{compiler.version}-{name}-{version}-{hash}" ) + return f"{spec_formatted}{ext}" def tarball_path_name(spec, ext): diff --git a/lib/spack/spack/cmd/ci.py b/lib/spack/spack/cmd/ci.py index b30483218a..cf2ee11c04 100644 --- a/lib/spack/spack/cmd/ci.py +++ b/lib/spack/spack/cmd/ci.py @@ -579,7 +579,9 @@ def ci_rebuild(args): "SPACK_COLOR=always", "SPACK_INSTALL_FLAGS={}".format(args_to_string(deps_install_args)), "-j$(nproc)", - "install-deps/{}".format(job_spec.format("{name}-{version}-{hash}")), + "install-deps/{}".format( + ev.depfile.MakefileSpec(job_spec).safe_format("{name}-{version}-{hash}") + ), ], spack_cmd + ["install"] + root_install_args, ] diff --git a/lib/spack/spack/cmd/install.py b/lib/spack/spack/cmd/install.py index 2f49732094..b74f982755 100644 --- a/lib/spack/spack/cmd/install.py +++ b/lib/spack/spack/cmd/install.py @@ -240,8 +240,7 @@ def default_log_file(spec): """Computes the default filename for the log file and creates the corresponding directory if not present """ - fmt = "test-{x.name}-{x.version}-{hash}.xml" - basename = fmt.format(x=spec, hash=spec.dag_hash()) + basename = spec.format_path("test-{name}-{version}-{hash}.xml") dirname = fs.os.path.join(spack.paths.reports_path, "junit") fs.mkdirp(dirname) return fs.os.path.join(dirname, basename) diff --git a/lib/spack/spack/directory_layout.py b/lib/spack/spack/directory_layout.py index 46bb6c8557..c0741a037c 100644 --- a/lib/spack/spack/directory_layout.py +++ b/lib/spack/spack/directory_layout.py @@ -104,7 +104,7 @@ class DirectoryLayout: _check_concrete(spec) projection = spack.projections.get_projection(self.projections, spec) - path = spec.format(projection) + path = spec.format_path(projection) return str(Path(path)) def write_spec(self, spec, path): diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py index f0e79afd7d..e6631fecbf 100644 --- a/lib/spack/spack/filesystem_view.py +++ b/lib/spack/spack/filesystem_view.py @@ -500,7 +500,7 @@ class YamlFilesystemView(FilesystemView): proj = spack.projections.get_projection(self.projections, locator_spec) if proj: - return os.path.join(self._root, locator_spec.format(proj)) + return os.path.join(self._root, locator_spec.format_path(proj)) return self._root def get_all_specs(self): @@ -776,7 +776,7 @@ class SimpleFilesystemView(FilesystemView): spec = spec.package.extendee_spec p = spack.projections.get_projection(self.projections, spec) - return spec.format(p) if p else "" + return spec.format_path(p) if p else "" def get_projection_for_spec(self, spec): """ @@ -791,7 +791,7 @@ class SimpleFilesystemView(FilesystemView): proj = spack.projections.get_projection(self.projections, spec) if proj: - return os.path.join(self._root, spec.format(proj)) + return os.path.join(self._root, spec.format_path(proj)) return self._root diff --git a/lib/spack/spack/install_test.py b/lib/spack/spack/install_test.py index 0d8fa782b6..662a1536c4 100644 --- a/lib/spack/spack/install_test.py +++ b/lib/spack/spack/install_test.py @@ -1039,7 +1039,7 @@ class TestSuite: Returns: str: the install test package identifier """ - return spec.format("{name}-{version}-{hash:7}") + return spec.format_path("{name}-{version}-{hash:7}") @classmethod def test_log_name(cls, spec): diff --git a/lib/spack/spack/modules/common.py b/lib/spack/spack/modules/common.py index 4a3485c864..4b60f52bf4 100644 --- a/lib/spack/spack/modules/common.py +++ b/lib/spack/spack/modules/common.py @@ -586,7 +586,7 @@ class BaseFileLayout: if not projection: projection = self.conf.default_projections["all"] - name = self.spec.format(projection) + name = self.spec.format_path(projection) # Not everybody is working on linux... parts = name.split("/") name = os.path.join(*parts) diff --git a/lib/spack/spack/modules/lmod.py b/lib/spack/spack/modules/lmod.py index 5c001c9ead..d81e07e0bf 100644 --- a/lib/spack/spack/modules/lmod.py +++ b/lib/spack/spack/modules/lmod.py @@ -9,6 +9,7 @@ import os.path import posixpath from typing import Any, Dict, List +import llnl.util.filesystem as fs import llnl.util.lang as lang import spack.compilers @@ -283,8 +284,10 @@ class LmodFileLayout(BaseFileLayout): Returns: str: part of the path associated with the service """ + # General format for the path part - path_part_fmt = os.path.join("{token.name}", "{token.version}") + def path_part_fmt(token): + return fs.polite_path([f"{token.name}", f"{token.version}"]) # If we are dealing with a core compiler, return 'Core' core_compilers = self.conf.core_compilers @@ -296,13 +299,13 @@ class LmodFileLayout(BaseFileLayout): # CompilerSpec does not have a hash, as we are not allowed to # use different flavors of the same compiler if name == "compiler": - return path_part_fmt.format(token=value) + return path_part_fmt(token=value) # In case the hierarchy token refers to a virtual provider # we need to append a hash to the version to distinguish # among flavors of the same library (e.g. openblas~openmp vs. # openblas+openmp) - path = path_part_fmt.format(token=value) + path = path_part_fmt(token=value) path = "-".join([path, value.dag_hash(length=7)]) return path diff --git a/lib/spack/spack/package_base.py b/lib/spack/spack/package_base.py index 940c12c11a..81cc9b8d61 100644 --- a/lib/spack/spack/package_base.py +++ b/lib/spack/spack/package_base.py @@ -991,13 +991,14 @@ class PackageBase(WindowsRPath, PackageViewMixin, metaclass=PackageMeta): return None def _make_resource_stage(self, root_stage, resource): + pretty_resource_name = fsys.polite_filename(f"{resource.name}-{self.version}") return ResourceStage( resource.fetcher, root=root_stage, resource=resource, name=self._resource_stage(resource), mirror_paths=spack.mirror.mirror_archive_paths( - resource.fetcher, os.path.join(self.name, f"{resource.name}-{self.version}") + resource.fetcher, os.path.join(self.name, pretty_resource_name) ), path=self.path, ) @@ -1008,8 +1009,10 @@ class PackageBase(WindowsRPath, PackageViewMixin, metaclass=PackageMeta): def _make_root_stage(self, fetcher): # Construct a mirror path (TODO: get this out of package.py) + format_string = "{name}-{version}" + pretty_name = self.spec.format_path(format_string) mirror_paths = spack.mirror.mirror_archive_paths( - fetcher, os.path.join(self.name, f"{self.name}-{self.version}"), self.spec + fetcher, os.path.join(self.name, pretty_name), self.spec ) # Construct a path where the stage should build.. s = self.spec diff --git a/lib/spack/spack/parser.py b/lib/spack/spack/parser.py index 5e46ddb1b1..7e3532e948 100644 --- a/lib/spack/spack/parser.py +++ b/lib/spack/spack/parser.py @@ -76,7 +76,9 @@ IS_WINDOWS = sys.platform == "win32" IDENTIFIER = r"(?:[a-zA-Z_0-9][a-zA-Z_0-9\-]*)" DOTTED_IDENTIFIER = rf"(?:{IDENTIFIER}(?:\.{IDENTIFIER})+)" GIT_HASH = r"(?:[A-Fa-f0-9]{40})" -GIT_VERSION = rf"(?:(?:git\.(?:{DOTTED_IDENTIFIER}|{IDENTIFIER}))|(?:{GIT_HASH}))" +#: Git refs include branch names, and can contain "." and "/" +GIT_REF = r"(?:[a-zA-Z_0-9][a-zA-Z_0-9./\-]*)" +GIT_VERSION_PATTERN = rf"(?:(?:git\.(?:{GIT_REF}))|(?:{GIT_HASH}))" NAME = r"[a-zA-Z_0-9][a-zA-Z_0-9\-.]*" @@ -127,7 +129,8 @@ class TokenType(TokenBase): # Dependency DEPENDENCY = r"(?:\^)" # Version - VERSION_HASH_PAIR = rf"(?:@(?:{GIT_VERSION})=(?:{VERSION}))" + VERSION_HASH_PAIR = rf"(?:@(?:{GIT_VERSION_PATTERN})=(?:{VERSION}))" + GIT_VERSION = rf"@(?:{GIT_VERSION_PATTERN})" VERSION = rf"(?:@\s*(?:{VERSION_LIST}))" # Variants PROPAGATED_BOOL_VARIANT = rf"(?:(?:\+\+|~~|--)\s*{NAME})" @@ -358,8 +361,10 @@ class SpecNodeParser: compiler_name.strip(), compiler_version ) self.has_compiler = True - elif self.ctx.accept(TokenType.VERSION) or self.ctx.accept( - TokenType.VERSION_HASH_PAIR + elif ( + self.ctx.accept(TokenType.VERSION_HASH_PAIR) + or self.ctx.accept(TokenType.GIT_VERSION) + or self.ctx.accept(TokenType.VERSION) ): if self.has_version: raise spack.spec.MultipleVersionError( diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 08576419f0..07b3e56c7d 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -54,6 +54,7 @@ import enum import io import itertools import os +import pathlib import platform import re import socket @@ -4453,6 +4454,42 @@ class Spec: kwargs.setdefault("color", None) return self.format(*args, **kwargs) + def format_path( + # self, format_string: str, _path_ctor: Optional[pathlib.PurePath] = None + self, + format_string: str, + _path_ctor: Optional[Callable[[Any], pathlib.PurePath]] = None, + ) -> str: + """Given a `format_string` that is intended as a path, generate a string + like from `Spec.format`, but eliminate extra path separators introduced by + formatting of Spec properties. + + Path separators explicitly added to the string are preserved, so for example + "{name}/{version}" would generate a directory based on the Spec's name, and + a subdirectory based on its version; this function guarantees though that + the resulting string would only have two directories (i.e. that if under + normal circumstances that `str(Spec.version)` would contain a path + separator, it would not in this case). + """ + format_component_with_sep = r"\{[^}]*[/\\][^}]*}" + if re.search(format_component_with_sep, format_string): + raise SpecFormatPathError( + f"Invalid path format string: cannot contain {{/...}}\n\t{format_string}" + ) + + path_ctor = _path_ctor or pathlib.PurePath + format_string_as_path = path_ctor(format_string) + if format_string_as_path.is_absolute(): + output_path_components = [format_string_as_path.parts[0]] + input_path_components = list(format_string_as_path.parts[1:]) + else: + output_path_components = [] + input_path_components = list(format_string_as_path.parts) + output_path_components += [ + fs.polite_filename(self.format(x)) for x in input_path_components + ] + return str(path_ctor(*output_path_components)) + def __str__(self): sorted_nodes = [self] + sorted( self.traverse(root=False), key=lambda x: x.name or x.abstract_hash @@ -5379,6 +5416,10 @@ class SpecFormatStringError(spack.error.SpecError): """Called for errors in Spec format strings.""" +class SpecFormatPathError(spack.error.SpecError): + """Called for errors in Spec path-format strings.""" + + class SpecFormatSigilError(SpecFormatStringError): """Called for mismatched sigils and attributes in format strings""" diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 90fb193a63..d53ec5fee8 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -58,7 +58,7 @@ def compute_stage_name(spec): """Determine stage name given a spec""" default_stage_structure = stage_prefix + "{name}-{version}-{hash}" stage_name_structure = spack.config.get("config:stage_name", default=default_stage_structure) - return spec.format(format_string=stage_name_structure) + return spec.format_path(format_string=stage_name_structure) def create_stage_root(path: str) -> None: diff --git a/lib/spack/spack/test/spec_semantics.py b/lib/spack/spack/test/spec_semantics.py index 662ea5ef0e..579ba4486c 100644 --- a/lib/spack/spack/test/spec_semantics.py +++ b/lib/spack/spack/test/spec_semantics.py @@ -3,6 +3,8 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import pathlib + import pytest import spack.directives @@ -1005,6 +1007,84 @@ class TestSpecSemantics: assert new_spec.compiler_flags["cxxflags"] == ["-O1"] +@pytest.mark.parametrize( + "spec_str,format_str,expected", + [ + ("zlib@git.foo/bar", "{name}-{version}", str(pathlib.Path("zlib-git.foo_bar"))), + ("zlib@git.foo/bar", "{name}-{version}-{/hash}", None), + ("zlib@git.foo/bar", "{name}/{version}", str(pathlib.Path("zlib", "git.foo_bar"))), + ( + "zlib@{0}=1.0%gcc".format("a" * 40), + "{name}/{version}/{compiler}", + str(pathlib.Path("zlib", "{0}_1.0".format("a" * 40), "gcc")), + ), + ( + "zlib@git.foo/bar=1.0%gcc", + "{name}/{version}/{compiler}", + str(pathlib.Path("zlib", "git.foo_bar_1.0", "gcc")), + ), + ], +) +def test_spec_format_path(spec_str, format_str, expected): + _check_spec_format_path(spec_str, format_str, expected) + + +def _check_spec_format_path(spec_str, format_str, expected, path_ctor=None): + spec = Spec(spec_str) + if not expected: + with pytest.raises((spack.spec.SpecFormatPathError, spack.spec.SpecFormatStringError)): + spec.format_path(format_str, _path_ctor=path_ctor) + else: + formatted = spec.format_path(format_str, _path_ctor=path_ctor) + assert formatted == expected + + +@pytest.mark.parametrize( + "spec_str,format_str,expected", + [ + ( + "zlib@git.foo/bar", + r"C:\\installroot\{name}\{version}", + r"C:\installroot\zlib\git.foo_bar", + ), + ( + "zlib@git.foo/bar", + r"\\hostname\sharename\{name}\{version}", + r"\\hostname\sharename\zlib\git.foo_bar", + ), + # Windows doesn't attribute any significance to a leading + # "/" so it is discarded + ("zlib@git.foo/bar", r"/installroot/{name}/{version}", r"installroot\zlib\git.foo_bar"), + ], +) +def test_spec_format_path_windows(spec_str, format_str, expected): + _check_spec_format_path(spec_str, format_str, expected, path_ctor=pathlib.PureWindowsPath) + + +@pytest.mark.parametrize( + "spec_str,format_str,expected", + [ + ("zlib@git.foo/bar", r"/installroot/{name}/{version}", "/installroot/zlib/git.foo_bar"), + ("zlib@git.foo/bar", r"//installroot/{name}/{version}", "//installroot/zlib/git.foo_bar"), + # This is likely unintentional on Linux: Firstly, "\" is not a + # path separator for POSIX, so this is treated as a single path + # component (containing literal "\" characters); secondly, + # Spec.format treats "\" as an escape character, so is + # discarded (unless directly following another "\") + ( + "zlib@git.foo/bar", + r"C:\\installroot\package-{name}-{version}", + r"C__installrootpackage-zlib-git.foo_bar", + ), + # "\" is not a POSIX separator, and Spec.format treats "\{" as a literal + # "{", which means that the resulting format string is invalid + ("zlib@git.foo/bar", r"package\{name}\{version}", None), + ], +) +def test_spec_format_path_posix(spec_str, format_str, expected): + _check_spec_format_path(spec_str, format_str, expected, path_ctor=pathlib.PurePosixPath) + + @pytest.mark.regression("3887") @pytest.mark.parametrize("spec_str", ["py-extension2", "extension1", "perl-extension"]) def test_is_extension_after_round_trip_to_dict(config, mock_packages, spec_str): diff --git a/lib/spack/spack/test/spec_syntax.py b/lib/spack/spack/test/spec_syntax.py index b79b829f96..d731fcd31c 100644 --- a/lib/spack/spack/test/spec_syntax.py +++ b/lib/spack/spack/test/spec_syntax.py @@ -517,6 +517,14 @@ def specfile_for(default_mock_concretization): [Token(TokenType.VERSION, value="@:0.4"), Token(TokenType.COMPILER, value="% nvhpc")], "@:0.4%nvhpc", ), + ( + "zlib@git.foo/bar", + [ + Token(TokenType.UNQUALIFIED_PACKAGE_NAME, "zlib"), + Token(TokenType.GIT_VERSION, "@git.foo/bar"), + ], + "zlib@git.foo/bar", + ), ], ) def test_parse_single_spec(spec_str, tokens, expected_roundtrip): diff --git a/lib/spack/spack/test/versions.py b/lib/spack/spack/test/versions.py index 1dcf28cd71..50fcc19798 100644 --- a/lib/spack/spack/test/versions.py +++ b/lib/spack/spack/test/versions.py @@ -675,6 +675,25 @@ def test_git_ref_comparisons(mock_git_version_info, install_mockery, mock_packag assert str(spec_branch.version) == "git.1.x=1.2" +def test_git_branch_with_slash(): + class MockLookup(object): + def get(self, ref): + assert ref == "feature/bar" + return "1.2", 0 + + v = spack.version.from_string("git.feature/bar") + assert isinstance(v, GitVersion) + v.attach_lookup(MockLookup()) + + # Create a version range + test_number_version = spack.version.from_string("1.2") + v.satisfies(test_number_version) + + serialized = VersionList([v]).to_dict() + v_deserialized = VersionList.from_dict(serialized) + assert v_deserialized[0].ref == "feature/bar" + + @pytest.mark.parametrize( "string,git", [ -- cgit v1.2.3-70-g09d2