summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorVanessasaurus <814322+vsoch@users.noreply.github.com>2021-09-14 23:12:34 -0600
committerGitHub <noreply@github.com>2021-09-14 22:12:34 -0700
commitef5ad4eb34b351ba650ca92409c0295d5569efe4 (patch)
tree2c345a82de2a42ba8843be00d9bd9f35a8903781 /lib
parentc3bc3e61aa68d9ac408056f62b7ae9bc6516fd69 (diff)
downloadspack-ef5ad4eb34b351ba650ca92409c0295d5569efe4.tar.gz
spack-ef5ad4eb34b351ba650ca92409c0295d5569efe4.tar.bz2
spack-ef5ad4eb34b351ba650ca92409c0295d5569efe4.tar.xz
spack-ef5ad4eb34b351ba650ca92409c0295d5569efe4.zip
Adding ability to compare git references to spack install (#24639)
This will allow a user to (from anywhere a Spec is parsed including both name and version) refer to a git commit in lieu of a package version, and be able to make comparisons with releases in the history based on commits (or with other commits). We do this by way of:
- Adding a property, is_commit, to a version, meaning I can always check if a version is a commit and then change some action.
- Adding an attribute to the Version object which can look up commits from a git repo and find the last known version before that commit, and the distance.
- Construct new Version comparators, which are tuples. For normal versions, they are unchanged. For commits with a previous version x.y.z, d commits away, the comparator is (x, y, z, '', d). For commits with no previous version, the comparator is ('', d) where d is the distance from the first commit in the repo.
- Metadata on git commits is cached in the misc_cache, for quick lookup later.
- Git repos are cached as bare repos in `~/.spack/git_repos`.
- In both caches, git repo urls are turned into file paths within the cache.
If a commit cannot be found in the cached git repo, we fetch from the repo. If a commit is found in the cached metadata, we do not recompare to newly downloaded tags (assuming repo structure does not change). The cached metadata may be thrown out by using the `spack clean -m` option if you know the repo structure has changed in a way that invalidates existing entries. Future work will include automatic updates.
# Finding previous versions
Spack will search the repo for any tags that match the string of a version given by the `version` directive. Spack will also search for any tags that match `v + string` for any version string. Beyond that, Spack will search for tags that match a SEMVER regex (i.e., tags of the form x.y.z) and interpret those tags as valid versions as well.
Future work will increase the breadth of tags understood by Spack. For each tag, Spack queries git to determine whether the tag is an ancestor of the commit in question or not. Spack then sorts the tags that are ancestors of the commit by commit-distance in the repo, and takes the nearest ancestor. The version represented by that tag is listed as the previous version for the commit. Not all commits will find a previous version, depending on the package workflow. Future work may enable more tangential relationships between commits and versions to be discovered, but many commits in real world git repos require human knowledge to associate with a most recent previous version. Future work will also allow packages to specify commit/tag/version relationships manually for such situations.
# Version comparisons
The empty string is a valid component of a Spack version tuple, and is in fact the lowest-valued component. It cannot be generated as part of any valid version. These two characteristics make it perfect for delineating previous versions from distances. For any version x.y.z, (x, y, z, '', _) will be less than any "real" version beginning x.y.z. This ensures that no distance from a release will cause the commit to be interpreted as "greater than" a version which is not an ancestor of it.
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
Co-authored-by: vsoch <vsoch@users.noreply.github.com>
Co-authored-by: Gregory Becker <becker33@llnl.gov>
Co-authored-by: Todd Gamblin <tgamblin@llnl.gov>
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/fetch_strategy.py87
-rw-r--r--lib/spack/spack/paths.py3
-rw-r--r--lib/spack/spack/spec.py9
-rw-r--r--lib/spack/spack/test/cmd/install.py29
-rw-r--r--lib/spack/spack/test/conftest.py97
-rw-r--r--lib/spack/spack/test/git_fetch.py2
-rw-r--r--lib/spack/spack/test/util/util_url.py72
-rw-r--r--lib/spack/spack/test/versions.py26
-rw-r--r--lib/spack/spack/url.py4
-rw-r--r--lib/spack/spack/util/url.py53
-rw-r--r--lib/spack/spack/version.py275
11 files changed, 614 insertions, 43 deletions
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
index 136d786e3f..6db41e2328 100644
--- a/lib/spack/spack/fetch_strategy.py
+++ b/lib/spack/spack/fetch_strategy.py
@@ -49,10 +49,10 @@ import spack.util.crypto as crypto
import spack.util.pattern as pattern
import spack.util.url as url_util
import spack.util.web as web_util
+import spack.version
from spack.util.compression import decompressor_for, extension
from spack.util.executable import CommandNotFoundError, which
from spack.util.string import comma_and, quote
-from spack.version import Version, ver
#: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = []
@@ -750,7 +750,7 @@ class GoFetchStrategy(VCSFetchStrategy):
@property
def go_version(self):
vstring = self.go('version', output=str).split(' ')[2]
- return Version(vstring)
+ return spack.version.Version(vstring)
@property
def go(self):
@@ -843,7 +843,7 @@ class GitFetchStrategy(VCSFetchStrategy):
"""
version_output = git_exe('--version', output=str)
m = re.search(GitFetchStrategy.git_version_re, version_output)
- return Version(m.group(1))
+ return spack.version.Version(m.group(1))
@property
def git(self):
@@ -852,7 +852,7 @@ class GitFetchStrategy(VCSFetchStrategy):
# Disable advice for a quieter fetch
# https://github.com/git/git/blob/master/Documentation/RelNotes/1.7.2.txt
- if self.git_version >= Version('1.7.2'):
+ if self.git_version >= spack.version.Version('1.7.2'):
self._git.add_default_arg('-c')
self._git.add_default_arg('advice.detachedHead=false')
@@ -895,25 +895,52 @@ class GitFetchStrategy(VCSFetchStrategy):
tty.debug('Already fetched {0}'.format(self.stage.source_path))
return
+ self.clone(commit=self.commit, branch=self.branch, tag=self.tag)
+
+ def clone(self, dest=None, commit=None, branch=None, tag=None, bare=False):
+ """
+ Clone a repository to a path.
+
+ This method handles cloning from git, but does not require a stage.
+
+ Arguments:
+ dest (str or None): The path into which the code is cloned. If None,
+ requires a stage and uses the stage's source path.
+ commit (str or None): A commit to fetch from the remote. Only one of
+ commit, branch, and tag may be non-None.
+ branch (str or None): A branch to fetch from the remote.
+ tag (str or None): A tag to fetch from the remote.
+ bare (bool): Execute a "bare" git clone (--bare option to git)
+ """
+ # Default to spack source path
+ dest = dest or self.stage.source_path
tty.debug('Cloning git repository: {0}'.format(self._repo_info()))
git = self.git
- if self.commit:
+ debug = spack.config.get('config:debug')
+
+ if bare:
+ # We don't need to worry about which commit/branch/tag is checked out
+ clone_args = ['clone', '--bare']
+ if not debug:
+ clone_args.append('--quiet')
+ clone_args.extend([self.url, dest])
+ git(*clone_args)
+ elif commit:
# Need to do a regular clone and check out everything if
# they asked for a particular commit.
- debug = spack.config.get('config:debug')
-
clone_args = ['clone', self.url]
if not debug:
clone_args.insert(1, '--quiet')
with temp_cwd():
git(*clone_args)
repo_name = get_single_file('.')
- self.stage.srcdir = repo_name
- shutil.move(repo_name, self.stage.source_path)
+ if self.stage:
+ self.stage.srcdir = repo_name
+ shutil.move(repo_name, dest)
- with working_dir(self.stage.source_path):
- checkout_args = ['checkout', self.commit]
+ with working_dir(dest):
+ checkout_args = ['checkout', commit]
if not debug:
checkout_args.insert(1, '--quiet')
git(*checkout_args)
@@ -921,18 +948,18 @@ class GitFetchStrategy(VCSFetchStrategy):
else:
# Can be more efficient if not checking out a specific commit.
args = ['clone']
- if not spack.config.get('config:debug'):
+ if not debug:
args.append('--quiet')
# If we want a particular branch ask for it.
- if self.branch:
- args.extend(['--branch', self.branch])
- elif self.tag and self.git_version >= ver('1.8.5.2'):
- args.extend(['--branch', self.tag])
+ if branch:
+ args.extend(['--branch', branch])
+ elif tag and self.git_version >= spack.version.ver('1.8.5.2'):
+ args.extend(['--branch', tag])
# Try to be efficient if we're using a new enough git.
# This checks out only one branch's history
- if self.git_version >= ver('1.7.10'):
+ if self.git_version >= spack.version.ver('1.7.10'):
if self.get_full_repo:
args.append('--no-single-branch')
else:
@@ -942,7 +969,7 @@ class GitFetchStrategy(VCSFetchStrategy):
# Yet more efficiency: only download a 1-commit deep
# tree, if the in-use git and protocol permit it.
if (not self.get_full_repo) and \
- self.git_version >= ver('1.7.1') and \
+ self.git_version >= spack.version.ver('1.7.1') and \
self.protocol_supports_shallow_clone():
args.extend(['--depth', '1'])
@@ -950,14 +977,15 @@ class GitFetchStrategy(VCSFetchStrategy):
git(*args)
repo_name = get_single_file('.')
- self.stage.srcdir = repo_name
- shutil.move(repo_name, self.stage.source_path)
+ if self.stage:
+ self.stage.srcdir = repo_name
+ shutil.move(repo_name, dest)
- with working_dir(self.stage.source_path):
+ with working_dir(dest):
# For tags, be conservative and check them out AFTER
# cloning. Later git versions can do this with clone
# --branch, but older ones fail.
- if self.tag and self.git_version < ver('1.8.5.2'):
+ if tag and self.git_version < spack.version.ver('1.8.5.2'):
# pull --tags returns a "special" error code of 1 in
# older versions that we have to ignore.
# see: https://github.com/git/git/commit/19d122b
@@ -971,7 +999,7 @@ class GitFetchStrategy(VCSFetchStrategy):
git(*co_args)
if self.submodules_delete:
- with working_dir(self.stage.source_path):
+ with working_dir(dest):
for submodule_to_delete in self.submodules_delete:
args = ['rm', submodule_to_delete]
if not spack.config.get('config:debug'):
@@ -980,7 +1008,7 @@ class GitFetchStrategy(VCSFetchStrategy):
# Init submodules if the user asked for them.
if self.submodules:
- with working_dir(self.stage.source_path):
+ with working_dir(dest):
args = ['submodule', 'update', '--init', '--recursive']
if not spack.config.get('config:debug'):
args.insert(1, '--quiet')
@@ -1502,8 +1530,15 @@ def for_package_version(pkg, version):
check_pkg_attributes(pkg)
- if not isinstance(version, Version):
- version = Version(version)
+ if not isinstance(version, spack.version.Version):
+ version = spack.version.Version(version)
+
+ # if it's a commit, we must use a GitFetchStrategy
+ if version.is_commit and hasattr(pkg, "git"):
+ # Populate the version with comparisons to other commits
+ version.generate_commit_lookup(pkg)
+ fetcher = GitFetchStrategy(git=pkg.git, commit=str(version))
+ return fetcher
# If it's not a known version, try to extrapolate one by URL
if version not in pkg.versions:
diff --git a/lib/spack/spack/paths.py b/lib/spack/spack/paths.py
index 76eb9dfdac..627be54bd7 100644
--- a/lib/spack/spack/paths.py
+++ b/lib/spack/spack/paths.py
@@ -56,6 +56,9 @@ user_bootstrap_path = os.path.join(user_config_path, 'bootstrap')
reports_path = os.path.join(user_config_path, "reports")
monitor_path = os.path.join(reports_path, "monitor")
+# We cache repositories (git) in first, extracted metadata in second
+user_repos_cache_path = os.path.join(user_config_path, 'git_repos')
+
opt_path = os.path.join(prefix, "opt")
etc_path = os.path.join(prefix, "etc")
system_etc_path = '/etc'
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 567a8215fb..b026a3ab93 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -4699,6 +4699,15 @@ class SpecParser(spack.parse.Parser):
except spack.parse.ParseError as e:
raise SpecParseError(e)
+ # Generate lookups for git-commit-based versions
+ for spec in specs:
+ # Cannot do lookups for versions in anonymous specs
+ # Only allow concrete versions using git for now
+ if spec.name and spec.versions.concrete and spec.version.is_commit:
+ pkg = spec.package
+ if hasattr(pkg, 'git'):
+ spec.version.generate_commit_lookup(pkg)
+
return specs
def spec_from_file(self):
diff --git a/lib/spack/spack/test/cmd/install.py b/lib/spack/spack/test/cmd/install.py
index 4378662a98..a8a8c8b556 100644
--- a/lib/spack/spack/test/cmd/install.py
+++ b/lib/spack/spack/test/cmd/install.py
@@ -21,6 +21,7 @@ import spack.config
import spack.environment as ev
import spack.hash_types as ht
import spack.package
+import spack.util.executable
from spack.error import SpackError
from spack.main import SpackCommand
from spack.spec import CompilerSpec, Spec
@@ -224,7 +225,7 @@ def test_install_overwrite(
def test_install_overwrite_not_installed(
- mock_packages, mock_archive, mock_fetch, config, install_mockery
+ mock_packages, mock_archive, mock_fetch, config, install_mockery,
):
# Try to install a spec and then to reinstall it.
spec = Spec('libdwarf')
@@ -236,6 +237,32 @@ def test_install_overwrite_not_installed(
assert os.path.exists(spec.prefix)
+def test_install_commit(
+ mock_git_version_info, install_mockery, mock_packages, monkeypatch):
+ """
+ Test installing a git package from a commit.
+
+ This ensures Spack appropriately associates commit versions with their
+ packages in time to do version lookups. Details of version lookup tested elsewhere
+ """
+ repo_path, filename, commits = mock_git_version_info
+ monkeypatch.setattr(spack.package.PackageBase,
+ 'git', 'file://%s' % repo_path,
+ raising=False)
+
+ commit = commits[-1]
+ spec = spack.spec.Spec('git-test-commit@%s' % commit)
+ spec.concretize()
+ spec.package.do_install()
+
+ # Ensure first commit file contents were written
+ installed = os.listdir(spec.prefix.bin)
+ assert filename in installed
+ with open(spec.prefix.bin.join(filename), 'r') as f:
+ content = f.read().strip()
+ assert content == '[]' # contents are weird for another test
+
+
def test_install_overwrite_multiple(
mock_packages, mock_archive, mock_fetch, config, install_mockery
):
diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py
index 9e44547ce1..cd6ce907e7 100644
--- a/lib/spack/spack/test/conftest.py
+++ b/lib/spack/spack/test/conftest.py
@@ -61,6 +61,103 @@ def last_two_git_commits(scope='session'):
yield regex.findall(git_log_out)
def write_file(filename, contents):
    """Write ``contents`` to ``filename``, replacing whatever it held before."""
    with open(filename, 'w') as out:
        out.write(contents)
+
+
+commit_counter = 0
+
+
+@pytest.fixture
+def mock_git_version_info(tmpdir, scope="function"):
+ """Create a mock git repo with known structure
+
+ The structure of commits in this repo is as follows::
+
+ | o fourth 1.x commit (1.2)
+ | o third 1.x commit
+ | |
+ o | fourth main commit (v2.0)
+ o | third main commit
+ | |
+ | o second 1.x commit (v1.1)
+ | o first 1.x commit
+ | /
+ |/
+ o second commit (v1.0)
+ o first commit
+
+ The repo consists of a single file, in which the Version._cmp representation
+ of each commit is expressed as a string.
+
+ Important attributes of the repo for test coverage are: multiple branches,
+ version tags on multiple branches, and version order is not equal to time
+ order or topological order.
+ """
+ git = spack.util.executable.which('git', required=True)
+ repo_path = str(tmpdir.mkdir('git_repo'))
+ filename = 'file.txt'
+
+ def commit(message):
+ global commit_counter
+ git('commit', '--date', '2020-01-%02d 12:0:00 +0300' % commit_counter,
+ '-am', message)
+ commit_counter += 1
+
+ with working_dir(repo_path):
+ git("init")
+
+ git('config', 'user.name', 'Spack')
+ git('config', 'user.email', 'spack@spack.io')
+
+ # Add two commits on main branch
+ write_file(filename, '[]')
+ git('add', filename)
+ commit('first commit')
+
+ # Get name of default branch (differs by git version)
+ main = git('rev-parse', '--abbrev-ref', 'HEAD', output=str, error=str).strip()
+
+ # Tag second commit as v1.0
+ write_file(filename, "[1, 0]")
+ commit('second commit')
+ git('tag', 'v1.0')
+
+ # Add two commits and a tag on 1.x branch
+ git('checkout', '-b', '1.x')
+ write_file(filename, "[1, 0, '', 1]")
+ commit('first 1.x commit')
+
+ write_file(filename, "[1, 1]")
+ commit('second 1.x commit')
+ git('tag', 'v1.1')
+
+ # Add two commits and a tag on main branch
+ git('checkout', main)
+ write_file(filename, "[1, 0, '', 1]")
+ commit('third main commit')
+ write_file(filename, "[2, 0]")
+ commit('fourth main commit')
+ git('tag', 'v2.0')
+
+ # Add two more commits on 1.x branch to ensure we aren't cheating by using time
+ git('checkout', '1.x')
+ write_file(filename, "[1, 1, '', 1]")
+ commit('third 1.x commit')
+ write_file(filename, "[1, 2]")
+ commit('fourth 1.x commit')
+ git('tag', '1.2') # test robust parsing to different syntax, no v
+
+ # Get the commits in topo order
+ log = git('log', '--all', '--pretty=format:%H', '--topo-order',
+ output=str, error=str)
+ commits = [c for c in log.split('\n') if c]
+
+ # Return the git directory to install, the filename used, and the commits
+ yield repo_path, filename, commits
+
+
@pytest.fixture(autouse=True)
def clear_recorded_monkeypatches():
yield
diff --git a/lib/spack/spack/test/git_fetch.py b/lib/spack/spack/test/git_fetch.py
index 1ac7b473c7..9c4c993953 100644
--- a/lib/spack/spack/test/git_fetch.py
+++ b/lib/spack/spack/test/git_fetch.py
@@ -212,7 +212,7 @@ def test_get_full_repo(get_full_repo, git_version, mock_git_repository,
ncommits = len(commits)
if get_full_repo:
- assert(nbranches == 5)
+ assert(nbranches >= 5)
assert(ncommits == 2)
else:
assert(nbranches == 2)
diff --git a/lib/spack/spack/test/util/util_url.py b/lib/spack/spack/test/util/util_url.py
index 3ac8708e70..12b713ba4b 100644
--- a/lib/spack/spack/test/util/util_url.py
+++ b/lib/spack/spack/test/util/util_url.py
@@ -7,6 +7,8 @@
import os
import os.path
+import pytest
+
import spack.paths
import spack.util.url as url_util
@@ -303,3 +305,73 @@ def test_url_join_absolute_paths():
assert(url_util.join(*args, resolve_href=False) ==
'http://example.com/path/resource')
+
+
+@pytest.mark.parametrize("url,parts", [
+ ("ssh://user@host.xz:500/path/to/repo.git/",
+ ("ssh", "user", "host.xz", 500, "/path/to/repo.git")),
+ ("ssh://user@host.xz/path/to/repo.git/",
+ ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
+ ("ssh://host.xz:500/path/to/repo.git/",
+ ("ssh", None, "host.xz", 500, "/path/to/repo.git")),
+ ("ssh://host.xz/path/to/repo.git/",
+ ("ssh", None, "host.xz", None, "/path/to/repo.git")),
+ ("ssh://user@host.xz/path/to/repo.git/",
+ ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
+ ("ssh://host.xz/path/to/repo.git/",
+ ("ssh", None, "host.xz", None, "/path/to/repo.git")),
+ ("ssh://user@host.xz/~user/path/to/repo.git/",
+ ("ssh", "user", "host.xz", None, "~user/path/to/repo.git")),
+ ("ssh://host.xz/~user/path/to/repo.git/",
+ ("ssh", None, "host.xz", None, "~user/path/to/repo.git")),
+ ("ssh://user@host.xz/~/path/to/repo.git",
+ ("ssh", "user", "host.xz", None, "~/path/to/repo.git")),
+ ("ssh://host.xz/~/path/to/repo.git",
+ ("ssh", None, "host.xz", None, "~/path/to/repo.git")),
+ ("git@github.com:spack/spack.git",
+ (None, "git", "github.com", None, "spack/spack.git")),
+ ("user@host.xz:/path/to/repo.git/",
+ (None, "user", "host.xz", None, "/path/to/repo.git")),
+ ("host.xz:/path/to/repo.git/",
+ (None, None, "host.xz", None, "/path/to/repo.git")),
+ ("user@host.xz:~user/path/to/repo.git/",
+ (None, "user", "host.xz", None, "~user/path/to/repo.git")),
+ ("host.xz:~user/path/to/repo.git/",
+ (None, None, "host.xz", None, "~user/path/to/repo.git")),
+ ("user@host.xz:path/to/repo.git",
+ (None, "user", "host.xz", None, "path/to/repo.git")),
+ ("host.xz:path/to/repo.git",
+ (None, None, "host.xz", None, "path/to/repo.git")),
+ ("rsync://host.xz/path/to/repo.git/",
+ ("rsync", None, "host.xz", None, "/path/to/repo.git")),
+ ("git://host.xz/path/to/repo.git/",
+ ("git", None, "host.xz", None, "/path/to/repo.git")),
+ ("git://host.xz/~user/path/to/repo.git/",
+ ("git", None, "host.xz", None, "~user/path/to/repo.git")),
+ ("http://host.xz/path/to/repo.git/",
+ ("http", None, "host.xz", None, "/path/to/repo.git")),
+ ("https://host.xz/path/to/repo.git/",
+ ("https", None, "host.xz", None, "/path/to/repo.git")),
+ ("https://github.com/spack/spack",
+ ("https", None, "github.com", None, "/spack/spack")),
+ ("https://github.com/spack/spack/",
+ ("https", None, "github.com", None, "/spack/spack")),
+ ("file:///path/to/repo.git/",
+ ("file", None, None, None, "/path/to/repo.git")),
+ ("file://~/path/to/repo.git/",
+ ("file", None, None, None, "~/path/to/repo.git")),
+ # bad ports should give us None
+ ("ssh://host.xz:port/path/to/repo.git/", None),
+ # bad ports should give us None
+ ("ssh://host-foo.xz:port/path/to/repo.git/", None),
+ # regular file paths should give us None
+ ("/path/to/repo.git/", None),
+ ("path/to/repo.git/", None),
+ ("~/path/to/repo.git", None),
+])
+def test_git_url_parse(url, parts):
+ if parts is None:
+ with pytest.raises(ValueError):
+ url_util.parse_git_url(url)
+ else:
+ assert parts == url_util.parse_git_url(url)
diff --git a/lib/spack/spack/test/versions.py b/lib/spack/spack/test/versions.py
index 8e6d5430ee..a3d11108ef 100644
--- a/lib/spack/spack/test/versions.py
+++ b/lib/spack/spack/test/versions.py
@@ -7,8 +7,15 @@
We try to maintain compatibility with RPM's version semantics
where it makes sense.
"""
+import os
+
import pytest
+from llnl.util.filesystem import working_dir
+
+import spack.package
+import spack.spec
+from spack.util.executable import which
from spack.version import Version, VersionList, ver
@@ -576,3 +583,22 @@ def test_invalid_versions(version_str):
"""Ensure invalid versions are rejected with a ValueError"""
with pytest.raises(ValueError):
Version(version_str)
+
+
+def test_versions_from_git(mock_git_version_info, monkeypatch, mock_packages):
+ repo_path, filename, commits = mock_git_version_info
+ monkeypatch.setattr(spack.package.PackageBase, 'git', 'file://%s' % repo_path,
+ raising=False)
+
+ for commit in commits:
+ spec = spack.spec.Spec('git-test-commit@%s' % commit)
+ version = spec.version
+ comparator = [str(v) if not isinstance(v, int) else v
+ for v in version._cmp(version.commit_lookup)]
+
+ with working_dir(repo_path):
+ which('git')('checkout', commit)
+ with open(os.path.join(repo_path, filename), 'r') as f:
+ expected = f.read()
+
+ assert str(comparator) == expected
diff --git a/lib/spack/spack/url.py b/lib/spack/spack/url.py
index bfe74318f6..6e78bd6cb8 100644
--- a/lib/spack/spack/url.py
+++ b/lib/spack/spack/url.py
@@ -36,7 +36,7 @@ from llnl.util.tty.color import cescape, colorize
import spack.error
import spack.util.compression as comp
-from spack.version import Version
+import spack.version
#
@@ -621,7 +621,7 @@ def parse_version(path):
UndetectableVersionError: If the URL does not match any regexes
"""
version, start, length, i, regex = parse_version_offset(path)
- return Version(version)
+ return spack.version.Version(version)
def parse_name_offset(path, v=None):
diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py
index 141b4b8093..72fc331fe3 100644
--- a/lib/spack/spack/util/url.py
+++ b/lib/spack/spack/util/url.py
@@ -248,3 +248,56 @@ def _join(base_url, path, *extra, **kwargs):
params=params,
query=query,
fragment=None))
+
+
# Pattern for git remote URLs, covering both ``scheme://`` URLs and the
# scp-like ``[user@]host:path`` form. The user component stops at the first
# "/" so that an "@" occurring later in the path (for example
# ``https://host/path/user@repo.git``) is not mistaken for a user separator.
git_re = (
    r"^(?:([a-z]+)://)?"        # 1. optional scheme
    r"(?:([^@/]+)@)?"           # 2. optional user (never contains "/")
    r"([^:/~]+)?"               # 3. optional hostname
    r"(?(1)(?::([^:/]+))?|:)"   # 4. :<optional port> if scheme else :
    r"(.*[^/])/?$"              # 5. path
)


def parse_git_url(url):
    """Parse git URL into components.

    This parses URLs that look like:

    * ``https://host.com:443/path/to/repo.git``, or
    * ``git@host.com:path/to/repo.git``

    Anything not matching those patterns is likely a local
    file or invalid.

    Returned components are as follows (optional values can be ``None``):

    1. ``scheme`` (optional): git, ssh, http, https, file, ...
    2. ``user`` (optional): ``git@`` for github, username for http or ssh
    3. ``hostname`` (optional): domain of server; ``None`` for ``file://``
       URLs, which have no host part
    4. ``port`` (optional): port on server, converted to ``int``
    5. ``path``: path on the server, e.g. spack/spack, with one trailing
       ``/`` removed

    Args:
        url (str): the git URL to parse

    Returns:
        (tuple): tuple containing URL components as above

    Raises ``ValueError`` for invalid URLs, including URLs whose port is
    not an integer.
    """
    match = re.match(git_re, url)
    if match is None:
        raise ValueError("bad git URL: %s" % url)

    # initial parse
    scheme, user, hostname, port, path = match.groups()

    # special handling for ~ paths (they're never absolute)
    if path.startswith("/~"):
        path = path[1:]

    # the pattern accepts any non-empty port text; only integers are valid
    if port is not None:
        try:
            port = int(port)
        except ValueError:
            raise ValueError("bad port in git url: %s" % url)

    return (scheme, user, hostname, port, path)
diff --git a/lib/spack/spack/version.py b/lib/spack/spack/version.py
index 0ed1239216..9aad046f25 100644
--- a/lib/spack/spack/version.py
+++ b/lib/spack/spack/version.py
@@ -25,13 +25,21 @@ be called on any of the types::
concrete
"""
import numbers
+import os
import re
from bisect import bisect_left
from functools import wraps
from six import string_types
+import llnl.util.tty as tty
+from llnl.util.filesystem import mkdirp, working_dir
+
+import spack.caches
import spack.error
+import spack.paths
+import spack.util.executable
+import spack.util.spack_json as sjson
from spack.util.spack_yaml import syaml_dict
__all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
@@ -39,9 +47,17 @@ __all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
# Valid version characters
VALID_VERSION = re.compile(r'^[A-Za-z0-9_.-]+$')
# regex for a commit version: a full 40-character SHA-1 written in lowercase
# hexadecimal. Only hex digits are accepted so that an ordinary 40-character
# alphanumeric version string is not mistaken for a commit.
COMMIT_VERSION = re.compile(r'^[a-f0-9]{40}$')
+
# regex for version segments
SEGMENT_REGEX = re.compile(r'(?:(?P<num>[0-9]+)|(?P<str>[a-zA-Z]+))(?P<sep>[_.-]*)')
+# regular expression for semantic versioning
+SEMVER_REGEX = re.compile(".+(?P<semver>([0-9]+)[.]([0-9]+)[.]([0-9]+)"
+ "(?:-([0-9A-Za-z-]+(?:[.][0-9A-Za-z-]+)*))?"
+ "(?:[+][0-9A-Za-z-]+)?)")
+
# Infinity-like versions. The order in the list implies the comparison rules
infinity_versions = ['develop', 'main', 'master', 'head', 'trunk']
@@ -151,7 +167,7 @@ class VersionStrComponent(object):
class Version(object):
"""Class to represent versions"""
- __slots__ = ['version', 'separators', 'string']
+ __slots__ = ['version', 'separators', 'string', 'commit_lookup']
def __init__(self, string):
if not isinstance(string, str):
@@ -164,13 +180,40 @@ class Version(object):
if not VALID_VERSION.match(string):
raise ValueError("Bad characters in version string: %s" % string)
- # Split version into alphabetical and numeric segments simultaneously
+ # An object that can lookup git commits to compare them to versions
+ self.commit_lookup = None
+
segments = SEGMENT_REGEX.findall(string)
self.version = tuple(
int(m[0]) if m[0] else VersionStrComponent(m[1]) for m in segments
)
self.separators = tuple(m[2] for m in segments)
def _cmp(self, other_lookups=None):
    """Return the tuple used to compare this version with another one.

    For ordinary versions this is ``self.version``. For a commit version,
    this version's own ``commit_lookup`` is consulted, falling back to
    ``other_lookups``; when the lookup has an entry for the commit, the
    result is the previous version's tuple followed by an empty string
    component and the distance. Either part is left out when the lookup
    reports it as empty/zero.

    Args:
        other_lookups: lookup borrowed from the version being compared
            against, used only when this version has none of its own.
    """
    lookup = self.commit_lookup or other_lookups
    if not (self.is_commit and lookup):
        return self.version

    info = lookup.get(self.string)
    if not info:
        return self.version

    previous, dist = info

    # Start from the previous release (if any), then append the empty
    # component plus distance unless the commit is exactly that release.
    key = ()
    if previous:
        key = Version(previous).version
    if dist:
        key = key + (VersionStrComponent(''), dist)
    return key
+
@property
def is_commit(self):
    """Whether the string this version was built from names a git commit.

    Names listed in ``infinity_versions`` are never commits; any other
    string is a commit exactly when it matches ``COMMIT_VERSION``.
    """
    text = self.string
    if text not in infinity_versions:
        return COMMIT_VERSION.match(text) is not None
    return False
+
@property
def dotted(self):
"""The dotted representation of the version.
@@ -276,10 +319,13 @@ class Version(object):
gcc@4.7 so that when a user asks to build with gcc@4.7, we can find
a suitable compiler.
"""
+ self_cmp = self._cmp(other.commit_lookup)
+ other_cmp = other._cmp(self.commit_lookup)
- nself = len(self.version)
- nother = len(other.version)
- return nother <= nself and self.version[:nother] == other.version
+ # Do the final comparison
+ nself = len(self_cmp)
+ nother = len(other_cmp)
+ return nother <= nself and self_cmp[:nother] == other_cmp
def __iter__(self):
return iter(self.version)
@@ -331,13 +377,22 @@ class Version(object):
if other is None:
return False
+ # If either is a commit and we haven't indexed yet, can't compare
+ if (other.is_commit or self.is_commit) and not (self.commit_lookup or
+ other.commit_lookup):
+ return False
+
# Use tuple comparison assisted by VersionStrComponent for performance
- return self.version < other.version
+ return self._cmp(other.commit_lookup) < other._cmp(self.commit_lookup)
@coerced
def __eq__(self, other):
- return (other is not None and
- type(other) == Version and self.version == other.version)
+
+ # Cut out early if we don't have a version
+ if other is None or type(other) != Version:
+ return False
+
+ return self._cmp(other.commit_lookup) == other._cmp(self.commit_lookup)
@coerced
def __ne__(self, other):
@@ -362,18 +417,23 @@ class Version(object):
def __contains__(self, other):
if other is None:
return False
- return other.version[:len(self.version)] == self.version
+
+ self_cmp = self._cmp(other.commit_lookup)
+ return other._cmp(self.commit_lookup)[:len(self_cmp)] == self_cmp
def is_predecessor(self, other):
"""True if the other version is the immediate predecessor of this one.
- That is, NO versions v exist such that:
+ That is, NO non-commit versions v exist such that:
(self < v < other and v not in self).
"""
- if len(self.version) != len(other.version):
+ self_cmp = self._cmp(self.commit_lookup)
+ other_cmp = other._cmp(other.commit_lookup)
+
+ if self_cmp[:-1] != other_cmp[:-1]:
return False
- sl = self.version[-1]
- ol = other.version[-1]
+ sl = self_cmp[-1]
+ ol = other_cmp[-1]
return type(sl) == int and type(ol) == int and (ol - sl == 1)
def is_successor(self, other):
@@ -401,6 +461,34 @@ class Version(object):
else:
return VersionList()
def generate_commit_lookup(self, pkg):
    """
    Attach a commit lookup to this version so it can be compared.

    Since we want to optimize the clone and lookup, we do the clone once
    and store it in the user specified git repository cache. We also need
    context of the package to get known versions, which could be tags if
    they are linked to Git Releases. If we are unable to determine the
    context of the version, we cannot continue. This implementation is
    alongside the GitFetcher because eventually the git repos cache will
    be one and the same with the source cache.

    Does nothing if a lookup is already attached. Otherwise a
    ``CommitLookup`` for ``pkg`` is created, this version's commit is
    resolved through it once, and the lookup data is saved.

    Args:
        pkg: the package this version belongs to; it is handed to
            ``CommitLookup``, which reads the package's ``git`` attribute.
    """
    if self.commit_lookup:
        return

    # Sanity check we have a commit
    if not self.is_commit:
        tty.die("%s is not a commit." % self)

    # Generate a commit looker-upper, resolve this commit so its entry is
    # present in the lookup data, then write that data out via save().
    self.commit_lookup = CommitLookup(pkg)
    self.commit_lookup.get(self.string)
    self.commit_lookup.save()
+
class VersionRange(object):
@@ -886,3 +974,164 @@ class VersionError(spack.error.SpackError):
class VersionChecksumError(VersionError):
"""Raised for version checksum errors."""
+
+
+class VersionLookupError(VersionError):
+ """Raised for errors looking up git commits as versions."""
+
+
+class CommitLookup(object):
+ """An object for cached lookups of git commits
+
+ CommitLookup objects delegate to the misc_cache for locking.
+ CommitLookup objects may be attached to a Version object for which
+ Version.is_commit returns True to allow for comparisons between git commits
+ and versions as represented by tags in the git repository.
+ """
+ def __init__(self, pkg):
+ self.pkg = pkg
+
+ # We require the full git repository history
+ import spack.fetch_strategy # break cycle
+ fetcher = spack.fetch_strategy.GitFetchStrategy(git=pkg.git)
+ fetcher.get_full_repo = True
+ self.fetcher = fetcher
+
+ self.data = {}
+
+ # Cache data in misc_cache
+ key_base = 'git_metadata'
+ if not self.repository_uri.startswith('/'):
+ key_base += '/'
+ self.cache_key = key_base + self.repository_uri
+ spack.caches.misc_cache.init_entry(self.cache_key)
+ self.cache_path = spack.caches.misc_cache.cache_path(self.cache_key)
+
+ @property
+ def repository_uri(self):
+ """
+ Identifier for git repos used within the repo and metadata caches.
+
+ """
+ try:
+ components = [str(c).lstrip('/')
+ for c in spack.util.url.parse_git_url(self.pkg.git)
+ if c]
+ return os.path.join(*components)
+ except ValueError:
+ # If it's not a git url, it's a local path
+ return os.path.abspath(self.pkg.git)
+
+ def save(self):
+ """
+ Save the data to file
+ """
+ with spack.caches.misc_cache.write_transaction(self.cache_key) as (old, new):
+ sjson.dump(self.data, new)
+
+ def load_data(self):
+ """
+ Load data if the path already exists.
+ """
+ if os.path.isfile(self.cache_path):
+ with spack.caches.misc_cache.read_transaction(self.cache_key) as cache_file:
+ self.data = sjson.load(cache_file)
+
+ def get(self, commit):
+ if not self.data:
+ self.load_data()
+
+ if commit not in self.data:
+ self.data[commit] = self.lookup_commit(commit)
+ self.save()
+
+ return self.data[commit]
+
+ def lookup_commit(self, commit):
+ """Lookup the previous version and distance for a given commit.
+
+ We use git to compare the known versions from package to the git tags,
+ as well as any git tags that are SEMVER versions, and find the latest
+ known version prior to the commit, as well as the distance from that version
+ to the commit in the git repo. Those values are used to compare Version objects.
+ """
+ dest = os.path.join(spack.paths.user_repos_cache_path, self.repository_uri)
+ if dest.endswith('.git'):
+ dest = dest[:-4]
+
+ # prepare a cache for the repository
+ dest_parent = os.path.dirname(dest)
+ if not os.path.exists(dest_parent):
+ mkdirp(dest_parent)
+
+ # Only clone if we don't have it!
+ if not os.path.exists(dest):
+ self.fetcher.clone(dest, bare=True)
+
+ # Lookup commit info
+ with working_dir(dest):
+ self.fetcher.git("fetch", '--tags')
+
+ # Ensure commit is an object known to git
+ # Note the braces are literal; the commit is substituted for the %s placeholder
+ # This will raise a ProcessError if the commit does not exist
+ # We may later design a custom error to re-raise
+ self.fetcher.git('cat-file', '-e', '%s^{commit}' % commit)
+
+ # List tags (refs) by date, so last reference of a tag is newest
+ tag_info = self.fetcher.git(
+ "for-each-ref", "--sort=creatordate", "--format",
+ "%(objectname) %(refname)", "refs/tags", output=str).split('\n')
+
+ # Lookup of commits to spack versions
+ commit_to_version = {}
+
+ for entry in tag_info:
+ if not entry:
+ continue
+ tag_commit, tag = entry.split()
+ tag = tag.replace('refs/tags/', '', 1)
+
+ # For each tag, try to match to a version
+ for v in [v.string for v in self.pkg.versions]:
+ if v == tag or 'v' + v == tag:
+ commit_to_version[tag_commit] = v
+ break
+ else:
+ # try to parse the tag to compare versions spack does not know
+ match = SEMVER_REGEX.match(tag)
+ if match:
+ semver = match.groupdict()['semver']
+ commit_to_version[tag_commit] = semver
+
+ ancestor_commits = []
+ for tag_commit in commit_to_version:
+ self.fetcher.git(
+ 'merge-base', '--is-ancestor', tag_commit, commit,
+ ignore_errors=[1])
+ if self.fetcher.git.returncode == 0:
+ distance = self.fetcher.git(
+ 'rev-list', '%s..%s' % (tag_commit, commit), '--count',
+ output=str, error=str).strip()
+ ancestor_commits.append((tag_commit, int(distance)))
+
+ # Get nearest ancestor that is a known version
+ ancestor_commits.sort(key=lambda x: x[1])
+ if ancestor_commits:
+ prev_version_commit, distance = ancestor_commits[0]
+ prev_version = commit_to_version[prev_version_commit]
+ else:
+ # Get a list of all commits; git log emits them in reverse order,
+ # so the last entry is used below as the first commit
+ commit_info = self.fetcher.git("log", "--all", "--pretty=format:%H",
+ output=str)
+ commits = [c for c in commit_info.split('\n') if c]
+
+ # No previous version and distance from first commit
+ prev_version = None
+ distance = int(self.fetcher.git(
+ 'rev-list', '%s..%s' % (commits[-1], commit), '--count',
+ output=str, error=str
+ ).strip())
+
+ return prev_version, distance