summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/spack/docs/packaging_guide.rst40
-rw-r--r--lib/spack/spack/fetch_strategy.py132
-rw-r--r--lib/spack/spack/test/conftest.py23
-rw-r--r--lib/spack/spack/test/git_fetch.py35
-rw-r--r--lib/spack/spack/test/packages.py12
-rw-r--r--lib/spack/spack/version/git_ref_lookup.py2
-rw-r--r--var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py17
7 files changed, 242 insertions, 19 deletions
diff --git a/lib/spack/docs/packaging_guide.rst b/lib/spack/docs/packaging_guide.rst
index 629b87e8e0..a736ff8c79 100644
--- a/lib/spack/docs/packaging_guide.rst
+++ b/lib/spack/docs/packaging_guide.rst
@@ -1263,6 +1263,11 @@ Git fetching supports the following parameters to ``version``:
option ``--depth 1`` will be used if the version of git and the specified
transport protocol support it, and ``--single-branch`` will be used if the
version of git supports it.
+* ``git_sparse_paths``: Use ``sparse-checkout`` to only clone these relative paths.
+ This feature requires ``git`` to be version ``2.25.0`` or later but is useful for
+ large repositories that have separate portions that can be built independently.
+ If paths provided are directories then all the subdirectories and associated files
+ will also be cloned.
Only one of ``tag``, ``branch``, or ``commit`` can be used at a time.
@@ -1361,6 +1366,41 @@ Submodules
For more information about git submodules see the manpage of git: ``man
git-submodule``.
+Sparse-Checkout
+ You can supply ``git_sparse_paths`` at the package or version level to utilize git's
+ sparse-checkout feature. This will only clone the paths that are specified in the
+ ``git_sparse_paths`` attribute for the package along with the files in the top level directory.
+ This feature allows you to only clone what you need from a large repository.
+ Note that this is a newer feature in git and requires git ``2.25.0`` or greater.
+ If ``git_sparse_paths`` is supplied and the git version is too old
+ then a warning will be issued and that package will use the standard cloning operations instead.
+ ``git_sparse_paths`` should be supplied as a list of paths, a callable function for versions,
+ or a more complex package attribute using the ``@property`` decorator. The return value should be
+ a list for a callable implementation of ``git_sparse_paths``.
+
+ .. code-block:: python
+
+ def sparse_path_function(package):
+ """a callable function that can be used inside a version"""
+ # paths can be directories or files, all subdirectories and files are included
+ paths = ["doe", "rae", "me/file.cpp"]
+ if package.spec.version > Version("1.2.0"):
+ paths.extend(["fae"])
+ return paths
+
+ class MyPackage(Package):
+ # can also be a package attribute that will be used if not specified in versions
+ git_sparse_paths = ["doe", "rae"]
+
+ # use the package attribute
+ version("1.0.0")
+ version("1.1.0")
+ # use the function
+ version("1.1.5", git_sparse_paths=sparse_path_function)
+ version("1.2.0", git_sparse_paths=sparse_path_function)
+ version("1.2.5", git_sparse_paths=sparse_path_function)
+ version("1.1.5", git_sparse_paths=sparse_path_function)
+
.. _github-fetch:
^^^^^^
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
index 4bbc143fc9..589b341f5c 100644
--- a/lib/spack/spack/fetch_strategy.py
+++ b/lib/spack/spack/fetch_strategy.py
@@ -720,6 +720,7 @@ class GitFetchStrategy(VCSFetchStrategy):
"submodules",
"get_full_repo",
"submodules_delete",
+ "git_sparse_paths",
]
git_version_re = r"git version (\S+)"
@@ -735,6 +736,7 @@ class GitFetchStrategy(VCSFetchStrategy):
self.submodules = kwargs.get("submodules", False)
self.submodules_delete = kwargs.get("submodules_delete", False)
self.get_full_repo = kwargs.get("get_full_repo", False)
+ self.git_sparse_paths = kwargs.get("git_sparse_paths", None)
@property
def git_version(self):
@@ -802,38 +804,50 @@ class GitFetchStrategy(VCSFetchStrategy):
tty.debug("Already fetched {0}".format(self.stage.source_path))
return
- self.clone(commit=self.commit, branch=self.branch, tag=self.tag)
+ if self.git_sparse_paths:
+ self._sparse_clone_src(commit=self.commit, branch=self.branch, tag=self.tag)
+ else:
+ self._clone_src(commit=self.commit, branch=self.branch, tag=self.tag)
+ self.submodule_operations()
- def clone(self, dest=None, commit=None, branch=None, tag=None, bare=False):
+ def bare_clone(self, dest):
"""
- Clone a repository to a path.
+ Execute a bare clone for metadata only
- This method handles cloning from git, but does not require a stage.
+ Requires a destination since bare cloning does not provide source
+ and shouldn't be used for staging.
+ """
+ # Default to spack source path
+ tty.debug("Cloning git repository: {0}".format(self._repo_info()))
+
+ git = self.git
+ debug = spack.config.get("config:debug")
+
+ # We don't need to worry about which commit/branch/tag is checked out
+ clone_args = ["clone", "--bare"]
+ if not debug:
+ clone_args.append("--quiet")
+ clone_args.extend([self.url, dest])
+ git(*clone_args)
+
+ def _clone_src(self, commit=None, branch=None, tag=None):
+ """
+ Clone a repository to a path using git.
Arguments:
- dest (str or None): The path into which the code is cloned. If None,
- requires a stage and uses the stage's source path.
commit (str or None): A commit to fetch from the remote. Only one of
commit, branch, and tag may be non-None.
branch (str or None): A branch to fetch from the remote.
tag (str or None): A tag to fetch from the remote.
- bare (bool): Execute a "bare" git clone (--bare option to git)
"""
# Default to spack source path
- dest = dest or self.stage.source_path
+ dest = self.stage.source_path
tty.debug("Cloning git repository: {0}".format(self._repo_info()))
git = self.git
debug = spack.config.get("config:debug")
- if bare:
- # We don't need to worry about which commit/branch/tag is checked out
- clone_args = ["clone", "--bare"]
- if not debug:
- clone_args.append("--quiet")
- clone_args.extend([self.url, dest])
- git(*clone_args)
- elif commit:
+ if commit:
# Need to do a regular clone and check out everything if
# they asked for a particular commit.
clone_args = ["clone", self.url]
@@ -912,6 +926,85 @@ class GitFetchStrategy(VCSFetchStrategy):
git(*pull_args, ignore_errors=1)
git(*co_args)
+ def _sparse_clone_src(self, commit=None, branch=None, tag=None, **kwargs):
+ """
+ Use git's sparse checkout feature to clone portions of a git repository
+
+ Arguments:
+ commit (str or None): A commit to fetch from the remote. Only one of
+ commit, branch, and tag may be non-None.
+ branch (str or None): A branch to fetch from the remote.
+ tag (str or None): A tag to fetch from the remote.
+ """
+ dest = self.stage.source_path
+ git = self.git
+
+ if self.git_version < spack.version.Version("2.25.0.0"):
+ # code paths exist where the package is not set. Ensure some identifier for the
+ # package that was configured for sparse checkout exists in the error message
+ identifier = str(self.url)
+ if self.package:
+ identifier += f" ({self.package.name})"
+ tty.warn(
+ (
+ f"{identifier} is configured for git sparse-checkout "
+ "but the git version is too old to support sparse cloning. "
+ "Cloning the full repository instead."
+ )
+ )
+ self._clone_src(commit, branch, tag)
+ else:
+ # default to depth=2 to allow for retention of some git properties
+ depth = kwargs.get("depth", 2)
+ needs_fetch = branch or tag
+ git_ref = branch or tag or commit
+
+ assert git_ref
+
+ clone_args = ["clone"]
+
+ if needs_fetch:
+ clone_args.extend(["--branch", git_ref])
+
+ if self.get_full_repo:
+ clone_args.append("--no-single-branch")
+ else:
+ clone_args.append("--single-branch")
+
+ clone_args.extend(
+ [f"--depth={depth}", "--no-checkout", "--filter=blob:none", self.url]
+ )
+
+ sparse_args = ["sparse-checkout", "set"]
+
+ if callable(self.git_sparse_paths):
+ sparse_args.extend(self.git_sparse_paths())
+ else:
+ sparse_args.extend([p for p in self.git_sparse_paths])
+
+ sparse_args.append("--cone")
+
+ checkout_args = ["checkout", git_ref]
+
+ if not spack.config.get("config:debug"):
+ clone_args.insert(1, "--quiet")
+ checkout_args.insert(1, "--quiet")
+
+ with temp_cwd():
+ git(*clone_args)
+ repo_name = get_single_file(".")
+ if self.stage:
+ self.stage.srcdir = repo_name
+ shutil.move(repo_name, dest)
+
+ with working_dir(dest):
+ git(*sparse_args)
+ git(*checkout_args)
+
+ def submodule_operations(self):
+ dest = self.stage.source_path
+ git = self.git
+
if self.submodules_delete:
with working_dir(dest):
for submodule_to_delete in self.submodules_delete:
@@ -1541,8 +1634,11 @@ def _from_merged_attrs(fetcher, pkg, version):
attrs["fetch_options"] = pkg.fetch_options
attrs.update(pkg.versions[version])
- if fetcher.url_attr == "git" and hasattr(pkg, "submodules"):
- attrs.setdefault("submodules", pkg.submodules)
+ if fetcher.url_attr == "git":
+ pkg_attr_list = ["submodules", "git_sparse_paths"]
+ for pkg_attr in pkg_attr_list:
+ if hasattr(pkg, pkg_attr):
+ attrs.setdefault(pkg_attr, getattr(pkg, pkg_attr))
return fetcher(**attrs)
diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py
index cb978b97f3..c3926c67ab 100644
--- a/lib/spack/spack/test/conftest.py
+++ b/lib/spack/spack/test/conftest.py
@@ -1418,6 +1418,24 @@ def mock_git_repository(git, tmpdir_factory):
r1 = rev_hash(branch)
r1_file = branch_file
+ multiple_directories_branch = "many_dirs"
+ num_dirs = 3
+ num_files = 2
+ dir_files = []
+ for i in range(num_dirs):
+ for j in range(num_files):
+ dir_files.append(f"dir{i}/file{j}")
+
+ git("checkout", "-b", multiple_directories_branch)
+ for f in dir_files:
+ repodir.ensure(f, file=True)
+ git("add", f)
+
+ git("-c", "commit.gpgsign=false", "commit", "-m", "many_dirs add files")
+
+ # restore default
+ git("checkout", default_branch)
+
# Map of version -> bunch. Each bunch includes; all the args
# that must be specified as part of a version() declaration (used to
# manufacture a version for the 'git-test' package); the associated
@@ -1437,6 +1455,11 @@ def mock_git_repository(git, tmpdir_factory):
"default-no-per-version-git": Bunch(
revision=default_branch, file=r0_file, args={"branch": default_branch}
),
+ "many-directories": Bunch(
+ revision=multiple_directories_branch,
+ file=dir_files[0],
+ args={"git": url, "branch": multiple_directories_branch},
+ ),
}
t = Bunch(
diff --git a/lib/spack/spack/test/git_fetch.py b/lib/spack/spack/test/git_fetch.py
index 52b164e422..b709780651 100644
--- a/lib/spack/spack/test/git_fetch.py
+++ b/lib/spack/spack/test/git_fetch.py
@@ -390,3 +390,38 @@ def test_gitsubmodules_falsey(
assert not os.path.isfile(file_path)
file_path = os.path.join(s.package.stage.source_path, "third_party/submodule1/r0_file_1")
assert not os.path.isfile(file_path)
+
+
+@pytest.mark.disable_clean_stage_check
+def test_git_sparse_paths_partial_clone(
+ mock_git_repository, git_version, default_mock_concretization, mutable_mock_repo, monkeypatch
+):
+ """
+ Test partial clone of repository when using git_sparse_paths property
+ """
+ type_of_test = "many-directories"
+ sparse_paths = ["dir0"]
+ omitted_paths = ["dir1", "dir2"]
+ t = mock_git_repository.checks[type_of_test]
+ args = copy.copy(t.args)
+ args["git_sparse_paths"] = sparse_paths
+ s = default_mock_concretization("git-test")
+ monkeypatch.setitem(s.package.versions, Version("git"), args)
+ s.package.do_stage()
+ with working_dir(s.package.stage.source_path):
+ # top level directory files are cloned via sparse-checkout
+ assert os.path.isfile("r0_file")
+
+ for p in sparse_paths:
+ assert os.path.isdir(p)
+
+ if git_version < Version("2.25.0.0"):
+ # older versions of git should fall back to a full clone
+ for p in omitted_paths:
+ assert os.path.isdir(p)
+ else:
+ for p in omitted_paths:
+ assert not os.path.isdir(p)
+
+ # fixture file is in the sparse-path expansion tree
+ assert os.path.isfile(t.file)
diff --git a/lib/spack/spack/test/packages.py b/lib/spack/spack/test/packages.py
index d00db3ed0c..4f16fb71e8 100644
--- a/lib/spack/spack/test/packages.py
+++ b/lib/spack/spack/test/packages.py
@@ -259,6 +259,7 @@ def test_git_url_top_level_git_versions(version_str, tag, commit, branch):
assert fetcher.tag == tag
assert fetcher.commit == commit
assert fetcher.branch == branch
+ assert fetcher.url == pkg_factory("git-url-top-level").git
@pytest.mark.usefixtures("mock_packages", "config")
@@ -319,3 +320,14 @@ def test_package_deprecated_version(mock_packages, mock_fetch, mock_stage):
assert spack.package_base.deprecated_version(pkg_cls, "1.1.0")
assert not spack.package_base.deprecated_version(pkg_cls, "1.0.0")
+
+
+def test_package_can_have_sparse_checkout_properties(mock_packages, mock_fetch, mock_stage):
+ spec = Spec("git-sparsepaths-pkg")
+ pkg_cls = spack.repo.PATH.get_pkg_class(spec.name)
+ assert hasattr(pkg_cls, "git_sparse_paths")
+
+ fetcher = spack.fetch_strategy.for_package_version(pkg_cls(spec), "1.0")
+ assert isinstance(fetcher, spack.fetch_strategy.GitFetchStrategy)
+ assert hasattr(fetcher, "git_sparse_paths")
+ assert fetcher.git_sparse_paths == pkg_cls.git_sparse_paths
diff --git a/lib/spack/spack/version/git_ref_lookup.py b/lib/spack/spack/version/git_ref_lookup.py
index e6c47194fe..6168fc44a5 100644
--- a/lib/spack/spack/version/git_ref_lookup.py
+++ b/lib/spack/spack/version/git_ref_lookup.py
@@ -138,7 +138,7 @@ class GitRefLookup(AbstractRefLookup):
# Only clone if we don't have it!
if not os.path.exists(dest):
- self.fetcher.clone(dest, bare=True)
+ self.fetcher.bare_clone(dest)
# Lookup commit info
with working_dir(dest):
diff --git a/var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py b/var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py
new file mode 100644
index 0000000000..b37aba8660
--- /dev/null
+++ b/var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py
@@ -0,0 +1,17 @@
+# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+from spack.package import *
+
+
+class GitSparsepathsPkg(Package):
+ """Mock package with git_sparse_paths attribute"""
+
+ homepage = "http://www.git-fetch-example.com"
+ git = "https://a/really.com/big/repo.git"
+
+ version("1.0", tag="v1.0")
+
+ git_sparse_paths = ["foo", "bar", "bing/bang"]