diff options
-rw-r--r-- | lib/spack/docs/packaging_guide.rst | 40 | ||||
-rw-r--r-- | lib/spack/spack/fetch_strategy.py | 132 | ||||
-rw-r--r-- | lib/spack/spack/test/conftest.py | 23 | ||||
-rw-r--r-- | lib/spack/spack/test/git_fetch.py | 35 | ||||
-rw-r--r-- | lib/spack/spack/test/packages.py | 12 | ||||
-rw-r--r-- | lib/spack/spack/version/git_ref_lookup.py | 2 | ||||
-rw-r--r-- | var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py | 17 |
7 files changed, 242 insertions, 19 deletions
diff --git a/lib/spack/docs/packaging_guide.rst b/lib/spack/docs/packaging_guide.rst index 629b87e8e0..a736ff8c79 100644 --- a/lib/spack/docs/packaging_guide.rst +++ b/lib/spack/docs/packaging_guide.rst @@ -1263,6 +1263,11 @@ Git fetching supports the following parameters to ``version``: option ``--depth 1`` will be used if the version of git and the specified transport protocol support it, and ``--single-branch`` will be used if the version of git supports it. +* ``git_sparse_paths``: Use ``sparse-checkout`` to only clone these relative paths. + This feature requires ``git`` to be version ``2.25.0`` or later but is useful for + large repositories that have separate portions that can be built independently. + If paths provided are directories then all the subdirectories and associated files + will also be cloned. Only one of ``tag``, ``branch``, or ``commit`` can be used at a time. @@ -1361,6 +1366,41 @@ Submodules For more information about git submodules see the manpage of git: ``man git-submodule``. +Sparse-Checkout + You can supply ``git_sparse_paths`` at the package or version level to utilize git's + sparse-checkout feature. This will only clone the paths that are specified in the + ``git_sparse_paths`` attribute for the package along with the files in the top level directory. + This feature allows you to only clone what you need from a large repository. + Note that this is a newer feature in git and requires git ``2.25.0`` or greater. + If ``git_sparse_paths`` is supplied and the git version is too old + then a warning will be issued and that package will use the standard cloning operations instead. + ``git_sparse_paths`` should be supplied as a list of paths, a callable function for versions, + or a more complex package attribute using the ``@property`` decorator. The return value should be + a list for a callable implementation of ``git_sparse_paths``. + + .. 
code-block:: python + + def sparse_path_func(package): + """a callable function that can be used inside a version""" + # paths can be directories or files, all subdirectories and files are included + paths = ["doe", "rae", "me/file.cpp"] + if package.spec.version > Version("1.2.0"): + paths.extend(["fae"]) + return paths + + class MyPackage(Package): + # can also be a package attribute that will be used if not specified in versions + git_sparse_paths = ["doe", "rae"] + + # use the package attribute + version("1.0.0") + version("1.1.0") + # use the function + version("1.1.5", git_sparse_paths=sparse_path_func) + version("1.2.0", git_sparse_paths=sparse_path_func) + version("1.2.5", git_sparse_paths=sparse_path_func) + version("1.1.5", git_sparse_paths=sparse_path_func) + .. _github-fetch: ^^^^^^ diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 4bbc143fc9..589b341f5c 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -720,6 +720,7 @@ class GitFetchStrategy(VCSFetchStrategy): "submodules", "get_full_repo", "submodules_delete", + "git_sparse_paths", ] git_version_re = r"git version (\S+)" @@ -735,6 +736,7 @@ class GitFetchStrategy(VCSFetchStrategy): self.submodules = kwargs.get("submodules", False) self.submodules_delete = kwargs.get("submodules_delete", False) self.get_full_repo = kwargs.get("get_full_repo", False) + self.git_sparse_paths = kwargs.get("git_sparse_paths", None) @property def git_version(self): @@ -802,38 +804,50 @@ class GitFetchStrategy(VCSFetchStrategy): tty.debug("Already fetched {0}".format(self.stage.source_path)) return - self.clone(commit=self.commit, branch=self.branch, tag=self.tag) + if self.git_sparse_paths: + self._sparse_clone_src(commit=self.commit, branch=self.branch, tag=self.tag) + else: + self._clone_src(commit=self.commit, branch=self.branch, tag=self.tag) + self.submodule_operations() - def clone(self, dest=None, commit=None, branch=None, 
tag=None, bare=False): + def bare_clone(self, dest): """ - Clone a repository to a path. + Execute a bare clone for metadata only - This method handles cloning from git, but does not require a stage. + Requires a destination since bare cloning does not provide source + and shouldn't be used for staging. + """ + # Default to spack source path + tty.debug("Cloning git repository: {0}".format(self._repo_info())) + + git = self.git + debug = spack.config.get("config:debug") + + # We don't need to worry about which commit/branch/tag is checked out + clone_args = ["clone", "--bare"] + if not debug: + clone_args.append("--quiet") + clone_args.extend([self.url, dest]) + git(*clone_args) + + def _clone_src(self, commit=None, branch=None, tag=None): + """ + Clone a repository to a path using git. Arguments: - dest (str or None): The path into which the code is cloned. If None, - requires a stage and uses the stage's source path. commit (str or None): A commit to fetch from the remote. Only one of commit, branch, and tag may be non-None. branch (str or None): A branch to fetch from the remote. tag (str or None): A tag to fetch from the remote. - bare (bool): Execute a "bare" git clone (--bare option to git) """ # Default to spack source path - dest = dest or self.stage.source_path + dest = self.stage.source_path tty.debug("Cloning git repository: {0}".format(self._repo_info())) git = self.git debug = spack.config.get("config:debug") - if bare: - # We don't need to worry about which commit/branch/tag is checked out - clone_args = ["clone", "--bare"] - if not debug: - clone_args.append("--quiet") - clone_args.extend([self.url, dest]) - git(*clone_args) - elif commit: + if commit: # Need to do a regular clone and check out everything if # they asked for a particular commit. 
clone_args = ["clone", self.url] @@ -912,6 +926,85 @@ class GitFetchStrategy(VCSFetchStrategy): git(*pull_args, ignore_errors=1) git(*co_args) + def _sparse_clone_src(self, commit=None, branch=None, tag=None, **kwargs): + """ + Use git's sparse checkout feature to clone portions of a git repository + + Arguments: + commit (str or None): A commit to fetch from the remote. Only one of + commit, branch, and tag may be non-None. + branch (str or None): A branch to fetch from the remote. + tag (str or None): A tag to fetch from the remote. + """ + dest = self.stage.source_path + git = self.git + + if self.git_version < spack.version.Version("2.25.0.0"): + # code paths exist where the package is not set. Ensure some identifier for the + # package that was configured for sparse checkout exists in the warning message + identifier = str(self.url) + if self.package: + identifier += f" ({self.package.name})" + tty.warn( + ( + f"{identifier} is configured for git sparse-checkout " + "but the git version is too old to support sparse cloning. " + "Cloning the full repository instead." 
+ ) + ) + self._clone_src(commit, branch, tag) + else: + # default to depth=2 to allow for retention of some git properties + depth = kwargs.get("depth", 2) + needs_fetch = branch or tag + git_ref = branch or tag or commit + + assert git_ref + + clone_args = ["clone"] + + if needs_fetch: + clone_args.extend(["--branch", git_ref]) + + if self.get_full_repo: + clone_args.append("--no-single-branch") + else: + clone_args.append("--single-branch") + + clone_args.extend( + [f"--depth={depth}", "--no-checkout", "--filter=blob:none", self.url] + ) + + sparse_args = ["sparse-checkout", "set"] + + if callable(self.git_sparse_paths): + sparse_args.extend(self.git_sparse_paths()) + else: + sparse_args.extend([p for p in self.git_sparse_paths]) + + sparse_args.append("--cone") + + checkout_args = ["checkout", git_ref] + + if not spack.config.get("config:debug"): + clone_args.insert(1, "--quiet") + checkout_args.insert(1, "--quiet") + + with temp_cwd(): + git(*clone_args) + repo_name = get_single_file(".") + if self.stage: + self.stage.srcdir = repo_name + shutil.move(repo_name, dest) + + with working_dir(dest): + git(*sparse_args) + git(*checkout_args) + + def submodule_operations(self): + dest = self.stage.source_path + git = self.git + if self.submodules_delete: with working_dir(dest): for submodule_to_delete in self.submodules_delete: @@ -1541,8 +1634,11 @@ def _from_merged_attrs(fetcher, pkg, version): attrs["fetch_options"] = pkg.fetch_options attrs.update(pkg.versions[version]) - if fetcher.url_attr == "git" and hasattr(pkg, "submodules"): - attrs.setdefault("submodules", pkg.submodules) + if fetcher.url_attr == "git": + pkg_attr_list = ["submodules", "git_sparse_paths"] + for pkg_attr in pkg_attr_list: + if hasattr(pkg, pkg_attr): + attrs.setdefault(pkg_attr, getattr(pkg, pkg_attr)) return fetcher(**attrs) diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index cb978b97f3..c3926c67ab 100644 --- a/lib/spack/spack/test/conftest.py +++ 
b/lib/spack/spack/test/conftest.py @@ -1418,6 +1418,24 @@ def mock_git_repository(git, tmpdir_factory): r1 = rev_hash(branch) r1_file = branch_file + multiple_directories_branch = "many_dirs" + num_dirs = 3 + num_files = 2 + dir_files = [] + for i in range(num_dirs): + for j in range(num_files): + dir_files.append(f"dir{i}/file{j}") + + git("checkout", "-b", multiple_directories_branch) + for f in dir_files: + repodir.ensure(f, file=True) + git("add", f) + + git("-c", "commit.gpgsign=false", "commit", "-m", "many_dirs add files") + + # restore default + git("checkout", default_branch) + # Map of version -> bunch. Each bunch includes; all the args # that must be specified as part of a version() declaration (used to # manufacture a version for the 'git-test' package); the associated @@ -1437,6 +1455,11 @@ def mock_git_repository(git, tmpdir_factory): "default-no-per-version-git": Bunch( revision=default_branch, file=r0_file, args={"branch": default_branch} ), + "many-directories": Bunch( + revision=multiple_directories_branch, + file=dir_files[0], + args={"git": url, "branch": multiple_directories_branch}, + ), } t = Bunch( diff --git a/lib/spack/spack/test/git_fetch.py b/lib/spack/spack/test/git_fetch.py index 52b164e422..b709780651 100644 --- a/lib/spack/spack/test/git_fetch.py +++ b/lib/spack/spack/test/git_fetch.py @@ -390,3 +390,38 @@ def test_gitsubmodules_falsey( assert not os.path.isfile(file_path) file_path = os.path.join(s.package.stage.source_path, "third_party/submodule1/r0_file_1") assert not os.path.isfile(file_path) + + +@pytest.mark.disable_clean_stage_check +def test_git_sparse_paths_partial_clone( + mock_git_repository, git_version, default_mock_concretization, mutable_mock_repo, monkeypatch +): + """ + Test partial clone of repository when using git_sparse_paths property + """ + type_of_test = "many-directories" + sparse_paths = ["dir0"] + omitted_paths = ["dir1", "dir2"] + t = mock_git_repository.checks[type_of_test] + args = copy.copy(t.args) + 
args["git_sparse_paths"] = sparse_paths + s = default_mock_concretization("git-test") + monkeypatch.setitem(s.package.versions, Version("git"), args) + s.package.do_stage() + with working_dir(s.package.stage.source_path): + # top level directory files are cloned via sparse-checkout + assert os.path.isfile("r0_file") + + for p in sparse_paths: + assert os.path.isdir(p) + + if git_version < Version("2.25.0.0"): + # older versions of git should fall back to a full clone + for p in omitted_paths: + assert os.path.isdir(p) + else: + for p in omitted_paths: + assert not os.path.isdir(p) + + # fixture file is in the sparse-path expansion tree + assert os.path.isfile(t.file) diff --git a/lib/spack/spack/test/packages.py b/lib/spack/spack/test/packages.py index d00db3ed0c..4f16fb71e8 100644 --- a/lib/spack/spack/test/packages.py +++ b/lib/spack/spack/test/packages.py @@ -259,6 +259,7 @@ def test_git_url_top_level_git_versions(version_str, tag, commit, branch): assert fetcher.tag == tag assert fetcher.commit == commit assert fetcher.branch == branch + assert fetcher.url == pkg_factory("git-url-top-level").git @pytest.mark.usefixtures("mock_packages", "config") @@ -319,3 +320,14 @@ def test_package_deprecated_version(mock_packages, mock_fetch, mock_stage): assert spack.package_base.deprecated_version(pkg_cls, "1.1.0") assert not spack.package_base.deprecated_version(pkg_cls, "1.0.0") + + +def test_package_can_have_sparse_checkout_properties(mock_packages, mock_fetch, mock_stage): + spec = Spec("git-sparsepaths-pkg") + pkg_cls = spack.repo.PATH.get_pkg_class(spec.name) + assert hasattr(pkg_cls, "git_sparse_paths") + + fetcher = spack.fetch_strategy.for_package_version(pkg_cls(spec), "1.0") + assert isinstance(fetcher, spack.fetch_strategy.GitFetchStrategy) + assert hasattr(fetcher, "git_sparse_paths") + assert fetcher.git_sparse_paths == pkg_cls.git_sparse_paths diff --git a/lib/spack/spack/version/git_ref_lookup.py b/lib/spack/spack/version/git_ref_lookup.py index 
e6c47194fe..6168fc44a5 100644 --- a/lib/spack/spack/version/git_ref_lookup.py +++ b/lib/spack/spack/version/git_ref_lookup.py @@ -138,7 +138,7 @@ class GitRefLookup(AbstractRefLookup): # Only clone if we don't have it! if not os.path.exists(dest): - self.fetcher.clone(dest, bare=True) + self.fetcher.bare_clone(dest) # Lookup commit info with working_dir(dest): diff --git a/var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py b/var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py new file mode 100644 index 0000000000..b37aba8660 --- /dev/null +++ b/var/spack/repos/builtin.mock/packages/git-sparsepaths-pkg/package.py @@ -0,0 +1,17 @@ +# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from spack.package import * + + +class GitSparsepathsPkg(Package): + """Mock package with git_sparse_paths attribute""" + + homepage = "http://www.git-fetch-example.com" + git = "https://a/really.com/big/repo.git" + + version("1.0", tag="v1.0") + + git_sparse_paths = ["foo", "bar", "bing/bang"] |