diff options
author | psakievich <psakiev@sandia.gov> | 2024-08-14 23:28:34 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-15 05:28:34 +0000 |
commit | 1b82779087ed4bb7b90d30da02ffff9b655d6fcc (patch) | |
tree | 61a2dd234bc950f63cbbd8191937a89278e8f218 /lib | |
parent | 55b1b0f3f039d35c71fd775be5bbed0f071c0bb2 (diff) | |
download | spack-1b82779087ed4bb7b90d30da02ffff9b655d6fcc.tar.gz spack-1b82779087ed4bb7b90d30da02ffff9b655d6fcc.tar.bz2 spack-1b82779087ed4bb7b90d30da02ffff9b655d6fcc.tar.xz spack-1b82779087ed4bb7b90d30da02ffff9b655d6fcc.zip |
Add options for sparse checkout in GitFetcher (#45473)
* Add options for sparse checkout in GitFetcher
Newer versions of git have a beta feature called sparse checkout
that allows users to check out a portion of a large repo.
This feature will be ideal for monolithic repo projects that want to
model their infrastructure via spack. This PR implements an addition
to the GitFetcher that allows users to add a `git_sparse_paths`
attribute to package classes or versions which will then use sparse
checkout on those directories/files for the package.
* Style
* Split git clone into multiple functions
* Add sparse-checkout impl
* Internalize src clone functions
* Docs
* Adding sparse clone test
* Add test for partial clone
* [@spackbot] updating style on behalf of psakievich
* Small fixes
* Restore default branch status
* Fix attributes for package
* Update lib/spack/docs/packaging_guide.rst
Co-authored-by: Matthew Mosby <44072882+mdmosby@users.noreply.github.com>
* Extend unit test to multiple git versions
* style
---------
Co-authored-by: psakievich <psakievich@users.noreply.github.com>
Co-authored-by: Matthew Mosby <44072882+mdmosby@users.noreply.github.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/docs/packaging_guide.rst | 40 | ||||
-rw-r--r-- | lib/spack/spack/fetch_strategy.py | 132 | ||||
-rw-r--r-- | lib/spack/spack/test/conftest.py | 23 | ||||
-rw-r--r-- | lib/spack/spack/test/git_fetch.py | 35 | ||||
-rw-r--r-- | lib/spack/spack/test/packages.py | 12 | ||||
-rw-r--r-- | lib/spack/spack/version/git_ref_lookup.py | 2 |
6 files changed, 225 insertions, 19 deletions
diff --git a/lib/spack/docs/packaging_guide.rst b/lib/spack/docs/packaging_guide.rst index 629b87e8e0..a736ff8c79 100644 --- a/lib/spack/docs/packaging_guide.rst +++ b/lib/spack/docs/packaging_guide.rst @@ -1263,6 +1263,11 @@ Git fetching supports the following parameters to ``version``: option ``--depth 1`` will be used if the version of git and the specified transport protocol support it, and ``--single-branch`` will be used if the version of git supports it. +* ``git_sparse_paths``: Use ``sparse-checkout`` to only clone these relative paths. + This feature requires ``git`` to be version ``2.25.0`` or later but is useful for + large repositories that have separate portions that can be built independently. + If paths provided are directories then all the subdirectories and associated files + will also be cloned. Only one of ``tag``, ``branch``, or ``commit`` can be used at a time. @@ -1361,6 +1366,41 @@ Submodules For more information about git submodules see the manpage of git: ``man git-submodule``. +Sparse-Checkout + You can supply ``git_sparse_paths`` at the package or version level to utilize git's + sparse-checkout feature. This will only clone the paths that are specified in the + ``git_sparse_paths`` attribute for the package along with the files in the top level directory. + This feature allows you to only clone what you need from a large repository. + Note that this is a newer feature in git and requries git ``2.25.0`` or greater. + If ``git_sparse_paths`` is supplied and the git version is too old + then a warning will be issued and that package will use the standard cloning operations instead. + ``git_sparse_paths`` should be supplied as a list of paths, a callable function for versions, + or a more complex package attribute using the ``@property`` decorator. The return value should be + a list for a callable implementation of ``git_sparse_paths``. + + .. 
code-block:: python + + def sparse_path_function(package) + """a callable function that can be used in side a version""" + # paths can be directories or functions, all subdirectories and files are included + paths = ["doe", "rae", "me/file.cpp"] + if package.spec.version > Version("1.2.0"): + paths.extend(["fae"]) + return paths + + class MyPackage(package): + # can also be a package attribute that will be used if not specified in versions + git_sparse_paths = ["doe", "rae"] + + # use the package attribute + version("1.0.0") + version("1.1.0") + # use the function + version("1.1.5", git_sparse_paths=sparse_path_func) + version("1.2.0", git_sparse_paths=sparse_path_func) + version("1.2.5", git_sparse_paths=sparse_path_func) + version("1.1.5", git_sparse_paths=sparse_path_func) + .. _github-fetch: ^^^^^^ diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 4bbc143fc9..589b341f5c 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -720,6 +720,7 @@ class GitFetchStrategy(VCSFetchStrategy): "submodules", "get_full_repo", "submodules_delete", + "git_sparse_paths", ] git_version_re = r"git version (\S+)" @@ -735,6 +736,7 @@ class GitFetchStrategy(VCSFetchStrategy): self.submodules = kwargs.get("submodules", False) self.submodules_delete = kwargs.get("submodules_delete", False) self.get_full_repo = kwargs.get("get_full_repo", False) + self.git_sparse_paths = kwargs.get("git_sparse_paths", None) @property def git_version(self): @@ -802,38 +804,50 @@ class GitFetchStrategy(VCSFetchStrategy): tty.debug("Already fetched {0}".format(self.stage.source_path)) return - self.clone(commit=self.commit, branch=self.branch, tag=self.tag) + if self.git_sparse_paths: + self._sparse_clone_src(commit=self.commit, branch=self.branch, tag=self.tag) + else: + self._clone_src(commit=self.commit, branch=self.branch, tag=self.tag) + self.submodule_operations() - def clone(self, dest=None, commit=None, branch=None, 
tag=None, bare=False): + def bare_clone(self, dest): """ - Clone a repository to a path. + Execute a bare clone for metadata only - This method handles cloning from git, but does not require a stage. + Requires a destination since bare cloning does not provide source + and shouldn't be used for staging. + """ + # Default to spack source path + tty.debug("Cloning git repository: {0}".format(self._repo_info())) + + git = self.git + debug = spack.config.get("config:debug") + + # We don't need to worry about which commit/branch/tag is checked out + clone_args = ["clone", "--bare"] + if not debug: + clone_args.append("--quiet") + clone_args.extend([self.url, dest]) + git(*clone_args) + + def _clone_src(self, commit=None, branch=None, tag=None): + """ + Clone a repository to a path using git. Arguments: - dest (str or None): The path into which the code is cloned. If None, - requires a stage and uses the stage's source path. commit (str or None): A commit to fetch from the remote. Only one of commit, branch, and tag may be non-None. branch (str or None): A branch to fetch from the remote. tag (str or None): A tag to fetch from the remote. - bare (bool): Execute a "bare" git clone (--bare option to git) """ # Default to spack source path - dest = dest or self.stage.source_path + dest = self.stage.source_path tty.debug("Cloning git repository: {0}".format(self._repo_info())) git = self.git debug = spack.config.get("config:debug") - if bare: - # We don't need to worry about which commit/branch/tag is checked out - clone_args = ["clone", "--bare"] - if not debug: - clone_args.append("--quiet") - clone_args.extend([self.url, dest]) - git(*clone_args) - elif commit: + if commit: # Need to do a regular clone and check out everything if # they asked for a particular commit. 
clone_args = ["clone", self.url] @@ -912,6 +926,85 @@ class GitFetchStrategy(VCSFetchStrategy): git(*pull_args, ignore_errors=1) git(*co_args) + def _sparse_clone_src(self, commit=None, branch=None, tag=None, **kwargs): + """ + Use git's sparse checkout feature to clone portions of a git repository + + Arguments: + commit (str or None): A commit to fetch from the remote. Only one of + commit, branch, and tag may be non-None. + branch (str or None): A branch to fetch from the remote. + tag (str or None): A tag to fetch from the remote. + """ + dest = self.stage.source_path + git = self.git + + if self.git_version < spack.version.Version("2.25.0.0"): + # code paths exist where the package is not set. Assure some indentifier for the + # package that was configured for sparse checkout exists in the error message + identifier = str(self.url) + if self.package: + identifier += f" ({self.package.name})" + tty.warn( + ( + f"{identifier} is configured for git sparse-checkout " + "but the git version is too old to support sparse cloning. " + "Cloning the full repository instead." 
+ ) + ) + self._clone_src(commit, branch, tag) + else: + # default to depth=2 to allow for retention of some git properties + depth = kwargs.get("depth", 2) + needs_fetch = branch or tag + git_ref = branch or tag or commit + + assert git_ref + + clone_args = ["clone"] + + if needs_fetch: + clone_args.extend(["--branch", git_ref]) + + if self.get_full_repo: + clone_args.append("--no-single-branch") + else: + clone_args.append("--single-branch") + + clone_args.extend( + [f"--depth={depth}", "--no-checkout", "--filter=blob:none", self.url] + ) + + sparse_args = ["sparse-checkout", "set"] + + if callable(self.git_sparse_paths): + sparse_args.extend(self.git_sparse_paths()) + else: + sparse_args.extend([p for p in self.git_sparse_paths]) + + sparse_args.append("--cone") + + checkout_args = ["checkout", git_ref] + + if not spack.config.get("config:debug"): + clone_args.insert(1, "--quiet") + checkout_args.insert(1, "--quiet") + + with temp_cwd(): + git(*clone_args) + repo_name = get_single_file(".") + if self.stage: + self.stage.srcdir = repo_name + shutil.move(repo_name, dest) + + with working_dir(dest): + git(*sparse_args) + git(*checkout_args) + + def submodule_operations(self): + dest = self.stage.source_path + git = self.git + if self.submodules_delete: with working_dir(dest): for submodule_to_delete in self.submodules_delete: @@ -1541,8 +1634,11 @@ def _from_merged_attrs(fetcher, pkg, version): attrs["fetch_options"] = pkg.fetch_options attrs.update(pkg.versions[version]) - if fetcher.url_attr == "git" and hasattr(pkg, "submodules"): - attrs.setdefault("submodules", pkg.submodules) + if fetcher.url_attr == "git": + pkg_attr_list = ["submodules", "git_sparse_paths"] + for pkg_attr in pkg_attr_list: + if hasattr(pkg, pkg_attr): + attrs.setdefault(pkg_attr, getattr(pkg, pkg_attr)) return fetcher(**attrs) diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index cb978b97f3..c3926c67ab 100644 --- a/lib/spack/spack/test/conftest.py +++ 
b/lib/spack/spack/test/conftest.py @@ -1418,6 +1418,24 @@ def mock_git_repository(git, tmpdir_factory): r1 = rev_hash(branch) r1_file = branch_file + multiple_directories_branch = "many_dirs" + num_dirs = 3 + num_files = 2 + dir_files = [] + for i in range(num_dirs): + for j in range(num_files): + dir_files.append(f"dir{i}/file{j}") + + git("checkout", "-b", multiple_directories_branch) + for f in dir_files: + repodir.ensure(f, file=True) + git("add", f) + + git("-c", "commit.gpgsign=false", "commit", "-m", "many_dirs add files") + + # restore default + git("checkout", default_branch) + # Map of version -> bunch. Each bunch includes; all the args # that must be specified as part of a version() declaration (used to # manufacture a version for the 'git-test' package); the associated @@ -1437,6 +1455,11 @@ def mock_git_repository(git, tmpdir_factory): "default-no-per-version-git": Bunch( revision=default_branch, file=r0_file, args={"branch": default_branch} ), + "many-directories": Bunch( + revision=multiple_directories_branch, + file=dir_files[0], + args={"git": url, "branch": multiple_directories_branch}, + ), } t = Bunch( diff --git a/lib/spack/spack/test/git_fetch.py b/lib/spack/spack/test/git_fetch.py index 52b164e422..b709780651 100644 --- a/lib/spack/spack/test/git_fetch.py +++ b/lib/spack/spack/test/git_fetch.py @@ -390,3 +390,38 @@ def test_gitsubmodules_falsey( assert not os.path.isfile(file_path) file_path = os.path.join(s.package.stage.source_path, "third_party/submodule1/r0_file_1") assert not os.path.isfile(file_path) + + +@pytest.mark.disable_clean_stage_check +def test_git_sparse_paths_partial_clone( + mock_git_repository, git_version, default_mock_concretization, mutable_mock_repo, monkeypatch +): + """ + Test partial clone of repository when using git_sparse_paths property + """ + type_of_test = "many-directories" + sparse_paths = ["dir0"] + omitted_paths = ["dir1", "dir2"] + t = mock_git_repository.checks[type_of_test] + args = copy.copy(t.args) + 
args["git_sparse_paths"] = sparse_paths + s = default_mock_concretization("git-test") + monkeypatch.setitem(s.package.versions, Version("git"), args) + s.package.do_stage() + with working_dir(s.package.stage.source_path): + # top level directory files are cloned via sparse-checkout + assert os.path.isfile("r0_file") + + for p in sparse_paths: + assert os.path.isdir(p) + + if git_version < Version("2.25.0.0"): + # older versions of git should fall back to a full clone + for p in omitted_paths: + assert os.path.isdir(p) + else: + for p in omitted_paths: + assert not os.path.isdir(p) + + # fixture file is in the sparse-path expansion tree + assert os.path.isfile(t.file) diff --git a/lib/spack/spack/test/packages.py b/lib/spack/spack/test/packages.py index d00db3ed0c..4f16fb71e8 100644 --- a/lib/spack/spack/test/packages.py +++ b/lib/spack/spack/test/packages.py @@ -259,6 +259,7 @@ def test_git_url_top_level_git_versions(version_str, tag, commit, branch): assert fetcher.tag == tag assert fetcher.commit == commit assert fetcher.branch == branch + assert fetcher.url == pkg_factory("git-url-top-level").git @pytest.mark.usefixtures("mock_packages", "config") @@ -319,3 +320,14 @@ def test_package_deprecated_version(mock_packages, mock_fetch, mock_stage): assert spack.package_base.deprecated_version(pkg_cls, "1.1.0") assert not spack.package_base.deprecated_version(pkg_cls, "1.0.0") + + +def test_package_can_have_sparse_checkout_properties(mock_packages, mock_fetch, mock_stage): + spec = Spec("git-sparsepaths-pkg") + pkg_cls = spack.repo.PATH.get_pkg_class(spec.name) + assert hasattr(pkg_cls, "git_sparse_paths") + + fetcher = spack.fetch_strategy.for_package_version(pkg_cls(spec), "1.0") + assert isinstance(fetcher, spack.fetch_strategy.GitFetchStrategy) + assert hasattr(fetcher, "git_sparse_paths") + assert fetcher.git_sparse_paths == pkg_cls.git_sparse_paths diff --git a/lib/spack/spack/version/git_ref_lookup.py b/lib/spack/spack/version/git_ref_lookup.py index 
e6c47194fe..6168fc44a5 100644 --- a/lib/spack/spack/version/git_ref_lookup.py +++ b/lib/spack/spack/version/git_ref_lookup.py @@ -138,7 +138,7 @@ class GitRefLookup(AbstractRefLookup): # Only clone if we don't have it! if not os.path.exists(dest): - self.fetcher.clone(dest, bare=True) + self.fetcher.bare_clone(dest) # Lookup commit info with working_dir(dest): |