summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorScott Wittenburg <scott.wittenburg@kitware.com>2022-03-30 18:17:29 -0600
committerGitHub <noreply@github.com>2022-03-30 17:17:29 -0700
commit685e3d7ae9b968aa429c08a642aba4e718e0b419 (patch)
tree0486ec86022adb177b7ec60b70c98add0f792fb9 /lib
parentf97be99f2a193d23435f7a8511a471f2a781cfa5 (diff)
downloadspack-685e3d7ae9b968aa429c08a642aba4e718e0b419.tar.gz
spack-685e3d7ae9b968aa429c08a642aba4e718e0b419.tar.bz2
spack-685e3d7ae9b968aa429c08a642aba4e718e0b419.tar.xz
spack-685e3d7ae9b968aa429c08a642aba4e718e0b419.zip
spack ci: filter untouched pkgs from PR pipelines (#29697)
We've previously generated CI pipelines for PRs, and they rebuild any packages that don't have a binary in an existing build cache. The assumption we were making was that ALL prior merged builds would be in cache, but due to the way we do security in the pipeline, they aren't. `develop` pipelines can take a while to catch up with the latest PRs, and while it does that, there may be a bunch of redundant builds on PRs that duplicate things being rebuilt on `develop`. Until we can do better caching of PR builds, we'll have this problem. We can do better in PRs, though, by *only* rebuilding things in the CI environment that are actually touched by the PR. This change computes exactly what packages are changed by a PR branch and *only* includes those packages' dependents and dependencies in the generated pipeline. Other as-yet unbuilt packages are pruned from CI for the PR. For `develop` pipelines, we still want to build everything to ensure that the stack works, and to ensure that `develop` catches up with PRs. This is especially true since we do not do rebuilds for *every* commit on `develop` -- just the most recent one after each `develop` pipeline finishes. Since we skip around, we may end up missing builds unless we ensure that we rebuild everything. We differentiate between `develop` and PR pipelines in `.gitlab-ci.yml` by setting `SPACK_PRUNE_UNTOUCHED` for PRs. `develop` will still have the old behavior. - [x] Add `SPACK_PRUNE_UNTOUCHED` variable to `spack ci` - [x] Refactor `spack pkg` command by moving historical package checking logic to `spack.repo` - [x] Implement pruning logic in `spack ci` to remove untouched packages - [x] add tests
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/ci.py83
-rw-r--r--lib/spack/spack/cmd/pkg.py99
-rw-r--r--lib/spack/spack/repo.py113
-rw-r--r--lib/spack/spack/test/ci.py37
-rw-r--r--lib/spack/spack/test/cmd/ci.py50
-rw-r--r--lib/spack/spack/test/cmd/pkg.py6
6 files changed, 290 insertions, 98 deletions
diff --git a/lib/spack/spack/ci.py b/lib/spack/spack/ci.py
index e7606135c3..b48f6d44d2 100644
--- a/lib/spack/spack/ci.py
+++ b/lib/spack/spack/ci.py
@@ -24,7 +24,6 @@ import llnl.util.tty as tty
import spack
import spack.binary_distribution as bindist
-import spack.cmd
import spack.compilers as compilers
import spack.config as cfg
import spack.environment as ev
@@ -514,6 +513,60 @@ def format_job_needs(phase_name, strip_compilers, dep_jobs,
return needs_list
+def get_change_revisions():
+ """If this is a git repo get the revisions to use when checking
+ for changed packages and spack core modules."""
+ git_dir = os.path.join(spack.paths.prefix, '.git')
+ if os.path.exists(git_dir) and os.path.isdir(git_dir):
+ # TODO: This will only find changed packages from the last
+ # TODO: commit. While this may work for single merge commits
+ # TODO: when merging the topic branch into the base, it will
+ # TODO: require more thought outside of that narrow case.
+ return 'HEAD^', 'HEAD'
+ return None, None
+
+
+def compute_affected_packages(rev1='HEAD^', rev2='HEAD'):
+ """Determine which packages were added, removed or changed
+ between rev1 and rev2, and return the names as a set"""
+ return spack.repo.get_all_package_diffs('ARC', rev1=rev1, rev2=rev2)
+
+
+def get_spec_filter_list(env, affected_pkgs, dependencies=True, dependents=True):
+ """Given a list of package names, and assuming an active and
+ concretized environment, return a set of concrete specs from
+ the environment corresponding to any of the affected pkgs (or
+ optionally to any of their dependencies/dependents).
+
+ Arguments:
+
+ env (spack.environment.Environment): Active concrete environment
+ affected_pkgs (List[str]): Affected package names
+ dependencies (bool): Include dependencies of affected packages
+ dependents (bool): Include dependents of affected pacakges
+
+ Returns:
+
+ A list of concrete specs from the active environment including
+ those associated with affected packages, and possible their
+ dependencies and dependents as well.
+ """
+ affected_specs = set()
+ all_concrete_specs = env.all_specs()
+ tty.debug('All concrete environment specs:')
+ for s in all_concrete_specs:
+ tty.debug(' {0}/{1}'.format(s.name, s.dag_hash()[:7]))
+ for pkg in affected_pkgs:
+ env_matches = [s for s in all_concrete_specs if s.name == pkg]
+ for match in env_matches:
+ affected_specs.add(match)
+ if dependencies:
+ affected_specs.update(match.traverse(direction='children', root=False))
+ if dependents:
+ affected_specs.update(match.traverse(direction='parents', root=False))
+ return affected_specs
+
+
def generate_gitlab_ci_yaml(env, print_summary, output_file,
prune_dag=False, check_index_only=False,
run_optimizer=False, use_dependencies=False,
@@ -546,6 +599,24 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file,
tty.verbose("Using CDash auth token from environment")
cdash_auth_token = os.environ.get('SPACK_CDASH_AUTH_TOKEN')
+ prune_untouched_packages = os.environ.get('SPACK_PRUNE_UNTOUCHED', None)
+ if prune_untouched_packages:
+ # Requested to prune untouched packages, but assume we won't do that
+ # unless we're actually in a git repo.
+ prune_untouched_packages = False
+ rev1, rev2 = get_change_revisions()
+ tty.debug('Got following revisions: rev1={0}, rev2={1}'.format(rev1, rev2))
+ if rev1 and rev2:
+ prune_untouched_packages = True
+ affected_pkgs = compute_affected_packages(rev1, rev2)
+ tty.debug('affected pkgs:')
+ for p in affected_pkgs:
+ tty.debug(' {0}'.format(p))
+ affected_specs = get_spec_filter_list(env, affected_pkgs)
+ tty.debug('all affected specs:')
+ for s in affected_specs:
+ tty.debug(' {0}'.format(s.name))
+
generate_job_name = os.environ.get('CI_JOB_NAME', None)
parent_pipeline_id = os.environ.get('CI_PIPELINE_ID', None)
@@ -742,6 +813,13 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file,
release_spec_dag_hash = release_spec.dag_hash()
release_spec_build_hash = release_spec.build_hash()
+ if prune_untouched_packages:
+ if release_spec not in affected_specs:
+ tty.debug('Pruning {0}, untouched by change.'.format(
+ release_spec.name))
+ spec_record['needs_rebuild'] = False
+ continue
+
runner_attribs = find_matching_config(
release_spec, gitlab_ci)
@@ -903,7 +981,8 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file,
tty.debug(debug_msg)
if prune_dag and not rebuild_spec:
- tty.debug('Pruning spec that does not need to be rebuilt.')
+ tty.debug('Pruning {0}, does not need rebuild.'.format(
+ release_spec.name))
continue
if (broken_spec_urls is not None and
diff --git a/lib/spack/spack/cmd/pkg.py b/lib/spack/spack/cmd/pkg.py
index 3acc9c04af..dec604ddf3 100644
--- a/lib/spack/spack/cmd/pkg.py
+++ b/lib/spack/spack/cmd/pkg.py
@@ -5,12 +5,9 @@
from __future__ import print_function
-import os
-import re
import sys
import llnl.util.tty as tty
-from llnl.util.filesystem import working_dir
from llnl.util.tty.colify import colify
import spack.cmd
@@ -18,7 +15,6 @@ import spack.cmd.common.arguments as arguments
import spack.paths
import spack.repo
import spack.util.package_hash as ph
-from spack.util.executable import which
description = "query packages associated with particular git revisions"
section = "developer"
@@ -82,78 +78,19 @@ def setup_parser(subparser):
arguments.add_common_arguments(hash_parser, ['spec'])
-def packages_path():
- """Get the test repo if it is active, otherwise the builtin repo."""
- try:
- return spack.repo.path.get_repo('builtin.mock').packages_path
- except spack.repo.UnknownNamespaceError:
- return spack.repo.path.get_repo('builtin').packages_path
-
-
-class GitExe:
- # Wrapper around Executable for git to set working directory for all
- # invocations.
- #
- # Not using -C as that is not supported for git < 1.8.5.
- def __init__(self):
- self._git_cmd = which('git', required=True)
-
- def __call__(self, *args, **kwargs):
- with working_dir(packages_path()):
- return self._git_cmd(*args, **kwargs)
-
-
-_git = None
-
-
-def get_git():
- """Get a git executable that runs *within* the packages path."""
- global _git
- if _git is None:
- _git = GitExe()
- return _git
-
-
-def list_packages(rev):
- git = get_git()
-
- # git ls-tree does not support ... merge-base syntax, so do it manually
- if rev.endswith('...'):
- ref = rev.replace('...', '')
- rev = git('merge-base', ref, 'HEAD', output=str).strip()
-
- output = git('ls-tree', '--name-only', rev, output=str)
- return sorted(line for line in output.split('\n')
- if line and not line.startswith('.'))
-
-
def pkg_add(args):
"""add a package to the git stage with `git add`"""
- git = get_git()
-
- for pkg_name in args.packages:
- filename = spack.repo.path.filename_for_package_name(pkg_name)
- if not os.path.isfile(filename):
- tty.die("No such package: %s. Path does not exist:" %
- pkg_name, filename)
-
- git('add', filename)
+ spack.repo.add_package_to_git_stage(args.packages)
def pkg_list(args):
"""list packages associated with a particular spack git revision"""
- colify(list_packages(args.rev))
-
-
-def diff_packages(rev1, rev2):
- p1 = set(list_packages(rev1))
- p2 = set(list_packages(rev2))
- return p1.difference(p2), p2.difference(p1)
+ colify(spack.repo.list_packages(args.rev))
def pkg_diff(args):
"""compare packages available in two different git revisions"""
- u1, u2 = diff_packages(args.rev1, args.rev2)
+ u1, u2 = spack.repo.diff_packages(args.rev1, args.rev2)
if u1:
print("%s:" % args.rev1)
@@ -168,45 +105,21 @@ def pkg_diff(args):
def pkg_removed(args):
"""show packages removed since a commit"""
- u1, u2 = diff_packages(args.rev1, args.rev2)
+ u1, u2 = spack.repo.diff_packages(args.rev1, args.rev2)
if u1:
colify(sorted(u1))
def pkg_added(args):
"""show packages added since a commit"""
- u1, u2 = diff_packages(args.rev1, args.rev2)
+ u1, u2 = spack.repo.diff_packages(args.rev1, args.rev2)
if u2:
colify(sorted(u2))
def pkg_changed(args):
"""show packages changed since a commit"""
- lower_type = args.type.lower()
- if not re.match('^[arc]*$', lower_type):
- tty.die("Invald change type: '%s'." % args.type,
- "Can contain only A (added), R (removed), or C (changed)")
-
- removed, added = diff_packages(args.rev1, args.rev2)
-
- git = get_git()
- out = git('diff', '--relative', '--name-only', args.rev1, args.rev2,
- output=str).strip()
-
- lines = [] if not out else re.split(r'\s+', out)
- changed = set()
- for path in lines:
- pkg_name, _, _ = path.partition(os.sep)
- if pkg_name not in added and pkg_name not in removed:
- changed.add(pkg_name)
-
- packages = set()
- if 'a' in lower_type:
- packages |= added
- if 'r' in lower_type:
- packages |= removed
- if 'c' in lower_type:
- packages |= changed
+ packages = spack.repo.get_all_package_diffs(args.type, args.rev1, args.rev2)
if packages:
colify(sorted(packages))
diff --git a/lib/spack/spack/repo.py b/lib/spack/spack/repo.py
index 904c22d9ba..6577310048 100644
--- a/lib/spack/spack/repo.py
+++ b/lib/spack/spack/repo.py
@@ -27,6 +27,7 @@ import llnl.util.filesystem as fs
import llnl.util.lang
import llnl.util.tty as tty
from llnl.util.compat import Mapping
+from llnl.util.filesystem import working_dir
import spack.caches
import spack.config
@@ -38,6 +39,7 @@ import spack.tag
import spack.util.imp as simp
import spack.util.naming as nm
import spack.util.path
+from spack.util.executable import which
#: Super-namespace for all packages.
#: Package modules are imported as spack.pkg.<namespace>.<pkg-name>.
@@ -77,6 +79,117 @@ NOT_PROVIDED = object()
_package_prepend = 'from __future__ import absolute_import; from spack.pkgkit import *'
+def packages_path():
+ """Get the test repo if it is active, otherwise the builtin repo."""
+ try:
+ return spack.repo.path.get_repo('builtin.mock').packages_path
+ except spack.repo.UnknownNamespaceError:
+ return spack.repo.path.get_repo('builtin').packages_path
+
+
+class GitExe:
+ # Wrapper around Executable for git to set working directory for all
+ # invocations.
+ #
+ # Not using -C as that is not supported for git < 1.8.5.
+ def __init__(self):
+ self._git_cmd = which('git', required=True)
+
+ def __call__(self, *args, **kwargs):
+ with working_dir(packages_path()):
+ return self._git_cmd(*args, **kwargs)
+
+
+_git = None
+
+
+def get_git():
+ """Get a git executable that runs *within* the packages path."""
+ global _git
+ if _git is None:
+ _git = GitExe()
+ return _git
+
+
+def list_packages(rev):
+ """List all packages associated with the given revision"""
+ git = get_git()
+
+ # git ls-tree does not support ... merge-base syntax, so do it manually
+ if rev.endswith('...'):
+ ref = rev.replace('...', '')
+ rev = git('merge-base', ref, 'HEAD', output=str).strip()
+
+ output = git('ls-tree', '--name-only', rev, output=str)
+ return sorted(line for line in output.split('\n')
+ if line and not line.startswith('.'))
+
+
+def diff_packages(rev1, rev2):
+ """Compute packages lists for the two revisions and return a tuple
+ containing all the packages in rev1 but not in rev2 and all the
+ packages in rev2 but not in rev1."""
+ p1 = set(list_packages(rev1))
+ p2 = set(list_packages(rev2))
+ return p1.difference(p2), p2.difference(p1)
+
+
+def get_all_package_diffs(type, rev1='HEAD^1', rev2='HEAD'):
+ """Show packages changed, added, or removed (or any combination of those)
+ since a commit.
+
+ Arguments:
+
+ type (str): String containing one or more of 'A', 'B', 'C'
+ rev1 (str): Revision to compare against, default is 'HEAD^'
+ rev2 (str): Revision to compare to rev1, default is 'HEAD'
+
+ Returns:
+
+ A set contain names of affected packages.
+ """
+ lower_type = type.lower()
+ if not re.match('^[arc]*$', lower_type):
+ tty.die("Invald change type: '%s'." % type,
+ "Can contain only A (added), R (removed), or C (changed)")
+
+ removed, added = diff_packages(rev1, rev2)
+
+ git = get_git()
+ out = git('diff', '--relative', '--name-only', rev1, rev2,
+ output=str).strip()
+
+ lines = [] if not out else re.split(r'\s+', out)
+ changed = set()
+ for path in lines:
+ pkg_name, _, _ = path.partition(os.sep)
+ if pkg_name not in added and pkg_name not in removed:
+ changed.add(pkg_name)
+
+ packages = set()
+ if 'a' in lower_type:
+ packages |= added
+ if 'r' in lower_type:
+ packages |= removed
+ if 'c' in lower_type:
+ packages |= changed
+
+ return packages
+
+
+def add_package_to_git_stage(packages):
+ """add a package to the git stage with `git add`"""
+ git = get_git()
+
+ for pkg_name in packages:
+ filename = spack.repo.path.filename_for_package_name(pkg_name)
+ if not os.path.isfile(filename):
+ tty.die("No such package: %s. Path does not exist:" %
+ pkg_name, filename)
+
+ git('add', filename)
+
+
def autospec(function):
"""Decorator that automatically converts the first argument of a
function to a Spec.
diff --git a/lib/spack/spack/test/ci.py b/lib/spack/spack/test/ci.py
index 4b74959f68..cd87c2dc60 100644
--- a/lib/spack/spack/test/ci.py
+++ b/lib/spack/spack/test/ci.py
@@ -540,3 +540,40 @@ def test_ci_workarounds():
ci_opt.sort_yaml_obj(actual), default_flow_style=True)
assert(predicted == actual)
+
+
+def test_get_spec_filter_list(mutable_mock_env_path, config, mutable_mock_repo):
+ """Test that given an active environment and list of touched pkgs,
+ we get the right list of possibly-changed env specs"""
+ e1 = ev.create('test')
+ e1.add('mpileaks')
+ e1.add('hypre')
+ e1.concretize()
+
+ """
+ Concretizing the above environment results in the following graphs:
+
+ mpileaks -> mpich (provides mpi virtual dep of mpileaks)
+ -> callpath -> dyninst -> libelf
+ -> libdwarf -> libelf
+ -> mpich (provides mpi dep of callpath)
+
+ hypre -> openblas-with-lapack (provides lapack and blas virtual deps of hypre)
+ """
+
+ touched = ['libdwarf']
+
+ # traversing both directions from libdwarf in the graphs depicted
+ # above results in the following possibly affected env specs:
+ # mpileaks, callpath, dyninst, libdwarf, and libelf. Unaffected
+ # specs are mpich, plus hypre and it's dependencies.
+
+ affected_specs = ci.get_spec_filter_list(e1, touched)
+ affected_pkg_names = set([s.name for s in affected_specs])
+ expected_affected_pkg_names = set(['mpileaks',
+ 'callpath',
+ 'dyninst',
+ 'libdwarf',
+ 'libelf'])
+
+ assert affected_pkg_names == expected_affected_pkg_names
diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py
index 9e257539bb..41761a7e81 100644
--- a/lib/spack/spack/test/cmd/ci.py
+++ b/lib/spack/spack/test/cmd/ci.py
@@ -1462,6 +1462,56 @@ spack:
_validate_needs_graph(new_yaml_contents, needs_graph, False)
+def test_ci_generate_prune_untouched(tmpdir, mutable_mock_env_path,
+ install_mockery, mock_packages,
+ project_dir_env, monkeypatch):
+ """Test pipeline generation with pruning works to eliminate
+ specs that were not affected by a change"""
+ project_dir_env(tmpdir.strpath)
+ mirror_url = 'https://my.fake.mirror'
+ filename = str(tmpdir.join('spack.yaml'))
+ with open(filename, 'w') as f:
+ f.write("""\
+spack:
+ specs:
+ - archive-files
+ - callpath
+ mirrors:
+ some-mirror: {0}
+ gitlab-ci:
+ mappings:
+ - match:
+ - arch=test-debian6-core2
+ runner-attributes:
+ tags:
+ - donotcare
+ image: donotcare
+""".format(mirror_url))
+ with tmpdir.as_cwd():
+ env_cmd('create', 'test', './spack.yaml')
+ outputfile = str(tmpdir.join('.gitlab-ci.yml'))
+
+ def fake_compute_affected(r1=None, r2=None):
+ return ['libdwarf']
+
+ with ev.read('test'):
+ # This kind of pruning is enabled with the following env var
+ os.environ['SPACK_PRUNE_UNTOUCHED'] = 'TRUE'
+ monkeypatch.setattr(
+ ci, 'compute_affected_packages', fake_compute_affected)
+ ci_cmd('generate', '--output-file', outputfile)
+ os.environ.pop('SPACK_PRUNE_UNTOUCHED')
+
+ with open(outputfile) as f:
+ contents = f.read()
+ yaml_contents = syaml.load(contents)
+
+ for ci_key in yaml_contents.keys():
+ if 'archive-files' in ci_key or 'mpich' in ci_key:
+ print('Error: archive-files and mpich should have been pruned')
+ assert(False)
+
+
def test_ci_subcommands_without_mirror(tmpdir, mutable_mock_env_path,
mock_packages,
install_mockery, project_dir_env,
diff --git a/lib/spack/spack/test/cmd/pkg.py b/lib/spack/spack/test/cmd/pkg.py
index 7f681ad9b9..352ffca4ac 100644
--- a/lib/spack/spack/test/cmd/pkg.py
+++ b/lib/spack/spack/test/cmd/pkg.py
@@ -13,8 +13,8 @@ import pytest
from llnl.util.filesystem import mkdirp, working_dir
-import spack.cmd.pkg
import spack.main
+import spack.repo
from spack.util.executable import which
pytestmark = pytest.mark.skipif(not which('git'),
@@ -106,12 +106,12 @@ pkg = spack.main.SpackCommand('pkg')
def test_packages_path():
- assert (spack.cmd.pkg.packages_path() ==
+ assert (spack.repo.packages_path() ==
spack.repo.path.get_repo('builtin').packages_path)
def test_mock_packages_path(mock_packages):
- assert (spack.cmd.pkg.packages_path() ==
+ assert (spack.repo.packages_path() ==
spack.repo.path.get_repo('builtin.mock').packages_path)