From 7cc2db1b4e8af328d7e6ef1d34886c9a70081a6e Mon Sep 17 00:00:00 2001 From: Zack Galbreath Date: Fri, 2 Apr 2021 19:40:47 -0400 Subject: Check against a list of known-broken specs in `ci generate` (#22690) * Strip leading slash from S3 key in url_exists() * Check against a list of known-broken specs in `ci generate` --- lib/spack/docs/pipelines.rst | 9 ++++++- lib/spack/spack/ci.py | 23 ++++++++++++++++ lib/spack/spack/schema/gitlab_ci.py | 1 + lib/spack/spack/test/cmd/ci.py | 52 +++++++++++++++++++++++++++++++++++++ lib/spack/spack/test/web.py | 24 +++++++++++++++++ lib/spack/spack/util/s3.py | 5 +++- lib/spack/spack/util/web.py | 26 +++++++++---------- 7 files changed, 124 insertions(+), 16 deletions(-) diff --git a/lib/spack/docs/pipelines.rst b/lib/spack/docs/pipelines.rst index c3c86e75c9..973ec2182c 100644 --- a/lib/spack/docs/pipelines.rst +++ b/lib/spack/docs/pipelines.rst @@ -240,6 +240,13 @@ takes a boolean and determines whether the pipeline uses artifacts to store and pass along the buildcaches from one stage to the next (the default if you don't provide this option is ``False``). +The optional ``broken-specs-url`` key tells Spack to check against a list of +specs that are known to be currently broken in ``develop``. If any such specs +are found, the ``spack ci generate`` command will fail with an error message +informing the user what broken specs were encountered. This allows the pipeline +to fail early and avoid wasting compute resources attempting to build packages +that will not succeed. + The optional ``cdash`` section provides information that will be used by the ``spack ci generate`` command (invoked by ``spack ci start``) for reporting to CDash. All the jobs generated from this environment will belong to a @@ -554,7 +561,7 @@ provision of a custom ``script`` section. The reason for this is to run Now imagine you have long pipelines with many specs to be built, and you are pointing to a spack repository and branch that has a tendency to change -frequently, such as the main repo and it's ``develop`` branch. If each child +frequently, such as the main repo and its ``develop`` branch. If each child job checks out the ``develop`` branch, that could result in some jobs running with one SHA of spack, while later jobs run with another. To help avoid this issue, the pipeline generation process saves global variables called diff --git a/lib/spack/spack/ci.py b/lib/spack/spack/ci.py index ac79fb642e..4daffac44c 100644 --- a/lib/spack/spack/ci.py +++ b/lib/spack/spack/ci.py @@ -575,6 +575,13 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False, ci_mirrors = yaml_root['mirrors'] mirror_urls = [url for url in ci_mirrors.values()] + # Check for a list of "known broken" specs that we should not bother + # trying to build. + broken_specs_url = '' + known_broken_specs_encountered = [] + if 'broken-specs-url' in gitlab_ci: + broken_specs_url = gitlab_ci['broken-specs-url'] + enable_artifacts_buildcache = False if 'enable-artifacts-buildcache' in gitlab_ci: enable_artifacts_buildcache = gitlab_ci['enable-artifacts-buildcache'] @@ -665,6 +672,14 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False, pkg_name = pkg_name_from_spec_label(spec_label) release_spec = root_spec[pkg_name] + # Check if this spec is in our list of known failures. + if broken_specs_url: + full_hash = release_spec.full_hash() + broken_spec_path = url_util.join(broken_specs_url, full_hash) + if web_util.url_exists(broken_spec_path): + known_broken_specs_encountered.append('{0} ({1})'.format( + release_spec, full_hash)) + runner_attribs = find_matching_config( release_spec, gitlab_ci) @@ -1029,6 +1044,14 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False, sorted_output = {'no-specs-to-rebuild': noop_job} + if known_broken_specs_encountered: + error_msg = ( + 'Pipeline generation failed due to the presence of the ' + 'following specs that are known to be broken in develop:\n') + for broken_spec in known_broken_specs_encountered: + error_msg += '* {0}\n'.format(broken_spec) + tty.die(error_msg) + with open(output_file, 'w') as outf: outf.write(syaml.dump_config(sorted_output, default_flow_style=True)) diff --git a/lib/spack/spack/schema/gitlab_ci.py b/lib/spack/spack/schema/gitlab_ci.py index bfc8ca2647..d6d8f564a3 100644 --- a/lib/spack/spack/schema/gitlab_ci.py +++ b/lib/spack/spack/schema/gitlab_ci.py @@ -111,6 +111,7 @@ core_shared_properties = union_dicts( }, 'service-job-attributes': runner_selector_schema, 'rebuild-index': {'type': 'boolean'}, + 'broken-specs-url': {'type': 'string'}, }, ) diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py index 927c1e9ce8..d1cac9e78c 100644 --- a/lib/spack/spack/test/cmd/ci.py +++ b/lib/spack/spack/test/cmd/ci.py @@ -1404,3 +1404,55 @@ spack: # Cleanup job should be 2nd to last, just before rebuild-index assert('stage' in cleanup_job) assert(cleanup_job['stage'] == stages[-2]) + + +def test_ci_generate_read_broken_specs_url(tmpdir, mutable_mock_env_path, + env_deactivate, install_mockery, + mock_packages, monkeypatch): + """Verify that `broken-specs-url` works as intended""" + spec_a = Spec('a') + spec_a.concretize() + a_full_hash = spec_a.full_hash() + + spec_flattendeps = Spec('flatten-deps') + spec_flattendeps.concretize() + flattendeps_full_hash = spec_flattendeps.full_hash() + + # Mark 'a' as broken (but not 'flatten-deps') + broken_spec_a_path = str(tmpdir.join(a_full_hash)) + with open(broken_spec_a_path, 'w') as bsf: + bsf.write('') + + # Test that `spack ci generate` notices this broken spec and fails. + filename = str(tmpdir.join('spack.yaml')) + with open(filename, 'w') as f: + f.write("""\ +spack: + specs: + - flatten-deps + - a + mirrors: + some-mirror: https://my.fake.mirror + gitlab-ci: + broken-specs-url: "{0}" + mappings: + - match: + - archive-files + runner-attributes: + tags: + - donotcare + image: donotcare +""".format(tmpdir.strpath)) + + with tmpdir.as_cwd(): + env_cmd('create', 'test', './spack.yaml') + with ev.read('test'): + # Check output of the 'generate' subcommand + output = ci_cmd('generate', output=str, fail_on_error=False) + assert('known to be broken' in output) + + ex = '({0})'.format(a_full_hash) + assert(ex in output) + + ex = '({0})'.format(flattendeps_full_hash) + assert(ex not in output) diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py index 4d80507c52..d5f76632a4 100644 --- a/lib/spack/spack/test/web.py +++ b/lib/spack/spack/test/web.py @@ -215,6 +215,11 @@ class MockPaginator(object): return MockPages() +class MockClientError(Exception): + def __init__(self): + self.response = {'Error': {'Code': 'NoSuchKey'}} + + class MockS3Client(object): def get_paginator(self, *args, **kwargs): return MockPaginator() @@ -233,6 +238,12 @@ class MockS3Client(object): def delete_object(self, *args, **kwargs): pass + def get_object(self, Bucket=None, Key=None): + self.ClientError = MockClientError + if Bucket == 'my-bucket' and Key == 'subdirectory/my-file': + return True + raise self.ClientError + def test_remove_s3_url(monkeypatch, capfd): fake_s3_url = 's3://my-bucket/subdirectory/mirror' @@ -254,3 +265,16 @@ def test_remove_s3_url(monkeypatch, capfd): assert('Failed to delete keyone (Access Denied)' in err) assert('Deleted keythree' in err) assert('Deleted keytwo' in err) + + +def test_s3_url_exists(monkeypatch, capfd): + def mock_create_s3_session(url): + return MockS3Client() + monkeypatch.setattr( + spack.util.s3, 'create_s3_session', mock_create_s3_session) + + fake_s3_url_exists = 's3://my-bucket/subdirectory/my-file' + assert(spack.util.web.url_exists(fake_s3_url_exists)) + + fake_s3_url_does_not_exist = 's3://my-bucket/subdirectory/my-notfound-file' + assert(not spack.util.web.url_exists(fake_s3_url_does_not_exist)) diff --git a/lib/spack/spack/util/s3.py b/lib/spack/spack/util/s3.py index a82aca10c1..31a18093e1 100644 --- a/lib/spack/spack/util/s3.py +++ b/lib/spack/spack/util/s3.py @@ -22,6 +22,7 @@ def create_s3_session(url): # want to require boto as a dependency unless the user actually wants to # access S3 mirrors. from boto3 import Session + from botocore.exceptions import ClientError session = Session() @@ -41,4 +42,6 @@ def create_s3_session(url): s3_client_args["config"] = Config(signature_version=UNSIGNED) - return session.client('s3', **s3_client_args) + client = session.client('s3', **s3_client_args) + client.ClientError = ClientError + return client diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index 1dac12faca..f59f3bb064 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -20,17 +20,6 @@ import six from six.moves.urllib.error import URLError from six.moves.urllib.request import urlopen, Request -if sys.version_info < (3, 0): - # Python 2 had these in the HTMLParser package. - from HTMLParser import HTMLParser, HTMLParseError # novm -else: - # In Python 3, things moved to html.parser - from html.parser import HTMLParser - - # Also, HTMLParseError is deprecated and never raised. - class HTMLParseError(Exception): - pass - from llnl.util.filesystem import mkdirp import llnl.util.tty as tty @@ -45,6 +34,16 @@ import llnl.util.lang from spack.util.compression import ALLOWED_ARCHIVE_TYPES +if sys.version_info < (3, 0): + # Python 2 had these in the HTMLParser package. + from HTMLParser import HTMLParser, HTMLParseError # novm +else: + # In Python 3, things moved to html.parser + from html.parser import HTMLParser + + # Also, HTMLParseError is deprecated and never raised. + class HTMLParseError(Exception): + pass # Timeout in seconds for web requests _timeout = 10 @@ -211,11 +210,10 @@ def url_exists(url): if url.scheme == 's3': s3 = s3_util.create_s3_session(url) - from botocore.exceptions import ClientError try: - s3.get_object(Bucket=url.netloc, Key=url.path) + s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/')) return True - except ClientError as err: + except s3.ClientError as err: if err.response['Error']['Code'] == 'NoSuchKey': return False raise err -- cgit v1.2.3-70-g09d2