author     Scott Wittenburg <scott.wittenburg@kitware.com>    2021-02-16 18:21:18 -0700
committer  GitHub <noreply@github.com>                        2021-02-16 18:21:18 -0700
commit     5b0507cc6536f8b243d398f4e1b78b5eee298753 (patch)
tree       fe27c0916bf3117ef64c4a0453742599a8666d5d
parent     1fe51ffe18f6207c2565db3a921d797d18e17bfd (diff)
Pipelines: Temporary buildcache storage (#21474)
Before this change, in pipeline environments where runners do not have access
to persistent shared file-system storage, the only way to pass buildcaches to
dependents in later stages was the "enable-artifacts-buildcache" flag in the
gitlab-ci section of the spack.yaml. This change adds a second mechanism,
"temporary-storage-url-prefix", which can be provided instead of
"enable-artifacts-buildcache"; the two cannot be used at the same time. If
this prefix is provided (only "file://" and "s3://" URLs are supported), the
GitLab "CI_PIPELINE_ID" is appended to it to form the URL of a mirror where
pipeline jobs write buildcache entries for use by jobs in subsequent stages.
A cleanup job is also generated to run after all the rebuild jobs have
finished; it deletes the contents of the temporary mirror. To support this
behavior, a new mirror sub-command has been added: "spack mirror destroy",
which accepts either a mirror name or a URL.
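For illustration, a minimal gitlab-ci section using the new option might look like the following sketch (the mirror name and URLs are placeholders, adapted from the test added in this change):

```yaml
spack:
  specs:
  - archive-files
  mirrors:
    some-mirror: https://my.fake.mirror   # placeholder remote mirror
  gitlab-ci:
    # Per-pipeline temporary storage; mutually exclusive with
    # "enable-artifacts-buildcache". Only file:// and s3:// URLs are supported.
    temporary-storage-url-prefix: s3://my-bucket/temp-storage
    mappings:
    - match:
      - archive-files
      runner-attributes:
        tags:
        - donotcare
```

With this configuration, each rebuild job pushes to a mirror at `s3://my-bucket/temp-storage/$CI_PIPELINE_ID`, and the generated cleanup job later removes it with `spack mirror destroy --mirror-url s3://my-bucket/temp-storage/$CI_PIPELINE_ID`. The same sub-command also accepts a named mirror, e.g. `spack mirror destroy -m some-mirror`.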
This change also fixes a bug in the generation of the "needs" list for each
job. Each job's "needs" list is supposed to contain only its direct
dependencies for scheduling purposes, unless "enable-artifacts-buildcache" is
specified; only in that case should the needs lists contain all transitive
dependencies. This change fixes a bug that caused the needs lists to always
contain all transitive dependencies, regardless of whether or not
"enable-artifacts-buildcache" was specified.
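The intended shape is easiest to see in the generated YAML. A sketch, with hypothetical job names (real generated names also embed spec hashes and stage information): for a chain where `a` depends directly on `b` and only transitively on `c`, job `a` now lists only `b`:

```yaml
# Hypothetical generated job for spec "a" (direct dependency: "b";
# transitive dependency "c" is no longer listed in "needs").
(specs) a:
  needs:
  - job: (specs) b
    artifacts: false   # true only when enable-artifacts-buildcache is set
```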
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 8
-rw-r--r-- | lib/spack/spack/ci.py | 47
-rw-r--r-- | lib/spack/spack/cmd/ci.py | 40
-rw-r--r-- | lib/spack/spack/cmd/mirror.py | 27
-rw-r--r-- | lib/spack/spack/schema/gitlab_ci.py | 124
-rw-r--r-- | lib/spack/spack/test/cmd/ci.py | 120
-rw-r--r-- | lib/spack/spack/test/cmd/mirror.py | 39
-rw-r--r-- | lib/spack/spack/test/web.py | 59
-rw-r--r-- | lib/spack/spack/util/web.py | 46
-rwxr-xr-x | share/spack/spack-completion.bash | 6
10 files changed, 432 insertions, 84 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 9e8ca3a204..e477c3104a 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -763,6 +763,10 @@ def generate_package_index(cache_prefix):
             url_util.join(cache_prefix, 'index.json.hash'),
             keep_original=False,
             extra_args={'ContentType': 'text/plain'})
+    except Exception as err:
+        msg = 'Encountered problem pushing package index to {0}: {1}'.format(
+            cache_prefix, err)
+        tty.warn(msg)
     finally:
         shutil.rmtree(tmpdir)
 
@@ -823,6 +827,10 @@ def generate_key_index(key_prefix, tmpdir=None):
             url_util.join(key_prefix, 'index.json'),
             keep_original=False,
             extra_args={'ContentType': 'application/json'})
+    except Exception as err:
+        msg = 'Encountered problem pushing key index to {0}: {1}'.format(
+            key_prefix, err)
+        tty.warn(msg)
     finally:
         if remove_tmpdir:
             shutil.rmtree(tmpdir)
diff --git a/lib/spack/spack/ci.py b/lib/spack/spack/ci.py
index 6f48c90e15..e12699fb5c 100644
--- a/lib/spack/spack/ci.py
+++ b/lib/spack/spack/ci.py
@@ -42,6 +42,7 @@ JOB_RETRY_CONDITIONS = [
 ]
 
 SPACK_PR_MIRRORS_ROOT_URL = 's3://spack-pr-mirrors'
+TEMP_STORAGE_MIRROR_NAME = 'ci_temporary_mirror'
 
 spack_gpg = spack.main.SpackCommand('gpg')
 spack_compiler = spack.main.SpackCommand('compiler')
@@ -396,8 +397,6 @@ def compute_spec_deps(spec_list, check_index_only=False):
         # root_spec = get_spec_string(spec)
         root_spec = spec
 
-        rkey = spec_deps_key(spec)
-
         for s in spec.traverse(deptype=all):
             if s.external:
                 tty.msg('Will not stage external pkg: {0}'.format(s))
@@ -413,8 +412,6 @@ def compute_spec_deps(spec_list, check_index_only=False):
                 'needs_rebuild': not up_to_date_mirrors,
             }
 
-            append_dep(rkey, skey)
-
             for d in s.dependencies(deptype=all):
                 dkey = spec_deps_key(d)
                 if d.external:
@@ -586,6 +583,10 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False,
     if 'rebuild-index' in gitlab_ci and gitlab_ci['rebuild-index'] is False:
         rebuild_index_enabled = False
 
+    temp_storage_url_prefix = None
+    if 'temporary-storage-url-prefix' in gitlab_ci:
+        temp_storage_url_prefix = gitlab_ci['temporary-storage-url-prefix']
+
     bootstrap_specs = []
     phases = []
     if 'bootstrap' in gitlab_ci:
@@ -928,9 +929,30 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False,
     ]
 
     if job_id > 0:
-        if rebuild_index_enabled and not is_pr_pipeline:
+        if temp_storage_url_prefix:
+            # There were some rebuild jobs scheduled, so we will need to
+            # schedule a job to clean up the temporary storage location
+            # associated with this pipeline.
+            stage_names.append('cleanup-temp-storage')
+            cleanup_job = {}
+
+            if service_job_config:
+                copy_attributes(default_attrs,
+                                service_job_config,
+                                cleanup_job)
+
+            cleanup_job['stage'] = 'cleanup-temp-storage'
+            cleanup_job['script'] = [
+                'spack mirror destroy --mirror-url {0}/$CI_PIPELINE_ID'.format(
+                    temp_storage_url_prefix)
+            ]
+            cleanup_job['when'] = 'always'
+
+            output_object['cleanup'] = cleanup_job
+
+        if rebuild_index_enabled:
             # Add a final job to regenerate the index
-            final_stage = 'stage-rebuild-index'
+            stage_names.append('stage-rebuild-index')
             final_job = {}
 
             if service_job_config:
@@ -938,15 +960,18 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False,
                                 service_job_config,
                                 final_job)
 
-            final_script = 'spack buildcache update-index --keys'
-            final_script = '{0} -d {1}'.format(final_script, mirror_urls[0])
+            index_target_mirror = mirror_urls[0]
+            if is_pr_pipeline:
+                index_target_mirror = pr_mirror_url
 
-            final_job['stage'] = final_stage
-            final_job['script'] = [final_script]
+            final_job['stage'] = 'stage-rebuild-index'
+            final_job['script'] = [
+                'spack buildcache update-index --keys -d {0}'.format(
+                    index_target_mirror)
+            ]
             final_job['when'] = 'always'
 
             output_object['rebuild-index'] = final_job
-            stage_names.append(final_stage)
 
     output_object['stages'] = stage_names
diff --git a/lib/spack/spack/cmd/ci.py b/lib/spack/spack/cmd/ci.py
index 9adb7e11c7..f2151708a8 100644
--- a/lib/spack/spack/cmd/ci.py
+++ b/lib/spack/spack/cmd/ci.py
@@ -17,6 +17,7 @@ import spack.cmd.buildcache as buildcache
 import spack.environment as ev
 import spack.hash_types as ht
 import spack.util.executable as exe
+import spack.util.url as url_util
 
 description = "manage continuous integration pipelines"
@@ -159,6 +160,7 @@ def ci_rebuild(args):
     # SPACK_SIGNING_KEY
 
     ci_artifact_dir = get_env_var('CI_PROJECT_DIR')
+    ci_pipeline_id = get_env_var('CI_PIPELINE_ID')
     signing_key = get_env_var('SPACK_SIGNING_KEY')
     root_spec = get_env_var('SPACK_ROOT_SPEC')
     job_spec_pkg_name = get_env_var('SPACK_JOB_SPEC_PKG_NAME')
@@ -219,19 +221,28 @@ def ci_rebuild(args):
 
     spack_is_pr_pipeline = True if pr_env_var == 'True' else False
 
+    pipeline_mirror_url = None
+    temp_storage_url_prefix = None
+    if 'temporary-storage-url-prefix' in gitlab_ci:
+        temp_storage_url_prefix = gitlab_ci['temporary-storage-url-prefix']
+        pipeline_mirror_url = url_util.join(
+            temp_storage_url_prefix, ci_pipeline_id)
+
     enable_artifacts_mirror = False
-    artifact_mirror_url = None
     if 'enable-artifacts-buildcache' in gitlab_ci:
         enable_artifacts_mirror = gitlab_ci['enable-artifacts-buildcache']
-        if enable_artifacts_mirror or spack_is_pr_pipeline:
-            # If this is a PR pipeline, we will override the setting to
-            # make sure that artifacts buildcache is enabled.  Otherwise
-            # jobs will not have binary deps available since we do not
-            # allow pushing binaries to remote mirror during PR pipelines
+        if (enable_artifacts_mirror or (spack_is_pr_pipeline and
+            not enable_artifacts_mirror and not temp_storage_url_prefix)):
+            # If you explicitly enabled the artifacts buildcache feature, or
+            # if this is a PR pipeline but you did not enable either of the
+            # per-pipeline temporary storage features, we force the use of
+            # artifacts buildcache.  Otherwise jobs will not have binary
+            # dependencies from previous stages available since we do not
+            # allow pushing binaries to the remote mirror during PR pipelines.
             enable_artifacts_mirror = True
-            artifact_mirror_url = 'file://' + local_mirror_dir
+            pipeline_mirror_url = 'file://' + local_mirror_dir
             mirror_msg = 'artifact buildcache enabled, mirror url: {0}'.format(
-                artifact_mirror_url)
+                pipeline_mirror_url)
             tty.debug(mirror_msg)
 
     # Clean out scratch directory from last stage
@@ -325,8 +336,8 @@ def ci_rebuild(args):
         if pr_mirror_url:
             add_mirror('ci_pr_mirror', pr_mirror_url)
 
-        if enable_artifacts_mirror:
-            add_mirror('ci_artifact_mirror', artifact_mirror_url)
+        if pipeline_mirror_url:
+            add_mirror(spack_ci.TEMP_STORAGE_MIRROR_NAME, pipeline_mirror_url)
 
         tty.debug('listing spack mirrors:')
         spack_cmd('mirror', 'list')
@@ -423,17 +434,18 @@ def ci_rebuild(args):
                     tty.msg('Permission problem writing to mirror')
                 tty.msg(err_msg)
 
-            # Create another copy of that buildcache on "local artifact
-            # mirror" (only done if artifacts buildcache is enabled)
+            # Create another copy of that buildcache in the per-pipeline
+            # temporary storage mirror (this is only done if either artifacts
+            # buildcache is enabled or a temporary storage url prefix is set)
             spack_ci.push_mirror_contents(env, job_spec, job_spec_yaml_path,
-                                          artifact_mirror_url, cdash_build_id,
+                                          pipeline_mirror_url, cdash_build_id,
                                           sign_binaries)
 
             # Relate this build to its dependencies on CDash (if enabled)
             if enable_cdash:
                 spack_ci.relate_cdash_builds(
                     spec_map, cdash_base_url, cdash_build_id, cdash_project,
-                    artifact_mirror_url or pr_mirror_url or remote_mirror_url)
+                    pipeline_mirror_url or pr_mirror_url or remote_mirror_url)
 
 
 def ci_reindex(args):
diff --git a/lib/spack/spack/cmd/mirror.py b/lib/spack/spack/cmd/mirror.py
index 2f0611a2c4..45aba2441a 100644
--- a/lib/spack/spack/cmd/mirror.py
+++ b/lib/spack/spack/cmd/mirror.py
@@ -67,6 +67,19 @@ def setup_parser(subparser):
             " retrieve all versions of each package")
     arguments.add_common_arguments(create_parser, ['specs'])
 
+    # Destroy
+    destroy_parser = sp.add_parser('destroy', help=mirror_destroy.__doc__)
+
+    destroy_target = destroy_parser.add_mutually_exclusive_group(required=True)
+    destroy_target.add_argument('-m', '--mirror-name',
+                                metavar='mirror_name',
+                                type=str,
+                                help="find mirror to destroy by name")
+    destroy_target.add_argument('--mirror-url',
+                                metavar='mirror_url',
+                                type=str,
+                                help="find mirror to destroy by url")
+
     # used to construct scope arguments below
     scopes = spack.config.scopes()
     scopes_metavar = spack.config.scopes_metavar
@@ -360,8 +373,22 @@ def mirror_create(args):
         sys.exit(1)
 
 
+def mirror_destroy(args):
+    """Given a url, recursively delete everything under it."""
+    mirror_url = None
+
+    if args.mirror_name:
+        result = spack.mirror.MirrorCollection().lookup(args.mirror_name)
+        mirror_url = result.push_url
+    elif args.mirror_url:
+        mirror_url = args.mirror_url
+
+    web_util.remove_url(mirror_url, recursive=True)
+
+
 def mirror(parser, args):
     action = {'create': mirror_create,
+              'destroy': mirror_destroy,
               'add': mirror_add,
               'remove': mirror_remove,
               'rm': mirror_remove,
diff --git a/lib/spack/spack/schema/gitlab_ci.py b/lib/spack/spack/schema/gitlab_ci.py
index 31af841cca..bfc8ca2647 100644
--- a/lib/spack/spack/schema/gitlab_ci.py
+++ b/lib/spack/spack/schema/gitlab_ci.py
@@ -65,66 +65,90 @@ runner_selector_schema = {
     'properties': runner_attributes_schema_items,
 }
 
-#: Properties for inclusion in other schemas
-properties = {
-    'gitlab-ci': {
-        'type': 'object',
-        'additionalProperties': False,
-        'required': ['mappings'],
-        'patternProperties': union_dicts(
-            runner_attributes_schema_items,
-            {
-                'bootstrap': {
-                    'type': 'array',
-                    'items': {
-                        'anyOf': [
-                            {
-                                'type': 'string',
-                            }, {
-                                'type': 'object',
-                                'additionalProperties': False,
-                                'required': ['name'],
-                                'properties': {
-                                    'name': {
-                                        'type': 'string',
-                                    },
-                                    'compiler-agnostic': {
-                                        'type': 'boolean',
-                                        'default': False,
-                                    },
-                                },
-                            },
-                        ],
-                    },
-                },
-                'mappings': {
-                    'type': 'array',
-                    'items': {
+
+core_shared_properties = union_dicts(
+    runner_attributes_schema_items,
+    {
+        'bootstrap': {
+            'type': 'array',
+            'items': {
+                'anyOf': [
+                    {
+                        'type': 'string',
+                    }, {
                         'type': 'object',
                         'additionalProperties': False,
-                        'required': ['match'],
+                        'required': ['name'],
                         'properties': {
-                            'match': {
-                                'type': 'array',
-                                'items': {
-                                    'type': 'string',
-                                },
+                            'name': {
+                                'type': 'string',
+                            },
+                            'compiler-agnostic': {
+                                'type': 'boolean',
+                                'default': False,
                             },
-                            'runner-attributes': runner_selector_schema,
                         },
                     },
+                ],
+            },
+        },
+        'mappings': {
+            'type': 'array',
+            'items': {
+                'type': 'object',
+                'additionalProperties': False,
+                'required': ['match'],
+                'properties': {
+                    'match': {
+                        'type': 'array',
+                        'items': {
+                            'type': 'string',
+                        },
+                    },
+                    'runner-attributes': runner_selector_schema,
                 },
-                'enable-artifacts-buildcache': {
-                    'type': 'boolean',
-                    'default': False,
-                },
-                'service-job-attributes': runner_selector_schema,
-                'rebuild-index': {'type': 'boolean'},
-            }
-        ),
+            },
+        },
+        'service-job-attributes': runner_selector_schema,
+        'rebuild-index': {'type': 'boolean'},
     },
+)
+
+gitlab_ci_properties = {
+    'anyOf': [
+        {
+            'type': 'object',
+            'additionalProperties': False,
+            'required': ['mappings'],
+            'properties': union_dicts(
+                core_shared_properties,
+                {
+                    'enable-artifacts-buildcache': {
+                        'type': 'boolean',
+                    },
+                },
+            ),
+        },
+        {
+            'type': 'object',
+            'additionalProperties': False,
+            'required': ['mappings'],
+            'properties': union_dicts(
+                core_shared_properties,
+                {
+                    'temporary-storage-url-prefix': {
+                        'type': 'string',
+                    },
+                },
+            ),
+        },
+    ]
 }
 
+#: Properties for inclusion in other schemas
+properties = {
+    'gitlab-ci': gitlab_ci_properties,
+}
 
 #: Full schema with metadata
 schema = {
diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py
index 69e4aa7410..3be182c7e5 100644
--- a/lib/spack/spack/test/cmd/ci.py
+++ b/lib/spack/spack/test/cmd/ci.py
@@ -7,7 +7,7 @@ import filecmp
 import json
 import os
 import pytest
-from jsonschema import validate
+from jsonschema import validate, ValidationError
 
 import spack
 import spack.ci as ci
@@ -20,6 +20,7 @@ import spack.paths as spack_paths
 import spack.repo as repo
 from spack.schema.buildcache_spec import schema as spec_yaml_schema
 from spack.schema.database_index import schema as db_idx_schema
+from spack.schema.gitlab_ci import schema as gitlab_ci_schema
 from spack.spec import Spec, CompilerSpec
 from spack.util.mock_package import MockPackageMultiRepo
 import spack.util.executable as exe
@@ -185,10 +186,18 @@ def _validate_needs_graph(yaml_contents, needs_graph, artifacts):
         for needs_def_name, needs_list in needs_graph.items():
             if job_name.startswith(needs_def_name):
                 # check job needs against the expected needs definition
+                j_needs = job_def['needs']
+                print('job {0} needs:'.format(needs_def_name))
+                print([j['job'] for j in j_needs])
+                print('expected:')
+                print([nl for nl in needs_list])
                 assert all([job_needs['job'][:job_needs['job'].index('/')]
-                            in needs_list for job_needs in job_def['needs']])
+                            in needs_list for job_needs in j_needs])
+                assert(all([nl in
+                            [n['job'][:n['job'].index('/')] for n in j_needs]
+                            for nl in needs_list]))
                 assert all([job_needs['artifacts'] == artifacts
-                            for job_needs in job_def['needs']])
+                            for job_needs in j_needs])
                 break
@@ -378,7 +387,10 @@ spack:
     fake_token = 'notreallyatokenbutshouldnotmatter'
     os.environ['SPACK_CDASH_AUTH_TOKEN'] = fake_token
     copy_to_file = str(tmpdir.join('backup-ci.yml'))
-    output = ci_cmd('generate', '--copy-to', copy_to_file, output=str)
+    try:
+        output = ci_cmd('generate', '--copy-to', copy_to_file, output=str)
+    finally:
+        del os.environ['SPACK_CDASH_AUTH_TOKEN']
     # That fake token should still have resulted in being unable to
     # register build group with cdash, but the workload should
     # still have been generated.
@@ -582,7 +594,11 @@ spack:
     os.environ['SPACK_PR_BRANCH'] = 'fake-test-branch'
     monkeypatch.setattr(
         ci, 'SPACK_PR_MIRRORS_ROOT_URL', r"file:///fake/mirror")
-    ci_cmd('generate', '--output-file', outputfile)
+    try:
+        ci_cmd('generate', '--output-file', outputfile)
+    finally:
+        del os.environ['SPACK_IS_PR_PIPELINE']
+        del os.environ['SPACK_PR_BRANCH']
 
     with open(outputfile) as f:
         contents = f.read()
@@ -1271,3 +1287,97 @@ spack:
         output = ci_cmd('rebuild-index', output=str, fail_on_error=False)
         ex = 'spack ci rebuild-index requires an env containing a mirror'
         assert(ex in output)
+
+
+def test_ensure_only_one_temporary_storage():
+    """Make sure 'gitlab-ci' section of env does not allow specification of
+    both 'enable-artifacts-buildcache' and 'temporary-storage-url-prefix'."""
+    gitlab_ci_template = """
+  gitlab-ci:
+    {0}
+    mappings:
+      - match:
+          - notcheckedhere
+        runner-attributes:
+          tags:
+            - donotcare
+"""
+
+    enable_artifacts = 'enable-artifacts-buildcache: True'
+    temp_storage = 'temporary-storage-url-prefix: file:///temp/mirror'
+    specify_both = """{0}
+    {1}
+""".format(enable_artifacts, temp_storage)
+    specify_neither = ''
+
+    # User can specify "enable-artifacts-buildcache" (boolean)
+    yaml_obj = syaml.load(gitlab_ci_template.format(enable_artifacts))
+    validate(yaml_obj, gitlab_ci_schema)
+
+    # User can also specify "temporary-storage-url-prefix" (string)
+    yaml_obj = syaml.load(gitlab_ci_template.format(temp_storage))
+    validate(yaml_obj, gitlab_ci_schema)
+
+    # However, specifying both should fail to validate
+    yaml_obj = syaml.load(gitlab_ci_template.format(specify_both))
+    with pytest.raises(ValidationError):
+        validate(yaml_obj, gitlab_ci_schema)
+
+    # Specifying neither should be fine too, as neither of these properties
+    # should be required
+    yaml_obj = syaml.load(gitlab_ci_template.format(specify_neither))
+    validate(yaml_obj, gitlab_ci_schema)
+
+
+def test_ci_generate_temp_storage_url(tmpdir, mutable_mock_env_path,
+                                      env_deactivate, install_mockery,
+                                      mock_packages, monkeypatch):
+    """Verify correct behavior when using temporary-storage-url-prefix"""
+    filename = str(tmpdir.join('spack.yaml'))
+    with open(filename, 'w') as f:
+        f.write("""\
+spack:
+  specs:
+    - archive-files
+  mirrors:
+    some-mirror: https://my.fake.mirror
+  gitlab-ci:
+    temporary-storage-url-prefix: file:///work/temp/mirror
+    mappings:
+      - match:
+          - archive-files
+        runner-attributes:
+          tags:
+            - donotcare
+          image: donotcare
+""")
+
+    with tmpdir.as_cwd():
+        env_cmd('create', 'test', './spack.yaml')
+        outputfile = str(tmpdir.join('.gitlab-ci.yml'))
+
+        monkeypatch.setattr(
+            ci, 'SPACK_PR_MIRRORS_ROOT_URL', r"file:///fake/mirror")
+
+        with ev.read('test'):
+            ci_cmd('generate', '--output-file', outputfile)
+
+        with open(outputfile) as of:
+            pipeline_doc = syaml.load(of.read())
+
+            print(pipeline_doc)
+
+            assert('cleanup' in pipeline_doc)
+            cleanup_job = pipeline_doc['cleanup']
+
+            assert('script' in cleanup_job)
+            cleanup_task = cleanup_job['script'][0]
+
+            assert(cleanup_task.startswith('spack mirror destroy'))
+
+            assert('stages' in pipeline_doc)
+            stages = pipeline_doc['stages']
+
+            # Cleanup job should be 2nd to last, just before rebuild-index
+            assert('stage' in cleanup_job)
+            assert(cleanup_job['stage'] == stages[-2])
diff --git a/lib/spack/spack/test/cmd/mirror.py b/lib/spack/spack/test/cmd/mirror.py
index 6540ab7d37..b72487e5ab 100644
--- a/lib/spack/spack/test/cmd/mirror.py
+++ b/lib/spack/spack/test/cmd/mirror.py
@@ -14,6 +14,9 @@ mirror = SpackCommand('mirror')
 env = SpackCommand('env')
 add = SpackCommand('add')
 concretize = SpackCommand('concretize')
+install = SpackCommand('install')
+buildcache = SpackCommand('buildcache')
+uninstall = SpackCommand('uninstall')
 
 
 @pytest.fixture
@@ -183,3 +186,39 @@ def test_mirror_name_collision(tmp_scope):
 
     with pytest.raises(SpackCommandError):
         mirror('add', '--scope', tmp_scope, 'first', '1')
+
+
+def test_mirror_destroy(install_mockery_mutable_config,
+                        mock_packages, mock_fetch, mock_archive,
+                        mutable_config, monkeypatch, tmpdir):
+    # Create a temp mirror directory for buildcache usage
+    mirror_dir = tmpdir.join('mirror_dir')
+    mirror_url = 'file://{0}'.format(mirror_dir.strpath)
+    mirror('add', 'atest', mirror_url)
+
+    spec_name = 'libdwarf'
+
+    # Put a binary package in a buildcache
+    install('--no-cache', spec_name)
+    buildcache('create', '-u', '-a', '-f', '-d', mirror_dir.strpath, spec_name)
+
+    contents = os.listdir(mirror_dir.strpath)
+    assert('build_cache' in contents)
+
+    # Destroy mirror by name
+    mirror('destroy', '-m', 'atest')
+
+    assert(not os.path.exists(mirror_dir.strpath))
+
+    buildcache('create', '-u', '-a', '-f', '-d', mirror_dir.strpath, spec_name)
+
+    contents = os.listdir(mirror_dir.strpath)
+    assert('build_cache' in contents)
+
+    # Destroy mirror by url
+    mirror('destroy', '--mirror-url', mirror_url)
+
+    assert(not os.path.exists(mirror_dir.strpath))
+
+    uninstall('-y', spec_name)
+    mirror('remove', 'atest')
diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py
index abe3e6de80..4d80507c52 100644
--- a/lib/spack/spack/test/web.py
+++ b/lib/spack/spack/test/web.py
@@ -6,10 +6,14 @@ import os
 import ordereddict_backport
 import pytest
 
+import spack.config
 import spack.paths
 import spack.util.web
+import spack.util.s3
 from spack.version import ver
 
+import llnl.util.tty as tty
+
 
 def _create_url(relative_url):
     web_data_path = os.path.join(spack.paths.test_path, 'data', 'web')
@@ -195,3 +199,58 @@ def test_list_url(tmpdir):
         'file-0.txt',
         'file-1.txt',
         'file-2.txt']
+
+
+class MockPages(object):
+    def search(self, *args, **kwargs):
+        return [
+            {'Key': 'keyone'},
+            {'Key': 'keytwo'},
+            {'Key': 'keythree'},
+        ]
+
+
+class MockPaginator(object):
+    def paginate(self, *args, **kwargs):
+        return MockPages()
+
+
+class MockS3Client(object):
+    def get_paginator(self, *args, **kwargs):
+        return MockPaginator()
+
+    def delete_objects(self, *args, **kwargs):
+        return {
+            'Errors': [
+                {'Key': 'keyone', 'Message': 'Access Denied'}
+            ],
+            'Deleted': [
+                {'Key': 'keytwo'},
+                {'Key': 'keythree'}
+            ],
+        }
+
+    def delete_object(self, *args, **kwargs):
+        pass
+
+
+def test_remove_s3_url(monkeypatch, capfd):
+    fake_s3_url = 's3://my-bucket/subdirectory/mirror'
+
+    def mock_create_s3_session(url):
+        return MockS3Client()
+
+    monkeypatch.setattr(
+        spack.util.s3, 'create_s3_session', mock_create_s3_session)
+
+    current_debug_level = tty.debug_level()
+    tty.set_debug(1)
+
+    spack.util.web.remove_url(fake_s3_url, recursive=True)
+    err = capfd.readouterr()[1]
+
+    tty.set_debug(current_debug_level)
+
+    assert('Failed to delete keyone (Access Denied)' in err)
+    assert('Deleted keythree' in err)
+    assert('Deleted keytwo' in err)
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index 755344b379..1dac12faca 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -229,17 +229,57 @@ def url_exists(url):
     return False
 
 
-def remove_url(url):
+def _debug_print_delete_results(result):
+    if 'Deleted' in result:
+        for d in result['Deleted']:
+            tty.debug('Deleted {0}'.format(d['Key']))
+    if 'Errors' in result:
+        for e in result['Errors']:
+            tty.debug('Failed to delete {0} ({1})'.format(
+                e['Key'], e['Message']))
+
+
+def remove_url(url, recursive=False):
     url = url_util.parse(url)
 
     local_path = url_util.local_file_path(url)
     if local_path:
-        os.remove(local_path)
+        if recursive:
+            shutil.rmtree(local_path)
+        else:
+            os.remove(local_path)
         return
 
     if url.scheme == 's3':
         s3 = s3_util.create_s3_session(url)
-        s3.delete_object(Bucket=url.netloc, Key=url.path)
+        bucket = url.netloc
+        if recursive:
+            # Because list_objects_v2 can only return up to 1000 items
+            # at a time, we have to paginate to make sure we get it all
+            prefix = url.path.strip('/')
+            paginator = s3.get_paginator('list_objects_v2')
+            pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
+
+            delete_request = {'Objects': []}
+            for item in pages.search('Contents'):
+                if not item:
+                    continue
+
+                delete_request['Objects'].append({'Key': item['Key']})
+
+                # Make sure we do not try to hit S3 with a list of more
+                # than 1000 items
+                if len(delete_request['Objects']) >= 1000:
+                    r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
+                    _debug_print_delete_results(r)
+                    delete_request = {'Objects': []}
+
+            # Delete any items that remain
+            if len(delete_request['Objects']):
+                r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
+                _debug_print_delete_results(r)
+        else:
+            s3.delete_object(Bucket=bucket, Key=url.path)
         return
 
     # Don't even try for other URL schemes.
diff --git a/share/spack/spack-completion.bash b/share/spack/spack-completion.bash
index cf0e79cdac..62829d474a 100755
--- a/share/spack/spack-completion.bash
+++ b/share/spack/spack-completion.bash
@@ -1123,7 +1123,7 @@ _spack_mirror() {
     then
         SPACK_COMPREPLY="-h --help -n --no-checksum --deprecated"
     else
-        SPACK_COMPREPLY="create add remove rm set-url list"
+        SPACK_COMPREPLY="create destroy add remove rm set-url list"
     fi
 }
 
@@ -1136,6 +1136,10 @@ _spack_mirror_create() {
     fi
 }
 
+_spack_mirror_destroy() {
+    SPACK_COMPREPLY="-h --help -m --mirror-name --mirror-url"
+}
+
 _spack_mirror_add() {
     if $list_options
     then
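Taken together, when `temporary-storage-url-prefix` is set, the cleanup entry that `spack ci generate` emits into `.gitlab-ci.yml` should look roughly like this sketch (the prefix is a placeholder; any attributes configured under `service-job-attributes` are merged in as well):

```yaml
cleanup:
  stage: cleanup-temp-storage
  script:
  - spack mirror destroy --mirror-url s3://my-bucket/temp-storage/$CI_PIPELINE_ID
  when: always   # run even if earlier rebuild jobs failed
```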