From 0592c580302ca8edf6f404d046d2e670a69bba6e Mon Sep 17 00:00:00 2001
From: Omar Padron
Date: Mon, 9 Dec 2019 17:23:33 -0500
Subject: Follow up/11117 fixes and testing (#13607)

* fix docstring in generate_package_index() referring to "public" keys as "signing" keys
* use explicit kwargs in push_to_url()
* simplify url_util.parse() per tgamblin's suggestion
* replace standardize_header_names() with the much simpler get_header()
* add some basic tests
* update s3_fetch tests
* update S3 list code to strip leading slashes from prefix
* correct minor warning regression introduced in #11117
* add more tests
* flake8 fixes
* add capsys fixture to mirror_crud test
* add get_header() tests
* use get_header() in more places
* incorporate review comments
---
 lib/spack/spack/binary_distribution.py     |   4 +-
 lib/spack/spack/fetch_strategy.py          |   2 +-
 lib/spack/spack/s3_handler.py              |   4 +-
 lib/spack/spack/test/build_distribution.py |  41 +++++++
 lib/spack/spack/test/cmd/mirror.py         |  67 +++++++++-
 lib/spack/spack/test/fetch_strategy.py     |  17 +++
 lib/spack/spack/test/s3_fetch.py           |  29 +++++
 lib/spack/spack/test/web.py                |  72 +++++++++--
 lib/spack/spack/util/url.py                |   4 +-
 lib/spack/spack/util/web.py                | 190 ++++++++---------------------
 10 files changed, 272 insertions(+), 158 deletions(-)
 create mode 100644 lib/spack/spack/test/build_distribution.py
 create mode 100644 lib/spack/spack/test/fetch_strategy.py
 create mode 100644 lib/spack/spack/test/s3_fetch.py

diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 3b10cca180..6d9d45e12a 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -272,7 +272,7 @@ def generate_package_index(cache_prefix):
     Creates (or replaces) the "index.html" page at the location given in
     cache_prefix. This page contains a link for each binary package (*.yaml)
-    and signing key (*.key) under cache_prefix.
+    and public key (*.key) under cache_prefix.
""" tmpdir = tempfile.mkdtemp() try: @@ -679,7 +679,7 @@ def get_specs(force=False): return _cached_specs if not spack.mirror.MirrorCollection(): - tty.warn("No Spack mirrors are currently configured") + tty.debug("No Spack mirrors are currently configured") return {} urls = set() diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 13958fbdae..1f94973f22 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -1142,7 +1142,7 @@ class S3FetchStrategy(URLFetchStrategy): with open(basename, 'wb') as f: shutil.copyfileobj(stream, f) - content_type = headers['Content-type'] + content_type = web_util.get_header(headers, 'Content-type') if content_type == 'text/html': warn_content_type_mismatch(self.archive_file or "the archive") diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py index 2a54b9ecb1..ea7f0673ff 100644 --- a/lib/spack/spack/s3_handler.py +++ b/lib/spack/spack/s3_handler.py @@ -11,7 +11,6 @@ import six.moves.urllib.error as urllib_error import spack.util.s3 as s3_util import spack.util.url as url_util -import spack.util.web as web_util # NOTE(opadron): Workaround issue in boto where its StreamingBody @@ -54,8 +53,7 @@ def _s3_open(url): # NOTE(opadron): Apply workaround here (see above) stream = WrapStream(obj['Body']) - headers = web_util.standardize_header_names( - obj['ResponseMetadata']['HTTPHeaders']) + headers = obj['ResponseMetadata']['HTTPHeaders'] return url, headers, stream diff --git a/lib/spack/spack/test/build_distribution.py b/lib/spack/spack/test/build_distribution.py new file mode 100644 index 0000000000..9d127ddf45 --- /dev/null +++ b/lib/spack/spack/test/build_distribution.py @@ -0,0 +1,41 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import pytest + +import os +import os.path + +import spack.spec +import spack.binary_distribution + +install = spack.main.SpackCommand('install') + + +def test_build_tarball_overwrite( + install_mockery, mock_fetch, monkeypatch, tmpdir): + + with tmpdir.as_cwd(): + spec = spack.spec.Spec('trivial-install-test-package').concretized() + install(str(spec)) + + # Runs fine the first time, throws the second time + spack.binary_distribution.build_tarball(spec, '.', unsigned=True) + with pytest.raises(spack.binary_distribution.NoOverwriteException): + spack.binary_distribution.build_tarball(spec, '.', unsigned=True) + + # Should work fine with force=True + spack.binary_distribution.build_tarball( + spec, '.', force=True, unsigned=True) + + # Remove the tarball and try again. 
+ # This must *also* throw, because of the existing .spec.yaml file + os.remove(os.path.join( + spack.binary_distribution.build_cache_prefix('.'), + spack.binary_distribution.tarball_directory_name(spec), + spack.binary_distribution.tarball_name(spec, '.spack'))) + + with pytest.raises(spack.binary_distribution.NoOverwriteException): + spack.binary_distribution.build_tarball(spec, '.', unsigned=True) diff --git a/lib/spack/spack/test/cmd/mirror.py b/lib/spack/spack/test/cmd/mirror.py index 889d81f98b..f29e135d82 100644 --- a/lib/spack/spack/test/cmd/mirror.py +++ b/lib/spack/spack/test/cmd/mirror.py @@ -6,7 +6,7 @@ import pytest import os -from spack.main import SpackCommand +from spack.main import SpackCommand, SpackCommandError import spack.environment as ev import spack.config @@ -16,6 +16,25 @@ add = SpackCommand('add') concretize = SpackCommand('concretize') +@pytest.fixture +def tmp_scope(): + """Creates a temporary configuration scope""" + + base_name = 'internal-testing-scope' + current_overrides = set( + x.name for x in + spack.config.config.matching_scopes(r'^{0}'.format(base_name))) + + num_overrides = 0 + scope_name = base_name + while scope_name in current_overrides: + scope_name = '{0}{1}'.format(base_name, num_overrides) + num_overrides += 1 + + with spack.config.override(spack.config.InternalConfigScope(scope_name)): + yield scope_name + + @pytest.mark.disable_clean_stage_check @pytest.mark.regression('8083') def test_regression_8083(tmpdir, capfd, mock_packages, mock_fetch, config): @@ -45,3 +64,49 @@ def test_mirror_from_env(tmpdir, mock_packages, mock_fetch, config, mirror_res = os.listdir(os.path.join(mirror_dir, spec.name)) expected = ['%s.tar.gz' % spec.format('{name}-{version}')] assert mirror_res == expected + + +def test_mirror_crud(tmp_scope, capsys): + with capsys.disabled(): + mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io') + + output = mirror('remove', '--scope', tmp_scope, 'mirror') + assert 'Removed mirror' in output + + mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io') + + # no-op + output = mirror('set-url', '--scope', tmp_scope, + 'mirror', 'http://spack.io') + assert 'Url already set' in output + + output = mirror('set-url', '--scope', tmp_scope, + '--push', 'mirror', 's3://spack-public') + assert 'Changed (push) url' in output + + # no-op + output = mirror('set-url', '--scope', tmp_scope, + '--push', 'mirror', 's3://spack-public') + assert 'Url already set' in output + + output = mirror('remove', '--scope', tmp_scope, 'mirror') + assert 'Removed mirror' in output + + output = mirror('list', '--scope', tmp_scope) + assert 'No mirrors configured' in output + + +def test_mirror_nonexisting(tmp_scope): + with pytest.raises(SpackCommandError): + mirror('remove', '--scope', tmp_scope, 'not-a-mirror') + + with pytest.raises(SpackCommandError): + mirror('set-url', '--scope', tmp_scope, + 'not-a-mirror', 'http://spack.io') + + +def test_mirror_name_collision(tmp_scope): + mirror('add', '--scope', tmp_scope, 'first', '1') + + with pytest.raises(SpackCommandError): + mirror('add', '--scope', tmp_scope, 'first', '1') diff --git a/lib/spack/spack/test/fetch_strategy.py b/lib/spack/spack/test/fetch_strategy.py new file mode 100644 index 0000000000..ab1aa35408 --- /dev/null +++ b/lib/spack/spack/test/fetch_strategy.py @@ -0,0 +1,17 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import pytest + +from spack.fetch_strategy import from_url_scheme + + +def test_fetchstrategy_bad_url_scheme(): + """Ensure that trying to make a fetch strategy from a URL with an + unsupported scheme fails as expected.""" + + with pytest.raises(ValueError): + fetcher = from_url_scheme( # noqa: F841 + 'bogus-scheme://example.com/a/b/c') diff --git a/lib/spack/spack/test/s3_fetch.py b/lib/spack/spack/test/s3_fetch.py new file mode 100644 index 0000000000..d904417ed0 --- /dev/null +++ b/lib/spack/spack/test/s3_fetch.py @@ -0,0 +1,29 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import pytest + +import spack.fetch_strategy as spack_fs +import spack.stage as spack_stage + + +def test_s3fetchstrategy_sans_url(): + """Ensure constructor with no URL fails.""" + with pytest.raises(ValueError): + spack_fs.S3FetchStrategy(None) + + +def test_s3fetchstrategy_bad_url(tmpdir): + """Ensure fetch with bad URL fails as expected.""" + testpath = str(tmpdir) + + fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist') + assert fetcher is not None + + with spack_stage.Stage(fetcher, path=testpath) as stage: + assert stage is not None + assert fetcher.archive_file is None + with pytest.raises(spack_fs.FetchError): + fetcher.fetch() diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py index c80e29b523..75f5930230 100644 --- a/lib/spack/spack/test/web.py +++ b/lib/spack/spack/test/web.py @@ -5,9 +5,12 @@ """Tests for web.py.""" import os +import pytest + +from ordereddict_backport import OrderedDict import spack.paths -from spack.util.web import spider, find_versions_of_archive +import spack.util.web as web_util from spack.version import ver @@ -23,7 +26,7 @@ page_4 = 'file://' + os.path.join(web_data_path, '4.html') def test_spider_0(): - pages, links = spider(root, depth=0) + pages, links = web_util.spider(root, depth=0) assert root in pages assert page_1 not in pages @@ -41,7 +44,7 @@ def test_spider_0(): def test_spider_1(): - pages, links = spider(root, depth=1) + pages, links = web_util.spider(root, depth=1) assert root in pages assert page_1 in pages @@ -60,7 +63,7 @@ def test_spider_1(): def test_spider_2(): - pages, links = spider(root, depth=2) + pages, links = web_util.spider(root, depth=2) assert root in pages assert page_1 in pages @@ -81,7 +84,7 @@ def test_spider_2(): def test_spider_3(): - pages, links = spider(root, depth=3) + pages, links = web_util.spider(root, depth=3) assert root in pages assert page_1 in pages @@ -104,31 +107,36 @@ def test_spider_3(): def test_find_versions_of_archive_0(): - versions = find_versions_of_archive(root_tarball, root, list_depth=0) + versions = web_util.find_versions_of_archive( + root_tarball, root, list_depth=0) assert ver('0.0.0') in versions def test_find_versions_of_archive_1(): - versions = find_versions_of_archive(root_tarball, root, list_depth=1) + versions = web_util.find_versions_of_archive( + root_tarball, root, list_depth=1) assert ver('0.0.0') in versions assert ver('1.0.0') in versions def test_find_versions_of_archive_2(): - versions = find_versions_of_archive(root_tarball, root, list_depth=2) + versions = web_util.find_versions_of_archive( + root_tarball, root, list_depth=2) assert ver('0.0.0') in versions assert ver('1.0.0') in versions assert ver('2.0.0') in versions def 
test_find_exotic_versions_of_archive_2(): - versions = find_versions_of_archive(root_tarball, root, list_depth=2) + versions = web_util.find_versions_of_archive( + root_tarball, root, list_depth=2) # up for grabs to make this better. assert ver('2.0.0b2') in versions def test_find_versions_of_archive_3(): - versions = find_versions_of_archive(root_tarball, root, list_depth=3) + versions = web_util.find_versions_of_archive( + root_tarball, root, list_depth=3) assert ver('0.0.0') in versions assert ver('1.0.0') in versions assert ver('2.0.0') in versions @@ -137,7 +145,49 @@ def test_find_versions_of_archive_3(): def test_find_exotic_versions_of_archive_3(): - versions = find_versions_of_archive(root_tarball, root, list_depth=3) + versions = web_util.find_versions_of_archive( + root_tarball, root, list_depth=3) assert ver('2.0.0b2') in versions assert ver('3.0a1') in versions assert ver('4.5-rc5') in versions + + +def test_get_header(): + headers = { + 'Content-type': 'text/plain' + } + + # looking up headers should just work like a plain dict + # lookup when there is an entry with the right key + assert(web_util.get_header(headers, 'Content-type') == 'text/plain') + + # looking up headers should still work if there is a fuzzy match + assert(web_util.get_header(headers, 'contentType') == 'text/plain') + + # ...unless there is an exact match for the "fuzzy" spelling. + headers['contentType'] = 'text/html' + assert(web_util.get_header(headers, 'contentType') == 'text/html') + + # If lookup has to fallback to fuzzy matching and there are more than one + # fuzzy match, the result depends on the internal ordering of the given + # mapping + headers = OrderedDict() + headers['Content-type'] = 'text/plain' + headers['contentType'] = 'text/html' + + assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain') + del headers['Content-type'] + assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html') + + # Same as above, but different ordering + headers = OrderedDict() + headers['contentType'] = 'text/html' + headers['Content-type'] = 'text/plain' + + assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html') + del headers['contentType'] + assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain') + + # If there isn't even a fuzzy match, raise KeyError + with pytest.raises(KeyError): + web_util.get_header(headers, 'ContentLength') diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py index 7ac12e7b81..29beb88ff9 100644 --- a/lib/spack/spack/util/url.py +++ b/lib/spack/spack/util/url.py @@ -9,6 +9,7 @@ Utility functions for parsing, formatting, and manipulating URLs. 
import itertools import os.path +import re from six import string_types import six.moves.urllib.parse as urllib_parse @@ -69,8 +70,7 @@ def parse(url, scheme='file'): if scheme == 'file': path = spack.util.path.canonicalize_path(netloc + path) - while path.startswith('//'): - path = path[1:] + path = re.sub(r'^/+', '/', path) netloc = '' return urllib_parse.ParseResult(scheme=scheme, diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index 1fe58d6415..5c76deda2b 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -15,9 +15,6 @@ import ssl import sys import traceback -from itertools import product - -import six from six.moves.urllib.request import urlopen, Request from six.moves.urllib.error import URLError import multiprocessing.pool @@ -50,30 +47,6 @@ from spack.util.compression import ALLOWED_ARCHIVE_TYPES # Timeout in seconds for web requests _timeout = 10 -# See docstring for standardize_header_names() -_separators = ('', ' ', '_', '-') -HTTP_HEADER_NAME_ALIASES = { - "Accept-ranges": set( - ''.join((A, 'ccept', sep, R, 'anges')) - for A, sep, R in product('Aa', _separators, 'Rr')), - - "Content-length": set( - ''.join((C, 'ontent', sep, L, 'ength')) - for C, sep, L in product('Cc', _separators, 'Ll')), - - "Content-type": set( - ''.join((C, 'ontent', sep, T, 'ype')) - for C, sep, T in product('Cc', _separators, 'Tt')), - - "Date": set(('Date', 'date')), - - "Last-modified": set( - ''.join((L, 'ast', sep, M, 'odified')) - for L, sep, M in product('Ll', _separators, 'Mm')), - - "Server": set(('Server', 'server')) -} - class LinkParser(HTMLParser): """This parser just takes an HTML page and strips out the hrefs on the @@ -173,7 +146,7 @@ def read_from_url(url, accept_content_type=None): req.get_method = lambda: "HEAD" resp = _urlopen(req, timeout=_timeout, context=context) - content_type = resp.headers.get('Content-type') + content_type = get_header(resp.headers, 'Content-type') # Do the real GET request when we know it's just HTML. req.get_method = lambda: "GET" @@ -185,7 +158,7 @@ def read_from_url(url, accept_content_type=None): ERROR=str(err))) if accept_content_type and not is_web_url: - content_type = response.headers.get('Content-type') + content_type = get_header(response.headers, 'Content-type') reject_content_type = ( accept_content_type and ( @@ -208,9 +181,8 @@ def warn_no_ssl_cert_checking(): "your Python to enable certificate verification.") -def push_to_url(local_file_path, remote_path, **kwargs): - keep_original = kwargs.get('keep_original', True) - +def push_to_url( + local_file_path, remote_path, keep_original=True, extra_args=None): remote_url = url_util.parse(remote_path) verify_ssl = spack.config.get('config:verify_ssl') @@ -235,7 +207,8 @@ def push_to_url(local_file_path, remote_path, **kwargs): os.remove(local_file_path) elif remote_url.scheme == 's3': - extra_args = kwargs.get('extra_args', {}) + if extra_args is None: + extra_args = {} remote_path = remote_url.path while remote_path.startswith('/'): @@ -296,10 +269,25 @@ def remove_url(url): # Don't even try for other URL schemes. 
-def _list_s3_objects(client, url, num_entries, start_after=None): +def _iter_s3_contents(contents, prefix): + for entry in contents: + key = entry['Key'] + + if not key.startswith('/'): + key = '/' + key + + key = os.path.relpath(key, prefix) + + if key == '.': + continue + + yield key + + +def _list_s3_objects(client, bucket, prefix, num_entries, start_after=None): list_args = dict( - Bucket=url.netloc, - Prefix=url.path, + Bucket=bucket, + Prefix=prefix[1:], MaxKeys=num_entries) if start_after is not None: @@ -311,21 +299,19 @@ def _list_s3_objects(client, url, num_entries, start_after=None): if result['IsTruncated']: last_key = result['Contents'][-1]['Key'] - iter = (key for key in - ( - os.path.relpath(entry['Key'], url.path) - for entry in result['Contents'] - ) - if key != '.') + iter = _iter_s3_contents(result['Contents'], prefix) return iter, last_key def _iter_s3_prefix(client, url, num_entries=1024): key = None + bucket = url.netloc + prefix = re.sub(r'^/*', '/', url.path) + while True: contents, key = _list_s3_objects( - client, url, num_entries, start_after=key) + client, bucket, prefix, num_entries, start_after=key) for x in contents: yield x @@ -577,106 +563,34 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0): return versions -def standardize_header_names(headers): - """Replace certain header names with standardized spellings. - - Standardizes the spellings of the following header names: - - Accept-ranges - - Content-length - - Content-type - - Date - - Last-modified - - Server - - Every name considered is translated to one of the above names if the only - difference between the two is how the first letters of each word are - capitalized; whether words are separated; or, if separated, whether they - are so by a dash (-), underscore (_), or space ( ). Header names that - cannot be mapped as described above are returned unaltered. - - For example: The standard spelling of "Content-length" would be substituted - for any of the following names: - - Content-length - - content_length - - contentlength - - content_Length - - contentLength - - content Length - - ... and any other header name, such as "Content-encoding", would not be - altered, regardless of spelling. - - If headers is a string, then it (or an appropriate substitute) is returned. - - If headers is a non-empty tuple, headers[0] is a string, and there exists a - standardized spelling for header[0] that differs from it, then a new tuple - is returned. This tuple has the same elements as headers, except the first - element is the standardized spelling for headers[0]. - - If headers is a sequence, then a new list is considered, where each element - is its corresponding element in headers, but mapped as above if a string or - tuple. This new list is returned if at least one of its elements differ - from their corrsponding element in headers. - - If headers is a mapping, then a new dict is considered, where the key in - each item is the key of its corresponding item in headers, mapped as above - if a string or tuple. The value is taken from the corresponding item. If - the keys of multiple items in headers map to the same key after being - standardized, then the value for the resulting item is undefined. The new - dict is returned if at least one of its items has a key that differs from - that of their corresponding item in headers, or if the keys of multiple - items in headers map to the same key after being standardized. - - In all other cases headers is returned unaltered. 
- """ - if isinstance(headers, six.string_types): - for standardized_spelling, other_spellings in ( - HTTP_HEADER_NAME_ALIASES.items()): - if headers in other_spellings: - if headers == standardized_spelling: - return headers - return standardized_spelling - return headers - - if isinstance(headers, tuple): - if not headers: - return headers - old = headers[0] - if isinstance(old, six.string_types): - new = standardize_header_names(old) - if old is not new: - return (new,) + headers[1:] - return headers - - try: - changed = False - new_dict = {} - for key, value in headers.items(): - if isinstance(key, (tuple, six.string_types)): - old_key, key = key, standardize_header_names(key) - changed = changed or key is not old_key - - new_dict[key] = value +def get_header(headers, header_name): + """Looks up a dict of headers for the given header value. - return new_dict if changed else headers - except (AttributeError, TypeError, ValueError): - pass + Looks up a dict of headers, [headers], for a header value given by + [header_name]. Returns headers[header_name] if header_name is in headers. + Otherwise, the first fuzzy match is returned, if any. - try: - changed = False - new_list = [] - for item in headers: - if isinstance(item, (tuple, six.string_types)): - old_item, item = item, standardize_header_names(item) - changed = changed or item is not old_item + This fuzzy matching is performed by discarding word separators and + capitalization, so that for example, "Content-length", "content_length", + "conTENtLength", etc., all match. In the case of multiple fuzzy-matches, + the returned value is the "first" such match given the underlying mapping's + ordering, or unspecified if no such ordering is defined. - new_list.append(item) + If header_name is not in headers, and no such fuzzy match exists, then a + KeyError is raised. + """ - return new_list if changed else headers - except TypeError: - pass + def unfuzz(header): + return re.sub(r'[ _-]', '', header).lower() - return headers + try: + return headers[header_name] + except KeyError: + unfuzzed_header_name = unfuzz(header_name) + for header, value in headers.items(): + if unfuzz(header) == unfuzzed_header_name: + return value + raise class SpackWebError(spack.error.SpackError): -- cgit v1.2.3-60-g2f50
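
For readers skimming the patch, here is a minimal standalone sketch of the get_header() helper it adds to lib/spack/spack/util/web.py. The function body is restated from the hunk above; the header dict and the lookups are made up for illustration and mirror the cases exercised by test_get_header().

```python
# Standalone restatement of the get_header() helper added by this patch,
# so its fuzzy matching can be tried outside of a Spack checkout.
import re


def get_header(headers, header_name):
    def unfuzz(header):
        # drop word separators and case, e.g. "Content-type" -> "contenttype"
        return re.sub(r'[ _-]', '', header).lower()

    try:
        # an exact key always wins when present
        return headers[header_name]
    except KeyError:
        unfuzzed_header_name = unfuzz(header_name)
        for header, value in headers.items():
            if unfuzz(header) == unfuzzed_header_name:
                return value
        raise  # re-raise the original KeyError: no exact or fuzzy match


# illustrative headers only
headers = {'Content-type': 'text/html', 'content_length': '1024'}

print(get_header(headers, 'Content-type'))    # exact match -> 'text/html'
print(get_header(headers, 'contentType'))     # fuzzy match -> 'text/html'
print(get_header(headers, 'Content-Length'))  # fuzzy match -> '1024'

try:
    get_header(headers, 'Accept-Ranges')      # no exact or fuzzy match
except KeyError:
    print('Accept-Ranges is missing, as expected')
```

This is the behavior fetch_strategy.py and read_from_url() now rely on when they call web_util.get_header(headers, 'Content-type') instead of indexing the headers directly.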
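Similarly, a small sketch of the "strip leading slashes from prefix" change to the S3 listing code: it reproduces the prefix normalization from _iter_s3_prefix() and the key filtering from _iter_s3_contents() against a fabricated listing. The bucket contents below stand in for the Contents entries a real boto3 list_objects_v2() call would return; the paths are hypothetical.

```python
# Illustration of the new S3 key handling: the mirror URL path is normalized
# to exactly one leading slash, passed to S3 without that slash as the
# Prefix, and each returned key is reported relative to the prefix.
import os.path
import re

url_path = 'mirror/build_cache'             # may or may not start with '/'
prefix = re.sub(r'^/*', '/', url_path)      # -> '/mirror/build_cache'
s3_prefix_arg = prefix[1:]                  # what would be sent as Prefix=

fake_contents = [                           # fabricated listing
    {'Key': 'mirror/build_cache'},          # the prefix itself is skipped
    {'Key': 'mirror/build_cache/index.html'},
    {'Key': 'mirror/build_cache/linux/pkg.spec.yaml'},
]

for entry in fake_contents:
    key = entry['Key']
    if not key.startswith('/'):
        key = '/' + key                     # anchor the key like the prefix
    key = os.path.relpath(key, prefix)
    if key == '.':
        continue                            # ignore the prefix entry itself
    print(key)                              # index.html, linux/pkg.spec.yaml
```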