author | Omar Padron <omar.padron@kitware.com> | 2019-10-22 03:32:04 -0400
---|---|---
committer | Todd Gamblin <tgamblin@llnl.gov> | 2019-10-22 00:32:04 -0700
commit | fd58c98b0edd198e2b1bc8c9f81208d7d6010178
tree | 826bf9531541343d71ea4fa7fc28f36bc346135c
parent | 6cb972a9d25cee6ed8b92195cccd8ad2255e98a9
fetching: S3 upload and download (#11117)
This extends Spack so that it can fetch sources and binaries from, push sources and binaries to, and index the contents of mirrors hosted in an S3 bucket.
High-level to-do list:
- [x] Extend the mirrors configuration to support `file://` and `s3://` URLs (see the configuration sketch after this list).
- [x] Ensure all fetching, pushing, and indexing operations work for `file://` URLs.
- [x] Implement S3 source fetching
- [x] Implement S3 binary mirror indexing
- [x] Implement S3 binary package fetching
- [x] Implement S3 source pushing
- [x] Implement S3 binary package pushing
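As a rough illustration of the first item, an S3-backed mirror can be configured alongside ordinary file mirrors. This is only a sketch; the paths and the `example-spack-mirror` bucket name are made up:

```yaml
# mirrors.yaml -- illustrative only; bucket and paths are hypothetical
mirrors:
  local-cache: file:///data/spack-mirror
  s3-cache: s3://example-spack-mirror
```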
Important details:
* refactor URL handling so that S3 URLs and mirror URLs are handled more gracefully.
  - updated `parse()` to accept already-parsed URL objects; an equivalent object
    is returned with any extra S3-related attributes intact. Objects created with
    urllib can also be passed, and the additional S3 handling logic is still applied.
* update mirror schema/parsing so a mirror can have separate fetch/push URLs (see the configuration sketch after this list)
* implement `S3FetchStrategy` and several supporting utility changes
* provide more feature-complete S3 fetching
* update buildcache create command to support S3
* Move the core logic for reading data from S3 out of the S3 fetch strategy and into
  the S3 URL handler. The S3 fetch strategy now calls into `read_from_url()`. Since
  `read_from_url()` can now handle S3 URLs, the S3 fetch strategy is redundant. It's
  not clear whether the ideal design is to have S3 fetching functionality in a fetch
  strategy, directly implemented in `read_from_url()`, or both (see the `read_from_url()` sketch after this list).
* expanded what can be passed to `spack buildcache` via the -d flag: in addition
  to a directory on the local filesystem, the name of a configured mirror or a push
  URL can now be passed directly (see the command-line sketch after this list).
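To illustrate the new mirror schema, a single mirror entry may now be either one URL string or an object with separate `fetch` and `push` URLs. The hostname and bucket below are hypothetical:

```yaml
# mirrors.yaml -- illustrative only; URLs are made up
mirrors:
  # plain form: one URL used for both fetching and pushing
  simple: file:///data/spack-mirror
  # split form: fetch over HTTPS, push directly to S3
  split:
    fetch: https://mirror.example.com/spack
    push: s3://example-spack-mirror
```

The new `spack mirror set-url --push <name> <url>` subcommand added in this change updates only the push half of such an entry.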
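A minimal sketch of how the relocated S3 reading logic is consumed elsewhere in this change: `read_from_url()` returns the URL, the response headers, and a binary stream regardless of scheme. The mirror URL and spec file name are placeholders:

```python
import codecs

import spack.util.url as url_util
import spack.util.web as web_util

# hypothetical mirror and spec file, for illustration only
spec_yaml_url = url_util.join(
    's3://example-spack-mirror', 'build_cache', 'zlib-1.2.11.spec.yaml')

# works the same for file://, http(s)://, and s3:// URLs
_, headers, stream = web_util.read_from_url(spec_yaml_url)
yaml_contents = codecs.getreader('utf-8')(stream).read()
```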
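And the expanded `-d` handling plus the new `update-index` subcommand can be exercised roughly as follows; the mirror name, URL, and spec are placeholders:

```sh
# -d accepts a local directory, the name of a configured mirror, or a push URL
spack buildcache create -d /data/spack-mirror zlib
spack buildcache create -d split zlib
spack buildcache create -d s3://example-spack-mirror zlib

# regenerate build_cache/index.html at a mirror
spack buildcache update-index -d s3://example-spack-mirror
```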
-rw-r--r-- | .gitlab-ci.yml | 5
-rwxr-xr-x | bin/rebuild-index.sh | 2
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 233
-rw-r--r-- | lib/spack/spack/caches.py | 16
-rw-r--r-- | lib/spack/spack/cmd/buildcache.py | 28
-rw-r--r-- | lib/spack/spack/cmd/checksum.py | 4
-rw-r--r-- | lib/spack/spack/cmd/create.py | 3
-rw-r--r-- | lib/spack/spack/cmd/mirror.py | 125
-rw-r--r-- | lib/spack/spack/cmd/url.py | 8
-rw-r--r-- | lib/spack/spack/fetch_strategy.py | 104
-rw-r--r-- | lib/spack/spack/mirror.py | 230
-rw-r--r-- | lib/spack/spack/s3_handler.py | 92
-rw-r--r-- | lib/spack/spack/schema/mirrors.py | 14
-rw-r--r-- | lib/spack/spack/stage.py | 118
-rw-r--r-- | lib/spack/spack/test/cmd/pkg.py | 2
-rw-r--r-- | lib/spack/spack/test/config.py | 1
-rw-r--r-- | lib/spack/spack/test/llnl/util/lock.py | 2
-rw-r--r-- | lib/spack/spack/test/stage.py | 2
-rw-r--r-- | lib/spack/spack/util/s3.py | 44
-rw-r--r-- | lib/spack/spack/util/url.py | 175
-rw-r--r-- | lib/spack/spack/util/web.py | 501
21 files changed, 1420 insertions, 289 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c4d6bcaab6..f0cfd456ff 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,3 +1,8 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + generate ci jobs: script: - "./bin/generate-gitlab-ci-yml.sh" diff --git a/bin/rebuild-index.sh b/bin/rebuild-index.sh index 09e14a9cee..009010baf1 100755 --- a/bin/rebuild-index.sh +++ b/bin/rebuild-index.sh @@ -10,4 +10,4 @@ set -x SPACK_BIN_DIR="${CI_PROJECT_DIR}/bin" export PATH="${SPACK_BIN_DIR}:${PATH}" -spack upload-s3 index +spack buildcache update-index -d "$MIRROR_URL" diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index cbc0f22327..aab07b8a84 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import codecs import os import re import tarfile @@ -23,14 +24,32 @@ import spack.fetch_strategy as fs import spack.util.gpg as gpg_util import spack.relocate as relocate import spack.util.spack_yaml as syaml +import spack.mirror +import spack.util.url as url_util +import spack.util.web as web_util + from spack.spec import Spec from spack.stage import Stage from spack.util.gpg import Gpg -from spack.util.web import spider, read_from_url from spack.util.executable import ProcessError _build_cache_relative_path = 'build_cache' +BUILD_CACHE_INDEX_TEMPLATE = ''' +<html> +<head> + <title>{title}</title> +</head> +<body> +<ul> +{path_list} +</ul> +</body> +</html> +''' + +BUILD_CACHE_INDEX_ENTRY_TEMPLATE = ' <li><a href="{path}">{path}</a></li>' + class NoOverwriteException(Exception): """ @@ -101,7 +120,7 @@ def build_cache_relative_path(): return _build_cache_relative_path -def build_cache_directory(prefix): +def build_cache_prefix(prefix): return os.path.join(prefix, build_cache_relative_path()) @@ -246,29 +265,36 @@ def sign_tarball(key, force, specfile_path): Gpg.sign(key, specfile_path, '%s.asc' % specfile_path) -def _generate_html_index(path_list, output_path): - f = open(output_path, 'w') - header = """<html>\n -<head>\n</head>\n -<list>\n""" - footer = "</list>\n</html>\n" - f.write(header) - for path in path_list: - rel = os.path.basename(path) - f.write('<li><a href="%s"> %s</a>\n' % (rel, rel)) - f.write(footer) - f.close() - - -def generate_package_index(build_cache_dir): - yaml_list = os.listdir(build_cache_dir) - path_list = [os.path.join(build_cache_dir, l) for l in yaml_list] +def generate_package_index(cache_prefix): + """Create the build cache index page. - index_html_path_tmp = os.path.join(build_cache_dir, 'index.html.tmp') - index_html_path = os.path.join(build_cache_dir, 'index.html') - - _generate_html_index(path_list, index_html_path_tmp) - shutil.move(index_html_path_tmp, index_html_path) + Creates (or replaces) the "index.html" page at the location given in + cache_prefix. This page contains a link for each binary package (*.yaml) + and signing key (*.key) under cache_prefix. 
+ """ + tmpdir = tempfile.mkdtemp() + try: + index_html_path = os.path.join(tmpdir, 'index.html') + file_list = ( + entry + for entry in web_util.list_url(cache_prefix) + if (entry.endswith('.yaml') + or entry.endswith('.key'))) + + with open(index_html_path, 'w') as f: + f.write(BUILD_CACHE_INDEX_TEMPLATE.format( + title='Spack Package Index', + path_list='\n'.join( + BUILD_CACHE_INDEX_ENTRY_TEMPLATE.format(path=path) + for path in file_list))) + + web_util.push_to_url( + index_html_path, + url_util.join(cache_prefix, 'index.html'), + keep_original=False, + extra_args={'ContentType': 'text/html'}) + finally: + shutil.rmtree(tmpdir) def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, @@ -281,33 +307,41 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, raise ValueError('spec must be concrete to build tarball') # set up some paths - build_cache_dir = build_cache_directory(outdir) + tmpdir = tempfile.mkdtemp() + cache_prefix = build_cache_prefix(tmpdir) tarfile_name = tarball_name(spec, '.tar.gz') - tarfile_dir = os.path.join(build_cache_dir, - tarball_directory_name(spec)) + tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec)) tarfile_path = os.path.join(tarfile_dir, tarfile_name) - mkdirp(tarfile_dir) spackfile_path = os.path.join( - build_cache_dir, tarball_path_name(spec, '.spack')) - if os.path.exists(spackfile_path): + cache_prefix, tarball_path_name(spec, '.spack')) + + remote_spackfile_path = url_util.join( + outdir, os.path.relpath(spackfile_path, tmpdir)) + + mkdirp(tarfile_dir) + if web_util.url_exists(remote_spackfile_path): if force: - os.remove(spackfile_path) + web_util.remove_url(remote_spackfile_path) else: - raise NoOverwriteException(str(spackfile_path)) + raise NoOverwriteException(url_util.format(remote_spackfile_path)) + # need to copy the spec file so the build cache can be downloaded # without concretizing with the current spack packages # and preferences spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml") specfile_name = tarball_name(spec, '.spec.yaml') specfile_path = os.path.realpath( - os.path.join(build_cache_dir, specfile_name)) + os.path.join(cache_prefix, specfile_name)) - if os.path.exists(specfile_path): + remote_specfile_path = url_util.join( + outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir))) + + if web_util.url_exists(remote_specfile_path): if force: - os.remove(specfile_path) + web_util.remove_url(remote_specfile_path) else: - raise NoOverwriteException(str(specfile_path)) + raise NoOverwriteException(url_util.format(remote_specfile_path)) # make a copy of the install directory to work with workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix)) @@ -324,6 +358,7 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, except Exception as e: shutil.rmtree(workdir) shutil.rmtree(tarfile_dir) + shutil.rmtree(tmpdir) tty.die(e) else: try: @@ -331,7 +366,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, except Exception as e: shutil.rmtree(workdir) shutil.rmtree(tarfile_dir) + shutil.rmtree(tmpdir) tty.die(e) + # create compressed tarball of the install prefix with closing(tarfile.open(tarfile_path, 'w:gz')) as tar: tar.add(name='%s' % workdir, @@ -360,7 +397,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, spec_dict['full_hash'] = spec.full_hash() tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format( - spec_dict['full_hash'], spec.name, specfile_path)) + 
spec_dict['full_hash'], + spec.name, + url_util.format(remote_specfile_path))) tty.debug(spec.tree()) with open(specfile_path, 'w') as outfile: @@ -382,9 +421,19 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, if not unsigned: os.remove('%s.asc' % specfile_path) - # create an index.html for the build_cache directory so specs can be found - if regenerate_index: - generate_package_index(build_cache_dir) + web_util.push_to_url( + spackfile_path, remote_spackfile_path, keep_original=False) + web_util.push_to_url( + specfile_path, remote_specfile_path, keep_original=False) + + try: + # create an index.html for the build_cache directory so specs can be + # found + if regenerate_index: + generate_package_index(url_util.join( + outdir, os.path.relpath(cache_prefix, tmpdir))) + finally: + shutil.rmtree(tmpdir) return None @@ -394,13 +443,16 @@ def download_tarball(spec): Download binary tarball for given package into stage area Return True if successful """ - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: + if not spack.mirror.MirrorCollection(): tty.die("Please add a spack mirror to allow " + "download of pre-compiled packages.") + tarball = tarball_path_name(spec, '.spack') - for mirror_name, mirror_url in mirrors.items(): - url = mirror_url + '/' + _build_cache_relative_path + '/' + tarball + + for mirror in spack.mirror.MirrorCollection().values(): + url = url_util.join( + mirror.fetch_url, _build_cache_relative_path, tarball) + # stage the tarball into standard place stage = Stage(url, name="build_cache", keep=True) try: @@ -408,6 +460,7 @@ def download_tarball(spec): return stage.save_filename except fs.FetchError: continue + return None @@ -610,26 +663,29 @@ def get_specs(force=False): tty.debug("Using previously-retrieved specs") return _cached_specs - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: - tty.debug("No Spack mirrors are currently configured") + if not spack.mirror.MirrorCollection(): + tty.warn("No Spack mirrors are currently configured") return {} urls = set() - for mirror_name, mirror_url in mirrors.items(): - if mirror_url.startswith('file'): - mirror = mirror_url.replace( - 'file://', '') + "/" + _build_cache_relative_path - tty.msg("Finding buildcaches in %s" % mirror) - if os.path.exists(mirror): - files = os.listdir(mirror) + for mirror in spack.mirror.MirrorCollection().values(): + fetch_url_build_cache = url_util.join( + mirror.fetch_url, _build_cache_relative_path) + + mirror_dir = url_util.local_file_path(fetch_url_build_cache) + if mirror_dir: + tty.msg("Finding buildcaches in %s" % mirror_dir) + if os.path.exists(mirror_dir): + files = os.listdir(mirror_dir) for file in files: if re.search('spec.yaml', file): - link = 'file://' + mirror + '/' + file + link = url_util.join(fetch_url_build_cache, file) urls.add(link) else: - tty.msg("Finding buildcaches on %s" % mirror_url) - p, links = spider(mirror_url + "/" + _build_cache_relative_path) + tty.msg("Finding buildcaches at %s" % + url_util.format(fetch_url_build_cache)) + p, links = web_util.spider( + url_util.join(fetch_url_build_cache, 'index.html')) for link in links: if re.search("spec.yaml", link): urls.add(link) @@ -659,28 +715,33 @@ def get_keys(install=False, trust=False, force=False): """ Get pgp public keys available on mirror """ - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: + if not spack.mirror.MirrorCollection(): tty.die("Please add a spack mirror to allow " + "download of build caches.") keys = set() - for mirror_name, mirror_url 
in mirrors.items(): - if mirror_url.startswith('file'): - mirror = os.path.join( - mirror_url.replace('file://', ''), _build_cache_relative_path) - tty.msg("Finding public keys in %s" % mirror) - files = os.listdir(mirror) + + for mirror in spack.mirror.MirrorCollection().values(): + fetch_url_build_cache = url_util.join( + mirror.fetch_url, _build_cache_relative_path) + + mirror_dir = url_util.local_file_path(fetch_url_build_cache) + if mirror_dir: + tty.msg("Finding public keys in %s" % mirror_dir) + files = os.listdir(mirror_dir) for file in files: if re.search(r'\.key', file): - link = 'file://' + mirror + '/' + file + link = url_util.join(fetch_url_build_cache, file) keys.add(link) else: - tty.msg("Finding public keys on %s" % mirror_url) - p, links = spider(mirror_url + "/build_cache", depth=1) + tty.msg("Finding public keys at %s" % + url_util.format(fetch_url_build_cache)) + p, links = web_util.spider(fetch_url_build_cache, depth=1) + for link in links: if re.search(r'\.key', link): keys.add(link) + for link in keys: with Stage(link, name="build_cache", keep=True) as stage: if os.path.exists(stage.save_filename) and force: @@ -717,15 +778,16 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False): # Try to retrieve the .spec.yaml directly, based on the known # format of the name, in order to determine if the package # needs to be rebuilt. - build_cache_dir = build_cache_directory(mirror_url) + cache_prefix = build_cache_prefix(mirror_url) spec_yaml_file_name = tarball_name(spec, '.spec.yaml') - file_path = os.path.join(build_cache_dir, spec_yaml_file_name) + file_path = os.path.join(cache_prefix, spec_yaml_file_name) result_of_error = 'Package ({0}) will {1}be rebuilt'.format( spec.short_spec, '' if rebuild_on_errors else 'not ') try: - yaml_contents = read_from_url(file_path) + _, _, yaml_file = web_util.read_from_url(file_path) + yaml_contents = codecs.getreader('utf-8')(yaml_file).read() except URLError as url_err: err_msg = [ 'Unable to determine whether {0} needs rebuilding,', @@ -782,22 +844,22 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None, """ rebuilds = {} - for mirror_name, mirror_url in mirrors.items(): - tty.msg('Checking for built specs at %s' % mirror_url) + for mirror in spack.mirror.MirrorCollection(mirrors).values(): + tty.msg('Checking for built specs at %s' % mirror.fetch_url) rebuild_list = [] for spec in specs: - if needs_rebuild(spec, mirror_url, rebuild_on_errors): + if needs_rebuild(spec, mirror.fetch_url, rebuild_on_errors): rebuild_list.append({ 'short_spec': spec.short_spec, 'hash': spec.dag_hash() }) if rebuild_list: - rebuilds[mirror_url] = { - 'mirrorName': mirror_name, - 'mirrorUrl': mirror_url, + rebuilds[mirror.fetch_url] = { + 'mirrorName': mirror.name, + 'mirrorUrl': mirror.fetch_url, 'rebuildSpecs': rebuild_list } @@ -810,33 +872,36 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None, def _download_buildcache_entry(mirror_root, descriptions): for description in descriptions: - url = os.path.join(mirror_root, description['url']) + description_url = os.path.join(mirror_root, description['url']) path = description['path'] fail_if_missing = description['required'] mkdirp(path) - stage = Stage(url, name="build_cache", path=path, keep=True) + stage = Stage( + description_url, name="build_cache", path=path, keep=True) try: stage.fetch() except fs.FetchError as e: tty.debug(e) if fail_if_missing: - tty.error('Failed to download required url {0}'.format(url)) + tty.error('Failed to download required url 
{0}'.format( + description_url)) return False return True def download_buildcache_entry(file_descriptions): - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: + if not spack.mirror.MirrorCollection(): tty.die("Please add a spack mirror to allow " + "download of buildcache entries.") - for mirror_name, mirror_url in mirrors.items(): - mirror_root = os.path.join(mirror_url, _build_cache_relative_path) + for mirror in spack.mirror.MirrorCollection().values(): + mirror_root = os.path.join( + mirror.fetch_url, + _build_cache_relative_path) if _download_buildcache_entry(mirror_root, file_descriptions): return True diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py index dfd750fa82..e2352b2fcc 100644 --- a/lib/spack/spack/caches.py +++ b/lib/spack/spack/caches.py @@ -9,11 +9,13 @@ import os import llnl.util.lang from llnl.util.filesystem import mkdirp +import spack.error import spack.paths import spack.config import spack.fetch_strategy import spack.util.file_cache -from spack.util.path import canonicalize_path +import spack.util.path +import spack.util.url as url_util def _misc_cache(): @@ -25,7 +27,7 @@ def _misc_cache(): path = spack.config.get('config:misc_cache') if not path: path = os.path.join(spack.paths.user_config_path, 'cache') - path = canonicalize_path(path) + path = spack.util.path.canonicalize_path(path) return spack.util.file_cache.FileCache(path) @@ -43,22 +45,26 @@ def _fetch_cache(): path = spack.config.get('config:source_cache') if not path: path = os.path.join(spack.paths.var_path, "cache") - path = canonicalize_path(path) + path = spack.util.path.canonicalize_path(path) return spack.fetch_strategy.FsCache(path) class MirrorCache(object): def __init__(self, root): - self.root = os.path.abspath(root) + self.root = url_util.local_file_path(root) + if not self.root: + raise spack.error.SpackError( + 'MirrorCaches only work with file:// URLs') + self.new_resources = set() self.existing_resources = set() def store(self, fetcher, relative_dest): # Note this will archive package sources even if they would not # normally be cached (e.g. 
the current tip of an hg/git branch) - dst = os.path.join(self.root, relative_dest) + if os.path.exists(dst): self.existing_resources.add(relative_dest) else: diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py index 121a6f4aa5..ee09a33f39 100644 --- a/lib/spack/spack/cmd/buildcache.py +++ b/lib/spack/spack/cmd/buildcache.py @@ -14,6 +14,7 @@ import spack.cmd import spack.cmd.common.arguments as arguments import spack.environment as ev import spack.hash_types as ht +import spack.mirror import spack.relocate import spack.repo import spack.spec @@ -21,6 +22,8 @@ import spack.store import spack.config import spack.repo import spack.store +import spack.util.url as url_util + from spack.error import SpecError from spack.spec import Spec, save_dependency_spec_yamls @@ -205,6 +208,13 @@ def setup_parser(subparser): help='Destination mirror url') copy.set_defaults(func=buildcache_copy) + # Update buildcache index without copying any additional packages + update_index = subparsers.add_parser( + 'update-index', help=buildcache_update_index.__doc__) + update_index.add_argument( + '-d', '--mirror-url', default=None, help='Destination mirror url') + update_index.set_defaults(func=buildcache_update_index) + def find_matching_specs(pkgs, allow_multiple_matches=False, env=None): """Returns a list of specs matching the not necessarily @@ -312,9 +322,14 @@ def createtarball(args): " yaml file containing a spec to install") pkgs = set(packages) specs = set() + outdir = '.' if args.directory: outdir = args.directory + + mirror = spack.mirror.MirrorCollection().lookup(outdir) + outdir = url_util.format(mirror.push_url) + signkey = None if args.key: signkey = args.key @@ -649,6 +664,19 @@ def buildcache_copy(args): shutil.copyfile(cdashid_src_path, cdashid_dest_path) +def buildcache_update_index(args): + """Update a buildcache index.""" + outdir = '.' 
+ if args.mirror_url: + outdir = args.mirror_url + + mirror = spack.mirror.MirrorCollection().lookup(outdir) + outdir = url_util.format(mirror.push_url) + + bindist.generate_package_index( + url_util.join(outdir, bindist.build_cache_relative_path())) + + def buildcache(parser, args): if args.func: args.func(args) diff --git a/lib/spack/spack/cmd/checksum.py b/lib/spack/spack/cmd/checksum.py index ecc71d3060..2518dfef9f 100644 --- a/lib/spack/spack/cmd/checksum.py +++ b/lib/spack/spack/cmd/checksum.py @@ -11,8 +11,8 @@ import llnl.util.tty as tty import spack.cmd import spack.repo +import spack.stage import spack.util.crypto -import spack.util.web from spack.util.naming import valid_fully_qualified_module_name from spack.version import ver, Version @@ -56,7 +56,7 @@ def checksum(parser, args): if not url_dict: tty.die("Could not find any versions for {0}".format(pkg.name)) - version_lines = spack.util.web.get_checksums_for_versions( + version_lines = spack.stage.get_checksums_for_versions( url_dict, pkg.name, keep_stage=args.keep_stage) print() diff --git a/lib/spack/spack/cmd/create.py b/lib/spack/spack/cmd/create.py index 527a7a883c..6c68617acd 100644 --- a/lib/spack/spack/cmd/create.py +++ b/lib/spack/spack/cmd/create.py @@ -13,6 +13,7 @@ from llnl.util.filesystem import mkdirp import spack.util.web import spack.repo +import spack.stage from spack.spec import Spec from spack.util.editor import editor from spack.util.executable import which, ProcessError @@ -618,7 +619,7 @@ def get_versions(args, name): version = parse_version(args.url) url_dict = {version: args.url} - versions = spack.util.web.get_checksums_for_versions( + versions = spack.stage.get_checksums_for_versions( url_dict, name, first_stage_function=guesser, keep_stage=args.keep_stage) else: diff --git a/lib/spack/spack/cmd/mirror.py b/lib/spack/spack/cmd/mirror.py index 723e310ad6..91ed40a4c5 100644 --- a/lib/spack/spack/cmd/mirror.py +++ b/lib/spack/spack/cmd/mirror.py @@ -4,20 +4,21 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import sys -import os -from datetime import datetime import argparse import llnl.util.tty as tty from llnl.util.tty.colify import colify import spack.cmd +import spack.cmd.common.arguments as arguments import spack.concretize import spack.config +import spack.environment as ev import spack.mirror import spack.repo -import spack.cmd.common.arguments as arguments -import spack.environment as ev +import spack.util.url as url_util +import spack.util.web as web_util + from spack.spec import Spec from spack.error import SpackError from spack.util.spack_yaml import syaml_dict @@ -73,6 +74,19 @@ def setup_parser(subparser): default=spack.config.default_modify_scope(), help="configuration scope to modify") + # Set-Url + set_url_parser = sp.add_parser('set-url', help=mirror_set_url.__doc__) + set_url_parser.add_argument('name', help="mnemonic name for mirror") + set_url_parser.add_argument( + 'url', help="url of mirror directory from 'spack mirror create'") + set_url_parser.add_argument( + '--push', action='store_true', + help="set only the URL used for uploading new packages") + set_url_parser.add_argument( + '--scope', choices=scopes, metavar=scopes_metavar, + default=spack.config.default_modify_scope(), + help="configuration scope to modify") + # List list_parser = sp.add_parser('list', help=mirror_list.__doc__) list_parser.add_argument( @@ -83,20 +97,14 @@ def setup_parser(subparser): def mirror_add(args): """Add a mirror to Spack.""" - url = args.url - if url.startswith('/'): - url = 'file://' + url + 
url = url_util.format(args.url) mirrors = spack.config.get('mirrors', scope=args.scope) if not mirrors: mirrors = syaml_dict() - for name, u in mirrors.items(): - if name == args.name: - tty.die("Mirror with name %s already exists." % name) - if u == url: - tty.die("Mirror with url %s already exists." % url) - # should only be one item per mirror dict. + if args.name in mirrors: + tty.die("Mirror with name %s already exists." % args.name) items = [(n, u) for n, u in mirrors.items()] items.insert(0, (args.name, url)) @@ -117,21 +125,86 @@ def mirror_remove(args): old_value = mirrors.pop(name) spack.config.set('mirrors', mirrors, scope=args.scope) - tty.msg("Removed mirror %s with url %s" % (name, old_value)) + + debug_msg_url = "url %s" + debug_msg = ["Removed mirror %s with"] + values = [name] + + try: + fetch_value = old_value['fetch'] + push_value = old_value['push'] + + debug_msg.extend(("fetch", debug_msg_url, "and push", debug_msg_url)) + values.extend((fetch_value, push_value)) + except TypeError: + debug_msg.append(debug_msg_url) + values.append(old_value) + + tty.debug(" ".join(debug_msg) % tuple(values)) + tty.msg("Removed mirror %s." % name) + + +def mirror_set_url(args): + """Change the URL of a mirror.""" + url = url_util.format(args.url) + + mirrors = spack.config.get('mirrors', scope=args.scope) + if not mirrors: + mirrors = syaml_dict() + + if args.name not in mirrors: + tty.die("No mirror found with name %s." % args.name) + + entry = mirrors[args.name] + + try: + fetch_url = entry['fetch'] + push_url = entry['push'] + except TypeError: + fetch_url, push_url = entry, entry + + changes_made = False + + if args.push: + changes_made = changes_made or push_url != url + push_url = url + else: + changes_made = ( + changes_made or fetch_url != push_url or push_url != url) + + fetch_url, push_url = url, url + + items = [ + ( + (n, u) + if n != args.name else ( + (n, {"fetch": fetch_url, "push": push_url}) + if fetch_url != push_url else (n, fetch_url) + ) + ) + for n, u in mirrors.items() + ] + + mirrors = syaml_dict(items) + spack.config.set('mirrors', mirrors, scope=args.scope) + + if changes_made: + tty.msg( + "Changed%s url for mirror %s." % + ((" (push)" if args.push else ""), args.name)) + else: + tty.msg("Url already set for mirror %s." % args.name) def mirror_list(args): """Print out available mirrors to the console.""" - mirrors = spack.config.get('mirrors', scope=args.scope) + + mirrors = spack.mirror.MirrorCollection(scope=args.scope) if not mirrors: tty.msg("No mirrors configured.") return - max_len = max(len(n) for n in mirrors.keys()) - fmt = "%%-%ds%%s" % (max_len + 4) - - for name in mirrors: - print(fmt % (name, mirrors[name])) + mirrors.display() def _read_specs_from_file(filename): @@ -188,14 +261,13 @@ def mirror_create(args): msg = 'Skipping {0} as it is an external spec.' tty.msg(msg.format(spec.cshort_spec)) - # Default name for directory is spack-mirror-<DATESTAMP> - directory = args.directory - if not directory: - timestamp = datetime.now().strftime("%Y-%m-%d") - directory = 'spack-mirror-' + timestamp + mirror = spack.mirror.Mirror( + args.directory or spack.config.get('config:source_cache')) + + directory = url_util.format(mirror.push_url) # Make sure nothing is in the way. 
- existed = os.path.isdir(directory) + existed = web_util.url_exists(directory) # Actually do the work to create the mirror present, mirrored, error = spack.mirror.create( @@ -220,6 +292,7 @@ def mirror(parser, args): 'add': mirror_add, 'remove': mirror_remove, 'rm': mirror_remove, + 'set-url': mirror_set_url, 'list': mirror_list} if args.no_checksum: diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py index 10257b2608..f1ce050a90 100644 --- a/lib/spack/spack/cmd/url.py +++ b/lib/spack/spack/cmd/url.py @@ -5,10 +5,8 @@ from __future__ import division, print_function from collections import defaultdict -try: - from urllib.parse import urlparse -except ImportError: - from urlparse import urlparse + +import six.moves.urllib.parse as urllib_parse import spack.fetch_strategy as fs import spack.repo @@ -262,7 +260,7 @@ def url_stats(args): self.checksums[algo] += 1 # parse out the URL scheme (https/http/ftp/etc.) - urlinfo = urlparse(fetcher.url) + urlinfo = urllib_parse.urlparse(fetcher.url) self.schemes[urlinfo.scheme] += 1 elif url_type == 'git': diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 32239d81ce..4812211812 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -23,6 +23,7 @@ in order to build it. They need to define the following methods: Archive a source directory, e.g. for creating a mirror. """ import os +import os.path import sys import re import shutil @@ -30,6 +31,7 @@ import copy import xml.etree.ElementTree from functools import wraps from six import string_types, with_metaclass +import six.moves.urllib.parse as urllib_parse import llnl.util.tty as tty from llnl.util.filesystem import ( @@ -39,6 +41,9 @@ import spack.config import spack.error import spack.util.crypto as crypto import spack.util.pattern as pattern +import spack.util.web as web_util +import spack.util.url as url_util + from spack.util.executable import which from spack.util.string import comma_and, quote from spack.version import Version, ver @@ -48,6 +53,17 @@ from spack.util.compression import decompressor_for, extension #: List of all fetch strategies, created by FetchStrategy metaclass. all_strategies = [] +CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = ( + "The contents of {subject} look like {content_type}. Either the URL" + " you are trying to use does not exist or you have an internet gateway" + " issue. You can remove the bad archive using 'spack clean" + " <package>', then try again using the correct URL.") + + +def warn_content_type_mismatch(subject, content_type='HTML'): + tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format( + subject=subject, content_type=content_type)) + def _needs_stage(fun): """Many methods on fetch strategies require a stage to be set @@ -351,12 +367,7 @@ class URLFetchStrategy(FetchStrategy): content_types = re.findall(r'Content-Type:[^\r\n]+', headers, flags=re.IGNORECASE) if content_types and 'text/html' in content_types[-1]: - msg = ("The contents of {0} look like HTML. Either the URL " - "you are trying to use does not exist or you have an " - "internet gateway issue. 
You can remove the bad archive " - "using 'spack clean <package>', then try again using " - "the correct URL.") - tty.warn(msg.format(self.archive_file or "the archive")) + warn_content_type_mismatch(self.archive_file or "the archive") if save_file: os.rename(partial_file, save_file) @@ -449,7 +460,10 @@ class URLFetchStrategy(FetchStrategy): if not self.archive_file: raise NoArchiveFileError("Cannot call archive() before fetching.") - shutil.copyfile(self.archive_file, destination) + web_util.push_to_url( + self.archive_file, + destination, + keep_original=True) @_needs_stage def check(self): @@ -1063,6 +1077,54 @@ class HgFetchStrategy(VCSFetchStrategy): return "[hg] %s" % self.url +class S3FetchStrategy(URLFetchStrategy): + """FetchStrategy that pulls from an S3 bucket.""" + enabled = True + url_attr = 's3' + + def __init__(self, *args, **kwargs): + try: + super(S3FetchStrategy, self).__init__(*args, **kwargs) + except ValueError: + if not kwargs.get('url'): + raise ValueError( + "S3FetchStrategy requires a url for fetching.") + + @_needs_stage + def fetch(self): + if self.archive_file: + tty.msg("Already downloaded %s" % self.archive_file) + return + + parsed_url = url_util.parse(self.url) + if parsed_url.scheme != 's3': + raise ValueError( + 'S3FetchStrategy can only fetch from s3:// urls.') + + tty.msg("Fetching %s" % self.url) + + basename = os.path.basename(parsed_url.path) + + with working_dir(self.stage.path): + _, headers, stream = web_util.read_from_url(self.url) + + with open(basename, 'wb') as f: + shutil.copyfileobj(stream, f) + + content_type = headers['Content-type'] + + if content_type == 'text/html': + warn_content_type_mismatch(self.archive_file or "the archive") + + if self.stage.save_filename: + os.rename( + os.path.join(self.stage.path, basename), + self.stage.save_filename) + + if not self.archive_file: + raise FailedDownloadError(self.url) + + def from_url(url): """Given a URL, find an appropriate fetch strategy for it. Currently just gives you a URLFetchStrategy that uses curl. @@ -1206,6 +1268,34 @@ def for_package_version(pkg, version): raise InvalidArgsError(pkg, version, **args) +def from_url_scheme(url, *args, **kwargs): + """Finds a suitable FetchStrategy by matching its url_attr with the scheme + in the given url.""" + + url = kwargs.get('url', url) + parsed_url = urllib_parse.urlparse(url, scheme='file') + + scheme_mapping = ( + kwargs.get('scheme_mapping') or + { + 'file': 'url', + 'http': 'url', + 'https': 'url' + }) + + scheme = parsed_url.scheme + scheme = scheme_mapping.get(scheme, scheme) + + for fetcher in all_strategies: + url_attr = getattr(fetcher, 'url_attr', None) + if url_attr and url_attr == scheme: + return fetcher(url, *args, **kwargs) + + raise ValueError( + 'No FetchStrategy found for url with scheme: "{SCHEME}"'.format( + SCHEME=parsed_url.scheme)) + + def from_list_url(pkg): """If a package provides a URL which lists URLs for resources by version, this can can create a fetcher for a URL discovered for diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index 45aa779c69..e2329b6861 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -13,6 +13,18 @@ to download packages directly from a mirror (e.g., on an intranet). 
""" import sys import os +import os.path +import operator + +import six + +import ruamel.yaml.error as yaml_error + +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping + import llnl.util.tty as tty from llnl.util.filesystem import mkdirp @@ -20,9 +32,205 @@ import spack.config import spack.error import spack.url as url import spack.fetch_strategy as fs -from spack.spec import Spec +import spack.util.spack_json as sjson +import spack.util.spack_yaml as syaml +import spack.util.url as url_util +import spack.spec from spack.version import VersionList from spack.util.compression import allowed_archive +from spack.util.spack_yaml import syaml_dict + + +def _display_mirror_entry(size, name, url, type_=None): + if type_: + type_ = "".join((" (", type_, ")")) + else: + type_ = "" + + print("%-*s%s%s" % (size + 4, name, url, type_)) + + +class Mirror(object): + """Represents a named location for storing source tarballs and binary + packages. + + Mirrors have a fetch_url that indicate where and how artifacts are fetched + from them, and a push_url that indicate where and how artifacts are pushed + to them. These two URLs are usually the same. + """ + + def __init__(self, fetch_url, push_url=None, name=None): + self._fetch_url = fetch_url + self._push_url = push_url + self._name = name + + def to_json(self, stream=None): + return sjson.dump(self.to_dict(), stream) + + def to_yaml(self, stream=None): + return syaml.dump(self.to_dict(), stream) + + @staticmethod + def from_yaml(stream, name=None): + try: + data = syaml.load(stream) + return Mirror.from_dict(data, name) + except yaml_error.MarkedYAMLError as e: + raise syaml.SpackYAMLError("error parsing YAML spec:", str(e)) + + @staticmethod + def from_json(stream, name=None): + d = sjson.load(stream) + return Mirror.from_dict(d, name) + + def to_dict(self): + if self._push_url is None: + return self._fetch_url + else: + return syaml_dict([ + ('fetch', self._fetch_url), + ('push', self._push_url)]) + + @staticmethod + def from_dict(d, name=None): + if isinstance(d, six.string_types): + return Mirror(d, name=name) + else: + return Mirror(d['fetch'], d['push'], name) + + def display(self, max_len=0): + if self._push_url is None: + _display_mirror_entry(max_len, self._name, self._fetch_url) + else: + _display_mirror_entry( + max_len, self._name, self._fetch_url, "fetch") + _display_mirror_entry( + max_len, self._name, self._push_url, "push") + + def __str__(self): + name = self._name + if name is None: + name = '' + else: + name = ' "%s"' % name + + if self._push_url is None: + return "[Mirror%s (%s)]" % (name, self._fetch_url) + + return "[Mirror%s (fetch: %s, push: %s)]" % ( + name, self._fetch_url, self._push_url) + + def __repr__(self): + return ''.join(( + 'Mirror(', + ', '.join( + '%s=%s' % (k, repr(v)) + for k, v in ( + ('fetch_url', self._fetch_url), + ('push_url', self._push_url), + ('name', self._name)) + if k == 'fetch_url' or v), + ')' + )) + + @property + def name(self): + return self._name or "<unnamed>" + + @property + def fetch_url(self): + return self._fetch_url + + @fetch_url.setter + def fetch_url(self, url): + self._fetch_url = url + self._normalize() + + @property + def push_url(self): + if self._push_url is None: + return self._fetch_url + return self._push_url + + @push_url.setter + def push_url(self, url): + self._push_url = url + self._normalize() + + def _normalize(self): + if self._push_url is not None and self._push_url == self._fetch_url: + self._push_url = None + + +class 
MirrorCollection(Mapping): + """A mapping of mirror names to mirrors.""" + + def __init__(self, mirrors=None, scope=None): + self._mirrors = dict( + (name, Mirror.from_dict(mirror, name)) + for name, mirror in ( + mirrors.items() if mirrors is not None else + spack.config.get('mirrors', scope=scope).items())) + + def to_json(self, stream=None): + return sjson.dump(self.to_dict(True), stream) + + def to_yaml(self, stream=None): + return syaml.dump(self.to_dict(True), stream) + + @staticmethod + def from_yaml(stream, name=None): + try: + data = syaml.load(stream) + return MirrorCollection(data) + except yaml_error.MarkedYAMLError as e: + raise syaml.SpackYAMLError("error parsing YAML spec:", str(e)) + + @staticmethod + def from_json(stream, name=None): + d = sjson.load(stream) + return MirrorCollection(d) + + def to_dict(self, recursive=False): + return syaml_dict(sorted( + ( + (k, (v.to_dict() if recursive else v)) + for (k, v) in self._mirrors.items() + ), key=operator.itemgetter(0) + )) + + @staticmethod + def from_dict(d): + return MirrorCollection(d) + + def __getitem__(self, item): + return self._mirrors[item] + + def display(self): + max_len = max(len(mirror.name) for mirror in self._mirrors.values()) + for mirror in self._mirrors.values(): + mirror.display(max_len) + + def lookup(self, name_or_url): + """Looks up and returns a Mirror. + + If this MirrorCollection contains a named Mirror under the name + [name_or_url], then that mirror is returned. Otherwise, [name_or_url] + is assumed to be a mirror URL, and an anonymous mirror with the given + URL is returned. + """ + result = self.get(name_or_url) + + if result is None: + result = Mirror(fetch_url=name_or_url) + + return result + + def __iter__(self): + return iter(self._mirrors) + + def __len__(self): + return len(self._mirrors) def mirror_archive_filename(spec, fetcher, resource_id=None): @@ -114,7 +322,7 @@ def get_matching_versions(specs, **kwargs): # Generate only versions that satisfy the spec. if spec.concrete or v.satisfies(spec.versions): - s = Spec(pkg.name) + s = spack.spec.Spec(pkg.name) s.versions = VersionList([v]) s.variants = spec.variants.copy() # This is needed to avoid hanging references during the @@ -166,12 +374,17 @@ def create(path, specs, **kwargs): it creates specs for those versions. If the version satisfies any spec in the specs list, it is downloaded and added to the mirror. """ + parsed = url_util.parse(path) + mirror_root = url_util.local_file_path(parsed) + # Make sure nothing is in the way. - if os.path.isfile(path): - raise MirrorError("%s already exists and is a file." % path) + if mirror_root and os.path.isfile(mirror_root): + raise MirrorError("%s already exists and is a file." % mirror_root) # automatically spec-ify anything in the specs array. - specs = [s if isinstance(s, Spec) else Spec(s) for s in specs] + specs = [ + s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s) + for s in specs] # Get concrete specs for each matching version of these specs. version_specs = get_matching_versions( @@ -180,8 +393,7 @@ def create(path, specs, **kwargs): s.concretize() # Get the absolute path of the root before we start jumping around. 
- mirror_root = os.path.abspath(path) - if not os.path.isdir(mirror_root): + if mirror_root and not os.path.isdir(mirror_root): try: mkdirp(mirror_root) except OSError as e: @@ -195,12 +407,12 @@ def create(path, specs, **kwargs): 'error': [] } - mirror_cache = spack.caches.MirrorCache(mirror_root) + mirror_cache = spack.caches.MirrorCache(parsed) try: spack.caches.mirror_cache = mirror_cache # Iterate through packages and download all safe tarballs for each for spec in version_specs: - add_single_spec(spec, mirror_root, categories, **kwargs) + add_single_spec(spec, parsed, categories, **kwargs) finally: spack.caches.mirror_cache = None diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py new file mode 100644 index 0000000000..2a54b9ecb1 --- /dev/null +++ b/lib/spack/spack/s3_handler.py @@ -0,0 +1,92 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from io import BufferedReader + +import six.moves.urllib.response as urllib_response +import six.moves.urllib.request as urllib_request +import six.moves.urllib.error as urllib_error + +import spack.util.s3 as s3_util +import spack.util.url as url_util +import spack.util.web as web_util + + +# NOTE(opadron): Workaround issue in boto where its StreamingBody +# implementation is missing several APIs expected from IOBase. These missing +# APIs prevent the streams returned by boto from being passed as-are along to +# urllib. +# +# https://github.com/boto/botocore/issues/879 +# https://github.com/python/cpython/pull/3249 +class WrapStream(BufferedReader): + def __init__(self, raw): + raw.readable = lambda: True + raw.writable = lambda: False + raw.seekable = lambda: False + raw.closed = False + raw.flush = lambda: None + super(WrapStream, self).__init__(raw) + + def detach(self): + self.raw = None + + def read(self, *args, **kwargs): + return self.raw.read(*args, **kwargs) + + def __getattr__(self, key): + return getattr(self.raw, key) + + +def _s3_open(url): + parsed = url_util.parse(url) + s3 = s3_util.create_s3_session(parsed) + + bucket = parsed.netloc + key = parsed.path + + if key.startswith('/'): + key = key[1:] + + obj = s3.get_object(Bucket=bucket, Key=key) + + # NOTE(opadron): Apply workaround here (see above) + stream = WrapStream(obj['Body']) + headers = web_util.standardize_header_names( + obj['ResponseMetadata']['HTTPHeaders']) + + return url, headers, stream + + +class UrllibS3Handler(urllib_request.HTTPSHandler): + def s3_open(self, req): + orig_url = req.get_full_url() + from botocore.exceptions import ClientError + try: + url, headers, stream = _s3_open(orig_url) + return urllib_response.addinfourl(stream, headers, url) + except ClientError as err: + # if no such [KEY], but [KEY]/index.html exists, + # return that, instead. 
+ if err.response['Error']['Code'] == 'NoSuchKey': + try: + _, headers, stream = _s3_open( + url_util.join(orig_url, 'index.html')) + return urllib_response.addinfourl( + stream, headers, orig_url) + + except ClientError as err2: + if err.response['Error']['Code'] == 'NoSuchKey': + # raise original error + raise urllib_error.URLError(err) + + raise urllib_error.URLError(err2) + + raise urllib_error.URLError(err) + + +S3OpenerDirector = urllib_request.build_opener(UrllibS3Handler()) + +open = S3OpenerDirector.open diff --git a/lib/spack/spack/schema/mirrors.py b/lib/spack/spack/schema/mirrors.py index 551267bd4f..92e6c9bca1 100644 --- a/lib/spack/spack/schema/mirrors.py +++ b/lib/spack/spack/schema/mirrors.py @@ -17,7 +17,19 @@ properties = { 'default': {}, 'additionalProperties': False, 'patternProperties': { - r'\w[\w-]*': {'type': 'string'}, + r'\w[\w-]*': { + 'anyOf': [ + {'type': 'string'}, + { + 'type': 'object', + 'required': ['fetch', 'push'], + 'properties': { + 'fetch': {'type': 'string'}, + 'push': {'type': 'string'} + } + } + ] + }, }, }, } diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 6b27d37adf..9621938bcd 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -12,7 +12,6 @@ import tempfile import getpass from six import string_types from six import iteritems -from six.moves.urllib.parse import urljoin import llnl.util.tty as tty from llnl.util.filesystem import mkdirp, can_access, install, install_tree @@ -20,12 +19,16 @@ from llnl.util.filesystem import partition_path, remove_linked_tree import spack.paths import spack.caches +import spack.cmd import spack.config import spack.error +import spack.mirror import spack.util.lock import spack.fetch_strategy as fs import spack.util.pattern as pattern import spack.util.path as sup +import spack.util.url as url_util + from spack.util.crypto import prefix_bits, bit_length @@ -252,7 +255,7 @@ class Stage(object): # TODO: fetch/stage coupling needs to be reworked -- the logic # TODO: here is convoluted and not modular enough. if isinstance(url_or_fetch_strategy, string_types): - self.fetcher = fs.from_url(url_or_fetch_strategy) + self.fetcher = fs.from_url_scheme(url_or_fetch_strategy) elif isinstance(url_or_fetch_strategy, fs.FetchStrategy): self.fetcher = url_or_fetch_strategy else: @@ -397,16 +400,9 @@ class Stage(object): # TODO: CompositeFetchStrategy here. self.skip_checksum_for_mirror = True if self.mirror_path: - mirrors = spack.config.get('mirrors') - - # Join URLs of mirror roots with mirror paths. Because - # urljoin() will strip everything past the final '/' in - # the root, so we add a '/' if it is not present. - mir_roots = [ - sup.substitute_path_variables(root) if root.endswith(os.sep) - else sup.substitute_path_variables(root) + os.sep - for root in mirrors.values()] - urls = [urljoin(root, self.mirror_path) for root in mir_roots] + urls = [ + url_util.join(mirror.fetch_url, self.mirror_path) + for mirror in spack.mirror.MirrorCollection().values()] # If this archive is normally fetched from a tarball URL, # then use the same digest. 
`spack mirror` ensures that @@ -425,9 +421,12 @@ class Stage(object): # Add URL strategies for all the mirrors with the digest for url in urls: - fetchers.insert( - 0, fs.URLFetchStrategy( - url, digest, expand=expand, extension=extension)) + fetchers.append(fs.from_url_scheme( + url, digest, expand=expand, extension=extension)) + # fetchers.insert( + # 0, fs.URLFetchStrategy( + # url, digest, expand=expand, extension=extension)) + if self.default_fetcher.cachable: fetchers.insert( 0, spack.caches.fetch_cache.fetcher( @@ -708,6 +707,91 @@ def purge(): remove_linked_tree(stage_path) +def get_checksums_for_versions( + url_dict, name, first_stage_function=None, keep_stage=False): + """Fetches and checksums archives from URLs. + + This function is called by both ``spack checksum`` and ``spack + create``. The ``first_stage_function`` argument allows the caller to + inspect the first downloaded archive, e.g., to determine the build + system. + + Args: + url_dict (dict): A dictionary of the form: version -> URL + name (str): The name of the package + first_stage_function (callable): function that takes a Stage and a URL; + this is run on the stage of the first URL downloaded + keep_stage (bool): whether to keep staging area when command completes + + Returns: + (str): A multi-line string containing versions and corresponding hashes + + """ + sorted_versions = sorted(url_dict.keys(), reverse=True) + + # Find length of longest string in the list for padding + max_len = max(len(str(v)) for v in sorted_versions) + num_ver = len(sorted_versions) + + tty.msg("Found {0} version{1} of {2}:".format( + num_ver, '' if num_ver == 1 else 's', name), + "", + *spack.cmd.elide_list( + ["{0:{1}} {2}".format(str(v), max_len, url_dict[v]) + for v in sorted_versions])) + tty.msg('') + + archives_to_fetch = tty.get_number( + "How many would you like to checksum?", default=1, abort='q') + + if not archives_to_fetch: + tty.die("Aborted.") + + versions = sorted_versions[:archives_to_fetch] + urls = [url_dict[v] for v in versions] + + tty.msg("Downloading...") + version_hashes = [] + i = 0 + for url, version in zip(urls, versions): + try: + with Stage(url, keep=keep_stage) as stage: + # Fetch the archive + stage.fetch() + if i == 0 and first_stage_function: + # Only run first_stage_function the first time, + # no need to run it every time + first_stage_function(stage, url) + + # Checksum the archive and add it to the list + version_hashes.append((version, spack.util.crypto.checksum( + hashlib.sha256, stage.archive_file))) + i += 1 + except FailedDownloadError: + tty.msg("Failed to fetch {0}".format(url)) + except Exception as e: + tty.msg("Something failed on {0}, skipping.".format(url), + " ({0})".format(e)) + + if not version_hashes: + tty.die("Could not fetch any versions for {0}".format(name)) + + # Find length of longest string in the list for padding + max_len = max(len(str(v)) for v, h in version_hashes) + + # Generate the version directives to put in a package.py + version_lines = "\n".join([ + " version('{0}', {1}sha256='{2}')".format( + v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes + ]) + + num_hash = len(version_hashes) + tty.msg("Checksummed {0} version{1} of {2}".format( + num_hash, '' if num_hash == 1 else 's', name)) + + return version_lines + + class StageError(spack.error.SpackError): """"Superclass for all errors encountered during staging.""" @@ -720,5 +804,9 @@ class RestageError(StageError): """"Error encountered during restaging.""" +class VersionFetchError(StageError): + """Raised 
when we can't determine a URL to fetch a package.""" + + # Keep this in namespace for convenience FailedDownloadError = fs.FailedDownloadError diff --git a/lib/spack/spack/test/cmd/pkg.py b/lib/spack/spack/test/cmd/pkg.py index c9e8e74c4a..e7d1006cc1 100644 --- a/lib/spack/spack/test/cmd/pkg.py +++ b/lib/spack/spack/test/cmd/pkg.py @@ -53,6 +53,8 @@ def mock_pkg_git_repo(tmpdir_factory): # initial commit with mock packages git('add', '.') + git('config', 'user.email', 'testing@spack.io') + git('config', 'user.name', 'Spack Testing') git('commit', '-m', 'initial mock repo commit') # add commit with pkg-a, pkg-b, pkg-c packages diff --git a/lib/spack/spack/test/config.py b/lib/spack/spack/test/config.py index 2de4e55281..3b85bb2a23 100644 --- a/lib/spack/spack/test/config.py +++ b/lib/spack/spack/test/config.py @@ -595,6 +595,7 @@ def test_bad_config_section(mock_config): spack.config.get('foobar') +@pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_bad_command_line_scopes(tmpdir, mock_config): cfg = spack.config.Configuration() diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index cf962ada4f..d8081d108c 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -546,6 +546,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path): timeout_write(lock_path, 5, 1)) +@pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_read_lock_on_read_only_lockfile(lock_dir, lock_path): """read-only directory, read-only lockfile.""" touch(lock_path) @@ -573,6 +574,7 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path): pass +@pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_read_lock_no_lockfile(lock_dir, lock_path): """read-only directory, no lockfile (so can't create).""" with read_only(lock_dir): diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py index 66b358435f..cc4f944867 100644 --- a/lib/spack/spack/test/stage.py +++ b/lib/spack/spack/test/stage.py @@ -653,6 +653,7 @@ class TestStage(object): assert source_path.endswith(spack.stage._source_path_subdir) assert not os.path.exists(source_path) + @pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_first_accessible_path(self, tmpdir): """Test _first_accessible_path names.""" spack_dir = tmpdir.join('paths') @@ -783,6 +784,7 @@ class TestStage(object): assert spack.stage._resolve_paths(paths) == res_paths + @pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_get_stage_root_bad_path(self, clear_stage_root): """Ensure an invalid stage path root raises a StageError.""" with spack.config.override('config:build_stage', '/no/such/path'): diff --git a/lib/spack/spack/util/s3.py b/lib/spack/spack/util/s3.py new file mode 100644 index 0000000000..ee6b3d56cf --- /dev/null +++ b/lib/spack/spack/util/s3.py @@ -0,0 +1,44 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import os + +import six.moves.urllib.parse as urllib_parse + +import spack +import spack.util.url as url_util + + +def create_s3_session(url): + url = url_util.parse(url) + if url.scheme != 's3': + raise ValueError( + 'Can not create S3 session from URL with scheme: {SCHEME}'.format( + SCHEME=url.scheme)) + + # NOTE(opadron): import boto and friends as late as possible. 
We don't + # want to require boto as a dependency unless the user actually wants to + # access S3 mirrors. + from boto3 import Session + + session = Session() + + s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')} + + endpoint_url = os.environ.get('S3_ENDPOINT_URL') + if endpoint_url: + if urllib_parse.urlparse(endpoint_url, scheme=None).scheme is None: + endpoint_url = '://'.join(('https', endpoint_url)) + + s3_client_args['endpoint_url'] = endpoint_url + + # if no access credentials provided above, then access anonymously + if not session.get_credentials(): + from botocore import UNSIGNED + from botocore.client import Config + + s3_client_args["config"] = Config(signature_version=UNSIGNED) + + return session.client('s3', **s3_client_args) diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py new file mode 100644 index 0000000000..6b2786f244 --- /dev/null +++ b/lib/spack/spack/util/url.py @@ -0,0 +1,175 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +""" +Utility functions for parsing, formatting, and manipulating URLs. +""" + +import itertools +import os.path + +from six import string_types +import six.moves.urllib.parse as urllib_parse + +import spack.util.path + + +def _split_all(path): + """Split path into its atomic components. + + Returns the shortest list, L, of strings such that os.path.join(*L) == path + and os.path.split(element) == ('', element) for every element in L except + possibly the first. This first element may possibly have the value of '/', + or some other OS-dependent path root. + """ + result = [] + a = path + old_a = None + while a != old_a: + (old_a, (a, b)) = a, os.path.split(a) + + if a or b: + result.insert(0, b or '/') + + return result + + +def local_file_path(url): + """Get a local file path from a url. + + If url is a file:// URL, return the absolute path to the local + file or directory referenced by it. Otherwise, return None. + """ + if isinstance(url, string_types): + url = parse(url) + + if url.scheme == 'file': + return url.path + return None + + +def parse(url, scheme='file'): + """Parse a mirror url. + + For file:// URLs, the netloc and path components are concatenated and + passed through spack.util.path.canoncalize_path(). + + Otherwise, the returned value is the same as urllib's urlparse() with + allow_fragments=False. + """ + + url_obj = ( + urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False) + if isinstance(url, string_types) else url) + + (scheme, netloc, path, params, query, _) = url_obj + scheme = (scheme or 'file').lower() + + if scheme == 'file': + path = spack.util.path.canonicalize_path(netloc + path) + while path.startswith('//'): + path = path[1:] + netloc = '' + + return urllib_parse.ParseResult(scheme=scheme, + netloc=netloc, + path=path, + params=params, + query=query, + fragment=None) + + +def format(parsed_url): + """Format a URL string + + Returns a canonicalized format of the given URL as a string. 
+ """ + if isinstance(parsed_url, string_types): + parsed_url = parse(parsed_url) + + return parsed_url.geturl() + + +def join(base_url, path, *extra, **kwargs): + """Joins a base URL with one or more local URL path components + + If resolve_href is True, treat the base URL as though it where the locator + of a web page, and the remaining URL path components as though they formed + a relative URL to be resolved against it (i.e.: as in os.path.join(...)). + The result is an absolute URL to the resource to which a user's browser + would navigate if they clicked on a link with an "href" attribute equal to + the relative URL. + + If resolve_href is False (default), then the URL path components are joined + as in os.path.join(). + + Examples: + base_url = 's3://bucket/index.html' + body = fetch_body(prefix) + link = get_href(body) # link == '../other-bucket/document.txt' + + # wrong - link is a local URL that needs to be resolved against base_url + spack.util.url.join(base_url, link) + 's3://bucket/other_bucket/document.txt' + + # correct - resolve local URL against base_url + spack.util.url.join(base_url, link, resolve_href=True) + 's3://other_bucket/document.txt' + + prefix = 'https://mirror.spack.io/build_cache' + + # wrong - prefix is just a URL prefix + spack.util.url.join(prefix, 'my-package', resolve_href=True) + 'https://mirror.spack.io/my-package' + + # correct - simply append additional URL path components + spack.util.url.join(prefix, 'my-package', resolve_href=False) # default + 'https://mirror.spack.io/build_cache/my-package' + """ + base_url = parse(base_url) + resolve_href = kwargs.get('resolve_href', False) + + (scheme, netloc, base_path, params, query, _) = base_url + scheme = scheme.lower() + + path_tokens = [ + part for part in itertools.chain( + _split_all(path), + itertools.chain.from_iterable( + _split_all(extra_path) for extra_path in extra)) + if part and part != '/'] + + base_path_args = ['/fake-root'] + if scheme == 's3': + if netloc: + base_path_args.append(netloc) + + if base_path.startswith('/'): + base_path = base_path[1:] + + base_path_args.append(base_path) + + if resolve_href: + new_base_path, _ = os.path.split(os.path.join(*base_path_args)) + base_path_args = [new_base_path] + + base_path_args.extend(path_tokens) + base_path = os.path.relpath(os.path.join(*base_path_args), '/fake-root') + + if scheme == 's3': + path_tokens = [ + part for part in _split_all(base_path) + if part and part != '/'] + + if path_tokens: + netloc = path_tokens.pop(0) + base_path = os.path.join('', *path_tokens) + + return format(urllib_parse.ParseResult(scheme=scheme, + netloc=netloc, + path=base_path, + params=params, + query=query, + fragment=None)) diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index da2d5bbeb9..e0a23fb444 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -5,16 +5,21 @@ from __future__ import print_function +import codecs +import errno import re import os +import os.path +import shutil import ssl import sys import traceback -import hashlib +from itertools import product + +import six from six.moves.urllib.request import urlopen, Request from six.moves.urllib.error import URLError -from six.moves.urllib.parse import urljoin import multiprocessing.pool try: @@ -28,20 +33,47 @@ except ImportError: class HTMLParseError(Exception): pass +from llnl.util.filesystem import mkdirp import llnl.util.tty as tty -import spack.config import spack.cmd -import spack.url -import spack.stage +import spack.config import spack.error 
+import spack.url import spack.util.crypto +import spack.util.s3 as s3_util +import spack.util.url as url_util + from spack.util.compression import ALLOWED_ARCHIVE_TYPES # Timeout in seconds for web requests _timeout = 10 +# See docstring for standardize_header_names() +_separators = ('', ' ', '_', '-') +HTTP_HEADER_NAME_ALIASES = { + "Accept-ranges": set( + ''.join((A, 'ccept', sep, R, 'anges')) + for A, sep, R in product('Aa', _separators, 'Rr')), + + "Content-length": set( + ''.join((C, 'ontent', sep, L, 'ength')) + for C, sep, L in product('Cc', _separators, 'Ll')), + + "Content-type": set( + ''.join((C, 'ontent', sep, T, 'ype')) + for C, sep, T in product('Cc', _separators, 'Tt')), + + "Date": set(('Date', 'date')), + + "Last-modified": set( + ''.join((L, 'ast', sep, M, 'odified')) + for L, sep, M in product('Ll', _separators, 'Mm')), + + "Server": set(('Server', 'server')) +} + class LinkParser(HTMLParser): """This parser just takes an HTML page and strips out the hrefs on the @@ -59,7 +91,7 @@ class LinkParser(HTMLParser): class NonDaemonProcess(multiprocessing.Process): - """Process tha allows sub-processes, so pools can have sub-pools.""" + """Process that allows sub-processes, so pools can have sub-pools.""" @property def daemon(self): return False @@ -86,25 +118,53 @@ else: super(NonDaemonPool, self).__init__(*args, **kwargs) -def _read_from_url(url, accept_content_type=None): +def uses_ssl(parsed_url): + if parsed_url.scheme == 'https': + return True + + if parsed_url.scheme == 's3': + endpoint_url = os.environ.get('S3_ENDPOINT_URL') + if not endpoint_url: + return True + + if url_util.parse(endpoint_url, scheme='https').scheme == 'https': + return True + + return False + + +__UNABLE_TO_VERIFY_SSL = ( + lambda pyver: ( + (pyver < (2, 7, 9)) or + ((3,) < pyver < (3, 4, 3)) + ))(sys.version_info) + + +def read_from_url(url, accept_content_type=None): + url = url_util.parse(url) context = None - verify_ssl = spack.config.get('config:verify_ssl') - pyver = sys.version_info - if (pyver < (2, 7, 9) or (3,) < pyver < (3, 4, 3)): - if verify_ssl: - tty.warn("Spack will not check SSL certificates. You need to " - "update your Python to enable certificate " - "verification.") - elif verify_ssl: - # without a defined context, urlopen will not verify the ssl cert for - # python 3.x - context = ssl.create_default_context() - else: - context = ssl._create_unverified_context() - req = Request(url) + verify_ssl = spack.config.get('config:verify_ssl') - if accept_content_type: + # Don't even bother with a context unless the URL scheme is one that uses + # SSL certs. + if uses_ssl(url): + if verify_ssl: + if __UNABLE_TO_VERIFY_SSL: + # User wants SSL verification, but it cannot be provided. + warn_no_ssl_cert_checking() + else: + # User wants SSL verification, and it *can* be provided. + context = ssl.create_default_context() + else: + # User has explicitly indicated that they do not want SSL + # verification. + context = ssl._create_unverified_context() + + req = Request(url_util.format(url)) + content_type = None + is_web_url = url.scheme in ('http', 'https') + if accept_content_type and is_web_url: # Make a HEAD request first to check the content type. This lets # us ignore tarballs and gigantic files. 
# It would be nice to do this with the HTTP Accept header to avoid @@ -113,29 +173,179 @@ def _read_from_url(url, accept_content_type=None): req.get_method = lambda: "HEAD" resp = _urlopen(req, timeout=_timeout, context=context) - if "Content-type" not in resp.headers: - tty.debug("ignoring page " + url) - return None, None - - if not resp.headers["Content-type"].startswith(accept_content_type): - tty.debug("ignoring page " + url + " with content type " + - resp.headers["Content-type"]) - return None, None + content_type = resp.headers.get('Content-type') # Do the real GET request when we know it's just HTML. req.get_method = lambda: "GET" response = _urlopen(req, timeout=_timeout, context=context) - response_url = response.geturl() - # Read the page and and stick it in the map we'll return - page = response.read().decode('utf-8') + if accept_content_type and not is_web_url: + content_type = response.headers.get('Content-type') - return response_url, page + reject_content_type = ( + accept_content_type and ( + content_type is None or + not content_type.startswith(accept_content_type))) + if reject_content_type: + tty.debug("ignoring page {0}{1}{2}".format( + url_util.format(url), + " with content type " if content_type is not None else "", + content_type or "")) -def read_from_url(url, accept_content_type=None): - resp_url, contents = _read_from_url(url, accept_content_type) - return contents + return None, None, None + + return response.geturl(), response.headers, response + + +def warn_no_ssl_cert_checking(): + tty.warn("Spack will not check SSL certificates. You need to update " + "your Python to enable certificate verification.") + + +def push_to_url(local_path, remote_path, **kwargs): + keep_original = kwargs.get('keep_original', True) + + local_url = url_util.parse(local_path) + local_file_path = url_util.local_file_path(local_url) + if local_file_path is None: + raise ValueError('local path must be a file:// url') + + remote_url = url_util.parse(remote_path) + verify_ssl = spack.config.get('config:verify_ssl') + + if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url): + warn_no_ssl_cert_checking() + + remote_file_path = url_util.local_file_path(remote_url) + if remote_file_path is not None: + mkdirp(os.path.dirname(remote_file_path)) + if keep_original: + shutil.copy(local_file_path, remote_file_path) + else: + try: + os.rename(local_file_path, remote_file_path) + except OSError as e: + if e.errno == errno.EXDEV: + # NOTE(opadron): The above move failed because it crosses + # filesystem boundaries. Copy the file (plus original + # metadata), and then delete the original. This operation + # needs to be done in separate steps. 
+ shutil.copy2(local_file_path, remote_file_path) + os.remove(local_file_path) + + elif remote_url.scheme == 's3': + extra_args = kwargs.get('extra_args', {}) + + remote_path = remote_url.path + while remote_path.startswith('/'): + remote_path = remote_path[1:] + + s3 = s3_util.create_s3_session(remote_url) + s3.upload_file(local_file_path, remote_url.netloc, + remote_path, ExtraArgs=extra_args) + + if not keep_original: + os.remove(local_file_path) + + else: + raise NotImplementedError( + 'Unrecognized URL scheme: {SCHEME}'.format( + SCHEME=remote_url.scheme)) + + +def url_exists(url): + url = url_util.parse(url) + local_path = url_util.local_file_path(url) + if local_path: + return os.path.exists(local_path) + + if url.scheme == 's3': + s3 = s3_util.create_s3_session(url) + from botocore.exceptions import ClientError + try: + s3.get_object(Bucket=url.netloc, Key=url.path) + return True + except ClientError as err: + if err.response['Error']['Code'] == 'NoSuchKey': + return False + raise err + + # otherwise, just try to "read" from the URL, and assume that *any* + # non-throwing response contains the resource represented by the URL + try: + read_from_url(url) + return True + except URLError: + return False + + +def remove_url(url): + url = url_util.parse(url) + + local_path = url_util.local_file_path(url) + if local_path: + os.remove(local_path) + return + + if url.scheme == 's3': + s3 = s3_util.create_s3_session(url) + s3.delete_object(Bucket=url.s3_bucket, Key=url.path) + return + + # Don't even try for other URL schemes. + + +def _list_s3_objects(client, url, num_entries, start_after=None): + list_args = dict( + Bucket=url.netloc, + Prefix=url.path, + MaxKeys=num_entries) + + if start_after is not None: + list_args['StartAfter'] = start_after + + result = client.list_objects_v2(**list_args) + + last_key = None + if result['IsTruncated']: + last_key = result['Contents'][-1]['Key'] + + iter = (key for key in + ( + os.path.relpath(entry['Key'], url.path) + for entry in result['Contents'] + ) + if key != '.') + + return iter, last_key + + +def _iter_s3_prefix(client, url, num_entries=1024): + key = None + while True: + contents, key = _list_s3_objects( + client, url, num_entries, start_after=key) + + for x in contents: + yield x + + if not key: + break + + +def list_url(url): + url = url_util.parse(url) + + local_path = url_util.local_file_path(url) + if local_path: + return os.listdir(local_path) + + if url.scheme == 's3': + s3 = s3_util.create_s3_session(url) + return list(set( + key.split('/', 1)[0] + for key in _iter_s3_prefix(s3, url))) def _spider(url, visited, root, depth, max_depth, raise_on_error): @@ -154,16 +364,12 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error): pages = {} # dict from page URL -> text content. links = set() # set of all links seen on visited pages. - # root may end with index.html -- chop that off. 
- if root.endswith('/index.html'): - root = re.sub('/index.html$', '', root) - try: - response_url, page = _read_from_url(url, 'text/html') - - if not response_url or not page: + response_url, _, response = read_from_url(url, 'text/html') + if not response_url or not response: return pages, links + page = codecs.getreader('utf-8')(response).read() pages[response_url] = page # Parse out the links in the page @@ -173,8 +379,10 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error): while link_parser.links: raw_link = link_parser.links.pop() - abs_link = urljoin(response_url, raw_link.strip()) - + abs_link = url_util.join( + response_url, + raw_link.strip(), + resolve_href=True) links.add(abs_link) # Skip stuff that looks like an archive @@ -243,16 +451,28 @@ def _spider_wrapper(args): return _spider(*args) -def _urlopen(*args, **kwargs): +def _urlopen(req, *args, **kwargs): """Wrapper for compatibility with old versions of Python.""" - # We don't pass 'context' parameter to urlopen because it - # was introduces only starting versions 2.7.9 and 3.4.3 of Python. - if 'context' in kwargs and kwargs['context'] is None: + url = req + try: + url = url.get_full_url() + except AttributeError: + pass + + # We don't pass 'context' parameter because it was only introduced starting + # with versions 2.7.9 and 3.4.3 of Python. + if 'context' in kwargs: del kwargs['context'] - return urlopen(*args, **kwargs) + opener = urlopen + if url_util.parse(url).scheme == 's3': + import spack.s3_handler + opener = spack.s3_handler.open -def spider(root_url, depth=0): + return opener(req, *args, **kwargs) + + +def spider(root, depth=0): """Gets web pages from a root URL. If depth is specified (e.g., depth=2), then this will also follow @@ -262,7 +482,9 @@ def spider(root_url, depth=0): performance over a sequential fetch. """ - pages, links = _spider(root_url, set(), root_url, 0, depth, False) + + root = url_util.parse(root) + pages, links = _spider(root, set(), root, 0, depth, False) return pages, links @@ -356,99 +578,112 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0): return versions -def get_checksums_for_versions( - url_dict, name, first_stage_function=None, keep_stage=False): - """Fetches and checksums archives from URLs. - - This function is called by both ``spack checksum`` and ``spack - create``. The ``first_stage_function`` argument allows the caller to - inspect the first downloaded archive, e.g., to determine the build - system. - - Args: - url_dict (dict): A dictionary of the form: version -> URL - name (str): The name of the package - first_stage_function (callable): function that takes a Stage and a URL; - this is run on the stage of the first URL downloaded - keep_stage (bool): whether to keep staging area when command completes - - Returns: - (str): A multi-line string containing versions and corresponding hashes - +def standardize_header_names(headers): + """Replace certain header names with standardized spellings. + + Standardizes the spellings of the following header names: + - Accept-ranges + - Content-length + - Content-type + - Date + - Last-modified + - Server + + Every name considered is translated to one of the above names if the only + difference between the two is how the first letters of each word are + capitalized; whether words are separated; or, if separated, whether they + are so by a dash (-), underscore (_), or space ( ). Header names that + cannot be mapped as described above are returned unaltered. 
+ + For example: The standard spelling of "Content-length" would be substituted + for any of the following names: + - Content-length + - content_length + - contentlength + - content_Length + - contentLength + - content Length + + ... and any other header name, such as "Content-encoding", would not be + altered, regardless of spelling. + + If headers is a string, then it (or an appropriate substitute) is returned. + + If headers is a non-empty tuple, headers[0] is a string, and there exists a + standardized spelling for header[0] that differs from it, then a new tuple + is returned. This tuple has the same elements as headers, except the first + element is the standardized spelling for headers[0]. + + If headers is a sequence, then a new list is considered, where each element + is its corresponding element in headers, but mapped as above if a string or + tuple. This new list is returned if at least one of its elements differ + from their corrsponding element in headers. + + If headers is a mapping, then a new dict is considered, where the key in + each item is the key of its corresponding item in headers, mapped as above + if a string or tuple. The value is taken from the corresponding item. If + the keys of multiple items in headers map to the same key after being + standardized, then the value for the resulting item is undefined. The new + dict is returned if at least one of its items has a key that differs from + that of their corresponding item in headers, or if the keys of multiple + items in headers map to the same key after being standardized. + + In all other cases headers is returned unaltered. """ - sorted_versions = sorted(url_dict.keys(), reverse=True) + if isinstance(headers, six.string_types): + for standardized_spelling, other_spellings in ( + HTTP_HEADER_NAME_ALIASES.items()): + if headers in other_spellings: + if headers == standardized_spelling: + return headers + return standardized_spelling + return headers + + if isinstance(headers, tuple): + if not headers: + return headers + old = headers[0] + if isinstance(old, six.string_types): + new = standardize_header_names(old) + if old is not new: + return (new,) + headers[1:] + return headers - # Find length of longest string in the list for padding - max_len = max(len(str(v)) for v in sorted_versions) - num_ver = len(sorted_versions) + try: + changed = False + new_dict = {} + for key, value in headers.items(): + if isinstance(key, (tuple, six.string_types)): + old_key, key = key, standardize_header_names(key) + changed = changed or key is not old_key - tty.msg("Found {0} version{1} of {2}:".format( - num_ver, '' if num_ver == 1 else 's', name), - "", - *spack.cmd.elide_list( - ["{0:{1}} {2}".format(str(v), max_len, url_dict[v]) - for v in sorted_versions])) - print() + new_dict[key] = value + + return new_dict if changed else headers + except (AttributeError, TypeError, ValueError): + pass - archives_to_fetch = tty.get_number( - "How many would you like to checksum?", default=1, abort='q') + try: + changed = False + new_list = [] + for item in headers: + if isinstance(item, (tuple, six.string_types)): + old_item, item = item, standardize_header_names(item) + changed = changed or item is not old_item - if not archives_to_fetch: - tty.die("Aborted.") + new_list.append(item) - versions = sorted_versions[:archives_to_fetch] - urls = [url_dict[v] for v in versions] + return new_list if changed else headers + except TypeError: + pass - tty.msg("Downloading...") - version_hashes = [] - i = 0 - for url, version in zip(urls, 
versions):
-        try:
-            with spack.stage.Stage(url, keep=keep_stage) as stage:
-                # Fetch the archive
-                stage.fetch()
-                if i == 0 and first_stage_function:
-                    # Only run first_stage_function the first time,
-                    # no need to run it every time
-                    first_stage_function(stage, url)
-
-                # Checksum the archive and add it to the list
-                version_hashes.append((version, spack.util.crypto.checksum(
-                    hashlib.sha256, stage.archive_file)))
-                i += 1
-        except spack.stage.FailedDownloadError:
-            tty.msg("Failed to fetch {0}".format(url))
-        except Exception as e:
-            tty.msg("Something failed on {0}, skipping.".format(url),
-                    " ({0})".format(e))
-
-    if not version_hashes:
-        tty.die("Could not fetch any versions for {0}".format(name))
-
-    # Find length of longest string in the list for padding
-    max_len = max(len(str(v)) for v, h in version_hashes)
-
-    # Generate the version directives to put in a package.py
-    version_lines = "\n".join([
-        " version('{0}', {1}sha256='{2}')".format(
-            v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
-    ])
-
-    num_hash = len(version_hashes)
-    tty.msg("Checksummed {0} version{1} of {2}".format(
-        num_hash, '' if num_hash == 1 else 's', name))
-
-    return version_lines
+    return headers


 class SpackWebError(spack.error.SpackError):
     """Superclass for Spack web spidering errors."""


-class VersionFetchError(SpackWebError):
-    """Raised when we can't determine a URL to fetch a package."""
-
-
 class NoNetworkConnectionError(SpackWebError):
     """Raised when an operation can't get an internet connection."""
     def __init__(self, message, url):
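
A note on the new `spack.util.url.join` helper introduced above: its two modes are easiest to see by example. The snippet below is a minimal sketch, not part of the patch; it assumes it is run from a Spack checkout so that `spack.util.url` is importable, and the expected results are the ones given in the function's docstring.

```python
import spack.util.url as url_util

# Default mode: append path components under a mirror prefix,
# much like os.path.join() on the URL's path.
print(url_util.join('https://mirror.spack.io/build_cache', 'my-package'))
# expected: https://mirror.spack.io/build_cache/my-package

# resolve_href=True: treat the base URL as a web page and resolve a
# relative link against it, the way a browser resolves an href.
print(url_util.join('s3://bucket/index.html',
                    '../other_bucket/document.txt',
                    resolve_href=True))
# expected: s3://other_bucket/document.txt
```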
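The helpers added to `spack.util.web` expose one URL-based interface for local and S3 mirrors. The sketch below is hypothetical (the paths and file names are made up) and sticks to a local destination, which the new URL utilities treat as a `file://` URL, so it runs without boto3 or credentials; pointing the same calls at an `s3://bucket/...` prefix routes them through the boto3-backed branches instead.

```python
import os
import tempfile

import spack.util.web as web_util

# Hypothetical scratch locations; plain filesystem paths are parsed as
# file:// URLs by spack.util.url.parse().
workdir = tempfile.mkdtemp()
local_file = os.path.join(workdir, 'hello.txt')
with open(local_file, 'w') as f:
    f.write('hello, mirror\n')

mirror_prefix = os.path.join(workdir, 'mirror', 'build_cache')
dest = os.path.join(mirror_prefix, 'hello.txt')

# Copy into the "mirror" (push_to_url creates the destination directory).
web_util.push_to_url(local_file, dest, keep_original=True)
assert web_util.url_exists(dest)

# List what sits under the prefix, then clean up.
print(web_util.list_url(mirror_prefix))  # ['hello.txt']
web_util.remove_url(dest)
assert not web_util.url_exists(dest)
```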
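`standardize_header_names()` carries a long behavioral description; a few concrete calls make it clearer. This is a sketch of the expected behavior based on that docstring and the alias table above, not output captured from a real run.

```python
import spack.util.web as web_util

# Alternate spellings of a known header collapse to one canonical form.
for name in ('content_length', 'contentLength', 'Content Length'):
    print(web_util.standardize_header_names(name))  # Content-length, each time

# Names outside the alias table pass through untouched.
print(web_util.standardize_header_names('Content-encoding'))  # Content-encoding

# For mappings, only the keys are rewritten; values are left alone.
print(web_util.standardize_header_names(
    {'content_type': 'text/html', 'server': 'nginx'}))
# {'Content-type': 'text/html', 'Server': 'nginx'}
```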
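One more sketch, for the `S3_ENDPOINT_URL` handling in `spack.util.s3.create_s3_session()`: a bare host is accepted and gets an `https://` scheme prepended, and when no AWS credentials are configured the client is built with botocore's UNSIGNED signature, i.e. the mirror is accessed anonymously. The endpoint and bucket names below are made up, and boto3 must be installed for this to run.

```python
import os

import spack.util.s3 as s3_util

# Hypothetical S3-compatible endpoint (e.g. a MinIO server); no scheme given,
# so create_s3_session() prepends https://.
os.environ['S3_ENDPOINT_URL'] = 'minio.example.com'

# With no credentials available, the returned client is unsigned (anonymous).
client = s3_util.create_s3_session('s3://my-mirror-bucket')
print(client.meta.endpoint_url)  # https://minio.example.com
```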