From fd58c98b0edd198e2b1bc8c9f81208d7d6010178 Mon Sep 17 00:00:00 2001 From: Omar Padron Date: Tue, 22 Oct 2019 03:32:04 -0400 Subject: fetching: S3 upload and download (#11117) This extends Spack functionality so that it can fetch sources and binaries from-, push sources and binaries to-, and index the contents of- mirrors hosted on an S3 bucket. High level to-do list: - [x] Extend mirrors configuration to add support for `file://`, and `s3://` URLs. - [x] Ensure all fetching, pushing, and indexing operations work for `file://` URLs. - [x] Implement S3 source fetching - [x] Implement S3 binary mirror indexing - [x] Implement S3 binary package fetching - [x] Implement S3 source pushing - [x] Implement S3 binary package pushing Important details: * refactor URL handling to handle S3 URLs and mirror URLs more gracefully. - updated parse() to accept already-parsed URL objects. an equivalent object is returned with any extra s3-related attributes intact. Objects created with urllib can also be passed, and the additional s3 handling logic will still be applied. * update mirror schema/parsing (mirror can have separate fetch/push URLs) * implement s3_fetch_strategy/several utility changes * provide more feature-complete S3 fetching * update buildcache create command to support S3 * Move the core logic for reading data from S3 out of the s3 fetch strategy and into the s3 URL handler. The s3 fetch strategy now calls into `read_from_url()` Since read_from_url can now handle S3 URLs, the S3 fetch strategy is redundant. It's not clear whether the ideal design is to have S3 fetching functionality in a fetch strategy, directly implemented in read_from_url, or both. * expanded what can be passed to `spack buildcache` via the -d flag: In addition to a directory on the local filesystem, the name of a configured mirror can be passed, or a push URL can be passed directly. --- .gitlab-ci.yml | 5 + bin/rebuild-index.sh | 2 +- lib/spack/spack/binary_distribution.py | 233 +++++++++------ lib/spack/spack/caches.py | 16 +- lib/spack/spack/cmd/buildcache.py | 28 ++ lib/spack/spack/cmd/checksum.py | 4 +- lib/spack/spack/cmd/create.py | 3 +- lib/spack/spack/cmd/mirror.py | 125 ++++++-- lib/spack/spack/cmd/url.py | 8 +- lib/spack/spack/fetch_strategy.py | 104 ++++++- lib/spack/spack/mirror.py | 230 ++++++++++++++- lib/spack/spack/s3_handler.py | 92 ++++++ lib/spack/spack/schema/mirrors.py | 14 +- lib/spack/spack/stage.py | 118 +++++++- lib/spack/spack/test/cmd/pkg.py | 2 + lib/spack/spack/test/config.py | 1 + lib/spack/spack/test/llnl/util/lock.py | 2 + lib/spack/spack/test/stage.py | 2 + lib/spack/spack/util/s3.py | 44 +++ lib/spack/spack/util/url.py | 175 ++++++++++++ lib/spack/spack/util/web.py | 501 ++++++++++++++++++++++++--------- 21 files changed, 1420 insertions(+), 289 deletions(-) create mode 100644 lib/spack/spack/s3_handler.py create mode 100644 lib/spack/spack/util/s3.py create mode 100644 lib/spack/spack/util/url.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c4d6bcaab6..f0cfd456ff 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,3 +1,8 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + generate ci jobs: script: - "./bin/generate-gitlab-ci-yml.sh" diff --git a/bin/rebuild-index.sh b/bin/rebuild-index.sh index 09e14a9cee..009010baf1 100755 --- a/bin/rebuild-index.sh +++ b/bin/rebuild-index.sh @@ -10,4 +10,4 @@ set -x SPACK_BIN_DIR="${CI_PROJECT_DIR}/bin" export PATH="${SPACK_BIN_DIR}:${PATH}" -spack upload-s3 index +spack buildcache update-index -d "$MIRROR_URL" diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index cbc0f22327..aab07b8a84 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import codecs import os import re import tarfile @@ -23,14 +24,32 @@ import spack.fetch_strategy as fs import spack.util.gpg as gpg_util import spack.relocate as relocate import spack.util.spack_yaml as syaml +import spack.mirror +import spack.util.url as url_util +import spack.util.web as web_util + from spack.spec import Spec from spack.stage import Stage from spack.util.gpg import Gpg -from spack.util.web import spider, read_from_url from spack.util.executable import ProcessError _build_cache_relative_path = 'build_cache' +BUILD_CACHE_INDEX_TEMPLATE = ''' + + + {title} + + + + + +''' + +BUILD_CACHE_INDEX_ENTRY_TEMPLATE = '
  <li><a href="{path}">{path}</a></li>
  • ' + class NoOverwriteException(Exception): """ @@ -101,7 +120,7 @@ def build_cache_relative_path(): return _build_cache_relative_path -def build_cache_directory(prefix): +def build_cache_prefix(prefix): return os.path.join(prefix, build_cache_relative_path()) @@ -246,29 +265,36 @@ def sign_tarball(key, force, specfile_path): Gpg.sign(key, specfile_path, '%s.asc' % specfile_path) -def _generate_html_index(path_list, output_path): - f = open(output_path, 'w') - header = """\n -\n\n -\n""" - footer = "\n\n" - f.write(header) - for path in path_list: - rel = os.path.basename(path) - f.write('
  • %s\n' % (rel, rel)) - f.write(footer) - f.close() - - -def generate_package_index(build_cache_dir): - yaml_list = os.listdir(build_cache_dir) - path_list = [os.path.join(build_cache_dir, l) for l in yaml_list] +def generate_package_index(cache_prefix): + """Create the build cache index page. - index_html_path_tmp = os.path.join(build_cache_dir, 'index.html.tmp') - index_html_path = os.path.join(build_cache_dir, 'index.html') - - _generate_html_index(path_list, index_html_path_tmp) - shutil.move(index_html_path_tmp, index_html_path) + Creates (or replaces) the "index.html" page at the location given in + cache_prefix. This page contains a link for each binary package (*.yaml) + and signing key (*.key) under cache_prefix. + """ + tmpdir = tempfile.mkdtemp() + try: + index_html_path = os.path.join(tmpdir, 'index.html') + file_list = ( + entry + for entry in web_util.list_url(cache_prefix) + if (entry.endswith('.yaml') + or entry.endswith('.key'))) + + with open(index_html_path, 'w') as f: + f.write(BUILD_CACHE_INDEX_TEMPLATE.format( + title='Spack Package Index', + path_list='\n'.join( + BUILD_CACHE_INDEX_ENTRY_TEMPLATE.format(path=path) + for path in file_list))) + + web_util.push_to_url( + index_html_path, + url_util.join(cache_prefix, 'index.html'), + keep_original=False, + extra_args={'ContentType': 'text/html'}) + finally: + shutil.rmtree(tmpdir) def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, @@ -281,33 +307,41 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, raise ValueError('spec must be concrete to build tarball') # set up some paths - build_cache_dir = build_cache_directory(outdir) + tmpdir = tempfile.mkdtemp() + cache_prefix = build_cache_prefix(tmpdir) tarfile_name = tarball_name(spec, '.tar.gz') - tarfile_dir = os.path.join(build_cache_dir, - tarball_directory_name(spec)) + tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec)) tarfile_path = os.path.join(tarfile_dir, tarfile_name) - mkdirp(tarfile_dir) spackfile_path = os.path.join( - build_cache_dir, tarball_path_name(spec, '.spack')) - if os.path.exists(spackfile_path): + cache_prefix, tarball_path_name(spec, '.spack')) + + remote_spackfile_path = url_util.join( + outdir, os.path.relpath(spackfile_path, tmpdir)) + + mkdirp(tarfile_dir) + if web_util.url_exists(remote_spackfile_path): if force: - os.remove(spackfile_path) + web_util.remove_url(remote_spackfile_path) else: - raise NoOverwriteException(str(spackfile_path)) + raise NoOverwriteException(url_util.format(remote_spackfile_path)) + # need to copy the spec file so the build cache can be downloaded # without concretizing with the current spack packages # and preferences spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml") specfile_name = tarball_name(spec, '.spec.yaml') specfile_path = os.path.realpath( - os.path.join(build_cache_dir, specfile_name)) + os.path.join(cache_prefix, specfile_name)) - if os.path.exists(specfile_path): + remote_specfile_path = url_util.join( + outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir))) + + if web_util.url_exists(remote_specfile_path): if force: - os.remove(specfile_path) + web_util.remove_url(remote_specfile_path) else: - raise NoOverwriteException(str(specfile_path)) + raise NoOverwriteException(url_util.format(remote_specfile_path)) # make a copy of the install directory to work with workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix)) @@ -324,6 +358,7 @@ def build_tarball(spec, outdir, force=False, rel=False, 
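# A minimal sketch of the publishing step used by generate_package_index()
# above: the index page is written to a temporary directory and then handed to
# web_util.push_to_url(), which copies for file:// destinations and uploads for
# s3:// destinations. The mirror URL and page body here are made-up examples.
import os
import tempfile

import spack.util.url as url_util
import spack.util.web as web_util

cache_prefix = 's3://example-bucket/mirror/build_cache'   # hypothetical mirror
tmpdir = tempfile.mkdtemp()
index_html_path = os.path.join(tmpdir, 'index.html')

with open(index_html_path, 'w') as f:
    f.write('<html><body><ul></ul></body></html>')        # placeholder page

web_util.push_to_url(
    index_html_path,                                # local file to publish
    url_util.join(cache_prefix, 'index.html'),      # destination on the mirror
    keep_original=False,                            # move instead of copy
    extra_args={'ContentType': 'text/html'})        # S3 object content type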
unsigned=False, except Exception as e: shutil.rmtree(workdir) shutil.rmtree(tarfile_dir) + shutil.rmtree(tmpdir) tty.die(e) else: try: @@ -331,7 +366,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, except Exception as e: shutil.rmtree(workdir) shutil.rmtree(tarfile_dir) + shutil.rmtree(tmpdir) tty.die(e) + # create compressed tarball of the install prefix with closing(tarfile.open(tarfile_path, 'w:gz')) as tar: tar.add(name='%s' % workdir, @@ -360,7 +397,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, spec_dict['full_hash'] = spec.full_hash() tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format( - spec_dict['full_hash'], spec.name, specfile_path)) + spec_dict['full_hash'], + spec.name, + url_util.format(remote_specfile_path))) tty.debug(spec.tree()) with open(specfile_path, 'w') as outfile: @@ -382,9 +421,19 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, if not unsigned: os.remove('%s.asc' % specfile_path) - # create an index.html for the build_cache directory so specs can be found - if regenerate_index: - generate_package_index(build_cache_dir) + web_util.push_to_url( + spackfile_path, remote_spackfile_path, keep_original=False) + web_util.push_to_url( + specfile_path, remote_specfile_path, keep_original=False) + + try: + # create an index.html for the build_cache directory so specs can be + # found + if regenerate_index: + generate_package_index(url_util.join( + outdir, os.path.relpath(cache_prefix, tmpdir))) + finally: + shutil.rmtree(tmpdir) return None @@ -394,13 +443,16 @@ def download_tarball(spec): Download binary tarball for given package into stage area Return True if successful """ - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: + if not spack.mirror.MirrorCollection(): tty.die("Please add a spack mirror to allow " + "download of pre-compiled packages.") + tarball = tarball_path_name(spec, '.spack') - for mirror_name, mirror_url in mirrors.items(): - url = mirror_url + '/' + _build_cache_relative_path + '/' + tarball + + for mirror in spack.mirror.MirrorCollection().values(): + url = url_util.join( + mirror.fetch_url, _build_cache_relative_path, tarball) + # stage the tarball into standard place stage = Stage(url, name="build_cache", keep=True) try: @@ -408,6 +460,7 @@ def download_tarball(spec): return stage.save_filename except fs.FetchError: continue + return None @@ -610,26 +663,29 @@ def get_specs(force=False): tty.debug("Using previously-retrieved specs") return _cached_specs - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: - tty.debug("No Spack mirrors are currently configured") + if not spack.mirror.MirrorCollection(): + tty.warn("No Spack mirrors are currently configured") return {} urls = set() - for mirror_name, mirror_url in mirrors.items(): - if mirror_url.startswith('file'): - mirror = mirror_url.replace( - 'file://', '') + "/" + _build_cache_relative_path - tty.msg("Finding buildcaches in %s" % mirror) - if os.path.exists(mirror): - files = os.listdir(mirror) + for mirror in spack.mirror.MirrorCollection().values(): + fetch_url_build_cache = url_util.join( + mirror.fetch_url, _build_cache_relative_path) + + mirror_dir = url_util.local_file_path(fetch_url_build_cache) + if mirror_dir: + tty.msg("Finding buildcaches in %s" % mirror_dir) + if os.path.exists(mirror_dir): + files = os.listdir(mirror_dir) for file in files: if re.search('spec.yaml', file): - link = 'file://' + mirror + '/' + file + link = 
url_util.join(fetch_url_build_cache, file) urls.add(link) else: - tty.msg("Finding buildcaches on %s" % mirror_url) - p, links = spider(mirror_url + "/" + _build_cache_relative_path) + tty.msg("Finding buildcaches at %s" % + url_util.format(fetch_url_build_cache)) + p, links = web_util.spider( + url_util.join(fetch_url_build_cache, 'index.html')) for link in links: if re.search("spec.yaml", link): urls.add(link) @@ -659,28 +715,33 @@ def get_keys(install=False, trust=False, force=False): """ Get pgp public keys available on mirror """ - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: + if not spack.mirror.MirrorCollection(): tty.die("Please add a spack mirror to allow " + "download of build caches.") keys = set() - for mirror_name, mirror_url in mirrors.items(): - if mirror_url.startswith('file'): - mirror = os.path.join( - mirror_url.replace('file://', ''), _build_cache_relative_path) - tty.msg("Finding public keys in %s" % mirror) - files = os.listdir(mirror) + + for mirror in spack.mirror.MirrorCollection().values(): + fetch_url_build_cache = url_util.join( + mirror.fetch_url, _build_cache_relative_path) + + mirror_dir = url_util.local_file_path(fetch_url_build_cache) + if mirror_dir: + tty.msg("Finding public keys in %s" % mirror_dir) + files = os.listdir(mirror_dir) for file in files: if re.search(r'\.key', file): - link = 'file://' + mirror + '/' + file + link = url_util.join(fetch_url_build_cache, file) keys.add(link) else: - tty.msg("Finding public keys on %s" % mirror_url) - p, links = spider(mirror_url + "/build_cache", depth=1) + tty.msg("Finding public keys at %s" % + url_util.format(fetch_url_build_cache)) + p, links = web_util.spider(fetch_url_build_cache, depth=1) + for link in links: if re.search(r'\.key', link): keys.add(link) + for link in keys: with Stage(link, name="build_cache", keep=True) as stage: if os.path.exists(stage.save_filename) and force: @@ -717,15 +778,16 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False): # Try to retrieve the .spec.yaml directly, based on the known # format of the name, in order to determine if the package # needs to be rebuilt. 
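# A minimal sketch of the mirror-iteration pattern used by get_specs() and
# get_keys() above, assuming at least one mirror is configured:
# MirrorCollection() reads the 'mirrors' configuration section, and
# url_util.join() appends path components to each mirror's fetch URL whether it
# is a file://, http(s)://, or s3:// URL.
import spack.mirror
import spack.util.url as url_util

for mirror in spack.mirror.MirrorCollection().values():
    build_cache_url = url_util.join(mirror.fetch_url, 'build_cache')
    print(mirror.name, build_cache_url)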
- build_cache_dir = build_cache_directory(mirror_url) + cache_prefix = build_cache_prefix(mirror_url) spec_yaml_file_name = tarball_name(spec, '.spec.yaml') - file_path = os.path.join(build_cache_dir, spec_yaml_file_name) + file_path = os.path.join(cache_prefix, spec_yaml_file_name) result_of_error = 'Package ({0}) will {1}be rebuilt'.format( spec.short_spec, '' if rebuild_on_errors else 'not ') try: - yaml_contents = read_from_url(file_path) + _, _, yaml_file = web_util.read_from_url(file_path) + yaml_contents = codecs.getreader('utf-8')(yaml_file).read() except URLError as url_err: err_msg = [ 'Unable to determine whether {0} needs rebuilding,', @@ -782,22 +844,22 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None, """ rebuilds = {} - for mirror_name, mirror_url in mirrors.items(): - tty.msg('Checking for built specs at %s' % mirror_url) + for mirror in spack.mirror.MirrorCollection(mirrors).values(): + tty.msg('Checking for built specs at %s' % mirror.fetch_url) rebuild_list = [] for spec in specs: - if needs_rebuild(spec, mirror_url, rebuild_on_errors): + if needs_rebuild(spec, mirror.fetch_url, rebuild_on_errors): rebuild_list.append({ 'short_spec': spec.short_spec, 'hash': spec.dag_hash() }) if rebuild_list: - rebuilds[mirror_url] = { - 'mirrorName': mirror_name, - 'mirrorUrl': mirror_url, + rebuilds[mirror.fetch_url] = { + 'mirrorName': mirror.name, + 'mirrorUrl': mirror.fetch_url, 'rebuildSpecs': rebuild_list } @@ -810,33 +872,36 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None, def _download_buildcache_entry(mirror_root, descriptions): for description in descriptions: - url = os.path.join(mirror_root, description['url']) + description_url = os.path.join(mirror_root, description['url']) path = description['path'] fail_if_missing = description['required'] mkdirp(path) - stage = Stage(url, name="build_cache", path=path, keep=True) + stage = Stage( + description_url, name="build_cache", path=path, keep=True) try: stage.fetch() except fs.FetchError as e: tty.debug(e) if fail_if_missing: - tty.error('Failed to download required url {0}'.format(url)) + tty.error('Failed to download required url {0}'.format( + description_url)) return False return True def download_buildcache_entry(file_descriptions): - mirrors = spack.config.get('mirrors') - if len(mirrors) == 0: + if not spack.mirror.MirrorCollection(): tty.die("Please add a spack mirror to allow " + "download of buildcache entries.") - for mirror_name, mirror_url in mirrors.items(): - mirror_root = os.path.join(mirror_url, _build_cache_relative_path) + for mirror in spack.mirror.MirrorCollection().values(): + mirror_root = os.path.join( + mirror.fetch_url, + _build_cache_relative_path) if _download_buildcache_entry(mirror_root, file_descriptions): return True diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py index dfd750fa82..e2352b2fcc 100644 --- a/lib/spack/spack/caches.py +++ b/lib/spack/spack/caches.py @@ -9,11 +9,13 @@ import os import llnl.util.lang from llnl.util.filesystem import mkdirp +import spack.error import spack.paths import spack.config import spack.fetch_strategy import spack.util.file_cache -from spack.util.path import canonicalize_path +import spack.util.path +import spack.util.url as url_util def _misc_cache(): @@ -25,7 +27,7 @@ def _misc_cache(): path = spack.config.get('config:misc_cache') if not path: path = os.path.join(spack.paths.user_config_path, 'cache') - path = canonicalize_path(path) + path = spack.util.path.canonicalize_path(path) return 
spack.util.file_cache.FileCache(path) @@ -43,22 +45,26 @@ def _fetch_cache(): path = spack.config.get('config:source_cache') if not path: path = os.path.join(spack.paths.var_path, "cache") - path = canonicalize_path(path) + path = spack.util.path.canonicalize_path(path) return spack.fetch_strategy.FsCache(path) class MirrorCache(object): def __init__(self, root): - self.root = os.path.abspath(root) + self.root = url_util.local_file_path(root) + if not self.root: + raise spack.error.SpackError( + 'MirrorCaches only work with file:// URLs') + self.new_resources = set() self.existing_resources = set() def store(self, fetcher, relative_dest): # Note this will archive package sources even if they would not # normally be cached (e.g. the current tip of an hg/git branch) - dst = os.path.join(self.root, relative_dest) + if os.path.exists(dst): self.existing_resources.add(relative_dest) else: diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py index 121a6f4aa5..ee09a33f39 100644 --- a/lib/spack/spack/cmd/buildcache.py +++ b/lib/spack/spack/cmd/buildcache.py @@ -14,6 +14,7 @@ import spack.cmd import spack.cmd.common.arguments as arguments import spack.environment as ev import spack.hash_types as ht +import spack.mirror import spack.relocate import spack.repo import spack.spec @@ -21,6 +22,8 @@ import spack.store import spack.config import spack.repo import spack.store +import spack.util.url as url_util + from spack.error import SpecError from spack.spec import Spec, save_dependency_spec_yamls @@ -205,6 +208,13 @@ def setup_parser(subparser): help='Destination mirror url') copy.set_defaults(func=buildcache_copy) + # Update buildcache index without copying any additional packages + update_index = subparsers.add_parser( + 'update-index', help=buildcache_update_index.__doc__) + update_index.add_argument( + '-d', '--mirror-url', default=None, help='Destination mirror url') + update_index.set_defaults(func=buildcache_update_index) + def find_matching_specs(pkgs, allow_multiple_matches=False, env=None): """Returns a list of specs matching the not necessarily @@ -312,9 +322,14 @@ def createtarball(args): " yaml file containing a spec to install") pkgs = set(packages) specs = set() + outdir = '.' if args.directory: outdir = args.directory + + mirror = spack.mirror.MirrorCollection().lookup(outdir) + outdir = url_util.format(mirror.push_url) + signkey = None if args.key: signkey = args.key @@ -649,6 +664,19 @@ def buildcache_copy(args): shutil.copyfile(cdashid_src_path, cdashid_dest_path) +def buildcache_update_index(args): + """Update a buildcache index.""" + outdir = '.' 
+ if args.mirror_url: + outdir = args.mirror_url + + mirror = spack.mirror.MirrorCollection().lookup(outdir) + outdir = url_util.format(mirror.push_url) + + bindist.generate_package_index( + url_util.join(outdir, bindist.build_cache_relative_path())) + + def buildcache(parser, args): if args.func: args.func(args) diff --git a/lib/spack/spack/cmd/checksum.py b/lib/spack/spack/cmd/checksum.py index ecc71d3060..2518dfef9f 100644 --- a/lib/spack/spack/cmd/checksum.py +++ b/lib/spack/spack/cmd/checksum.py @@ -11,8 +11,8 @@ import llnl.util.tty as tty import spack.cmd import spack.repo +import spack.stage import spack.util.crypto -import spack.util.web from spack.util.naming import valid_fully_qualified_module_name from spack.version import ver, Version @@ -56,7 +56,7 @@ def checksum(parser, args): if not url_dict: tty.die("Could not find any versions for {0}".format(pkg.name)) - version_lines = spack.util.web.get_checksums_for_versions( + version_lines = spack.stage.get_checksums_for_versions( url_dict, pkg.name, keep_stage=args.keep_stage) print() diff --git a/lib/spack/spack/cmd/create.py b/lib/spack/spack/cmd/create.py index 527a7a883c..6c68617acd 100644 --- a/lib/spack/spack/cmd/create.py +++ b/lib/spack/spack/cmd/create.py @@ -13,6 +13,7 @@ from llnl.util.filesystem import mkdirp import spack.util.web import spack.repo +import spack.stage from spack.spec import Spec from spack.util.editor import editor from spack.util.executable import which, ProcessError @@ -618,7 +619,7 @@ def get_versions(args, name): version = parse_version(args.url) url_dict = {version: args.url} - versions = spack.util.web.get_checksums_for_versions( + versions = spack.stage.get_checksums_for_versions( url_dict, name, first_stage_function=guesser, keep_stage=args.keep_stage) else: diff --git a/lib/spack/spack/cmd/mirror.py b/lib/spack/spack/cmd/mirror.py index 723e310ad6..91ed40a4c5 100644 --- a/lib/spack/spack/cmd/mirror.py +++ b/lib/spack/spack/cmd/mirror.py @@ -4,20 +4,21 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import sys -import os -from datetime import datetime import argparse import llnl.util.tty as tty from llnl.util.tty.colify import colify import spack.cmd +import spack.cmd.common.arguments as arguments import spack.concretize import spack.config +import spack.environment as ev import spack.mirror import spack.repo -import spack.cmd.common.arguments as arguments -import spack.environment as ev +import spack.util.url as url_util +import spack.util.web as web_util + from spack.spec import Spec from spack.error import SpackError from spack.util.spack_yaml import syaml_dict @@ -73,6 +74,19 @@ def setup_parser(subparser): default=spack.config.default_modify_scope(), help="configuration scope to modify") + # Set-Url + set_url_parser = sp.add_parser('set-url', help=mirror_set_url.__doc__) + set_url_parser.add_argument('name', help="mnemonic name for mirror") + set_url_parser.add_argument( + 'url', help="url of mirror directory from 'spack mirror create'") + set_url_parser.add_argument( + '--push', action='store_true', + help="set only the URL used for uploading new packages") + set_url_parser.add_argument( + '--scope', choices=scopes, metavar=scopes_metavar, + default=spack.config.default_modify_scope(), + help="configuration scope to modify") + # List list_parser = sp.add_parser('list', help=mirror_list.__doc__) list_parser.add_argument( @@ -83,20 +97,14 @@ def setup_parser(subparser): def mirror_add(args): """Add a mirror to Spack.""" - url = args.url - if url.startswith('/'): - url = 'file://' + url + 
url = url_util.format(args.url) mirrors = spack.config.get('mirrors', scope=args.scope) if not mirrors: mirrors = syaml_dict() - for name, u in mirrors.items(): - if name == args.name: - tty.die("Mirror with name %s already exists." % name) - if u == url: - tty.die("Mirror with url %s already exists." % url) - # should only be one item per mirror dict. + if args.name in mirrors: + tty.die("Mirror with name %s already exists." % args.name) items = [(n, u) for n, u in mirrors.items()] items.insert(0, (args.name, url)) @@ -117,21 +125,86 @@ def mirror_remove(args): old_value = mirrors.pop(name) spack.config.set('mirrors', mirrors, scope=args.scope) - tty.msg("Removed mirror %s with url %s" % (name, old_value)) + + debug_msg_url = "url %s" + debug_msg = ["Removed mirror %s with"] + values = [name] + + try: + fetch_value = old_value['fetch'] + push_value = old_value['push'] + + debug_msg.extend(("fetch", debug_msg_url, "and push", debug_msg_url)) + values.extend((fetch_value, push_value)) + except TypeError: + debug_msg.append(debug_msg_url) + values.append(old_value) + + tty.debug(" ".join(debug_msg) % tuple(values)) + tty.msg("Removed mirror %s." % name) + + +def mirror_set_url(args): + """Change the URL of a mirror.""" + url = url_util.format(args.url) + + mirrors = spack.config.get('mirrors', scope=args.scope) + if not mirrors: + mirrors = syaml_dict() + + if args.name not in mirrors: + tty.die("No mirror found with name %s." % args.name) + + entry = mirrors[args.name] + + try: + fetch_url = entry['fetch'] + push_url = entry['push'] + except TypeError: + fetch_url, push_url = entry, entry + + changes_made = False + + if args.push: + changes_made = changes_made or push_url != url + push_url = url + else: + changes_made = ( + changes_made or fetch_url != push_url or push_url != url) + + fetch_url, push_url = url, url + + items = [ + ( + (n, u) + if n != args.name else ( + (n, {"fetch": fetch_url, "push": push_url}) + if fetch_url != push_url else (n, fetch_url) + ) + ) + for n, u in mirrors.items() + ] + + mirrors = syaml_dict(items) + spack.config.set('mirrors', mirrors, scope=args.scope) + + if changes_made: + tty.msg( + "Changed%s url for mirror %s." % + ((" (push)" if args.push else ""), args.name)) + else: + tty.msg("Url already set for mirror %s." % args.name) def mirror_list(args): """Print out available mirrors to the console.""" - mirrors = spack.config.get('mirrors', scope=args.scope) + + mirrors = spack.mirror.MirrorCollection(scope=args.scope) if not mirrors: tty.msg("No mirrors configured.") return - max_len = max(len(n) for n in mirrors.keys()) - fmt = "%%-%ds%%s" % (max_len + 4) - - for name in mirrors: - print(fmt % (name, mirrors[name])) + mirrors.display() def _read_specs_from_file(filename): @@ -188,14 +261,13 @@ def mirror_create(args): msg = 'Skipping {0} as it is an external spec.' tty.msg(msg.format(spec.cshort_spec)) - # Default name for directory is spack-mirror- - directory = args.directory - if not directory: - timestamp = datetime.now().strftime("%Y-%m-%d") - directory = 'spack-mirror-' + timestamp + mirror = spack.mirror.Mirror( + args.directory or spack.config.get('config:source_cache')) + + directory = url_util.format(mirror.push_url) # Make sure nothing is in the way. 
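# A minimal sketch of the two mirror-entry shapes that 'spack mirror add' and
# 'spack mirror set-url --push' can now produce, read back through
# spack.mirror.Mirror (added in lib/spack/spack/mirror.py below). The names and
# URLs are made-up examples.
import spack.mirror

plain = spack.mirror.Mirror.from_dict('file:///data/mirror', name='local')
split = spack.mirror.Mirror.from_dict(
    {'fetch': 'https://mirror.example.com',
     'push': 's3://example-bucket/mirror'},
    name='remote')

assert plain.push_url == plain.fetch_url    # push falls back to the fetch URL
assert split.push_url == 's3://example-bucket/mirror'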
- existed = os.path.isdir(directory) + existed = web_util.url_exists(directory) # Actually do the work to create the mirror present, mirrored, error = spack.mirror.create( @@ -220,6 +292,7 @@ def mirror(parser, args): 'add': mirror_add, 'remove': mirror_remove, 'rm': mirror_remove, + 'set-url': mirror_set_url, 'list': mirror_list} if args.no_checksum: diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py index 10257b2608..f1ce050a90 100644 --- a/lib/spack/spack/cmd/url.py +++ b/lib/spack/spack/cmd/url.py @@ -5,10 +5,8 @@ from __future__ import division, print_function from collections import defaultdict -try: - from urllib.parse import urlparse -except ImportError: - from urlparse import urlparse + +import six.moves.urllib.parse as urllib_parse import spack.fetch_strategy as fs import spack.repo @@ -262,7 +260,7 @@ def url_stats(args): self.checksums[algo] += 1 # parse out the URL scheme (https/http/ftp/etc.) - urlinfo = urlparse(fetcher.url) + urlinfo = urllib_parse.urlparse(fetcher.url) self.schemes[urlinfo.scheme] += 1 elif url_type == 'git': diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 32239d81ce..4812211812 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -23,6 +23,7 @@ in order to build it. They need to define the following methods: Archive a source directory, e.g. for creating a mirror. """ import os +import os.path import sys import re import shutil @@ -30,6 +31,7 @@ import copy import xml.etree.ElementTree from functools import wraps from six import string_types, with_metaclass +import six.moves.urllib.parse as urllib_parse import llnl.util.tty as tty from llnl.util.filesystem import ( @@ -39,6 +41,9 @@ import spack.config import spack.error import spack.util.crypto as crypto import spack.util.pattern as pattern +import spack.util.web as web_util +import spack.util.url as url_util + from spack.util.executable import which from spack.util.string import comma_and, quote from spack.version import Version, ver @@ -48,6 +53,17 @@ from spack.util.compression import decompressor_for, extension #: List of all fetch strategies, created by FetchStrategy metaclass. all_strategies = [] +CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = ( + "The contents of {subject} look like {content_type}. Either the URL" + " you are trying to use does not exist or you have an internet gateway" + " issue. You can remove the bad archive using 'spack clean" + " ', then try again using the correct URL.") + + +def warn_content_type_mismatch(subject, content_type='HTML'): + tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format( + subject=subject, content_type=content_type)) + def _needs_stage(fun): """Many methods on fetch strategies require a stage to be set @@ -351,12 +367,7 @@ class URLFetchStrategy(FetchStrategy): content_types = re.findall(r'Content-Type:[^\r\n]+', headers, flags=re.IGNORECASE) if content_types and 'text/html' in content_types[-1]: - msg = ("The contents of {0} look like HTML. Either the URL " - "you are trying to use does not exist or you have an " - "internet gateway issue. 
You can remove the bad archive " - "using 'spack clean ', then try again using " - "the correct URL.") - tty.warn(msg.format(self.archive_file or "the archive")) + warn_content_type_mismatch(self.archive_file or "the archive") if save_file: os.rename(partial_file, save_file) @@ -449,7 +460,10 @@ class URLFetchStrategy(FetchStrategy): if not self.archive_file: raise NoArchiveFileError("Cannot call archive() before fetching.") - shutil.copyfile(self.archive_file, destination) + web_util.push_to_url( + self.archive_file, + destination, + keep_original=True) @_needs_stage def check(self): @@ -1063,6 +1077,54 @@ class HgFetchStrategy(VCSFetchStrategy): return "[hg] %s" % self.url +class S3FetchStrategy(URLFetchStrategy): + """FetchStrategy that pulls from an S3 bucket.""" + enabled = True + url_attr = 's3' + + def __init__(self, *args, **kwargs): + try: + super(S3FetchStrategy, self).__init__(*args, **kwargs) + except ValueError: + if not kwargs.get('url'): + raise ValueError( + "S3FetchStrategy requires a url for fetching.") + + @_needs_stage + def fetch(self): + if self.archive_file: + tty.msg("Already downloaded %s" % self.archive_file) + return + + parsed_url = url_util.parse(self.url) + if parsed_url.scheme != 's3': + raise ValueError( + 'S3FetchStrategy can only fetch from s3:// urls.') + + tty.msg("Fetching %s" % self.url) + + basename = os.path.basename(parsed_url.path) + + with working_dir(self.stage.path): + _, headers, stream = web_util.read_from_url(self.url) + + with open(basename, 'wb') as f: + shutil.copyfileobj(stream, f) + + content_type = headers['Content-type'] + + if content_type == 'text/html': + warn_content_type_mismatch(self.archive_file or "the archive") + + if self.stage.save_filename: + os.rename( + os.path.join(self.stage.path, basename), + self.stage.save_filename) + + if not self.archive_file: + raise FailedDownloadError(self.url) + + def from_url(url): """Given a URL, find an appropriate fetch strategy for it. Currently just gives you a URLFetchStrategy that uses curl. @@ -1206,6 +1268,34 @@ def for_package_version(pkg, version): raise InvalidArgsError(pkg, version, **args) +def from_url_scheme(url, *args, **kwargs): + """Finds a suitable FetchStrategy by matching its url_attr with the scheme + in the given url.""" + + url = kwargs.get('url', url) + parsed_url = urllib_parse.urlparse(url, scheme='file') + + scheme_mapping = ( + kwargs.get('scheme_mapping') or + { + 'file': 'url', + 'http': 'url', + 'https': 'url' + }) + + scheme = parsed_url.scheme + scheme = scheme_mapping.get(scheme, scheme) + + for fetcher in all_strategies: + url_attr = getattr(fetcher, 'url_attr', None) + if url_attr and url_attr == scheme: + return fetcher(url, *args, **kwargs) + + raise ValueError( + 'No FetchStrategy found for url with scheme: "{SCHEME}"'.format( + SCHEME=parsed_url.scheme)) + + def from_list_url(pkg): """If a package provides a URL which lists URLs for resources by version, this can can create a fetcher for a URL discovered for diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index 45aa779c69..e2329b6861 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -13,6 +13,18 @@ to download packages directly from a mirror (e.g., on an intranet). 
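# A minimal sketch of the scheme dispatch performed by the new
# fetch_strategy.from_url_scheme() above: http/https/file URLs map to
# URLFetchStrategy (url_attr 'url'), while s3:// URLs select the new
# S3FetchStrategy (url_attr 's3'). The URLs are made-up examples.
import spack.fetch_strategy as fs

https_fetcher = fs.from_url_scheme('https://example.com/pkg-1.0.tar.gz')
s3_fetcher = fs.from_url_scheme('s3://example-bucket/mirror/pkg-1.0.tar.gz')

assert isinstance(https_fetcher, fs.URLFetchStrategy)
assert isinstance(s3_fetcher, fs.S3FetchStrategy)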
""" import sys import os +import os.path +import operator + +import six + +import ruamel.yaml.error as yaml_error + +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping + import llnl.util.tty as tty from llnl.util.filesystem import mkdirp @@ -20,9 +32,205 @@ import spack.config import spack.error import spack.url as url import spack.fetch_strategy as fs -from spack.spec import Spec +import spack.util.spack_json as sjson +import spack.util.spack_yaml as syaml +import spack.util.url as url_util +import spack.spec from spack.version import VersionList from spack.util.compression import allowed_archive +from spack.util.spack_yaml import syaml_dict + + +def _display_mirror_entry(size, name, url, type_=None): + if type_: + type_ = "".join((" (", type_, ")")) + else: + type_ = "" + + print("%-*s%s%s" % (size + 4, name, url, type_)) + + +class Mirror(object): + """Represents a named location for storing source tarballs and binary + packages. + + Mirrors have a fetch_url that indicate where and how artifacts are fetched + from them, and a push_url that indicate where and how artifacts are pushed + to them. These two URLs are usually the same. + """ + + def __init__(self, fetch_url, push_url=None, name=None): + self._fetch_url = fetch_url + self._push_url = push_url + self._name = name + + def to_json(self, stream=None): + return sjson.dump(self.to_dict(), stream) + + def to_yaml(self, stream=None): + return syaml.dump(self.to_dict(), stream) + + @staticmethod + def from_yaml(stream, name=None): + try: + data = syaml.load(stream) + return Mirror.from_dict(data, name) + except yaml_error.MarkedYAMLError as e: + raise syaml.SpackYAMLError("error parsing YAML spec:", str(e)) + + @staticmethod + def from_json(stream, name=None): + d = sjson.load(stream) + return Mirror.from_dict(d, name) + + def to_dict(self): + if self._push_url is None: + return self._fetch_url + else: + return syaml_dict([ + ('fetch', self._fetch_url), + ('push', self._push_url)]) + + @staticmethod + def from_dict(d, name=None): + if isinstance(d, six.string_types): + return Mirror(d, name=name) + else: + return Mirror(d['fetch'], d['push'], name) + + def display(self, max_len=0): + if self._push_url is None: + _display_mirror_entry(max_len, self._name, self._fetch_url) + else: + _display_mirror_entry( + max_len, self._name, self._fetch_url, "fetch") + _display_mirror_entry( + max_len, self._name, self._push_url, "push") + + def __str__(self): + name = self._name + if name is None: + name = '' + else: + name = ' "%s"' % name + + if self._push_url is None: + return "[Mirror%s (%s)]" % (name, self._fetch_url) + + return "[Mirror%s (fetch: %s, push: %s)]" % ( + name, self._fetch_url, self._push_url) + + def __repr__(self): + return ''.join(( + 'Mirror(', + ', '.join( + '%s=%s' % (k, repr(v)) + for k, v in ( + ('fetch_url', self._fetch_url), + ('push_url', self._push_url), + ('name', self._name)) + if k == 'fetch_url' or v), + ')' + )) + + @property + def name(self): + return self._name or "" + + @property + def fetch_url(self): + return self._fetch_url + + @fetch_url.setter + def fetch_url(self, url): + self._fetch_url = url + self._normalize() + + @property + def push_url(self): + if self._push_url is None: + return self._fetch_url + return self._push_url + + @push_url.setter + def push_url(self, url): + self._push_url = url + self._normalize() + + def _normalize(self): + if self._push_url is not None and self._push_url == self._fetch_url: + self._push_url = None + + +class 
MirrorCollection(Mapping): + """A mapping of mirror names to mirrors.""" + + def __init__(self, mirrors=None, scope=None): + self._mirrors = dict( + (name, Mirror.from_dict(mirror, name)) + for name, mirror in ( + mirrors.items() if mirrors is not None else + spack.config.get('mirrors', scope=scope).items())) + + def to_json(self, stream=None): + return sjson.dump(self.to_dict(True), stream) + + def to_yaml(self, stream=None): + return syaml.dump(self.to_dict(True), stream) + + @staticmethod + def from_yaml(stream, name=None): + try: + data = syaml.load(stream) + return MirrorCollection(data) + except yaml_error.MarkedYAMLError as e: + raise syaml.SpackYAMLError("error parsing YAML spec:", str(e)) + + @staticmethod + def from_json(stream, name=None): + d = sjson.load(stream) + return MirrorCollection(d) + + def to_dict(self, recursive=False): + return syaml_dict(sorted( + ( + (k, (v.to_dict() if recursive else v)) + for (k, v) in self._mirrors.items() + ), key=operator.itemgetter(0) + )) + + @staticmethod + def from_dict(d): + return MirrorCollection(d) + + def __getitem__(self, item): + return self._mirrors[item] + + def display(self): + max_len = max(len(mirror.name) for mirror in self._mirrors.values()) + for mirror in self._mirrors.values(): + mirror.display(max_len) + + def lookup(self, name_or_url): + """Looks up and returns a Mirror. + + If this MirrorCollection contains a named Mirror under the name + [name_or_url], then that mirror is returned. Otherwise, [name_or_url] + is assumed to be a mirror URL, and an anonymous mirror with the given + URL is returned. + """ + result = self.get(name_or_url) + + if result is None: + result = Mirror(fetch_url=name_or_url) + + return result + + def __iter__(self): + return iter(self._mirrors) + + def __len__(self): + return len(self._mirrors) def mirror_archive_filename(spec, fetcher, resource_id=None): @@ -114,7 +322,7 @@ def get_matching_versions(specs, **kwargs): # Generate only versions that satisfy the spec. if spec.concrete or v.satisfies(spec.versions): - s = Spec(pkg.name) + s = spack.spec.Spec(pkg.name) s.versions = VersionList([v]) s.variants = spec.variants.copy() # This is needed to avoid hanging references during the @@ -166,12 +374,17 @@ def create(path, specs, **kwargs): it creates specs for those versions. If the version satisfies any spec in the specs list, it is downloaded and added to the mirror. """ + parsed = url_util.parse(path) + mirror_root = url_util.local_file_path(parsed) + # Make sure nothing is in the way. - if os.path.isfile(path): - raise MirrorError("%s already exists and is a file." % path) + if mirror_root and os.path.isfile(mirror_root): + raise MirrorError("%s already exists and is a file." % mirror_root) # automatically spec-ify anything in the specs array. - specs = [s if isinstance(s, Spec) else Spec(s) for s in specs] + specs = [ + s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s) + for s in specs] # Get concrete specs for each matching version of these specs. version_specs = get_matching_versions( @@ -180,8 +393,7 @@ def create(path, specs, **kwargs): s.concretize() # Get the absolute path of the root before we start jumping around. 
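# A minimal sketch of MirrorCollection.lookup(), which is what lets commands
# such as 'spack buildcache create -d ...' accept either a configured mirror
# name or a raw push URL. A known name returns that mirror's configuration; any
# other value becomes an anonymous Mirror whose fetch and push URLs are the
# value itself. 'my-mirror' and the S3 URL are made-up examples.
import spack.mirror
import spack.util.url as url_util

mirrors = spack.mirror.MirrorCollection()

named = mirrors.lookup('my-mirror')                   # configured entry, if defined
anon = mirrors.lookup('s3://example-bucket/mirror')   # anonymous mirror from a URL

push_target = url_util.format(anon.push_url)          # 's3://example-bucket/mirror'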
- mirror_root = os.path.abspath(path) - if not os.path.isdir(mirror_root): + if mirror_root and not os.path.isdir(mirror_root): try: mkdirp(mirror_root) except OSError as e: @@ -195,12 +407,12 @@ def create(path, specs, **kwargs): 'error': [] } - mirror_cache = spack.caches.MirrorCache(mirror_root) + mirror_cache = spack.caches.MirrorCache(parsed) try: spack.caches.mirror_cache = mirror_cache # Iterate through packages and download all safe tarballs for each for spec in version_specs: - add_single_spec(spec, mirror_root, categories, **kwargs) + add_single_spec(spec, parsed, categories, **kwargs) finally: spack.caches.mirror_cache = None diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py new file mode 100644 index 0000000000..2a54b9ecb1 --- /dev/null +++ b/lib/spack/spack/s3_handler.py @@ -0,0 +1,92 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from io import BufferedReader + +import six.moves.urllib.response as urllib_response +import six.moves.urllib.request as urllib_request +import six.moves.urllib.error as urllib_error + +import spack.util.s3 as s3_util +import spack.util.url as url_util +import spack.util.web as web_util + + +# NOTE(opadron): Workaround issue in boto where its StreamingBody +# implementation is missing several APIs expected from IOBase. These missing +# APIs prevent the streams returned by boto from being passed as-are along to +# urllib. +# +# https://github.com/boto/botocore/issues/879 +# https://github.com/python/cpython/pull/3249 +class WrapStream(BufferedReader): + def __init__(self, raw): + raw.readable = lambda: True + raw.writable = lambda: False + raw.seekable = lambda: False + raw.closed = False + raw.flush = lambda: None + super(WrapStream, self).__init__(raw) + + def detach(self): + self.raw = None + + def read(self, *args, **kwargs): + return self.raw.read(*args, **kwargs) + + def __getattr__(self, key): + return getattr(self.raw, key) + + +def _s3_open(url): + parsed = url_util.parse(url) + s3 = s3_util.create_s3_session(parsed) + + bucket = parsed.netloc + key = parsed.path + + if key.startswith('/'): + key = key[1:] + + obj = s3.get_object(Bucket=bucket, Key=key) + + # NOTE(opadron): Apply workaround here (see above) + stream = WrapStream(obj['Body']) + headers = web_util.standardize_header_names( + obj['ResponseMetadata']['HTTPHeaders']) + + return url, headers, stream + + +class UrllibS3Handler(urllib_request.HTTPSHandler): + def s3_open(self, req): + orig_url = req.get_full_url() + from botocore.exceptions import ClientError + try: + url, headers, stream = _s3_open(orig_url) + return urllib_response.addinfourl(stream, headers, url) + except ClientError as err: + # if no such [KEY], but [KEY]/index.html exists, + # return that, instead. 
+ if err.response['Error']['Code'] == 'NoSuchKey': + try: + _, headers, stream = _s3_open( + url_util.join(orig_url, 'index.html')) + return urllib_response.addinfourl( + stream, headers, orig_url) + + except ClientError as err2: + if err.response['Error']['Code'] == 'NoSuchKey': + # raise original error + raise urllib_error.URLError(err) + + raise urllib_error.URLError(err2) + + raise urllib_error.URLError(err) + + +S3OpenerDirector = urllib_request.build_opener(UrllibS3Handler()) + +open = S3OpenerDirector.open diff --git a/lib/spack/spack/schema/mirrors.py b/lib/spack/spack/schema/mirrors.py index 551267bd4f..92e6c9bca1 100644 --- a/lib/spack/spack/schema/mirrors.py +++ b/lib/spack/spack/schema/mirrors.py @@ -17,7 +17,19 @@ properties = { 'default': {}, 'additionalProperties': False, 'patternProperties': { - r'\w[\w-]*': {'type': 'string'}, + r'\w[\w-]*': { + 'anyOf': [ + {'type': 'string'}, + { + 'type': 'object', + 'required': ['fetch', 'push'], + 'properties': { + 'fetch': {'type': 'string'}, + 'push': {'type': 'string'} + } + } + ] + }, }, }, } diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 6b27d37adf..9621938bcd 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -12,7 +12,6 @@ import tempfile import getpass from six import string_types from six import iteritems -from six.moves.urllib.parse import urljoin import llnl.util.tty as tty from llnl.util.filesystem import mkdirp, can_access, install, install_tree @@ -20,12 +19,16 @@ from llnl.util.filesystem import partition_path, remove_linked_tree import spack.paths import spack.caches +import spack.cmd import spack.config import spack.error +import spack.mirror import spack.util.lock import spack.fetch_strategy as fs import spack.util.pattern as pattern import spack.util.path as sup +import spack.util.url as url_util + from spack.util.crypto import prefix_bits, bit_length @@ -252,7 +255,7 @@ class Stage(object): # TODO: fetch/stage coupling needs to be reworked -- the logic # TODO: here is convoluted and not modular enough. if isinstance(url_or_fetch_strategy, string_types): - self.fetcher = fs.from_url(url_or_fetch_strategy) + self.fetcher = fs.from_url_scheme(url_or_fetch_strategy) elif isinstance(url_or_fetch_strategy, fs.FetchStrategy): self.fetcher = url_or_fetch_strategy else: @@ -397,16 +400,9 @@ class Stage(object): # TODO: CompositeFetchStrategy here. self.skip_checksum_for_mirror = True if self.mirror_path: - mirrors = spack.config.get('mirrors') - - # Join URLs of mirror roots with mirror paths. Because - # urljoin() will strip everything past the final '/' in - # the root, so we add a '/' if it is not present. - mir_roots = [ - sup.substitute_path_variables(root) if root.endswith(os.sep) - else sup.substitute_path_variables(root) + os.sep - for root in mirrors.values()] - urls = [urljoin(root, self.mirror_path) for root in mir_roots] + urls = [ + url_util.join(mirror.fetch_url, self.mirror_path) + for mirror in spack.mirror.MirrorCollection().values()] # If this archive is normally fetched from a tarball URL, # then use the same digest. 
`spack mirror` ensures that @@ -425,9 +421,12 @@ class Stage(object): # Add URL strategies for all the mirrors with the digest for url in urls: - fetchers.insert( - 0, fs.URLFetchStrategy( - url, digest, expand=expand, extension=extension)) + fetchers.append(fs.from_url_scheme( + url, digest, expand=expand, extension=extension)) + # fetchers.insert( + # 0, fs.URLFetchStrategy( + # url, digest, expand=expand, extension=extension)) + if self.default_fetcher.cachable: fetchers.insert( 0, spack.caches.fetch_cache.fetcher( @@ -708,6 +707,91 @@ def purge(): remove_linked_tree(stage_path) +def get_checksums_for_versions( + url_dict, name, first_stage_function=None, keep_stage=False): + """Fetches and checksums archives from URLs. + + This function is called by both ``spack checksum`` and ``spack + create``. The ``first_stage_function`` argument allows the caller to + inspect the first downloaded archive, e.g., to determine the build + system. + + Args: + url_dict (dict): A dictionary of the form: version -> URL + name (str): The name of the package + first_stage_function (callable): function that takes a Stage and a URL; + this is run on the stage of the first URL downloaded + keep_stage (bool): whether to keep staging area when command completes + + Returns: + (str): A multi-line string containing versions and corresponding hashes + + """ + sorted_versions = sorted(url_dict.keys(), reverse=True) + + # Find length of longest string in the list for padding + max_len = max(len(str(v)) for v in sorted_versions) + num_ver = len(sorted_versions) + + tty.msg("Found {0} version{1} of {2}:".format( + num_ver, '' if num_ver == 1 else 's', name), + "", + *spack.cmd.elide_list( + ["{0:{1}} {2}".format(str(v), max_len, url_dict[v]) + for v in sorted_versions])) + tty.msg('') + + archives_to_fetch = tty.get_number( + "How many would you like to checksum?", default=1, abort='q') + + if not archives_to_fetch: + tty.die("Aborted.") + + versions = sorted_versions[:archives_to_fetch] + urls = [url_dict[v] for v in versions] + + tty.msg("Downloading...") + version_hashes = [] + i = 0 + for url, version in zip(urls, versions): + try: + with Stage(url, keep=keep_stage) as stage: + # Fetch the archive + stage.fetch() + if i == 0 and first_stage_function: + # Only run first_stage_function the first time, + # no need to run it every time + first_stage_function(stage, url) + + # Checksum the archive and add it to the list + version_hashes.append((version, spack.util.crypto.checksum( + hashlib.sha256, stage.archive_file))) + i += 1 + except FailedDownloadError: + tty.msg("Failed to fetch {0}".format(url)) + except Exception as e: + tty.msg("Something failed on {0}, skipping.".format(url), + " ({0})".format(e)) + + if not version_hashes: + tty.die("Could not fetch any versions for {0}".format(name)) + + # Find length of longest string in the list for padding + max_len = max(len(str(v)) for v, h in version_hashes) + + # Generate the version directives to put in a package.py + version_lines = "\n".join([ + " version('{0}', {1}sha256='{2}')".format( + v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes + ]) + + num_hash = len(version_hashes) + tty.msg("Checksummed {0} version{1} of {2}".format( + num_hash, '' if num_hash == 1 else 's', name)) + + return version_lines + + class StageError(spack.error.SpackError): """"Superclass for all errors encountered during staging.""" @@ -720,5 +804,9 @@ class RestageError(StageError): """"Error encountered during restaging.""" +class VersionFetchError(StageError): + """Raised 
when we can't determine a URL to fetch a package.""" + + # Keep this in namespace for convenience FailedDownloadError = fs.FailedDownloadError diff --git a/lib/spack/spack/test/cmd/pkg.py b/lib/spack/spack/test/cmd/pkg.py index c9e8e74c4a..e7d1006cc1 100644 --- a/lib/spack/spack/test/cmd/pkg.py +++ b/lib/spack/spack/test/cmd/pkg.py @@ -53,6 +53,8 @@ def mock_pkg_git_repo(tmpdir_factory): # initial commit with mock packages git('add', '.') + git('config', 'user.email', 'testing@spack.io') + git('config', 'user.name', 'Spack Testing') git('commit', '-m', 'initial mock repo commit') # add commit with pkg-a, pkg-b, pkg-c packages diff --git a/lib/spack/spack/test/config.py b/lib/spack/spack/test/config.py index 2de4e55281..3b85bb2a23 100644 --- a/lib/spack/spack/test/config.py +++ b/lib/spack/spack/test/config.py @@ -595,6 +595,7 @@ def test_bad_config_section(mock_config): spack.config.get('foobar') +@pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_bad_command_line_scopes(tmpdir, mock_config): cfg = spack.config.Configuration() diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index cf962ada4f..d8081d108c 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -546,6 +546,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path): timeout_write(lock_path, 5, 1)) +@pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_read_lock_on_read_only_lockfile(lock_dir, lock_path): """read-only directory, read-only lockfile.""" touch(lock_path) @@ -573,6 +574,7 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path): pass +@pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_read_lock_no_lockfile(lock_dir, lock_path): """read-only directory, no lockfile (so can't create).""" with read_only(lock_dir): diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py index 66b358435f..cc4f944867 100644 --- a/lib/spack/spack/test/stage.py +++ b/lib/spack/spack/test/stage.py @@ -653,6 +653,7 @@ class TestStage(object): assert source_path.endswith(spack.stage._source_path_subdir) assert not os.path.exists(source_path) + @pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_first_accessible_path(self, tmpdir): """Test _first_accessible_path names.""" spack_dir = tmpdir.join('paths') @@ -783,6 +784,7 @@ class TestStage(object): assert spack.stage._resolve_paths(paths) == res_paths + @pytest.mark.skipif(os.getuid() == 0, reason='user is root') def test_get_stage_root_bad_path(self, clear_stage_root): """Ensure an invalid stage path root raises a StageError.""" with spack.config.override('config:build_stage', '/no/such/path'): diff --git a/lib/spack/spack/util/s3.py b/lib/spack/spack/util/s3.py new file mode 100644 index 0000000000..ee6b3d56cf --- /dev/null +++ b/lib/spack/spack/util/s3.py @@ -0,0 +1,44 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import os + +import six.moves.urllib.parse as urllib_parse + +import spack +import spack.util.url as url_util + + +def create_s3_session(url): + url = url_util.parse(url) + if url.scheme != 's3': + raise ValueError( + 'Can not create S3 session from URL with scheme: {SCHEME}'.format( + SCHEME=url.scheme)) + + # NOTE(opadron): import boto and friends as late as possible. 
We don't + # want to require boto as a dependency unless the user actually wants to + # access S3 mirrors. + from boto3 import Session + + session = Session() + + s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')} + + endpoint_url = os.environ.get('S3_ENDPOINT_URL') + if endpoint_url: + if urllib_parse.urlparse(endpoint_url, scheme=None).scheme is None: + endpoint_url = '://'.join(('https', endpoint_url)) + + s3_client_args['endpoint_url'] = endpoint_url + + # if no access credentials provided above, then access anonymously + if not session.get_credentials(): + from botocore import UNSIGNED + from botocore.client import Config + + s3_client_args["config"] = Config(signature_version=UNSIGNED) + + return session.client('s3', **s3_client_args) diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py new file mode 100644 index 0000000000..6b2786f244 --- /dev/null +++ b/lib/spack/spack/util/url.py @@ -0,0 +1,175 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +""" +Utility functions for parsing, formatting, and manipulating URLs. +""" + +import itertools +import os.path + +from six import string_types +import six.moves.urllib.parse as urllib_parse + +import spack.util.path + + +def _split_all(path): + """Split path into its atomic components. + + Returns the shortest list, L, of strings such that os.path.join(*L) == path + and os.path.split(element) == ('', element) for every element in L except + possibly the first. This first element may possibly have the value of '/', + or some other OS-dependent path root. + """ + result = [] + a = path + old_a = None + while a != old_a: + (old_a, (a, b)) = a, os.path.split(a) + + if a or b: + result.insert(0, b or '/') + + return result + + +def local_file_path(url): + """Get a local file path from a url. + + If url is a file:// URL, return the absolute path to the local + file or directory referenced by it. Otherwise, return None. + """ + if isinstance(url, string_types): + url = parse(url) + + if url.scheme == 'file': + return url.path + return None + + +def parse(url, scheme='file'): + """Parse a mirror url. + + For file:// URLs, the netloc and path components are concatenated and + passed through spack.util.path.canoncalize_path(). + + Otherwise, the returned value is the same as urllib's urlparse() with + allow_fragments=False. + """ + + url_obj = ( + urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False) + if isinstance(url, string_types) else url) + + (scheme, netloc, path, params, query, _) = url_obj + scheme = (scheme or 'file').lower() + + if scheme == 'file': + path = spack.util.path.canonicalize_path(netloc + path) + while path.startswith('//'): + path = path[1:] + netloc = '' + + return urllib_parse.ParseResult(scheme=scheme, + netloc=netloc, + path=path, + params=params, + query=query, + fragment=None) + + +def format(parsed_url): + """Format a URL string + + Returns a canonicalized format of the given URL as a string. 
+ """ + if isinstance(parsed_url, string_types): + parsed_url = parse(parsed_url) + + return parsed_url.geturl() + + +def join(base_url, path, *extra, **kwargs): + """Joins a base URL with one or more local URL path components + + If resolve_href is True, treat the base URL as though it where the locator + of a web page, and the remaining URL path components as though they formed + a relative URL to be resolved against it (i.e.: as in os.path.join(...)). + The result is an absolute URL to the resource to which a user's browser + would navigate if they clicked on a link with an "href" attribute equal to + the relative URL. + + If resolve_href is False (default), then the URL path components are joined + as in os.path.join(). + + Examples: + base_url = 's3://bucket/index.html' + body = fetch_body(prefix) + link = get_href(body) # link == '../other-bucket/document.txt' + + # wrong - link is a local URL that needs to be resolved against base_url + spack.util.url.join(base_url, link) + 's3://bucket/other_bucket/document.txt' + + # correct - resolve local URL against base_url + spack.util.url.join(base_url, link, resolve_href=True) + 's3://other_bucket/document.txt' + + prefix = 'https://mirror.spack.io/build_cache' + + # wrong - prefix is just a URL prefix + spack.util.url.join(prefix, 'my-package', resolve_href=True) + 'https://mirror.spack.io/my-package' + + # correct - simply append additional URL path components + spack.util.url.join(prefix, 'my-package', resolve_href=False) # default + 'https://mirror.spack.io/build_cache/my-package' + """ + base_url = parse(base_url) + resolve_href = kwargs.get('resolve_href', False) + + (scheme, netloc, base_path, params, query, _) = base_url + scheme = scheme.lower() + + path_tokens = [ + part for part in itertools.chain( + _split_all(path), + itertools.chain.from_iterable( + _split_all(extra_path) for extra_path in extra)) + if part and part != '/'] + + base_path_args = ['/fake-root'] + if scheme == 's3': + if netloc: + base_path_args.append(netloc) + + if base_path.startswith('/'): + base_path = base_path[1:] + + base_path_args.append(base_path) + + if resolve_href: + new_base_path, _ = os.path.split(os.path.join(*base_path_args)) + base_path_args = [new_base_path] + + base_path_args.extend(path_tokens) + base_path = os.path.relpath(os.path.join(*base_path_args), '/fake-root') + + if scheme == 's3': + path_tokens = [ + part for part in _split_all(base_path) + if part and part != '/'] + + if path_tokens: + netloc = path_tokens.pop(0) + base_path = os.path.join('', *path_tokens) + + return format(urllib_parse.ParseResult(scheme=scheme, + netloc=netloc, + path=base_path, + params=params, + query=query, + fragment=None)) diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index da2d5bbeb9..e0a23fb444 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -5,16 +5,21 @@ from __future__ import print_function +import codecs +import errno import re import os +import os.path +import shutil import ssl import sys import traceback -import hashlib +from itertools import product + +import six from six.moves.urllib.request import urlopen, Request from six.moves.urllib.error import URLError -from six.moves.urllib.parse import urljoin import multiprocessing.pool try: @@ -28,20 +33,47 @@ except ImportError: class HTMLParseError(Exception): pass +from llnl.util.filesystem import mkdirp import llnl.util.tty as tty -import spack.config import spack.cmd -import spack.url -import spack.stage +import spack.config import spack.error 
+import spack.url import spack.util.crypto +import spack.util.s3 as s3_util +import spack.util.url as url_util + from spack.util.compression import ALLOWED_ARCHIVE_TYPES # Timeout in seconds for web requests _timeout = 10 +# See docstring for standardize_header_names() +_separators = ('', ' ', '_', '-') +HTTP_HEADER_NAME_ALIASES = { + "Accept-ranges": set( + ''.join((A, 'ccept', sep, R, 'anges')) + for A, sep, R in product('Aa', _separators, 'Rr')), + + "Content-length": set( + ''.join((C, 'ontent', sep, L, 'ength')) + for C, sep, L in product('Cc', _separators, 'Ll')), + + "Content-type": set( + ''.join((C, 'ontent', sep, T, 'ype')) + for C, sep, T in product('Cc', _separators, 'Tt')), + + "Date": set(('Date', 'date')), + + "Last-modified": set( + ''.join((L, 'ast', sep, M, 'odified')) + for L, sep, M in product('Ll', _separators, 'Mm')), + + "Server": set(('Server', 'server')) +} + class LinkParser(HTMLParser): """This parser just takes an HTML page and strips out the hrefs on the @@ -59,7 +91,7 @@ class LinkParser(HTMLParser): class NonDaemonProcess(multiprocessing.Process): - """Process tha allows sub-processes, so pools can have sub-pools.""" + """Process that allows sub-processes, so pools can have sub-pools.""" @property def daemon(self): return False @@ -86,25 +118,53 @@ else: super(NonDaemonPool, self).__init__(*args, **kwargs) -def _read_from_url(url, accept_content_type=None): +def uses_ssl(parsed_url): + if parsed_url.scheme == 'https': + return True + + if parsed_url.scheme == 's3': + endpoint_url = os.environ.get('S3_ENDPOINT_URL') + if not endpoint_url: + return True + + if url_util.parse(endpoint_url, scheme='https').scheme == 'https': + return True + + return False + + +__UNABLE_TO_VERIFY_SSL = ( + lambda pyver: ( + (pyver < (2, 7, 9)) or + ((3,) < pyver < (3, 4, 3)) + ))(sys.version_info) + + +def read_from_url(url, accept_content_type=None): + url = url_util.parse(url) context = None - verify_ssl = spack.config.get('config:verify_ssl') - pyver = sys.version_info - if (pyver < (2, 7, 9) or (3,) < pyver < (3, 4, 3)): - if verify_ssl: - tty.warn("Spack will not check SSL certificates. You need to " - "update your Python to enable certificate " - "verification.") - elif verify_ssl: - # without a defined context, urlopen will not verify the ssl cert for - # python 3.x - context = ssl.create_default_context() - else: - context = ssl._create_unverified_context() - req = Request(url) + verify_ssl = spack.config.get('config:verify_ssl') - if accept_content_type: + # Don't even bother with a context unless the URL scheme is one that uses + # SSL certs. + if uses_ssl(url): + if verify_ssl: + if __UNABLE_TO_VERIFY_SSL: + # User wants SSL verification, but it cannot be provided. + warn_no_ssl_cert_checking() + else: + # User wants SSL verification, and it *can* be provided. + context = ssl.create_default_context() + else: + # User has explicitly indicated that they do not want SSL + # verification. + context = ssl._create_unverified_context() + + req = Request(url_util.format(url)) + content_type = None + is_web_url = url.scheme in ('http', 'https') + if accept_content_type and is_web_url: # Make a HEAD request first to check the content type. This lets # us ignore tarballs and gigantic files. 
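
A short sketch of how the SSL decision above can be exercised (the endpoint and bucket names are hypothetical; assumes spack.util.web and spack.util.url import cleanly):

    import os
    import spack.util.url as url_util
    import spack.util.web as web_util

    # https:// URLs always need an SSL context
    assert web_util.uses_ssl(url_util.parse('https://mirror.spack.io'))

    # s3:// URLs use SSL unless a plain-http custom endpoint is configured
    os.environ['S3_ENDPOINT_URL'] = 'http://localhost:9000'  # e.g. a local object store
    assert not web_util.uses_ssl(url_util.parse('s3://my-bucket/prefix'))

    del os.environ['S3_ENDPOINT_URL']
    assert web_util.uses_ssl(url_util.parse('s3://my-bucket/prefix'))
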
# It would be nice to do this with the HTTP Accept header to avoid @@ -113,29 +173,179 @@ def _read_from_url(url, accept_content_type=None): req.get_method = lambda: "HEAD" resp = _urlopen(req, timeout=_timeout, context=context) - if "Content-type" not in resp.headers: - tty.debug("ignoring page " + url) - return None, None - - if not resp.headers["Content-type"].startswith(accept_content_type): - tty.debug("ignoring page " + url + " with content type " + - resp.headers["Content-type"]) - return None, None + content_type = resp.headers.get('Content-type') # Do the real GET request when we know it's just HTML. req.get_method = lambda: "GET" response = _urlopen(req, timeout=_timeout, context=context) - response_url = response.geturl() - # Read the page and and stick it in the map we'll return - page = response.read().decode('utf-8') + if accept_content_type and not is_web_url: + content_type = response.headers.get('Content-type') - return response_url, page + reject_content_type = ( + accept_content_type and ( + content_type is None or + not content_type.startswith(accept_content_type))) + if reject_content_type: + tty.debug("ignoring page {0}{1}{2}".format( + url_util.format(url), + " with content type " if content_type is not None else "", + content_type or "")) -def read_from_url(url, accept_content_type=None): - resp_url, contents = _read_from_url(url, accept_content_type) - return contents + return None, None, None + + return response.geturl(), response.headers, response + + +def warn_no_ssl_cert_checking(): + tty.warn("Spack will not check SSL certificates. You need to update " + "your Python to enable certificate verification.") + + +def push_to_url(local_path, remote_path, **kwargs): + keep_original = kwargs.get('keep_original', True) + + local_url = url_util.parse(local_path) + local_file_path = url_util.local_file_path(local_url) + if local_file_path is None: + raise ValueError('local path must be a file:// url') + + remote_url = url_util.parse(remote_path) + verify_ssl = spack.config.get('config:verify_ssl') + + if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url): + warn_no_ssl_cert_checking() + + remote_file_path = url_util.local_file_path(remote_url) + if remote_file_path is not None: + mkdirp(os.path.dirname(remote_file_path)) + if keep_original: + shutil.copy(local_file_path, remote_file_path) + else: + try: + os.rename(local_file_path, remote_file_path) + except OSError as e: + if e.errno == errno.EXDEV: + # NOTE(opadron): The above move failed because it crosses + # filesystem boundaries. Copy the file (plus original + # metadata), and then delete the original. This operation + # needs to be done in separate steps. 
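
A minimal sketch of driving push_to_url() against a local (file://) mirror; the paths are hypothetical, and the destination directory is created by the mkdirp() call above:

    import spack.util.web as web_util

    # copy a tarball into a local mirror, keeping the original
    web_util.push_to_url(
        '/tmp/my-package-1.0.tar.gz',
        'file:///srv/mirror/build_cache/my-package-1.0.tar.gz',
        keep_original=True)

    # move instead of copy; if /tmp and /srv live on different filesystems,
    # the EXDEV fallback below copies the file and then removes the original
    web_util.push_to_url(
        '/tmp/my-package-1.0.tar.gz',
        'file:///srv/mirror/build_cache/my-package-1.0.tar.gz',
        keep_original=False)
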
+ shutil.copy2(local_file_path, remote_file_path) + os.remove(local_file_path) + + elif remote_url.scheme == 's3': + extra_args = kwargs.get('extra_args', {}) + + remote_path = remote_url.path + while remote_path.startswith('/'): + remote_path = remote_path[1:] + + s3 = s3_util.create_s3_session(remote_url) + s3.upload_file(local_file_path, remote_url.netloc, + remote_path, ExtraArgs=extra_args) + + if not keep_original: + os.remove(local_file_path) + + else: + raise NotImplementedError( + 'Unrecognized URL scheme: {SCHEME}'.format( + SCHEME=remote_url.scheme)) + + +def url_exists(url): + url = url_util.parse(url) + local_path = url_util.local_file_path(url) + if local_path: + return os.path.exists(local_path) + + if url.scheme == 's3': + s3 = s3_util.create_s3_session(url) + from botocore.exceptions import ClientError + try: + s3.get_object(Bucket=url.netloc, Key=url.path) + return True + except ClientError as err: + if err.response['Error']['Code'] == 'NoSuchKey': + return False + raise err + + # otherwise, just try to "read" from the URL, and assume that *any* + # non-throwing response contains the resource represented by the URL + try: + read_from_url(url) + return True + except URLError: + return False + + +def remove_url(url): + url = url_util.parse(url) + + local_path = url_util.local_file_path(url) + if local_path: + os.remove(local_path) + return + + if url.scheme == 's3': + s3 = s3_util.create_s3_session(url) + s3.delete_object(Bucket=url.s3_bucket, Key=url.path) + return + + # Don't even try for other URL schemes. + + +def _list_s3_objects(client, url, num_entries, start_after=None): + list_args = dict( + Bucket=url.netloc, + Prefix=url.path, + MaxKeys=num_entries) + + if start_after is not None: + list_args['StartAfter'] = start_after + + result = client.list_objects_v2(**list_args) + + last_key = None + if result['IsTruncated']: + last_key = result['Contents'][-1]['Key'] + + iter = (key for key in + ( + os.path.relpath(entry['Key'], url.path) + for entry in result['Contents'] + ) + if key != '.') + + return iter, last_key + + +def _iter_s3_prefix(client, url, num_entries=1024): + key = None + while True: + contents, key = _list_s3_objects( + client, url, num_entries, start_after=key) + + for x in contents: + yield x + + if not key: + break + + +def list_url(url): + url = url_util.parse(url) + + local_path = url_util.local_file_path(url) + if local_path: + return os.listdir(local_path) + + if url.scheme == 's3': + s3 = s3_util.create_s3_session(url) + return list(set( + key.split('/', 1)[0] + for key in _iter_s3_prefix(s3, url))) def _spider(url, visited, root, depth, max_depth, raise_on_error): @@ -154,16 +364,12 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error): pages = {} # dict from page URL -> text content. links = set() # set of all links seen on visited pages. - # root may end with index.html -- chop that off. 
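
A sketch of how the existence and listing helpers above might be used against an S3 mirror (the bucket name is hypothetical; boto3 credentials, or anonymous read access, are assumed):

    import spack.util.web as web_util

    mirror = 's3://my-mirror-bucket/build_cache'   # hypothetical mirror URL

    # get_object lookup for s3://, os.path.exists for file://, and a plain
    # read_from_url() attempt for everything else
    if web_util.url_exists(mirror + '/index.html'):
        # top-level entries under the prefix, via paginated list_objects_v2
        for entry in web_util.list_url(mirror):
            print(entry)
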
- if root.endswith('/index.html'): - root = re.sub('/index.html$', '', root) - try: - response_url, page = _read_from_url(url, 'text/html') - - if not response_url or not page: + response_url, _, response = read_from_url(url, 'text/html') + if not response_url or not response: return pages, links + page = codecs.getreader('utf-8')(response).read() pages[response_url] = page # Parse out the links in the page @@ -173,8 +379,10 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error): while link_parser.links: raw_link = link_parser.links.pop() - abs_link = urljoin(response_url, raw_link.strip()) - + abs_link = url_util.join( + response_url, + raw_link.strip(), + resolve_href=True) links.add(abs_link) # Skip stuff that looks like an archive @@ -243,16 +451,28 @@ def _spider_wrapper(args): return _spider(*args) -def _urlopen(*args, **kwargs): +def _urlopen(req, *args, **kwargs): """Wrapper for compatibility with old versions of Python.""" - # We don't pass 'context' parameter to urlopen because it - # was introduces only starting versions 2.7.9 and 3.4.3 of Python. - if 'context' in kwargs and kwargs['context'] is None: + url = req + try: + url = url.get_full_url() + except AttributeError: + pass + + # We don't pass 'context' parameter because it was only introduced starting + # with versions 2.7.9 and 3.4.3 of Python. + if 'context' in kwargs: del kwargs['context'] - return urlopen(*args, **kwargs) + opener = urlopen + if url_util.parse(url).scheme == 's3': + import spack.s3_handler + opener = spack.s3_handler.open -def spider(root_url, depth=0): + return opener(req, *args, **kwargs) + + +def spider(root, depth=0): """Gets web pages from a root URL. If depth is specified (e.g., depth=2), then this will also follow @@ -262,7 +482,9 @@ def spider(root_url, depth=0): performance over a sequential fetch. """ - pages, links = _spider(root_url, set(), root_url, 0, depth, False) + + root = url_util.parse(root) + pages, links = _spider(root, set(), root, 0, depth, False) return pages, links @@ -356,99 +578,112 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0): return versions -def get_checksums_for_versions( - url_dict, name, first_stage_function=None, keep_stage=False): - """Fetches and checksums archives from URLs. - - This function is called by both ``spack checksum`` and ``spack - create``. The ``first_stage_function`` argument allows the caller to - inspect the first downloaded archive, e.g., to determine the build - system. - - Args: - url_dict (dict): A dictionary of the form: version -> URL - name (str): The name of the package - first_stage_function (callable): function that takes a Stage and a URL; - this is run on the stage of the first URL downloaded - keep_stage (bool): whether to keep staging area when command completes - - Returns: - (str): A multi-line string containing versions and corresponding hashes - +def standardize_header_names(headers): + """Replace certain header names with standardized spellings. + + Standardizes the spellings of the following header names: + - Accept-ranges + - Content-length + - Content-type + - Date + - Last-modified + - Server + + Every name considered is translated to one of the above names if the only + difference between the two is how the first letters of each word are + capitalized; whether words are separated; or, if separated, whether they + are so by a dash (-), underscore (_), or space ( ). Header names that + cannot be mapped as described above are returned unaltered. 
+ + For example: The standard spelling of "Content-length" would be substituted + for any of the following names: + - Content-length + - content_length + - contentlength + - content_Length + - contentLength + - content Length + + ... and any other header name, such as "Content-encoding", would not be + altered, regardless of spelling. + + If headers is a string, then it (or an appropriate substitute) is returned. + + If headers is a non-empty tuple, headers[0] is a string, and there exists a + standardized spelling for header[0] that differs from it, then a new tuple + is returned. This tuple has the same elements as headers, except the first + element is the standardized spelling for headers[0]. + + If headers is a sequence, then a new list is considered, where each element + is its corresponding element in headers, but mapped as above if a string or + tuple. This new list is returned if at least one of its elements differ + from their corrsponding element in headers. + + If headers is a mapping, then a new dict is considered, where the key in + each item is the key of its corresponding item in headers, mapped as above + if a string or tuple. The value is taken from the corresponding item. If + the keys of multiple items in headers map to the same key after being + standardized, then the value for the resulting item is undefined. The new + dict is returned if at least one of its items has a key that differs from + that of their corresponding item in headers, or if the keys of multiple + items in headers map to the same key after being standardized. + + In all other cases headers is returned unaltered. """ - sorted_versions = sorted(url_dict.keys(), reverse=True) + if isinstance(headers, six.string_types): + for standardized_spelling, other_spellings in ( + HTTP_HEADER_NAME_ALIASES.items()): + if headers in other_spellings: + if headers == standardized_spelling: + return headers + return standardized_spelling + return headers + + if isinstance(headers, tuple): + if not headers: + return headers + old = headers[0] + if isinstance(old, six.string_types): + new = standardize_header_names(old) + if old is not new: + return (new,) + headers[1:] + return headers - # Find length of longest string in the list for padding - max_len = max(len(str(v)) for v in sorted_versions) - num_ver = len(sorted_versions) + try: + changed = False + new_dict = {} + for key, value in headers.items(): + if isinstance(key, (tuple, six.string_types)): + old_key, key = key, standardize_header_names(key) + changed = changed or key is not old_key - tty.msg("Found {0} version{1} of {2}:".format( - num_ver, '' if num_ver == 1 else 's', name), - "", - *spack.cmd.elide_list( - ["{0:{1}} {2}".format(str(v), max_len, url_dict[v]) - for v in sorted_versions])) - print() + new_dict[key] = value + + return new_dict if changed else headers + except (AttributeError, TypeError, ValueError): + pass - archives_to_fetch = tty.get_number( - "How many would you like to checksum?", default=1, abort='q') + try: + changed = False + new_list = [] + for item in headers: + if isinstance(item, (tuple, six.string_types)): + old_item, item = item, standardize_header_names(item) + changed = changed or item is not old_item - if not archives_to_fetch: - tty.die("Aborted.") + new_list.append(item) - versions = sorted_versions[:archives_to_fetch] - urls = [url_dict[v] for v in versions] + return new_list if changed else headers + except TypeError: + pass - tty.msg("Downloading...") - version_hashes = [] - i = 0 - for url, version in zip(urls, 
versions): - try: - with spack.stage.Stage(url, keep=keep_stage) as stage: - # Fetch the archive - stage.fetch() - if i == 0 and first_stage_function: - # Only run first_stage_function the first time, - # no need to run it every time - first_stage_function(stage, url) - - # Checksum the archive and add it to the list - version_hashes.append((version, spack.util.crypto.checksum( - hashlib.sha256, stage.archive_file))) - i += 1 - except spack.stage.FailedDownloadError: - tty.msg("Failed to fetch {0}".format(url)) - except Exception as e: - tty.msg("Something failed on {0}, skipping.".format(url), - " ({0})".format(e)) - - if not version_hashes: - tty.die("Could not fetch any versions for {0}".format(name)) - - # Find length of longest string in the list for padding - max_len = max(len(str(v)) for v, h in version_hashes) - - # Generate the version directives to put in a package.py - version_lines = "\n".join([ - " version('{0}', {1}sha256='{2}')".format( - v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes - ]) - - num_hash = len(version_hashes) - tty.msg("Checksummed {0} version{1} of {2}".format( - num_hash, '' if num_hash == 1 else 's', name)) - - return version_lines + return headers class SpackWebError(spack.error.SpackError): """Superclass for Spack web spidering errors.""" -class VersionFetchError(SpackWebError): - """Raised when we can't determine a URL to fetch a package.""" - - class NoNetworkConnectionError(SpackWebError): """Raised when an operation can't get an internet connection.""" def __init__(self, message, url): -- cgit v1.2.3-70-g09d2
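
A short sketch of the header-name normalization implemented in standardize_header_names() above (assumes spack.util.web imports cleanly; the header values are made up):

    import spack.util.web as web_util

    # known aliases map to the standardized spelling; unknown names pass through
    assert web_util.standardize_header_names('content_length') == 'Content-length'
    assert web_util.standardize_header_names('contentType') == 'Content-type'
    assert web_util.standardize_header_names('X-Custom-Header') == 'X-Custom-Header'

    # for mappings, keys are normalized and values are left untouched
    headers = {'last modified': 'Tue, 22 Oct 2019 03:32:04 GMT'}
    assert web_util.standardize_header_names(headers) == {
        'Last-modified': 'Tue, 22 Oct 2019 03:32:04 GMT'}
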