author     Omar Padron <omar.padron@kitware.com>  2019-10-22 03:32:04 -0400
committer  Todd Gamblin <tgamblin@llnl.gov>  2019-10-22 00:32:04 -0700
commit     fd58c98b0edd198e2b1bc8c9f81208d7d6010178 (patch)
tree       826bf9531541343d71ea4fa7fc28f36bc346135c /lib
parent     6cb972a9d25cee6ed8b92195cccd8ad2255e98a9 (diff)
fetching: S3 upload and download (#11117)
This extends Spack functionality so that it can fetch sources and binaries from, push sources and binaries to, and index the contents of mirrors hosted on an S3 bucket.

High-level to-do list:

- [x] Extend the mirrors configuration to add support for `file://` and `s3://` URLs.
- [x] Ensure all fetching, pushing, and indexing operations work for `file://` URLs.
- [x] Implement S3 source fetching
- [x] Implement S3 binary mirror indexing
- [x] Implement S3 binary package fetching
- [x] Implement S3 source pushing
- [x] Implement S3 binary package pushing

Important details:

* Refactor URL handling so that S3 URLs and mirror URLs are handled more gracefully.
  - `parse()` now accepts already-parsed URL objects; an equivalent object is returned with any extra S3-related attributes intact. Objects created with urllib can also be passed, and the additional S3 handling logic is still applied.
* Update the mirror schema/parsing: a mirror can now have separate fetch and push URLs.
* Implement the S3 fetch strategy and several utility changes.
* Provide more feature-complete S3 fetching.
* Update the buildcache create command to support S3.
* Move the core logic for reading data from S3 out of the S3 fetch strategy and into the S3 URL handler; the S3 fetch strategy now calls into `read_from_url()`. Since `read_from_url()` can now handle S3 URLs, the S3 fetch strategy is redundant. It is not yet clear whether the ideal design keeps S3 fetching in a fetch strategy, implements it directly in `read_from_url()`, or both.
* Expand what can be passed to `spack buildcache` via the `-d` flag: in addition to a directory on the local filesystem, the name of a configured mirror or a push URL can be passed directly.
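For orientation, a minimal sketch of the new `Mirror`/`MirrorCollection` API added in `lib/spack/spack/mirror.py`; the mirror names and URLs below are illustrative only:

```python
# Illustrative only: exercises the new Mirror / MirrorCollection API.
import spack.mirror
import spack.util.url as url_util

# A mirror entry may now be a plain URL or a separate fetch/push pair.
mirrors = spack.mirror.MirrorCollection(mirrors={
    'local': 'file:///tmp/spack-mirror',
    'remote': {'fetch': 'https://mirror.example.com/spack',
               'push': 's3://example-bucket/spack'},
})

for name, mirror in mirrors.items():
    # push_url falls back to fetch_url when no separate push URL is set.
    print(name, mirror.fetch_url, mirror.push_url)

# lookup() accepts either a configured mirror name or a raw URL, which is
# what lets `spack buildcache create -d <name-or-url>` take either form.
target = mirrors.lookup('remote')
print(url_util.format(target.push_url))
```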
Diffstat (limited to 'lib')
-rw-r--r--  lib/spack/spack/binary_distribution.py | 233
-rw-r--r--  lib/spack/spack/caches.py | 16
-rw-r--r--  lib/spack/spack/cmd/buildcache.py | 28
-rw-r--r--  lib/spack/spack/cmd/checksum.py | 4
-rw-r--r--  lib/spack/spack/cmd/create.py | 3
-rw-r--r--  lib/spack/spack/cmd/mirror.py | 125
-rw-r--r--  lib/spack/spack/cmd/url.py | 8
-rw-r--r--  lib/spack/spack/fetch_strategy.py | 104
-rw-r--r--  lib/spack/spack/mirror.py | 230
-rw-r--r--  lib/spack/spack/s3_handler.py | 92
-rw-r--r--  lib/spack/spack/schema/mirrors.py | 14
-rw-r--r--  lib/spack/spack/stage.py | 118
-rw-r--r--  lib/spack/spack/test/cmd/pkg.py | 2
-rw-r--r--  lib/spack/spack/test/config.py | 1
-rw-r--r--  lib/spack/spack/test/llnl/util/lock.py | 2
-rw-r--r--  lib/spack/spack/test/stage.py | 2
-rw-r--r--  lib/spack/spack/util/s3.py | 44
-rw-r--r--  lib/spack/spack/util/url.py | 175
-rw-r--r--  lib/spack/spack/util/web.py | 501
19 files changed, 1414 insertions, 288 deletions
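The largest single addition above is the new URL utility module; a short, illustrative sketch of the `join()`/`parse()` behavior provided by `lib/spack/spack/util/url.py` (the URLs are made up, and the expected results follow the docstring included in that file below):

```python
# Illustrative only: the join()/parse() helpers added in spack/util/url.py.
import spack.util.url as url_util

prefix = 'https://mirror.spack.io/build_cache'

# Default behavior: append path components, much like os.path.join().
print(url_util.join(prefix, 'my-package'))
# -> https://mirror.spack.io/build_cache/my-package

# resolve_href=True: resolve the components the way a browser resolves an
# href against the page that contains it.
print(url_util.join(prefix, 'my-package', resolve_href=True))
# -> https://mirror.spack.io/my-package

# parse() fills in a default 'file' scheme, and local_file_path() returns a
# filesystem path only for file:// URLs.
print(url_util.local_file_path(url_util.parse('/tmp/spack-mirror')))  # /tmp/spack-mirror
print(url_util.local_file_path('s3://example-bucket/spack'))          # None
```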
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index cbc0f22327..aab07b8a84 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import codecs
import os
import re
import tarfile
@@ -23,14 +24,32 @@ import spack.fetch_strategy as fs
import spack.util.gpg as gpg_util
import spack.relocate as relocate
import spack.util.spack_yaml as syaml
+import spack.mirror
+import spack.util.url as url_util
+import spack.util.web as web_util
+
from spack.spec import Spec
from spack.stage import Stage
from spack.util.gpg import Gpg
-from spack.util.web import spider, read_from_url
from spack.util.executable import ProcessError
_build_cache_relative_path = 'build_cache'
+BUILD_CACHE_INDEX_TEMPLATE = '''
+<html>
+<head>
+ <title>{title}</title>
+</head>
+<body>
+<ul>
+{path_list}
+</ul>
+</body>
+</html>
+'''
+
+BUILD_CACHE_INDEX_ENTRY_TEMPLATE = ' <li><a href="{path}">{path}</a></li>'
+
class NoOverwriteException(Exception):
"""
@@ -101,7 +120,7 @@ def build_cache_relative_path():
return _build_cache_relative_path
-def build_cache_directory(prefix):
+def build_cache_prefix(prefix):
return os.path.join(prefix, build_cache_relative_path())
@@ -246,29 +265,36 @@ def sign_tarball(key, force, specfile_path):
Gpg.sign(key, specfile_path, '%s.asc' % specfile_path)
-def _generate_html_index(path_list, output_path):
- f = open(output_path, 'w')
- header = """<html>\n
-<head>\n</head>\n
-<list>\n"""
- footer = "</list>\n</html>\n"
- f.write(header)
- for path in path_list:
- rel = os.path.basename(path)
- f.write('<li><a href="%s"> %s</a>\n' % (rel, rel))
- f.write(footer)
- f.close()
-
-
-def generate_package_index(build_cache_dir):
- yaml_list = os.listdir(build_cache_dir)
- path_list = [os.path.join(build_cache_dir, l) for l in yaml_list]
+def generate_package_index(cache_prefix):
+ """Create the build cache index page.
- index_html_path_tmp = os.path.join(build_cache_dir, 'index.html.tmp')
- index_html_path = os.path.join(build_cache_dir, 'index.html')
-
- _generate_html_index(path_list, index_html_path_tmp)
- shutil.move(index_html_path_tmp, index_html_path)
+ Creates (or replaces) the "index.html" page at the location given in
+ cache_prefix. This page contains a link for each binary package (*.yaml)
+ and signing key (*.key) under cache_prefix.
+ """
+ tmpdir = tempfile.mkdtemp()
+ try:
+ index_html_path = os.path.join(tmpdir, 'index.html')
+ file_list = (
+ entry
+ for entry in web_util.list_url(cache_prefix)
+ if (entry.endswith('.yaml')
+ or entry.endswith('.key')))
+
+ with open(index_html_path, 'w') as f:
+ f.write(BUILD_CACHE_INDEX_TEMPLATE.format(
+ title='Spack Package Index',
+ path_list='\n'.join(
+ BUILD_CACHE_INDEX_ENTRY_TEMPLATE.format(path=path)
+ for path in file_list)))
+
+ web_util.push_to_url(
+ index_html_path,
+ url_util.join(cache_prefix, 'index.html'),
+ keep_original=False,
+ extra_args={'ContentType': 'text/html'})
+ finally:
+ shutil.rmtree(tmpdir)
def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
@@ -281,33 +307,41 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
raise ValueError('spec must be concrete to build tarball')
# set up some paths
- build_cache_dir = build_cache_directory(outdir)
+ tmpdir = tempfile.mkdtemp()
+ cache_prefix = build_cache_prefix(tmpdir)
tarfile_name = tarball_name(spec, '.tar.gz')
- tarfile_dir = os.path.join(build_cache_dir,
- tarball_directory_name(spec))
+ tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec))
tarfile_path = os.path.join(tarfile_dir, tarfile_name)
- mkdirp(tarfile_dir)
spackfile_path = os.path.join(
- build_cache_dir, tarball_path_name(spec, '.spack'))
- if os.path.exists(spackfile_path):
+ cache_prefix, tarball_path_name(spec, '.spack'))
+
+ remote_spackfile_path = url_util.join(
+ outdir, os.path.relpath(spackfile_path, tmpdir))
+
+ mkdirp(tarfile_dir)
+ if web_util.url_exists(remote_spackfile_path):
if force:
- os.remove(spackfile_path)
+ web_util.remove_url(remote_spackfile_path)
else:
- raise NoOverwriteException(str(spackfile_path))
+ raise NoOverwriteException(url_util.format(remote_spackfile_path))
+
# need to copy the spec file so the build cache can be downloaded
# without concretizing with the current spack packages
# and preferences
spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml")
specfile_name = tarball_name(spec, '.spec.yaml')
specfile_path = os.path.realpath(
- os.path.join(build_cache_dir, specfile_name))
+ os.path.join(cache_prefix, specfile_name))
- if os.path.exists(specfile_path):
+ remote_specfile_path = url_util.join(
+ outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir)))
+
+ if web_util.url_exists(remote_specfile_path):
if force:
- os.remove(specfile_path)
+ web_util.remove_url(remote_specfile_path)
else:
- raise NoOverwriteException(str(specfile_path))
+ raise NoOverwriteException(url_util.format(remote_specfile_path))
# make a copy of the install directory to work with
workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix))
@@ -324,6 +358,7 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e:
shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir)
+ shutil.rmtree(tmpdir)
tty.die(e)
else:
try:
@@ -331,7 +366,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e:
shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir)
+ shutil.rmtree(tmpdir)
tty.die(e)
+
# create compressed tarball of the install prefix
with closing(tarfile.open(tarfile_path, 'w:gz')) as tar:
tar.add(name='%s' % workdir,
@@ -360,7 +397,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
spec_dict['full_hash'] = spec.full_hash()
tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format(
- spec_dict['full_hash'], spec.name, specfile_path))
+ spec_dict['full_hash'],
+ spec.name,
+ url_util.format(remote_specfile_path)))
tty.debug(spec.tree())
with open(specfile_path, 'w') as outfile:
@@ -382,9 +421,19 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
if not unsigned:
os.remove('%s.asc' % specfile_path)
- # create an index.html for the build_cache directory so specs can be found
- if regenerate_index:
- generate_package_index(build_cache_dir)
+ web_util.push_to_url(
+ spackfile_path, remote_spackfile_path, keep_original=False)
+ web_util.push_to_url(
+ specfile_path, remote_specfile_path, keep_original=False)
+
+ try:
+ # create an index.html for the build_cache directory so specs can be
+ # found
+ if regenerate_index:
+ generate_package_index(url_util.join(
+ outdir, os.path.relpath(cache_prefix, tmpdir)))
+ finally:
+ shutil.rmtree(tmpdir)
return None
@@ -394,13 +443,16 @@ def download_tarball(spec):
Download binary tarball for given package into stage area
Return True if successful
"""
- mirrors = spack.config.get('mirrors')
- if len(mirrors) == 0:
+ if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of pre-compiled packages.")
+
tarball = tarball_path_name(spec, '.spack')
- for mirror_name, mirror_url in mirrors.items():
- url = mirror_url + '/' + _build_cache_relative_path + '/' + tarball
+
+ for mirror in spack.mirror.MirrorCollection().values():
+ url = url_util.join(
+ mirror.fetch_url, _build_cache_relative_path, tarball)
+
# stage the tarball into standard place
stage = Stage(url, name="build_cache", keep=True)
try:
@@ -408,6 +460,7 @@ def download_tarball(spec):
return stage.save_filename
except fs.FetchError:
continue
+
return None
@@ -610,26 +663,29 @@ def get_specs(force=False):
tty.debug("Using previously-retrieved specs")
return _cached_specs
- mirrors = spack.config.get('mirrors')
- if len(mirrors) == 0:
- tty.debug("No Spack mirrors are currently configured")
+ if not spack.mirror.MirrorCollection():
+ tty.warn("No Spack mirrors are currently configured")
return {}
urls = set()
- for mirror_name, mirror_url in mirrors.items():
- if mirror_url.startswith('file'):
- mirror = mirror_url.replace(
- 'file://', '') + "/" + _build_cache_relative_path
- tty.msg("Finding buildcaches in %s" % mirror)
- if os.path.exists(mirror):
- files = os.listdir(mirror)
+ for mirror in spack.mirror.MirrorCollection().values():
+ fetch_url_build_cache = url_util.join(
+ mirror.fetch_url, _build_cache_relative_path)
+
+ mirror_dir = url_util.local_file_path(fetch_url_build_cache)
+ if mirror_dir:
+ tty.msg("Finding buildcaches in %s" % mirror_dir)
+ if os.path.exists(mirror_dir):
+ files = os.listdir(mirror_dir)
for file in files:
if re.search('spec.yaml', file):
- link = 'file://' + mirror + '/' + file
+ link = url_util.join(fetch_url_build_cache, file)
urls.add(link)
else:
- tty.msg("Finding buildcaches on %s" % mirror_url)
- p, links = spider(mirror_url + "/" + _build_cache_relative_path)
+ tty.msg("Finding buildcaches at %s" %
+ url_util.format(fetch_url_build_cache))
+ p, links = web_util.spider(
+ url_util.join(fetch_url_build_cache, 'index.html'))
for link in links:
if re.search("spec.yaml", link):
urls.add(link)
@@ -659,28 +715,33 @@ def get_keys(install=False, trust=False, force=False):
"""
Get pgp public keys available on mirror
"""
- mirrors = spack.config.get('mirrors')
- if len(mirrors) == 0:
+ if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of build caches.")
keys = set()
- for mirror_name, mirror_url in mirrors.items():
- if mirror_url.startswith('file'):
- mirror = os.path.join(
- mirror_url.replace('file://', ''), _build_cache_relative_path)
- tty.msg("Finding public keys in %s" % mirror)
- files = os.listdir(mirror)
+
+ for mirror in spack.mirror.MirrorCollection().values():
+ fetch_url_build_cache = url_util.join(
+ mirror.fetch_url, _build_cache_relative_path)
+
+ mirror_dir = url_util.local_file_path(fetch_url_build_cache)
+ if mirror_dir:
+ tty.msg("Finding public keys in %s" % mirror_dir)
+ files = os.listdir(mirror_dir)
for file in files:
if re.search(r'\.key', file):
- link = 'file://' + mirror + '/' + file
+ link = url_util.join(fetch_url_build_cache, file)
keys.add(link)
else:
- tty.msg("Finding public keys on %s" % mirror_url)
- p, links = spider(mirror_url + "/build_cache", depth=1)
+ tty.msg("Finding public keys at %s" %
+ url_util.format(fetch_url_build_cache))
+ p, links = web_util.spider(fetch_url_build_cache, depth=1)
+
for link in links:
if re.search(r'\.key', link):
keys.add(link)
+
for link in keys:
with Stage(link, name="build_cache", keep=True) as stage:
if os.path.exists(stage.save_filename) and force:
@@ -717,15 +778,16 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False):
# Try to retrieve the .spec.yaml directly, based on the known
# format of the name, in order to determine if the package
# needs to be rebuilt.
- build_cache_dir = build_cache_directory(mirror_url)
+ cache_prefix = build_cache_prefix(mirror_url)
spec_yaml_file_name = tarball_name(spec, '.spec.yaml')
- file_path = os.path.join(build_cache_dir, spec_yaml_file_name)
+ file_path = os.path.join(cache_prefix, spec_yaml_file_name)
result_of_error = 'Package ({0}) will {1}be rebuilt'.format(
spec.short_spec, '' if rebuild_on_errors else 'not ')
try:
- yaml_contents = read_from_url(file_path)
+ _, _, yaml_file = web_util.read_from_url(file_path)
+ yaml_contents = codecs.getreader('utf-8')(yaml_file).read()
except URLError as url_err:
err_msg = [
'Unable to determine whether {0} needs rebuilding,',
@@ -782,22 +844,22 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
"""
rebuilds = {}
- for mirror_name, mirror_url in mirrors.items():
- tty.msg('Checking for built specs at %s' % mirror_url)
+ for mirror in spack.mirror.MirrorCollection(mirrors).values():
+ tty.msg('Checking for built specs at %s' % mirror.fetch_url)
rebuild_list = []
for spec in specs:
- if needs_rebuild(spec, mirror_url, rebuild_on_errors):
+ if needs_rebuild(spec, mirror.fetch_url, rebuild_on_errors):
rebuild_list.append({
'short_spec': spec.short_spec,
'hash': spec.dag_hash()
})
if rebuild_list:
- rebuilds[mirror_url] = {
- 'mirrorName': mirror_name,
- 'mirrorUrl': mirror_url,
+ rebuilds[mirror.fetch_url] = {
+ 'mirrorName': mirror.name,
+ 'mirrorUrl': mirror.fetch_url,
'rebuildSpecs': rebuild_list
}
@@ -810,33 +872,36 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
def _download_buildcache_entry(mirror_root, descriptions):
for description in descriptions:
- url = os.path.join(mirror_root, description['url'])
+ description_url = os.path.join(mirror_root, description['url'])
path = description['path']
fail_if_missing = description['required']
mkdirp(path)
- stage = Stage(url, name="build_cache", path=path, keep=True)
+ stage = Stage(
+ description_url, name="build_cache", path=path, keep=True)
try:
stage.fetch()
except fs.FetchError as e:
tty.debug(e)
if fail_if_missing:
- tty.error('Failed to download required url {0}'.format(url))
+ tty.error('Failed to download required url {0}'.format(
+ description_url))
return False
return True
def download_buildcache_entry(file_descriptions):
- mirrors = spack.config.get('mirrors')
- if len(mirrors) == 0:
+ if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of buildcache entries.")
- for mirror_name, mirror_url in mirrors.items():
- mirror_root = os.path.join(mirror_url, _build_cache_relative_path)
+ for mirror in spack.mirror.MirrorCollection().values():
+ mirror_root = os.path.join(
+ mirror.fetch_url,
+ _build_cache_relative_path)
if _download_buildcache_entry(mirror_root, file_descriptions):
return True
diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py
index dfd750fa82..e2352b2fcc 100644
--- a/lib/spack/spack/caches.py
+++ b/lib/spack/spack/caches.py
@@ -9,11 +9,13 @@ import os
import llnl.util.lang
from llnl.util.filesystem import mkdirp
+import spack.error
import spack.paths
import spack.config
import spack.fetch_strategy
import spack.util.file_cache
-from spack.util.path import canonicalize_path
+import spack.util.path
+import spack.util.url as url_util
def _misc_cache():
@@ -25,7 +27,7 @@ def _misc_cache():
path = spack.config.get('config:misc_cache')
if not path:
path = os.path.join(spack.paths.user_config_path, 'cache')
- path = canonicalize_path(path)
+ path = spack.util.path.canonicalize_path(path)
return spack.util.file_cache.FileCache(path)
@@ -43,22 +45,26 @@ def _fetch_cache():
path = spack.config.get('config:source_cache')
if not path:
path = os.path.join(spack.paths.var_path, "cache")
- path = canonicalize_path(path)
+ path = spack.util.path.canonicalize_path(path)
return spack.fetch_strategy.FsCache(path)
class MirrorCache(object):
def __init__(self, root):
- self.root = os.path.abspath(root)
+ self.root = url_util.local_file_path(root)
+ if not self.root:
+ raise spack.error.SpackError(
+ 'MirrorCaches only work with file:// URLs')
+
self.new_resources = set()
self.existing_resources = set()
def store(self, fetcher, relative_dest):
# Note this will archive package sources even if they would not
# normally be cached (e.g. the current tip of an hg/git branch)
-
dst = os.path.join(self.root, relative_dest)
+
if os.path.exists(dst):
self.existing_resources.add(relative_dest)
else:
diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py
index 121a6f4aa5..ee09a33f39 100644
--- a/lib/spack/spack/cmd/buildcache.py
+++ b/lib/spack/spack/cmd/buildcache.py
@@ -14,6 +14,7 @@ import spack.cmd
import spack.cmd.common.arguments as arguments
import spack.environment as ev
import spack.hash_types as ht
+import spack.mirror
import spack.relocate
import spack.repo
import spack.spec
@@ -21,6 +22,8 @@ import spack.store
import spack.config
import spack.repo
import spack.store
+import spack.util.url as url_util
+
from spack.error import SpecError
from spack.spec import Spec, save_dependency_spec_yamls
@@ -205,6 +208,13 @@ def setup_parser(subparser):
help='Destination mirror url')
copy.set_defaults(func=buildcache_copy)
+ # Update buildcache index without copying any additional packages
+ update_index = subparsers.add_parser(
+ 'update-index', help=buildcache_update_index.__doc__)
+ update_index.add_argument(
+ '-d', '--mirror-url', default=None, help='Destination mirror url')
+ update_index.set_defaults(func=buildcache_update_index)
+
def find_matching_specs(pkgs, allow_multiple_matches=False, env=None):
"""Returns a list of specs matching the not necessarily
@@ -312,9 +322,14 @@ def createtarball(args):
" yaml file containing a spec to install")
pkgs = set(packages)
specs = set()
+
outdir = '.'
if args.directory:
outdir = args.directory
+
+ mirror = spack.mirror.MirrorCollection().lookup(outdir)
+ outdir = url_util.format(mirror.push_url)
+
signkey = None
if args.key:
signkey = args.key
@@ -649,6 +664,19 @@ def buildcache_copy(args):
shutil.copyfile(cdashid_src_path, cdashid_dest_path)
+def buildcache_update_index(args):
+ """Update a buildcache index."""
+ outdir = '.'
+ if args.mirror_url:
+ outdir = args.mirror_url
+
+ mirror = spack.mirror.MirrorCollection().lookup(outdir)
+ outdir = url_util.format(mirror.push_url)
+
+ bindist.generate_package_index(
+ url_util.join(outdir, bindist.build_cache_relative_path()))
+
+
def buildcache(parser, args):
if args.func:
args.func(args)
diff --git a/lib/spack/spack/cmd/checksum.py b/lib/spack/spack/cmd/checksum.py
index ecc71d3060..2518dfef9f 100644
--- a/lib/spack/spack/cmd/checksum.py
+++ b/lib/spack/spack/cmd/checksum.py
@@ -11,8 +11,8 @@ import llnl.util.tty as tty
import spack.cmd
import spack.repo
+import spack.stage
import spack.util.crypto
-import spack.util.web
from spack.util.naming import valid_fully_qualified_module_name
from spack.version import ver, Version
@@ -56,7 +56,7 @@ def checksum(parser, args):
if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name))
- version_lines = spack.util.web.get_checksums_for_versions(
+ version_lines = spack.stage.get_checksums_for_versions(
url_dict, pkg.name, keep_stage=args.keep_stage)
print()
diff --git a/lib/spack/spack/cmd/create.py b/lib/spack/spack/cmd/create.py
index 527a7a883c..6c68617acd 100644
--- a/lib/spack/spack/cmd/create.py
+++ b/lib/spack/spack/cmd/create.py
@@ -13,6 +13,7 @@ from llnl.util.filesystem import mkdirp
import spack.util.web
import spack.repo
+import spack.stage
from spack.spec import Spec
from spack.util.editor import editor
from spack.util.executable import which, ProcessError
@@ -618,7 +619,7 @@ def get_versions(args, name):
version = parse_version(args.url)
url_dict = {version: args.url}
- versions = spack.util.web.get_checksums_for_versions(
+ versions = spack.stage.get_checksums_for_versions(
url_dict, name, first_stage_function=guesser,
keep_stage=args.keep_stage)
else:
diff --git a/lib/spack/spack/cmd/mirror.py b/lib/spack/spack/cmd/mirror.py
index 723e310ad6..91ed40a4c5 100644
--- a/lib/spack/spack/cmd/mirror.py
+++ b/lib/spack/spack/cmd/mirror.py
@@ -4,20 +4,21 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import sys
-import os
-from datetime import datetime
import argparse
import llnl.util.tty as tty
from llnl.util.tty.colify import colify
import spack.cmd
+import spack.cmd.common.arguments as arguments
import spack.concretize
import spack.config
+import spack.environment as ev
import spack.mirror
import spack.repo
-import spack.cmd.common.arguments as arguments
-import spack.environment as ev
+import spack.util.url as url_util
+import spack.util.web as web_util
+
from spack.spec import Spec
from spack.error import SpackError
from spack.util.spack_yaml import syaml_dict
@@ -73,6 +74,19 @@ def setup_parser(subparser):
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
+ # Set-Url
+ set_url_parser = sp.add_parser('set-url', help=mirror_set_url.__doc__)
+ set_url_parser.add_argument('name', help="mnemonic name for mirror")
+ set_url_parser.add_argument(
+ 'url', help="url of mirror directory from 'spack mirror create'")
+ set_url_parser.add_argument(
+ '--push', action='store_true',
+ help="set only the URL used for uploading new packages")
+ set_url_parser.add_argument(
+ '--scope', choices=scopes, metavar=scopes_metavar,
+ default=spack.config.default_modify_scope(),
+ help="configuration scope to modify")
+
# List
list_parser = sp.add_parser('list', help=mirror_list.__doc__)
list_parser.add_argument(
@@ -83,20 +97,14 @@ def setup_parser(subparser):
def mirror_add(args):
"""Add a mirror to Spack."""
- url = args.url
- if url.startswith('/'):
- url = 'file://' + url
+ url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
- for name, u in mirrors.items():
- if name == args.name:
- tty.die("Mirror with name %s already exists." % name)
- if u == url:
- tty.die("Mirror with url %s already exists." % url)
- # should only be one item per mirror dict.
+ if args.name in mirrors:
+ tty.die("Mirror with name %s already exists." % args.name)
items = [(n, u) for n, u in mirrors.items()]
items.insert(0, (args.name, url))
@@ -117,21 +125,86 @@ def mirror_remove(args):
old_value = mirrors.pop(name)
spack.config.set('mirrors', mirrors, scope=args.scope)
- tty.msg("Removed mirror %s with url %s" % (name, old_value))
+
+ debug_msg_url = "url %s"
+ debug_msg = ["Removed mirror %s with"]
+ values = [name]
+
+ try:
+ fetch_value = old_value['fetch']
+ push_value = old_value['push']
+
+ debug_msg.extend(("fetch", debug_msg_url, "and push", debug_msg_url))
+ values.extend((fetch_value, push_value))
+ except TypeError:
+ debug_msg.append(debug_msg_url)
+ values.append(old_value)
+
+ tty.debug(" ".join(debug_msg) % tuple(values))
+ tty.msg("Removed mirror %s." % name)
+
+
+def mirror_set_url(args):
+ """Change the URL of a mirror."""
+ url = url_util.format(args.url)
+
+ mirrors = spack.config.get('mirrors', scope=args.scope)
+ if not mirrors:
+ mirrors = syaml_dict()
+
+ if args.name not in mirrors:
+ tty.die("No mirror found with name %s." % args.name)
+
+ entry = mirrors[args.name]
+
+ try:
+ fetch_url = entry['fetch']
+ push_url = entry['push']
+ except TypeError:
+ fetch_url, push_url = entry, entry
+
+ changes_made = False
+
+ if args.push:
+ changes_made = changes_made or push_url != url
+ push_url = url
+ else:
+ changes_made = (
+ changes_made or fetch_url != push_url or push_url != url)
+
+ fetch_url, push_url = url, url
+
+ items = [
+ (
+ (n, u)
+ if n != args.name else (
+ (n, {"fetch": fetch_url, "push": push_url})
+ if fetch_url != push_url else (n, fetch_url)
+ )
+ )
+ for n, u in mirrors.items()
+ ]
+
+ mirrors = syaml_dict(items)
+ spack.config.set('mirrors', mirrors, scope=args.scope)
+
+ if changes_made:
+ tty.msg(
+ "Changed%s url for mirror %s." %
+ ((" (push)" if args.push else ""), args.name))
+ else:
+ tty.msg("Url already set for mirror %s." % args.name)
def mirror_list(args):
"""Print out available mirrors to the console."""
- mirrors = spack.config.get('mirrors', scope=args.scope)
+
+ mirrors = spack.mirror.MirrorCollection(scope=args.scope)
if not mirrors:
tty.msg("No mirrors configured.")
return
- max_len = max(len(n) for n in mirrors.keys())
- fmt = "%%-%ds%%s" % (max_len + 4)
-
- for name in mirrors:
- print(fmt % (name, mirrors[name]))
+ mirrors.display()
def _read_specs_from_file(filename):
@@ -188,14 +261,13 @@ def mirror_create(args):
msg = 'Skipping {0} as it is an external spec.'
tty.msg(msg.format(spec.cshort_spec))
- # Default name for directory is spack-mirror-<DATESTAMP>
- directory = args.directory
- if not directory:
- timestamp = datetime.now().strftime("%Y-%m-%d")
- directory = 'spack-mirror-' + timestamp
+ mirror = spack.mirror.Mirror(
+ args.directory or spack.config.get('config:source_cache'))
+
+ directory = url_util.format(mirror.push_url)
# Make sure nothing is in the way.
- existed = os.path.isdir(directory)
+ existed = web_util.url_exists(directory)
# Actually do the work to create the mirror
present, mirrored, error = spack.mirror.create(
@@ -220,6 +292,7 @@ def mirror(parser, args):
'add': mirror_add,
'remove': mirror_remove,
'rm': mirror_remove,
+ 'set-url': mirror_set_url,
'list': mirror_list}
if args.no_checksum:
diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py
index 10257b2608..f1ce050a90 100644
--- a/lib/spack/spack/cmd/url.py
+++ b/lib/spack/spack/cmd/url.py
@@ -5,10 +5,8 @@
from __future__ import division, print_function
from collections import defaultdict
-try:
- from urllib.parse import urlparse
-except ImportError:
- from urlparse import urlparse
+
+import six.moves.urllib.parse as urllib_parse
import spack.fetch_strategy as fs
import spack.repo
@@ -262,7 +260,7 @@ def url_stats(args):
self.checksums[algo] += 1
# parse out the URL scheme (https/http/ftp/etc.)
- urlinfo = urlparse(fetcher.url)
+ urlinfo = urllib_parse.urlparse(fetcher.url)
self.schemes[urlinfo.scheme] += 1
elif url_type == 'git':
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
index 32239d81ce..4812211812 100644
--- a/lib/spack/spack/fetch_strategy.py
+++ b/lib/spack/spack/fetch_strategy.py
@@ -23,6 +23,7 @@ in order to build it. They need to define the following methods:
Archive a source directory, e.g. for creating a mirror.
"""
import os
+import os.path
import sys
import re
import shutil
@@ -30,6 +31,7 @@ import copy
import xml.etree.ElementTree
from functools import wraps
from six import string_types, with_metaclass
+import six.moves.urllib.parse as urllib_parse
import llnl.util.tty as tty
from llnl.util.filesystem import (
@@ -39,6 +41,9 @@ import spack.config
import spack.error
import spack.util.crypto as crypto
import spack.util.pattern as pattern
+import spack.util.web as web_util
+import spack.util.url as url_util
+
from spack.util.executable import which
from spack.util.string import comma_and, quote
from spack.version import Version, ver
@@ -48,6 +53,17 @@ from spack.util.compression import decompressor_for, extension
#: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = []
+CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = (
+ "The contents of {subject} look like {content_type}. Either the URL"
+ " you are trying to use does not exist or you have an internet gateway"
+ " issue. You can remove the bad archive using 'spack clean"
+ " <package>', then try again using the correct URL.")
+
+
+def warn_content_type_mismatch(subject, content_type='HTML'):
+ tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(
+ subject=subject, content_type=content_type))
+
def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set
@@ -351,12 +367,7 @@ class URLFetchStrategy(FetchStrategy):
content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]:
- msg = ("The contents of {0} look like HTML. Either the URL "
- "you are trying to use does not exist or you have an "
- "internet gateway issue. You can remove the bad archive "
- "using 'spack clean <package>', then try again using "
- "the correct URL.")
- tty.warn(msg.format(self.archive_file or "the archive"))
+ warn_content_type_mismatch(self.archive_file or "the archive")
if save_file:
os.rename(partial_file, save_file)
@@ -449,7 +460,10 @@ class URLFetchStrategy(FetchStrategy):
if not self.archive_file:
raise NoArchiveFileError("Cannot call archive() before fetching.")
- shutil.copyfile(self.archive_file, destination)
+ web_util.push_to_url(
+ self.archive_file,
+ destination,
+ keep_original=True)
@_needs_stage
def check(self):
@@ -1063,6 +1077,54 @@ class HgFetchStrategy(VCSFetchStrategy):
return "[hg] %s" % self.url
+class S3FetchStrategy(URLFetchStrategy):
+ """FetchStrategy that pulls from an S3 bucket."""
+ enabled = True
+ url_attr = 's3'
+
+ def __init__(self, *args, **kwargs):
+ try:
+ super(S3FetchStrategy, self).__init__(*args, **kwargs)
+ except ValueError:
+ if not kwargs.get('url'):
+ raise ValueError(
+ "S3FetchStrategy requires a url for fetching.")
+
+ @_needs_stage
+ def fetch(self):
+ if self.archive_file:
+ tty.msg("Already downloaded %s" % self.archive_file)
+ return
+
+ parsed_url = url_util.parse(self.url)
+ if parsed_url.scheme != 's3':
+ raise ValueError(
+ 'S3FetchStrategy can only fetch from s3:// urls.')
+
+ tty.msg("Fetching %s" % self.url)
+
+ basename = os.path.basename(parsed_url.path)
+
+ with working_dir(self.stage.path):
+ _, headers, stream = web_util.read_from_url(self.url)
+
+ with open(basename, 'wb') as f:
+ shutil.copyfileobj(stream, f)
+
+ content_type = headers['Content-type']
+
+ if content_type == 'text/html':
+ warn_content_type_mismatch(self.archive_file or "the archive")
+
+ if self.stage.save_filename:
+ os.rename(
+ os.path.join(self.stage.path, basename),
+ self.stage.save_filename)
+
+ if not self.archive_file:
+ raise FailedDownloadError(self.url)
+
+
def from_url(url):
"""Given a URL, find an appropriate fetch strategy for it.
Currently just gives you a URLFetchStrategy that uses curl.
@@ -1206,6 +1268,34 @@ def for_package_version(pkg, version):
raise InvalidArgsError(pkg, version, **args)
+def from_url_scheme(url, *args, **kwargs):
+ """Finds a suitable FetchStrategy by matching its url_attr with the scheme
+ in the given url."""
+
+ url = kwargs.get('url', url)
+ parsed_url = urllib_parse.urlparse(url, scheme='file')
+
+ scheme_mapping = (
+ kwargs.get('scheme_mapping') or
+ {
+ 'file': 'url',
+ 'http': 'url',
+ 'https': 'url'
+ })
+
+ scheme = parsed_url.scheme
+ scheme = scheme_mapping.get(scheme, scheme)
+
+ for fetcher in all_strategies:
+ url_attr = getattr(fetcher, 'url_attr', None)
+ if url_attr and url_attr == scheme:
+ return fetcher(url, *args, **kwargs)
+
+ raise ValueError(
+ 'No FetchStrategy found for url with scheme: "{SCHEME}"'.format(
+ SCHEME=parsed_url.scheme))
+
+
def from_list_url(pkg):
"""If a package provides a URL which lists URLs for resources by
version, this can create a fetcher for a URL discovered for
diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py
index 45aa779c69..e2329b6861 100644
--- a/lib/spack/spack/mirror.py
+++ b/lib/spack/spack/mirror.py
@@ -13,6 +13,18 @@ to download packages directly from a mirror (e.g., on an intranet).
"""
import sys
import os
+import os.path
+import operator
+
+import six
+
+import ruamel.yaml.error as yaml_error
+
+try:
+ from collections.abc import Mapping
+except ImportError:
+ from collections import Mapping
+
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
@@ -20,9 +32,205 @@ import spack.config
import spack.error
import spack.url as url
import spack.fetch_strategy as fs
-from spack.spec import Spec
+import spack.util.spack_json as sjson
+import spack.util.spack_yaml as syaml
+import spack.util.url as url_util
+import spack.spec
from spack.version import VersionList
from spack.util.compression import allowed_archive
+from spack.util.spack_yaml import syaml_dict
+
+
+def _display_mirror_entry(size, name, url, type_=None):
+ if type_:
+ type_ = "".join((" (", type_, ")"))
+ else:
+ type_ = ""
+
+ print("%-*s%s%s" % (size + 4, name, url, type_))
+
+
+class Mirror(object):
+ """Represents a named location for storing source tarballs and binary
+ packages.
+
+ Mirrors have a fetch_url that indicates where and how artifacts are fetched
+ from them, and a push_url that indicates where and how artifacts are pushed
+ to them. These two URLs are usually the same.
+ """
+
+ def __init__(self, fetch_url, push_url=None, name=None):
+ self._fetch_url = fetch_url
+ self._push_url = push_url
+ self._name = name
+
+ def to_json(self, stream=None):
+ return sjson.dump(self.to_dict(), stream)
+
+ def to_yaml(self, stream=None):
+ return syaml.dump(self.to_dict(), stream)
+
+ @staticmethod
+ def from_yaml(stream, name=None):
+ try:
+ data = syaml.load(stream)
+ return Mirror.from_dict(data, name)
+ except yaml_error.MarkedYAMLError as e:
+ raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
+
+ @staticmethod
+ def from_json(stream, name=None):
+ d = sjson.load(stream)
+ return Mirror.from_dict(d, name)
+
+ def to_dict(self):
+ if self._push_url is None:
+ return self._fetch_url
+ else:
+ return syaml_dict([
+ ('fetch', self._fetch_url),
+ ('push', self._push_url)])
+
+ @staticmethod
+ def from_dict(d, name=None):
+ if isinstance(d, six.string_types):
+ return Mirror(d, name=name)
+ else:
+ return Mirror(d['fetch'], d['push'], name)
+
+ def display(self, max_len=0):
+ if self._push_url is None:
+ _display_mirror_entry(max_len, self._name, self._fetch_url)
+ else:
+ _display_mirror_entry(
+ max_len, self._name, self._fetch_url, "fetch")
+ _display_mirror_entry(
+ max_len, self._name, self._push_url, "push")
+
+ def __str__(self):
+ name = self._name
+ if name is None:
+ name = ''
+ else:
+ name = ' "%s"' % name
+
+ if self._push_url is None:
+ return "[Mirror%s (%s)]" % (name, self._fetch_url)
+
+ return "[Mirror%s (fetch: %s, push: %s)]" % (
+ name, self._fetch_url, self._push_url)
+
+ def __repr__(self):
+ return ''.join((
+ 'Mirror(',
+ ', '.join(
+ '%s=%s' % (k, repr(v))
+ for k, v in (
+ ('fetch_url', self._fetch_url),
+ ('push_url', self._push_url),
+ ('name', self._name))
+ if k == 'fetch_url' or v),
+ ')'
+ ))
+
+ @property
+ def name(self):
+ return self._name or "<unnamed>"
+
+ @property
+ def fetch_url(self):
+ return self._fetch_url
+
+ @fetch_url.setter
+ def fetch_url(self, url):
+ self._fetch_url = url
+ self._normalize()
+
+ @property
+ def push_url(self):
+ if self._push_url is None:
+ return self._fetch_url
+ return self._push_url
+
+ @push_url.setter
+ def push_url(self, url):
+ self._push_url = url
+ self._normalize()
+
+ def _normalize(self):
+ if self._push_url is not None and self._push_url == self._fetch_url:
+ self._push_url = None
+
+
+class MirrorCollection(Mapping):
+ """A mapping of mirror names to mirrors."""
+
+ def __init__(self, mirrors=None, scope=None):
+ self._mirrors = dict(
+ (name, Mirror.from_dict(mirror, name))
+ for name, mirror in (
+ mirrors.items() if mirrors is not None else
+ spack.config.get('mirrors', scope=scope).items()))
+
+ def to_json(self, stream=None):
+ return sjson.dump(self.to_dict(True), stream)
+
+ def to_yaml(self, stream=None):
+ return syaml.dump(self.to_dict(True), stream)
+
+ @staticmethod
+ def from_yaml(stream, name=None):
+ try:
+ data = syaml.load(stream)
+ return MirrorCollection(data)
+ except yaml_error.MarkedYAMLError as e:
+ raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
+
+ @staticmethod
+ def from_json(stream, name=None):
+ d = sjson.load(stream)
+ return MirrorCollection(d)
+
+ def to_dict(self, recursive=False):
+ return syaml_dict(sorted(
+ (
+ (k, (v.to_dict() if recursive else v))
+ for (k, v) in self._mirrors.items()
+ ), key=operator.itemgetter(0)
+ ))
+
+ @staticmethod
+ def from_dict(d):
+ return MirrorCollection(d)
+
+ def __getitem__(self, item):
+ return self._mirrors[item]
+
+ def display(self):
+ max_len = max(len(mirror.name) for mirror in self._mirrors.values())
+ for mirror in self._mirrors.values():
+ mirror.display(max_len)
+
+ def lookup(self, name_or_url):
+ """Looks up and returns a Mirror.
+
+ If this MirrorCollection contains a named Mirror under the name
+ [name_or_url], then that mirror is returned. Otherwise, [name_or_url]
+ is assumed to be a mirror URL, and an anonymous mirror with the given
+ URL is returned.
+ """
+ result = self.get(name_or_url)
+
+ if result is None:
+ result = Mirror(fetch_url=name_or_url)
+
+ return result
+
+ def __iter__(self):
+ return iter(self._mirrors)
+
+ def __len__(self):
+ return len(self._mirrors)
def mirror_archive_filename(spec, fetcher, resource_id=None):
@@ -114,7 +322,7 @@ def get_matching_versions(specs, **kwargs):
# Generate only versions that satisfy the spec.
if spec.concrete or v.satisfies(spec.versions):
- s = Spec(pkg.name)
+ s = spack.spec.Spec(pkg.name)
s.versions = VersionList([v])
s.variants = spec.variants.copy()
# This is needed to avoid hanging references during the
@@ -166,12 +374,17 @@ def create(path, specs, **kwargs):
it creates specs for those versions. If the version satisfies any spec
in the specs list, it is downloaded and added to the mirror.
"""
+ parsed = url_util.parse(path)
+ mirror_root = url_util.local_file_path(parsed)
+
# Make sure nothing is in the way.
- if os.path.isfile(path):
- raise MirrorError("%s already exists and is a file." % path)
+ if mirror_root and os.path.isfile(mirror_root):
+ raise MirrorError("%s already exists and is a file." % mirror_root)
# automatically spec-ify anything in the specs array.
- specs = [s if isinstance(s, Spec) else Spec(s) for s in specs]
+ specs = [
+ s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s)
+ for s in specs]
# Get concrete specs for each matching version of these specs.
version_specs = get_matching_versions(
@@ -180,8 +393,7 @@ def create(path, specs, **kwargs):
s.concretize()
# Get the absolute path of the root before we start jumping around.
- mirror_root = os.path.abspath(path)
- if not os.path.isdir(mirror_root):
+ if mirror_root and not os.path.isdir(mirror_root):
try:
mkdirp(mirror_root)
except OSError as e:
@@ -195,12 +407,12 @@ def create(path, specs, **kwargs):
'error': []
}
- mirror_cache = spack.caches.MirrorCache(mirror_root)
+ mirror_cache = spack.caches.MirrorCache(parsed)
try:
spack.caches.mirror_cache = mirror_cache
# Iterate through packages and download all safe tarballs for each
for spec in version_specs:
- add_single_spec(spec, mirror_root, categories, **kwargs)
+ add_single_spec(spec, parsed, categories, **kwargs)
finally:
spack.caches.mirror_cache = None
diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py
new file mode 100644
index 0000000000..2a54b9ecb1
--- /dev/null
+++ b/lib/spack/spack/s3_handler.py
@@ -0,0 +1,92 @@
+# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+from io import BufferedReader
+
+import six.moves.urllib.response as urllib_response
+import six.moves.urllib.request as urllib_request
+import six.moves.urllib.error as urllib_error
+
+import spack.util.s3 as s3_util
+import spack.util.url as url_util
+import spack.util.web as web_util
+
+
+# NOTE(opadron): Workaround issue in boto where its StreamingBody
+# implementation is missing several APIs expected from IOBase. These missing
+# APIs prevent the streams returned by boto from being passed as-are along to
+# urllib.
+#
+# https://github.com/boto/botocore/issues/879
+# https://github.com/python/cpython/pull/3249
+class WrapStream(BufferedReader):
+ def __init__(self, raw):
+ raw.readable = lambda: True
+ raw.writable = lambda: False
+ raw.seekable = lambda: False
+ raw.closed = False
+ raw.flush = lambda: None
+ super(WrapStream, self).__init__(raw)
+
+ def detach(self):
+ self.raw = None
+
+ def read(self, *args, **kwargs):
+ return self.raw.read(*args, **kwargs)
+
+ def __getattr__(self, key):
+ return getattr(self.raw, key)
+
+
+def _s3_open(url):
+ parsed = url_util.parse(url)
+ s3 = s3_util.create_s3_session(parsed)
+
+ bucket = parsed.netloc
+ key = parsed.path
+
+ if key.startswith('/'):
+ key = key[1:]
+
+ obj = s3.get_object(Bucket=bucket, Key=key)
+
+ # NOTE(opadron): Apply workaround here (see above)
+ stream = WrapStream(obj['Body'])
+ headers = web_util.standardize_header_names(
+ obj['ResponseMetadata']['HTTPHeaders'])
+
+ return url, headers, stream
+
+
+class UrllibS3Handler(urllib_request.HTTPSHandler):
+ def s3_open(self, req):
+ orig_url = req.get_full_url()
+ from botocore.exceptions import ClientError
+ try:
+ url, headers, stream = _s3_open(orig_url)
+ return urllib_response.addinfourl(stream, headers, url)
+ except ClientError as err:
+ # if no such [KEY], but [KEY]/index.html exists,
+ # return that, instead.
+ if err.response['Error']['Code'] == 'NoSuchKey':
+ try:
+ _, headers, stream = _s3_open(
+ url_util.join(orig_url, 'index.html'))
+ return urllib_response.addinfourl(
+ stream, headers, orig_url)
+
+ except ClientError as err2:
+ if err.response['Error']['Code'] == 'NoSuchKey':
+ # raise original error
+ raise urllib_error.URLError(err)
+
+ raise urllib_error.URLError(err2)
+
+ raise urllib_error.URLError(err)
+
+
+S3OpenerDirector = urllib_request.build_opener(UrllibS3Handler())
+
+open = S3OpenerDirector.open
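A small, illustrative sketch of reading an `s3://` URL through the opener defined above (the bucket name is made up; boto3 and valid or anonymous credentials are assumed):

```python
# Illustrative only: read an s3:// URL through the urllib-based S3 opener.
import spack.s3_handler as s3_handler

response = s3_handler.open('s3://example-bucket/spack-mirror/build_cache/index.html')
print(response.geturl())   # URL actually served (may be the index.html fallback)
print(response.read(200))  # first bytes of the object body
```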
diff --git a/lib/spack/spack/schema/mirrors.py b/lib/spack/spack/schema/mirrors.py
index 551267bd4f..92e6c9bca1 100644
--- a/lib/spack/spack/schema/mirrors.py
+++ b/lib/spack/spack/schema/mirrors.py
@@ -17,7 +17,19 @@ properties = {
'default': {},
'additionalProperties': False,
'patternProperties': {
- r'\w[\w-]*': {'type': 'string'},
+ r'\w[\w-]*': {
+ 'anyOf': [
+ {'type': 'string'},
+ {
+ 'type': 'object',
+ 'required': ['fetch', 'push'],
+ 'properties': {
+ 'fetch': {'type': 'string'},
+ 'push': {'type': 'string'}
+ }
+ }
+ ]
+ },
},
},
}
diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py
index 6b27d37adf..9621938bcd 100644
--- a/lib/spack/spack/stage.py
+++ b/lib/spack/spack/stage.py
@@ -12,7 +12,6 @@ import tempfile
import getpass
from six import string_types
from six import iteritems
-from six.moves.urllib.parse import urljoin
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, can_access, install, install_tree
@@ -20,12 +19,16 @@ from llnl.util.filesystem import partition_path, remove_linked_tree
import spack.paths
import spack.caches
+import spack.cmd
import spack.config
import spack.error
+import spack.mirror
import spack.util.lock
import spack.fetch_strategy as fs
import spack.util.pattern as pattern
import spack.util.path as sup
+import spack.util.url as url_util
+
from spack.util.crypto import prefix_bits, bit_length
@@ -252,7 +255,7 @@ class Stage(object):
# TODO: fetch/stage coupling needs to be reworked -- the logic
# TODO: here is convoluted and not modular enough.
if isinstance(url_or_fetch_strategy, string_types):
- self.fetcher = fs.from_url(url_or_fetch_strategy)
+ self.fetcher = fs.from_url_scheme(url_or_fetch_strategy)
elif isinstance(url_or_fetch_strategy, fs.FetchStrategy):
self.fetcher = url_or_fetch_strategy
else:
@@ -397,16 +400,9 @@ class Stage(object):
# TODO: CompositeFetchStrategy here.
self.skip_checksum_for_mirror = True
if self.mirror_path:
- mirrors = spack.config.get('mirrors')
-
- # Join URLs of mirror roots with mirror paths. Because
- # urljoin() will strip everything past the final '/' in
- # the root, so we add a '/' if it is not present.
- mir_roots = [
- sup.substitute_path_variables(root) if root.endswith(os.sep)
- else sup.substitute_path_variables(root) + os.sep
- for root in mirrors.values()]
- urls = [urljoin(root, self.mirror_path) for root in mir_roots]
+ urls = [
+ url_util.join(mirror.fetch_url, self.mirror_path)
+ for mirror in spack.mirror.MirrorCollection().values()]
# If this archive is normally fetched from a tarball URL,
# then use the same digest. `spack mirror` ensures that
@@ -425,9 +421,12 @@ class Stage(object):
# Add URL strategies for all the mirrors with the digest
for url in urls:
- fetchers.insert(
- 0, fs.URLFetchStrategy(
- url, digest, expand=expand, extension=extension))
+ fetchers.append(fs.from_url_scheme(
+ url, digest, expand=expand, extension=extension))
+ # fetchers.insert(
+ # 0, fs.URLFetchStrategy(
+ # url, digest, expand=expand, extension=extension))
+
if self.default_fetcher.cachable:
fetchers.insert(
0, spack.caches.fetch_cache.fetcher(
@@ -708,6 +707,91 @@ def purge():
remove_linked_tree(stage_path)
+def get_checksums_for_versions(
+ url_dict, name, first_stage_function=None, keep_stage=False):
+ """Fetches and checksums archives from URLs.
+
+ This function is called by both ``spack checksum`` and ``spack
+ create``. The ``first_stage_function`` argument allows the caller to
+ inspect the first downloaded archive, e.g., to determine the build
+ system.
+
+ Args:
+ url_dict (dict): A dictionary of the form: version -> URL
+ name (str): The name of the package
+ first_stage_function (callable): function that takes a Stage and a URL;
+ this is run on the stage of the first URL downloaded
+ keep_stage (bool): whether to keep staging area when command completes
+
+ Returns:
+ (str): A multi-line string containing versions and corresponding hashes
+
+ """
+ sorted_versions = sorted(url_dict.keys(), reverse=True)
+
+ # Find length of longest string in the list for padding
+ max_len = max(len(str(v)) for v in sorted_versions)
+ num_ver = len(sorted_versions)
+
+ tty.msg("Found {0} version{1} of {2}:".format(
+ num_ver, '' if num_ver == 1 else 's', name),
+ "",
+ *spack.cmd.elide_list(
+ ["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
+ for v in sorted_versions]))
+ tty.msg('')
+
+ archives_to_fetch = tty.get_number(
+ "How many would you like to checksum?", default=1, abort='q')
+
+ if not archives_to_fetch:
+ tty.die("Aborted.")
+
+ versions = sorted_versions[:archives_to_fetch]
+ urls = [url_dict[v] for v in versions]
+
+ tty.msg("Downloading...")
+ version_hashes = []
+ i = 0
+ for url, version in zip(urls, versions):
+ try:
+ with Stage(url, keep=keep_stage) as stage:
+ # Fetch the archive
+ stage.fetch()
+ if i == 0 and first_stage_function:
+ # Only run first_stage_function the first time,
+ # no need to run it every time
+ first_stage_function(stage, url)
+
+ # Checksum the archive and add it to the list
+ version_hashes.append((version, spack.util.crypto.checksum(
+ hashlib.sha256, stage.archive_file)))
+ i += 1
+ except FailedDownloadError:
+ tty.msg("Failed to fetch {0}".format(url))
+ except Exception as e:
+ tty.msg("Something failed on {0}, skipping.".format(url),
+ " ({0})".format(e))
+
+ if not version_hashes:
+ tty.die("Could not fetch any versions for {0}".format(name))
+
+ # Find length of longest string in the list for padding
+ max_len = max(len(str(v)) for v, h in version_hashes)
+
+ # Generate the version directives to put in a package.py
+ version_lines = "\n".join([
+ " version('{0}', {1}sha256='{2}')".format(
+ v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
+ ])
+
+ num_hash = len(version_hashes)
+ tty.msg("Checksummed {0} version{1} of {2}".format(
+ num_hash, '' if num_hash == 1 else 's', name))
+
+ return version_lines
+
+
class StageError(spack.error.SpackError):
""""Superclass for all errors encountered during staging."""
@@ -720,5 +804,9 @@ class RestageError(StageError):
""""Error encountered during restaging."""
+class VersionFetchError(StageError):
+ """Raised when we can't determine a URL to fetch a package."""
+
+
# Keep this in namespace for convenience
FailedDownloadError = fs.FailedDownloadError
diff --git a/lib/spack/spack/test/cmd/pkg.py b/lib/spack/spack/test/cmd/pkg.py
index c9e8e74c4a..e7d1006cc1 100644
--- a/lib/spack/spack/test/cmd/pkg.py
+++ b/lib/spack/spack/test/cmd/pkg.py
@@ -53,6 +53,8 @@ def mock_pkg_git_repo(tmpdir_factory):
# initial commit with mock packages
git('add', '.')
+ git('config', 'user.email', 'testing@spack.io')
+ git('config', 'user.name', 'Spack Testing')
git('commit', '-m', 'initial mock repo commit')
# add commit with pkg-a, pkg-b, pkg-c packages
diff --git a/lib/spack/spack/test/config.py b/lib/spack/spack/test/config.py
index 2de4e55281..3b85bb2a23 100644
--- a/lib/spack/spack/test/config.py
+++ b/lib/spack/spack/test/config.py
@@ -595,6 +595,7 @@ def test_bad_config_section(mock_config):
spack.config.get('foobar')
+@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_bad_command_line_scopes(tmpdir, mock_config):
cfg = spack.config.Configuration()
diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py
index cf962ada4f..d8081d108c 100644
--- a/lib/spack/spack/test/llnl/util/lock.py
+++ b/lib/spack/spack/test/llnl/util/lock.py
@@ -546,6 +546,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path):
timeout_write(lock_path, 5, 1))
+@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_on_read_only_lockfile(lock_dir, lock_path):
"""read-only directory, read-only lockfile."""
touch(lock_path)
@@ -573,6 +574,7 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path):
pass
+@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_no_lockfile(lock_dir, lock_path):
"""read-only directory, no lockfile (so can't create)."""
with read_only(lock_dir):
diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py
index 66b358435f..cc4f944867 100644
--- a/lib/spack/spack/test/stage.py
+++ b/lib/spack/spack/test/stage.py
@@ -653,6 +653,7 @@ class TestStage(object):
assert source_path.endswith(spack.stage._source_path_subdir)
assert not os.path.exists(source_path)
+ @pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_first_accessible_path(self, tmpdir):
"""Test _first_accessible_path names."""
spack_dir = tmpdir.join('paths')
@@ -783,6 +784,7 @@ class TestStage(object):
assert spack.stage._resolve_paths(paths) == res_paths
+ @pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_get_stage_root_bad_path(self, clear_stage_root):
"""Ensure an invalid stage path root raises a StageError."""
with spack.config.override('config:build_stage', '/no/such/path'):
diff --git a/lib/spack/spack/util/s3.py b/lib/spack/spack/util/s3.py
new file mode 100644
index 0000000000..ee6b3d56cf
--- /dev/null
+++ b/lib/spack/spack/util/s3.py
@@ -0,0 +1,44 @@
+# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import os
+
+import six.moves.urllib.parse as urllib_parse
+
+import spack
+import spack.util.url as url_util
+
+
+def create_s3_session(url):
+ url = url_util.parse(url)
+ if url.scheme != 's3':
+ raise ValueError(
+ 'Can not create S3 session from URL with scheme: {SCHEME}'.format(
+ SCHEME=url.scheme))
+
+ # NOTE(opadron): import boto and friends as late as possible. We don't
+ # want to require boto as a dependency unless the user actually wants to
+ # access S3 mirrors.
+ from boto3 import Session
+
+ session = Session()
+
+ s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')}
+
+ endpoint_url = os.environ.get('S3_ENDPOINT_URL')
+ if endpoint_url:
+ if urllib_parse.urlparse(endpoint_url, scheme=None).scheme is None:
+ endpoint_url = '://'.join(('https', endpoint_url))
+
+ s3_client_args['endpoint_url'] = endpoint_url
+
+ # if no access credentials provided above, then access anonymously
+ if not session.get_credentials():
+ from botocore import UNSIGNED
+ from botocore.client import Config
+
+ s3_client_args["config"] = Config(signature_version=UNSIGNED)
+
+ return session.client('s3', **s3_client_args)
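A minimal, illustrative sketch of using the `create_s3_session()` helper above (the bucket and endpoint names are made up; boto3 must be installed):

```python
# Illustrative only: obtain an S3 client via the new spack.util.s3 helper.
import os
import spack.util.s3 as s3_util

# Optionally point the client at an S3-compatible, non-AWS endpoint; if no
# scheme is given, https is assumed.
os.environ['S3_ENDPOINT_URL'] = 'https://minio.example.com:9000'

# Credentials come from the usual boto3 sources; if none are found, the
# client falls back to anonymous (unsigned) access.
s3 = s3_util.create_s3_session('s3://example-bucket/spack-mirror')
response = s3.get_object(Bucket='example-bucket',
                         Key='spack-mirror/build_cache/index.html')
print(response['Body'].read()[:200])
```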
diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py
new file mode 100644
index 0000000000..6b2786f244
--- /dev/null
+++ b/lib/spack/spack/util/url.py
@@ -0,0 +1,175 @@
+# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+"""
+Utility functions for parsing, formatting, and manipulating URLs.
+"""
+
+import itertools
+import os.path
+
+from six import string_types
+import six.moves.urllib.parse as urllib_parse
+
+import spack.util.path
+
+
+def _split_all(path):
+ """Split path into its atomic components.
+
+ Returns the shortest list, L, of strings such that os.path.join(*L) == path
+ and os.path.split(element) == ('', element) for every element in L except
+ possibly the first. This first element may possibly have the value of '/',
+ or some other OS-dependent path root.
+ """
+ result = []
+ a = path
+ old_a = None
+ while a != old_a:
+ (old_a, (a, b)) = a, os.path.split(a)
+
+ if a or b:
+ result.insert(0, b or '/')
+
+ return result
+
+
+def local_file_path(url):
+ """Get a local file path from a url.
+
+ If url is a file:// URL, return the absolute path to the local
+ file or directory referenced by it. Otherwise, return None.
+ """
+ if isinstance(url, string_types):
+ url = parse(url)
+
+ if url.scheme == 'file':
+ return url.path
+ return None
+
+
+def parse(url, scheme='file'):
+ """Parse a mirror url.
+
+ For file:// URLs, the netloc and path components are concatenated and
+ passed through spack.util.path.canonicalize_path().
+
+ Otherwise, the returned value is the same as urllib's urlparse() with
+ allow_fragments=False.
+ """
+
+ url_obj = (
+ urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False)
+ if isinstance(url, string_types) else url)
+
+ (scheme, netloc, path, params, query, _) = url_obj
+ scheme = (scheme or 'file').lower()
+
+ if scheme == 'file':
+ path = spack.util.path.canonicalize_path(netloc + path)
+ while path.startswith('//'):
+ path = path[1:]
+ netloc = ''
+
+ return urllib_parse.ParseResult(scheme=scheme,
+ netloc=netloc,
+ path=path,
+ params=params,
+ query=query,
+ fragment=None)
+
+
+def format(parsed_url):
+ """Format a URL string
+
+ Returns a canonicalized format of the given URL as a string.
+ """
+ if isinstance(parsed_url, string_types):
+ parsed_url = parse(parsed_url)
+
+ return parsed_url.geturl()
+
+
+def join(base_url, path, *extra, **kwargs):
+ """Joins a base URL with one or more local URL path components
+
+ If resolve_href is True, treat the base URL as though it were the locator
+ of a web page, and the remaining URL path components as though they formed
+ a relative URL to be resolved against it (i.e.: as in os.path.join(...)).
+ The result is an absolute URL to the resource to which a user's browser
+ would navigate if they clicked on a link with an "href" attribute equal to
+ the relative URL.
+
+ If resolve_href is False (default), then the URL path components are joined
+ as in os.path.join().
+
+ Examples:
+ base_url = 's3://bucket/index.html'
+ body = fetch_body(base_url)
+ link = get_href(body) # link == '../other-bucket/document.txt'
+
+ # wrong - link is a local URL that needs to be resolved against base_url
+ spack.util.url.join(base_url, link)
+ 's3://bucket/other-bucket/document.txt'
+
+ # correct - resolve local URL against base_url
+ spack.util.url.join(base_url, link, resolve_href=True)
+ 's3://other-bucket/document.txt'
+
+ prefix = 'https://mirror.spack.io/build_cache'
+
+ # wrong - prefix is just a URL prefix
+ spack.util.url.join(prefix, 'my-package', resolve_href=True)
+ 'https://mirror.spack.io/my-package'
+
+ # correct - simply append additional URL path components
+ spack.util.url.join(prefix, 'my-package', resolve_href=False) # default
+ 'https://mirror.spack.io/build_cache/my-package'
+ """
+ base_url = parse(base_url)
+ resolve_href = kwargs.get('resolve_href', False)
+
+ (scheme, netloc, base_path, params, query, _) = base_url
+ scheme = scheme.lower()
+
+ path_tokens = [
+ part for part in itertools.chain(
+ _split_all(path),
+ itertools.chain.from_iterable(
+ _split_all(extra_path) for extra_path in extra))
+ if part and part != '/']
+
+ base_path_args = ['/fake-root']
+ if scheme == 's3':
+ if netloc:
+ base_path_args.append(netloc)
+
+ if base_path.startswith('/'):
+ base_path = base_path[1:]
+
+ base_path_args.append(base_path)
+
+ if resolve_href:
+ new_base_path, _ = os.path.split(os.path.join(*base_path_args))
+ base_path_args = [new_base_path]
+
+ base_path_args.extend(path_tokens)
+ base_path = os.path.relpath(os.path.join(*base_path_args), '/fake-root')
+
+ if scheme == 's3':
+ path_tokens = [
+ part for part in _split_all(base_path)
+ if part and part != '/']
+
+ if path_tokens:
+ netloc = path_tokens.pop(0)
+ base_path = os.path.join('', *path_tokens)
+
+ return format(urllib_parse.ParseResult(scheme=scheme,
+ netloc=netloc,
+ path=base_path,
+ params=params,
+ query=query,
+ fragment=None))
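
In addition to the docstring examples, extra path components passed to join() are appended in order when resolve_href is False; a hedged sketch (the mirror layout is illustrative):

    join('https://mirror.spack.io/build_cache', 'linux', 'my-package.spack')
    # -> 'https://mirror.spack.io/build_cache/linux/my-package.spack'
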
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index da2d5bbeb9..e0a23fb444 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -5,16 +5,21 @@
from __future__ import print_function
+import codecs
+import errno
import re
import os
+import os.path
+import shutil
import ssl
import sys
import traceback
-import hashlib
+from itertools import product
+
+import six
from six.moves.urllib.request import urlopen, Request
from six.moves.urllib.error import URLError
-from six.moves.urllib.parse import urljoin
import multiprocessing.pool
try:
@@ -28,20 +33,47 @@ except ImportError:
class HTMLParseError(Exception):
pass
+from llnl.util.filesystem import mkdirp
import llnl.util.tty as tty
-import spack.config
import spack.cmd
-import spack.url
-import spack.stage
+import spack.config
import spack.error
+import spack.url
import spack.util.crypto
+import spack.util.s3 as s3_util
+import spack.util.url as url_util
+
from spack.util.compression import ALLOWED_ARCHIVE_TYPES
# Timeout in seconds for web requests
_timeout = 10
+# See docstring for standardize_header_names()
+_separators = ('', ' ', '_', '-')
+HTTP_HEADER_NAME_ALIASES = {
+ "Accept-ranges": set(
+ ''.join((A, 'ccept', sep, R, 'anges'))
+ for A, sep, R in product('Aa', _separators, 'Rr')),
+
+ "Content-length": set(
+ ''.join((C, 'ontent', sep, L, 'ength'))
+ for C, sep, L in product('Cc', _separators, 'Ll')),
+
+ "Content-type": set(
+ ''.join((C, 'ontent', sep, T, 'ype'))
+ for C, sep, T in product('Cc', _separators, 'Tt')),
+
+ "Date": set(('Date', 'date')),
+
+ "Last-modified": set(
+ ''.join((L, 'ast', sep, M, 'odified'))
+ for L, sep, M in product('Ll', _separators, 'Mm')),
+
+ "Server": set(('Server', 'server'))
+}
+
class LinkParser(HTMLParser):
"""This parser just takes an HTML page and strips out the hrefs on the
@@ -59,7 +91,7 @@ class LinkParser(HTMLParser):
class NonDaemonProcess(multiprocessing.Process):
- """Process tha allows sub-processes, so pools can have sub-pools."""
+ """Process that allows sub-processes, so pools can have sub-pools."""
@property
def daemon(self):
return False
@@ -86,25 +118,53 @@ else:
super(NonDaemonPool, self).__init__(*args, **kwargs)
-def _read_from_url(url, accept_content_type=None):
+def uses_ssl(parsed_url):
+ if parsed_url.scheme == 'https':
+ return True
+
+ if parsed_url.scheme == 's3':
+ endpoint_url = os.environ.get('S3_ENDPOINT_URL')
+ if not endpoint_url:
+ return True
+
+ if url_util.parse(endpoint_url, scheme='https').scheme == 'https':
+ return True
+
+ return False
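
The intended truth table, sketched (the s3 case assumes S3_ENDPOINT_URL is unset, so the default AWS endpoint over https applies):

    uses_ssl(url_util.parse('https://mirror.spack.io'))     # True
    uses_ssl(url_util.parse('http://example.com/mirror'))   # False
    uses_ssl(url_util.parse('s3://my-bucket/build_cache'))  # True
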
+
+
+__UNABLE_TO_VERIFY_SSL = (
+ lambda pyver: (
+ (pyver < (2, 7, 9)) or
+ ((3,) < pyver < (3, 4, 3))
+ ))(sys.version_info)
+
+
+def read_from_url(url, accept_content_type=None):
+ url = url_util.parse(url)
context = None
- verify_ssl = spack.config.get('config:verify_ssl')
- pyver = sys.version_info
- if (pyver < (2, 7, 9) or (3,) < pyver < (3, 4, 3)):
- if verify_ssl:
- tty.warn("Spack will not check SSL certificates. You need to "
- "update your Python to enable certificate "
- "verification.")
- elif verify_ssl:
- # without a defined context, urlopen will not verify the ssl cert for
- # python 3.x
- context = ssl.create_default_context()
- else:
- context = ssl._create_unverified_context()
- req = Request(url)
+ verify_ssl = spack.config.get('config:verify_ssl')
- if accept_content_type:
+ # Don't even bother with a context unless the URL scheme is one that uses
+ # SSL certs.
+ if uses_ssl(url):
+ if verify_ssl:
+ if __UNABLE_TO_VERIFY_SSL:
+ # User wants SSL verification, but it cannot be provided.
+ warn_no_ssl_cert_checking()
+ else:
+ # User wants SSL verification, and it *can* be provided.
+ context = ssl.create_default_context()
+ else:
+ # User has explicitly indicated that they do not want SSL
+ # verification.
+ context = ssl._create_unverified_context()
+
+ req = Request(url_util.format(url))
+ content_type = None
+ is_web_url = url.scheme in ('http', 'https')
+ if accept_content_type and is_web_url:
# Make a HEAD request first to check the content type. This lets
# us ignore tarballs and gigantic files.
# It would be nice to do this with the HTTP Accept header to avoid
@@ -113,29 +173,179 @@ def _read_from_url(url, accept_content_type=None):
req.get_method = lambda: "HEAD"
resp = _urlopen(req, timeout=_timeout, context=context)
- if "Content-type" not in resp.headers:
- tty.debug("ignoring page " + url)
- return None, None
-
- if not resp.headers["Content-type"].startswith(accept_content_type):
- tty.debug("ignoring page " + url + " with content type " +
- resp.headers["Content-type"])
- return None, None
+ content_type = resp.headers.get('Content-type')
# Do the real GET request when we know it's just HTML.
req.get_method = lambda: "GET"
response = _urlopen(req, timeout=_timeout, context=context)
- response_url = response.geturl()
- # Read the page and and stick it in the map we'll return
- page = response.read().decode('utf-8')
+ if accept_content_type and not is_web_url:
+ content_type = response.headers.get('Content-type')
- return response_url, page
+ reject_content_type = (
+ accept_content_type and (
+ content_type is None or
+ not content_type.startswith(accept_content_type)))
+ if reject_content_type:
+ tty.debug("ignoring page {0}{1}{2}".format(
+ url_util.format(url),
+ " with content type " if content_type is not None else "",
+ content_type or ""))
-def read_from_url(url, accept_content_type=None):
- resp_url, contents = _read_from_url(url, accept_content_type)
- return contents
+ return None, None, None
+
+ return response.geturl(), response.headers, response
+
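
read_from_url() now returns a (url, headers, response) triple rather than page text, so callers decode the body themselves, as _spider() does further down. A hypothetical caller:

    import codecs

    resp_url, headers, response = read_from_url(
        'https://mirror.spack.io/build_cache/index.html', 'text/html')
    if response is not None:
        page = codecs.getreader('utf-8')(response).read()
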
+
+def warn_no_ssl_cert_checking():
+ tty.warn("Spack will not check SSL certificates. You need to update "
+ "your Python to enable certificate verification.")
+
+
+def push_to_url(local_path, remote_path, **kwargs):
+ keep_original = kwargs.get('keep_original', True)
+
+ local_url = url_util.parse(local_path)
+ local_file_path = url_util.local_file_path(local_url)
+ if local_file_path is None:
+ raise ValueError('local path must be a file:// url')
+
+ remote_url = url_util.parse(remote_path)
+ verify_ssl = spack.config.get('config:verify_ssl')
+
+ if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
+ warn_no_ssl_cert_checking()
+
+ remote_file_path = url_util.local_file_path(remote_url)
+ if remote_file_path is not None:
+ mkdirp(os.path.dirname(remote_file_path))
+ if keep_original:
+ shutil.copy(local_file_path, remote_file_path)
+ else:
+ try:
+ os.rename(local_file_path, remote_file_path)
+ except OSError as e:
+ if e.errno == errno.EXDEV:
+ # NOTE(opadron): The above move failed because it crosses
+ # filesystem boundaries. Copy the file (plus original
+ # metadata), and then delete the original. This operation
+ # needs to be done in separate steps.
+ shutil.copy2(local_file_path, remote_file_path)
+ os.remove(local_file_path)
+
+ elif remote_url.scheme == 's3':
+ extra_args = kwargs.get('extra_args', {})
+
+ remote_path = remote_url.path
+ while remote_path.startswith('/'):
+ remote_path = remote_path[1:]
+
+ s3 = s3_util.create_s3_session(remote_url)
+ s3.upload_file(local_file_path, remote_url.netloc,
+ remote_path, ExtraArgs=extra_args)
+
+ if not keep_original:
+ os.remove(local_file_path)
+
+ else:
+ raise NotImplementedError(
+ 'Unrecognized URL scheme: {SCHEME}'.format(
+ SCHEME=remote_url.scheme))
+
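
Hypothetical calls covering the two supported destinations (paths and bucket names are placeholders): a file:// target is copied or renamed on the local filesystem, while an s3:// target goes through boto3's upload_file():

    # Copy into a local (file://) mirror, keeping the original file.
    push_to_url('/tmp/foo.tar.gz', 'file:///srv/mirror/foo.tar.gz')

    # Upload to an S3 mirror and remove the local copy afterwards.
    push_to_url('/tmp/foo.tar.gz',
                's3://example-mirror/build_cache/foo.tar.gz',
                keep_original=False)
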
+
+def url_exists(url):
+ url = url_util.parse(url)
+ local_path = url_util.local_file_path(url)
+ if local_path:
+ return os.path.exists(local_path)
+
+ if url.scheme == 's3':
+ s3 = s3_util.create_s3_session(url)
+ from botocore.exceptions import ClientError
+ try:
+ s3.get_object(Bucket=url.netloc, Key=url.path)
+ return True
+ except ClientError as err:
+ if err.response['Error']['Code'] == 'NoSuchKey':
+ return False
+ raise err
+
+ # otherwise, just try to "read" from the URL, and assume that *any*
+ # non-throwing response contains the resource represented by the URL
+ try:
+ read_from_url(url)
+ return True
+ except URLError:
+ return False
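
url_exists() dispatches on the URL scheme; roughly (placeholders again):

    url_exists('file:///srv/mirror/index.html')       # os.path.exists() on the path
    url_exists('s3://example-mirror/index.html')      # get_object(); False on NoSuchKey
    url_exists('https://mirror.spack.io/index.html')  # True if read_from_url succeeds
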
+
+
+def remove_url(url):
+ url = url_util.parse(url)
+
+ local_path = url_util.local_file_path(url)
+ if local_path:
+ os.remove(local_path)
+ return
+
+ if url.scheme == 's3':
+ s3 = s3_util.create_s3_session(url)
+ s3.delete_object(Bucket=url.s3_bucket, Key=url.path)
+ return
+
+ # Don't even try for other URL schemes.
+
+
+def _list_s3_objects(client, url, num_entries, start_after=None):
+ list_args = dict(
+ Bucket=url.netloc,
+ Prefix=url.path,
+ MaxKeys=num_entries)
+
+ if start_after is not None:
+ list_args['StartAfter'] = start_after
+
+ result = client.list_objects_v2(**list_args)
+
+ last_key = None
+ if result['IsTruncated']:
+ last_key = result['Contents'][-1]['Key']
+
+ iter = (key for key in
+ (
+ os.path.relpath(entry['Key'], url.path)
+ for entry in result['Contents']
+ )
+ if key != '.')
+
+ return iter, last_key
+
+
+def _iter_s3_prefix(client, url, num_entries=1024):
+ key = None
+ while True:
+ contents, key = _list_s3_objects(
+ client, url, num_entries, start_after=key)
+
+ for x in contents:
+ yield x
+
+ if not key:
+ break
+
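
These two helpers page through an S3 prefix with list_objects_v2 and its StartAfter cursor until IsTruncated is false, yielding keys relative to the prefix. A usage sketch (bucket name is a placeholder):

    url = url_util.parse('s3://example-mirror/build_cache')
    s3 = s3_util.create_s3_session(url)
    for relative_key in _iter_s3_prefix(s3, url, num_entries=256):
        print(relative_key)
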
+
+def list_url(url):
+ url = url_util.parse(url)
+
+ local_path = url_util.local_file_path(url)
+ if local_path:
+ return os.listdir(local_path)
+
+ if url.scheme == 's3':
+ s3 = s3_util.create_s3_session(url)
+ return list(set(
+ key.split('/', 1)[0]
+ for key in _iter_s3_prefix(s3, url)))
def _spider(url, visited, root, depth, max_depth, raise_on_error):
@@ -154,16 +364,12 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
pages = {} # dict from page URL -> text content.
links = set() # set of all links seen on visited pages.
- # root may end with index.html -- chop that off.
- if root.endswith('/index.html'):
- root = re.sub('/index.html$', '', root)
-
try:
- response_url, page = _read_from_url(url, 'text/html')
-
- if not response_url or not page:
+ response_url, _, response = read_from_url(url, 'text/html')
+ if not response_url or not response:
return pages, links
+ page = codecs.getreader('utf-8')(response).read()
pages[response_url] = page
# Parse out the links in the page
@@ -173,8 +379,10 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
while link_parser.links:
raw_link = link_parser.links.pop()
- abs_link = urljoin(response_url, raw_link.strip())
-
+ abs_link = url_util.join(
+ response_url,
+ raw_link.strip(),
+ resolve_href=True)
links.add(abs_link)
# Skip stuff that looks like an archive
@@ -243,16 +451,28 @@ def _spider_wrapper(args):
return _spider(*args)
-def _urlopen(*args, **kwargs):
+def _urlopen(req, *args, **kwargs):
"""Wrapper for compatibility with old versions of Python."""
- # We don't pass 'context' parameter to urlopen because it
- # was introduces only starting versions 2.7.9 and 3.4.3 of Python.
- if 'context' in kwargs and kwargs['context'] is None:
+ url = req
+ try:
+ url = url.get_full_url()
+ except AttributeError:
+ pass
+
+ # We don't pass 'context' parameter because it was only introduced starting
+ # with versions 2.7.9 and 3.4.3 of Python.
+ if 'context' in kwargs:
del kwargs['context']
- return urlopen(*args, **kwargs)
+ opener = urlopen
+ if url_util.parse(url).scheme == 's3':
+ import spack.s3_handler
+ opener = spack.s3_handler.open
-def spider(root_url, depth=0):
+ return opener(req, *args, **kwargs)
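
With this dispatch, callers keep using _urlopen() unchanged; s3:// requests are simply routed to spack.s3_handler.open instead of urllib's urlopen. A sketch (bucket is a placeholder, and the handler is assumed to accept the same arguments it is given above):

    response = _urlopen(Request('s3://example-mirror/build_cache/index.json'),
                        timeout=_timeout)
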
+
+
+def spider(root, depth=0):
"""Gets web pages from a root URL.
If depth is specified (e.g., depth=2), then this will also follow
@@ -262,7 +482,9 @@ def spider(root_url, depth=0):
performance over a sequential fetch.
"""
- pages, links = _spider(root_url, set(), root_url, 0, depth, False)
+
+ root = url_util.parse(root)
+ pages, links = _spider(root, set(), root, 0, depth, False)
return pages, links
@@ -356,99 +578,112 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
return versions
-def get_checksums_for_versions(
- url_dict, name, first_stage_function=None, keep_stage=False):
- """Fetches and checksums archives from URLs.
-
- This function is called by both ``spack checksum`` and ``spack
- create``. The ``first_stage_function`` argument allows the caller to
- inspect the first downloaded archive, e.g., to determine the build
- system.
-
- Args:
- url_dict (dict): A dictionary of the form: version -> URL
- name (str): The name of the package
- first_stage_function (callable): function that takes a Stage and a URL;
- this is run on the stage of the first URL downloaded
- keep_stage (bool): whether to keep staging area when command completes
-
- Returns:
- (str): A multi-line string containing versions and corresponding hashes
-
+def standardize_header_names(headers):
+ """Replace certain header names with standardized spellings.
+
+ Standardizes the spellings of the following header names:
+ - Accept-ranges
+ - Content-length
+ - Content-type
+ - Date
+ - Last-modified
+ - Server
+
+ Every name considered is translated to one of the above names if the only
+ difference between the two is how the first letters of each word are
+ capitalized; whether words are separated; or, if separated, whether they
+ are so by a dash (-), underscore (_), or space ( ). Header names that
+ cannot be mapped as described above are returned unaltered.
+
+ For example: The standard spelling of "Content-length" would be substituted
+ for any of the following names:
+ - Content-length
+ - content_length
+ - contentlength
+ - content_Length
+ - contentLength
+ - content Length
+
+ ... and any other header name, such as "Content-encoding", would not be
+ altered, regardless of spelling.
+
+ If headers is a string, then it (or an appropriate substitute) is returned.
+
+ If headers is a non-empty tuple, headers[0] is a string, and there exists a
+ standardized spelling for headers[0] that differs from it, then a new tuple
+ is returned. This tuple has the same elements as headers, except the first
+ element is the standardized spelling for headers[0].
+
+ If headers is a sequence, then a new list is considered, where each element
+ is its corresponding element in headers, but mapped as above if a string or
+ tuple. This new list is returned if at least one of its elements differs
+ from its corresponding element in headers.
+
+ If headers is a mapping, then a new dict is considered, where the key in
+ each item is the key of its corresponding item in headers, mapped as above
+ if a string or tuple. The value is taken from the corresponding item. If
+ the keys of multiple items in headers map to the same key after being
+ standardized, then the value for the resulting item is undefined. The new
+ dict is returned if at least one of its items has a key that differs from
+ that of their corresponding item in headers, or if the keys of multiple
+ items in headers map to the same key after being standardized.
+
+ In all other cases headers is returned unaltered.
"""
- sorted_versions = sorted(url_dict.keys(), reverse=True)
+ if isinstance(headers, six.string_types):
+ for standardized_spelling, other_spellings in (
+ HTTP_HEADER_NAME_ALIASES.items()):
+ if headers in other_spellings:
+ if headers == standardized_spelling:
+ return headers
+ return standardized_spelling
+ return headers
+
+ if isinstance(headers, tuple):
+ if not headers:
+ return headers
+ old = headers[0]
+ if isinstance(old, six.string_types):
+ new = standardize_header_names(old)
+ if old is not new:
+ return (new,) + headers[1:]
+ return headers
- # Find length of longest string in the list for padding
- max_len = max(len(str(v)) for v in sorted_versions)
- num_ver = len(sorted_versions)
+ try:
+ changed = False
+ new_dict = {}
+ for key, value in headers.items():
+ if isinstance(key, (tuple, six.string_types)):
+ old_key, key = key, standardize_header_names(key)
+ changed = changed or key is not old_key
- tty.msg("Found {0} version{1} of {2}:".format(
- num_ver, '' if num_ver == 1 else 's', name),
- "",
- *spack.cmd.elide_list(
- ["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
- for v in sorted_versions]))
- print()
+ new_dict[key] = value
+
+ return new_dict if changed else headers
+ except (AttributeError, TypeError, ValueError):
+ pass
- archives_to_fetch = tty.get_number(
- "How many would you like to checksum?", default=1, abort='q')
+ try:
+ changed = False
+ new_list = []
+ for item in headers:
+ if isinstance(item, (tuple, six.string_types)):
+ old_item, item = item, standardize_header_names(item)
+ changed = changed or item is not old_item
- if not archives_to_fetch:
- tty.die("Aborted.")
+ new_list.append(item)
- versions = sorted_versions[:archives_to_fetch]
- urls = [url_dict[v] for v in versions]
+ return new_list if changed else headers
+ except TypeError:
+ pass
- tty.msg("Downloading...")
- version_hashes = []
- i = 0
- for url, version in zip(urls, versions):
- try:
- with spack.stage.Stage(url, keep=keep_stage) as stage:
- # Fetch the archive
- stage.fetch()
- if i == 0 and first_stage_function:
- # Only run first_stage_function the first time,
- # no need to run it every time
- first_stage_function(stage, url)
-
- # Checksum the archive and add it to the list
- version_hashes.append((version, spack.util.crypto.checksum(
- hashlib.sha256, stage.archive_file)))
- i += 1
- except spack.stage.FailedDownloadError:
- tty.msg("Failed to fetch {0}".format(url))
- except Exception as e:
- tty.msg("Something failed on {0}, skipping.".format(url),
- " ({0})".format(e))
-
- if not version_hashes:
- tty.die("Could not fetch any versions for {0}".format(name))
-
- # Find length of longest string in the list for padding
- max_len = max(len(str(v)) for v, h in version_hashes)
-
- # Generate the version directives to put in a package.py
- version_lines = "\n".join([
- " version('{0}', {1}sha256='{2}')".format(
- v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
- ])
-
- num_hash = len(version_hashes)
- tty.msg("Checksummed {0} version{1} of {2}".format(
- num_hash, '' if num_hash == 1 else 's', name))
-
- return version_lines
+ return headers
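
To make the mapping rules concrete, a few expected results (illustrative):

    standardize_header_names('content_length')            # -> 'Content-length'
    standardize_header_names(('contentType', 'value'))    # -> ('Content-type', 'value')
    standardize_header_names({'last modified': 'today'})  # -> {'Last-modified': 'today'}
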
class SpackWebError(spack.error.SpackError):
"""Superclass for Spack web spidering errors."""
-class VersionFetchError(SpackWebError):
- """Raised when we can't determine a URL to fetch a package."""
-
-
class NoNetworkConnectionError(SpackWebError):
"""Raised when an operation can't get an internet connection."""
def __init__(self, message, url):