From 497fddfcb9fdcebac47b56c6d856110b9150b5ea Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Fri, 20 Dec 2019 23:32:18 +0100 Subject: Fetching from URLs falls back to mirrors if they exist (#13881) Users can now list mirrors of the main url in packages. - [x] Instead of just a single `url` attribute, users can provide a list (`urls`) in the package, and these will be tried by in order by the fetch strategy. - [x] To handle one of the most common mirror cases, define a `GNUMirrorPackage` mixin to handle all the standard GNU mirrors. GNU packages can set `gnu_mirror_path` to define the path within a mirror, and the mixin handles setting up all the requisite GNU mirror URLs. - [x] update all GNU packages in `builtin` to use the `GNUMirrorPackage` mixin. --- lib/spack/docs/packaging_guide.rst | 28 ++++++++ lib/spack/spack/build_systems/gnu.py | 37 ++++++++++ lib/spack/spack/cmd/url.py | 4 +- lib/spack/spack/fetch_strategy.py | 133 ++++++++++++++++++----------------- lib/spack/spack/package.py | 14 +++- lib/spack/spack/pkgkit.py | 1 + lib/spack/spack/stage.py | 4 +- lib/spack/spack/test/conftest.py | 3 - lib/spack/spack/test/url_fetch.py | 59 +++++++++++++--- 9 files changed, 198 insertions(+), 85 deletions(-) create mode 100644 lib/spack/spack/build_systems/gnu.py (limited to 'lib') diff --git a/lib/spack/docs/packaging_guide.rst b/lib/spack/docs/packaging_guide.rst index f6b87e1ce0..f3b9295f61 100644 --- a/lib/spack/docs/packaging_guide.rst +++ b/lib/spack/docs/packaging_guide.rst @@ -553,6 +553,34 @@ version. This is useful for packages that have an easy to extrapolate URL, but keep changing their URL format every few releases. With this method, you only need to specify the ``url`` when the URL changes. +""""""""""""""""""""""" +Mirrors of the main URL +""""""""""""""""""""""" + +Spack supports listing mirrors of the main URL in a package by defining +the ``urls`` attribute: + +.. code-block:: python + + class Foo(Package): + + urls = [ + 'http://example.com/foo-1.0.tar.gz', + 'http://mirror.com/foo-1.0.tar.gz' + ] + +instead of just a single ``url``. This attribute is a list of possible URLs that +will be tried in order when fetching packages. Notice that either one of ``url`` +or ``urls`` can be present in a package, but not both at the same time. + +A well-known case of packages that can be fetched from multiple mirrors is that +of GNU. For that, Spack goes a step further and defines a mixin class that +takes care of all of the plumbing and requires packagers to just define a proper +``gnu_mirror_path`` attribute: + +.. literalinclude:: _spack_root/var/spack/repos/builtin/packages/autoconf/package.py + :lines: 9-18 + ^^^^^^^^^^^^^^^^^^^^^^^^ Skipping the expand step ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/lib/spack/spack/build_systems/gnu.py b/lib/spack/spack/build_systems/gnu.py new file mode 100644 index 0000000000..0fe6f5f780 --- /dev/null +++ b/lib/spack/spack/build_systems/gnu.py @@ -0,0 +1,37 @@ +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import os.path + +import spack.package + + +class GNUMirrorPackage(spack.package.PackageBase): + """Mixin that takes care of setting url and mirrors for GNU packages.""" + #: Path of the package in a GNU mirror + gnu_mirror_path = None + + #: List of GNU mirrors used by Spack + base_mirrors = [ + 'https://ftp.gnu.org/gnu', + 'https://ftpmirror.gnu.org/', + # Fall back to http if https didn't work (for instance because + # Spack is bootstrapping curl) + 'http://ftpmirror.gnu.org/' + ] + + @property + def urls(self): + self._ensure_gnu_mirror_path_is_set_or_raise() + return [ + os.path.join(m, self.gnu_mirror_path) for m in self.base_mirrors + ] + + def _ensure_gnu_mirror_path_is_set_or_raise(self): + if self.gnu_mirror_path is None: + cls_name = type(self).__name__ + msg = ('{0} must define a `gnu_mirror_path` attribute' + ' [none defined]') + raise AttributeError(msg.format(cls_name)) diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py index 3101f28f08..0c105c65c3 100644 --- a/lib/spack/spack/cmd/url.py +++ b/lib/spack/spack/cmd/url.py @@ -135,7 +135,7 @@ def url_list(args): # Gather set of URLs from all packages for pkg in spack.repo.path.all_packages(): - url = getattr(pkg.__class__, 'url', None) + url = getattr(pkg, 'url', None) urls = url_list_parsing(args, urls, url, pkg) for params in pkg.versions.values(): @@ -174,7 +174,7 @@ def url_summary(args): for pkg in spack.repo.path.all_packages(): urls = set() - url = getattr(pkg.__class__, 'url', None) + url = getattr(pkg, 'url', None) if url: urls.add(url) diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 1f94973f22..f1ea0d35b6 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -22,33 +22,30 @@ in order to build it. They need to define the following methods: * archive() Archive a source directory, e.g. for creating a mirror. """ +import copy +import functools import os import os.path -import sys import re import shutil -import copy +import sys import xml.etree.ElementTree -from functools import wraps -from six import string_types, with_metaclass -import six.moves.urllib.parse as urllib_parse import llnl.util.tty as tty -from llnl.util.filesystem import ( - working_dir, mkdirp, temp_rename, temp_cwd, get_single_file) - +import six +import six.moves.urllib.parse as urllib_parse import spack.config import spack.error import spack.util.crypto as crypto import spack.util.pattern as pattern -import spack.util.web as web_util import spack.util.url as url_util - +import spack.util.web as web_util +from llnl.util.filesystem import ( + working_dir, mkdirp, temp_rename, temp_cwd, get_single_file) +from spack.util.compression import decompressor_for, extension from spack.util.executable import which from spack.util.string import comma_and, quote from spack.version import Version, ver -from spack.util.compression import decompressor_for, extension - #: List of all fetch strategies, created by FetchStrategy metaclass. all_strategies = [] @@ -69,7 +66,7 @@ def _needs_stage(fun): """Many methods on fetch strategies require a stage to be set using set_stage(). This decorator adds a check for self.stage.""" - @wraps(fun) + @functools.wraps(fun) def wrapper(self, *args, **kwargs): if not self.stage: raise NoStageError(fun) @@ -85,18 +82,14 @@ def _ensure_one_stage_entry(stage_path): return os.path.join(stage_path, stage_entries[0]) -class FSMeta(type): - """This metaclass registers all fetch strategies in a list.""" - def __init__(cls, name, bases, dict): - type.__init__(cls, name, bases, dict) - if cls.enabled: - all_strategies.append(cls) +def fetcher(cls): + """Decorator used to register fetch strategies.""" + all_strategies.append(cls) + return cls -class FetchStrategy(with_metaclass(FSMeta, object)): +class FetchStrategy(object): """Superclass of all fetch strategies.""" - enabled = False # Non-abstract subclasses should be enabled. - #: The URL attribute must be specified either at the package class #: level, or as a keyword argument to ``version()``. It is used to #: distinguish fetchers for different versions in the package DSL. @@ -113,16 +106,7 @@ class FetchStrategy(with_metaclass(FSMeta, object)): self.stage = None # Enable or disable caching for this strategy based on # 'no_cache' option from version directive. - self._cache_enabled = not kwargs.pop('no_cache', False) - - def set_stage(self, stage): - """This is called by Stage before any of the fetching - methods are called on the stage.""" - self.stage = stage - - @property - def cache_enabled(self): - return self._cache_enabled + self.cache_enabled = not kwargs.pop('no_cache', False) # Subclasses need to implement these methods def fetch(self): @@ -186,13 +170,18 @@ class FetchStrategy(with_metaclass(FSMeta, object)): def __str__(self): # Should be human readable URL. return "FetchStrategy.__str___" - # This method is used to match fetch strategies to version() - # arguments in packages. @classmethod def matches(cls, args): + """Predicate that matches fetch strategies to arguments of + the version directive. + + Args: + args: arguments of the version directive + """ return cls.url_attr in args +@fetcher class BundleFetchStrategy(FetchStrategy): """ Fetch strategy associated with bundle, or no-code, packages. @@ -204,9 +193,6 @@ class BundleFetchStrategy(FetchStrategy): TODO: Remove this class by refactoring resource handling and the link between composite stages and composite fetch strategies (see #11981). """ - #: This is a concrete fetch strategy for no-code packages. - enabled = True - #: There is no associated URL keyword in ``version()`` for no-code #: packages but this property is required for some strategy-related #: functions (e.g., check_pkg_attributes). @@ -236,7 +222,6 @@ class FetchStrategyComposite(object): Implements the GoF composite pattern. """ matches = FetchStrategy.matches - set_stage = FetchStrategy.set_stage def source_id(self): component_ids = tuple(i.source_id() for i in self) @@ -244,13 +229,13 @@ class FetchStrategyComposite(object): return component_ids +@fetcher class URLFetchStrategy(FetchStrategy): + """URLFetchStrategy pulls source code from a URL for an archive, check the + archive against a checksum, and decompresses the archive. + + The destination for the resulting file(s) is the standard stage path. """ - FetchStrategy that pulls source code from a URL for an archive, check the - archive against a checksum, and decompresses the archive. The destination - for the resulting file(s) is the standard stage source path. - """ - enabled = True url_attr = 'url' # these are checksum types. The generic 'checksum' is deprecated for @@ -262,6 +247,7 @@ class URLFetchStrategy(FetchStrategy): # Prefer values in kwargs to the positionals. self.url = kwargs.get('url', url) + self.mirrors = kwargs.get('mirrors', []) # digest can be set as the first argument, or from an explicit # kwarg by the hash name. @@ -297,20 +283,36 @@ class URLFetchStrategy(FetchStrategy): return os.path.sep.join( ['archive', self.digest[:2], self.digest]) + @property + def candidate_urls(self): + return [self.url] + (self.mirrors or []) + @_needs_stage def fetch(self): if self.archive_file: tty.msg("Already downloaded %s" % self.archive_file) return + for url in self.candidate_urls: + try: + partial_file, save_file = self._fetch_from_url(url) + if save_file: + os.rename(partial_file, save_file) + break + except FetchError as e: + tty.msg(str(e)) + pass + + if not self.archive_file: + raise FailedDownloadError(self.url) + + def _fetch_from_url(self, url): save_file = None partial_file = None if self.stage.save_filename: save_file = self.stage.save_filename partial_file = self.stage.save_filename + '.part' - - tty.msg("Fetching %s" % self.url) - + tty.msg("Fetching %s" % url) if partial_file: save_args = ['-C', '-', # continue partial downloads @@ -324,7 +326,9 @@ class URLFetchStrategy(FetchStrategy): '-D', '-', # print out HTML headers '-L', # resolve 3xx redirects - self.url, + # Timeout if can't establish a connection after 10 sec. + '--connect-timeout', '10', + url, ] if not spack.config.get('config:verify_ssl'): @@ -380,12 +384,7 @@ class URLFetchStrategy(FetchStrategy): flags=re.IGNORECASE) if content_types and 'text/html' in content_types[-1]: warn_content_type_mismatch(self.archive_file or "the archive") - - if save_file: - os.rename(partial_file, save_file) - - if not self.archive_file: - raise FailedDownloadError(self.url) + return partial_file, save_file @property @_needs_stage @@ -395,7 +394,7 @@ class URLFetchStrategy(FetchStrategy): @property def cachable(self): - return self._cache_enabled and bool(self.digest) + return self.cache_enabled and bool(self.digest) @_needs_stage def expand(self): @@ -522,6 +521,7 @@ class URLFetchStrategy(FetchStrategy): return "[no url]" +@fetcher class CacheURLFetchStrategy(URLFetchStrategy): """The resource associated with a cache URL may be out of date.""" @@ -597,7 +597,7 @@ class VCSFetchStrategy(FetchStrategy): patterns = kwargs.get('exclude', None) if patterns is not None: - if isinstance(patterns, string_types): + if isinstance(patterns, six.string_types): patterns = [patterns] for p in patterns: tar.add_default_arg('--exclude=%s' % p) @@ -621,6 +621,7 @@ class VCSFetchStrategy(FetchStrategy): return "%s<%s>" % (self.__class__, self.url) +@fetcher class GoFetchStrategy(VCSFetchStrategy): """Fetch strategy that employs the `go get` infrastructure. @@ -634,7 +635,6 @@ class GoFetchStrategy(VCSFetchStrategy): The fetched source will be moved to the standard stage sourcepath directory during the expand step. """ - enabled = True url_attr = 'go' def __init__(self, **kwargs): @@ -691,6 +691,7 @@ class GoFetchStrategy(VCSFetchStrategy): return "[go] %s" % self.url +@fetcher class GitFetchStrategy(VCSFetchStrategy): """ @@ -712,7 +713,6 @@ class GitFetchStrategy(VCSFetchStrategy): Repositories are cloned into the standard stage source path directory. """ - enabled = True url_attr = 'git' optional_attrs = ['tag', 'branch', 'commit', 'submodules', 'get_full_repo'] @@ -746,7 +746,7 @@ class GitFetchStrategy(VCSFetchStrategy): @property def cachable(self): - return self._cache_enabled and bool(self.commit or self.tag) + return self.cache_enabled and bool(self.commit or self.tag) def source_id(self): return self.commit or self.tag @@ -892,6 +892,7 @@ class GitFetchStrategy(VCSFetchStrategy): return '[git] {0}'.format(self._repo_info()) +@fetcher class SvnFetchStrategy(VCSFetchStrategy): """Fetch strategy that gets source code from a subversion repository. @@ -906,7 +907,6 @@ class SvnFetchStrategy(VCSFetchStrategy): Repositories are checked out into the standard stage source path directory. """ - enabled = True url_attr = 'svn' optional_attrs = ['revision'] @@ -929,7 +929,7 @@ class SvnFetchStrategy(VCSFetchStrategy): @property def cachable(self): - return self._cache_enabled and bool(self.revision) + return self.cache_enabled and bool(self.revision) def source_id(self): return self.revision @@ -991,6 +991,7 @@ class SvnFetchStrategy(VCSFetchStrategy): return "[svn] %s" % self.url +@fetcher class HgFetchStrategy(VCSFetchStrategy): """ @@ -1013,7 +1014,6 @@ class HgFetchStrategy(VCSFetchStrategy): Repositories are cloned into the standard stage source path directory. """ - enabled = True url_attr = 'hg' optional_attrs = ['revision'] @@ -1043,7 +1043,7 @@ class HgFetchStrategy(VCSFetchStrategy): @property def cachable(self): - return self._cache_enabled and bool(self.revision) + return self.cache_enabled and bool(self.revision) def source_id(self): return self.revision @@ -1108,9 +1108,9 @@ class HgFetchStrategy(VCSFetchStrategy): return "[hg] %s" % self.url +@fetcher class S3FetchStrategy(URLFetchStrategy): """FetchStrategy that pulls from an S3 bucket.""" - enabled = True url_attr = 's3' def __init__(self, *args, **kwargs): @@ -1244,10 +1244,15 @@ def _from_merged_attrs(fetcher, pkg, version): """Create a fetcher from merged package and version attributes.""" if fetcher.url_attr == 'url': url = pkg.url_for_version(version) + # TODO: refactor this logic into its own method or function + # TODO: to avoid duplication + mirrors = [spack.url.substitute_version(u, version) + for u in getattr(pkg, 'urls', [])] + attrs = {fetcher.url_attr: url, 'mirrors': mirrors} else: url = getattr(pkg, fetcher.url_attr) + attrs = {fetcher.url_attr: url} - attrs = {fetcher.url_attr: url} attrs.update(pkg.versions[version]) return fetcher(**attrs) diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index 5128a1e17f..f48f296548 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -510,8 +510,8 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): maintainers = [] #: List of attributes to be excluded from a package's hash. - metadata_attrs = ['homepage', 'url', 'list_url', 'extendable', 'parallel', - 'make_jobs'] + metadata_attrs = ['homepage', 'url', 'urls', 'list_url', 'extendable', + 'parallel', 'make_jobs'] def __init__(self, spec): # this determines how the package should be built. @@ -524,6 +524,12 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): # a binary cache. self.installed_from_binary_cache = False + # Ensure that only one of these two attributes are present + if getattr(self, 'url', None) and getattr(self, 'urls', None): + msg = "a package can have either a 'url' or a 'urls' attribute" + msg += " [package '{0.name}' defines both]" + raise ValueError(msg.format(self)) + # Set a default list URL (place to find available versions) if not hasattr(self, 'list_url'): self.list_url = None @@ -750,7 +756,9 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): return version_urls[version] # If no specific URL, use the default, class-level URL - default_url = getattr(self, 'url', None) + url = getattr(self, 'url', None) + urls = getattr(self, 'urls', [None]) + default_url = url or urls.pop(0) # if no exact match AND no class-level default, use the nearest URL if not default_url: diff --git a/lib/spack/spack/pkgkit.py b/lib/spack/spack/pkgkit.py index 7ad7279e73..2ed16cff0a 100644 --- a/lib/spack/spack/pkgkit.py +++ b/lib/spack/spack/pkgkit.py @@ -30,6 +30,7 @@ from spack.build_systems.perl import PerlPackage from spack.build_systems.intel import IntelPackage from spack.build_systems.meson import MesonPackage from spack.build_systems.sip import SIPPackage +from spack.build_systems.gnu import GNUMirrorPackage from spack.mixins import filter_compiler_wrappers diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 7f49305231..6f98edc674 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -271,7 +271,7 @@ class Stage(object): else: raise ValueError( "Can't construct Stage without url or fetch strategy") - self.fetcher.set_stage(self) + self.fetcher.stage = self # self.fetcher can change with mirrors. self.default_fetcher = self.fetcher self.search_fn = search_fn @@ -458,7 +458,7 @@ class Stage(object): for fetcher in generate_fetchers(): try: - fetcher.set_stage(self) + fetcher.stage = self self.fetcher = fetcher self.fetcher.fetch() break diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index 969e2471e4..3ac4d893af 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -240,9 +240,6 @@ def mock_fetch_cache(monkeypatch): return MockCacheFetcher() class MockCacheFetcher(object): - def set_stage(self, stage): - pass - def fetch(self): raise FetchError('Mock cache always fails for tests') diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py index 8047d5e26e..b4df27336e 100644 --- a/lib/spack/spack/test/url_fetch.py +++ b/lib/spack/spack/test/url_fetch.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import collections import os import pytest @@ -10,8 +11,7 @@ from llnl.util.filesystem import working_dir, is_exe import spack.repo import spack.config -from spack.fetch_strategy import FailedDownloadError -from spack.fetch_strategy import from_list_url, URLFetchStrategy +import spack.fetch_strategy as fs from spack.spec import Spec from spack.stage import Stage from spack.version import ver @@ -23,10 +23,30 @@ def checksum_type(request): return request.param +@pytest.fixture +def pkg_factory(): + Pkg = collections.namedtuple( + 'Pkg', ['url_for_version', 'urls', 'url', 'versions'] + ) + + def factory(url, urls): + + def fn(v): + main_url = url or urls.pop(0) + return spack.url.substitute_version(main_url, v) + + return Pkg( + url_for_version=fn, url=url, urls=urls, + versions=collections.defaultdict(dict) + ) + + return factory + + def test_urlfetchstrategy_sans_url(): """Ensure constructor with no URL fails.""" with pytest.raises(ValueError): - with URLFetchStrategy(None): + with fs.URLFetchStrategy(None): pass @@ -34,8 +54,8 @@ def test_urlfetchstrategy_bad_url(tmpdir): """Ensure fetch with bad URL fails as expected.""" testpath = str(tmpdir) - with pytest.raises(FailedDownloadError): - fetcher = URLFetchStrategy(url='file:///does-not-exist') + with pytest.raises(fs.FailedDownloadError): + fetcher = fs.URLFetchStrategy(url='file:///does-not-exist') assert fetcher is not None with Stage(fetcher, path=testpath) as stage: @@ -106,8 +126,8 @@ def test_from_list_url(mock_packages, config, spec, url, digest): """ specification = Spec(spec).concretized() pkg = spack.repo.get(specification) - fetch_strategy = from_list_url(pkg) - assert isinstance(fetch_strategy, URLFetchStrategy) + fetch_strategy = fs.from_list_url(pkg) + assert isinstance(fetch_strategy, fs.URLFetchStrategy) assert os.path.basename(fetch_strategy.url) == url assert fetch_strategy.digest == digest @@ -118,8 +138,8 @@ def test_from_list_url_unspecified(mock_packages, config): spec = Spec('url-list-test @2.0.0').concretized() pkg = spack.repo.get(spec) - fetch_strategy = from_list_url(pkg) - assert isinstance(fetch_strategy, URLFetchStrategy) + fetch_strategy = fs.from_list_url(pkg) + assert isinstance(fetch_strategy, fs.URLFetchStrategy) assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz' assert fetch_strategy.digest is None @@ -128,7 +148,7 @@ def test_nosource_from_list_url(mock_packages, config): """This test confirms BundlePackages do not have list url.""" pkg = spack.repo.get('nosource') - fetch_strategy = from_list_url(pkg) + fetch_strategy = fs.from_list_url(pkg) assert fetch_strategy is None @@ -148,9 +168,26 @@ def test_url_extra_fetch(tmpdir, mock_archive): """Ensure a fetch after downloading is effectively a no-op.""" testpath = str(tmpdir) - fetcher = URLFetchStrategy(mock_archive.url) + fetcher = fs.URLFetchStrategy(mock_archive.url) with Stage(fetcher, path=testpath) as stage: assert fetcher.archive_file is None stage.fetch() assert fetcher.archive_file is not None fetcher.fetch() + + +@pytest.mark.parametrize('url,urls,version,expected', [ + (None, + ['https://ftpmirror.gnu.org/autoconf/autoconf-2.69.tar.gz', + 'https://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz'], + '2.62', + ['https://ftpmirror.gnu.org/autoconf/autoconf-2.62.tar.gz', + 'https://ftp.gnu.org/gnu/autoconf/autoconf-2.62.tar.gz']) +]) +def test_candidate_urls(pkg_factory, url, urls, version, expected): + """Tests that candidate urls include mirrors and that they go through + pattern matching and substitution for versions. + """ + pkg = pkg_factory(url, urls) + f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version) + assert f.candidate_urls == expected -- cgit v1.2.3-60-g2f50