From 3894ceebc93ed05df8cedce70967a97c77ca40cf Mon Sep 17 00:00:00 2001 From: Tamara Dahlgren <35777542+tldahlgren@users.noreply.github.com> Date: Tue, 30 Aug 2022 11:13:23 -0700 Subject: Environments: Add support for include URLs (#29026) * Preliminary support for include URLs in spack.yaml (environment) files This commit adds support in environments for external configuration files obtained from a URL with a preference for grabbing raw text from GitHub and gitlab for efficient downloads of the relevant files. The URL can also be a link to a directory that contains multiple configuration files. Remote configuration files are retrieved and cached for the environment. Configuration files with the same name will not be overwritten once cached. --- lib/spack/docs/conf.py | 1 + lib/spack/docs/environments.rst | 19 ++- lib/spack/spack/binary_distribution.py | 7 +- lib/spack/spack/cmd/buildcache.py | 3 +- lib/spack/spack/config.py | 97 ++++++++++++- lib/spack/spack/environment/environment.py | 53 ++++++- lib/spack/spack/fetch_strategy.py | 129 +++++------------ lib/spack/spack/package_base.py | 8 +- lib/spack/spack/stage.py | 3 +- lib/spack/spack/test/bootstrap.py | 2 +- lib/spack/spack/test/cmd/env.py | 168 ++++++++++++++-------- lib/spack/spack/test/cmd/spec.py | 2 +- lib/spack/spack/test/config.py | 172 +++++++++++++++++++++- lib/spack/spack/test/conftest.py | 83 ++++++++++- lib/spack/spack/test/gcs_fetch.py | 3 +- lib/spack/spack/test/packaging.py | 2 +- lib/spack/spack/test/s3_fetch.py | 3 +- lib/spack/spack/test/stage.py | 5 +- lib/spack/spack/test/url_fetch.py | 82 ++++++++++- lib/spack/spack/util/path.py | 9 +- lib/spack/spack/util/url.py | 68 ++++++--- lib/spack/spack/util/web.py | 221 +++++++++++++++++++++++++++-- 22 files changed, 919 insertions(+), 221 deletions(-) diff --git a/lib/spack/docs/conf.py b/lib/spack/docs/conf.py index e139ac7774..1108f93ff9 100644 --- a/lib/spack/docs/conf.py +++ b/lib/spack/docs/conf.py @@ -201,6 +201,7 @@ nitpick_ignore = [ ("py:class", "unittest.case.TestCase"), ("py:class", "_frozen_importlib_external.SourceFileLoader"), ("py:class", "clingo.Control"), + ("py:class", "six.moves.urllib.parse.ParseResult"), # Spack classes that are private and we don't want to expose ("py:class", "spack.provider_index._IndexBase"), ("py:class", "spack.repo._PrependFileLoader"), diff --git a/lib/spack/docs/environments.rst b/lib/spack/docs/environments.rst index 036a33b19f..9be2ab4684 100644 --- a/lib/spack/docs/environments.rst +++ b/lib/spack/docs/environments.rst @@ -478,14 +478,21 @@ them to the Environment. spack: include: - relative/path/to/config.yaml + - https://github.com/path/to/raw/config/compilers.yaml - /absolute/path/to/packages.yaml -Environments can include files with either relative or absolute -paths. Inline configurations take precedence over included -configurations, so you don't have to change shared configuration files -to make small changes to an individual Environment. Included configs -listed earlier will have higher precedence, as the included configs are -applied in reverse order. +Environments can include files or URLs. File paths can be relative or +absolute. URLs include the path to the text for individual files or +can be the path to a directory containing configuration files. + +^^^^^^^^^^^^^^^^^^^^^^^^ +Configuration precedence +^^^^^^^^^^^^^^^^^^^^^^^^ + +Inline configurations take precedence over included configurations, so +you don't have to change shared configuration files to make small changes +to an individual environment. 
Included configurations listed earlier will +have higher precedence, as the included configs are applied in reverse order. ------------------------------- Manually Editing the Specs List diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 88e3f2740e..e51d7d4842 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -26,7 +26,6 @@ from llnl.util.filesystem import mkdirp import spack.cmd import spack.config as config import spack.database as spack_db -import spack.fetch_strategy as fs import spack.hooks import spack.hooks.sbang import spack.mirror @@ -1231,7 +1230,7 @@ def try_fetch(url_to_fetch): try: stage.fetch() - except fs.FetchError: + except web_util.FetchError: stage.destroy() return None @@ -1954,7 +1953,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None): if not os.path.exists(stage.save_filename): try: stage.fetch() - except fs.FetchError: + except web_util.FetchError: continue tty.debug("Found key {0}".format(fingerprint)) @@ -2106,7 +2105,7 @@ def _download_buildcache_entry(mirror_root, descriptions): try: stage.fetch() break - except fs.FetchError as e: + except web_util.FetchError as e: tty.debug(e) else: if fail_if_missing: diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py index 1da0a51bc9..4c1e4d4837 100644 --- a/lib/spack/spack/cmd/buildcache.py +++ b/lib/spack/spack/cmd/buildcache.py @@ -15,7 +15,6 @@ import spack.cmd import spack.cmd.common.arguments as arguments import spack.config import spack.environment as ev -import spack.fetch_strategy as fs import spack.hash_types as ht import spack.mirror import spack.relocate @@ -711,7 +710,7 @@ def sync_fn(args): stage.create() stage.fetch() web_util.push_to_url(local_path, dest_url, keep_original=True) - except fs.FetchError as e: + except web_util.FetchError as e: tty.debug("spack buildcache unable to sync {0}".format(rel_path)) tty.debug(e) finally: diff --git a/lib/spack/spack/config.py b/lib/spack/spack/config.py index 00d1abc8b1..ea896f27c5 100644 --- a/lib/spack/spack/config.py +++ b/lib/spack/spack/config.py @@ -64,6 +64,7 @@ import spack.schema.upstreams # Hacked yaml for configuration files preserves line numbers. import spack.util.spack_yaml as syaml +import spack.util.web as web_util from spack.error import SpackError from spack.util.cpus import cpus_available @@ -994,18 +995,19 @@ def read_config_file(filename, schema=None): # schema when it's not necessary) while allowing us to validate against a # known schema when the top-level key could be incorrect. - # Ignore nonexisting files. if not os.path.exists(filename): + # Ignore nonexistent files. + tty.debug("Skipping nonexistent config path {0}".format(filename)) return None elif not os.path.isfile(filename): raise ConfigFileError("Invalid configuration. %s exists but is not a file." % filename) elif not os.access(filename, os.R_OK): - raise ConfigFileError("Config file is not readable: %s" % filename) + raise ConfigFileError("Config file is not readable: {0}".format(filename)) try: - tty.debug("Reading config file %s" % filename) + tty.debug("Reading config from file {0}".format(filename)) with open(filename) as f: data = syaml.load_config(f) @@ -1304,6 +1306,95 @@ def _config_from(scopes_or_paths): return configuration +def raw_github_gitlab_url(url): + """Transform a github URL to the raw form to avoid undesirable html. 
+ + Args: + url: url to be converted to raw form + + Returns: (str) raw github/gitlab url or the original url + """ + # Note we rely on GitHub to redirect the 'raw' URL returned here to the + # actual URL under https://raw.githubusercontent.com/ with '/blob' + # removed and or, '/blame' if needed. + if "github" in url or "gitlab" in url: + return url.replace("/blob/", "/raw/") + + return url + + +def collect_urls(base_url): + """Return a list of configuration URLs. + + Arguments: + base_url (str): URL for a configuration (yaml) file or a directory + containing yaml file(s) + + Returns: (list) list of configuration file(s) or empty list if none + """ + if not base_url: + return [] + + extension = ".yaml" + + if base_url.endswith(extension): + return [base_url] + + # Collect configuration URLs if the base_url is a "directory". + _, links = web_util.spider(base_url, 0) + return [link for link in links if link.endswith(extension)] + + +def fetch_remote_configs(url, dest_dir, skip_existing=True): + """Retrieve configuration file(s) at the specified URL. + + Arguments: + url (str): URL for a configuration (yaml) file or a directory containing + yaml file(s) + dest_dir (str): destination directory + skip_existing (bool): Skip files that already exist in dest_dir if + ``True``; otherwise, replace those files + + Returns: (str) path to the corresponding file if URL is or contains a + single file and it is the only file in the destination directory or + the root (dest_dir) directory if multiple configuration files exist + or are retrieved. + """ + + def _fetch_file(url): + raw = raw_github_gitlab_url(url) + tty.debug("Reading config from url {0}".format(raw)) + return web_util.fetch_url_text(raw, dest_dir=dest_dir) + + if not url: + raise ConfigFileError("Cannot retrieve configuration without a URL") + + # Return the local path to the cached configuration file OR to the + # directory containing the cached configuration files. + config_links = collect_urls(url) + existing_files = os.listdir(dest_dir) if os.path.isdir(dest_dir) else [] + + paths = [] + for config_url in config_links: + basename = os.path.basename(config_url) + if skip_existing and basename in existing_files: + tty.warn( + "Will not fetch configuration from {0} since a version already" + "exists in {1}".format(config_url, dest_dir) + ) + path = os.path.join(dest_dir, basename) + else: + path = _fetch_file(config_url) + + if path: + paths.append(path) + + if paths: + return dest_dir if len(paths) > 1 else paths[0] + + raise ConfigFileError("Cannot retrieve configuration (yaml) from {0}".format(url)) + + class ConfigError(SpackError): """Superclass for all Spack config related errors.""" diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py index a0869cf533..3df8e4f1df 100644 --- a/lib/spack/spack/environment/environment.py +++ b/lib/spack/spack/environment/environment.py @@ -17,7 +17,6 @@ import six import llnl.util.filesystem as fs import llnl.util.tty as tty -from llnl.util.filesystem import rename from llnl.util.lang import dedupe from llnl.util.symlink import symlink @@ -593,7 +592,7 @@ class ViewDescriptor(object): symlink(new_root, tmp_symlink_name) # mv symlink atomically over root symlink to old_root - rename(tmp_symlink_name, self.root) + fs.rename(tmp_symlink_name, self.root) except Exception as e: # Clean up new view and temporary symlink on any failure. 
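# A minimal usage sketch of the spack.config helpers introduced above
# (raw_github_gitlab_url, collect_urls, fetch_remote_configs). The GitHub URL
# and destination directory are hypothetical stand-ins; outside the mocked
# test fixtures the final call performs a real fetch.
import os

import spack.config

url = "https://github.com/fake/fake/blob/develop/etc/fake/defaults/packages.yaml"

# '/blob/' is rewritten to '/raw/' so GitHub/GitLab serve plain text.
print(spack.config.raw_github_gitlab_url(url))

# A URL ending in '.yaml' is returned as-is; a "directory" URL is spidered for
# the configuration files it links to.
print(spack.config.collect_urls(url))

# Fetch and cache the file(s); returns the cached file's path, or dest_dir when
# more than one configuration file was retrieved.
dest_dir = os.path.join(os.getcwd(), "staged-configs")  # hypothetical location
print(spack.config.fetch_remote_configs(url, dest_dir, skip_existing=True))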
try: @@ -897,6 +896,11 @@ class Environment(object): def log_path(self): return os.path.join(self.path, env_subdir_name, "logs") + @property + def config_stage_dir(self): + """Directory for any staged configuration file(s).""" + return os.path.join(self.env_subdir_path, "config") + @property def view_path_default(self): # default path for environment views @@ -928,6 +932,47 @@ class Environment(object): # allow paths to contain spack config/environment variables, etc. config_path = substitute_path_variables(config_path) + # strip file URL prefix, if needed, to avoid unnecessary remote + # config processing for local files + config_path = config_path.replace("file://", "") + + if not os.path.exists(config_path): + # Stage any remote configuration file(s) + if spack.util.url.is_url_format(config_path): + staged_configs = ( + os.listdir(self.config_stage_dir) + if os.path.exists(self.config_stage_dir) + else [] + ) + basename = os.path.basename(config_path) + if basename in staged_configs: + # Do NOT re-stage configuration files over existing + # ones with the same name since there is a risk of + # losing changes (e.g., from 'spack config update'). + tty.warn( + "Will not re-stage configuration from {0} to avoid " + "losing changes to the already staged file of the " + "same name.".format(config_path) + ) + + # Recognize the configuration stage directory + # is flattened to ensure a single copy of each + # configuration file. + config_path = self.config_stage_dir + if basename.endswith(".yaml"): + config_path = os.path.join(config_path, basename) + else: + staged_path = spack.config.fetch_remote_configs( + config_path, + self.config_stage_dir, + skip_existing=True, + ) + if not staged_path: + raise SpackEnvironmentError( + "Unable to fetch remote configuration {0}".format(config_path) + ) + config_path = staged_path + # treat relative paths as relative to the environment if not os.path.isabs(config_path): config_path = os.path.join(self.path, config_path) @@ -936,10 +981,14 @@ class Environment(object): if os.path.isdir(config_path): # directories are treated as regular ConfigScopes config_name = "env:%s:%s" % (self.name, os.path.basename(config_path)) + tty.debug("Creating ConfigScope {0} for '{1}'".format(config_name, config_path)) scope = spack.config.ConfigScope(config_name, config_path) elif os.path.exists(config_path): # files are assumed to be SingleFileScopes config_name = "env:%s:%s" % (self.name, config_path) + tty.debug( + "Creating SingleFileScope {0} for '{1}'".format(config_name, config_path) + ) scope = spack.config.SingleFileScope( config_name, config_path, spack.schema.merged.schema ) diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 18230d9ee5..319aed95ca 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -52,7 +52,7 @@ import spack.url import spack.util.crypto as crypto import spack.util.pattern as pattern import spack.util.url as url_util -import spack.util.web +import spack.util.web as web_util import spack.version from spack.util.compression import decompressor_for, extension from spack.util.executable import CommandNotFoundError, which @@ -337,7 +337,7 @@ class URLFetchStrategy(FetchStrategy): url = None errors = [] for url in self.candidate_urls: - if not self._existing_url(url): + if not web_util.url_exists(url, self.curl): continue try: @@ -352,30 +352,6 @@ class URLFetchStrategy(FetchStrategy): if not self.archive_file: raise FailedDownloadError(url) - def _existing_url(self, url): - 
tty.debug("Checking existence of {0}".format(url)) - - if spack.config.get("config:url_fetch_method") == "curl": - curl = self.curl - # Telling curl to fetch the first byte (-r 0-0) is supposed to be - # portable. - curl_args = ["--stderr", "-", "-s", "-f", "-r", "0-0", url] - if not spack.config.get("config:verify_ssl"): - curl_args.append("-k") - _ = curl(*curl_args, fail_on_error=False, output=os.devnull) - return curl.returncode == 0 - else: - # Telling urllib to check if url is accessible - try: - url, headers, response = spack.util.web.read_from_url(url) - except spack.util.web.SpackWebError as werr: - msg = "Urllib fetch failed to verify url\ - {0}\n with error {1}".format( - url, werr - ) - raise FailedDownloadError(url, msg) - return response.getcode() is None or response.getcode() == 200 - def _fetch_from_url(self, url): if spack.config.get("config:url_fetch_method") == "curl": return self._fetch_curl(url) @@ -397,8 +373,8 @@ class URLFetchStrategy(FetchStrategy): # Run urllib but grab the mime type from the http headers try: - url, headers, response = spack.util.web.read_from_url(url) - except spack.util.web.SpackWebError as e: + url, headers, response = web_util.read_from_url(url) + except web_util.SpackWebError as e: # clean up archive on failure. if self.archive_file: os.remove(self.archive_file) @@ -433,38 +409,19 @@ class URLFetchStrategy(FetchStrategy): else: save_args = ["-O"] - curl_args = save_args + [ - "-f", # fail on >400 errors - "-D", - "-", # print out HTML headers - "-L", # resolve 3xx redirects - url, - ] - - if not spack.config.get("config:verify_ssl"): - curl_args.append("-k") - - if sys.stdout.isatty() and tty.msg_enabled(): - curl_args.append("-#") # status bar when using a tty - else: - curl_args.append("-sS") # show errors if fail - - connect_timeout = spack.config.get("config:connect_timeout", 10) - + timeout = 0 + cookie_args = [] if self.extra_options: cookie = self.extra_options.get("cookie") if cookie: - curl_args.append("-j") # junk cookies - curl_args.append("-b") # specify cookie - curl_args.append(cookie) + cookie_args.append("-j") # junk cookies + cookie_args.append("-b") # specify cookie + cookie_args.append(cookie) timeout = self.extra_options.get("timeout") - if timeout: - connect_timeout = max(connect_timeout, int(timeout)) - if connect_timeout > 0: - # Timeout if can't establish a connection after n sec. - curl_args.extend(["--connect-timeout", str(connect_timeout)]) + base_args = web_util.base_curl_fetch_args(url, timeout) + curl_args = save_args + base_args + cookie_args # Run curl but grab the mime type from the http headers curl = self.curl @@ -479,26 +436,10 @@ class URLFetchStrategy(FetchStrategy): if partial_file and os.path.lexists(partial_file): os.remove(partial_file) - if curl.returncode == 22: - # This is a 404. Curl will print the error. - raise FailedDownloadError(url, "URL %s was not found!" % url) - - elif curl.returncode == 60: - # This is a certificate error. Suggest spack -k - raise FailedDownloadError( - url, - "Curl was unable to fetch due to invalid certificate. " - "This is either an attack, or your cluster's SSL " - "configuration is bad. If you believe your SSL " - "configuration is bad, you can try running spack -k, " - "which will not check SSL certificates." - "Use this at your own risk.", - ) - - else: - # This is some other curl error. 
Curl will print the - # error, but print a spack message too - raise FailedDownloadError(url, "Curl failed with error %d" % curl.returncode) + try: + web_util.check_curl_code(curl.returncode) + except web_util.FetchError as err: + raise spack.fetch_strategy.FailedDownloadError(url, str(err)) self._check_headers(headers) @@ -556,7 +497,7 @@ class URLFetchStrategy(FetchStrategy): if not self.archive_file: raise NoArchiveFileError("Cannot call archive() before fetching.") - spack.util.web.push_to_url(self.archive_file, destination, keep_original=True) + web_util.push_to_url(self.archive_file, destination, keep_original=True) @_needs_stage def check(self): @@ -1385,19 +1326,19 @@ class S3FetchStrategy(URLFetchStrategy): parsed_url = url_util.parse(self.url) if parsed_url.scheme != "s3": - raise FetchError("S3FetchStrategy can only fetch from s3:// urls.") + raise web_util.FetchError("S3FetchStrategy can only fetch from s3:// urls.") tty.debug("Fetching {0}".format(self.url)) basename = os.path.basename(parsed_url.path) with working_dir(self.stage.path): - _, headers, stream = spack.util.web.read_from_url(self.url) + _, headers, stream = web_util.read_from_url(self.url) with open(basename, "wb") as f: shutil.copyfileobj(stream, f) - content_type = spack.util.web.get_header(headers, "Content-type") + content_type = web_util.get_header(headers, "Content-type") if content_type == "text/html": warn_content_type_mismatch(self.archive_file or "the archive") @@ -1426,15 +1367,13 @@ class GCSFetchStrategy(URLFetchStrategy): @_needs_stage def fetch(self): - import spack.util.web as web_util - if self.archive_file: tty.debug("Already downloaded {0}".format(self.archive_file)) return parsed_url = url_util.parse(self.url) if parsed_url.scheme != "gs": - raise FetchError("GCSFetchStrategy can only fetch from gs:// urls.") + raise web_util.FetchError("GCSFetchStrategy can only fetch from gs:// urls.") tty.debug("Fetching {0}".format(self.url)) @@ -1489,7 +1428,7 @@ def from_kwargs(**kwargs): on attribute names (e.g., ``git``, ``hg``, etc.) Raises: - FetchError: If no ``fetch_strategy`` matches the args. + spack.util.web.FetchError: If no ``fetch_strategy`` matches the args. """ for fetcher in all_strategies: if fetcher.matches(kwargs): @@ -1586,7 +1525,7 @@ def for_package_version(pkg, version): # if it's a commit, we must use a GitFetchStrategy if isinstance(version, spack.version.GitVersion): if not hasattr(pkg, "git"): - raise FetchError( + raise web_util.FetchError( "Cannot fetch git version for %s. 
Package has no 'git' attribute" % pkg.name ) # Populate the version with comparisons to other commits @@ -1731,15 +1670,11 @@ class FsCache(object): shutil.rmtree(self.root, ignore_errors=True) -class FetchError(spack.error.SpackError): - """Superclass for fetcher errors.""" - - -class NoCacheError(FetchError): +class NoCacheError(web_util.FetchError): """Raised when there is no cached archive for a package.""" -class FailedDownloadError(FetchError): +class FailedDownloadError(web_util.FetchError): """Raised when a download fails.""" def __init__(self, url, msg=""): @@ -1747,23 +1682,23 @@ class FailedDownloadError(FetchError): self.url = url -class NoArchiveFileError(FetchError): - """ "Raised when an archive file is expected but none exists.""" +class NoArchiveFileError(web_util.FetchError): + """Raised when an archive file is expected but none exists.""" -class NoDigestError(FetchError): +class NoDigestError(web_util.FetchError): """Raised after attempt to checksum when URL has no digest.""" -class ExtrapolationError(FetchError): +class ExtrapolationError(web_util.FetchError): """Raised when we can't extrapolate a version for a package.""" -class FetcherConflict(FetchError): +class FetcherConflict(web_util.FetchError): """Raised for packages with invalid fetch attributes.""" -class InvalidArgsError(FetchError): +class InvalidArgsError(web_util.FetchError): """Raised when a version can't be deduced from a set of arguments.""" def __init__(self, pkg=None, version=None, **args): @@ -1776,11 +1711,11 @@ class InvalidArgsError(FetchError): super(InvalidArgsError, self).__init__(msg, long_msg) -class ChecksumError(FetchError): +class ChecksumError(web_util.FetchError): """Raised when archive fails to checksum.""" -class NoStageError(FetchError): +class NoStageError(web_util.FetchError): """Raised when fetch operations are called before set_stage().""" def __init__(self, method): diff --git a/lib/spack/spack/package_base.py b/lib/spack/spack/package_base.py index 81801df9a5..401303e949 100644 --- a/lib/spack/spack/package_base.py +++ b/lib/spack/spack/package_base.py @@ -63,6 +63,7 @@ from spack.stage import ResourceStage, Stage, StageComposite, stage_prefix from spack.util.executable import ProcessError, which from spack.util.package_hash import package_hash from spack.util.prefix import Prefix +from spack.util.web import FetchError from spack.version import GitVersion, Version, VersionBase if sys.version_info[0] >= 3: @@ -3022,13 +3023,6 @@ def possible_dependencies(*pkg_or_spec, **kwargs): return visited -class FetchError(spack.error.SpackError): - """Raised when something goes wrong during fetch.""" - - def __init__(self, message, long_msg=None): - super(FetchError, self).__init__(message, long_msg) - - class PackageStillNeededError(InstallError): """Raised when package is still needed by another on uninstall.""" diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index bff07b44df..62e05d504e 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -42,6 +42,7 @@ import spack.util.path as sup import spack.util.pattern as pattern import spack.util.url as url_util from spack.util.crypto import bit_length, prefix_bits +from spack.util.web import FetchError # The well-known stage source subdirectory name. 
_source_path_subdir = "spack-src" @@ -529,7 +530,7 @@ class Stage(object): self.fetcher = self.default_fetcher default_msg = "All fetchers failed for {0}".format(self.name) - raise fs.FetchError(err_msg or default_msg, None) + raise FetchError(err_msg or default_msg, None) print_errors(errors) diff --git a/lib/spack/spack/test/bootstrap.py b/lib/spack/spack/test/bootstrap.py index aeacf99d76..06e9e5f2bc 100644 --- a/lib/spack/spack/test/bootstrap.py +++ b/lib/spack/spack/test/bootstrap.py @@ -134,7 +134,7 @@ def test_config_yaml_is_preserved_during_bootstrap(mutable_config): @pytest.mark.regression("26548") -def test_custom_store_in_environment(mutable_config, tmpdir): +def test_bootstrap_custom_store_in_environment(mutable_config, tmpdir): # Test that the custom store in an environment is taken into account # during bootstrapping spack_yaml = tmpdir.join("spack.yaml") diff --git a/lib/spack/spack/test/cmd/env.py b/lib/spack/spack/test/cmd/env.py index 4bb7203be7..d849931dea 100644 --- a/lib/spack/spack/test/cmd/env.py +++ b/lib/spack/spack/test/cmd/env.py @@ -29,6 +29,7 @@ from spack.stage import stage_prefix from spack.util.executable import Executable from spack.util.mock_package import MockPackageMultiRepo from spack.util.path import substitute_path_variables +from spack.util.web import FetchError from spack.version import Version # TODO-27021 @@ -656,7 +657,7 @@ env: assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) -def test_with_config_bad_include(capfd): +def test_with_config_bad_include(): env_name = "test_bad_include" test_config = """\ spack: @@ -667,15 +668,14 @@ spack: _env_create(env_name, StringIO(test_config)) e = ev.read(env_name) - with pytest.raises(SystemExit): + with pytest.raises(spack.config.ConfigFileError) as exc: with e: e.concretize() - out, err = capfd.readouterr() + err = str(exc) + assert "not retrieve configuration" in err + assert os.path.join("no", "such", "directory") in err - assert "missing include" in err - assert "/no/such/directory" in err - assert os.path.join("no", "such", "file.yaml") in err assert ev.active_environment() is None @@ -721,25 +721,44 @@ def test_env_with_include_config_files_same_basename(): assert environment_specs[1].satisfies("mpileaks@2.2") -def test_env_with_included_config_file(): - test_config = """\ +@pytest.fixture(scope="function") +def packages_file(tmpdir): + """Return the path to the packages configuration file.""" + raw_yaml = """ +packages: + mpileaks: + version: [2.2] +""" + filename = tmpdir.ensure("testconfig", "packages.yaml") + filename.write(raw_yaml) + yield filename + + +def mpileaks_env_config(include_path): + """Return the contents of an environment that includes the provided + path and lists mpileaks as the sole spec.""" + return """\ env: include: - - ./included-config.yaml + - {0} specs: - mpileaks -""" +""".format( + include_path + ) + + +def test_env_with_included_config_file(packages_file): + """Test inclusion of a relative packages configuration file added to an + existing environment.""" + include_filename = "included-config.yaml" + test_config = mpileaks_env_config(os.path.join(".", include_filename)) + _env_create("test", StringIO(test_config)) e = ev.read("test") - with open(os.path.join(e.path, "included-config.yaml"), "w") as f: - f.write( - """\ -packages: - mpileaks: - version: [2.2] -""" - ) + included_path = os.path.join(e.path, include_filename) + fs.rename(packages_file.strpath, included_path) with e: e.concretize() @@ -747,65 +766,81 @@ packages: assert 
any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) -def test_env_with_included_config_scope(): +def test_env_with_included_config_file_url(tmpdir, mutable_empty_config, packages_file): + """Test configuration inclusion of a file whose path is a URL before + the environment is concretized.""" + + spack_yaml = tmpdir.join("spack.yaml") + with spack_yaml.open("w") as f: + f.write("spack:\n include:\n - file://{0}\n".format(packages_file)) + + env = ev.Environment(tmpdir.strpath) + ev.activate(env) + scopes = env.included_config_scopes() + assert len(scopes) == 1 + + cfg = spack.config.get("packages") + assert cfg["mpileaks"]["version"] == [2.2] + + +def test_env_with_included_config_missing_file(tmpdir, mutable_empty_config): + """Test inclusion of a missing configuration file raises FetchError + noting missing file.""" + + spack_yaml = tmpdir.join("spack.yaml") + missing_file = tmpdir.join("packages.yaml") + with spack_yaml.open("w") as f: + f.write("spack:\n include:\n - {0}\n".format(missing_file.strpath)) + + env = ev.Environment(tmpdir.strpath) + with pytest.raises(FetchError, match="No such file or directory"): + ev.activate(env) + + +def test_env_with_included_config_scope(tmpdir, packages_file): + """Test inclusion of a package file from the environment's configuration + stage directory. This test is intended to represent a case where a remote + file has already been staged.""" config_scope_path = os.path.join(ev.root("test"), "config") - test_config = ( - """\ -env: - include: - - %s - specs: - - mpileaks -""" - % config_scope_path - ) + # Configure the environment to include file(s) from the environment's + # remote configuration stage directory. + test_config = mpileaks_env_config(config_scope_path) + + # Create the environment _env_create("test", StringIO(test_config)) e = ev.read("test") + # Copy the packages.yaml file to the environment configuration + # directory so it is picked up during concretization. (Using + # copy instead of rename in case the fixture scope changes.) fs.mkdirp(config_scope_path) - with open(os.path.join(config_scope_path, "packages.yaml"), "w") as f: - f.write( - """\ -packages: - mpileaks: - version: [2.2] -""" - ) + include_filename = os.path.basename(packages_file.strpath) + included_path = os.path.join(config_scope_path, include_filename) + fs.copy(packages_file.strpath, included_path) + # Ensure the concretized environment reflects contents of the + # packages.yaml file. 
with e: e.concretize() assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) -def test_env_with_included_config_var_path(): +def test_env_with_included_config_var_path(packages_file): + """Test inclusion of a package configuration file with path variables + "staged" in the environment's configuration stage directory.""" config_var_path = os.path.join("$tempdir", "included-config.yaml") - test_config = ( - """\ -env: - include: - - %s - specs: - - mpileaks -""" - % config_var_path - ) + test_config = mpileaks_env_config(config_var_path) _env_create("test", StringIO(test_config)) e = ev.read("test") config_real_path = substitute_path_variables(config_var_path) fs.mkdirp(os.path.dirname(config_real_path)) - with open(config_real_path, "w") as f: - f.write( - """\ -packages: - mpileaks: - version: [2.2] -""" - ) + fs.rename(packages_file.strpath, config_real_path) + assert os.path.exists(config_real_path) with e: e.concretize() @@ -3030,3 +3065,24 @@ def test_unify_when_possible_works_around_conflicts(): assert len([x for x in e.all_specs() if x.satisfies("mpileaks+opt")]) == 1 assert len([x for x in e.all_specs() if x.satisfies("mpileaks~opt")]) == 1 assert len([x for x in e.all_specs() if x.satisfies("mpich")]) == 1 + + +def test_env_include_packages_url( + tmpdir, mutable_empty_config, mock_spider_configs, mock_curl_configs +): + """Test inclusion of a (GitHub) URL.""" + develop_url = "https://github.com/fake/fake/blob/develop/" + default_packages = develop_url + "etc/fake/defaults/packages.yaml" + spack_yaml = tmpdir.join("spack.yaml") + with spack_yaml.open("w") as f: + f.write("spack:\n include:\n - {0}\n".format(default_packages)) + assert os.path.isfile(spack_yaml.strpath) + + with spack.config.override("config:url_fetch_method", "curl"): + env = ev.Environment(tmpdir.strpath) + ev.activate(env) + scopes = env.included_config_scopes() + assert len(scopes) == 1 + + cfg = spack.config.get("packages") + assert "openmpi" in cfg["all"]["providers"]["mpi"] diff --git a/lib/spack/spack/test/cmd/spec.py b/lib/spack/spack/test/cmd/spec.py index d604bc4515..8e24a7dfee 100644 --- a/lib/spack/spack/test/cmd/spec.py +++ b/lib/spack/spack/test/cmd/spec.py @@ -12,8 +12,8 @@ import spack.environment as ev import spack.error import spack.spec import spack.store -from spack.fetch_strategy import FetchError from spack.main import SpackCommand, SpackCommandError +from spack.util.web import FetchError pytestmark = pytest.mark.usefixtures("config", "mutable_mock_repo") diff --git a/lib/spack/spack/test/config.py b/lib/spack/spack/test/config.py index 5ecd761e24..f4d0b570fd 100644 --- a/lib/spack/spack/test/config.py +++ b/lib/spack/spack/test/config.py @@ -12,7 +12,8 @@ import tempfile import pytest from six import StringIO -from llnl.util.filesystem import getuid, mkdirp, touch +import llnl.util.tty as tty +from llnl.util.filesystem import getuid, join_path, mkdirp, touch, touchp import spack.config import spack.environment as ev @@ -828,9 +829,9 @@ def test_bad_config_section(mock_low_high_config): spack.config.get("foobar") -@pytest.mark.skipif(sys.platform == "win32", reason="Not supported on Windows (yet)") +@pytest.mark.skipif(sys.platform == "win32", reason="chmod not supported on Windows") @pytest.mark.skipif(getuid() == 0, reason="user is root") -def test_bad_command_line_scopes(tmpdir, mock_low_high_config): +def test_bad_command_line_scopes(tmpdir, config): cfg = spack.config.Configuration() with tmpdir.as_cwd(): @@ -1265,3 +1266,168 @@ def 
test_user_cache_path_is_overridable(working_env): def test_user_cache_path_is_default_when_env_var_is_empty(working_env): os.environ["SPACK_USER_CACHE_PATH"] = "" assert os.path.expanduser("~%s.spack" % os.sep) == spack.paths._get_user_cache_path() + + +github_url = "https://github.com/fake/fake/{0}/develop" +gitlab_url = "https://gitlab.fake.io/user/repo/-/blob/config/defaults" + + +@pytest.mark.parametrize( + "url,isfile", + [ + (github_url.format("tree"), False), + ("{0}/README.md".format(github_url.format("blob")), True), + ("{0}/etc/fake/defaults/packages.yaml".format(github_url.format("blob")), True), + (gitlab_url, False), + (None, False), + ], +) +def test_config_collect_urls(mutable_empty_config, mock_spider_configs, url, isfile): + with spack.config.override("config:url_fetch_method", "curl"): + urls = spack.config.collect_urls(url) + if url: + if isfile: + expected = 1 if url.endswith(".yaml") else 0 + assert len(urls) == expected + else: + # Expect multiple configuration files for a "directory" + assert len(urls) > 1 + else: + assert not urls + + +@pytest.mark.parametrize( + "url,isfile,fail", + [ + (github_url.format("tree"), False, False), + (gitlab_url, False, False), + ("{0}/README.md".format(github_url.format("blob")), True, True), + ("{0}/compilers.yaml".format(gitlab_url), True, False), + (None, False, True), + ], +) +def test_config_fetch_remote_configs( + tmpdir, + mutable_empty_config, + mock_collect_urls, + mock_curl_configs, + url, + isfile, + fail, +): + def _has_content(filename): + # The first element of all configuration files for this test happen to + # be the basename of the file so this check leverages that feature. If + # that changes, then this check will need to change accordingly. + element = "{0}:".format(os.path.splitext(os.path.basename(filename))[0]) + with open(filename, "r") as fd: + for line in fd: + if element in line: + return True + tty.debug("Expected {0} in '{1}'".format(element, filename)) + return False + + dest_dir = join_path(tmpdir.strpath, "defaults") + if fail: + msg = "Cannot retrieve configuration" + with spack.config.override("config:url_fetch_method", "curl"): + with pytest.raises(spack.config.ConfigFileError, match=msg): + spack.config.fetch_remote_configs(url, dest_dir) + else: + with spack.config.override("config:url_fetch_method", "curl"): + path = spack.config.fetch_remote_configs(url, dest_dir) + assert os.path.exists(path) + if isfile: + # Ensure correct file is "fetched" + assert os.path.basename(path) == os.path.basename(url) + # Ensure contents of the file has expected config element + assert _has_content(path) + else: + for filename in os.listdir(path): + assert _has_content(join_path(path, filename)) + + +@pytest.fixture(scope="function") +def mock_collect_urls(mock_config_data, monkeypatch): + """Mock the collection of URLs to avoid mocking spider.""" + + _, config_files = mock_config_data + + def _collect(base_url): + if not base_url: + return [] + + ext = os.path.splitext(base_url)[1] + if ext: + return [base_url] if ext == ".yaml" else [] + + return [join_path(base_url, f) for f in config_files] + + monkeypatch.setattr(spack.config, "collect_urls", _collect) + + yield + + +@pytest.mark.parametrize( + "url,skip", + [ + (github_url.format("tree"), True), + ("{0}/compilers.yaml".format(gitlab_url), True), + ], +) +def test_config_fetch_remote_configs_skip( + tmpdir, mutable_empty_config, mock_collect_urls, mock_curl_configs, url, skip +): + """Ensure skip fetching remote config file if it already exists when + required 
and not skipping if replacing it.""" + + def check_contents(filename, expected): + with open(filename, "r") as fd: + lines = fd.readlines() + if expected: + assert lines[0] == "compilers:" + else: + assert not lines + + dest_dir = join_path(tmpdir.strpath, "defaults") + filename = "compilers.yaml" + + # Create a stage directory with an empty configuration file + path = join_path(dest_dir, filename) + touchp(path) + + # Do NOT replace the existing cached configuration file if skipping + expected = None if skip else "compilers:" + + with spack.config.override("config:url_fetch_method", "curl"): + path = spack.config.fetch_remote_configs(url, dest_dir, skip) + result_filename = path if path.endswith(".yaml") else join_path(path, filename) + check_contents(result_filename, expected) + + +def test_config_file_dir_failure(tmpdir, mutable_empty_config): + with pytest.raises(spack.config.ConfigFileError, match="not a file"): + spack.config.read_config_file(tmpdir.strpath) + + +@pytest.mark.skipif(sys.platform == "win32", reason="chmod not supported on Windows") +def test_config_file_read_perms_failure(tmpdir, mutable_empty_config): + """Test reading a configuration file without permissions to ensure + ConfigFileError is raised.""" + filename = join_path(tmpdir.strpath, "test.yaml") + touch(filename) + os.chmod(filename, 0o200) + + with pytest.raises(spack.config.ConfigFileError, match="not readable"): + spack.config.read_config_file(filename) + + +def test_config_file_read_invalid_yaml(tmpdir, mutable_empty_config): + """Test reading a configuration file with invalid (unparseable) YAML + raises a ConfigFileError.""" + filename = join_path(tmpdir.strpath, "test.yaml") + with open(filename, "w") as f: + f.write("spack:\nview") + + with pytest.raises(spack.config.ConfigFileError, match="parsing yaml"): + spack.config.read_config_file(filename) diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index d7bdfe6ac8..ed8dc0f9bd 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -26,6 +26,7 @@ import archspec.cpu.microarchitecture import archspec.cpu.schema import llnl.util.lang +import llnl.util.tty as tty from llnl.util.filesystem import mkdirp, remove_linked_tree, working_dir import spack.binary_distribution @@ -47,8 +48,9 @@ import spack.test.cray_manifest import spack.util.executable import spack.util.gpg import spack.util.spack_yaml as syaml -from spack.fetch_strategy import FetchError, FetchStrategyComposite, URLFetchStrategy +from spack.fetch_strategy import FetchStrategyComposite, URLFetchStrategy from spack.util.pattern import Bunch +from spack.util.web import FetchError is_windows = sys.platform == "win32" @@ -1684,3 +1686,82 @@ def noncyclical_dir_structure(tmpdir): with open(j("file_3"), "wb"): pass yield d + + +@pytest.fixture(scope="function") +def mock_config_data(): + config_data_dir = os.path.join(spack.paths.test_path, "data", "config") + return config_data_dir, os.listdir(config_data_dir) + + +@pytest.fixture(scope="function") +def mock_curl_configs(mock_config_data, monkeypatch): + """ + Mock curl-based retrieval of configuration files from the web by grabbing + them from the test data configuration directory. + + Fetches a single (configuration) file if the name matches one in the test + data directory. 
+ """ + config_data_dir, config_files = mock_config_data + + class MockCurl(object): + def __init__(self): + self.returncode = None + + def __call__(self, *args, **kwargs): + url = [a for a in args if a.startswith("http")][0] + basename = os.path.basename(url) + if os.path.splitext(url)[1]: + if basename in config_files: + filename = os.path.join(config_data_dir, basename) + + with open(filename, "r") as f: + lines = f.readlines() + write_file(os.path.basename(filename), "".join(lines)) + + self.returncode = 0 + else: + # This is a "404" and is technically only returned if -f + # flag is provided to curl. + tty.msg("curl: (22) The requested URL returned error: 404") + self.returncode = 22 + + def mock_curl(*args): + return MockCurl() + + monkeypatch.setattr(spack.util.web, "_curl", mock_curl) + + yield + + +@pytest.fixture(scope="function") +def mock_spider_configs(mock_config_data, monkeypatch): + """ + Mock retrieval of configuration file URLs from the web by grabbing + them from the test data configuration directory. + """ + config_data_dir, config_files = mock_config_data + + def _spider(*args, **kwargs): + root_urls = args[0] + if not root_urls: + return [], set() + + root_urls = [root_urls] if isinstance(root_urls, str) else root_urls + + # Any URL with an extension will be treated like a file; otherwise, + # it is considered a directory/folder and we'll grab all available + # files. + urls = [] + for url in root_urls: + if os.path.splitext(url)[1]: + urls.append(url) + else: + urls.extend([os.path.join(url, f) for f in config_files]) + + return [], set(urls) + + monkeypatch.setattr(spack.util.web, "spider", _spider) + + yield diff --git a/lib/spack/spack/test/gcs_fetch.py b/lib/spack/spack/test/gcs_fetch.py index 90657d4693..a253d9e0e8 100644 --- a/lib/spack/spack/test/gcs_fetch.py +++ b/lib/spack/spack/test/gcs_fetch.py @@ -10,6 +10,7 @@ import pytest import spack.config import spack.fetch_strategy import spack.stage +from spack.util.web import FetchError @pytest.mark.parametrize("_fetch_method", ["curl", "urllib"]) @@ -32,7 +33,7 @@ def test_gcsfetchstrategy_bad_url(tmpdir, _fetch_method): with spack.stage.Stage(fetcher, path=testpath) as stage: assert stage is not None assert fetcher.archive_file is None - with pytest.raises(spack.fetch_strategy.FetchError): + with pytest.raises(FetchError): fetcher.fetch() diff --git a/lib/spack/spack/test/packaging.py b/lib/spack/spack/test/packaging.py index c84a0e1c68..f0d1c240e8 100644 --- a/lib/spack/spack/test/packaging.py +++ b/lib/spack/spack/test/packaging.py @@ -573,7 +573,7 @@ def test_manual_download(install_mockery, mock_download, monkeypatch, manual, in monkeypatch.setattr(spack.package_base.PackageBase, "download_instr", _instr) expected = pkg.download_instr if manual else "All fetchers failed" - with pytest.raises(spack.fetch_strategy.FetchError, match=expected): + with pytest.raises(spack.util.web.FetchError, match=expected): pkg.do_fetch() diff --git a/lib/spack/spack/test/s3_fetch.py b/lib/spack/spack/test/s3_fetch.py index 4c14390112..cca91b431e 100644 --- a/lib/spack/spack/test/s3_fetch.py +++ b/lib/spack/spack/test/s3_fetch.py @@ -10,6 +10,7 @@ import pytest import spack.config as spack_config import spack.fetch_strategy as spack_fs import spack.stage as spack_stage +from spack.util.web import FetchError @pytest.mark.parametrize("_fetch_method", ["curl", "urllib"]) @@ -32,7 +33,7 @@ def test_s3fetchstrategy_bad_url(tmpdir, _fetch_method): with spack_stage.Stage(fetcher, path=testpath) as stage: assert stage is not None 
assert fetcher.archive_file is None - with pytest.raises(spack_fs.FetchError): + with pytest.raises(FetchError): fetcher.fetch() diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py index 44294ff6bd..ac445c373f 100644 --- a/lib/spack/spack/test/stage.py +++ b/lib/spack/spack/test/stage.py @@ -22,6 +22,7 @@ import spack.util.executable from spack.resource import Resource from spack.stage import DIYStage, ResourceStage, Stage, StageComposite from spack.util.path import canonicalize_path +from spack.util.web import FetchError # The following values are used for common fetch and stage mocking fixtures: _archive_base = "test-files" @@ -525,7 +526,7 @@ class TestStage(object): with stage: try: stage.fetch(mirror_only=True) - except spack.fetch_strategy.FetchError: + except FetchError: pass check_destroy(stage, self.stage_name) @@ -540,7 +541,7 @@ class TestStage(object): stage = Stage(failing_fetch_strategy, name=self.stage_name, search_fn=search_fn) with stage: - with pytest.raises(spack.fetch_strategy.FetchError, match=expected): + with pytest.raises(FetchError, match=expected): stage.fetch(mirror_only=False, err_msg=err_msg) check_destroy(stage, self.stage_name) diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py index 2703700233..da13611585 100644 --- a/lib/spack/spack/test/url_fetch.py +++ b/lib/spack/spack/test/url_fetch.py @@ -17,6 +17,7 @@ import spack.fetch_strategy as fs import spack.repo import spack.util.crypto as crypto import spack.util.executable +import spack.util.web as web_util from spack.spec import Spec from spack.stage import Stage from spack.util.executable import which @@ -328,9 +329,8 @@ def test_missing_curl(tmpdir, monkeypatch): err_msg = err_fmt.format(args[0]) raise spack.util.executable.CommandNotFoundError(err_msg) - # Patching the 'which' symbol imported by fetch_strategy works - # since it is too late in import processing to patch the defining - # (spack.util.executable) module's symbol. + # Patching the 'which' symbol imported by fetch_strategy needed due + # to 'from spack.util.executable import which' in this module. monkeypatch.setattr(fs, "which", _which) testpath = str(tmpdir) @@ -342,3 +342,79 @@ def test_missing_curl(tmpdir, monkeypatch): with Stage(fetcher, path=testpath) as stage: out = stage.fetch() assert err_fmt.format("curl") in out + + +def test_url_fetch_text_without_url(tmpdir): + with pytest.raises(web_util.FetchError, match="URL is required"): + web_util.fetch_url_text(None) + + +def test_url_fetch_text_curl_failures(tmpdir, monkeypatch): + """Check fetch_url_text if URL's curl is missing.""" + err_fmt = "No such command {0}" + + def _which(*args, **kwargs): + err_msg = err_fmt.format(args[0]) + raise spack.util.executable.CommandNotFoundError(err_msg) + + # Patching the 'which' symbol imported by spack.util.web needed due + # to 'from spack.util.executable import which' in this module. 
+ monkeypatch.setattr(spack.util.web, "which", _which) + + with spack.config.override("config:url_fetch_method", "curl"): + with pytest.raises(web_util.FetchError, match="Missing required curl"): + web_util.fetch_url_text("https://github.com/") + + +def test_url_check_curl_errors(): + """Check that standard curl error returncodes raise expected errors.""" + # Check returncode 22 (i.e., 404) + with pytest.raises(web_util.FetchError, match="not found"): + web_util.check_curl_code(22) + + # Check returncode 60 (certificate error) + with pytest.raises(web_util.FetchError, match="invalid certificate"): + web_util.check_curl_code(60) + + +def test_url_missing_curl(tmpdir, monkeypatch): + """Check url_exists failures if URL's curl is missing.""" + err_fmt = "No such command {0}" + + def _which(*args, **kwargs): + err_msg = err_fmt.format(args[0]) + raise spack.util.executable.CommandNotFoundError(err_msg) + + # Patching the 'which' symbol imported by spack.util.web needed due + # to 'from spack.util.executable import which' in this module. + monkeypatch.setattr(spack.util.web, "which", _which) + + with spack.config.override("config:url_fetch_method", "curl"): + with pytest.raises(web_util.FetchError, match="Missing required curl"): + web_util.url_exists("https://github.com/") + + +def test_url_fetch_text_urllib_bad_returncode(tmpdir, monkeypatch): + class response(object): + def getcode(self): + return 404 + + def _read_from_url(*args, **kwargs): + return None, None, response() + + monkeypatch.setattr(spack.util.web, "read_from_url", _read_from_url) + + with spack.config.override("config:url_fetch_method", "urllib"): + with pytest.raises(web_util.FetchError, match="failed with error code"): + web_util.fetch_url_text("https://github.com/") + + +def test_url_fetch_text_urllib_web_error(tmpdir, monkeypatch): + def _raise_web_error(*args, **kwargs): + raise web_util.SpackWebError("bad url") + + monkeypatch.setattr(spack.util.web, "read_from_url", _raise_web_error) + + with spack.config.override("config:url_fetch_method", "urllib"): + with pytest.raises(web_util.FetchError, match="fetch failed to verify"): + web_util.fetch_url_text("https://github.com/") diff --git a/lib/spack/spack/util/path.py b/lib/spack/spack/util/path.py index 3413f48c76..981a6b672d 100644 --- a/lib/spack/spack/util/path.py +++ b/lib/spack/spack/util/path.py @@ -306,7 +306,14 @@ def add_padding(path, length): def canonicalize_path(path): - """Same as substitute_path_variables, but also take absolute path.""" + """Same as substitute_path_variables, but also take absolute path. + + Arguments: + path (str): path being converted as needed + + Returns: + (str): An absolute path with path variable substitution + """ # Get file in which path was written in case we need to make it absolute # relative to that path. filename = None diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py index 5ff0a53fa7..c9c7343585 100644 --- a/lib/spack/spack/util/url.py +++ b/lib/spack/spack/util/url.py @@ -12,7 +12,7 @@ import posixpath import re import sys -import six.moves.urllib.parse as urllib_parse +import six.moves.urllib.parse from six import string_types from spack.util.path import ( @@ -67,11 +67,18 @@ def local_file_path(url): def parse(url, scheme="file"): """Parse a url. - For file:// URLs, the netloc and path components are concatenated and - passed through spack.util.path.canoncalize_path(). + Path variable substitution is performed on file URLs as needed. 
The + variables are documented at + https://spack.readthedocs.io/en/latest/configuration.html#spack-specific-variables. - Otherwise, the returned value is the same as urllib's urlparse() with - allow_fragments=False. + Arguments: + url (str): URL to be parsed + scheme (str): associated URL scheme + Returns: + (six.moves.urllib.parse.ParseResult): For file scheme URLs, the + netloc and path components are concatenated and passed through + spack.util.path.canoncalize_path(). Otherwise, the returned value + is the same as urllib's urlparse() with allow_fragments=False. """ # guarantee a value passed in is of proper url format. Guarantee # allows for easier string manipulation accross platforms @@ -79,7 +86,11 @@ def parse(url, scheme="file"): require_url_format(url) url = escape_file_url(url) url_obj = ( - urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False) + six.moves.urllib.parse.urlparse( + url, + scheme=scheme, + allow_fragments=False, + ) if isinstance(url, string_types) else url ) @@ -108,8 +119,13 @@ def parse(url, scheme="file"): if sys.platform == "win32": path = convert_to_posix_path(path) - return urllib_parse.ParseResult( - scheme=scheme, netloc=netloc, path=path, params=params, query=query, fragment=None + return six.moves.urllib.parse.ParseResult( + scheme=scheme, + netloc=netloc, + path=path, + params=params, + query=query, + fragment=None, ) @@ -188,7 +204,11 @@ def join(base_url, path, *extra, **kwargs): last_abs_component = None scheme = "" for i in range(n - 1, -1, -1): - obj = urllib_parse.urlparse(paths[i], scheme="", allow_fragments=False) + obj = six.moves.urllib.parse.urlparse( + paths[i], + scheme="", + allow_fragments=False, + ) scheme = obj.scheme @@ -198,7 +218,11 @@ def join(base_url, path, *extra, **kwargs): # Without a scheme, we have to go back looking for the # next-last component that specifies a scheme. for j in range(i - 1, -1, -1): - obj = urllib_parse.urlparse(paths[j], scheme="", allow_fragments=False) + obj = six.moves.urllib.parse.urlparse( + paths[j], + scheme="", + allow_fragments=False, + ) if obj.scheme: paths[i] = "{SM}://{NL}{PATH}".format( @@ -214,13 +238,17 @@ def join(base_url, path, *extra, **kwargs): if last_abs_component is not None: paths = paths[last_abs_component:] if len(paths) == 1: - result = urllib_parse.urlparse(paths[0], scheme="file", allow_fragments=False) + result = six.moves.urllib.parse.urlparse( + paths[0], + scheme="file", + allow_fragments=False, + ) # another subtlety: If the last argument to join() is an absolute # file:// URL component with a relative path, the relative path # needs to be resolved. 
if result.scheme == "file" and result.netloc: - result = urllib_parse.ParseResult( + result = six.moves.urllib.parse.ParseResult( scheme=result.scheme, netloc="", path=posixpath.abspath(result.netloc + result.path), @@ -278,8 +306,13 @@ def _join(base_url, path, *extra, **kwargs): base_path = convert_to_posix_path(base_path) return format( - urllib_parse.ParseResult( - scheme=scheme, netloc=netloc, path=base_path, params=params, query=query, fragment=None + six.moves.urllib.parse.ParseResult( + scheme=scheme, + netloc=netloc, + path=base_path, + params=params, + query=query, + fragment=None, ) ) @@ -337,9 +370,12 @@ def parse_git_url(url): return (scheme, user, hostname, port, path) +def is_url_format(url): + return re.search(r"^(file://|http://|https://|ftp://|s3://|gs://|ssh://|git://|/)", url) + + def require_url_format(url): - ut = re.search(r"^(file://|http://|https://|ftp://|s3://|gs://|ssh://|git://|/)", url) - if not ut: + if not is_url_format(url): raise ValueError("Invalid url format from url: %s" % url) diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index a5c96e23da..202cb8a504 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -22,7 +22,7 @@ from six.moves.urllib.request import Request, urlopen import llnl.util.lang import llnl.util.tty as tty -from llnl.util.filesystem import mkdirp, rename +from llnl.util.filesystem import mkdirp, rename, working_dir import spack import spack.config @@ -33,6 +33,7 @@ import spack.util.gcs as gcs_util import spack.util.s3 as s3_util import spack.util.url as url_util from spack.util.compression import ALLOWED_ARCHIVE_TYPES +from spack.util.executable import CommandNotFoundError, which from spack.util.path import convert_to_posix_path #: User-Agent used in Request objects @@ -224,30 +225,222 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non ) -def url_exists(url): - url = url_util.parse(url) - local_path = url_util.local_file_path(url) +def base_curl_fetch_args(url, timeout=0): + """Return the basic fetch arguments typically used in calls to curl. + + The arguments include those for ensuring behaviors such as failing on + errors for codes over 400, printing HTML headers, resolving 3xx redirects, + status or failure handling, and connection timeouts. + + It also uses the following configuration option to set an additional + argument as needed: + + * config:connect_timeout (int): connection timeout + * config:verify_ssl (str): Perform SSL verification + + Arguments: + url (str): URL whose contents will be fetched + timeout (int): Connection timeout, which is only used if higher than + config:connect_timeout + + Returns (list): list of argument strings + """ + curl_args = [ + "-f", # fail on >400 errors + "-D", + "-", # "-D -" prints out HTML headers + "-L", # resolve 3xx redirects + url, + ] + if not spack.config.get("config:verify_ssl"): + curl_args.append("-k") + + if sys.stdout.isatty() and tty.msg_enabled(): + curl_args.append("-#") # status bar when using a tty + else: + curl_args.append("-sS") # show errors if fail + + connect_timeout = spack.config.get("config:connect_timeout", 10) + if timeout: + connect_timeout = max(int(connect_timeout), int(timeout)) + if connect_timeout > 0: + curl_args.extend(["--connect-timeout", str(connect_timeout)]) + + return curl_args + + +def check_curl_code(returncode): + """Check standard return code failures for provided arguments. 
+ + Arguments: + returncode (int): curl return code + + Raises FetchError if the curl returncode indicates failure + """ + if returncode != 0: + if returncode == 22: + # This is a 404. Curl will print the error. + raise FetchError("URL was not found!") + + if returncode == 60: + # This is a certificate error. Suggest spack -k + raise FetchError( + "Curl was unable to fetch due to invalid certificate. " + "This is either an attack, or your cluster's SSL " + "configuration is bad. If you believe your SSL " + "configuration is bad, you can try running spack -k, " + "which will not check SSL certificates." + "Use this at your own risk." + ) + + raise FetchError("Curl failed with error {0}".format(returncode)) + + +def _curl(curl=None): + if not curl: + try: + curl = which("curl", required=True) + except CommandNotFoundError as exc: + tty.error(str(exc)) + raise FetchError("Missing required curl fetch method") + return curl + + +def fetch_url_text(url, curl=None, dest_dir="."): + """Retrieves text-only URL content using the configured fetch method. + It determines the fetch method from: + + * config:url_fetch_method (str): fetch method to use (e.g., 'curl') + + If the method is `curl`, it also uses the following configuration + options: + + * config:connect_timeout (int): connection time out + * config:verify_ssl (str): Perform SSL verification + + Arguments: + url (str): URL whose contents are to be fetched + curl (spack.util.executable.Executable or None): (optional) curl + executable if curl is the configured fetch method + dest_dir (str): (optional) destination directory for fetched text + file + + Returns (str or None): path to the fetched file + + Raises FetchError if the curl returncode indicates failure + """ + if not url: + raise FetchError("A URL is required to fetch its text") + + tty.debug("Fetching text at {0}".format(url)) + + filename = os.path.basename(url) + path = os.path.join(dest_dir, filename) + + fetch_method = spack.config.get("config:url_fetch_method") + tty.debug("Using '{0}' to fetch {1} into {2}".format(fetch_method, url, path)) + if fetch_method == "curl": + curl_exe = _curl(curl) + if not curl_exe: + raise FetchError("Missing required fetch method (curl)") + + curl_args = ["-O"] + curl_args.extend(base_curl_fetch_args(url)) + + # Curl automatically downloads file contents as filename + with working_dir(dest_dir, create=True): + _ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull) + check_curl_code(curl_exe.returncode) + + return path + + else: + try: + _, _, response = read_from_url(url) + + returncode = response.getcode() + if returncode and returncode != 200: + raise FetchError("Urllib failed with error code {0}".format(returncode)) + + output = codecs.getreader("utf-8")(response).read() + if output: + with working_dir(dest_dir, create=True): + with open(filename, "w") as f: + f.write(output) + + return path + + except SpackWebError as err: + raise FetchError("Urllib fetch failed to verify url: {0}".format(str(err))) + + return None + + +def url_exists(url, curl=None): + """Determines whether url exists. + + A scheme-specific process is used for Google Storage (`gs`) and Amazon + Simple Storage Service (`s3`) URLs; otherwise, the configured fetch + method defined by `config:url_fetch_method` is used. + + If the method is `curl`, it also uses the following configuration option: + + * config:verify_ssl (str): Perform SSL verification + + Otherwise, `urllib` will be used. 
+ + Arguments: + url (str): URL whose existence is being checked + curl (spack.util.executable.Executable or None): (optional) curl + executable if curl is the configured fetch method + + Returns (bool): True if it exists; False otherwise. + """ + tty.debug("Checking existence of {0}".format(url)) + url_result = url_util.parse(url) + + # Check if a local file + local_path = url_util.local_file_path(url_result) if local_path: return os.path.exists(local_path) - if url.scheme == "s3": - # Check for URL specific connection information - s3 = s3_util.create_s3_session(url, connection=s3_util.get_mirror_connection(url)) + # Check if Amazon Simple Storage Service (S3) .. urllib-based fetch + if url_result.scheme == "s3": + # Check for URL-specific connection information + s3 = s3_util.create_s3_session( + url_result, connection=s3_util.get_mirror_connection(url_result) + ) # noqa: E501 try: - s3.get_object(Bucket=url.netloc, Key=url.path.lstrip("/")) + s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/")) return True except s3.ClientError as err: if err.response["Error"]["Code"] == "NoSuchKey": return False raise err - elif url.scheme == "gs": - gcs = gcs_util.GCSBlob(url) + # Check if Google Storage .. urllib-based fetch + if url_result.scheme == "gs": + gcs = gcs_util.GCSBlob(url_result) return gcs.exists() - # otherwise, just try to "read" from the URL, and assume that *any* - # non-throwing response contains the resource represented by the URL + # Otherwise, use the configured fetch method + if spack.config.get("config:url_fetch_method") == "curl": + curl_exe = _curl(curl) + if not curl_exe: + return False + + # Telling curl to fetch the first byte (-r 0-0) is supposed to be + # portable. + curl_args = ["--stderr", "-", "-s", "-f", "-r", "0-0", url] + if not spack.config.get("config:verify_ssl"): + curl_args.append("-k") + _ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull) + return curl_exe.returncode == 0 + + # If we get here, then the only other fetch method option is urllib. + # So try to "read" from the URL and assume that *any* non-throwing + # response contains the resource represented by the URL. try: read_from_url(url) return True @@ -716,6 +909,10 @@ def get_header(headers, header_name): raise +class FetchError(spack.error.SpackError): + """Superclass for fetch-related errors.""" + + class SpackWebError(spack.error.SpackError): """Superclass for Spack web spidering errors.""" -- cgit v1.2.3-70-g09d2
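Taken together, the change makes a URL listed under include: in spack.yaml work end to end: the file is fetched (preferring the raw GitHub/GitLab form), cached under the environment's configuration stage directory, and then loaded as a regular configuration scope. Below is a minimal sketch of that flow, modeled on the new test_env_include_packages_url test; the GitHub URL is the fake one used by the test fixtures, so outside those mocks a real configuration URL would be substituted.

import os
import tempfile

import spack.config
import spack.environment as ev

env_dir = tempfile.mkdtemp()  # hypothetical environment location
with open(os.path.join(env_dir, "spack.yaml"), "w") as f:
    f.write(
        "spack:\n"
        "  include:\n"
        "  - https://github.com/fake/fake/blob/develop/etc/fake/defaults/packages.yaml\n"
    )

env = ev.Environment(env_dir)
ev.activate(env)

# One configuration scope per include; the remote file is staged once and
# reused on later activations rather than re-fetched.
print(env.included_config_scopes())
print(spack.config.get("packages"))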