diff options
author | Tamara Dahlgren <35777542+tldahlgren@users.noreply.github.com> | 2022-08-30 11:13:23 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-30 11:13:23 -0700 |
commit | 3894ceebc93ed05df8cedce70967a97c77ca40cf (patch) | |
tree | fccd6bd1c972295654b5baf9360df14bfd15e5d4 /lib | |
parent | b35a0b1b409a4b90ede26c88bfb2240cf77a347f (diff) | |
download | spack-3894ceebc93ed05df8cedce70967a97c77ca40cf.tar.gz spack-3894ceebc93ed05df8cedce70967a97c77ca40cf.tar.bz2 spack-3894ceebc93ed05df8cedce70967a97c77ca40cf.tar.xz spack-3894ceebc93ed05df8cedce70967a97c77ca40cf.zip |
Environments: Add support for include URLs (#29026)
* Preliminary support for include URLs in spack.yaml (environment) files
This commit adds support in environments for external configuration files obtained from a URL with a preference for grabbing raw text from GitHub and gitlab for efficient downloads of the relevant files. The URL can also be a link to a directory that contains multiple configuration files.
Remote configuration files are retrieved and cached for the environment. Configuration files with the same name will not be overwritten once cached.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/docs/conf.py | 1 | ||||
-rw-r--r-- | lib/spack/docs/environments.rst | 19 | ||||
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 7 | ||||
-rw-r--r-- | lib/spack/spack/cmd/buildcache.py | 3 | ||||
-rw-r--r-- | lib/spack/spack/config.py | 97 | ||||
-rw-r--r-- | lib/spack/spack/environment/environment.py | 53 | ||||
-rw-r--r-- | lib/spack/spack/fetch_strategy.py | 129 | ||||
-rw-r--r-- | lib/spack/spack/package_base.py | 8 | ||||
-rw-r--r-- | lib/spack/spack/stage.py | 3 | ||||
-rw-r--r-- | lib/spack/spack/test/bootstrap.py | 2 | ||||
-rw-r--r-- | lib/spack/spack/test/cmd/env.py | 168 | ||||
-rw-r--r-- | lib/spack/spack/test/cmd/spec.py | 2 | ||||
-rw-r--r-- | lib/spack/spack/test/config.py | 172 | ||||
-rw-r--r-- | lib/spack/spack/test/conftest.py | 83 | ||||
-rw-r--r-- | lib/spack/spack/test/gcs_fetch.py | 3 | ||||
-rw-r--r-- | lib/spack/spack/test/packaging.py | 2 | ||||
-rw-r--r-- | lib/spack/spack/test/s3_fetch.py | 3 | ||||
-rw-r--r-- | lib/spack/spack/test/stage.py | 5 | ||||
-rw-r--r-- | lib/spack/spack/test/url_fetch.py | 82 | ||||
-rw-r--r-- | lib/spack/spack/util/path.py | 9 | ||||
-rw-r--r-- | lib/spack/spack/util/url.py | 68 | ||||
-rw-r--r-- | lib/spack/spack/util/web.py | 221 |
22 files changed, 919 insertions, 221 deletions
diff --git a/lib/spack/docs/conf.py b/lib/spack/docs/conf.py index e139ac7774..1108f93ff9 100644 --- a/lib/spack/docs/conf.py +++ b/lib/spack/docs/conf.py @@ -201,6 +201,7 @@ nitpick_ignore = [ ("py:class", "unittest.case.TestCase"), ("py:class", "_frozen_importlib_external.SourceFileLoader"), ("py:class", "clingo.Control"), + ("py:class", "six.moves.urllib.parse.ParseResult"), # Spack classes that are private and we don't want to expose ("py:class", "spack.provider_index._IndexBase"), ("py:class", "spack.repo._PrependFileLoader"), diff --git a/lib/spack/docs/environments.rst b/lib/spack/docs/environments.rst index 036a33b19f..9be2ab4684 100644 --- a/lib/spack/docs/environments.rst +++ b/lib/spack/docs/environments.rst @@ -478,14 +478,21 @@ them to the Environment. spack: include: - relative/path/to/config.yaml + - https://github.com/path/to/raw/config/compilers.yaml - /absolute/path/to/packages.yaml -Environments can include files with either relative or absolute -paths. Inline configurations take precedence over included -configurations, so you don't have to change shared configuration files -to make small changes to an individual Environment. Included configs -listed earlier will have higher precedence, as the included configs are -applied in reverse order. +Environments can include files or URLs. File paths can be relative or +absolute. URLs include the path to the text for individual files or +can be the path to a directory containing configuration files. + +^^^^^^^^^^^^^^^^^^^^^^^^ +Configuration precedence +^^^^^^^^^^^^^^^^^^^^^^^^ + +Inline configurations take precedence over included configurations, so +you don't have to change shared configuration files to make small changes +to an individual environment. Included configurations listed earlier will +have higher precedence, as the included configs are applied in reverse order. ------------------------------- Manually Editing the Specs List diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 88e3f2740e..e51d7d4842 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -26,7 +26,6 @@ from llnl.util.filesystem import mkdirp import spack.cmd import spack.config as config import spack.database as spack_db -import spack.fetch_strategy as fs import spack.hooks import spack.hooks.sbang import spack.mirror @@ -1231,7 +1230,7 @@ def try_fetch(url_to_fetch): try: stage.fetch() - except fs.FetchError: + except web_util.FetchError: stage.destroy() return None @@ -1954,7 +1953,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None): if not os.path.exists(stage.save_filename): try: stage.fetch() - except fs.FetchError: + except web_util.FetchError: continue tty.debug("Found key {0}".format(fingerprint)) @@ -2106,7 +2105,7 @@ def _download_buildcache_entry(mirror_root, descriptions): try: stage.fetch() break - except fs.FetchError as e: + except web_util.FetchError as e: tty.debug(e) else: if fail_if_missing: diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py index 1da0a51bc9..4c1e4d4837 100644 --- a/lib/spack/spack/cmd/buildcache.py +++ b/lib/spack/spack/cmd/buildcache.py @@ -15,7 +15,6 @@ import spack.cmd import spack.cmd.common.arguments as arguments import spack.config import spack.environment as ev -import spack.fetch_strategy as fs import spack.hash_types as ht import spack.mirror import spack.relocate @@ -711,7 +710,7 @@ def sync_fn(args): stage.create() stage.fetch() web_util.push_to_url(local_path, dest_url, keep_original=True) - except fs.FetchError as e: + except web_util.FetchError as e: tty.debug("spack buildcache unable to sync {0}".format(rel_path)) tty.debug(e) finally: diff --git a/lib/spack/spack/config.py b/lib/spack/spack/config.py index 00d1abc8b1..ea896f27c5 100644 --- a/lib/spack/spack/config.py +++ b/lib/spack/spack/config.py @@ -64,6 +64,7 @@ import spack.schema.upstreams # Hacked yaml for configuration files preserves line numbers. import spack.util.spack_yaml as syaml +import spack.util.web as web_util from spack.error import SpackError from spack.util.cpus import cpus_available @@ -994,18 +995,19 @@ def read_config_file(filename, schema=None): # schema when it's not necessary) while allowing us to validate against a # known schema when the top-level key could be incorrect. - # Ignore nonexisting files. if not os.path.exists(filename): + # Ignore nonexistent files. + tty.debug("Skipping nonexistent config path {0}".format(filename)) return None elif not os.path.isfile(filename): raise ConfigFileError("Invalid configuration. %s exists but is not a file." % filename) elif not os.access(filename, os.R_OK): - raise ConfigFileError("Config file is not readable: %s" % filename) + raise ConfigFileError("Config file is not readable: {0}".format(filename)) try: - tty.debug("Reading config file %s" % filename) + tty.debug("Reading config from file {0}".format(filename)) with open(filename) as f: data = syaml.load_config(f) @@ -1304,6 +1306,95 @@ def _config_from(scopes_or_paths): return configuration +def raw_github_gitlab_url(url): + """Transform a github URL to the raw form to avoid undesirable html. + + Args: + url: url to be converted to raw form + + Returns: (str) raw github/gitlab url or the original url + """ + # Note we rely on GitHub to redirect the 'raw' URL returned here to the + # actual URL under https://raw.githubusercontent.com/ with '/blob' + # removed and or, '/blame' if needed. + if "github" in url or "gitlab" in url: + return url.replace("/blob/", "/raw/") + + return url + + +def collect_urls(base_url): + """Return a list of configuration URLs. + + Arguments: + base_url (str): URL for a configuration (yaml) file or a directory + containing yaml file(s) + + Returns: (list) list of configuration file(s) or empty list if none + """ + if not base_url: + return [] + + extension = ".yaml" + + if base_url.endswith(extension): + return [base_url] + + # Collect configuration URLs if the base_url is a "directory". + _, links = web_util.spider(base_url, 0) + return [link for link in links if link.endswith(extension)] + + +def fetch_remote_configs(url, dest_dir, skip_existing=True): + """Retrieve configuration file(s) at the specified URL. + + Arguments: + url (str): URL for a configuration (yaml) file or a directory containing + yaml file(s) + dest_dir (str): destination directory + skip_existing (bool): Skip files that already exist in dest_dir if + ``True``; otherwise, replace those files + + Returns: (str) path to the corresponding file if URL is or contains a + single file and it is the only file in the destination directory or + the root (dest_dir) directory if multiple configuration files exist + or are retrieved. + """ + + def _fetch_file(url): + raw = raw_github_gitlab_url(url) + tty.debug("Reading config from url {0}".format(raw)) + return web_util.fetch_url_text(raw, dest_dir=dest_dir) + + if not url: + raise ConfigFileError("Cannot retrieve configuration without a URL") + + # Return the local path to the cached configuration file OR to the + # directory containing the cached configuration files. + config_links = collect_urls(url) + existing_files = os.listdir(dest_dir) if os.path.isdir(dest_dir) else [] + + paths = [] + for config_url in config_links: + basename = os.path.basename(config_url) + if skip_existing and basename in existing_files: + tty.warn( + "Will not fetch configuration from {0} since a version already" + "exists in {1}".format(config_url, dest_dir) + ) + path = os.path.join(dest_dir, basename) + else: + path = _fetch_file(config_url) + + if path: + paths.append(path) + + if paths: + return dest_dir if len(paths) > 1 else paths[0] + + raise ConfigFileError("Cannot retrieve configuration (yaml) from {0}".format(url)) + + class ConfigError(SpackError): """Superclass for all Spack config related errors.""" diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py index a0869cf533..3df8e4f1df 100644 --- a/lib/spack/spack/environment/environment.py +++ b/lib/spack/spack/environment/environment.py @@ -17,7 +17,6 @@ import six import llnl.util.filesystem as fs import llnl.util.tty as tty -from llnl.util.filesystem import rename from llnl.util.lang import dedupe from llnl.util.symlink import symlink @@ -593,7 +592,7 @@ class ViewDescriptor(object): symlink(new_root, tmp_symlink_name) # mv symlink atomically over root symlink to old_root - rename(tmp_symlink_name, self.root) + fs.rename(tmp_symlink_name, self.root) except Exception as e: # Clean up new view and temporary symlink on any failure. try: @@ -898,6 +897,11 @@ class Environment(object): return os.path.join(self.path, env_subdir_name, "logs") @property + def config_stage_dir(self): + """Directory for any staged configuration file(s).""" + return os.path.join(self.env_subdir_path, "config") + + @property def view_path_default(self): # default path for environment views return os.path.join(self.env_subdir_path, "view") @@ -928,6 +932,47 @@ class Environment(object): # allow paths to contain spack config/environment variables, etc. config_path = substitute_path_variables(config_path) + # strip file URL prefix, if needed, to avoid unnecessary remote + # config processing for local files + config_path = config_path.replace("file://", "") + + if not os.path.exists(config_path): + # Stage any remote configuration file(s) + if spack.util.url.is_url_format(config_path): + staged_configs = ( + os.listdir(self.config_stage_dir) + if os.path.exists(self.config_stage_dir) + else [] + ) + basename = os.path.basename(config_path) + if basename in staged_configs: + # Do NOT re-stage configuration files over existing + # ones with the same name since there is a risk of + # losing changes (e.g., from 'spack config update'). + tty.warn( + "Will not re-stage configuration from {0} to avoid " + "losing changes to the already staged file of the " + "same name.".format(config_path) + ) + + # Recognize the configuration stage directory + # is flattened to ensure a single copy of each + # configuration file. + config_path = self.config_stage_dir + if basename.endswith(".yaml"): + config_path = os.path.join(config_path, basename) + else: + staged_path = spack.config.fetch_remote_configs( + config_path, + self.config_stage_dir, + skip_existing=True, + ) + if not staged_path: + raise SpackEnvironmentError( + "Unable to fetch remote configuration {0}".format(config_path) + ) + config_path = staged_path + # treat relative paths as relative to the environment if not os.path.isabs(config_path): config_path = os.path.join(self.path, config_path) @@ -936,10 +981,14 @@ class Environment(object): if os.path.isdir(config_path): # directories are treated as regular ConfigScopes config_name = "env:%s:%s" % (self.name, os.path.basename(config_path)) + tty.debug("Creating ConfigScope {0} for '{1}'".format(config_name, config_path)) scope = spack.config.ConfigScope(config_name, config_path) elif os.path.exists(config_path): # files are assumed to be SingleFileScopes config_name = "env:%s:%s" % (self.name, config_path) + tty.debug( + "Creating SingleFileScope {0} for '{1}'".format(config_name, config_path) + ) scope = spack.config.SingleFileScope( config_name, config_path, spack.schema.merged.schema ) diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 18230d9ee5..319aed95ca 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -52,7 +52,7 @@ import spack.url import spack.util.crypto as crypto import spack.util.pattern as pattern import spack.util.url as url_util -import spack.util.web +import spack.util.web as web_util import spack.version from spack.util.compression import decompressor_for, extension from spack.util.executable import CommandNotFoundError, which @@ -337,7 +337,7 @@ class URLFetchStrategy(FetchStrategy): url = None errors = [] for url in self.candidate_urls: - if not self._existing_url(url): + if not web_util.url_exists(url, self.curl): continue try: @@ -352,30 +352,6 @@ class URLFetchStrategy(FetchStrategy): if not self.archive_file: raise FailedDownloadError(url) - def _existing_url(self, url): - tty.debug("Checking existence of {0}".format(url)) - - if spack.config.get("config:url_fetch_method") == "curl": - curl = self.curl - # Telling curl to fetch the first byte (-r 0-0) is supposed to be - # portable. - curl_args = ["--stderr", "-", "-s", "-f", "-r", "0-0", url] - if not spack.config.get("config:verify_ssl"): - curl_args.append("-k") - _ = curl(*curl_args, fail_on_error=False, output=os.devnull) - return curl.returncode == 0 - else: - # Telling urllib to check if url is accessible - try: - url, headers, response = spack.util.web.read_from_url(url) - except spack.util.web.SpackWebError as werr: - msg = "Urllib fetch failed to verify url\ - {0}\n with error {1}".format( - url, werr - ) - raise FailedDownloadError(url, msg) - return response.getcode() is None or response.getcode() == 200 - def _fetch_from_url(self, url): if spack.config.get("config:url_fetch_method") == "curl": return self._fetch_curl(url) @@ -397,8 +373,8 @@ class URLFetchStrategy(FetchStrategy): # Run urllib but grab the mime type from the http headers try: - url, headers, response = spack.util.web.read_from_url(url) - except spack.util.web.SpackWebError as e: + url, headers, response = web_util.read_from_url(url) + except web_util.SpackWebError as e: # clean up archive on failure. if self.archive_file: os.remove(self.archive_file) @@ -433,38 +409,19 @@ class URLFetchStrategy(FetchStrategy): else: save_args = ["-O"] - curl_args = save_args + [ - "-f", # fail on >400 errors - "-D", - "-", # print out HTML headers - "-L", # resolve 3xx redirects - url, - ] - - if not spack.config.get("config:verify_ssl"): - curl_args.append("-k") - - if sys.stdout.isatty() and tty.msg_enabled(): - curl_args.append("-#") # status bar when using a tty - else: - curl_args.append("-sS") # show errors if fail - - connect_timeout = spack.config.get("config:connect_timeout", 10) - + timeout = 0 + cookie_args = [] if self.extra_options: cookie = self.extra_options.get("cookie") if cookie: - curl_args.append("-j") # junk cookies - curl_args.append("-b") # specify cookie - curl_args.append(cookie) + cookie_args.append("-j") # junk cookies + cookie_args.append("-b") # specify cookie + cookie_args.append(cookie) timeout = self.extra_options.get("timeout") - if timeout: - connect_timeout = max(connect_timeout, int(timeout)) - if connect_timeout > 0: - # Timeout if can't establish a connection after n sec. - curl_args.extend(["--connect-timeout", str(connect_timeout)]) + base_args = web_util.base_curl_fetch_args(url, timeout) + curl_args = save_args + base_args + cookie_args # Run curl but grab the mime type from the http headers curl = self.curl @@ -479,26 +436,10 @@ class URLFetchStrategy(FetchStrategy): if partial_file and os.path.lexists(partial_file): os.remove(partial_file) - if curl.returncode == 22: - # This is a 404. Curl will print the error. - raise FailedDownloadError(url, "URL %s was not found!" % url) - - elif curl.returncode == 60: - # This is a certificate error. Suggest spack -k - raise FailedDownloadError( - url, - "Curl was unable to fetch due to invalid certificate. " - "This is either an attack, or your cluster's SSL " - "configuration is bad. If you believe your SSL " - "configuration is bad, you can try running spack -k, " - "which will not check SSL certificates." - "Use this at your own risk.", - ) - - else: - # This is some other curl error. Curl will print the - # error, but print a spack message too - raise FailedDownloadError(url, "Curl failed with error %d" % curl.returncode) + try: + web_util.check_curl_code(curl.returncode) + except web_util.FetchError as err: + raise spack.fetch_strategy.FailedDownloadError(url, str(err)) self._check_headers(headers) @@ -556,7 +497,7 @@ class URLFetchStrategy(FetchStrategy): if not self.archive_file: raise NoArchiveFileError("Cannot call archive() before fetching.") - spack.util.web.push_to_url(self.archive_file, destination, keep_original=True) + web_util.push_to_url(self.archive_file, destination, keep_original=True) @_needs_stage def check(self): @@ -1385,19 +1326,19 @@ class S3FetchStrategy(URLFetchStrategy): parsed_url = url_util.parse(self.url) if parsed_url.scheme != "s3": - raise FetchError("S3FetchStrategy can only fetch from s3:// urls.") + raise web_util.FetchError("S3FetchStrategy can only fetch from s3:// urls.") tty.debug("Fetching {0}".format(self.url)) basename = os.path.basename(parsed_url.path) with working_dir(self.stage.path): - _, headers, stream = spack.util.web.read_from_url(self.url) + _, headers, stream = web_util.read_from_url(self.url) with open(basename, "wb") as f: shutil.copyfileobj(stream, f) - content_type = spack.util.web.get_header(headers, "Content-type") + content_type = web_util.get_header(headers, "Content-type") if content_type == "text/html": warn_content_type_mismatch(self.archive_file or "the archive") @@ -1426,15 +1367,13 @@ class GCSFetchStrategy(URLFetchStrategy): @_needs_stage def fetch(self): - import spack.util.web as web_util - if self.archive_file: tty.debug("Already downloaded {0}".format(self.archive_file)) return parsed_url = url_util.parse(self.url) if parsed_url.scheme != "gs": - raise FetchError("GCSFetchStrategy can only fetch from gs:// urls.") + raise web_util.FetchError("GCSFetchStrategy can only fetch from gs:// urls.") tty.debug("Fetching {0}".format(self.url)) @@ -1489,7 +1428,7 @@ def from_kwargs(**kwargs): on attribute names (e.g., ``git``, ``hg``, etc.) Raises: - FetchError: If no ``fetch_strategy`` matches the args. + spack.util.web.FetchError: If no ``fetch_strategy`` matches the args. """ for fetcher in all_strategies: if fetcher.matches(kwargs): @@ -1586,7 +1525,7 @@ def for_package_version(pkg, version): # if it's a commit, we must use a GitFetchStrategy if isinstance(version, spack.version.GitVersion): if not hasattr(pkg, "git"): - raise FetchError( + raise web_util.FetchError( "Cannot fetch git version for %s. Package has no 'git' attribute" % pkg.name ) # Populate the version with comparisons to other commits @@ -1731,15 +1670,11 @@ class FsCache(object): shutil.rmtree(self.root, ignore_errors=True) -class FetchError(spack.error.SpackError): - """Superclass for fetcher errors.""" - - -class NoCacheError(FetchError): +class NoCacheError(web_util.FetchError): """Raised when there is no cached archive for a package.""" -class FailedDownloadError(FetchError): +class FailedDownloadError(web_util.FetchError): """Raised when a download fails.""" def __init__(self, url, msg=""): @@ -1747,23 +1682,23 @@ class FailedDownloadError(FetchError): self.url = url -class NoArchiveFileError(FetchError): - """ "Raised when an archive file is expected but none exists.""" +class NoArchiveFileError(web_util.FetchError): + """Raised when an archive file is expected but none exists.""" -class NoDigestError(FetchError): +class NoDigestError(web_util.FetchError): """Raised after attempt to checksum when URL has no digest.""" -class ExtrapolationError(FetchError): +class ExtrapolationError(web_util.FetchError): """Raised when we can't extrapolate a version for a package.""" -class FetcherConflict(FetchError): +class FetcherConflict(web_util.FetchError): """Raised for packages with invalid fetch attributes.""" -class InvalidArgsError(FetchError): +class InvalidArgsError(web_util.FetchError): """Raised when a version can't be deduced from a set of arguments.""" def __init__(self, pkg=None, version=None, **args): @@ -1776,11 +1711,11 @@ class InvalidArgsError(FetchError): super(InvalidArgsError, self).__init__(msg, long_msg) -class ChecksumError(FetchError): +class ChecksumError(web_util.FetchError): """Raised when archive fails to checksum.""" -class NoStageError(FetchError): +class NoStageError(web_util.FetchError): """Raised when fetch operations are called before set_stage().""" def __init__(self, method): diff --git a/lib/spack/spack/package_base.py b/lib/spack/spack/package_base.py index 81801df9a5..401303e949 100644 --- a/lib/spack/spack/package_base.py +++ b/lib/spack/spack/package_base.py @@ -63,6 +63,7 @@ from spack.stage import ResourceStage, Stage, StageComposite, stage_prefix from spack.util.executable import ProcessError, which from spack.util.package_hash import package_hash from spack.util.prefix import Prefix +from spack.util.web import FetchError from spack.version import GitVersion, Version, VersionBase if sys.version_info[0] >= 3: @@ -3022,13 +3023,6 @@ def possible_dependencies(*pkg_or_spec, **kwargs): return visited -class FetchError(spack.error.SpackError): - """Raised when something goes wrong during fetch.""" - - def __init__(self, message, long_msg=None): - super(FetchError, self).__init__(message, long_msg) - - class PackageStillNeededError(InstallError): """Raised when package is still needed by another on uninstall.""" diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index bff07b44df..62e05d504e 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -42,6 +42,7 @@ import spack.util.path as sup import spack.util.pattern as pattern import spack.util.url as url_util from spack.util.crypto import bit_length, prefix_bits +from spack.util.web import FetchError # The well-known stage source subdirectory name. _source_path_subdir = "spack-src" @@ -529,7 +530,7 @@ class Stage(object): self.fetcher = self.default_fetcher default_msg = "All fetchers failed for {0}".format(self.name) - raise fs.FetchError(err_msg or default_msg, None) + raise FetchError(err_msg or default_msg, None) print_errors(errors) diff --git a/lib/spack/spack/test/bootstrap.py b/lib/spack/spack/test/bootstrap.py index aeacf99d76..06e9e5f2bc 100644 --- a/lib/spack/spack/test/bootstrap.py +++ b/lib/spack/spack/test/bootstrap.py @@ -134,7 +134,7 @@ def test_config_yaml_is_preserved_during_bootstrap(mutable_config): @pytest.mark.regression("26548") -def test_custom_store_in_environment(mutable_config, tmpdir): +def test_bootstrap_custom_store_in_environment(mutable_config, tmpdir): # Test that the custom store in an environment is taken into account # during bootstrapping spack_yaml = tmpdir.join("spack.yaml") diff --git a/lib/spack/spack/test/cmd/env.py b/lib/spack/spack/test/cmd/env.py index 4bb7203be7..d849931dea 100644 --- a/lib/spack/spack/test/cmd/env.py +++ b/lib/spack/spack/test/cmd/env.py @@ -29,6 +29,7 @@ from spack.stage import stage_prefix from spack.util.executable import Executable from spack.util.mock_package import MockPackageMultiRepo from spack.util.path import substitute_path_variables +from spack.util.web import FetchError from spack.version import Version # TODO-27021 @@ -656,7 +657,7 @@ env: assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) -def test_with_config_bad_include(capfd): +def test_with_config_bad_include(): env_name = "test_bad_include" test_config = """\ spack: @@ -667,15 +668,14 @@ spack: _env_create(env_name, StringIO(test_config)) e = ev.read(env_name) - with pytest.raises(SystemExit): + with pytest.raises(spack.config.ConfigFileError) as exc: with e: e.concretize() - out, err = capfd.readouterr() + err = str(exc) + assert "not retrieve configuration" in err + assert os.path.join("no", "such", "directory") in err - assert "missing include" in err - assert "/no/such/directory" in err - assert os.path.join("no", "such", "file.yaml") in err assert ev.active_environment() is None @@ -721,25 +721,44 @@ def test_env_with_include_config_files_same_basename(): assert environment_specs[1].satisfies("mpileaks@2.2") -def test_env_with_included_config_file(): - test_config = """\ +@pytest.fixture(scope="function") +def packages_file(tmpdir): + """Return the path to the packages configuration file.""" + raw_yaml = """ +packages: + mpileaks: + version: [2.2] +""" + filename = tmpdir.ensure("testconfig", "packages.yaml") + filename.write(raw_yaml) + yield filename + + +def mpileaks_env_config(include_path): + """Return the contents of an environment that includes the provided + path and lists mpileaks as the sole spec.""" + return """\ env: include: - - ./included-config.yaml + - {0} specs: - mpileaks -""" +""".format( + include_path + ) + + +def test_env_with_included_config_file(packages_file): + """Test inclusion of a relative packages configuration file added to an + existing environment.""" + include_filename = "included-config.yaml" + test_config = mpileaks_env_config(os.path.join(".", include_filename)) + _env_create("test", StringIO(test_config)) e = ev.read("test") - with open(os.path.join(e.path, "included-config.yaml"), "w") as f: - f.write( - """\ -packages: - mpileaks: - version: [2.2] -""" - ) + included_path = os.path.join(e.path, include_filename) + fs.rename(packages_file.strpath, included_path) with e: e.concretize() @@ -747,65 +766,81 @@ packages: assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) -def test_env_with_included_config_scope(): +def test_env_with_included_config_file_url(tmpdir, mutable_empty_config, packages_file): + """Test configuration inclusion of a file whose path is a URL before + the environment is concretized.""" + + spack_yaml = tmpdir.join("spack.yaml") + with spack_yaml.open("w") as f: + f.write("spack:\n include:\n - file://{0}\n".format(packages_file)) + + env = ev.Environment(tmpdir.strpath) + ev.activate(env) + scopes = env.included_config_scopes() + assert len(scopes) == 1 + + cfg = spack.config.get("packages") + assert cfg["mpileaks"]["version"] == [2.2] + + +def test_env_with_included_config_missing_file(tmpdir, mutable_empty_config): + """Test inclusion of a missing configuration file raises FetchError + noting missing file.""" + + spack_yaml = tmpdir.join("spack.yaml") + missing_file = tmpdir.join("packages.yaml") + with spack_yaml.open("w") as f: + f.write("spack:\n include:\n - {0}\n".format(missing_file.strpath)) + + env = ev.Environment(tmpdir.strpath) + with pytest.raises(FetchError, match="No such file or directory"): + ev.activate(env) + + +def test_env_with_included_config_scope(tmpdir, packages_file): + """Test inclusion of a package file from the environment's configuration + stage directory. This test is intended to represent a case where a remote + file has already been staged.""" config_scope_path = os.path.join(ev.root("test"), "config") - test_config = ( - """\ -env: - include: - - %s - specs: - - mpileaks -""" - % config_scope_path - ) + # Configure the environment to include file(s) from the environment's + # remote configuration stage directory. + test_config = mpileaks_env_config(config_scope_path) + + # Create the environment _env_create("test", StringIO(test_config)) e = ev.read("test") + # Copy the packages.yaml file to the environment configuration + # directory so it is picked up during concretization. (Using + # copy instead of rename in case the fixture scope changes.) fs.mkdirp(config_scope_path) - with open(os.path.join(config_scope_path, "packages.yaml"), "w") as f: - f.write( - """\ -packages: - mpileaks: - version: [2.2] -""" - ) + include_filename = os.path.basename(packages_file.strpath) + included_path = os.path.join(config_scope_path, include_filename) + fs.copy(packages_file.strpath, included_path) + # Ensure the concretized environment reflects contents of the + # packages.yaml file. with e: e.concretize() assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) -def test_env_with_included_config_var_path(): +def test_env_with_included_config_var_path(packages_file): + """Test inclusion of a package configuration file with path variables + "staged" in the environment's configuration stage directory.""" config_var_path = os.path.join("$tempdir", "included-config.yaml") - test_config = ( - """\ -env: - include: - - %s - specs: - - mpileaks -""" - % config_var_path - ) + test_config = mpileaks_env_config(config_var_path) _env_create("test", StringIO(test_config)) e = ev.read("test") config_real_path = substitute_path_variables(config_var_path) fs.mkdirp(os.path.dirname(config_real_path)) - with open(config_real_path, "w") as f: - f.write( - """\ -packages: - mpileaks: - version: [2.2] -""" - ) + fs.rename(packages_file.strpath, config_real_path) + assert os.path.exists(config_real_path) with e: e.concretize() @@ -3030,3 +3065,24 @@ def test_unify_when_possible_works_around_conflicts(): assert len([x for x in e.all_specs() if x.satisfies("mpileaks+opt")]) == 1 assert len([x for x in e.all_specs() if x.satisfies("mpileaks~opt")]) == 1 assert len([x for x in e.all_specs() if x.satisfies("mpich")]) == 1 + + +def test_env_include_packages_url( + tmpdir, mutable_empty_config, mock_spider_configs, mock_curl_configs +): + """Test inclusion of a (GitHub) URL.""" + develop_url = "https://github.com/fake/fake/blob/develop/" + default_packages = develop_url + "etc/fake/defaults/packages.yaml" + spack_yaml = tmpdir.join("spack.yaml") + with spack_yaml.open("w") as f: + f.write("spack:\n include:\n - {0}\n".format(default_packages)) + assert os.path.isfile(spack_yaml.strpath) + + with spack.config.override("config:url_fetch_method", "curl"): + env = ev.Environment(tmpdir.strpath) + ev.activate(env) + scopes = env.included_config_scopes() + assert len(scopes) == 1 + + cfg = spack.config.get("packages") + assert "openmpi" in cfg["all"]["providers"]["mpi"] diff --git a/lib/spack/spack/test/cmd/spec.py b/lib/spack/spack/test/cmd/spec.py index d604bc4515..8e24a7dfee 100644 --- a/lib/spack/spack/test/cmd/spec.py +++ b/lib/spack/spack/test/cmd/spec.py @@ -12,8 +12,8 @@ import spack.environment as ev import spack.error import spack.spec import spack.store -from spack.fetch_strategy import FetchError from spack.main import SpackCommand, SpackCommandError +from spack.util.web import FetchError pytestmark = pytest.mark.usefixtures("config", "mutable_mock_repo") diff --git a/lib/spack/spack/test/config.py b/lib/spack/spack/test/config.py index 5ecd761e24..f4d0b570fd 100644 --- a/lib/spack/spack/test/config.py +++ b/lib/spack/spack/test/config.py @@ -12,7 +12,8 @@ import tempfile import pytest from six import StringIO -from llnl.util.filesystem import getuid, mkdirp, touch +import llnl.util.tty as tty +from llnl.util.filesystem import getuid, join_path, mkdirp, touch, touchp import spack.config import spack.environment as ev @@ -828,9 +829,9 @@ def test_bad_config_section(mock_low_high_config): spack.config.get("foobar") -@pytest.mark.skipif(sys.platform == "win32", reason="Not supported on Windows (yet)") +@pytest.mark.skipif(sys.platform == "win32", reason="chmod not supported on Windows") @pytest.mark.skipif(getuid() == 0, reason="user is root") -def test_bad_command_line_scopes(tmpdir, mock_low_high_config): +def test_bad_command_line_scopes(tmpdir, config): cfg = spack.config.Configuration() with tmpdir.as_cwd(): @@ -1265,3 +1266,168 @@ def test_user_cache_path_is_overridable(working_env): def test_user_cache_path_is_default_when_env_var_is_empty(working_env): os.environ["SPACK_USER_CACHE_PATH"] = "" assert os.path.expanduser("~%s.spack" % os.sep) == spack.paths._get_user_cache_path() + + +github_url = "https://github.com/fake/fake/{0}/develop" +gitlab_url = "https://gitlab.fake.io/user/repo/-/blob/config/defaults" + + +@pytest.mark.parametrize( + "url,isfile", + [ + (github_url.format("tree"), False), + ("{0}/README.md".format(github_url.format("blob")), True), + ("{0}/etc/fake/defaults/packages.yaml".format(github_url.format("blob")), True), + (gitlab_url, False), + (None, False), + ], +) +def test_config_collect_urls(mutable_empty_config, mock_spider_configs, url, isfile): + with spack.config.override("config:url_fetch_method", "curl"): + urls = spack.config.collect_urls(url) + if url: + if isfile: + expected = 1 if url.endswith(".yaml") else 0 + assert len(urls) == expected + else: + # Expect multiple configuration files for a "directory" + assert len(urls) > 1 + else: + assert not urls + + +@pytest.mark.parametrize( + "url,isfile,fail", + [ + (github_url.format("tree"), False, False), + (gitlab_url, False, False), + ("{0}/README.md".format(github_url.format("blob")), True, True), + ("{0}/compilers.yaml".format(gitlab_url), True, False), + (None, False, True), + ], +) +def test_config_fetch_remote_configs( + tmpdir, + mutable_empty_config, + mock_collect_urls, + mock_curl_configs, + url, + isfile, + fail, +): + def _has_content(filename): + # The first element of all configuration files for this test happen to + # be the basename of the file so this check leverages that feature. If + # that changes, then this check will need to change accordingly. + element = "{0}:".format(os.path.splitext(os.path.basename(filename))[0]) + with open(filename, "r") as fd: + for line in fd: + if element in line: + return True + tty.debug("Expected {0} in '{1}'".format(element, filename)) + return False + + dest_dir = join_path(tmpdir.strpath, "defaults") + if fail: + msg = "Cannot retrieve configuration" + with spack.config.override("config:url_fetch_method", "curl"): + with pytest.raises(spack.config.ConfigFileError, match=msg): + spack.config.fetch_remote_configs(url, dest_dir) + else: + with spack.config.override("config:url_fetch_method", "curl"): + path = spack.config.fetch_remote_configs(url, dest_dir) + assert os.path.exists(path) + if isfile: + # Ensure correct file is "fetched" + assert os.path.basename(path) == os.path.basename(url) + # Ensure contents of the file has expected config element + assert _has_content(path) + else: + for filename in os.listdir(path): + assert _has_content(join_path(path, filename)) + + +@pytest.fixture(scope="function") +def mock_collect_urls(mock_config_data, monkeypatch): + """Mock the collection of URLs to avoid mocking spider.""" + + _, config_files = mock_config_data + + def _collect(base_url): + if not base_url: + return [] + + ext = os.path.splitext(base_url)[1] + if ext: + return [base_url] if ext == ".yaml" else [] + + return [join_path(base_url, f) for f in config_files] + + monkeypatch.setattr(spack.config, "collect_urls", _collect) + + yield + + +@pytest.mark.parametrize( + "url,skip", + [ + (github_url.format("tree"), True), + ("{0}/compilers.yaml".format(gitlab_url), True), + ], +) +def test_config_fetch_remote_configs_skip( + tmpdir, mutable_empty_config, mock_collect_urls, mock_curl_configs, url, skip +): + """Ensure skip fetching remote config file if it already exists when + required and not skipping if replacing it.""" + + def check_contents(filename, expected): + with open(filename, "r") as fd: + lines = fd.readlines() + if expected: + assert lines[0] == "compilers:" + else: + assert not lines + + dest_dir = join_path(tmpdir.strpath, "defaults") + filename = "compilers.yaml" + + # Create a stage directory with an empty configuration file + path = join_path(dest_dir, filename) + touchp(path) + + # Do NOT replace the existing cached configuration file if skipping + expected = None if skip else "compilers:" + + with spack.config.override("config:url_fetch_method", "curl"): + path = spack.config.fetch_remote_configs(url, dest_dir, skip) + result_filename = path if path.endswith(".yaml") else join_path(path, filename) + check_contents(result_filename, expected) + + +def test_config_file_dir_failure(tmpdir, mutable_empty_config): + with pytest.raises(spack.config.ConfigFileError, match="not a file"): + spack.config.read_config_file(tmpdir.strpath) + + +@pytest.mark.skipif(sys.platform == "win32", reason="chmod not supported on Windows") +def test_config_file_read_perms_failure(tmpdir, mutable_empty_config): + """Test reading a configuration file without permissions to ensure + ConfigFileError is raised.""" + filename = join_path(tmpdir.strpath, "test.yaml") + touch(filename) + os.chmod(filename, 0o200) + + with pytest.raises(spack.config.ConfigFileError, match="not readable"): + spack.config.read_config_file(filename) + + +def test_config_file_read_invalid_yaml(tmpdir, mutable_empty_config): + """Test reading a configuration file with invalid (unparseable) YAML + raises a ConfigFileError.""" + filename = join_path(tmpdir.strpath, "test.yaml") + with open(filename, "w") as f: + f.write("spack:\nview") + + with pytest.raises(spack.config.ConfigFileError, match="parsing yaml"): + spack.config.read_config_file(filename) diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index d7bdfe6ac8..ed8dc0f9bd 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -26,6 +26,7 @@ import archspec.cpu.microarchitecture import archspec.cpu.schema import llnl.util.lang +import llnl.util.tty as tty from llnl.util.filesystem import mkdirp, remove_linked_tree, working_dir import spack.binary_distribution @@ -47,8 +48,9 @@ import spack.test.cray_manifest import spack.util.executable import spack.util.gpg import spack.util.spack_yaml as syaml -from spack.fetch_strategy import FetchError, FetchStrategyComposite, URLFetchStrategy +from spack.fetch_strategy import FetchStrategyComposite, URLFetchStrategy from spack.util.pattern import Bunch +from spack.util.web import FetchError is_windows = sys.platform == "win32" @@ -1684,3 +1686,82 @@ def noncyclical_dir_structure(tmpdir): with open(j("file_3"), "wb"): pass yield d + + +@pytest.fixture(scope="function") +def mock_config_data(): + config_data_dir = os.path.join(spack.paths.test_path, "data", "config") + return config_data_dir, os.listdir(config_data_dir) + + +@pytest.fixture(scope="function") +def mock_curl_configs(mock_config_data, monkeypatch): + """ + Mock curl-based retrieval of configuration files from the web by grabbing + them from the test data configuration directory. + + Fetches a single (configuration) file if the name matches one in the test + data directory. + """ + config_data_dir, config_files = mock_config_data + + class MockCurl(object): + def __init__(self): + self.returncode = None + + def __call__(self, *args, **kwargs): + url = [a for a in args if a.startswith("http")][0] + basename = os.path.basename(url) + if os.path.splitext(url)[1]: + if basename in config_files: + filename = os.path.join(config_data_dir, basename) + + with open(filename, "r") as f: + lines = f.readlines() + write_file(os.path.basename(filename), "".join(lines)) + + self.returncode = 0 + else: + # This is a "404" and is technically only returned if -f + # flag is provided to curl. + tty.msg("curl: (22) The requested URL returned error: 404") + self.returncode = 22 + + def mock_curl(*args): + return MockCurl() + + monkeypatch.setattr(spack.util.web, "_curl", mock_curl) + + yield + + +@pytest.fixture(scope="function") +def mock_spider_configs(mock_config_data, monkeypatch): + """ + Mock retrieval of configuration file URLs from the web by grabbing + them from the test data configuration directory. + """ + config_data_dir, config_files = mock_config_data + + def _spider(*args, **kwargs): + root_urls = args[0] + if not root_urls: + return [], set() + + root_urls = [root_urls] if isinstance(root_urls, str) else root_urls + + # Any URL with an extension will be treated like a file; otherwise, + # it is considered a directory/folder and we'll grab all available + # files. + urls = [] + for url in root_urls: + if os.path.splitext(url)[1]: + urls.append(url) + else: + urls.extend([os.path.join(url, f) for f in config_files]) + + return [], set(urls) + + monkeypatch.setattr(spack.util.web, "spider", _spider) + + yield diff --git a/lib/spack/spack/test/gcs_fetch.py b/lib/spack/spack/test/gcs_fetch.py index 90657d4693..a253d9e0e8 100644 --- a/lib/spack/spack/test/gcs_fetch.py +++ b/lib/spack/spack/test/gcs_fetch.py @@ -10,6 +10,7 @@ import pytest import spack.config import spack.fetch_strategy import spack.stage +from spack.util.web import FetchError @pytest.mark.parametrize("_fetch_method", ["curl", "urllib"]) @@ -32,7 +33,7 @@ def test_gcsfetchstrategy_bad_url(tmpdir, _fetch_method): with spack.stage.Stage(fetcher, path=testpath) as stage: assert stage is not None assert fetcher.archive_file is None - with pytest.raises(spack.fetch_strategy.FetchError): + with pytest.raises(FetchError): fetcher.fetch() diff --git a/lib/spack/spack/test/packaging.py b/lib/spack/spack/test/packaging.py index c84a0e1c68..f0d1c240e8 100644 --- a/lib/spack/spack/test/packaging.py +++ b/lib/spack/spack/test/packaging.py @@ -573,7 +573,7 @@ def test_manual_download(install_mockery, mock_download, monkeypatch, manual, in monkeypatch.setattr(spack.package_base.PackageBase, "download_instr", _instr) expected = pkg.download_instr if manual else "All fetchers failed" - with pytest.raises(spack.fetch_strategy.FetchError, match=expected): + with pytest.raises(spack.util.web.FetchError, match=expected): pkg.do_fetch() diff --git a/lib/spack/spack/test/s3_fetch.py b/lib/spack/spack/test/s3_fetch.py index 4c14390112..cca91b431e 100644 --- a/lib/spack/spack/test/s3_fetch.py +++ b/lib/spack/spack/test/s3_fetch.py @@ -10,6 +10,7 @@ import pytest import spack.config as spack_config import spack.fetch_strategy as spack_fs import spack.stage as spack_stage +from spack.util.web import FetchError @pytest.mark.parametrize("_fetch_method", ["curl", "urllib"]) @@ -32,7 +33,7 @@ def test_s3fetchstrategy_bad_url(tmpdir, _fetch_method): with spack_stage.Stage(fetcher, path=testpath) as stage: assert stage is not None assert fetcher.archive_file is None - with pytest.raises(spack_fs.FetchError): + with pytest.raises(FetchError): fetcher.fetch() diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py index 44294ff6bd..ac445c373f 100644 --- a/lib/spack/spack/test/stage.py +++ b/lib/spack/spack/test/stage.py @@ -22,6 +22,7 @@ import spack.util.executable from spack.resource import Resource from spack.stage import DIYStage, ResourceStage, Stage, StageComposite from spack.util.path import canonicalize_path +from spack.util.web import FetchError # The following values are used for common fetch and stage mocking fixtures: _archive_base = "test-files" @@ -525,7 +526,7 @@ class TestStage(object): with stage: try: stage.fetch(mirror_only=True) - except spack.fetch_strategy.FetchError: + except FetchError: pass check_destroy(stage, self.stage_name) @@ -540,7 +541,7 @@ class TestStage(object): stage = Stage(failing_fetch_strategy, name=self.stage_name, search_fn=search_fn) with stage: - with pytest.raises(spack.fetch_strategy.FetchError, match=expected): + with pytest.raises(FetchError, match=expected): stage.fetch(mirror_only=False, err_msg=err_msg) check_destroy(stage, self.stage_name) diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py index 2703700233..da13611585 100644 --- a/lib/spack/spack/test/url_fetch.py +++ b/lib/spack/spack/test/url_fetch.py @@ -17,6 +17,7 @@ import spack.fetch_strategy as fs import spack.repo import spack.util.crypto as crypto import spack.util.executable +import spack.util.web as web_util from spack.spec import Spec from spack.stage import Stage from spack.util.executable import which @@ -328,9 +329,8 @@ def test_missing_curl(tmpdir, monkeypatch): err_msg = err_fmt.format(args[0]) raise spack.util.executable.CommandNotFoundError(err_msg) - # Patching the 'which' symbol imported by fetch_strategy works - # since it is too late in import processing to patch the defining - # (spack.util.executable) module's symbol. + # Patching the 'which' symbol imported by fetch_strategy needed due + # to 'from spack.util.executable import which' in this module. monkeypatch.setattr(fs, "which", _which) testpath = str(tmpdir) @@ -342,3 +342,79 @@ def test_missing_curl(tmpdir, monkeypatch): with Stage(fetcher, path=testpath) as stage: out = stage.fetch() assert err_fmt.format("curl") in out + + +def test_url_fetch_text_without_url(tmpdir): + with pytest.raises(web_util.FetchError, match="URL is required"): + web_util.fetch_url_text(None) + + +def test_url_fetch_text_curl_failures(tmpdir, monkeypatch): + """Check fetch_url_text if URL's curl is missing.""" + err_fmt = "No such command {0}" + + def _which(*args, **kwargs): + err_msg = err_fmt.format(args[0]) + raise spack.util.executable.CommandNotFoundError(err_msg) + + # Patching the 'which' symbol imported by spack.util.web needed due + # to 'from spack.util.executable import which' in this module. + monkeypatch.setattr(spack.util.web, "which", _which) + + with spack.config.override("config:url_fetch_method", "curl"): + with pytest.raises(web_util.FetchError, match="Missing required curl"): + web_util.fetch_url_text("https://github.com/") + + +def test_url_check_curl_errors(): + """Check that standard curl error returncodes raise expected errors.""" + # Check returncode 22 (i.e., 404) + with pytest.raises(web_util.FetchError, match="not found"): + web_util.check_curl_code(22) + + # Check returncode 60 (certificate error) + with pytest.raises(web_util.FetchError, match="invalid certificate"): + web_util.check_curl_code(60) + + +def test_url_missing_curl(tmpdir, monkeypatch): + """Check url_exists failures if URL's curl is missing.""" + err_fmt = "No such command {0}" + + def _which(*args, **kwargs): + err_msg = err_fmt.format(args[0]) + raise spack.util.executable.CommandNotFoundError(err_msg) + + # Patching the 'which' symbol imported by spack.util.web needed due + # to 'from spack.util.executable import which' in this module. + monkeypatch.setattr(spack.util.web, "which", _which) + + with spack.config.override("config:url_fetch_method", "curl"): + with pytest.raises(web_util.FetchError, match="Missing required curl"): + web_util.url_exists("https://github.com/") + + +def test_url_fetch_text_urllib_bad_returncode(tmpdir, monkeypatch): + class response(object): + def getcode(self): + return 404 + + def _read_from_url(*args, **kwargs): + return None, None, response() + + monkeypatch.setattr(spack.util.web, "read_from_url", _read_from_url) + + with spack.config.override("config:url_fetch_method", "urllib"): + with pytest.raises(web_util.FetchError, match="failed with error code"): + web_util.fetch_url_text("https://github.com/") + + +def test_url_fetch_text_urllib_web_error(tmpdir, monkeypatch): + def _raise_web_error(*args, **kwargs): + raise web_util.SpackWebError("bad url") + + monkeypatch.setattr(spack.util.web, "read_from_url", _raise_web_error) + + with spack.config.override("config:url_fetch_method", "urllib"): + with pytest.raises(web_util.FetchError, match="fetch failed to verify"): + web_util.fetch_url_text("https://github.com/") diff --git a/lib/spack/spack/util/path.py b/lib/spack/spack/util/path.py index 3413f48c76..981a6b672d 100644 --- a/lib/spack/spack/util/path.py +++ b/lib/spack/spack/util/path.py @@ -306,7 +306,14 @@ def add_padding(path, length): def canonicalize_path(path): - """Same as substitute_path_variables, but also take absolute path.""" + """Same as substitute_path_variables, but also take absolute path. + + Arguments: + path (str): path being converted as needed + + Returns: + (str): An absolute path with path variable substitution + """ # Get file in which path was written in case we need to make it absolute # relative to that path. filename = None diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py index 5ff0a53fa7..c9c7343585 100644 --- a/lib/spack/spack/util/url.py +++ b/lib/spack/spack/util/url.py @@ -12,7 +12,7 @@ import posixpath import re import sys -import six.moves.urllib.parse as urllib_parse +import six.moves.urllib.parse from six import string_types from spack.util.path import ( @@ -67,11 +67,18 @@ def local_file_path(url): def parse(url, scheme="file"): """Parse a url. - For file:// URLs, the netloc and path components are concatenated and - passed through spack.util.path.canoncalize_path(). + Path variable substitution is performed on file URLs as needed. The + variables are documented at + https://spack.readthedocs.io/en/latest/configuration.html#spack-specific-variables. - Otherwise, the returned value is the same as urllib's urlparse() with - allow_fragments=False. + Arguments: + url (str): URL to be parsed + scheme (str): associated URL scheme + Returns: + (six.moves.urllib.parse.ParseResult): For file scheme URLs, the + netloc and path components are concatenated and passed through + spack.util.path.canoncalize_path(). Otherwise, the returned value + is the same as urllib's urlparse() with allow_fragments=False. """ # guarantee a value passed in is of proper url format. Guarantee # allows for easier string manipulation accross platforms @@ -79,7 +86,11 @@ def parse(url, scheme="file"): require_url_format(url) url = escape_file_url(url) url_obj = ( - urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False) + six.moves.urllib.parse.urlparse( + url, + scheme=scheme, + allow_fragments=False, + ) if isinstance(url, string_types) else url ) @@ -108,8 +119,13 @@ def parse(url, scheme="file"): if sys.platform == "win32": path = convert_to_posix_path(path) - return urllib_parse.ParseResult( - scheme=scheme, netloc=netloc, path=path, params=params, query=query, fragment=None + return six.moves.urllib.parse.ParseResult( + scheme=scheme, + netloc=netloc, + path=path, + params=params, + query=query, + fragment=None, ) @@ -188,7 +204,11 @@ def join(base_url, path, *extra, **kwargs): last_abs_component = None scheme = "" for i in range(n - 1, -1, -1): - obj = urllib_parse.urlparse(paths[i], scheme="", allow_fragments=False) + obj = six.moves.urllib.parse.urlparse( + paths[i], + scheme="", + allow_fragments=False, + ) scheme = obj.scheme @@ -198,7 +218,11 @@ def join(base_url, path, *extra, **kwargs): # Without a scheme, we have to go back looking for the # next-last component that specifies a scheme. for j in range(i - 1, -1, -1): - obj = urllib_parse.urlparse(paths[j], scheme="", allow_fragments=False) + obj = six.moves.urllib.parse.urlparse( + paths[j], + scheme="", + allow_fragments=False, + ) if obj.scheme: paths[i] = "{SM}://{NL}{PATH}".format( @@ -214,13 +238,17 @@ def join(base_url, path, *extra, **kwargs): if last_abs_component is not None: paths = paths[last_abs_component:] if len(paths) == 1: - result = urllib_parse.urlparse(paths[0], scheme="file", allow_fragments=False) + result = six.moves.urllib.parse.urlparse( + paths[0], + scheme="file", + allow_fragments=False, + ) # another subtlety: If the last argument to join() is an absolute # file:// URL component with a relative path, the relative path # needs to be resolved. if result.scheme == "file" and result.netloc: - result = urllib_parse.ParseResult( + result = six.moves.urllib.parse.ParseResult( scheme=result.scheme, netloc="", path=posixpath.abspath(result.netloc + result.path), @@ -278,8 +306,13 @@ def _join(base_url, path, *extra, **kwargs): base_path = convert_to_posix_path(base_path) return format( - urllib_parse.ParseResult( - scheme=scheme, netloc=netloc, path=base_path, params=params, query=query, fragment=None + six.moves.urllib.parse.ParseResult( + scheme=scheme, + netloc=netloc, + path=base_path, + params=params, + query=query, + fragment=None, ) ) @@ -337,9 +370,12 @@ def parse_git_url(url): return (scheme, user, hostname, port, path) +def is_url_format(url): + return re.search(r"^(file://|http://|https://|ftp://|s3://|gs://|ssh://|git://|/)", url) + + def require_url_format(url): - ut = re.search(r"^(file://|http://|https://|ftp://|s3://|gs://|ssh://|git://|/)", url) - if not ut: + if not is_url_format(url): raise ValueError("Invalid url format from url: %s" % url) diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index a5c96e23da..202cb8a504 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -22,7 +22,7 @@ from six.moves.urllib.request import Request, urlopen import llnl.util.lang import llnl.util.tty as tty -from llnl.util.filesystem import mkdirp, rename +from llnl.util.filesystem import mkdirp, rename, working_dir import spack import spack.config @@ -33,6 +33,7 @@ import spack.util.gcs as gcs_util import spack.util.s3 as s3_util import spack.util.url as url_util from spack.util.compression import ALLOWED_ARCHIVE_TYPES +from spack.util.executable import CommandNotFoundError, which from spack.util.path import convert_to_posix_path #: User-Agent used in Request objects @@ -224,30 +225,222 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non ) -def url_exists(url): - url = url_util.parse(url) - local_path = url_util.local_file_path(url) +def base_curl_fetch_args(url, timeout=0): + """Return the basic fetch arguments typically used in calls to curl. + + The arguments include those for ensuring behaviors such as failing on + errors for codes over 400, printing HTML headers, resolving 3xx redirects, + status or failure handling, and connection timeouts. + + It also uses the following configuration option to set an additional + argument as needed: + + * config:connect_timeout (int): connection timeout + * config:verify_ssl (str): Perform SSL verification + + Arguments: + url (str): URL whose contents will be fetched + timeout (int): Connection timeout, which is only used if higher than + config:connect_timeout + + Returns (list): list of argument strings + """ + curl_args = [ + "-f", # fail on >400 errors + "-D", + "-", # "-D -" prints out HTML headers + "-L", # resolve 3xx redirects + url, + ] + if not spack.config.get("config:verify_ssl"): + curl_args.append("-k") + + if sys.stdout.isatty() and tty.msg_enabled(): + curl_args.append("-#") # status bar when using a tty + else: + curl_args.append("-sS") # show errors if fail + + connect_timeout = spack.config.get("config:connect_timeout", 10) + if timeout: + connect_timeout = max(int(connect_timeout), int(timeout)) + if connect_timeout > 0: + curl_args.extend(["--connect-timeout", str(connect_timeout)]) + + return curl_args + + +def check_curl_code(returncode): + """Check standard return code failures for provided arguments. + + Arguments: + returncode (int): curl return code + + Raises FetchError if the curl returncode indicates failure + """ + if returncode != 0: + if returncode == 22: + # This is a 404. Curl will print the error. + raise FetchError("URL was not found!") + + if returncode == 60: + # This is a certificate error. Suggest spack -k + raise FetchError( + "Curl was unable to fetch due to invalid certificate. " + "This is either an attack, or your cluster's SSL " + "configuration is bad. If you believe your SSL " + "configuration is bad, you can try running spack -k, " + "which will not check SSL certificates." + "Use this at your own risk." + ) + + raise FetchError("Curl failed with error {0}".format(returncode)) + + +def _curl(curl=None): + if not curl: + try: + curl = which("curl", required=True) + except CommandNotFoundError as exc: + tty.error(str(exc)) + raise FetchError("Missing required curl fetch method") + return curl + + +def fetch_url_text(url, curl=None, dest_dir="."): + """Retrieves text-only URL content using the configured fetch method. + It determines the fetch method from: + + * config:url_fetch_method (str): fetch method to use (e.g., 'curl') + + If the method is `curl`, it also uses the following configuration + options: + + * config:connect_timeout (int): connection time out + * config:verify_ssl (str): Perform SSL verification + + Arguments: + url (str): URL whose contents are to be fetched + curl (spack.util.executable.Executable or None): (optional) curl + executable if curl is the configured fetch method + dest_dir (str): (optional) destination directory for fetched text + file + + Returns (str or None): path to the fetched file + + Raises FetchError if the curl returncode indicates failure + """ + if not url: + raise FetchError("A URL is required to fetch its text") + + tty.debug("Fetching text at {0}".format(url)) + + filename = os.path.basename(url) + path = os.path.join(dest_dir, filename) + + fetch_method = spack.config.get("config:url_fetch_method") + tty.debug("Using '{0}' to fetch {1} into {2}".format(fetch_method, url, path)) + if fetch_method == "curl": + curl_exe = _curl(curl) + if not curl_exe: + raise FetchError("Missing required fetch method (curl)") + + curl_args = ["-O"] + curl_args.extend(base_curl_fetch_args(url)) + + # Curl automatically downloads file contents as filename + with working_dir(dest_dir, create=True): + _ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull) + check_curl_code(curl_exe.returncode) + + return path + + else: + try: + _, _, response = read_from_url(url) + + returncode = response.getcode() + if returncode and returncode != 200: + raise FetchError("Urllib failed with error code {0}".format(returncode)) + + output = codecs.getreader("utf-8")(response).read() + if output: + with working_dir(dest_dir, create=True): + with open(filename, "w") as f: + f.write(output) + + return path + + except SpackWebError as err: + raise FetchError("Urllib fetch failed to verify url: {0}".format(str(err))) + + return None + + +def url_exists(url, curl=None): + """Determines whether url exists. + + A scheme-specific process is used for Google Storage (`gs`) and Amazon + Simple Storage Service (`s3`) URLs; otherwise, the configured fetch + method defined by `config:url_fetch_method` is used. + + If the method is `curl`, it also uses the following configuration option: + + * config:verify_ssl (str): Perform SSL verification + + Otherwise, `urllib` will be used. + + Arguments: + url (str): URL whose existence is being checked + curl (spack.util.executable.Executable or None): (optional) curl + executable if curl is the configured fetch method + + Returns (bool): True if it exists; False otherwise. + """ + tty.debug("Checking existence of {0}".format(url)) + url_result = url_util.parse(url) + + # Check if a local file + local_path = url_util.local_file_path(url_result) if local_path: return os.path.exists(local_path) - if url.scheme == "s3": - # Check for URL specific connection information - s3 = s3_util.create_s3_session(url, connection=s3_util.get_mirror_connection(url)) + # Check if Amazon Simple Storage Service (S3) .. urllib-based fetch + if url_result.scheme == "s3": + # Check for URL-specific connection information + s3 = s3_util.create_s3_session( + url_result, connection=s3_util.get_mirror_connection(url_result) + ) # noqa: E501 try: - s3.get_object(Bucket=url.netloc, Key=url.path.lstrip("/")) + s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/")) return True except s3.ClientError as err: if err.response["Error"]["Code"] == "NoSuchKey": return False raise err - elif url.scheme == "gs": - gcs = gcs_util.GCSBlob(url) + # Check if Google Storage .. urllib-based fetch + if url_result.scheme == "gs": + gcs = gcs_util.GCSBlob(url_result) return gcs.exists() - # otherwise, just try to "read" from the URL, and assume that *any* - # non-throwing response contains the resource represented by the URL + # Otherwise, use the configured fetch method + if spack.config.get("config:url_fetch_method") == "curl": + curl_exe = _curl(curl) + if not curl_exe: + return False + + # Telling curl to fetch the first byte (-r 0-0) is supposed to be + # portable. + curl_args = ["--stderr", "-", "-s", "-f", "-r", "0-0", url] + if not spack.config.get("config:verify_ssl"): + curl_args.append("-k") + _ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull) + return curl_exe.returncode == 0 + + # If we get here, then the only other fetch method option is urllib. + # So try to "read" from the URL and assume that *any* non-throwing + # response contains the resource represented by the URL. try: read_from_url(url) return True @@ -716,6 +909,10 @@ def get_header(headers, header_name): raise +class FetchError(spack.error.SpackError): + """Superclass for fetch-related errors.""" + + class SpackWebError(spack.error.SpackError): """Superclass for Spack web spidering errors.""" |