From e055dc0e64693782e5d98ffa90780ad57c855aa2 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Tue, 13 Dec 2022 23:44:13 +0100 Subject: Use file paths/urls correctly (#34452) The main issue that's fixed is that Spack passes paths (as strings) to functions that require URLs. That wasn't an issue on Unix, since there you can simply concatenate `file://` and `path` and all is good, but on Windows that gives invalid file URLs. Also on Unix, Spack would not deal with URI encoding like x%20y for file paths. It also removes Spack's custom url.parse function, which had its own incorrect interpretation of file URLs, taking file://x/y to mean the relative path x/y instead of hostname=x and path=/y. Also it automatically interpolated variables, which is surprising for a function that parses URLs. Instead of all sorts of ad-hoc `if windows: fix_broken_file_url` this PR adds two helper functions around Python's own `urllib.request.pathname2url` and its reverse, `url2pathname`. Also fixes a bug where some `spack buildcache` commands used `-d` as a flag to mean `--mirror-url` requiring a URL, and others `--directory`, requiring a path. It is now the latter consistently. 
--- lib/spack/spack/binary_distribution.py | 11 +- lib/spack/spack/cmd/buildcache.py | 97 ++++++++++------- lib/spack/spack/cmd/ci.py | 2 +- lib/spack/spack/cmd/create.py | 5 +- lib/spack/spack/cmd/gpg.py | 6 +- lib/spack/spack/cmd/mirror.py | 9 +- lib/spack/spack/environment/environment.py | 83 +++++++------- lib/spack/spack/fetch_strategy.py | 32 +++--- lib/spack/spack/gcs_handler.py | 4 +- lib/spack/spack/mirror.py | 23 ++-- lib/spack/spack/s3_handler.py | 4 +- lib/spack/spack/test/bindist.py | 22 ++-- lib/spack/spack/test/build_distribution.py | 20 ++-- lib/spack/spack/test/build_system_guess.py | 3 +- lib/spack/spack/test/cache_fetch.py | 14 +-- lib/spack/spack/test/cmd/ci.py | 4 +- lib/spack/spack/test/cmd/env.py | 10 +- lib/spack/spack/test/cmd/mirror.py | 13 +-- lib/spack/spack/test/conftest.py | 11 +- lib/spack/spack/test/mirror.py | 3 +- lib/spack/spack/test/packaging.py | 3 +- lib/spack/spack/test/patch.py | 3 +- lib/spack/spack/test/stage.py | 13 +-- lib/spack/spack/test/util/util_url.py | 168 +++++------------------------ lib/spack/spack/test/web.py | 9 +- lib/spack/spack/util/s3.py | 9 +- lib/spack/spack/util/url.py | 119 +++----------------- lib/spack/spack/util/web.py | 30 +++--- share/spack/spack-completion.bash | 2 +- 29 files changed, 260 insertions(+), 472 deletions(-) diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 4a4f999641..9a78531206 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -1183,7 +1183,7 @@ def generate_key_index(key_prefix, tmpdir=None): def _build_tarball( spec, - outdir, + out_url, force=False, relative=False, unsigned=False, @@ -1206,8 +1206,7 @@ def _build_tarball( tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec)) tarfile_path = os.path.join(tarfile_dir, tarfile_name) spackfile_path = os.path.join(cache_prefix, tarball_path_name(spec, ".spack")) - - remote_spackfile_path = url_util.join(outdir, 
os.path.relpath(spackfile_path, tmpdir)) + remote_spackfile_path = url_util.join(out_url, os.path.relpath(spackfile_path, tmpdir)) mkdirp(tarfile_dir) if web_util.url_exists(remote_spackfile_path): @@ -1226,7 +1225,7 @@ def _build_tarball( signed_specfile_path = "{0}.sig".format(specfile_path) remote_specfile_path = url_util.join( - outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir)) + out_url, os.path.relpath(specfile_path, os.path.realpath(tmpdir)) ) remote_signed_specfile_path = "{0}.sig".format(remote_specfile_path) @@ -1331,12 +1330,12 @@ def _build_tarball( # push the key to the build cache's _pgp directory so it can be # imported if not unsigned: - push_keys(outdir, keys=[key], regenerate_index=regenerate_index, tmpdir=tmpdir) + push_keys(out_url, keys=[key], regenerate_index=regenerate_index, tmpdir=tmpdir) # create an index.json for the build_cache directory so specs can be # found if regenerate_index: - generate_package_index(url_util.join(outdir, os.path.relpath(cache_prefix, tmpdir))) + generate_package_index(url_util.join(out_url, os.path.relpath(cache_prefix, tmpdir))) finally: shutil.rmtree(tmpdir) diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py index 061a34a438..8d765d86e3 100644 --- a/lib/spack/spack/cmd/buildcache.py +++ b/lib/spack/spack/cmd/buildcache.py @@ -8,6 +8,7 @@ import os import shutil import sys import tempfile +import urllib.parse import llnl.util.tty as tty @@ -45,7 +46,7 @@ def setup_parser(subparser): "-r", "--rel", action="store_true", - help="make all rpaths relative" + " before creating tarballs.", + help="make all rpaths relative before creating tarballs.", ) create.add_argument( "-f", "--force", action="store_true", help="overwrite tarball if it exists." 
@@ -54,13 +55,13 @@ def setup_parser(subparser): "-u", "--unsigned", action="store_true", - help="create unsigned buildcache" + " tarballs for testing", + help="create unsigned buildcache tarballs for testing", ) create.add_argument( "-a", "--allow-root", action="store_true", - help="allow install root string in binary files " + "after RPATH substitution", + help="allow install root string in binary files after RPATH substitution", ) create.add_argument( "-k", "--key", metavar="key", type=str, default=None, help="Key for signing." @@ -71,31 +72,31 @@ def setup_parser(subparser): "--directory", metavar="directory", type=str, - help="local directory where " + "buildcaches will be written.", + help="local directory where buildcaches will be written.", ) output.add_argument( "-m", "--mirror-name", metavar="mirror-name", type=str, - help="name of the mirror where " + "buildcaches will be written.", + help="name of the mirror where buildcaches will be written.", ) output.add_argument( "--mirror-url", metavar="mirror-url", type=str, - help="URL of the mirror where " + "buildcaches will be written.", + help="URL of the mirror where buildcaches will be written.", ) create.add_argument( "--rebuild-index", action="store_true", default=False, - help="Regenerate buildcache index " + "after building package(s)", + help="Regenerate buildcache index after building package(s)", ) create.add_argument( "--spec-file", default=None, - help=("Create buildcache entry for spec from json or " + "yaml file"), + help="Create buildcache entry for spec from json or yaml file", ) create.add_argument( "--only", @@ -124,19 +125,19 @@ def setup_parser(subparser): "-a", "--allow-root", action="store_true", - help="allow install root string in binary files " + "after RPATH substitution", + help="allow install root string in binary files after RPATH substitution", ) install.add_argument( "-u", "--unsigned", action="store_true", - help="install unsigned buildcache" + " tarballs for testing", + 
help="install unsigned buildcache tarballs for testing", ) install.add_argument( "-o", "--otherarch", action="store_true", - help="install specs from other architectures" + " instead of default platform and OS", + help="install specs from other architectures instead of default platform and OS", ) arguments.add_common_arguments(install, ["specs"]) @@ -155,7 +156,7 @@ def setup_parser(subparser): "-a", "--allarch", action="store_true", - help="list specs for all available architectures" + " instead of default platform and OS", + help="list specs for all available architectures instead of default platform and OS", ) arguments.add_common_arguments(listcache, ["specs"]) listcache.set_defaults(func=list_fn) @@ -204,7 +205,7 @@ def setup_parser(subparser): check.add_argument( "--spec-file", default=None, - help=("Check single spec from json or yaml file instead of release " + "specs file"), + help=("Check single spec from json or yaml file instead of release specs file"), ) check.set_defaults(func=check_fn) @@ -217,7 +218,7 @@ def setup_parser(subparser): download.add_argument( "--spec-file", default=None, - help=("Download built tarball for spec (from json or yaml file) " + "from mirror"), + help=("Download built tarball for spec (from json or yaml file) from mirror"), ) download.add_argument( "-p", "--path", default=None, help="Path to directory where tarball should be downloaded" @@ -234,7 +235,7 @@ def setup_parser(subparser): getbuildcachename.add_argument( "--spec-file", default=None, - help=("Path to spec json or yaml file for which buildcache name is " + "desired"), + help=("Path to spec json or yaml file for which buildcache name is desired"), ) getbuildcachename.set_defaults(func=get_buildcache_name_fn) @@ -294,7 +295,27 @@ def setup_parser(subparser): # Update buildcache index without copying any additional packages update_index = subparsers.add_parser("update-index", help=update_index_fn.__doc__) - update_index.add_argument("-d", "--mirror-url", default=None, 
help="Destination mirror url") + update_index_out = update_index.add_mutually_exclusive_group(required=True) + update_index_out.add_argument( + "-d", + "--directory", + metavar="directory", + type=str, + help="local directory where buildcaches will be written.", + ) + update_index_out.add_argument( + "-m", + "--mirror-name", + metavar="mirror-name", + type=str, + help="name of the mirror where buildcaches will be written.", + ) + update_index_out.add_argument( + "--mirror-url", + metavar="mirror-url", + type=str, + help="URL of the mirror where buildcaches will be written.", + ) update_index.add_argument( "-k", "--keys", @@ -305,6 +326,15 @@ def setup_parser(subparser): update_index.set_defaults(func=update_index_fn) +def _mirror_url_from_args(args): + if args.directory: + return spack.mirror.push_url_from_directory(args.directory) + if args.mirror_name: + return spack.mirror.push_url_from_mirror_name(args.mirror_name) + if args.mirror_url: + return spack.mirror.push_url_from_mirror_url(args.mirror_url) + + def _matching_specs(args): """Return a list of matching specs read from either a spec file (JSON or YAML), a query over the store or a query over the active environment. 
@@ -323,9 +353,9 @@ def _matching_specs(args): tty.die( "build cache file creation requires at least one" - + " installed package spec, an active environment," - + " or else a path to a json or yaml file containing a spec" - + " to install" + " installed package spec, an active environment," + " or else a path to a json or yaml file containing a spec" + " to install" ) @@ -353,15 +383,7 @@ def _concrete_spec_from_args(args): def create_fn(args): """create a binary package and push it to a mirror""" - if args.directory: - push_url = spack.mirror.push_url_from_directory(args.directory) - - if args.mirror_name: - push_url = spack.mirror.push_url_from_mirror_name(args.mirror_name) - - if args.mirror_url: - push_url = spack.mirror.push_url_from_mirror_url(args.mirror_url) - + push_url = _mirror_url_from_args(args) matches = _matching_specs(args) msg = "Pushing binary packages to {0}/build_cache".format(push_url) @@ -575,11 +597,11 @@ def sync_fn(args): source_location = None if args.src_directory: source_location = args.src_directory - scheme = url_util.parse(source_location, scheme="").scheme + scheme = urllib.parse.urlparse(source_location, scheme="").scheme if scheme != "": raise ValueError('"--src-directory" expected a local path; got a URL, instead') # Ensure that the mirror lookup does not mistake this for named mirror - source_location = "file://" + source_location + source_location = url_util.path_to_file_url(source_location) elif args.src_mirror_name: source_location = args.src_mirror_name result = spack.mirror.MirrorCollection().lookup(source_location) @@ -587,7 +609,7 @@ def sync_fn(args): raise ValueError('no configured mirror named "{name}"'.format(name=source_location)) elif args.src_mirror_url: source_location = args.src_mirror_url - scheme = url_util.parse(source_location, scheme="").scheme + scheme = urllib.parse.urlparse(source_location, scheme="").scheme if scheme == "": raise ValueError('"{url}" is not a valid URL'.format(url=source_location)) @@ 
-598,11 +620,11 @@ def sync_fn(args): dest_location = None if args.dest_directory: dest_location = args.dest_directory - scheme = url_util.parse(dest_location, scheme="").scheme + scheme = urllib.parse.urlparse(dest_location, scheme="").scheme if scheme != "": raise ValueError('"--dest-directory" expected a local path; got a URL, instead') # Ensure that the mirror lookup does not mistake this for named mirror - dest_location = "file://" + dest_location + dest_location = url_util.path_to_file_url(dest_location) elif args.dest_mirror_name: dest_location = args.dest_mirror_name result = spack.mirror.MirrorCollection().lookup(dest_location) @@ -610,7 +632,7 @@ def sync_fn(args): raise ValueError('no configured mirror named "{name}"'.format(name=dest_location)) elif args.dest_mirror_url: dest_location = args.dest_mirror_url - scheme = url_util.parse(dest_location, scheme="").scheme + scheme = urllib.parse.urlparse(dest_location, scheme="").scheme if scheme == "": raise ValueError('"{url}" is not a valid URL'.format(url=dest_location)) @@ -692,11 +714,8 @@ def update_index(mirror_url, update_keys=False): def update_index_fn(args): """Update a buildcache index.""" - outdir = "file://." - if args.mirror_url: - outdir = args.mirror_url - - update_index(outdir, update_keys=args.keys) + push_url = _mirror_url_from_args(args) + update_index(push_url, update_keys=args.keys) def buildcache(parser, args): diff --git a/lib/spack/spack/cmd/ci.py b/lib/spack/spack/cmd/ci.py index f82dbba4ae..4915e12ffb 100644 --- a/lib/spack/spack/cmd/ci.py +++ b/lib/spack/spack/cmd/ci.py @@ -356,7 +356,7 @@ def ci_rebuild(args): # dependencies from previous stages available since we do not # allow pushing binaries to the remote mirror during PR pipelines. 
enable_artifacts_mirror = True - pipeline_mirror_url = "file://" + local_mirror_dir + pipeline_mirror_url = url_util.path_to_file_url(local_mirror_dir) mirror_msg = "artifact buildcache enabled, mirror url: {0}".format(pipeline_mirror_url) tty.debug(mirror_msg) diff --git a/lib/spack/spack/cmd/create.py b/lib/spack/spack/cmd/create.py index 2d7152b7b8..11c684de1a 100644 --- a/lib/spack/spack/cmd/create.py +++ b/lib/spack/spack/cmd/create.py @@ -7,6 +7,7 @@ from __future__ import print_function import os import re +import urllib.parse import llnl.util.tty as tty from llnl.util.filesystem import mkdirp @@ -827,8 +828,8 @@ def get_versions(args, name): valid_url = True try: - spack.util.url.require_url_format(args.url) - if args.url.startswith("file://"): + parsed = urllib.parse.urlparse(args.url) + if not parsed.scheme or parsed.scheme != "file": valid_url = False # No point in spidering these except (ValueError, TypeError): valid_url = False diff --git a/lib/spack/spack/cmd/gpg.py b/lib/spack/spack/cmd/gpg.py index 35f10a680f..c37a9956e2 100644 --- a/lib/spack/spack/cmd/gpg.py +++ b/lib/spack/spack/cmd/gpg.py @@ -11,6 +11,7 @@ import spack.cmd.common.arguments as arguments import spack.mirror import spack.paths import spack.util.gpg +import spack.util.url description = "handle GPG actions for spack" section = "packaging" @@ -98,7 +99,7 @@ def setup_parser(subparser): "--directory", metavar="directory", type=str, - help="local directory where " + "keys will be published.", + help="local directory where keys will be published.", ) output.add_argument( "-m", @@ -212,7 +213,8 @@ def gpg_publish(args): mirror = None if args.directory: - mirror = spack.mirror.Mirror(args.directory, args.directory) + url = spack.util.url.path_to_file_url(args.directory) + mirror = spack.mirror.Mirror(url, url) elif args.mirror_name: mirror = spack.mirror.MirrorCollection().lookup(args.mirror_name) elif args.mirror_url: diff --git a/lib/spack/spack/cmd/mirror.py 
b/lib/spack/spack/cmd/mirror.py index ca960bf6e6..7768d1678c 100644 --- a/lib/spack/spack/cmd/mirror.py +++ b/lib/spack/spack/cmd/mirror.py @@ -357,11 +357,10 @@ def versions_per_spec(args): def create_mirror_for_individual_specs(mirror_specs, directory_hint, skip_unstable_versions): - local_push_url = local_mirror_url_from_user(directory_hint) present, mirrored, error = spack.mirror.create( - local_push_url, mirror_specs, skip_unstable_versions + directory_hint, mirror_specs, skip_unstable_versions ) - tty.msg("Summary for mirror in {}".format(local_push_url)) + tty.msg("Summary for mirror in {}".format(directory_hint)) process_mirror_stats(present, mirrored, error) @@ -389,9 +388,7 @@ def local_mirror_url_from_user(directory_hint): mirror_directory = spack.util.path.canonicalize_path( directory_hint or spack.config.get("config:source_cache") ) - tmp_mirror = spack.mirror.Mirror(mirror_directory) - local_url = url_util.format(tmp_mirror.push_url) - return local_url + return url_util.path_to_file_url(mirror_directory) def mirror_create(args): diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py index 7d23f6aa6c..ea5728ad3c 100644 --- a/lib/spack/spack/environment/environment.py +++ b/lib/spack/spack/environment/environment.py @@ -11,6 +11,8 @@ import shutil import stat import sys import time +import urllib.parse +import urllib.request import ruamel.yaml as yaml @@ -42,6 +44,7 @@ import spack.util.parallel import spack.util.path import spack.util.spack_json as sjson import spack.util.spack_yaml as syaml +import spack.util.url from spack.filesystem_view import ( SimpleFilesystemView, inverse_view_func_parser, @@ -926,46 +929,54 @@ class Environment(object): # allow paths to contain spack config/environment variables, etc. 
config_path = substitute_path_variables(config_path) - # strip file URL prefix, if needed, to avoid unnecessary remote - # config processing for local files - config_path = config_path.replace("file://", "") + include_url = urllib.parse.urlparse(config_path) - if not os.path.exists(config_path): + # Transform file:// URLs to direct includes. + if include_url.scheme == "file": + config_path = urllib.request.url2pathname(include_url.path) + + # Any other URL should be fetched. + elif include_url.scheme in ("http", "https", "ftp"): # Stage any remote configuration file(s) - if spack.util.url.is_url_format(config_path): - staged_configs = ( - os.listdir(self.config_stage_dir) - if os.path.exists(self.config_stage_dir) - else [] + staged_configs = ( + os.listdir(self.config_stage_dir) + if os.path.exists(self.config_stage_dir) + else [] + ) + remote_path = urllib.request.url2pathname(include_url.path) + basename = os.path.basename(remote_path) + if basename in staged_configs: + # Do NOT re-stage configuration files over existing + # ones with the same name since there is a risk of + # losing changes (e.g., from 'spack config update'). + tty.warn( + "Will not re-stage configuration from {0} to avoid " + "losing changes to the already staged file of the " + "same name.".format(remote_path) ) - basename = os.path.basename(config_path) - if basename in staged_configs: - # Do NOT re-stage configuration files over existing - # ones with the same name since there is a risk of - # losing changes (e.g., from 'spack config update'). - tty.warn( - "Will not re-stage configuration from {0} to avoid " - "losing changes to the already staged file of the " - "same name.".format(config_path) - ) - # Recognize the configuration stage directory - # is flattened to ensure a single copy of each - # configuration file. 
- config_path = self.config_stage_dir - if basename.endswith(".yaml"): - config_path = os.path.join(config_path, basename) - else: - staged_path = spack.config.fetch_remote_configs( - config_path, - self.config_stage_dir, - skip_existing=True, + # Recognize the configuration stage directory + # is flattened to ensure a single copy of each + # configuration file. + config_path = self.config_stage_dir + if basename.endswith(".yaml"): + config_path = os.path.join(config_path, basename) + else: + staged_path = spack.config.fetch_remote_configs( + config_path, + self.config_stage_dir, + skip_existing=True, + ) + if not staged_path: + raise SpackEnvironmentError( + "Unable to fetch remote configuration {0}".format(config_path) ) - if not staged_path: - raise SpackEnvironmentError( - "Unable to fetch remote configuration {0}".format(config_path) - ) - config_path = staged_path + config_path = staged_path + + elif include_url.scheme: + raise ValueError( + "Unsupported URL scheme for environment include: {}".format(config_path) + ) # treat relative paths as relative to the environment if not os.path.isabs(config_path): @@ -995,7 +1006,7 @@ class Environment(object): if missing: msg = "Detected {0} missing include path(s):".format(len(missing)) msg += "\n {0}".format("\n ".join(missing)) - tty.die("{0}\nPlease correct and try again.".format(msg)) + raise spack.config.ConfigFileError(msg) return scopes diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 41769f4e87..64d7811258 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -314,17 +314,7 @@ class URLFetchStrategy(FetchStrategy): @property def candidate_urls(self): - urls = [] - - for url in [self.url] + (self.mirrors or []): - # This must be skipped on Windows due to URL encoding - # of ':' characters on filepaths on Windows - if sys.platform != "win32" and url.startswith("file://"): - path = urllib.parse.quote(url[len("file://") :]) - url = 
"file://" + path - urls.append(url) - - return urls + return [self.url] + (self.mirrors or []) @_needs_stage def fetch(self): @@ -496,7 +486,9 @@ class URLFetchStrategy(FetchStrategy): if not self.archive_file: raise NoArchiveFileError("Cannot call archive() before fetching.") - web_util.push_to_url(self.archive_file, destination, keep_original=True) + web_util.push_to_url( + self.archive_file, url_util.path_to_file_url(destination), keep_original=True + ) @_needs_stage def check(self): @@ -549,8 +541,7 @@ class CacheURLFetchStrategy(URLFetchStrategy): @_needs_stage def fetch(self): - reg_str = r"^file://" - path = re.sub(reg_str, "", self.url) + path = url_util.file_url_string_to_path(self.url) # check whether the cache file exists. if not os.path.isfile(path): @@ -799,7 +790,7 @@ class GitFetchStrategy(VCSFetchStrategy): def mirror_id(self): repo_ref = self.commit or self.tag or self.branch if repo_ref: - repo_path = url_util.parse(self.url).path + repo_path = urllib.parse.urlparse(self.url).path result = os.path.sep.join(["git", repo_path, repo_ref]) return result @@ -1145,7 +1136,7 @@ class SvnFetchStrategy(VCSFetchStrategy): def mirror_id(self): if self.revision: - repo_path = url_util.parse(self.url).path + repo_path = urllib.parse.urlparse(self.url).path result = os.path.sep.join(["svn", repo_path, self.revision]) return result @@ -1256,7 +1247,7 @@ class HgFetchStrategy(VCSFetchStrategy): def mirror_id(self): if self.revision: - repo_path = url_util.parse(self.url).path + repo_path = urllib.parse.urlparse(self.url).path result = os.path.sep.join(["hg", repo_path, self.revision]) return result @@ -1328,7 +1319,7 @@ class S3FetchStrategy(URLFetchStrategy): tty.debug("Already downloaded {0}".format(self.archive_file)) return - parsed_url = url_util.parse(self.url) + parsed_url = urllib.parse.urlparse(self.url) if parsed_url.scheme != "s3": raise web_util.FetchError("S3FetchStrategy can only fetch from s3:// urls.") @@ -1375,7 +1366,7 @@ class 
GCSFetchStrategy(URLFetchStrategy): tty.debug("Already downloaded {0}".format(self.archive_file)) return - parsed_url = url_util.parse(self.url) + parsed_url = urllib.parse.urlparse(self.url) if parsed_url.scheme != "gs": raise web_util.FetchError("GCSFetchStrategy can only fetch from gs:// urls.") @@ -1680,7 +1671,8 @@ class FsCache(object): def fetcher(self, target_path, digest, **kwargs): path = os.path.join(self.root, target_path) - return CacheURLFetchStrategy(path, digest, **kwargs) + url = url_util.path_to_file_url(path) + return CacheURLFetchStrategy(url, digest, **kwargs) def destroy(self): shutil.rmtree(self.root, ignore_errors=True) diff --git a/lib/spack/spack/gcs_handler.py b/lib/spack/spack/gcs_handler.py index 4b547a78dc..441eea6f80 100644 --- a/lib/spack/spack/gcs_handler.py +++ b/lib/spack/spack/gcs_handler.py @@ -2,9 +2,9 @@ # Spack Project Developers. See the top-level COPYRIGHT file for details. # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import urllib.parse import urllib.response -import spack.util.url as url_util import spack.util.web as web_util @@ -12,7 +12,7 @@ def gcs_open(req, *args, **kwargs): """Open a reader stream to a blob object on GCS""" import spack.util.gcs as gcs_util - url = url_util.parse(req.get_full_url()) + url = urllib.parse.urlparse(req.get_full_url()) gcsblob = gcs_util.GCSBlob(url) if not gcsblob.exists(): diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index 8e914306e0..7a0c6a9b95 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -17,15 +17,18 @@ import os import os.path import sys import traceback +import urllib.parse import ruamel.yaml.error as yaml_error import llnl.util.tty as tty from llnl.util.filesystem import mkdirp +import spack.caches import spack.config import spack.error import spack.fetch_strategy as fs +import spack.mirror import spack.spec import spack.url as url import spack.util.spack_json as sjson @@ -507,19 +510,13 @@ def mirror_cache_and_stats(path, 
skip_unstable_versions=False): they do not have a stable archive checksum (as determined by ``fetch_strategy.stable_target``) """ - parsed = url_util.parse(path) - mirror_root = url_util.local_file_path(parsed) - if not mirror_root: - raise spack.error.SpackError("MirrorCaches only work with file:// URLs") # Get the absolute path of the root before we start jumping around. - if not os.path.isdir(mirror_root): + if not os.path.isdir(path): try: - mkdirp(mirror_root) + mkdirp(path) except OSError as e: - raise MirrorError("Cannot create directory '%s':" % mirror_root, str(e)) - mirror_cache = spack.caches.MirrorCache( - mirror_root, skip_unstable_versions=skip_unstable_versions - ) + raise MirrorError("Cannot create directory '%s':" % path, str(e)) + mirror_cache = spack.caches.MirrorCache(path, skip_unstable_versions=skip_unstable_versions) mirror_stats = MirrorStats() return mirror_cache, mirror_stats @@ -670,10 +667,10 @@ def push_url_from_directory(output_directory): """Given a directory in the local filesystem, return the URL on which to push binary packages. 
""" - scheme = url_util.parse(output_directory, scheme="").scheme + scheme = urllib.parse.urlparse(output_directory, scheme="").scheme if scheme != "": raise ValueError("expected a local path, but got a URL instead") - mirror_url = "file://" + output_directory + mirror_url = url_util.path_to_file_url(output_directory) mirror = spack.mirror.MirrorCollection().lookup(mirror_url) return url_util.format(mirror.push_url) @@ -688,7 +685,7 @@ def push_url_from_mirror_name(mirror_name): def push_url_from_mirror_url(mirror_url): """Given a mirror URL, return the URL on which to push binary packages.""" - scheme = url_util.parse(mirror_url, scheme="").scheme + scheme = urllib.parse.urlparse(mirror_url, scheme="").scheme if scheme == "": raise ValueError('"{0}" is not a valid URL'.format(mirror_url)) mirror = spack.mirror.MirrorCollection().lookup(mirror_url) diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py index aee5dc8943..a3e0aa991b 100644 --- a/lib/spack/spack/s3_handler.py +++ b/lib/spack/spack/s3_handler.py @@ -4,12 +4,12 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import urllib.error +import urllib.parse import urllib.request import urllib.response from io import BufferedReader, IOBase import spack.util.s3 as s3_util -import spack.util.url as url_util # NOTE(opadron): Workaround issue in boto where its StreamingBody @@ -43,7 +43,7 @@ class WrapStream(BufferedReader): def _s3_open(url): - parsed = url_util.parse(url) + parsed = urllib.parse.urlparse(url) s3 = s3_util.get_s3_session(url, method="fetch") bucket = parsed.netloc diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 3ac04531c7..ef80b2bae3 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -13,13 +13,16 @@ import pytest from llnl.util.filesystem import join_path, visit_directory_tree import spack.binary_distribution as bindist +import spack.caches import spack.config +import spack.fetch_strategy import 
spack.hooks.sbang as sbang import spack.main import spack.mirror import spack.repo import spack.store import spack.util.gpg +import spack.util.url as url_util import spack.util.web as web_util from spack.binary_distribution import get_buildfile_manifest from spack.directory_layout import DirectoryLayout @@ -58,7 +61,7 @@ def mirror_dir(tmpdir_factory): @pytest.fixture(scope="function") def test_mirror(mirror_dir): - mirror_url = "file://%s" % mirror_dir + mirror_url = url_util.path_to_file_url(mirror_dir) mirror_cmd("add", "--scope", "site", "test-mirror-func", mirror_url) yield mirror_dir mirror_cmd("rm", "--scope=site", "test-mirror-func") @@ -200,8 +203,7 @@ def test_default_rpaths_create_install_default_layout(mirror_dir): buildcache_cmd("create", "-auf", "-d", mirror_dir, cspec.name) # Create mirror index - mirror_url = "file://{0}".format(mirror_dir) - buildcache_cmd("update-index", "-d", mirror_url) + buildcache_cmd("update-index", "-d", mirror_dir) # List the buildcaches in the mirror buildcache_cmd("list", "-alv") @@ -266,8 +268,7 @@ def test_relative_rpaths_create_default_layout(mirror_dir): buildcache_cmd("create", "-aur", "-d", mirror_dir, cspec.name) # Create mirror index - mirror_url = "file://%s" % mirror_dir - buildcache_cmd("update-index", "-d", mirror_url) + buildcache_cmd("update-index", "-d", mirror_dir) # Uninstall the package and deps uninstall_cmd("-y", "--dependents", gspec.name) @@ -323,9 +324,9 @@ def test_push_and_fetch_keys(mock_gnupghome): testpath = str(mock_gnupghome) mirror = os.path.join(testpath, "mirror") - mirrors = {"test-mirror": mirror} + mirrors = {"test-mirror": url_util.path_to_file_url(mirror)} mirrors = spack.mirror.MirrorCollection(mirrors) - mirror = spack.mirror.Mirror("file://" + mirror) + mirror = spack.mirror.Mirror(url_util.path_to_file_url(mirror)) gpg_dir1 = os.path.join(testpath, "gpg1") gpg_dir2 = os.path.join(testpath, "gpg2") @@ -389,7 +390,7 @@ def test_spec_needs_rebuild(monkeypatch, tmpdir): # Create a 
temp mirror directory for buildcache usage mirror_dir = tmpdir.join("mirror_dir") - mirror_url = "file://{0}".format(mirror_dir.strpath) + mirror_url = url_util.path_to_file_url(mirror_dir.strpath) s = Spec("libdwarf").concretized() @@ -421,7 +422,7 @@ def test_generate_index_missing(monkeypatch, tmpdir, mutable_config): # Create a temp mirror directory for buildcache usage mirror_dir = tmpdir.join("mirror_dir") - mirror_url = "file://{0}".format(mirror_dir.strpath) + mirror_url = url_util.path_to_file_url(mirror_dir.strpath) spack.config.set("mirrors", {"test": mirror_url}) s = Spec("libdwarf").concretized() @@ -514,7 +515,6 @@ def test_update_sbang(tmpdir, test_mirror): # Need a fake mirror with *function* scope. mirror_dir = test_mirror - mirror_url = "file://{0}".format(mirror_dir) # Assume all commands will concretize old_spec the same way. install_cmd("--no-cache", old_spec.name) @@ -523,7 +523,7 @@ def test_update_sbang(tmpdir, test_mirror): buildcache_cmd("create", "-u", "-a", "-d", mirror_dir, old_spec_hash_str) # Need to force an update of the buildcache index - buildcache_cmd("update-index", "-d", mirror_url) + buildcache_cmd("update-index", "-d", mirror_dir) # Uninstall the original package. 
uninstall_cmd("-y", old_spec_hash_str) diff --git a/lib/spack/spack/test/build_distribution.py b/lib/spack/spack/test/build_distribution.py index 2d3024ab06..59c26892aa 100644 --- a/lib/spack/spack/test/build_distribution.py +++ b/lib/spack/spack/test/build_distribution.py @@ -10,22 +10,15 @@ import sys import pytest import spack.binary_distribution +import spack.main import spack.spec +import spack.util.url install = spack.main.SpackCommand("install") pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") -def _validate_url(url): - return - - -@pytest.fixture(autouse=True) -def url_check(monkeypatch): - monkeypatch.setattr(spack.util.url, "require_url_format", _validate_url) - - def test_build_tarball_overwrite(install_mockery, mock_fetch, monkeypatch, tmpdir): with tmpdir.as_cwd(): @@ -33,12 +26,13 @@ def test_build_tarball_overwrite(install_mockery, mock_fetch, monkeypatch, tmpdi install(str(spec)) # Runs fine the first time, throws the second time - spack.binary_distribution._build_tarball(spec, ".", unsigned=True) + out_url = spack.util.url.path_to_file_url(str(tmpdir)) + spack.binary_distribution._build_tarball(spec, out_url, unsigned=True) with pytest.raises(spack.binary_distribution.NoOverwriteException): - spack.binary_distribution._build_tarball(spec, ".", unsigned=True) + spack.binary_distribution._build_tarball(spec, out_url, unsigned=True) # Should work fine with force=True - spack.binary_distribution._build_tarball(spec, ".", force=True, unsigned=True) + spack.binary_distribution._build_tarball(spec, out_url, force=True, unsigned=True) # Remove the tarball and try again. 
# This must *also* throw, because of the existing .spec.json file @@ -51,4 +45,4 @@ def test_build_tarball_overwrite(install_mockery, mock_fetch, monkeypatch, tmpdi ) with pytest.raises(spack.binary_distribution.NoOverwriteException): - spack.binary_distribution._build_tarball(spec, ".", unsigned=True) + spack.binary_distribution._build_tarball(spec, out_url, unsigned=True) diff --git a/lib/spack/spack/test/build_system_guess.py b/lib/spack/spack/test/build_system_guess.py index 22ab96041d..60a96b09f2 100644 --- a/lib/spack/spack/test/build_system_guess.py +++ b/lib/spack/spack/test/build_system_guess.py @@ -10,6 +10,7 @@ import pytest import spack.cmd.create import spack.stage import spack.util.executable +import spack.util.url as url_util pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") @@ -50,7 +51,7 @@ def url_and_build_system(request, tmpdir): filename, system = request.param tmpdir.ensure("archive", filename) tar("czf", "archive.tar.gz", "archive") - url = "file://" + str(tmpdir.join("archive.tar.gz")) + url = url_util.path_to_file_url(str(tmpdir.join("archive.tar.gz"))) yield url, system orig_dir.chdir() diff --git a/lib/spack/spack/test/cache_fetch.py b/lib/spack/spack/test/cache_fetch.py index 03b8e92ecf..6a6b76f5cf 100644 --- a/lib/spack/spack/test/cache_fetch.py +++ b/lib/spack/spack/test/cache_fetch.py @@ -4,26 +4,24 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import os -import sys import pytest from llnl.util.filesystem import mkdirp, touch import spack.config +import spack.util.url as url_util from spack.fetch_strategy import CacheURLFetchStrategy, NoCacheError from spack.stage import Stage -is_windows = sys.platform == "win32" - @pytest.mark.parametrize("_fetch_method", ["curl", "urllib"]) def test_fetch_missing_cache(tmpdir, _fetch_method): """Ensure raise a missing cache file.""" testpath = str(tmpdir) + non_existing = os.path.join(testpath, "non-existing") with 
spack.config.override("config:url_fetch_method", _fetch_method): - abs_pref = "" if is_windows else "/" - url = "file://" + abs_pref + "not-a-real-cache-file" + url = url_util.path_to_file_url(non_existing) fetcher = CacheURLFetchStrategy(url=url) with Stage(fetcher, path=testpath): with pytest.raises(NoCacheError, match=r"No cache"): @@ -36,11 +34,7 @@ def test_fetch(tmpdir, _fetch_method): testpath = str(tmpdir) cache = os.path.join(testpath, "cache.tar.gz") touch(cache) - if is_windows: - url_stub = "{0}" - else: - url_stub = "/{0}" - url = "file://" + url_stub.format(cache) + url = url_util.path_to_file_url(cache) with spack.config.override("config:url_fetch_method", _fetch_method): fetcher = CacheURLFetchStrategy(url=url) with Stage(fetcher, path=testpath) as stage: diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py index 640ef0d236..176bb9a060 100644 --- a/lib/spack/spack/test/cmd/ci.py +++ b/lib/spack/spack/test/cmd/ci.py @@ -810,10 +810,10 @@ def create_rebuild_env(tmpdir, pkg_name, broken_tests=False): env_dir = working_dir.join("concrete_env") mirror_dir = working_dir.join("mirror") - mirror_url = "file://{0}".format(mirror_dir.strpath) + mirror_url = url_util.path_to_file_url(mirror_dir.strpath) broken_specs_path = os.path.join(working_dir.strpath, "naughty-list") - broken_specs_url = url_util.join("file://", broken_specs_path) + broken_specs_url = url_util.path_to_file_url(broken_specs_path) temp_storage_url = "file:///path/to/per/pipeline/storage" broken_tests_packages = [pkg_name] if broken_tests else [] diff --git a/lib/spack/spack/test/cmd/env.py b/lib/spack/spack/test/cmd/env.py index 64aaf3c225..81e69ab568 100644 --- a/lib/spack/spack/test/cmd/env.py +++ b/lib/spack/spack/test/cmd/env.py @@ -16,6 +16,7 @@ import llnl.util.filesystem as fs import llnl.util.link_tree import spack.cmd.env +import spack.config import spack.environment as ev import spack.environment.shell import spack.error @@ -29,7 +30,6 @@ from spack.spec 
import Spec from spack.stage import stage_prefix from spack.util.executable import Executable from spack.util.path import substitute_path_variables -from spack.util.web import FetchError from spack.version import Version # TODO-27021 @@ -707,9 +707,9 @@ spack: e.concretize() err = str(exc) - assert "not retrieve configuration" in err - assert os.path.join("no", "such", "directory") in err - + assert "missing include" in err + assert "/no/such/directory" in err + assert os.path.join("no", "such", "file.yaml") in err assert ev.active_environment() is None @@ -827,7 +827,7 @@ def test_env_with_included_config_missing_file(tmpdir, mutable_empty_config): f.write("spack:\n include:\n - {0}\n".format(missing_file.strpath)) env = ev.Environment(tmpdir.strpath) - with pytest.raises(FetchError, match="No such file or directory"): + with pytest.raises(spack.config.ConfigError, match="missing include path"): ev.activate(env) diff --git a/lib/spack/spack/test/cmd/mirror.py b/lib/spack/spack/test/cmd/mirror.py index e64d54428a..c1a9830870 100644 --- a/lib/spack/spack/test/cmd/mirror.py +++ b/lib/spack/spack/test/cmd/mirror.py @@ -11,6 +11,8 @@ import pytest import spack.cmd.mirror import spack.config import spack.environment as ev +import spack.spec +import spack.util.url as url_util from spack.main import SpackCommand, SpackCommandError mirror = SpackCommand("mirror") @@ -43,15 +45,6 @@ def tmp_scope(): yield scope_name -def _validate_url(url): - return - - -@pytest.fixture(autouse=True) -def url_check(monkeypatch): - monkeypatch.setattr(spack.util.url, "require_url_format", _validate_url) - - @pytest.mark.disable_clean_stage_check @pytest.mark.regression("8083") def test_regression_8083(tmpdir, capfd, mock_packages, mock_fetch, config): @@ -89,7 +82,7 @@ def source_for_pkg_with_hash(mock_packages, tmpdir): local_path = os.path.join(str(tmpdir), local_url_basename) with open(local_path, "w") as f: f.write(s.package.hashed_content) - local_url = "file://" + local_path + 
local_url = url_util.path_to_file_url(local_path) s.package.versions[spack.version.Version("1.0")]["url"] = local_url diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index 2d9e72a89e..77712c4d83 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -48,6 +48,7 @@ import spack.test.cray_manifest import spack.util.executable import spack.util.gpg import spack.util.spack_yaml as syaml +import spack.util.url as url_util from spack.fetch_strategy import FetchStrategyComposite, URLFetchStrategy from spack.util.pattern import Bunch from spack.util.web import FetchError @@ -1130,7 +1131,7 @@ def mock_archive(request, tmpdir_factory): "Archive", ["url", "path", "archive_file", "expanded_archive_basedir"] ) archive_file = str(tmpdir.join(archive_name)) - url = "file://" + archive_file + url = url_util.path_to_file_url(archive_file) # Return the url yield Archive( @@ -1331,7 +1332,7 @@ def mock_git_repository(tmpdir_factory): tmpdir = tmpdir_factory.mktemp("mock-git-repo-submodule-dir-{0}".format(submodule_count)) tmpdir.ensure(spack.stage._source_path_subdir, dir=True) repodir = tmpdir.join(spack.stage._source_path_subdir) - suburls.append((submodule_count, "file://" + str(repodir))) + suburls.append((submodule_count, url_util.path_to_file_url(str(repodir)))) with repodir.as_cwd(): git("init") @@ -1359,7 +1360,7 @@ def mock_git_repository(tmpdir_factory): git("init") git("config", "user.name", "Spack") git("config", "user.email", "spack@spack.io") - url = "file://" + str(repodir) + url = url_util.path_to_file_url(str(repodir)) for number, suburl in suburls: git("submodule", "add", suburl, "third_party/submodule{0}".format(number)) @@ -1461,7 +1462,7 @@ def mock_hg_repository(tmpdir_factory): # Initialize the repository with repodir.as_cwd(): - url = "file://" + str(repodir) + url = url_util.path_to_file_url(str(repodir)) hg("init") # Commit file r0 @@ -1495,7 +1496,7 @@ def 
mock_svn_repository(tmpdir_factory): tmpdir = tmpdir_factory.mktemp("mock-svn-stage") tmpdir.ensure(spack.stage._source_path_subdir, dir=True) repodir = tmpdir.join(spack.stage._source_path_subdir) - url = "file://" + str(repodir) + url = url_util.path_to_file_url(str(repodir)) # Initialize the repository with repodir.as_cwd(): diff --git a/lib/spack/spack/test/mirror.py b/lib/spack/spack/test/mirror.py index c156db867c..5876e62306 100644 --- a/lib/spack/spack/test/mirror.py +++ b/lib/spack/spack/test/mirror.py @@ -15,6 +15,7 @@ import spack.mirror import spack.repo import spack.util.executable import spack.util.spack_json as sjson +import spack.util.url as url_util from spack.spec import Spec from spack.stage import Stage from spack.util.executable import which @@ -54,7 +55,7 @@ def check_mirror(): with Stage("spack-mirror-test") as stage: mirror_root = os.path.join(stage.path, "test-mirror") # register mirror with spack config - mirrors = {"spack-mirror-test": "file://" + mirror_root} + mirrors = {"spack-mirror-test": url_util.path_to_file_url(mirror_root)} with spack.config.override("mirrors", mirrors): with spack.config.override("config:checksum", False): specs = [Spec(x).concretized() for x in repos] diff --git a/lib/spack/spack/test/packaging.py b/lib/spack/spack/test/packaging.py index ac92e85dad..e90465f521 100644 --- a/lib/spack/spack/test/packaging.py +++ b/lib/spack/spack/test/packaging.py @@ -25,6 +25,7 @@ import spack.package_base import spack.repo import spack.store import spack.util.gpg +import spack.util.url as url_util from spack.fetch_strategy import FetchStrategyComposite, URLFetchStrategy from spack.paths import mock_gpg_keys_path from spack.relocate import ( @@ -89,7 +90,7 @@ echo $PATH""" spack.mirror.create(mirror_path, specs=[]) # register mirror with spack config - mirrors = {"spack-mirror-test": "file://" + mirror_path} + mirrors = {"spack-mirror-test": url_util.path_to_file_url(mirror_path)} spack.config.set("mirrors", mirrors) stage = 
spack.stage.Stage(mirrors["spack-mirror-test"], name="build_cache", keep=True) diff --git a/lib/spack/spack/test/patch.py b/lib/spack/spack/test/patch.py index 8b446146d0..9f268dd62e 100644 --- a/lib/spack/spack/test/patch.py +++ b/lib/spack/spack/test/patch.py @@ -16,6 +16,7 @@ import spack.patch import spack.paths import spack.repo import spack.util.compression +import spack.util.url as url_util from spack.spec import Spec from spack.stage import Stage from spack.util.executable import Executable @@ -87,7 +88,7 @@ data_path = os.path.join(spack.paths.test_path, "data", "patch") ) def test_url_patch(mock_patch_stage, filename, sha256, archive_sha256, config): # Make a patch object - url = "file://" + filename + url = url_util.path_to_file_url(filename) s = Spec("patch").concretized() patch = spack.patch.UrlPatch(s.package, url, sha256=sha256, archive_sha256=archive_sha256) diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py index ac445c373f..bb1a56eb04 100644 --- a/lib/spack/spack/test/stage.py +++ b/lib/spack/spack/test/stage.py @@ -19,6 +19,7 @@ from llnl.util.filesystem import getuid, mkdirp, partition_path, touch, working_ import spack.paths import spack.stage import spack.util.executable +import spack.util.url as url_util from spack.resource import Resource from spack.stage import DIYStage, ResourceStage, Stage, StageComposite from spack.util.path import canonicalize_path @@ -41,10 +42,6 @@ _include_readme = 1 _include_hidden = 2 _include_extra = 3 -_file_prefix = "file://" -if sys.platform == "win32": - _file_prefix += "/" - # Mock fetch directories are expected to appear as follows: # @@ -218,7 +215,7 @@ def mock_stage_archive(tmp_build_stage_dir): # Create the archive directory and associated file archive_dir = tmpdir.join(_archive_base) archive = tmpdir.join(_archive_fn) - archive_url = _file_prefix + str(archive) + archive_url = url_util.path_to_file_url(str(archive)) archive_dir.ensure(dir=True) # Create the optional files as 
requested and make sure expanded @@ -283,7 +280,7 @@ def mock_expand_resource(tmpdir): archive_name = "resource.tar.gz" archive = tmpdir.join(archive_name) - archive_url = _file_prefix + str(archive) + archive_url = url_util.path_to_file_url(str(archive)) filename = "resource-file.txt" test_file = resource_dir.join(filename) @@ -414,7 +411,7 @@ class TestStage(object): property of the stage should refer to the path of that file. """ test_noexpand_fetcher = spack.fetch_strategy.from_kwargs( - url=_file_prefix + mock_noexpand_resource, expand=False + url=url_util.path_to_file_url(mock_noexpand_resource), expand=False ) with Stage(test_noexpand_fetcher) as stage: stage.fetch() @@ -432,7 +429,7 @@ class TestStage(object): resource_dst_name = "resource-dst-name.sh" test_resource_fetcher = spack.fetch_strategy.from_kwargs( - url=_file_prefix + mock_noexpand_resource, expand=False + url=url_util.path_to_file_url(mock_noexpand_resource), expand=False ) test_resource = Resource("test_resource", test_resource_fetcher, resource_dst_name, None) resource_stage = ResourceStage(test_resource_fetcher, root_stage, test_resource) diff --git a/lib/spack/spack/test/util/util_url.py b/lib/spack/spack/test/util/util_url.py index 38361fbf82..bd0abf572a 100644 --- a/lib/spack/spack/test/util/util_url.py +++ b/lib/spack/spack/test/util/util_url.py @@ -6,111 +6,44 @@ """Test Spack's URL handling utility functions.""" import os import os.path -import posixpath -import re -import sys +import urllib.parse import pytest -import spack.paths import spack.util.url as url_util -from spack.util.path import convert_to_posix_path -is_windows = sys.platform == "win32" -if is_windows: - drive_m = re.search(r"[A-Za-z]:", spack.paths.test_path) - drive = drive_m.group() if drive_m else None +def test_url_local_file_path(tmpdir): + # Create a file + path = str(tmpdir.join("hello.txt")) + with open(path, "wb") as f: + f.write(b"hello world") -def test_url_parse(): + # Go from path -> url -> path. 
+ roundtrip = url_util.local_file_path(url_util.path_to_file_url(path)) - parsed = url_util.parse("/path/to/resource", scheme="fake") - assert parsed.scheme == "fake" - assert parsed.netloc == "" - assert parsed.path == "/path/to/resource" + # Verify it's the same file. + assert os.path.samefile(roundtrip, path) - parsed = url_util.parse("file:///path/to/resource") - assert parsed.scheme == "file" - assert parsed.netloc == "" - assert parsed.path == "/path/to/resource" + # Test if it accepts urlparse objects + parsed = urllib.parse.urlparse(url_util.path_to_file_url(path)) + assert os.path.samefile(url_util.local_file_path(parsed), path) - parsed = url_util.parse("file:///path/to/resource", scheme="fake") - assert parsed.scheme == "file" - assert parsed.netloc == "" - assert parsed.path == "/path/to/resource" - parsed = url_util.parse("file://path/to/resource") - assert parsed.scheme == "file" - expected = convert_to_posix_path(os.path.abspath(posixpath.join("path", "to", "resource"))) - if is_windows: - expected = expected.lstrip(drive) - assert parsed.path == expected +def test_url_local_file_path_no_file_scheme(): + assert url_util.local_file_path("https://example.com/hello.txt") is None + assert url_util.local_file_path("C:\\Program Files\\hello.txt") is None - if is_windows: - parsed = url_util.parse("file://%s\\path\\to\\resource" % drive) - assert parsed.scheme == "file" - expected = "/" + posixpath.join("path", "to", "resource") - assert parsed.path == expected - parsed = url_util.parse("https://path/to/resource") - assert parsed.scheme == "https" - assert parsed.netloc == "path" - assert parsed.path == "/to/resource" +def test_relative_path_to_file_url(tmpdir): + # Create a file + path = str(tmpdir.join("hello.txt")) + with open(path, "wb") as f: + f.write(b"hello world") - parsed = url_util.parse("gs://path/to/resource") - assert parsed.scheme == "gs" - assert parsed.netloc == "path" - assert parsed.path == "/to/resource" - - spack_root = 
spack.paths.spack_root - parsed = url_util.parse("file://$spack") - assert parsed.scheme == "file" - - if is_windows: - spack_root = "/" + convert_to_posix_path(spack_root) - - assert parsed.netloc + parsed.path == spack_root - - -def test_url_local_file_path(): - spack_root = spack.paths.spack_root - sep = os.path.sep - lfp = url_util.local_file_path("/a/b/c.txt") - assert lfp == sep + os.path.join("a", "b", "c.txt") - - lfp = url_util.local_file_path("file:///a/b/c.txt") - assert lfp == sep + os.path.join("a", "b", "c.txt") - - if is_windows: - lfp = url_util.local_file_path("file://a/b/c.txt") - expected = os.path.abspath(os.path.join("a", "b", "c.txt")) - assert lfp == expected - - lfp = url_util.local_file_path("file://$spack/a/b/c.txt") - expected = os.path.abspath(os.path.join(spack_root, "a", "b", "c.txt")) - assert lfp == expected - - if is_windows: - lfp = url_util.local_file_path("file:///$spack/a/b/c.txt") - expected = os.path.abspath(os.path.join(spack_root, "a", "b", "c.txt")) - assert lfp == expected - - lfp = url_util.local_file_path("file://$spack/a/b/c.txt") - expected = os.path.abspath(os.path.join(spack_root, "a", "b", "c.txt")) - assert lfp == expected - - # not a file:// URL - so no local file path - lfp = url_util.local_file_path("http:///a/b/c.txt") - assert lfp is None - - lfp = url_util.local_file_path("http://a/b/c.txt") - assert lfp is None - - lfp = url_util.local_file_path("http:///$spack/a/b/c.txt") - assert lfp is None - - lfp = url_util.local_file_path("http://$spack/a/b/c.txt") - assert lfp is None + with tmpdir.as_cwd(): + roundtrip = url_util.local_file_path(url_util.path_to_file_url("hello.txt")) + assert os.path.samefile(roundtrip, path) def test_url_join_local_paths(): @@ -179,26 +112,6 @@ def test_url_join_local_paths(): == "https://mirror.spack.io/build_cache/my-package" ) - # file:// URL path components are *NOT* canonicalized - spack_root = spack.paths.spack_root - - if sys.platform != "win32": - join_result = 
url_util.join("/a/b/c", "$spack") - assert join_result == "file:///a/b/c/$spack" # not canonicalized - format_result = url_util.format(join_result) - # canoncalize by hand - expected = url_util.format( - os.path.abspath(os.path.join("/", "a", "b", "c", "." + spack_root)) - ) - assert format_result == expected - - # see test_url_join_absolute_paths() for more on absolute path components - join_result = url_util.join("/a/b/c", "/$spack") - assert join_result == "file:///$spack" # not canonicalized - format_result = url_util.format(join_result) - expected = url_util.format(spack_root) - assert format_result == expected - # For s3:// URLs, the "netloc" (bucket) is considered part of the path. # Make sure join() can cross bucket boundaries in this case. args = ["s3://bucket/a/b", "new-bucket", "c"] @@ -253,38 +166,7 @@ def test_url_join_absolute_paths(): # works as if everything before the http:// URL was left out assert url_util.join("literally", "does", "not", "matter", p, "resource") == join_result - # It's important to keep in mind that this logic applies even if the - # component's path is not an absolute path! - - # For eaxmple: - p = "./d" - # ...is *NOT* an absolute path - # ...is also *NOT* an absolute path component - - u = "file://./d" - # ...is a URL - # The path of this URL is *NOT* an absolute path - # HOWEVER, the URL, itself, *is* an absolute path component - - # (We just need... - cwd = os.getcwd() - # ...to work out what resource it points to) - - if sys.platform == "win32": - convert_to_posix_path(cwd) - cwd = "/" + cwd - - # So, even though parse() assumes "file://" URL, the scheme is still - # significant in URL path components passed to join(), even if the base - # is a file:// URL. 
- - path_join_result = "file:///a/b/c/d" - assert url_util.join("/a/b/c", p) == path_join_result - assert url_util.join("file:///a/b/c", p) == path_join_result - - url_join_result = "file://{CWD}/d".format(CWD=cwd) - assert url_util.join("/a/b/c", u) == url_join_result - assert url_util.join("file:///a/b/c", u) == url_join_result + assert url_util.join("file:///a/b/c", "./d") == "file:///a/b/c/d" # Finally, resolve_href should have no effect for how absolute path # components are handled because local hrefs can not be absolute path diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py index f4114eb05c..476ea01019 100644 --- a/lib/spack/spack/test/web.py +++ b/lib/spack/spack/test/web.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import collections import os -import posixpath import sys import pytest @@ -15,13 +14,14 @@ import spack.config import spack.mirror import spack.paths import spack.util.s3 +import spack.util.url as url_util import spack.util.web from spack.version import ver def _create_url(relative_url): - web_data_path = posixpath.join(spack.paths.test_path, "data", "web") - return "file://" + posixpath.join(web_data_path, relative_url) + web_data_path = os.path.join(spack.paths.test_path, "data", "web") + return url_util.path_to_file_url(os.path.join(web_data_path, relative_url)) root = _create_url("index.html") @@ -185,6 +185,7 @@ def test_get_header(): @pytest.mark.skipif(sys.platform == "win32", reason="Not supported on Windows (yet)") def test_list_url(tmpdir): testpath = str(tmpdir) + testpath_url = url_util.path_to_file_url(testpath) os.mkdir(os.path.join(testpath, "dir")) @@ -199,7 +200,7 @@ def test_list_url(tmpdir): pass list_url = lambda recursive: list( - sorted(spack.util.web.list_url(testpath, recursive=recursive)) + sorted(spack.util.web.list_url(testpath_url, recursive=recursive)) ) assert list_url(False) == ["file-0.txt", "file-1.txt", "file-2.txt"] diff --git a/lib/spack/spack/util/s3.py 
b/lib/spack/spack/util/s3.py index 462afd05ec..6864ace85e 100644 --- a/lib/spack/spack/util/s3.py +++ b/lib/spack/spack/util/s3.py @@ -8,7 +8,6 @@ from typing import Any, Dict, Tuple import spack import spack.config -import spack.util.url as url_util #: Map (mirror name, method) tuples to s3 client instances. s3_client_cache: Dict[Tuple[str, str], Any] = dict() @@ -27,10 +26,10 @@ def get_s3_session(url, method="fetch"): global s3_client_cache - # Get a (recycled) s3 session for a particular URL - url = url_util.parse(url) - - url_str = url_util.format(url) + # Parse the URL if not already done. + if not isinstance(url, urllib.parse.ParseResult): + url = urllib.parse.urlparse(url) + url_str = url.geturl() def get_mirror_url(mirror): return mirror.fetch_url if method == "fetch" else mirror.push_url diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py index d0f9ef7393..1abd6e3146 100644 --- a/lib/spack/spack/util/url.py +++ b/lib/spack/spack/util/url.py @@ -8,18 +8,14 @@ Utility functions for parsing, formatting, and manipulating URLs. """ import itertools +import os import posixpath import re import sys import urllib.parse +import urllib.request -from spack.util.path import ( - canonicalize_path, - convert_to_platform_path, - convert_to_posix_path, -) - -is_windows = sys.platform == "win32" +from spack.util.path import convert_to_posix_path def _split_all(path): @@ -49,82 +45,22 @@ def local_file_path(url): file or directory referenced by it. Otherwise, return None. """ if isinstance(url, str): - url = parse(url) + url = urllib.parse.urlparse(url) if url.scheme == "file": - if is_windows: - pth = convert_to_platform_path(url.netloc + url.path) - if re.search(r"^\\[A-Za-z]:", pth): - pth = pth.lstrip("\\") - return pth - return url.path + return urllib.request.url2pathname(url.path) return None -def parse(url, scheme="file"): - """Parse a url. - - Path variable substitution is performed on file URLs as needed. 
The - variables are documented at - https://spack.readthedocs.io/en/latest/configuration.html#spack-specific-variables. - - Arguments: - url (str): URL to be parsed - scheme (str): associated URL scheme - Returns: - (urllib.parse.ParseResult): For file scheme URLs, the - netloc and path components are concatenated and passed through - spack.util.path.canoncalize_path(). Otherwise, the returned value - is the same as urllib's urlparse() with allow_fragments=False. - """ - # guarantee a value passed in is of proper url format. Guarantee - # allows for easier string manipulation accross platforms - if isinstance(url, str): - require_url_format(url) - url = escape_file_url(url) - url_obj = ( - urllib.parse.urlparse( - url, - scheme=scheme, - allow_fragments=False, - ) - if isinstance(url, str) - else url - ) - - (scheme, netloc, path, params, query, _) = url_obj - - scheme = (scheme or "file").lower() +def path_to_file_url(path): + if not os.path.isabs(path): + path = os.path.abspath(path) + return urllib.parse.urljoin("file:", urllib.request.pathname2url(path)) - if scheme == "file": - # (The user explicitly provides the file:// scheme.) - # examples: - # file://C:\\a\\b\\c - # file://X:/a/b/c - path = canonicalize_path(netloc + path) - path = re.sub(r"^/+", "/", path) - netloc = "" - - drive_ltr_lst = re.findall(r"[A-Za-z]:\\", path) - is_win_path = bool(drive_ltr_lst) - if is_windows and is_win_path: - drive_ltr = drive_ltr_lst[0].strip("\\") - path = re.sub(r"[\\]*" + drive_ltr, "", path) - netloc = "/" + drive_ltr.strip("\\") - - if sys.platform == "win32": - path = convert_to_posix_path(path) - - return urllib.parse.ParseResult( - scheme=scheme, - netloc=netloc, - path=path, - params=params, - query=query, - fragment=None, - ) +def file_url_string_to_path(url): + return urllib.request.url2pathname(urllib.parse.urlparse(url).path) def format(parsed_url): @@ -133,7 +69,7 @@ def format(parsed_url): Returns a canonicalized format of the given URL as a string. 
""" if isinstance(parsed_url, str): - parsed_url = parse(parsed_url) + parsed_url = urllib.parse.urlparse(parsed_url) return parsed_url.geturl() @@ -179,18 +115,6 @@ def join(base_url, path, *extra, **kwargs): # For canonicalizing file:// URLs, take care to explicitly differentiate # between absolute and relative join components. - - # '$spack' is not an absolute path component - join_result = spack.util.url.join('/a/b/c', '$spack') ; join_result - 'file:///a/b/c/$spack' - spack.util.url.format(join_result) - 'file:///a/b/c/opt/spack' - - # '/$spack' *is* an absolute path component - join_result = spack.util.url.join('/a/b/c', '/$spack') ; join_result - 'file:///$spack' - spack.util.url.format(join_result) - 'file:///opt/spack' """ paths = [ (x) if isinstance(x, str) else x.geturl() for x in itertools.chain((base_url, path), extra) @@ -260,7 +184,7 @@ def join(base_url, path, *extra, **kwargs): def _join(base_url, path, *extra, **kwargs): - base_url = parse(base_url) + base_url = urllib.parse.urlparse(base_url) resolve_href = kwargs.get("resolve_href", False) (scheme, netloc, base_path, params, query, _) = base_url @@ -365,20 +289,3 @@ def parse_git_url(url): raise ValueError("bad port in git url: %s" % url) return (scheme, user, hostname, port, path) - - -def is_url_format(url): - return re.search(r"^(file://|http://|https://|ftp://|s3://|gs://|ssh://|git://|/)", url) - - -def require_url_format(url): - if not is_url_format(url): - raise ValueError("Invalid url format from url: %s" % url) - - -def escape_file_url(url): - drive_ltr = re.findall(r"[A-Za-z]:\\", url) - if is_windows and drive_ltr: - url = url.replace(drive_ltr[0], "/" + drive_ltr[0]) - - return url diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index 1f2c197460..7c8964b3c9 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -15,6 +15,7 @@ import shutil import ssl import sys import traceback +import urllib.parse from html.parser import HTMLParser from 
urllib.error import URLError from urllib.request import Request, urlopen @@ -68,7 +69,7 @@ def uses_ssl(parsed_url): if not endpoint_url: return True - if url_util.parse(endpoint_url, scheme="https").scheme == "https": + if urllib.parse.urlparse(endpoint_url).scheme == "https": return True elif parsed_url.scheme == "gs": @@ -79,7 +80,8 @@ def uses_ssl(parsed_url): def read_from_url(url, accept_content_type=None): - url = url_util.parse(url) + if isinstance(url, str): + url = urllib.parse.urlparse(url) context = None # Timeout in seconds for web requests @@ -143,13 +145,9 @@ def read_from_url(url, accept_content_type=None): def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=None): - if sys.platform == "win32": - if remote_path[1] == ":": - remote_path = "file://" + remote_path - remote_url = url_util.parse(remote_path) - - remote_file_path = url_util.local_file_path(remote_url) - if remote_file_path is not None: + remote_url = urllib.parse.urlparse(remote_path) + if remote_url.scheme == "file": + remote_file_path = url_util.local_file_path(remote_url) mkdirp(os.path.dirname(remote_file_path)) if keep_original: shutil.copy(local_file_path, remote_file_path) @@ -365,7 +363,7 @@ def url_exists(url, curl=None): Returns (bool): True if it exists; False otherwise. 
""" tty.debug("Checking existence of {0}".format(url)) - url_result = url_util.parse(url) + url_result = urllib.parse.urlparse(url) # Check if a local file local_path = url_util.local_file_path(url_result) @@ -425,7 +423,7 @@ def _debug_print_delete_results(result): def remove_url(url, recursive=False): - url = url_util.parse(url) + url = urllib.parse.urlparse(url) local_path = url_util.local_file_path(url) if local_path: @@ -534,9 +532,9 @@ def _iter_local_prefix(path): def list_url(url, recursive=False): - url = url_util.parse(url) - + url = urllib.parse.urlparse(url) local_path = url_util.local_file_path(url) + if local_path: if recursive: return list(_iter_local_prefix(local_path)) @@ -665,7 +663,7 @@ def spider(root_urls, depth=0, concurrency=32): collect = current_depth < depth for root in root_urls: - root = url_util.parse(root) + root = urllib.parse.urlparse(root) spider_args.append((root, collect)) tp = multiprocessing.pool.ThreadPool(processes=concurrency) @@ -704,11 +702,11 @@ def _urlopen(req, *args, **kwargs): del kwargs["context"] opener = urlopen - if url_util.parse(url).scheme == "s3": + if urllib.parse.urlparse(url).scheme == "s3": import spack.s3_handler opener = spack.s3_handler.open - elif url_util.parse(url).scheme == "gs": + elif urllib.parse.urlparse(url).scheme == "gs": import spack.gcs_handler opener = spack.gcs_handler.gcs_open diff --git a/share/spack/spack-completion.bash b/share/spack/spack-completion.bash index 604468aaeb..028ec16bee 100755 --- a/share/spack/spack-completion.bash +++ b/share/spack/spack-completion.bash @@ -556,7 +556,7 @@ _spack_buildcache_sync() { } _spack_buildcache_update_index() { - SPACK_COMPREPLY="-h --help -d --mirror-url -k --keys" + SPACK_COMPREPLY="-h --help -d --directory -m --mirror-name --mirror-url -k --keys" } _spack_cd() { -- cgit v1.2.3-60-g2f50