author     Massimiliano Culpo <massimiliano.culpo@gmail.com>  2023-09-15 15:43:23 +0200
committer  GitHub <noreply@github.com>  2023-09-15 15:43:23 +0200
commit     fb9e5fcc4f5307deaf10fcd571ebea68188d859c (patch)
tree       0c048fbcd27b73b516922dfb1cc37003c440935b
parent     bc02453f6dd06b82f0324d208b67559125e135ea (diff)
download   spack-fb9e5fcc4f5307deaf10fcd571ebea68188d859c.tar.gz
           spack-fb9e5fcc4f5307deaf10fcd571ebea68188d859c.tar.bz2
           spack-fb9e5fcc4f5307deaf10fcd571ebea68188d859c.tar.xz
           spack-fb9e5fcc4f5307deaf10fcd571ebea68188d859c.zip
Group primitive url/path handling functions together (#40028)
-rw-r--r--  lib/spack/llnl/url.py                                  459
-rw-r--r--  lib/spack/spack/cmd/create.py                            2
-rw-r--r--  lib/spack/spack/cmd/url.py                               4
-rw-r--r--  lib/spack/spack/fetch_strategy.py                        7
-rw-r--r--  lib/spack/spack/gcs_handler.py                          28
-rw-r--r--  lib/spack/spack/main.py                                  1
-rw-r--r--  lib/spack/spack/mirror.py                                4
-rw-r--r--  lib/spack/spack/package_base.py                          2
-rw-r--r--  lib/spack/spack/patch.py                                 2
-rw-r--r--  lib/spack/spack/s3_handler.py                           80
-rw-r--r--  lib/spack/spack/test/llnl/url.py                       167
-rw-r--r--  lib/spack/spack/test/url_parse.py                      113
-rw-r--r--  lib/spack/spack/test/util/compression.py                38
-rw-r--r--  lib/spack/spack/test/web.py                             19
-rw-r--r--  lib/spack/spack/url.py                                 439
-rw-r--r--  lib/spack/spack/util/compression.py                    155
-rw-r--r--  lib/spack/spack/util/gcs.py                             22
-rw-r--r--  lib/spack/spack/util/path.py                             9
-rw-r--r--  lib/spack/spack/util/s3.py                              75
-rw-r--r--  lib/spack/spack/util/web.py                            159
-rw-r--r--  var/spack/repos/builtin/packages/protobuf/package.py     6
21 files changed, 903 insertions, 888 deletions
diff --git a/lib/spack/llnl/url.py b/lib/spack/llnl/url.py
new file mode 100644
index 0000000000..40e7606506
--- /dev/null
+++ b/lib/spack/llnl/url.py
@@ -0,0 +1,459 @@
+# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+"""URL primitives that just require Python standard library."""
+import itertools
+import os.path
+import re
+from typing import Optional, Set, Tuple
+from urllib.parse import urlsplit, urlunsplit
+
+# Archive extensions allowed in Spack
+PREFIX_EXTENSIONS = ("tar", "TAR")
+EXTENSIONS = ("gz", "bz2", "xz", "Z")
+NO_TAR_EXTENSIONS = ("zip", "tgz", "tbz2", "tbz", "txz")
+
+# Add PREFIX_EXTENSIONS and EXTENSIONS last so that .tar.gz is matched *before* .tar or .gz
+ALLOWED_ARCHIVE_TYPES = (
+ tuple(".".join(ext) for ext in itertools.product(PREFIX_EXTENSIONS, EXTENSIONS))
+ + PREFIX_EXTENSIONS
+ + EXTENSIONS
+ + NO_TAR_EXTENSIONS
+)
+CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}
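+
+# Illustrative note: because the compound "tar.*" extensions come first in
+# ALLOWED_ARCHIVE_TYPES, extension matching prefers them, e.g.
+# extension_from_path("archive.tar.gz") (defined below) yields "tar.gz", not "gz".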
+
+
+def find_list_urls(url: str) -> Set[str]:
+ r"""Find good list URLs for the supplied URL.
+
+ By default, returns the dirname of the archive path.
+
+ Provides special treatment for the following websites, which have a
+ unique list URL different from the dirname of the download URL:
+
+ ========= =======================================================
+ GitHub https://github.com/<repo>/<name>/releases
+ GitLab https://gitlab.\*/<repo>/<name>/tags
+ BitBucket https://bitbucket.org/<repo>/<name>/downloads/?tab=tags
+ CRAN https://\*.r-project.org/src/contrib/Archive/<name>
+ PyPI https://pypi.org/simple/<name>/
+ LuaRocks https://luarocks.org/modules/<repo>/<name>
+ ========= =======================================================
+
+ Note: this function is called by `spack versions`, `spack checksum`,
+ and `spack create`, but not by `spack fetch` or `spack install`.
+
+ Parameters:
+ url (str): The download URL for the package
+
+ Returns:
+ set: One or more list URLs for the package
+ """
+
+ url_types = [
+ # GitHub
+ # e.g. https://github.com/llnl/callpath/archive/v1.0.1.tar.gz
+ (r"(.*github\.com/[^/]+/[^/]+)", lambda m: m.group(1) + "/releases"),
+ # GitLab API endpoint
+ # e.g. https://gitlab.dkrz.de/api/v4/projects/k202009%2Flibaec/repository/archive.tar.gz?sha=v1.0.2
+ (
+ r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
+ lambda m: m.group(1) + "/" + m.group(2) + "/" + m.group(3) + "/tags",
+ ),
+ # GitLab non-API endpoint
+ # e.g. https://gitlab.dkrz.de/k202009/libaec/uploads/631e85bcf877c2dcaca9b2e6d6526339/libaec-1.0.0.tar.gz
+ (r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: m.group(1) + "/tags"),
+ # BitBucket
+ # e.g. https://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2
+ (r"(.*bitbucket.org/[^/]+/[^/]+)", lambda m: m.group(1) + "/downloads/?tab=tags"),
+ # CRAN
+ # e.g. https://cran.r-project.org/src/contrib/Rcpp_0.12.9.tar.gz
+ # e.g. https://cloud.r-project.org/src/contrib/rgl_0.98.1.tar.gz
+ (
+ r"(.*\.r-project\.org/src/contrib)/([^_]+)",
+ lambda m: m.group(1) + "/Archive/" + m.group(2),
+ ),
+ # PyPI
+ # e.g. https://pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
+ # e.g. https://www.pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
+ # e.g. https://pypi.org/packages/source/n/numpy/numpy-1.19.4.zip
+ # e.g. https://pypi.python.org/packages/source/n/numpy/numpy-1.19.4.zip
+ # e.g. https://files.pythonhosted.org/packages/source/n/numpy/numpy-1.19.4.zip
+ # e.g. https://pypi.io/packages/py2.py3/o/opencensus-context/opencensus_context-0.1.1-py2.py3-none-any.whl
+ (
+ r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
+ lambda m: "https://pypi.org/simple/" + m.group(1) + "/",
+ ),
+ # LuaRocks
+ # e.g. https://luarocks.org/manifests/gvvaughan/lpeg-1.0.2-1.src.rock
+ # e.g. https://luarocks.org/manifests/openresty/lua-cjson-2.1.0-1.src.rock
+ (
+ r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
+ + r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
+ lambda m: "https://luarocks.org/modules/"
+ + m.group("org")
+ + "/"
+ + m.group("name")
+ + "/",
+ ),
+ ]
+
+ list_urls = {os.path.dirname(url)}
+
+ for pattern, fun in url_types:
+ match = re.search(pattern, url)
+ if match:
+ list_urls.add(fun(match))
+
+ return list_urls
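+
+# Illustrative example, using the GitHub URL from the comment above:
+#   find_list_urls("https://github.com/llnl/callpath/archive/v1.0.1.tar.gz")
+#   -> {"https://github.com/llnl/callpath/archive",
+#       "https://github.com/llnl/callpath/releases"}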
+
+
+def strip_query_and_fragment(url: str) -> Tuple[str, str]:
+ """Strips query and fragment from a url, then returns the base url and the suffix.
+
+ Args:
+ url: URL to be stripped
+
+ Raises:
+ ValueError: when there is any error parsing the URL
+ """
+ components = urlsplit(url)
+ stripped = components[:3] + (None, None)
+
+ query, frag = components[3:5]
+ suffix = ""
+ if query:
+ suffix += "?" + query
+ if frag:
+ suffix += "#" + frag
+
+ return urlunsplit(stripped), suffix
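+
+# Illustrative example (assumed input):
+#   strip_query_and_fragment("https://example.com/foo.tgz?raw=true")
+#   -> ("https://example.com/foo.tgz", "?raw=true")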
+
+
+SOURCEFORGE_RE = re.compile(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$")
+
+
+def split_url_on_sourceforge_suffix(url: str) -> Tuple[str, ...]:
+ """If the input is a sourceforge URL, returns base URL and "/download" suffix. Otherwise,
+ returns the input URL and an empty string.
+ """
+ match = SOURCEFORGE_RE.search(url)
+ if match is not None:
+ return match.groups()
+ return url, ""
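+
+# Illustrative example, using the sourceforge URL pattern handled above:
+#   split_url_on_sourceforge_suffix(
+#       "https://sourceforge.net/projects/glew/files/glew-2.0.0.tgz/download")
+#   -> ("https://sourceforge.net/projects/glew/files/glew-2.0.0.tgz", "/download")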
+
+
+def has_extension(path_or_url: str, ext: str) -> bool:
+ """Returns true if the extension in input is present in path, false otherwise."""
+ prefix, _ = split_url_on_sourceforge_suffix(path_or_url)
+ if not ext.startswith(r"\."):
+ ext = rf"\.{ext}$"
+
+ if re.search(ext, prefix):
+ return True
+ return False
+
+
+def extension_from_path(path_or_url: Optional[str]) -> Optional[str]:
+ """Tries to match an allowed archive extension to the input. Returns the first match,
+ or None if no match was found.
+
+ Raises:
+ ValueError: if the input is None
+ """
+ if path_or_url is None:
+ raise ValueError("Can't call extension() on None")
+
+ for t in ALLOWED_ARCHIVE_TYPES:
+ if has_extension(path_or_url, t):
+ return t
+ return None
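+
+# Illustrative examples (assumed inputs):
+#   extension_from_path("foo.tar.bz2") -> "tar.bz2" (the compound match wins)
+#   extension_from_path("foo.txt")     -> None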
+
+
+def remove_extension(path_or_url: str, *, extension: str) -> str:
+ """Returns the input with the extension removed"""
+ suffix = rf"\.{extension}$"
+ return re.sub(suffix, "", path_or_url)
+
+
+def check_and_remove_ext(path: str, *, extension: str) -> str:
+ """Returns the input path with the extension removed, if the extension is present in path.
+ Otherwise, returns the input unchanged.
+ """
+ if not has_extension(path, extension):
+ return path
+ path, _ = split_url_on_sourceforge_suffix(path)
+ return remove_extension(path, extension=extension)
+
+
+def strip_extension(path_or_url: str, *, extension: Optional[str] = None) -> str:
+ """If a path contains the extension in input, returns the path stripped of the extension.
+ Otherwise, returns the input path.
+
+ If extension is None, attempts to strip any allowed extension from path.
+ """
+ if extension is None:
+ for t in ALLOWED_ARCHIVE_TYPES:
+ if has_extension(path_or_url, ext=t):
+ extension = t
+ break
+ else:
+ return path_or_url
+
+ return check_and_remove_ext(path_or_url, extension=extension)
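+
+# Illustrative examples (assumed inputs):
+#   strip_extension("foo-1.0.tar.gz")                 -> "foo-1.0"
+#   strip_extension("foo-1.0.tar.gz", extension="gz") -> "foo-1.0.tar"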
+
+
+def split_url_extension(url: str) -> Tuple[str, ...]:
+ """Some URLs have a query string, e.g.:
+
+ 1. https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7.tgz?raw=true
+ 2. http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin.tar.gz
+ 3. https://gitlab.kitware.com/vtk/vtk/repository/archive.tar.bz2?ref=v7.0.0
+
+ In (1), the query string needs to be stripped to get at the
+ extension, but in (2) & (3), the filename is IN a single final query
+ argument.
+
+ This strips the URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
+ The suffix contains anything that was stripped off the URL to
+ get at the file extension. In (1), it will be ``'?raw=true'``, but
+ in (2), it will be empty. In (3) the suffix is a parameter that follows
+ after the file extension, e.g.:
+
+ 1. ``('https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7', 'tgz', '?raw=true')``
+ 2. ``('http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin', 'tar.gz', '')``
+ 3. ``('https://gitlab.kitware.com/vtk/vtk/repository/archive', 'tar.bz2', '?ref=v7.0.0')``
+ """
+ # Strip off sourceforge download suffix.
+ # e.g. https://sourceforge.net/projects/glew/files/glew/2.0.0/glew-2.0.0.tgz/download
+ prefix, suffix = split_url_on_sourceforge_suffix(url)
+
+ ext = extension_from_path(prefix)
+ if ext is not None:
+ prefix = strip_extension(prefix)
+ return prefix, ext, suffix
+
+ try:
+ prefix, suf = strip_query_and_fragment(prefix)
+ except ValueError:
+ # FIXME: tty.debug("Got error parsing path %s" % path)
+ # Ignore URL parse errors here
+ return url, "", ""
+
+ ext = extension_from_path(prefix)
+ prefix = strip_extension(prefix)
+ suffix = suf + suffix
+ if ext is None:
+ ext = ""
+
+ return prefix, ext, suffix
+
+
+def strip_version_suffixes(path_or_url: str) -> str:
+ """Some tarballs contain extraneous information after the version:
+
+ * ``bowtie2-2.2.5-source``
+ * ``libevent-2.0.21-stable``
+ * ``cuda_8.0.44_linux.run``
+
+ These strings are not part of the version number and should be ignored.
+ This function strips those suffixes off and returns the remaining string.
+ The goal is that the version is always the last thing in ``path``:
+
+ * ``bowtie2-2.2.5``
+ * ``libevent-2.0.21``
+ * ``cuda_8.0.44``
+
+ Args:
+ path_or_url: The filename or URL for the package
+
+ Returns:
+ The ``path`` with any extraneous suffixes removed
+ """
+ # NOTE: This could be done with complicated regexes in parse_version_offset
+ # NOTE: The problem is that we would have to add these regexes to the end
+ # NOTE: of every single version regex. Easier to just strip them off
+ # NOTE: permanently
+
+ suffix_regexes = [
+ # Download type
+ r"[Ii]nstall",
+ r"all",
+ r"code",
+ r"[Ss]ources?",
+ r"file",
+ r"full",
+ r"single",
+ r"with[a-zA-Z_-]+",
+ r"rock",
+ r"src(_0)?",
+ r"public",
+ r"bin",
+ r"binary",
+ r"run",
+ r"[Uu]niversal",
+ r"jar",
+ r"complete",
+ r"dynamic",
+ r"oss",
+ r"gem",
+ r"tar",
+ r"sh",
+ # Download version
+ r"release",
+ r"bin",
+ r"stable",
+ r"[Ff]inal",
+ r"rel",
+ r"orig",
+ r"dist",
+ r"\+",
+ # License
+ r"gpl",
+ # Arch
+ # Needs to come before and after OS, appears in both orders
+ r"ia32",
+ r"intel",
+ r"amd64",
+ r"linux64",
+ r"x64",
+ r"64bit",
+ r"x86[_-]64",
+ r"i586_64",
+ r"x86",
+ r"i[36]86",
+ r"ppc64(le)?",
+ r"armv?(7l|6l|64)",
+ # Other
+ r"cpp",
+ r"gtk",
+ r"incubating",
+ # OS
+ r"[Ll]inux(_64)?",
+ r"LINUX",
+ r"[Uu]ni?x",
+ r"[Ss]un[Oo][Ss]",
+ r"[Mm]ac[Oo][Ss][Xx]?",
+ r"[Oo][Ss][Xx]",
+ r"[Dd]arwin(64)?",
+ r"[Aa]pple",
+ r"[Ww]indows",
+ r"[Ww]in(64|32)?",
+ r"[Cc]ygwin(64|32)?",
+ r"[Mm]ingw",
+ r"centos",
+ # Arch
+ # Needs to come before and after OS, appears in both orders
+ r"ia32",
+ r"intel",
+ r"amd64",
+ r"linux64",
+ r"x64",
+ r"64bit",
+ r"x86[_-]64",
+ r"i586_64",
+ r"x86",
+ r"i[36]86",
+ r"ppc64(le)?",
+ r"armv?(7l|6l|64)?",
+ # PyPI
+ r"[._-]py[23].*\.whl",
+ r"[._-]cp[23].*\.whl",
+ r"[._-]win.*\.exe",
+ ]
+
+ for regex in suffix_regexes:
+ # Remove the suffix from the end of the path
+ # This may be done multiple times
+ path_or_url = re.sub(r"[._-]?" + regex + "$", "", path_or_url)
+
+ return path_or_url
+
+
+def expand_contracted_extension(extension: str) -> str:
+ """Returns the expanded version of a known contracted extension.
+
+ This function maps extensions like ".tgz" to ".tar.gz". For unknown extensions,
+ the input is returned unmodified.
+ """
+ extension = extension.strip(".")
+ return CONTRACTION_MAP.get(extension, extension)
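+
+# Illustrative examples (assumed inputs):
+#   expand_contracted_extension("tgz")   -> "tar.gz"
+#   expand_contracted_extension(".tbz2") -> "tar.bz2" (leading dot is stripped)
+#   expand_contracted_extension("zip")   -> "zip" (not a contraction, unchanged)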
+
+
+def expand_contracted_extension_in_path(
+ path_or_url: str, *, extension: Optional[str] = None
+) -> str:
+ """Returns the input path or URL with any contraction extension expanded.
+
+ Args:
+ path_or_url: path or URL to be expanded
+ extension: if specified, only attempt to expand that extension
+ """
+ extension = extension or extension_from_path(path_or_url)
+ if extension is None:
+ return path_or_url
+
+ expanded = expand_contracted_extension(extension)
+ if expanded != extension:
+ return re.sub(rf"{extension}", rf"{expanded}", path_or_url)
+ return path_or_url
+
+
+def compression_ext_from_compressed_archive(extension: str) -> Optional[str]:
+ """Returns compression extension for a compressed archive"""
+ extension = expand_contracted_extension(extension)
+ for ext in EXTENSIONS:
+ if ext in extension:
+ return ext
+ return None
+
+
+def strip_compression_extension(path_or_url: str, ext: Optional[str] = None) -> str:
+ """Strips the compression extension from the input, and returns it. For instance,
+ "foo.tgz" becomes "foo.tar".
+
+ If no extension is given, try a default list of extensions.
+
+ Args:
+ path_or_url: input to be stripped
+ ext: if given, extension to be stripped
+ """
+ if not extension_from_path(path_or_url):
+ return path_or_url
+
+ expanded_path = expand_contracted_extension_in_path(path_or_url)
+ candidates = [ext] if ext is not None else EXTENSIONS
+ for current_extension in candidates:
+ modified_path = check_and_remove_ext(expanded_path, extension=current_extension)
+ if modified_path != expanded_path:
+ return modified_path
+ return expanded_path
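+
+# Illustrative examples (assumed inputs):
+#   strip_compression_extension("foo.tgz")          -> "foo.tar"
+#   strip_compression_extension("foo.tar.gz", "gz") -> "foo.tar"
+#   strip_compression_extension("foo.zip")          -> "foo.zip" (zip is not
+#   treated as a pure compression extension)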
+
+
+def allowed_archive(path_or_url: str) -> bool:
+ """Returns true if the input is a valid archive, False otherwise."""
+ return (
+ False if not path_or_url else any(path_or_url.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
+ )
+
+
+def determine_url_file_extension(path: str) -> str:
+ """This returns the type of archive a URL refers to. This is
+ sometimes confusing because of URLs like:
+
+ (1) https://github.com/petdance/ack/tarball/1.93_02
+
+ Where the URL doesn't actually contain the filename. We need
+ to know what type it is so that we can appropriately name files
+ in mirrors.
+ """
+ match = re.search(r"github.com/.+/(zip|tar)ball/", path)
+ if match:
+ if match.group(1) == "zip":
+ return "zip"
+ elif match.group(1) == "tar":
+ return "tar.gz"
+
+ prefix, ext, suffix = split_url_extension(path)
+ return ext
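+
+# Illustrative examples; the first uses the GitHub URL from the docstring:
+#   determine_url_file_extension("https://github.com/petdance/ack/tarball/1.93_02")
+#   -> "tar.gz"
+#   determine_url_file_extension("https://example.com/foo-1.0.tar.bz2") -> "tar.bz2"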
diff --git a/lib/spack/spack/cmd/create.py b/lib/spack/spack/cmd/create.py
index 9c923c4a17..e3569d998f 100644
--- a/lib/spack/spack/cmd/create.py
+++ b/lib/spack/spack/cmd/create.py
@@ -822,7 +822,7 @@ def get_versions(args, name):
if args.url is not None and args.template != "bundle" and valid_url:
# Find available versions
try:
- url_dict = spack.util.web.find_versions_of_archive(args.url)
+ url_dict = spack.url.find_versions_of_archive(args.url)
except UndetectableVersionError:
# Use fake versions
tty.warn("Couldn't detect version in: {0}".format(args.url))
diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py
index 8f7866c406..25f8ad382a 100644
--- a/lib/spack/spack/cmd/url.py
+++ b/lib/spack/spack/cmd/url.py
@@ -12,6 +12,7 @@ from llnl.util import tty
import spack.fetch_strategy as fs
import spack.repo
import spack.spec
+import spack.url
import spack.util.crypto as crypto
from spack.url import (
UndetectableNameError,
@@ -26,7 +27,6 @@ from spack.url import (
substitution_offsets,
)
from spack.util.naming import simplify_name
-from spack.util.web import find_versions_of_archive
description = "debugging tool for url parsing"
section = "developer"
@@ -139,7 +139,7 @@ def url_parse(args):
if args.spider:
print()
tty.msg("Spidering for versions:")
- versions = find_versions_of_archive(url)
+ versions = spack.url.find_versions_of_archive(url)
if not versions:
print(" Found no versions for {0}".format(name))
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
index 1f99c4ce9e..87c6e0fc61 100644
--- a/lib/spack/spack/fetch_strategy.py
+++ b/lib/spack/spack/fetch_strategy.py
@@ -31,6 +31,7 @@ import shutil
import urllib.parse
from typing import List, Optional
+import llnl.url
import llnl.util
import llnl.util.filesystem as fs
import llnl.util.tty as tty
@@ -46,7 +47,7 @@ import spack.util.url as url_util
import spack.util.web as web_util
import spack.version
import spack.version.git_ref_lookup
-from spack.util.compression import decompressor_for, extension_from_path
+from spack.util.compression import decompressor_for
from spack.util.executable import CommandNotFoundError, which
from spack.util.string import comma_and, quote
@@ -441,7 +442,7 @@ class URLFetchStrategy(FetchStrategy):
# TODO: replace this by mime check.
if not self.extension:
- self.extension = spack.url.determine_url_file_extension(self.url)
+ self.extension = llnl.url.determine_url_file_extension(self.url)
if self.stage.expanded:
tty.debug("Source already staged to %s" % self.stage.source_path)
@@ -570,7 +571,7 @@ class VCSFetchStrategy(FetchStrategy):
@_needs_stage
def archive(self, destination, **kwargs):
- assert extension_from_path(destination) == "tar.gz"
+ assert llnl.url.extension_from_path(destination) == "tar.gz"
assert self.stage.source_path.startswith(self.stage.path)
tar = which("tar", required=True)
diff --git a/lib/spack/spack/gcs_handler.py b/lib/spack/spack/gcs_handler.py
deleted file mode 100644
index b002fa70ac..0000000000
--- a/lib/spack/spack/gcs_handler.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
-# Spack Project Developers. See the top-level COPYRIGHT file for details.
-#
-# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import urllib.parse
-import urllib.response
-from urllib.error import URLError
-from urllib.request import BaseHandler
-
-
-def gcs_open(req, *args, **kwargs):
- """Open a reader stream to a blob object on GCS"""
- import spack.util.gcs as gcs_util
-
- url = urllib.parse.urlparse(req.get_full_url())
- gcsblob = gcs_util.GCSBlob(url)
-
- if not gcsblob.exists():
- raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
- stream = gcsblob.get_blob_byte_stream()
- headers = gcsblob.get_blob_headers()
-
- return urllib.response.addinfourl(stream, headers, url)
-
-
-class GCSHandler(BaseHandler):
- def gs_open(self, req):
- return gcs_open(req)
diff --git a/lib/spack/spack/main.py b/lib/spack/spack/main.py
index 009190829f..3b330c08d4 100644
--- a/lib/spack/spack/main.py
+++ b/lib/spack/spack/main.py
@@ -30,7 +30,6 @@ import llnl.util.tty.colify
import llnl.util.tty.color as color
from llnl.util.tty.log import log_output
-import spack
import spack.cmd
import spack.config
import spack.environment as ev
diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py
index e4825537db..32037502c5 100644
--- a/lib/spack/spack/mirror.py
+++ b/lib/spack/spack/mirror.py
@@ -20,6 +20,7 @@ import traceback
import urllib.parse
from typing import Optional, Union
+import llnl.url
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
@@ -29,7 +30,6 @@ import spack.error
import spack.fetch_strategy as fs
import spack.mirror
import spack.spec
-import spack.url as url
import spack.util.path
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
@@ -375,7 +375,7 @@ def _determine_extension(fetcher):
if isinstance(fetcher, fs.URLFetchStrategy):
if fetcher.expand_archive:
# If we fetch with a URLFetchStrategy, use URL's archive type
- ext = url.determine_url_file_extension(fetcher.url)
+ ext = llnl.url.determine_url_file_extension(fetcher.url)
if ext:
# Remove any leading dots
diff --git a/lib/spack/spack/package_base.py b/lib/spack/spack/package_base.py
index 5a14f44f31..67cebb3a8f 100644
--- a/lib/spack/spack/package_base.py
+++ b/lib/spack/spack/package_base.py
@@ -2377,7 +2377,7 @@ class PackageBase(WindowsRPath, PackageViewMixin, metaclass=PackageMeta):
return {}
try:
- return spack.util.web.find_versions_of_archive(
+ return spack.url.find_versions_of_archive(
self.all_urls, self.list_url, self.list_depth, concurrency, reference_package=self
)
except spack.util.web.NoNetworkConnectionError as e:
diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py
index a7fb3620ee..7bbab326d1 100644
--- a/lib/spack/spack/patch.py
+++ b/lib/spack/spack/patch.py
@@ -11,6 +11,7 @@ import sys
import llnl.util.filesystem
import llnl.util.lang
+from llnl.url import allowed_archive
import spack
import spack.error
@@ -19,7 +20,6 @@ import spack.mirror
import spack.repo
import spack.stage
import spack.util.spack_json as sjson
-from spack.util.compression import allowed_archive
from spack.util.crypto import Checker, checksum
from spack.util.executable import which, which_string
diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py
deleted file mode 100644
index efab23a5ea..0000000000
--- a/lib/spack/spack/s3_handler.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
-# Spack Project Developers. See the top-level COPYRIGHT file for details.
-#
-# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-
-import urllib.error
-import urllib.parse
-import urllib.request
-import urllib.response
-from io import BufferedReader, BytesIO, IOBase
-
-import spack.util.s3 as s3_util
-
-
-# NOTE(opadron): Workaround issue in boto where its StreamingBody
-# implementation is missing several APIs expected from IOBase. These missing
-# APIs prevent the streams returned by boto from being passed as-are along to
-# urllib.
-#
-# https://github.com/boto/botocore/issues/879
-# https://github.com/python/cpython/pull/3249
-class WrapStream(BufferedReader):
- def __init__(self, raw):
- # In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
- # only add missing attributes in older versions.
- # https://github.com/boto/botocore/commit/a624815eabac50442ed7404f3c4f2664cd0aa784
- if not isinstance(raw, IOBase):
- raw.readable = lambda: True
- raw.writable = lambda: False
- raw.seekable = lambda: False
- raw.closed = False
- raw.flush = lambda: None
- super().__init__(raw)
-
- def detach(self):
- self.raw = None
-
- def read(self, *args, **kwargs):
- return self.raw.read(*args, **kwargs)
-
- def __getattr__(self, key):
- return getattr(self.raw, key)
-
-
-def _s3_open(url, method="GET"):
- parsed = urllib.parse.urlparse(url)
- s3 = s3_util.get_s3_session(url, method="fetch")
-
- bucket = parsed.netloc
- key = parsed.path
-
- if key.startswith("/"):
- key = key[1:]
-
- if method not in ("GET", "HEAD"):
- raise urllib.error.URLError(
- "Only GET and HEAD verbs are currently supported for the s3:// scheme"
- )
-
- try:
- if method == "GET":
- obj = s3.get_object(Bucket=bucket, Key=key)
- # NOTE(opadron): Apply workaround here (see above)
- stream = WrapStream(obj["Body"])
- elif method == "HEAD":
- obj = s3.head_object(Bucket=bucket, Key=key)
- stream = BytesIO()
- except s3.ClientError as e:
- raise urllib.error.URLError(e) from e
-
- headers = obj["ResponseMetadata"]["HTTPHeaders"]
-
- return url, headers, stream
-
-
-class UrllibS3Handler(urllib.request.BaseHandler):
- def s3_open(self, req):
- orig_url = req.get_full_url()
- url, headers, stream = _s3_open(orig_url, method=req.get_method())
- return urllib.response.addinfourl(stream, headers, url)
diff --git a/lib/spack/spack/test/llnl/url.py b/lib/spack/spack/test/llnl/url.py
new file mode 100644
index 0000000000..8da8e727ec
--- /dev/null
+++ b/lib/spack/spack/test/llnl/url.py
@@ -0,0 +1,167 @@
+# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+"""Tests for llnl.url functions"""
+import itertools
+
+import pytest
+
+import llnl.url
+
+
+@pytest.fixture(params=llnl.url.ALLOWED_ARCHIVE_TYPES)
+def archive_and_expected(request):
+ archive_name = ".".join(["Foo", request.param])
+ return archive_name, request.param
+
+
+def test_get_extension(archive_and_expected):
+ """Tests that we can predict correctly known extensions for simple cases."""
+ archive, expected = archive_and_expected
+ result = llnl.url.extension_from_path(archive)
+ assert result == expected
+
+
+def test_get_bad_extension():
+ """Tests that a bad extension returns None"""
+ result = llnl.url.extension_from_path("Foo.cxx")
+ assert result is None
+
+
+@pytest.mark.parametrize(
+ "url,expected",
+ [
+ # No suffix
+ ("rgb-1.0.6", "rgb-1.0.6"),
+ # Misleading prefix
+ ("jpegsrc.v9b", "jpegsrc.v9b"),
+ ("turbolinux702", "turbolinux702"),
+ ("converge_install_2.3.16", "converge_install_2.3.16"),
+ # Download type - code, source
+ ("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
+ # Download type - src
+ ("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
+ ("go1.7.4.src", "go1.7.4"),
+ # Download type - source
+ ("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
+ ("grib_api-1.17.0-Source", "grib_api-1.17.0"),
+ # Download type - full
+ ("julia-0.4.3-full", "julia-0.4.3"),
+ # Download type - bin
+ ("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
+ # Download type - binary
+ ("Jmol-14.8.0-binary", "Jmol-14.8.0"),
+ # Download type - gem
+ ("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
+ # Download type - tar
+ ("gromacs-4.6.1-tar", "gromacs-4.6.1"),
+ # Download type - sh
+ ("Miniconda2-4.3.11-Linux-x86_64.sh", "Miniconda2-4.3.11"),
+ # Download version - release
+ ("v1.0.4-release", "v1.0.4"),
+ # Download version - stable
+ ("libevent-2.0.21-stable", "libevent-2.0.21"),
+ # Download version - final
+ ("2.6.7-final", "2.6.7"),
+ # Download version - rel
+ ("v1.9.5.1rel", "v1.9.5.1"),
+ # Download version - orig
+ ("dash_0.5.5.1.orig", "dash_0.5.5.1"),
+ # Download version - plus
+ ("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
+ # License
+ ("cppad-20170114.gpl", "cppad-20170114"),
+ # Arch
+ ("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
+ ("dislin-11.0.linux.i586_64", "dislin-11.0"),
+ ("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
+ # OS - linux
+ ("astyle_2.04_linux", "astyle_2.04"),
+ # OS - unix
+ ("install-tl-unx", "install-tl"),
+ # OS - macos
+ ("astyle_1.23_macosx", "astyle_1.23"),
+ ("haxe-2.08-osx", "haxe-2.08"),
+ # PyPI - wheel
+ ("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
+ (
+ "numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
+ "macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
+ "numpy-1.12.0",
+ ),
+ # PyPI - exe
+ ("PyYAML-3.12.win-amd64-py3.5.exe", "PyYAML-3.12"),
+ # Combinations of multiple patterns - bin, release
+ ("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
+ # Combinations of multiple patterns - all
+ ("p7zip_9.04_src_all", "p7zip_9.04"),
+ # Combinations of multiple patterns - run
+ ("cuda_8.0.44_linux.run", "cuda_8.0.44"),
+ # Combinations of multiple patterns - file
+ ("ack-2.14-single-file", "ack-2.14"),
+ # Combinations of multiple patterns - jar
+ ("antlr-3.4-complete.jar", "antlr-3.4"),
+ # Combinations of multiple patterns - oss
+ ("tbb44_20160128oss_src_0", "tbb44_20160128"),
+ # Combinations of multiple patterns - darwin
+ ("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
+ ("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
+ # Combinations of multiple patterns - centos
+ ("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
+ # Combinations of multiple patterns - arch
+ (
+ "VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
+ "VizGlow_v2.2alpha17-R21November2016",
+ ),
+ ("jdk-8u92-linux-x64", "jdk-8u92"),
+ ("cuda_6.5.14_linux_64.run", "cuda_6.5.14"),
+ ("Mathematica_12.0.0_LINUX.sh", "Mathematica_12.0.0"),
+ ("trf407b.linux64", "trf407b"),
+ # Combinations of multiple patterns - with
+ ("mafft-7.221-with-extensions-src", "mafft-7.221"),
+ ("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
+ ("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
+ # Combinations of multiple patterns - rock
+ ("bitlib-23-2.src.rock", "bitlib-23-2"),
+ # Combinations of multiple patterns - public
+ ("dakota-6.3-public.src", "dakota-6.3"),
+ # Combinations of multiple patterns - universal
+ ("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
+ # Combinations of multiple patterns - dynamic
+ ("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
+ # Combinations of multiple patterns - other
+ ("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
+ ("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
+ ("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
+ ],
+)
+def test_url_strip_version_suffixes(url, expected):
+ stripped = llnl.url.strip_version_suffixes(url)
+ assert stripped == expected
+
+
+def test_strip_compression_extension(archive_and_expected):
+ archive, extension = archive_and_expected
+ stripped = llnl.url.strip_compression_extension(archive)
+ if extension == "zip":
+ assert stripped == "Foo.zip"
+ stripped = llnl.url.strip_compression_extension(archive, "zip")
+ assert stripped == "Foo"
+ elif (
+ extension.lower() == "tar"
+ or extension in llnl.url.CONTRACTION_MAP
+ or extension
+ in [
+ ".".join(ext)
+ for ext in itertools.product(llnl.url.PREFIX_EXTENSIONS, llnl.url.EXTENSIONS)
+ ]
+ ):
+ assert stripped == "Foo.tar" or stripped == "Foo.TAR"
+ else:
+ assert stripped == "Foo"
+
+
+def test_allowed_archive(archive_and_expected):
+ archive, _ = archive_and_expected
+ assert llnl.url.allowed_archive(archive)
diff --git a/lib/spack/spack/test/url_parse.py b/lib/spack/spack/test/url_parse.py
index 86ebf84fa7..dd094ed230 100644
--- a/lib/spack/spack/test/url_parse.py
+++ b/lib/spack/spack/test/url_parse.py
@@ -17,125 +17,12 @@ from spack.url import (
parse_name_offset,
parse_version_offset,
strip_name_suffixes,
- strip_version_suffixes,
substitute_version,
)
from spack.version import Version
@pytest.mark.parametrize(
- "url,expected",
- [
- # No suffix
- ("rgb-1.0.6", "rgb-1.0.6"),
- # Misleading prefix
- ("jpegsrc.v9b", "jpegsrc.v9b"),
- ("turbolinux702", "turbolinux702"),
- ("converge_install_2.3.16", "converge_install_2.3.16"),
- # Download type - code, source
- ("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
- # Download type - src
- ("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
- ("go1.7.4.src", "go1.7.4"),
- # Download type - source
- ("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
- ("grib_api-1.17.0-Source", "grib_api-1.17.0"),
- # Download type - full
- ("julia-0.4.3-full", "julia-0.4.3"),
- # Download type - bin
- ("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
- # Download type - binary
- ("Jmol-14.8.0-binary", "Jmol-14.8.0"),
- # Download type - gem
- ("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
- # Download type - tar
- ("gromacs-4.6.1-tar", "gromacs-4.6.1"),
- # Download type - sh
- ("Miniconda2-4.3.11-Linux-x86_64.sh", "Miniconda2-4.3.11"),
- # Download version - release
- ("v1.0.4-release", "v1.0.4"),
- # Download version - stable
- ("libevent-2.0.21-stable", "libevent-2.0.21"),
- # Download version - final
- ("2.6.7-final", "2.6.7"),
- # Download version - rel
- ("v1.9.5.1rel", "v1.9.5.1"),
- # Download version - orig
- ("dash_0.5.5.1.orig", "dash_0.5.5.1"),
- # Download version - plus
- ("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
- # License
- ("cppad-20170114.gpl", "cppad-20170114"),
- # Arch
- ("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
- ("dislin-11.0.linux.i586_64", "dislin-11.0"),
- ("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
- # OS - linux
- ("astyle_2.04_linux", "astyle_2.04"),
- # OS - unix
- ("install-tl-unx", "install-tl"),
- # OS - macos
- ("astyle_1.23_macosx", "astyle_1.23"),
- ("haxe-2.08-osx", "haxe-2.08"),
- # PyPI - wheel
- ("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
- (
- "numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
- "macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
- "numpy-1.12.0",
- ),
- # PyPI - exe
- ("PyYAML-3.12.win-amd64-py3.5.exe", "PyYAML-3.12"),
- # Combinations of multiple patterns - bin, release
- ("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
- # Combinations of multiple patterns - all
- ("p7zip_9.04_src_all", "p7zip_9.04"),
- # Combinations of multiple patterns - run
- ("cuda_8.0.44_linux.run", "cuda_8.0.44"),
- # Combinations of multiple patterns - file
- ("ack-2.14-single-file", "ack-2.14"),
- # Combinations of multiple patterns - jar
- ("antlr-3.4-complete.jar", "antlr-3.4"),
- # Combinations of multiple patterns - oss
- ("tbb44_20160128oss_src_0", "tbb44_20160128"),
- # Combinations of multiple patterns - darwin
- ("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
- ("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
- # Combinations of multiple patterns - centos
- ("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
- # Combinations of multiple patterns - arch
- (
- "VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
- "VizGlow_v2.2alpha17-R21November2016",
- ),
- ("jdk-8u92-linux-x64", "jdk-8u92"),
- ("cuda_6.5.14_linux_64.run", "cuda_6.5.14"),
- ("Mathematica_12.0.0_LINUX.sh", "Mathematica_12.0.0"),
- ("trf407b.linux64", "trf407b"),
- # Combinations of multiple patterns - with
- ("mafft-7.221-with-extensions-src", "mafft-7.221"),
- ("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
- ("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
- # Combinations of multiple patterns - rock
- ("bitlib-23-2.src.rock", "bitlib-23-2"),
- # Combinations of multiple patterns - public
- ("dakota-6.3-public.src", "dakota-6.3"),
- # Combinations of multiple patterns - universal
- ("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
- # Combinations of multiple patterns - dynamic
- ("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
- # Combinations of multiple patterns - other
- ("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
- ("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
- ("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
- ],
-)
-def test_url_strip_version_suffixes(url, expected):
- stripped = strip_version_suffixes(url)
- assert stripped == expected
-
-
-@pytest.mark.parametrize(
"url,version,expected",
[
# No suffix
diff --git a/lib/spack/spack/test/util/compression.py b/lib/spack/spack/test/util/compression.py
index 7cbcfb283c..29007a7e33 100644
--- a/lib/spack/spack/test/util/compression.py
+++ b/lib/spack/spack/test/util/compression.py
@@ -10,6 +10,7 @@ from itertools import product
import pytest
+import llnl.url
from llnl.util.filesystem import working_dir
from spack.paths import spack_root
@@ -21,7 +22,7 @@ datadir = os.path.join(spack_root, "lib", "spack", "spack", "test", "data", "com
ext_archive = {}
[
ext_archive.update({ext: ".".join(["Foo", ext])})
- for ext in scomp.ALLOWED_ARCHIVE_TYPES
+ for ext in llnl.url.ALLOWED_ARCHIVE_TYPES
if "TAR" not in ext
]
# Spack does not use Python native handling for tarballs or zip
@@ -95,38 +96,3 @@ def test_unallowed_extension():
bad_ext_archive = "Foo.cxx"
with pytest.raises(CommandNotFoundError):
scomp.decompressor_for(bad_ext_archive)
-
-
-@pytest.mark.parametrize("archive", ext_archive.values())
-def test_get_extension(archive):
- ext = scomp.extension_from_path(archive)
- assert ext_archive[ext] == archive
-
-
-def test_get_bad_extension():
- archive = "Foo.cxx"
- ext = scomp.extension_from_path(archive)
- assert ext is None
-
-
-@pytest.mark.parametrize("path", ext_archive.values())
-def test_allowed_archive(path):
- assert scomp.allowed_archive(path)
-
-
-@pytest.mark.parametrize("ext_path", ext_archive.items())
-def test_strip_compression_extension(ext_path):
- ext, path = ext_path
- stripped = scomp.strip_compression_extension(path)
- if ext == "zip":
- assert stripped == "Foo.zip"
- stripped = scomp.strip_compression_extension(path, "zip")
- assert stripped == "Foo"
- elif (
- ext == "tar"
- or ext in scomp.CONTRACTION_MAP.keys()
- or ext in [".".join(ext) for ext in product(scomp.PRE_EXTS, scomp.EXTS)]
- ):
- assert stripped == "Foo.tar" or stripped == "Foo.TAR"
- else:
- assert stripped == "Foo"
diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py
index 2d6f577799..a012e7524e 100644
--- a/lib/spack/spack/test/web.py
+++ b/lib/spack/spack/test/web.py
@@ -15,6 +15,7 @@ import llnl.util.tty as tty
import spack.config
import spack.mirror
import spack.paths
+import spack.url
import spack.util.path
import spack.util.s3
import spack.util.url as url_util
@@ -102,31 +103,31 @@ def test_spider_no_response(monkeypatch):
def test_find_versions_of_archive_0():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=0)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=0)
assert Version("0.0.0") in versions
def test_find_versions_of_archive_1():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=1)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=1)
assert Version("0.0.0") in versions
assert Version("1.0.0") in versions
def test_find_versions_of_archive_2():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
assert Version("0.0.0") in versions
assert Version("1.0.0") in versions
assert Version("2.0.0") in versions
def test_find_exotic_versions_of_archive_2():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
# up for grabs to make this better.
assert Version("2.0.0b2") in versions
def test_find_versions_of_archive_3():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
assert Version("0.0.0") in versions
assert Version("1.0.0") in versions
assert Version("2.0.0") in versions
@@ -135,16 +136,14 @@ def test_find_versions_of_archive_3():
def test_find_exotic_versions_of_archive_3():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
assert Version("2.0.0b2") in versions
assert Version("3.0a1") in versions
assert Version("4.5-rc5") in versions
def test_find_versions_of_archive_with_fragment():
- versions = spack.util.web.find_versions_of_archive(
- root_tarball, root_with_fragment, list_depth=0
- )
+ versions = spack.url.find_versions_of_archive(root_tarball, root_with_fragment, list_depth=0)
assert Version("5.0.0") in versions
@@ -311,7 +310,7 @@ def test_remove_s3_url(monkeypatch, capfd):
def get_s3_session(url, method="fetch"):
return MockS3Client()
- monkeypatch.setattr(spack.util.s3, "get_s3_session", get_s3_session)
+ monkeypatch.setattr(spack.util.web, "get_s3_session", get_s3_session)
current_debug_level = tty.debug_level()
tty.set_debug(1)
diff --git a/lib/spack/spack/url.py b/lib/spack/spack/url.py
index bf2990f42f..c5e47232c0 100644
--- a/lib/spack/spack/url.py
+++ b/lib/spack/spack/url.py
@@ -27,246 +27,22 @@ it's never been told about that version before.
"""
import io
import os
+import pathlib
import re
-from urllib.parse import urlsplit, urlunsplit
-import llnl.util.tty as tty
+import llnl.url
from llnl.util.tty.color import cescape, colorize
import spack.error
-import spack.util.compression as comp
-import spack.util.path as spath
+import spack.util.web
import spack.version
-
+from spack.util.path import convert_to_posix_path
#
# Note: We call the input to most of these functions a "path" but the functions
# work on paths and URLs. There's not a good word for both of these, but
# "path" seemed like the most generic term.
#
-def find_list_urls(url):
- r"""Find good list URLs for the supplied URL.
-
- By default, returns the dirname of the archive path.
-
- Provides special treatment for the following websites, which have a
- unique list URL different from the dirname of the download URL:
-
- ========= =======================================================
- GitHub https://github.com/<repo>/<name>/releases
- GitLab https://gitlab.\*/<repo>/<name>/tags
- BitBucket https://bitbucket.org/<repo>/<name>/downloads/?tab=tags
- CRAN https://\*.r-project.org/src/contrib/Archive/<name>
- PyPI https://pypi.org/simple/<name>/
- LuaRocks https://luarocks.org/modules/<repo>/<name>
- ========= =======================================================
-
- Note: this function is called by `spack versions`, `spack checksum`,
- and `spack create`, but not by `spack fetch` or `spack install`.
-
- Parameters:
- url (str): The download URL for the package
-
- Returns:
- set: One or more list URLs for the package
- """
-
- url_types = [
- # GitHub
- # e.g. https://github.com/llnl/callpath/archive/v1.0.1.tar.gz
- (r"(.*github\.com/[^/]+/[^/]+)", lambda m: m.group(1) + "/releases"),
- # GitLab API endpoint
- # e.g. https://gitlab.dkrz.de/api/v4/projects/k202009%2Flibaec/repository/archive.tar.gz?sha=v1.0.2
- (
- r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
- lambda m: m.group(1) + "/" + m.group(2) + "/" + m.group(3) + "/tags",
- ),
- # GitLab non-API endpoint
- # e.g. https://gitlab.dkrz.de/k202009/libaec/uploads/631e85bcf877c2dcaca9b2e6d6526339/libaec-1.0.0.tar.gz
- (r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: m.group(1) + "/tags"),
- # BitBucket
- # e.g. https://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2
- (r"(.*bitbucket.org/[^/]+/[^/]+)", lambda m: m.group(1) + "/downloads/?tab=tags"),
- # CRAN
- # e.g. https://cran.r-project.org/src/contrib/Rcpp_0.12.9.tar.gz
- # e.g. https://cloud.r-project.org/src/contrib/rgl_0.98.1.tar.gz
- (
- r"(.*\.r-project\.org/src/contrib)/([^_]+)",
- lambda m: m.group(1) + "/Archive/" + m.group(2),
- ),
- # PyPI
- # e.g. https://pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
- # e.g. https://www.pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
- # e.g. https://pypi.org/packages/source/n/numpy/numpy-1.19.4.zip
- # e.g. https://pypi.python.org/packages/source/n/numpy/numpy-1.19.4.zip
- # e.g. https://files.pythonhosted.org/packages/source/n/numpy/numpy-1.19.4.zip
- # e.g. https://pypi.io/packages/py2.py3/o/opencensus-context/opencensus_context-0.1.1-py2.py3-none-any.whl
- (
- r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
- lambda m: "https://pypi.org/simple/" + m.group(1) + "/",
- ),
- # LuaRocks
- # e.g. https://luarocks.org/manifests/gvvaughan/lpeg-1.0.2-1.src.rock
- # e.g. https://luarocks.org/manifests/openresty/lua-cjson-2.1.0-1.src.rock
- (
- r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
- + r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
- lambda m: "https://luarocks.org/modules/"
- + m.group("org")
- + "/"
- + m.group("name")
- + "/",
- ),
- ]
-
- list_urls = set([os.path.dirname(url)])
-
- for pattern, fun in url_types:
- match = re.search(pattern, url)
- if match:
- list_urls.add(fun(match))
-
- return list_urls
-
-
-def strip_query_and_fragment(path):
- try:
- components = urlsplit(path)
- stripped = components[:3] + (None, None)
-
- query, frag = components[3:5]
- suffix = ""
- if query:
- suffix += "?" + query
- if frag:
- suffix += "#" + frag
-
- return (urlunsplit(stripped), suffix)
-
- except ValueError:
- tty.debug("Got error parsing path %s" % path)
- return (path, "") # Ignore URL parse errors here
-
-
-def strip_version_suffixes(path):
- """Some tarballs contain extraneous information after the version:
-
- * ``bowtie2-2.2.5-source``
- * ``libevent-2.0.21-stable``
- * ``cuda_8.0.44_linux.run``
-
- These strings are not part of the version number and should be ignored.
- This function strips those suffixes off and returns the remaining string.
- The goal is that the version is always the last thing in ``path``:
-
- * ``bowtie2-2.2.5``
- * ``libevent-2.0.21``
- * ``cuda_8.0.44``
-
- Args:
- path (str): The filename or URL for the package
-
- Returns:
- str: The ``path`` with any extraneous suffixes removed
- """
- # NOTE: This could be done with complicated regexes in parse_version_offset
- # NOTE: The problem is that we would have to add these regexes to the end
- # NOTE: of every single version regex. Easier to just strip them off
- # NOTE: permanently
-
- suffix_regexes = [
- # Download type
- r"[Ii]nstall",
- r"all",
- r"code",
- r"[Ss]ources?",
- r"file",
- r"full",
- r"single",
- r"with[a-zA-Z_-]+",
- r"rock",
- r"src(_0)?",
- r"public",
- r"bin",
- r"binary",
- r"run",
- r"[Uu]niversal",
- r"jar",
- r"complete",
- r"dynamic",
- r"oss",
- r"gem",
- r"tar",
- r"sh",
- # Download version
- r"release",
- r"bin",
- r"stable",
- r"[Ff]inal",
- r"rel",
- r"orig",
- r"dist",
- r"\+",
- # License
- r"gpl",
- # Arch
- # Needs to come before and after OS, appears in both orders
- r"ia32",
- r"intel",
- r"amd64",
- r"linux64",
- r"x64",
- r"64bit",
- r"x86[_-]64",
- r"i586_64",
- r"x86",
- r"i[36]86",
- r"ppc64(le)?",
- r"armv?(7l|6l|64)",
- # Other
- r"cpp",
- r"gtk",
- r"incubating",
- # OS
- r"[Ll]inux(_64)?",
- r"LINUX",
- r"[Uu]ni?x",
- r"[Ss]un[Oo][Ss]",
- r"[Mm]ac[Oo][Ss][Xx]?",
- r"[Oo][Ss][Xx]",
- r"[Dd]arwin(64)?",
- r"[Aa]pple",
- r"[Ww]indows",
- r"[Ww]in(64|32)?",
- r"[Cc]ygwin(64|32)?",
- r"[Mm]ingw",
- r"centos",
- # Arch
- # Needs to come before and after OS, appears in both orders
- r"ia32",
- r"intel",
- r"amd64",
- r"linux64",
- r"x64",
- r"64bit",
- r"x86[_-]64",
- r"i586_64",
- r"x86",
- r"i[36]86",
- r"ppc64(le)?",
- r"armv?(7l|6l|64)?",
- # PyPI
- r"[._-]py[23].*\.whl",
- r"[._-]cp[23].*\.whl",
- r"[._-]win.*\.exe",
- ]
-
- for regex in suffix_regexes:
- # Remove the suffix from the end of the path
- # This may be done multiple times
- path = re.sub(r"[._-]?" + regex + "$", "", path)
-
- return path
def strip_name_suffixes(path, version):
@@ -341,69 +117,6 @@ def strip_name_suffixes(path, version):
return path
-def split_url_extension(path):
- """Some URLs have a query string, e.g.:
-
- 1. https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7.tgz?raw=true
- 2. http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin.tar.gz
- 3. https://gitlab.kitware.com/vtk/vtk/repository/archive.tar.bz2?ref=v7.0.0
-
- In (1), the query string needs to be stripped to get at the
- extension, but in (2) & (3), the filename is IN a single final query
- argument.
-
- This strips the URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
- The suffix contains anything that was stripped off the URL to
- get at the file extension. In (1), it will be ``'?raw=true'``, but
- in (2), it will be empty. In (3) the suffix is a parameter that follows
- after the file extension, e.g.:
-
- 1. ``('https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7', '.tgz', '?raw=true')``
- 2. ``('http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin', '.tar.gz', None)``
- 3. ``('https://gitlab.kitware.com/vtk/vtk/repository/archive', '.tar.bz2', '?ref=v7.0.0')``
- """
- prefix, ext, suffix = path, "", ""
-
- # Strip off sourceforge download suffix.
- # e.g. https://sourceforge.net/projects/glew/files/glew/2.0.0/glew-2.0.0.tgz/download
- prefix, suffix = spath.find_sourceforge_suffix(path)
-
- ext = comp.extension_from_path(prefix)
- if ext is not None:
- prefix = comp.strip_extension(prefix)
-
- else:
- prefix, suf = strip_query_and_fragment(prefix)
- ext = comp.extension_from_path(prefix)
- prefix = comp.strip_extension(prefix)
- suffix = suf + suffix
- if ext is None:
- ext = ""
-
- return prefix, ext, suffix
-
-
-def determine_url_file_extension(path):
- """This returns the type of archive a URL refers to. This is
- sometimes confusing because of URLs like:
-
- (1) https://github.com/petdance/ack/tarball/1.93_02
-
- Where the URL doesn't actually contain the filename. We need
- to know what type it is so that we can appropriately name files
- in mirrors.
- """
- match = re.search(r"github.com/.+/(zip|tar)ball/", path)
- if match:
- if match.group(1) == "zip":
- return "zip"
- elif match.group(1) == "tar":
- return "tar.gz"
-
- prefix, ext, suffix = split_url_extension(path)
- return ext
-
-
def parse_version_offset(path):
"""Try to extract a version string from a filename or URL.
@@ -426,13 +139,13 @@ def parse_version_offset(path):
# path: The prefix of the URL, everything before the ext and suffix
# ext: The file extension
# suffix: Any kind of query string that begins with a '?'
- path, ext, suffix = split_url_extension(path)
+ path, ext, suffix = llnl.url.split_url_extension(path)
# stem: Everything from path after the final '/'
original_stem = os.path.basename(path)
# Try to strip off anything after the version number
- stem = strip_version_suffixes(original_stem)
+ stem = llnl.url.strip_version_suffixes(original_stem)
# Assumptions:
#
@@ -620,7 +333,7 @@ def parse_name_offset(path, v=None):
# path: The prefix of the URL, everything before the ext and suffix
# ext: The file extension
# suffix: Any kind of query string that begins with a '?'
- path, ext, suffix = split_url_extension(path)
+ path, ext, suffix = llnl.url.split_url_extension(path)
# stem: Everything from path after the final '/'
original_stem = os.path.basename(path)
@@ -735,28 +448,6 @@ def parse_name_and_version(path):
return (name, ver)
-def insensitize(string):
- """Change upper and lowercase letters to be case insensitive in
- the provided string. e.g., 'a' becomes '[Aa]', 'B' becomes
- '[bB]', etc. Use for building regexes."""
-
- def to_ins(match):
- char = match.group(1)
- return "[%s%s]" % (char.lower(), char.upper())
-
- return re.sub(r"([a-zA-Z])", to_ins, string)
-
-
-def cumsum(elts, init=0, fn=lambda x: x):
- """Return cumulative sum of result of fn on each element in elts."""
- sums = []
- s = init
- for i, e in enumerate(elts):
- sums.append(s)
- s += fn(e)
- return sums
-
-
def find_all(substring, string):
"""Returns a list containing the indices of
every occurrence of substring in string."""
@@ -912,6 +603,122 @@ def color_url(path, **kwargs):
return colorize(out.getvalue())
+def find_versions_of_archive(
+ archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
+):
+ """Scrape web pages for new versions of a tarball. This function prefers URLs in the
+ following order: links found on the scraped page that match a url generated by the
+ reference package, found and in the archive_urls list, found and derived from those
+ in the archive_urls list, and if none are found for a version then the item in the
+ archive_urls list is included for the version.
+
+ Args:
+ archive_urls (str or list or tuple): URL or sequence of URLs for
+ different versions of a package. Typically these are just the
+ tarballs from the package file itself. By default, this searches
+ the parent directories of archives.
+ list_url (str or None): URL for a listing of archives.
+ Spack will scrape these pages for download links that look
+ like the archive URL.
+ list_depth (int): max depth to follow links on list_url pages.
+ Defaults to 0.
+ concurrency (int): maximum number of concurrent requests
+ reference_package (spack.package_base.PackageBase or None): a spack package
+ used as a reference for url detection. Uses the url_for_version
+ method on the package to produce reference urls which, if found,
+ are preferred.
+ """
+ if not isinstance(archive_urls, (list, tuple)):
+ archive_urls = [archive_urls]
+
+ # Generate a list of list_urls based on archive urls and any
+ # explicitly listed list_url in the package
+ list_urls = set()
+ if list_url is not None:
+ list_urls.add(list_url)
+ for aurl in archive_urls:
+ list_urls |= llnl.url.find_list_urls(aurl)
+
+ # Add '/' to the end of the URL. Some web servers require this.
+ additional_list_urls = set()
+ for lurl in list_urls:
+ if not lurl.endswith("/"):
+ additional_list_urls.add(lurl + "/")
+ list_urls |= additional_list_urls
+
+ # Grab some web pages to scrape.
+ pages, links = spack.util.web.spider(list_urls, depth=list_depth, concurrency=concurrency)
+
+ # Scrape them for archive URLs
+ regexes = []
+ for aurl in archive_urls:
+ # This creates a regex from the URL with a capture group for
+ # the version part of the URL. The capture group is converted
+ # to a generic wildcard, so we can use this to extract things
+ # on a page that look like archive URLs.
+ url_regex = wildcard_version(aurl)
+
+ # We'll be a bit more liberal and just look for the archive
+ # part, not the full path.
+ # this is a URL so it is a posixpath even on Windows
+ url_regex = pathlib.PurePosixPath(url_regex).name
+
+ # We need to add a / to the beginning of the regex to prevent
+ # Spack from picking up similarly named packages like:
+ # https://cran.r-project.org/src/contrib/pls_2.6-0.tar.gz
+ # https://cran.r-project.org/src/contrib/enpls_5.7.tar.gz
+ # https://cran.r-project.org/src/contrib/autopls_1.3.tar.gz
+ # https://cran.r-project.org/src/contrib/matrixpls_1.0.4.tar.gz
+ url_regex = "/" + url_regex
+
+ # We need to add a $ anchor to the end of the regex to prevent
+ # Spack from picking up signature files like:
+ # .asc
+ # .md5
+ # .sha256
+ # .sig
+ # However, SourceForge downloads still need to end in '/download'.
+ url_regex += r"(\/download)?"
+ # PyPI adds #sha256=... to the end of the URL
+ url_regex += "(#sha256=.*)?"
+ url_regex += "$"
+
+ regexes.append(url_regex)
+
+ regexes = [re.compile(r) for r in regexes]
+ # Build a dict version -> URL from any links that match the wildcards.
+ # Walk through archive_url links first.
+ # Any conflicting versions will be overwritten by the list_url links.
+ versions = {}
+ matched = set()
+ for url in sorted(links):
+ url = convert_to_posix_path(url)
+ if any(r.search(url) for r in regexes):
+ try:
+ ver = parse_version(url)
+ if ver in matched:
+ continue
+ versions[ver] = url
+ # prevent this version from getting overwritten
+ if reference_package is not None:
+ if url == reference_package.url_for_version(ver):
+ matched.add(ver)
+ else:
+ extrapolated_urls = [substitute_version(u, ver) for u in archive_urls]
+ if url in extrapolated_urls:
+ matched.add(ver)
+ except UndetectableVersionError:
+ continue
+
+ for url in archive_urls:
+ url = convert_to_posix_path(url)
+ ver = parse_version(url)
+ if ver not in versions:
+ versions[ver] = url
+
+ return versions
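+
+# Illustrative usage (assumed URL; spidering requires network access):
+#   versions = find_versions_of_archive("https://example.com/foo-1.0.tar.gz")
+#   # -> a dict mapping parsed versions to the URLs they were found at, e.g.
+#   #    {Version("1.0"): "https://example.com/foo-1.0.tar.gz", ...}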
+
+
class UrlParseError(spack.error.SpackError):
"""Raised when the URL module can't parse something correctly."""
diff --git a/lib/spack/spack/util/compression.py b/lib/spack/spack/util/compression.py
index b8dcd032f4..25ccfdf0bb 100644
--- a/lib/spack/spack/util/compression.py
+++ b/lib/spack/spack/util/compression.py
@@ -9,27 +9,13 @@ import os
import re
import shutil
import sys
-from itertools import product
+import llnl.url
from llnl.util import tty
-import spack.util.path as spath
from spack.error import SpackError
from spack.util.executable import CommandNotFoundError, which
-# Supported archive extensions.
-PRE_EXTS = ["tar", "TAR"]
-EXTS = ["gz", "bz2", "xz", "Z"]
-NOTAR_EXTS = ["zip", "tgz", "tbz2", "tbz", "txz"]
-CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}
-
-# Add PRE_EXTS and EXTS last so that .tar.gz is matched *before* .tar or .gz
-ALLOWED_ARCHIVE_TYPES = (
- [".".join(ext) for ext in product(PRE_EXTS, EXTS)] + PRE_EXTS + EXTS + NOTAR_EXTS
-)
-
-ALLOWED_SINGLE_EXT_ARCHIVE_TYPES = PRE_EXTS + EXTS + NOTAR_EXTS
-
try:
import bz2 # noqa
@@ -66,10 +52,6 @@ def is_bz2_supported():
return _bz2_support
-def allowed_archive(path):
- return False if not path else any(path.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
-
-
def _system_untar(archive_file, remove_archive_file=False):
"""Returns path to unarchived tar file.
Untars archive via system tar.
@@ -78,7 +60,7 @@ def _system_untar(archive_file, remove_archive_file=False):
archive_file (str): absolute path to the archive to be extracted.
Can be one of .tar(.[gz|bz2|xz|Z]) or .(tgz|tbz|tbz2|txz).
"""
- archive_file_no_ext = strip_extension(archive_file)
+ archive_file_no_ext = llnl.url.strip_extension(archive_file)
outfile = os.path.basename(archive_file_no_ext)
if archive_file_no_ext == archive_file:
# the archive file has no extension. Tar on Windows cannot untar onto itself
@@ -114,7 +96,7 @@ def _bunzip2(archive_file):
def _py_bunzip(archive_file):
"""Returns path to decompressed file.
Decompresses bz2 compressed archives/files via python's bz2 module"""
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
working_dir = os.getcwd()
archive_out = os.path.join(working_dir, decompressed_file)
f_bz = bz2.BZ2File(archive_file, mode="rb")
@@ -128,7 +110,7 @@ def _system_bunzip(archive_file):
"""Returns path to decompressed file.
Decompresses bz2 compressed archives/files via system bzip2 utility"""
compressed_file_name = os.path.basename(archive_file)
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
working_dir = os.getcwd()
archive_out = os.path.join(working_dir, decompressed_file)
copy_path = os.path.join(working_dir, compressed_file_name)
@@ -158,7 +140,7 @@ def _gunzip(archive_file):
def _py_gunzip(archive_file):
"""Returns path to gunzip'd file
Decompresses `.gz` compressed archives via python's gzip module"""
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "gz"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "gz"))
working_dir = os.getcwd()
destination_abspath = os.path.join(working_dir, decompressed_file)
f_in = gzip.open(archive_file, "rb")
@@ -171,7 +153,7 @@ def _py_gunzip(archive_file):
def _system_gunzip(archive_file):
"""Returns path to gunzip'd file
Decompresses `.gz` compressed files via system gzip"""
- archive_file_no_ext = strip_compression_extension(archive_file)
+ archive_file_no_ext = llnl.url.strip_compression_extension(archive_file)
if archive_file_no_ext == archive_file:
# the zip file has no extension. On Unix gunzip cannot unzip onto itself
archive_file = archive_file + ".gz"
@@ -196,7 +178,7 @@ def _unzip(archive_file):
Args:
archive_file (str): absolute path of the file to be decompressed
"""
- extracted_file = os.path.basename(strip_extension(archive_file, "zip"))
+ extracted_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="zip"))
if sys.platform == "win32":
return _system_untar(archive_file)
else:
@@ -259,7 +241,7 @@ def _win_compressed_tarball_handler(decompressor):
def _py_lzma(archive_file):
"""Returns path to decompressed .xz files
Decompress lzma compressed .xz files via python lzma module"""
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "xz"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "xz"))
archive_out = os.path.join(os.getcwd(), decompressed_file)
with open(archive_out, "wb") as ar:
with lzma.open(archive_file) as lar:
@@ -272,7 +254,7 @@ def _xz(archive_file):
Decompress lzma compressed .xz files via xz command line
tool.
"""
- decompressed_file = os.path.basename(strip_extension(archive_file, "xz"))
+ decompressed_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="xz"))
working_dir = os.getcwd()
destination_abspath = os.path.join(working_dir, decompressed_file)
compressed_file = os.path.basename(archive_file)
@@ -297,13 +279,13 @@ def _system_7zip(archive_file):
Args:
archive_file (str): absolute path of file to be unarchived
"""
- outfile = os.path.basename(strip_compression_extension(archive_file))
+ outfile = os.path.basename(llnl.url.strip_compression_extension(archive_file))
_7z = which("7z")
if not _7z:
raise CommandNotFoundError(
"7z unavailable,\
unable to extract %s files. 7z can be installed via Spack"
- % extension_from_path(archive_file)
+ % llnl.url.extension_from_path(archive_file)
)
_7z.add_default_arg("e")
_7z(archive_file)
@@ -318,7 +300,7 @@ def decompressor_for(path, extension=None):
if not extension:
extension = extension_from_file(path, decompress=True)
- if not allowed_archive(extension):
+ if not llnl.url.allowed_archive(extension):
raise CommandNotFoundError(
"Cannot extract archive, \
unrecognized file extension: '%s'"
@@ -394,7 +376,7 @@ def decompressor_for_win(extension):
path (str): path of the archive file requiring decompression
extension (str): archive extension used to select a decompression strategy
"""
- extension = expand_contracted_extension(extension)
+ extension = llnl.url.expand_contracted_extension(extension)
# Windows native tar can handle .zip extensions, use standard
# unzip method
if re.match(r"zip$", extension):
@@ -415,7 +397,7 @@ def decompressor_for_win(extension):
# python based decompression strategy
# Expand extension from contracted extension i.e. tar.gz from .tgz
# no-op on non contracted extensions
- compression_extension = compression_ext_from_compressed_archive(extension)
+ compression_extension = llnl.url.compression_ext_from_compressed_archive(extension)
decompressor = _determine_py_decomp_archive_strategy(compression_extension)
if not decompressor:
raise SpackError(
@@ -657,7 +639,7 @@ def extension_from_stream(stream, decompress=False):
"Cannot derive file extension from magic number;"
" falling back to regex path parsing."
)
- return extension_from_path(stream.name)
+ return llnl.url.extension_from_path(stream.name)
resultant_ext = suffix_ext if not prefix_ext else ".".join([prefix_ext, suffix_ext])
tty.debug("File extension %s successfully derived by magic number." % resultant_ext)
return resultant_ext
@@ -693,114 +675,11 @@ def extension_from_file(file, decompress=False):
if ext and ext.startswith("tar."):
suf = ext.split(".")[1]
abbr = "t" + suf
- if check_extension(file, abbr):
+ if llnl.url.has_extension(file, abbr):
return abbr
if not ext:
# If unable to parse extension from stream,
# attempt to fall back to string parsing
- ext = extension_from_path(file)
+ ext = llnl.url.extension_from_path(file)
return ext
return None
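+
+# Illustration (hypothetical file names, assuming the magic-number probe
+# identifies a tar.bz2 stream): contracted names report the contraction,
+# expanded names the full extension:
+#
+#     extension_from_file("foo.tbz2", decompress=True)     # -> "tbz2"
+#     extension_from_file("foo.tar.bz2", decompress=True)  # -> "tar.bz2"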
-
-
-def extension_from_path(path):
- """Returns the allowed archive extension for a path.
- If path does not include a valid archive extension
- (see`spack.util.compression.ALLOWED_ARCHIVE_TYPES`) return None
- """
- if path is None:
- raise ValueError("Can't call extension() on None")
-
- for t in ALLOWED_ARCHIVE_TYPES:
- if check_extension(path, t):
- return t
- return None
-
-
-def strip_compression_extension(path, ext=None):
- """Returns path with last supported (can be combined with tar) or
- provided archive extension stripped"""
- path_ext = extension_from_path(path)
- if path_ext:
- path = expand_contracted_extension_in_path(path)
- exts_to_check = EXTS
- if ext:
- exts_to_check = [ext]
- for ext_check in exts_to_check:
- mod_path = check_and_remove_ext(path, ext_check)
- if mod_path != path:
- return mod_path
- return path
-
-
-def strip_extension(path, ext=None):
- """Returns the part of a path that does not include extension.
- If ext is given, only attempts to remove that extension. If no
- extension given, attempts to strip any valid extension from path"""
- if ext:
- return check_and_remove_ext(path, ext)
- for t in ALLOWED_ARCHIVE_TYPES:
- mod_path = check_and_remove_ext(path, t)
- if mod_path != path:
- return mod_path
- return path
-
-
-def check_extension(path, ext):
- """Returns true if extension is present in path
- false otherwise"""
- # Strip sourceforge suffix.
- prefix, _ = spath.find_sourceforge_suffix(path)
- if not ext.startswith(r"\."):
- ext = r"\.%s$" % ext
- if re.search(ext, prefix):
- return True
- return False
-
-
-def reg_remove_ext(path, ext):
- """Returns path with ext remove via regex"""
- if path and ext:
- suffix = r"\.%s$" % ext
- return re.sub(suffix, "", path)
- return path
-
-
-def check_and_remove_ext(path, ext):
- """Returns path with extension removed if extension
- is present in path. Otherwise just returns path"""
- if check_extension(path, ext):
- return reg_remove_ext(path, ext)
- return path
-
-
-def _substitute_extension(path, old_ext, new_ext):
- """Returns path with old_ext replaced with new_ext.
- old_ext and new_ext can be extension strings or regexs"""
- return re.sub(rf"{old_ext}", rf"{new_ext}", path)
-
-
-def expand_contracted_extension_in_path(path, ext=None):
- """Returns path with any contraction extension (i.e. tgz) expanded
- (i.e. tar.gz). If ext is specified, only attempt to expand that extension"""
- if not ext:
- ext = extension_from_path(path)
- expanded_ext = expand_contracted_extension(ext)
- if expanded_ext != ext:
- return _substitute_extension(path, ext, expanded_ext)
- return path
-
-
-def expand_contracted_extension(extension):
- """Return expanded version of contracted extension
- i.e. .tgz -> .tar.gz, no op on non contracted extensions"""
- extension = extension.strip(".")
- return CONTRACTION_MAP.get(extension, extension)
-
-
-def compression_ext_from_compressed_archive(extension):
- """Returns compression extension for a compressed archive"""
- extension = expand_contracted_extension(extension)
- for ext in [*EXTS]:
- if ext in extension:
- return ext
diff --git a/lib/spack/spack/util/gcs.py b/lib/spack/spack/util/gcs.py
index 856fe73001..4e997df52b 100644
--- a/lib/spack/spack/util/gcs.py
+++ b/lib/spack/spack/util/gcs.py
@@ -10,6 +10,10 @@ integrate GCS Blob storage with spack buildcache.
import os
import sys
+import urllib.parse
+import urllib.response
+from urllib.error import URLError
+from urllib.request import BaseHandler
import llnl.util.tty as tty
@@ -222,3 +226,21 @@ class GCSBlob:
}
return headers
+
+
+def gcs_open(req, *args, **kwargs):
+ """Open a reader stream to a blob object on GCS"""
+ url = urllib.parse.urlparse(req.get_full_url())
+ gcsblob = GCSBlob(url)
+
+ if not gcsblob.exists():
+ raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
+ stream = gcsblob.get_blob_byte_stream()
+ headers = gcsblob.get_blob_headers()
+
+ return urllib.response.addinfourl(stream, headers, url)
+
+
+class GCSHandler(BaseHandler):
+ def gs_open(self, req):
+ return gcs_open(req)
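+
+
+# Usage sketch (illustrative, not part of this change): urllib resolves
+# ``gs_open`` through BaseHandler's ``<scheme>_open`` naming convention,
+# so registering the handler makes gs:// URLs openable:
+#
+#     opener = urllib.request.build_opener(GCSHandler())
+#     response = opener.open("gs://my-bucket/my-blob")  # hypothetical URL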
diff --git a/lib/spack/spack/util/path.py b/lib/spack/spack/util/path.py
index ef6fb883c7..3dc0ea676c 100644
--- a/lib/spack/spack/util/path.py
+++ b/lib/spack/spack/util/path.py
@@ -109,15 +109,6 @@ def win_exe_ext():
return ".exe"
-def find_sourceforge_suffix(path):
- """find and match sourceforge filepath components
- Return match object"""
- match = re.search(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$", path)
- if match:
- return match.groups()
- return path, ""
-
-
def path_to_os_path(*pths):
"""
Takes an arbitrary number of positional parameters
diff --git a/lib/spack/spack/util/s3.py b/lib/spack/spack/util/s3.py
index c4d53d86b6..796c49a8c8 100644
--- a/lib/spack/spack/util/s3.py
+++ b/lib/spack/spack/util/s3.py
@@ -3,10 +3,13 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
+import urllib.error
import urllib.parse
+import urllib.request
+import urllib.response
+from io import BufferedReader, BytesIO, IOBase
from typing import Any, Dict, Tuple
-import spack
import spack.config
#: Map (mirror name, method) tuples to s3 client instances.
@@ -114,4 +117,72 @@ def get_mirror_s3_connection_info(mirror, method):
if endpoint_url:
s3_client_args["endpoint_url"] = _parse_s3_endpoint_url(endpoint_url)
- return (s3_connection, s3_client_args)
+ return s3_connection, s3_client_args
+
+
+# NOTE(opadron): Works around an issue in boto where its StreamingBody
+# implementation is missing several APIs expected from IOBase. These missing
+# APIs prevent the streams returned by boto from being passed as-is along to
+# urllib.
+#
+# https://github.com/boto/botocore/issues/879
+# https://github.com/python/cpython/pull/3249
+class WrapStream(BufferedReader):
+ def __init__(self, raw):
+ # In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
+ # only add missing attributes in older versions.
+ # https://github.com/boto/botocore/commit/a624815eabac50442ed7404f3c4f2664cd0aa784
+ if not isinstance(raw, IOBase):
+ raw.readable = lambda: True
+ raw.writable = lambda: False
+ raw.seekable = lambda: False
+ raw.closed = False
+ raw.flush = lambda: None
+ super().__init__(raw)
+
+ def detach(self):
+ self.raw = None
+
+ def read(self, *args, **kwargs):
+ return self.raw.read(*args, **kwargs)
+
+ def __getattr__(self, key):
+ return getattr(self.raw, key)
+
+
+def _s3_open(url, method="GET"):
+ parsed = urllib.parse.urlparse(url)
+ s3 = get_s3_session(url, method="fetch")
+
+ bucket = parsed.netloc
+ key = parsed.path
+
+ if key.startswith("/"):
+ key = key[1:]
+
+ if method not in ("GET", "HEAD"):
+ raise urllib.error.URLError(
+ "Only GET and HEAD verbs are currently supported for the s3:// scheme"
+ )
+
+ try:
+ if method == "GET":
+ obj = s3.get_object(Bucket=bucket, Key=key)
+ # NOTE(opadron): Apply workaround here (see above)
+ stream = WrapStream(obj["Body"])
+ elif method == "HEAD":
+ obj = s3.head_object(Bucket=bucket, Key=key)
+ stream = BytesIO()
+ except s3.ClientError as e:
+ raise urllib.error.URLError(e) from e
+
+ headers = obj["ResponseMetadata"]["HTTPHeaders"]
+
+ return url, headers, stream
+
+
+class UrllibS3Handler(urllib.request.BaseHandler):
+ def s3_open(self, req):
+ orig_url = req.get_full_url()
+ url, headers, stream = _s3_open(orig_url, method=req.get_method())
+ return urllib.response.addinfourl(stream, headers, url)
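+
+
+# Usage sketch (illustrative): as with the GCS handler, ``s3_open`` is found
+# via the ``<scheme>_open`` convention. A HEAD request returns headers with
+# an empty body stream:
+#
+#     opener = urllib.request.build_opener(UrllibS3Handler())
+#     req = urllib.request.Request("s3://my-bucket/my-key", method="HEAD")
+#     headers = opener.open(req).info()  # hypothetical bucket/key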
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index 22309ba87f..79ad39ebd7 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -21,23 +21,17 @@ from typing import IO, Optional
from urllib.error import HTTPError, URLError
from urllib.request import HTTPSHandler, Request, build_opener
-import llnl.util.lang
-import llnl.util.tty as tty
+import llnl.url
+from llnl.util import lang, tty
from llnl.util.filesystem import mkdirp, rename, working_dir
-import spack
import spack.config
import spack.error
-import spack.gcs_handler
-import spack.s3_handler
-import spack.url
-import spack.util.crypto
-import spack.util.gcs as gcs_util
-import spack.util.s3 as s3_util
import spack.util.url as url_util
-from spack.util.compression import ALLOWED_ARCHIVE_TYPES
-from spack.util.executable import CommandNotFoundError, which
-from spack.util.path import convert_to_posix_path
+
+from .executable import CommandNotFoundError, which
+from .gcs import GCSBlob, GCSBucket, GCSHandler
+from .s3 import UrllibS3Handler, get_s3_session
class DetailedHTTPError(HTTPError):
@@ -66,8 +60,8 @@ class SpackHTTPDefaultErrorHandler(urllib.request.HTTPDefaultErrorHandler):
def _urlopen():
- s3 = spack.s3_handler.UrllibS3Handler()
- gcs = spack.gcs_handler.GCSHandler()
+ s3 = UrllibS3Handler()
+ gcs = GCSHandler()
error_handler = SpackHTTPDefaultErrorHandler()
# One opener with HTTPS ssl enabled
@@ -90,7 +84,7 @@ def _urlopen():
#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
-urlopen = llnl.util.lang.Singleton(_urlopen)
+urlopen = lang.Singleton(_urlopen)
#: User-Agent used in Request objects
SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
@@ -190,14 +184,14 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non
while remote_path.startswith("/"):
remote_path = remote_path[1:]
- s3 = s3_util.get_s3_session(remote_url, method="push")
+ s3 = get_s3_session(remote_url, method="push")
s3.upload_file(local_file_path, remote_url.netloc, remote_path, ExtraArgs=extra_args)
if not keep_original:
os.remove(local_file_path)
elif remote_url.scheme == "gs":
- gcs = gcs_util.GCSBlob(remote_url)
+ gcs = GCSBlob(remote_url)
gcs.upload_to_blob(local_file_path)
if not keep_original:
os.remove(local_file_path)
@@ -427,7 +421,7 @@ def remove_url(url, recursive=False):
if url.scheme == "s3":
# Try to find a mirror for potential connection information
- s3 = s3_util.get_s3_session(url, method="push")
+ s3 = get_s3_session(url, method="push")
bucket = url.netloc
if recursive:
# Because list_objects_v2 can only return up to 1000 items
@@ -460,10 +454,10 @@ def remove_url(url, recursive=False):
elif url.scheme == "gs":
if recursive:
- bucket = gcs_util.GCSBucket(url)
+ bucket = GCSBucket(url)
bucket.destroy(recursive=recursive)
else:
- blob = gcs_util.GCSBlob(url)
+ blob = GCSBlob(url)
blob.delete_blob()
return
@@ -538,14 +532,14 @@ def list_url(url, recursive=False):
]
if url.scheme == "s3":
- s3 = s3_util.get_s3_session(url, method="fetch")
+ s3 = get_s3_session(url, method="fetch")
if recursive:
return list(_iter_s3_prefix(s3, url))
return list(set(key.split("/", 1)[0] for key in _iter_s3_prefix(s3, url)))
elif url.scheme == "gs":
- gcs = gcs_util.GCSBucket(url)
+ gcs = GCSBucket(url)
return gcs.get_all_blobs(recursive=recursive)
@@ -636,7 +630,7 @@ def spider(root_urls, depth=0, concurrency=32):
links.add(abs_link)
# Skip stuff that looks like an archive
- if any(raw_link.endswith(s) for s in ALLOWED_ARCHIVE_TYPES):
+ if any(raw_link.endswith(s) for s in llnl.url.ALLOWED_ARCHIVE_TYPES):
continue
# Skip already-visited links
@@ -696,7 +690,7 @@ def spider(root_urls, depth=0, concurrency=32):
current_depth, depth, len(spider_args)
)
)
- results = tp.map(llnl.util.lang.star(_spider), spider_args)
+ results = tp.map(lang.star(_spider), spider_args)
spider_args = []
collect = current_depth < depth
for sub_pages, sub_links, sub_spider_args in results:
@@ -713,123 +707,6 @@ def spider(root_urls, depth=0, concurrency=32):
return pages, links
-def find_versions_of_archive(
- archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
-):
- """Scrape web pages for new versions of a tarball. This function prefers URLs in the
- following order: links found on the scraped page that match a url generated by the
- reference package, found and in the archive_urls list, found and derived from those
- in the archive_urls list, and if none are found for a version then the item in the
- archive_urls list is included for the version.
-
- Args:
- archive_urls (str or list or tuple): URL or sequence of URLs for
- different versions of a package. Typically these are just the
- tarballs from the package file itself. By default, this searches
- the parent directories of archives.
- list_url (str or None): URL for a listing of archives.
- Spack will scrape these pages for download links that look
- like the archive URL.
- list_depth (int): max depth to follow links on list_url pages.
- Defaults to 0.
- concurrency (int): maximum number of concurrent requests
- reference_package (spack.package_base.PackageBase or None): a spack package
- used as a reference for url detection. Uses the url_for_version
- method on the package to produce reference urls which, if found,
- are preferred.
- """
- if not isinstance(archive_urls, (list, tuple)):
- archive_urls = [archive_urls]
-
- # Generate a list of list_urls based on archive urls and any
- # explicitly listed list_url in the package
- list_urls = set()
- if list_url is not None:
- list_urls.add(list_url)
- for aurl in archive_urls:
- list_urls |= spack.url.find_list_urls(aurl)
-
- # Add '/' to the end of the URL. Some web servers require this.
- additional_list_urls = set()
- for lurl in list_urls:
- if not lurl.endswith("/"):
- additional_list_urls.add(lurl + "/")
- list_urls |= additional_list_urls
-
- # Grab some web pages to scrape.
- pages, links = spider(list_urls, depth=list_depth, concurrency=concurrency)
-
- # Scrape them for archive URLs
- regexes = []
- for aurl in archive_urls:
- # This creates a regex from the URL with a capture group for
- # the version part of the URL. The capture group is converted
- # to a generic wildcard, so we can use this to extract things
- # on a page that look like archive URLs.
- url_regex = spack.url.wildcard_version(aurl)
-
- # We'll be a bit more liberal and just look for the archive
- # part, not the full path.
- # this is a URL so it is a posixpath even on Windows
- url_regex = PurePosixPath(url_regex).name
-
- # We need to add a / to the beginning of the regex to prevent
- # Spack from picking up similarly named packages like:
- # https://cran.r-project.org/src/contrib/pls_2.6-0.tar.gz
- # https://cran.r-project.org/src/contrib/enpls_5.7.tar.gz
- # https://cran.r-project.org/src/contrib/autopls_1.3.tar.gz
- # https://cran.r-project.org/src/contrib/matrixpls_1.0.4.tar.gz
- url_regex = "/" + url_regex
-
- # We need to add a $ anchor to the end of the regex to prevent
- # Spack from picking up signature files like:
- # .asc
- # .md5
- # .sha256
- # .sig
- # However, SourceForge downloads still need to end in '/download'.
- url_regex += r"(\/download)?"
- # PyPI adds #sha256=... to the end of the URL
- url_regex += "(#sha256=.*)?"
- url_regex += "$"
-
- regexes.append(url_regex)
-
- # Build a dict version -> URL from any links that match the wildcards.
- # Walk through archive_url links first.
- # Any conflicting versions will be overwritten by the list_url links.
- versions = {}
- matched = set()
- for url in sorted(links):
- url = convert_to_posix_path(url)
- if any(re.search(r, url) for r in regexes):
- try:
- ver = spack.url.parse_version(url)
- if ver in matched:
- continue
- versions[ver] = url
- # prevent this version from getting overwritten
- if reference_package is not None:
- if url == reference_package.url_for_version(ver):
- matched.add(ver)
- else:
- extrapolated_urls = [
- spack.url.substitute_version(u, ver) for u in archive_urls
- ]
- if url in extrapolated_urls:
- matched.add(ver)
- except spack.url.UndetectableVersionError:
- continue
-
- for url in archive_urls:
- url = convert_to_posix_path(url)
- ver = spack.url.parse_version(url)
- if ver not in versions:
- versions[ver] = url
-
- return versions
-
-
def get_header(headers, header_name):
"""Looks up a dict of headers for the given header value.
diff --git a/var/spack/repos/builtin/packages/protobuf/package.py b/var/spack/repos/builtin/packages/protobuf/package.py
index a1a9a8e2d1..9a4ed84058 100644
--- a/var/spack/repos/builtin/packages/protobuf/package.py
+++ b/var/spack/repos/builtin/packages/protobuf/package.py
@@ -3,7 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import spack.util.web
+import spack.url
from spack.package import *
@@ -120,9 +120,7 @@ class Protobuf(CMakePackage):
return dict(
map(
lambda u: (u, self.url_for_version(u)),
- spack.util.web.find_versions_of_archive(
- self.all_urls, self.list_url, self.list_depth
- ),
+ spack.url.find_versions_of_archive(self.all_urls, self.list_url, self.list_depth),
)
)