diff options
21 files changed, 903 insertions, 888 deletions
diff --git a/lib/spack/llnl/ b/lib/spack/llnl/
new file mode 100644
index 0000000000..40e7606506
--- /dev/null
+++ b/lib/spack/llnl/
@@ -0,0 +1,459 @@
+# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+"""URL primitives that just require Python standard library."""
+import itertools
+import os.path
+import re
+from typing import Optional, Set, Tuple
+from urllib.parse import urlsplit, urlunsplit
+# Archive extensions allowed in Spack
+# NOTE(review): PREFIX_EXTENSIONS was dropped from this listing; the value is
+# inferred from the tests in this patch ("Foo.tar" / "Foo.TAR") -- confirm.
+PREFIX_EXTENSIONS = ("tar", "TAR")
+EXTENSIONS = ("gz", "bz2", "xz", "Z")
+NO_TAR_EXTENSIONS = ("zip", "tgz", "tbz2", "tbz", "txz")
+
+# Add PREFIX_EXTENSIONS and EXTENSIONS last so that .tar.gz is matched *before* .tar or .gz
+# NOTE(review): only the first operand survived in this listing; the order of
+# the remaining operands is reconstructed from the comment above -- confirm.
+ALLOWED_ARCHIVE_TYPES = (
+    tuple(".".join(ext) for ext in itertools.product(PREFIX_EXTENSIONS, EXTENSIONS))
+    + PREFIX_EXTENSIONS
+    + EXTENSIONS
+    + NO_TAR_EXTENSIONS
+)
+
+# Maps contracted archive extensions to their expanded, two-part form
+CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}
+def find_list_urls(url: str) -> Set[str]:
+    r"""Find good list URLs for the supplied URL.
+
+    By default, returns the dirname of the archive path.
+
+    Provides special treatment for the following websites, which have a
+    unique list URL different from the dirname of the download URL:
+
+    =========  =======================================================
+    GitHub     https://github.com/<repo>/<name>/releases
+    GitLab     https://gitlab.\*/<repo>/<name>/tags
+    BitBucket  https://bitbucket.org/<repo>/<name>/downloads/?tab=tags
+    CRAN       https://\*.r-project.org/src/contrib/Archive/<name>
+    PyPI       https://pypi.org/simple/<name>/
+    LuaRocks   https://luarocks.org/modules/<repo>/<name>
+    =========  =======================================================
+
+    Note: this function is called by `spack versions`, `spack checksum`,
+    and `spack create`, but not by `spack fetch` or `spack install`.
+
+    Parameters:
+        url (str): The download URL for the package
+
+    Returns:
+        set: One or more list URLs for the package
+    """
+    # Each entry pairs a regex with a function that builds the list URL from
+    # the resulting match object.
+    # NOTE(review): the BitBucket host and the PyPI / LuaRocks base URLs were
+    # stripped from this listing and are reconstructed here -- confirm them
+    # against the upstream file.
+    url_types = [
+        # GitHub: <...github.com/org/repo>/releases
+        (r"(.*github\.com/[^/]+/[^/]+)", lambda m: m.group(1) + "/releases"),
+        # GitLab API endpoint: the project is encoded as <org>%2F<repo>
+        (
+            r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
+            lambda m: m.group(1) + "/" + m.group(2) + "/" + m.group(3) + "/tags",
+        ),
+        # GitLab non-API endpoint
+        (r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: m.group(1) + "/tags"),
+        # BitBucket
+        (r"(.*bitbucket\.org/[^/]+/[^/]+)", lambda m: m.group(1) + "/downloads/?tab=tags"),
+        # CRAN: older releases live under <contrib>/Archive/<name>
+        (
+            r"(.*\.r-project\.org/src/contrib)/([^_]+)",
+            lambda m: m.group(1) + "/Archive/" + m.group(2),
+        ),
+        # PyPI (pypi.* and *pythonhosted.* download URLs)
+        (
+            r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
+            lambda m: "https://pypi.org/simple/" + m.group(1) + "/",
+        ),
+        # LuaRocks
+        (
+            r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
+            + r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
+            lambda m: "https://luarocks.org/modules/"
+            + m.group("org")
+            + "/"
+            + m.group("name")
+            + "/",
+        ),
+    ]
+
+    # The dirname of the download URL is always a candidate
+    list_urls = {os.path.dirname(url)}
+
+    for pattern, fun in url_types:
+        match = re.search(pattern, url)
+        if match:
+            list_urls.add(fun(match))
+
+    return list_urls
+def strip_query_and_fragment(url: str) -> Tuple[str, str]:
+    """Strips query and fragment from a url, then returns the base url and the suffix.
+
+    The suffix is the removed part, rebuilt as ``?<query>`` followed by
+    ``#<fragment>``; it is the empty string when the URL has neither.
+
+    Args:
+        url: URL to be stripped
+
+    Raises:
+        ValueError: when there is any error parsing the URL
+    """
+    components = urlsplit(url)
+    # Keep scheme, netloc and path; blank out query and fragment
+    stripped = components[:3] + (None, None)
+
+    query, frag = components[3:5]
+    suffix = ""
+    if query:
+        suffix += "?" + query
+    if frag:
+        suffix += "#" + frag
+
+    return urlunsplit(stripped), suffix
+SOURCEFORGE_RE = re.compile(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$")
+
+
+def split_url_on_sourceforge_suffix(url: str) -> Tuple[str, ...]:
+    """If the input is a sourceforge URL, returns base URL and "/download" suffix. Otherwise,
+    returns the input URL and an empty string.
+    """
+    match = SOURCEFORGE_RE.search(url)
+    if match is not None:
+        # Two groups: everything before the suffix, and the suffix itself
+        return match.groups()
+    return url, ""
+def has_extension(path_or_url: str, ext: str) -> bool:
+    """Returns true if the extension in input is present in path, false otherwise.
+
+    A trailing sourceforge "/download" suffix is ignored. Unless ``ext`` already
+    starts with an escaped dot, it is wrapped as a regex anchored at the end of
+    the string.
+    """
+    prefix, _ = split_url_on_sourceforge_suffix(path_or_url)
+    if not ext.startswith(r"\."):
+        # ext is interpolated unescaped, so a "." inside it (e.g. "tar.gz")
+        # matches any character
+        ext = rf"\.{ext}$"
+
+    if re.search(ext, prefix):
+        return True
+    return False
+def extension_from_path(path_or_url: Optional[str]) -> Optional[str]:
+    """Tries to match an allowed archive extension to the input. Returns the first match,
+    or None if no match was found.
+
+    Raises:
+        ValueError: if the input is None
+    """
+    if path_or_url is None:
+        raise ValueError("Can't call extension() on None")
+
+    # Candidates are tried in ALLOWED_ARCHIVE_TYPES order; first match wins
+    for t in ALLOWED_ARCHIVE_TYPES:
+        if has_extension(path_or_url, t):
+            return t
+    return None
+def remove_extension(path_or_url: str, *, extension: str) -> str:
+    """Returns the input with the extension removed.
+
+    Only a trailing ``.<extension>`` is removed; the input is returned
+    unchanged when it does not end with it.
+    """
+    suffix = rf"\.{extension}$"
+    return re.sub(suffix, "", path_or_url)
+def check_and_remove_ext(path: str, *, extension: str) -> str:
+    """Returns the input path with the extension removed, if the extension is present in path.
+    Otherwise, returns the input unchanged.
+
+    When the extension is present, a sourceforge "/download" suffix is dropped
+    from the result as well.
+    """
+    if not has_extension(path, extension):
+        return path
+    path, _ = split_url_on_sourceforge_suffix(path)
+    return remove_extension(path, extension=extension)
+def strip_extension(path_or_url: str, *, extension: Optional[str] = None) -> str:
+    """If a path contains the extension in input, returns the path stripped of the extension.
+    Otherwise, returns the input path.
+
+    If extension is None, attempts to strip any allowed extension from path.
+    """
+    if extension is None:
+        # Pick the first allowed archive type present in the input
+        for t in ALLOWED_ARCHIVE_TYPES:
+            if has_extension(path_or_url, ext=t):
+                extension = t
+                break
+        else:
+            # No allowed extension found: nothing to strip
+            return path_or_url
+
+    return check_and_remove_ext(path_or_url, extension=extension)
+def split_url_extension(url: str) -> Tuple[str, ...]:
+    """Split a URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
+
+    Some URLs have a query string, e.g.:
+
+    1. ``https://example.com/pkg/PowerParser_v2.0.7.tgz?raw=true``
+    2. ``http://example.org/closer.cgi?path=/pkg/1.2.0/pkg-1.2.0-bin.tar.gz``
+    3. ``https://example.com/pkg/repository/archive.tar.bz2?ref=v7.0.0``
+
+    In (1) and (3), the query string needs to be stripped to get at the
+    extension, but in (2), the filename is IN a single final query argument.
+
+    The suffix contains anything that was stripped off the URL to get at the
+    file extension. The extension is returned without a leading dot, and is
+    the empty string when none is recognized. For the URLs above:
+
+    1. ``('https://example.com/pkg/PowerParser_v2.0.7', 'tgz', '?raw=true')``
+    2. ``('http://example.org/closer.cgi?path=/pkg/1.2.0/pkg-1.2.0-bin', 'tar.gz', '')``
+    3. ``('https://example.com/pkg/repository/archive', 'tar.bz2', '?ref=v7.0.0')``
+    """
+    # Strip off sourceforge download suffix ("/download").
+    prefix, suffix = split_url_on_sourceforge_suffix(url)
+
+    # The extension may already be at the end of the URL, as in case (2)
+    ext = extension_from_path(prefix)
+    if ext is not None:
+        prefix = strip_extension(prefix)
+        return prefix, ext, suffix
+
+    try:
+        prefix, suf = strip_query_and_fragment(prefix)
+    except ValueError:
+        # FIXME: tty.debug("Got error parsing path %s" % path)
+        # Ignore URL parse errors here. Return three items like every other
+        # path, so that callers unpacking (prefix, ext, suffix) do not fail.
+        return url, "", ""
+
+    ext = extension_from_path(prefix)
+    prefix = strip_extension(prefix)
+    suffix = suf + suffix
+    if ext is None:
+        ext = ""
+
+    return prefix, ext, suffix
+def strip_version_suffixes(path_or_url: str) -> str:
+    """Some tarballs contain extraneous information after the version:
+
+    * ``bowtie2-2.2.5-source``
+    * ``libevent-2.0.21-stable``
+    * ``cuda_8.0.44_linux.run``
+
+    These strings are not part of the version number and should be ignored.
+    This function strips those suffixes off and returns the remaining string.
+    The goal is that the version is always the last thing in ``path``:
+
+    * ``bowtie2-2.2.5``
+    * ``libevent-2.0.21``
+    * ``cuda_8.0.44``
+
+    Args:
+        path_or_url: The filename or URL for the package
+
+    Returns:
+        The ``path`` with any extraneous suffixes removed
+    """
+    # NOTE: This could be done with complicated regexes in parse_version_offset
+    # NOTE: The problem is that we would have to add these regexes to the end
+    # NOTE: of every single version regex. Easier to just strip them off
+    # NOTE: permanently
+
+    suffix_regexes = [
+        # Download type
+        r"[Ii]nstall",
+        r"all",
+        r"code",
+        r"[Ss]ources?",
+        r"file",
+        r"full",
+        r"single",
+        r"with[a-zA-Z_-]+",
+        r"rock",
+        r"src(_0)?",
+        r"public",
+        r"bin",
+        r"binary",
+        r"run",
+        r"[Uu]niversal",
+        r"jar",
+        r"complete",
+        r"dynamic",
+        r"oss",
+        r"gem",
+        r"tar",
+        r"sh",
+        # Download version
+        r"release",
+        r"bin",
+        r"stable",
+        r"[Ff]inal",
+        r"rel",
+        r"orig",
+        r"dist",
+        r"\+",
+        # License
+        r"gpl",
+        # Arch
+        # Needs to come before and after OS, appears in both orders
+        r"ia32",
+        r"intel",
+        r"amd64",
+        r"linux64",
+        r"x64",
+        r"64bit",
+        r"x86[_-]64",
+        r"i586_64",
+        r"x86",
+        r"i[36]86",
+        r"ppc64(le)?",
+        r"armv?(7l|6l|64)",
+        # Other
+        r"cpp",
+        r"gtk",
+        r"incubating",
+        # OS
+        r"[Ll]inux(_64)?",
+        r"LINUX",
+        r"[Uu]ni?x",
+        r"[Ss]un[Oo][Ss]",
+        r"[Mm]ac[Oo][Ss][Xx]?",
+        r"[Oo][Ss][Xx]",
+        r"[Dd]arwin(64)?",
+        r"[Aa]pple",
+        r"[Ww]indows",
+        r"[Ww]in(64|32)?",
+        r"[Cc]ygwin(64|32)?",
+        r"[Mm]ingw",
+        r"centos",
+        # Arch
+        # Needs to come before and after OS, appears in both orders
+        r"ia32",
+        r"intel",
+        r"amd64",
+        r"linux64",
+        r"x64",
+        r"64bit",
+        r"x86[_-]64",
+        r"i586_64",
+        r"x86",
+        r"i[36]86",
+        r"ppc64(le)?",
+        r"armv?(7l|6l|64)?",
+        # PyPI
+        r"[._-]py[23].*\.whl",
+        r"[._-]cp[23].*\.whl",
+        r"[._-]win.*\.exe",
+    ]
+
+    # The list is applied once, in order: each pattern can remove one trailing
+    # occurrence, optionally preceded by a ".", "_" or "-" separator.
+    for regex in suffix_regexes:
+        # Remove the suffix from the end of the path
+        # This may be done multiple times
+        path_or_url = re.sub(r"[._-]?" + regex + "$", "", path_or_url)
+
+    return path_or_url
+def expand_contracted_extension(extension: str) -> str:
+    """Returns the expanded version of a known contracted extension.
+
+    This function maps extensions like ".tgz" to ".tar.gz". On unknown extensions,
+    return the input unmodified.
+
+    Leading and trailing dots are stripped from the input before the lookup, so
+    the returned value never starts or ends with a dot.
+    """
+    extension = extension.strip(".")
+    return CONTRACTION_MAP.get(extension, extension)
+def expand_contracted_extension_in_path(
+    path_or_url: str, *, extension: Optional[str] = None
+) -> str:
+    """Returns the input path or URL with any contraction extension expanded.
+
+    Args:
+        path_or_url: path or URL to be expanded
+        extension: if specified, only attempt to expand that extension
+    """
+    extension = extension or extension_from_path(path_or_url)
+    if extension is None:
+        return path_or_url
+
+    expanded = expand_contracted_extension(extension)
+    if expanded != extension:
+        # NOTE(review): the pattern is not anchored, so every occurrence of the
+        # contracted extension in the input is replaced, not only the trailing one
+        return re.sub(rf"{extension}", rf"{expanded}", path_or_url)
+    return path_or_url
+def compression_ext_from_compressed_archive(extension: str) -> Optional[str]:
+    """Returns compression extension for a compressed archive.
+
+    The extension is expanded first (e.g. "tgz" -> "tar.gz"); the result is the
+    first entry of EXTENSIONS contained in it, or None if there is none.
+    """
+    extension = expand_contracted_extension(extension)
+    for ext in EXTENSIONS:
+        # Substring test, not a suffix test
+        if ext in extension:
+            return ext
+    return None
+def strip_compression_extension(path_or_url: str, ext: Optional[str] = None) -> str:
+    """Strips the compression extension from the input, and returns it. For instance,
+    "foo.tgz" becomes "foo.tar".
+
+    If no extension is given, try a default list of extensions.
+
+    Args:
+        path_or_url: input to be stripped
+        ext: if given, extension to be stripped
+    """
+    if not extension_from_path(path_or_url):
+        return path_or_url
+
+    # Expand contractions first, so that e.g. ".tgz" is handled as ".tar.gz"
+    expanded_path = expand_contracted_extension_in_path(path_or_url)
+    candidates = [ext] if ext is not None else EXTENSIONS
+    for current_extension in candidates:
+        modified_path = check_and_remove_ext(expanded_path, extension=current_extension)
+        if modified_path != expanded_path:
+            return modified_path
+    return expanded_path
+def allowed_archive(path_or_url: str) -> bool:
+    """Returns true if the input is a valid archive, False otherwise.
+
+    An input is accepted when it is non-empty and ends with one of the entries
+    of ALLOWED_ARCHIVE_TYPES.
+    """
+    return (
+        False if not path_or_url else any(path_or_url.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
+    )
+def determine_url_file_extension(path: str) -> str:
+    """This returns the type of archive a URL refers to. This is
+    sometimes confusing because of URLs like:
+
+    (1) ``https://github.com/<org>/<repo>/tarball/<ref>``
+
+    Where the URL doesn't actually contain the filename. We need
+    to know what type it is so that we can appropriately name files
+    in mirrors.
+    """
+    # NOTE(review): the start of this pattern was stripped from the listing;
+    # only '|tar)ball/' survived. The host part is reconstructed -- confirm.
+    match = re.search(r"github.com/.+/(zip|tar)ball/", path)
+    if match:
+        if match.group(1) == "zip":
+            return "zip"
+        elif match.group(1) == "tar":
+            return "tar.gz"
+
+    # Otherwise take the extension found in the URL itself
+    prefix, ext, suffix = split_url_extension(path)
+    return ext
diff --git a/lib/spack/spack/cmd/ b/lib/spack/spack/cmd/
index 9c923c4a17..e3569d998f 100644
--- a/lib/spack/spack/cmd/
+++ b/lib/spack/spack/cmd/
@@ -822,7 +822,7 @@ def get_versions(args, name):
if args.url is not None and args.template != "bundle" and valid_url:
# Find available versions
- url_dict = spack.util.web.find_versions_of_archive(args.url)
+ url_dict = spack.url.find_versions_of_archive(args.url)
except UndetectableVersionError:
# Use fake versions
tty.warn("Couldn't detect version in: {0}".format(args.url))
diff --git a/lib/spack/spack/cmd/ b/lib/spack/spack/cmd/
index 8f7866c406..25f8ad382a 100644
--- a/lib/spack/spack/cmd/
+++ b/lib/spack/spack/cmd/
@@ -12,6 +12,7 @@ from llnl.util import tty
import spack.fetch_strategy as fs
import spack.repo
import spack.spec
+import spack.url
import spack.util.crypto as crypto
from spack.url import (
@@ -26,7 +27,6 @@ from spack.url import (
from spack.util.naming import simplify_name
-from spack.util.web import find_versions_of_archive
description = "debugging tool for url parsing"
section = "developer"
@@ -139,7 +139,7 @@ def url_parse(args):
if args.spider:
tty.msg("Spidering for versions:")
- versions = find_versions_of_archive(url)
+ versions = spack.url.find_versions_of_archive(url)
if not versions:
print(" Found no versions for {0}".format(name))
diff --git a/lib/spack/spack/ b/lib/spack/spack/
index 1f99c4ce9e..87c6e0fc61 100644
--- a/lib/spack/spack/
+++ b/lib/spack/spack/
@@ -31,6 +31,7 @@ import shutil
import urllib.parse
from typing import List, Optional
+import llnl.url
import llnl.util
import llnl.util.filesystem as fs
import llnl.util.tty as tty
@@ -46,7 +47,7 @@ import spack.util.url as url_util
import spack.util.web as web_util
import spack.version
import spack.version.git_ref_lookup
-from spack.util.compression import decompressor_for, extension_from_path
+from spack.util.compression import decompressor_for
from spack.util.executable import CommandNotFoundError, which
from spack.util.string import comma_and, quote
@@ -441,7 +442,7 @@ class URLFetchStrategy(FetchStrategy):
# TODO: replace this by mime check.
if not self.extension:
- self.extension = spack.url.determine_url_file_extension(self.url)
+ self.extension = llnl.url.determine_url_file_extension(self.url)
if self.stage.expanded:
tty.debug("Source already staged to %s" % self.stage.source_path)
@@ -570,7 +571,7 @@ class VCSFetchStrategy(FetchStrategy):
def archive(self, destination, **kwargs):
- assert extension_from_path(destination) == "tar.gz"
+ assert llnl.url.extension_from_path(destination) == "tar.gz"
assert self.stage.source_path.startswith(self.stage.path)
tar = which("tar", required=True)
diff --git a/lib/spack/spack/ b/lib/spack/spack/
deleted file mode 100644
index b002fa70ac..0000000000
--- a/lib/spack/spack/
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
-# Spack Project Developers. See the top-level COPYRIGHT file for details.
-# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import urllib.parse
-import urllib.response
-from urllib.error import URLError
-from urllib.request import BaseHandler
-def gcs_open(req, *args, **kwargs):
- """Open a reader stream to a blob object on GCS"""
- import spack.util.gcs as gcs_util
- url = urllib.parse.urlparse(req.get_full_url())
- gcsblob = gcs_util.GCSBlob(url)
- if not gcsblob.exists():
- raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
- stream = gcsblob.get_blob_byte_stream()
- headers = gcsblob.get_blob_headers()
- return urllib.response.addinfourl(stream, headers, url)
-class GCSHandler(BaseHandler):
- def gs_open(self, req):
- return gcs_open(req)
diff --git a/lib/spack/spack/ b/lib/spack/spack/
index 009190829f..3b330c08d4 100644
--- a/lib/spack/spack/
+++ b/lib/spack/spack/
@@ -30,7 +30,6 @@ import llnl.util.tty.colify
import llnl.util.tty.color as color
from llnl.util.tty.log import log_output
-import spack
import spack.cmd
import spack.config
import spack.environment as ev
diff --git a/lib/spack/spack/ b/lib/spack/spack/
index e4825537db..32037502c5 100644
--- a/lib/spack/spack/
+++ b/lib/spack/spack/
@@ -20,6 +20,7 @@ import traceback
import urllib.parse
from typing import Optional, Union
+import llnl.url
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
@@ -29,7 +30,6 @@ import spack.error
import spack.fetch_strategy as fs
import spack.mirror
import spack.spec
-import spack.url as url
import spack.util.path
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
@@ -375,7 +375,7 @@ def _determine_extension(fetcher):
if isinstance(fetcher, fs.URLFetchStrategy):
if fetcher.expand_archive:
# If we fetch with a URLFetchStrategy, use URL's archive type
- ext = url.determine_url_file_extension(fetcher.url)
+ ext = llnl.url.determine_url_file_extension(fetcher.url)
if ext:
# Remove any leading dots
diff --git a/lib/spack/spack/ b/lib/spack/spack/
index 5a14f44f31..67cebb3a8f 100644
--- a/lib/spack/spack/
+++ b/lib/spack/spack/
@@ -2377,7 +2377,7 @@ class PackageBase(WindowsRPath, PackageViewMixin, metaclass=PackageMeta):
return {}
- return spack.util.web.find_versions_of_archive(
+ return spack.url.find_versions_of_archive(
self.all_urls, self.list_url, self.list_depth, concurrency, reference_package=self
except spack.util.web.NoNetworkConnectionError as e:
diff --git a/lib/spack/spack/ b/lib/spack/spack/
index a7fb3620ee..7bbab326d1 100644
--- a/lib/spack/spack/
+++ b/lib/spack/spack/
@@ -11,6 +11,7 @@ import sys
import llnl.util.filesystem
import llnl.util.lang
+from llnl.url import allowed_archive
import spack
import spack.error
@@ -19,7 +20,6 @@ import spack.mirror
import spack.repo
import spack.stage
import spack.util.spack_json as sjson
-from spack.util.compression import allowed_archive
from spack.util.crypto import Checker, checksum
from spack.util.executable import which, which_string
diff --git a/lib/spack/spack/ b/lib/spack/spack/
deleted file mode 100644
index efab23a5ea..0000000000
--- a/lib/spack/spack/
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
-# Spack Project Developers. See the top-level COPYRIGHT file for details.
-# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import urllib.error
-import urllib.parse
-import urllib.request
-import urllib.response
-from io import BufferedReader, BytesIO, IOBase
-import spack.util.s3 as s3_util
-# NOTE(opadron): Workaround issue in boto where its StreamingBody
-# implementation is missing several APIs expected from IOBase. These missing
-# APIs prevent the streams returned by boto from being passed as-are along to
-# urllib.
-class WrapStream(BufferedReader):
- def __init__(self, raw):
- # In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
- # only add missing attributes in older versions.
- #
- if not isinstance(raw, IOBase):
- raw.readable = lambda: True
- raw.writable = lambda: False
- raw.seekable = lambda: False
- raw.closed = False
- raw.flush = lambda: None
- super().__init__(raw)
- def detach(self):
- self.raw = None
- def read(self, *args, **kwargs):
- return*args, **kwargs)
- def __getattr__(self, key):
- return getattr(self.raw, key)
-def _s3_open(url, method="GET"):
- parsed = urllib.parse.urlparse(url)
- s3 = s3_util.get_s3_session(url, method="fetch")
- bucket = parsed.netloc
- key = parsed.path
- if key.startswith("/"):
- key = key[1:]
- if method not in ("GET", "HEAD"):
- raise urllib.error.URLError(
- "Only GET and HEAD verbs are currently supported for the s3:// scheme"
- )
- try:
- if method == "GET":
- obj = s3.get_object(Bucket=bucket, Key=key)
- # NOTE(opadron): Apply workaround here (see above)
- stream = WrapStream(obj["Body"])
- elif method == "HEAD":
- obj = s3.head_object(Bucket=bucket, Key=key)
- stream = BytesIO()
- except s3.ClientError as e:
- raise urllib.error.URLError(e) from e
- headers = obj["ResponseMetadata"]["HTTPHeaders"]
- return url, headers, stream
-class UrllibS3Handler(urllib.request.BaseHandler):
- def s3_open(self, req):
- orig_url = req.get_full_url()
- url, headers, stream = _s3_open(orig_url, method=req.get_method())
- return urllib.response.addinfourl(stream, headers, url)
diff --git a/lib/spack/spack/test/llnl/ b/lib/spack/spack/test/llnl/
new file mode 100644
index 0000000000..8da8e727ec
--- /dev/null
+++ b/lib/spack/spack/test/llnl/
@@ -0,0 +1,167 @@
+# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+"""Tests for llnl.url functions"""
+import itertools
+import pytest
+import llnl.url
+# NOTE(review): the decorator line was dropped from this listing. It is
+# reconstructed from the use of ``request.param`` below and from the tests that
+# consume this fixture -- confirm the parameter list.
+@pytest.fixture(params=llnl.url.ALLOWED_ARCHIVE_TYPES)
+def archive_and_expected(request):
+    """Returns an archive name "Foo.<ext>" together with the extension used to build it."""
+    archive_name = ".".join(["Foo", request.param])
+    return archive_name, request.param
+def test_get_extension(archive_and_expected):
+    """Tests that we can predict correctly known extensions for simple cases."""
+    archive, expected = archive_and_expected
+    result = llnl.url.extension_from_path(archive)
+    assert result == expected
+def test_get_bad_extension():
+    """Tests that a bad extension returns None"""
+    result = llnl.url.extension_from_path("Foo.cxx")
+    assert result is None
+# NOTE(review): the decorator opener and five input file names (the ones ending
+# in .sh, .exe and .run) were stripped from this listing. The inputs below are
+# reconstructed so that they reduce to the expected values under the suffix
+# table of llnl.url.strip_version_suffixes -- confirm against upstream.
+@pytest.mark.parametrize(
+    "url,expected",
+    [
+        # No suffix
+        ("rgb-1.0.6", "rgb-1.0.6"),
+        # Misleading prefix
+        ("jpegsrc.v9b", "jpegsrc.v9b"),
+        ("turbolinux702", "turbolinux702"),
+        ("converge_install_2.3.16", "converge_install_2.3.16"),
+        # Download type - code, source
+        ("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
+        # Download type - src
+        ("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
+        ("go1.7.4.src", "go1.7.4"),
+        # Download type - source
+        ("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
+        ("grib_api-1.17.0-Source", "grib_api-1.17.0"),
+        # Download type - full
+        ("julia-0.4.3-full", "julia-0.4.3"),
+        # Download type - bin
+        ("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
+        # Download type - binary
+        ("Jmol-14.8.0-binary", "Jmol-14.8.0"),
+        # Download type - gem
+        ("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
+        # Download type - tar
+        ("gromacs-4.6.1-tar", "gromacs-4.6.1"),
+        # Download type - sh
+        ("Miniconda2-4.3.11-Linux-x86_64.sh", "Miniconda2-4.3.11"),
+        # Download version - release
+        ("v1.0.4-release", "v1.0.4"),
+        # Download version - stable
+        ("libevent-2.0.21-stable", "libevent-2.0.21"),
+        # Download version - final
+        ("2.6.7-final", "2.6.7"),
+        # Download version - rel
+        ("v1.9.5.1rel", "v1.9.5.1"),
+        # Download version - orig
+        ("dash_0.5.5.1.orig", "dash_0.5.5.1"),
+        # Download version - plus
+        ("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
+        # License
+        ("cppad-20170114.gpl", "cppad-20170114"),
+        # Arch
+        ("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
+        ("dislin-11.0.linux.i586_64", "dislin-11.0"),
+        ("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
+        # OS - linux
+        ("astyle_2.04_linux", "astyle_2.04"),
+        # OS - unix
+        ("install-tl-unx", "install-tl"),
+        # OS - macos
+        ("astyle_1.23_macosx", "astyle_1.23"),
+        ("haxe-2.08-osx", "haxe-2.08"),
+        # PyPI - wheel
+        ("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
+        (
+            "numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
+            "macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
+            "numpy-1.12.0",
+        ),
+        # PyPI - exe
+        ("PyYAML-3.12.win-amd64-py3.5.exe", "PyYAML-3.12"),
+        # Combinations of multiple patterns - bin, release
+        ("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
+        # Combinations of multiple patterns - all
+        ("p7zip_9.04_src_all", "p7zip_9.04"),
+        # Combinations of multiple patterns - run
+        ("cuda_8.0.44_linux.run", "cuda_8.0.44"),
+        # Combinations of multiple patterns - file
+        ("ack-2.14-single-file", "ack-2.14"),
+        # Combinations of multiple patterns - jar
+        ("antlr-3.4-complete.jar", "antlr-3.4"),
+        # Combinations of multiple patterns - oss
+        ("tbb44_20160128oss_src_0", "tbb44_20160128"),
+        # Combinations of multiple patterns - darwin
+        ("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
+        ("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
+        # Combinations of multiple patterns - centos
+        ("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
+        # Combinations of multiple patterns - arch
+        (
+            "VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
+            "VizGlow_v2.2alpha17-R21November2016",
+        ),
+        ("jdk-8u92-linux-x64", "jdk-8u92"),
+        ("cuda_6.5.14_linux_64.run", "cuda_6.5.14"),
+        ("Mathematica_12.0.0_LINUX.sh", "Mathematica_12.0.0"),
+        ("trf407b.linux64", "trf407b"),
+        # Combinations of multiple patterns - with
+        ("mafft-7.221-with-extensions-src", "mafft-7.221"),
+        ("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
+        ("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
+        # Combinations of multiple patterns - rock
+        ("bitlib-23-2.src.rock", "bitlib-23-2"),
+        # Combinations of multiple patterns - public
+        ("dakota-6.3-public.src", "dakota-6.3"),
+        # Combinations of multiple patterns - universal
+        ("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
+        # Combinations of multiple patterns - dynamic
+        ("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
+        # Combinations of multiple patterns - other
+        ("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
+        ("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
+        ("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
+    ],
+)
+def test_url_strip_version_suffixes(url, expected):
+    """Tests that trailing non-version suffixes are stripped from file names."""
+    stripped = llnl.url.strip_version_suffixes(url)
+    assert stripped == expected
+def test_strip_compression_extension(archive_and_expected):
+    """Tests stripping the compression part of every allowed archive extension."""
+    archive, extension = archive_and_expected
+    stripped = llnl.url.strip_compression_extension(archive)
+    if extension == "zip":
+        # "zip" is not among the default candidates, so the name is unchanged
+        # unless the extension is requested explicitly
+        assert stripped == "Foo.zip"
+        stripped = llnl.url.strip_compression_extension(archive, "zip")
+        assert stripped == "Foo"
+    elif (
+        extension.lower() == "tar"
+        or extension in llnl.url.CONTRACTION_MAP
+        or extension
+        in [
+            ".".join(ext)
+            for ext in itertools.product(llnl.url.PREFIX_EXTENSIONS, llnl.url.EXTENSIONS)
+        ]
+    ):
+        assert stripped == "Foo.tar" or stripped == "Foo.TAR"
+    else:
+        assert stripped == "Foo"
+def test_allowed_archive(archive_and_expected):
+    """Tests that every generated archive name is recognized as an allowed archive."""
+    archive, _ = archive_and_expected
+    assert llnl.url.allowed_archive(archive)
diff --git a/lib/spack/spack/test/ b/lib/spack/spack/test/
index 86ebf84fa7..dd094ed230 100644
--- a/lib/spack/spack/test/
+++ b/lib/spack/spack/test/
@@ -17,125 +17,12 @@ from spack.url import (
- strip_version_suffixes,
from spack.version import Version
- "url,expected",
- [
- # No suffix
- ("rgb-1.0.6", "rgb-1.0.6"),
- # Misleading prefix
- ("jpegsrc.v9b", "jpegsrc.v9b"),
- ("turbolinux702", "turbolinux702"),
- ("converge_install_2.3.16", "converge_install_2.3.16"),
- # Download type - code, source
- ("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
- # Download type - src
- ("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
- ("go1.7.4.src", "go1.7.4"),
- # Download type - source
- ("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
- ("grib_api-1.17.0-Source", "grib_api-1.17.0"),
- # Download type - full
- ("julia-0.4.3-full", "julia-0.4.3"),
- # Download type - bin
- ("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
- # Download type - binary
- ("Jmol-14.8.0-binary", "Jmol-14.8.0"),
- # Download type - gem
- ("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
- # Download type - tar
- ("gromacs-4.6.1-tar", "gromacs-4.6.1"),
- # Download type - sh
- ("", "Miniconda2-4.3.11"),
- # Download version - release
- ("v1.0.4-release", "v1.0.4"),
- # Download version - stable
- ("libevent-2.0.21-stable", "libevent-2.0.21"),
- # Download version - final
- ("2.6.7-final", "2.6.7"),
- # Download version - rel
- ("v1.9.5.1rel", "v1.9.5.1"),
- # Download version - orig
- ("dash_0.5.5.1.orig", "dash_0.5.5.1"),
- # Download version - plus
- ("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
- # License
- ("cppad-20170114.gpl", "cppad-20170114"),
- # Arch
- ("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
- ("dislin-11.0.linux.i586_64", "dislin-11.0"),
- ("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
- # OS - linux
- ("astyle_2.04_linux", "astyle_2.04"),
- # OS - unix
- ("install-tl-unx", "install-tl"),
- # OS - macos
- ("astyle_1.23_macosx", "astyle_1.23"),
- ("haxe-2.08-osx", "haxe-2.08"),
- # PyPI - wheel
- ("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
- (
- "numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
- "macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
- "numpy-1.12.0",
- ),
- # PyPI - exe
- ("", "PyYAML-3.12"),
- # Combinations of multiple patterns - bin, release
- ("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
- # Combinations of multiple patterns - all
- ("p7zip_9.04_src_all", "p7zip_9.04"),
- # Combinations of multiple patterns - run
- ("", "cuda_8.0.44"),
- # Combinations of multiple patterns - file
- ("ack-2.14-single-file", "ack-2.14"),
- # Combinations of multiple patterns - jar
- ("antlr-3.4-complete.jar", "antlr-3.4"),
- # Combinations of multiple patterns - oss
- ("tbb44_20160128oss_src_0", "tbb44_20160128"),
- # Combinations of multiple patterns - darwin
- ("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
- ("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
- # Combinations of multiple patterns - centos
- ("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
- # Combinations of multiple patterns - arch
- (
- "VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
- "VizGlow_v2.2alpha17-R21November2016",
- ),
- ("jdk-8u92-linux-x64", "jdk-8u92"),
- ("", "cuda_6.5.14"),
- ("", "Mathematica_12.0.0"),
- ("trf407b.linux64", "trf407b"),
- # Combinations of multiple patterns - with
- ("mafft-7.221-with-extensions-src", "mafft-7.221"),
- ("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
- ("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
- # Combinations of multiple patterns - rock
- ("bitlib-23-2.src.rock", "bitlib-23-2"),
- # Combinations of multiple patterns - public
- ("dakota-6.3-public.src", "dakota-6.3"),
- # Combinations of multiple patterns - universal
- ("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
- # Combinations of multiple patterns - dynamic
- ("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
- # Combinations of multiple patterns - other
- ("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
- ("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
- ("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
- ],
-def test_url_strip_version_suffixes(url, expected):
- stripped = strip_version_suffixes(url)
- assert stripped == expected
# No suffix
diff --git a/lib/spack/spack/test/util/ b/lib/spack/spack/test/util/
index 7cbcfb283c..29007a7e33 100644
--- a/lib/spack/spack/test/util/
+++ b/lib/spack/spack/test/util/
@@ -10,6 +10,7 @@ from itertools import product
import pytest
+import llnl.url
from llnl.util.filesystem import working_dir
from spack.paths import spack_root
@@ -21,7 +22,7 @@ datadir = os.path.join(spack_root, "lib", "spack", "spack", "test", "data", "com
ext_archive = {}
ext_archive.update({ext: ".".join(["Foo", ext])})
- for ext in scomp.ALLOWED_ARCHIVE_TYPES
+ for ext in llnl.url.ALLOWED_ARCHIVE_TYPES
if "TAR" not in ext
# Spack does not use Python native handling for tarballs or zip
@@ -95,38 +96,3 @@ def test_unallowed_extension():
bad_ext_archive = "Foo.cxx"
with pytest.raises(CommandNotFoundError):
-@pytest.mark.parametrize("archive", ext_archive.values())
-def test_get_extension(archive):
- ext = scomp.extension_from_path(archive)
- assert ext_archive[ext] == archive
-def test_get_bad_extension():
- archive = "Foo.cxx"
- ext = scomp.extension_from_path(archive)
- assert ext is None
-@pytest.mark.parametrize("path", ext_archive.values())
-def test_allowed_archive(path):
- assert scomp.allowed_archive(path)
-@pytest.mark.parametrize("ext_path", ext_archive.items())
-def test_strip_compression_extension(ext_path):
- ext, path = ext_path
- stripped = scomp.strip_compression_extension(path)
- if ext == "zip":
- assert stripped == ""
- stripped = scomp.strip_compression_extension(path, "zip")
- assert stripped == "Foo"
- elif (
- ext == "tar"
- or ext in scomp.CONTRACTION_MAP.keys()
- or ext in [".".join(ext) for ext in product(scomp.PRE_EXTS, scomp.EXTS)]
- ):
- assert stripped == "Foo.tar" or stripped == "Foo.TAR"
- else:
- assert stripped == "Foo"
diff --git a/lib/spack/spack/test/ b/lib/spack/spack/test/
index 2d6f577799..a012e7524e 100644
--- a/lib/spack/spack/test/
+++ b/lib/spack/spack/test/
@@ -15,6 +15,7 @@ import llnl.util.tty as tty
import spack.config
import spack.mirror
import spack.paths
+import spack.url
import spack.util.path
import spack.util.s3
import spack.util.url as url_util
@@ -102,31 +103,31 @@ def test_spider_no_response(monkeypatch):
def test_find_versions_of_archive_0():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=0)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=0)
assert Version("0.0.0") in versions
def test_find_versions_of_archive_1():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=1)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=1)
assert Version("0.0.0") in versions
assert Version("1.0.0") in versions
def test_find_versions_of_archive_2():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
assert Version("0.0.0") in versions
assert Version("1.0.0") in versions
assert Version("2.0.0") in versions
def test_find_exotic_versions_of_archive_2():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
# up for grabs to make this better.
assert Version("2.0.0b2") in versions
def test_find_versions_of_archive_3():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
assert Version("0.0.0") in versions
assert Version("1.0.0") in versions
assert Version("2.0.0") in versions
@@ -135,16 +136,14 @@ def test_find_versions_of_archive_3():
def test_find_exotic_versions_of_archive_3():
- versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
+ versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
assert Version("2.0.0b2") in versions
assert Version("3.0a1") in versions
assert Version("4.5-rc5") in versions
def test_find_versions_of_archive_with_fragment():
- versions = spack.util.web.find_versions_of_archive(
- root_tarball, root_with_fragment, list_depth=0
- )
+ versions = spack.url.find_versions_of_archive(root_tarball, root_with_fragment, list_depth=0)
assert Version("5.0.0") in versions
@@ -311,7 +310,7 @@ def test_remove_s3_url(monkeypatch, capfd):
def get_s3_session(url, method="fetch"):
return MockS3Client()
- monkeypatch.setattr(spack.util.s3, "get_s3_session", get_s3_session)
+ monkeypatch.setattr(spack.util.web, "get_s3_session", get_s3_session)
current_debug_level = tty.debug_level()
diff --git a/lib/spack/spack/ b/lib/spack/spack/
index bf2990f42f..c5e47232c0 100644
--- a/lib/spack/spack/
+++ b/lib/spack/spack/
@@ -27,246 +27,22 @@ it's never been told about that version before.
import io
import os
+import pathlib
import re
-from urllib.parse import urlsplit, urlunsplit
-import llnl.util.tty as tty
+import llnl.url
from llnl.util.tty.color import cescape, colorize
import spack.error
-import spack.util.compression as comp
-import spack.util.path as spath
+import spack.util.web
import spack.version
+from spack.util.path import convert_to_posix_path
# Note: We call the input to most of these functions a "path" but the functions
# work on paths and URLs. There's not a good word for both of these, but
# "path" seemed like the most generic term.
-def find_list_urls(url):
- r"""Find good list URLs for the supplied URL.
- By default, returns the dirname of the archive path.
- Provides special treatment for the following websites, which have a
- unique list URL different from the dirname of the download URL:
- ========= =======================================================
- GitHub<repo>/<name>/releases
- GitLab https://gitlab.\*/<repo>/<name>/tags
- BitBucket<repo>/<name>/downloads/?tab=tags
- CRAN https://\*<name>
- PyPI<name>/
- LuaRocks<repo>/<name>
- ========= =======================================================
- Note: this function is called by `spack versions`, `spack checksum`,
- and `spack create`, but not by `spack fetch` or `spack install`.
- Parameters:
- url (str): The download URL for the package
- Returns:
- set: One or more list URLs for the package
- """
- url_types = [
- # GitHub
- # e.g.
- (r"(.*github\.com/[^/]+/[^/]+)", lambda m: + "/releases"),
- # GitLab API endpoint
- # e.g.
- (
- r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
- lambda m: + "/" + + "/" + + "/tags",
- ),
- # GitLab non-API endpoint
- # e.g.
- (r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: + "/tags"),
- # BitBucket
- # e.g.
- (r"(.*[^/]+/[^/]+)", lambda m: + "/downloads/?tab=tags"),
- # CRAN
- # e.g.
- # e.g.
- (
- r"(.*\.r-project\.org/src/contrib)/([^_]+)",
- lambda m: + "/Archive/" +,
- ),
- # PyPI
- # e.g.
- # e.g.
- # e.g.
- # e.g.
- # e.g.
- # e.g.
- (
- r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
- lambda m: "" + + "/",
- ),
- # LuaRocks
- # e.g.
- # e.g.
- (
- r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
- + r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
- lambda m: ""
- +"org")
- + "/"
- +"name")
- + "/",
- ),
- ]
- list_urls = set([os.path.dirname(url)])
- for pattern, fun in url_types:
- match = re.search(pattern, url)
- if match:
- list_urls.add(fun(match))
- return list_urls
-def strip_query_and_fragment(path):
- try:
- components = urlsplit(path)
- stripped = components[:3] + (None, None)
- query, frag = components[3:5]
- suffix = ""
- if query:
- suffix += "?" + query
- if frag:
- suffix += "#" + frag
- return (urlunsplit(stripped), suffix)
- except ValueError:
- tty.debug("Got error parsing path %s" % path)
- return (path, "") # Ignore URL parse errors here
-def strip_version_suffixes(path):
- """Some tarballs contain extraneous information after the version:
- * ``bowtie2-2.2.5-source``
- * ``libevent-2.0.21-stable``
- * ````
- These strings are not part of the version number and should be ignored.
- This function strips those suffixes off and returns the remaining string.
- The goal is that the version is always the last thing in ``path``:
- * ``bowtie2-2.2.5``
- * ``libevent-2.0.21``
- * ``cuda_8.0.44``
- Args:
- path (str): The filename or URL for the package
- Returns:
- str: The ``path`` with any extraneous suffixes removed
- """
- # NOTE: This could be done with complicated regexes in parse_version_offset
- # NOTE: The problem is that we would have to add these regexes to the end
- # NOTE: of every single version regex. Easier to just strip them off
- # NOTE: permanently
- suffix_regexes = [
- # Download type
- r"[Ii]nstall",
- r"all",
- r"code",
- r"[Ss]ources?",
- r"file",
- r"full",
- r"single",
- r"with[a-zA-Z_-]+",
- r"rock",
- r"src(_0)?",
- r"public",
- r"bin",
- r"binary",
- r"run",
- r"[Uu]niversal",
- r"jar",
- r"complete",
- r"dynamic",
- r"oss",
- r"gem",
- r"tar",
- r"sh",
- # Download version
- r"release",
- r"bin",
- r"stable",
- r"[Ff]inal",
- r"rel",
- r"orig",
- r"dist",
- r"\+",
- # License
- r"gpl",
- # Arch
- # Needs to come before and after OS, appears in both orders
- r"ia32",
- r"intel",
- r"amd64",
- r"linux64",
- r"x64",
- r"64bit",
- r"x86[_-]64",
- r"i586_64",
- r"x86",
- r"i[36]86",
- r"ppc64(le)?",
- r"armv?(7l|6l|64)",
- # Other
- r"cpp",
- r"gtk",
- r"incubating",
- # OS
- r"[Ll]inux(_64)?",
- r"LINUX",
- r"[Uu]ni?x",
- r"[Ss]un[Oo][Ss]",
- r"[Mm]ac[Oo][Ss][Xx]?",
- r"[Oo][Ss][Xx]",
- r"[Dd]arwin(64)?",
- r"[Aa]pple",
- r"[Ww]indows",
- r"[Ww]in(64|32)?",
- r"[Cc]ygwin(64|32)?",
- r"[Mm]ingw",
- r"centos",
- # Arch
- # Needs to come before and after OS, appears in both orders
- r"ia32",
- r"intel",
- r"amd64",
- r"linux64",
- r"x64",
- r"64bit",
- r"x86[_-]64",
- r"i586_64",
- r"x86",
- r"i[36]86",
- r"ppc64(le)?",
- r"armv?(7l|6l|64)?",
- # PyPI
- r"[._-]py[23].*\.whl",
- r"[._-]cp[23].*\.whl",
- r"[._-]win.*\.exe",
- ]
- for regex in suffix_regexes:
- # Remove the suffix from the end of the path
- # This may be done multiple times
- path = re.sub(r"[._-]?" + regex + "$", "", path)
- return path
def strip_name_suffixes(path, version):
@@ -341,69 +117,6 @@ def strip_name_suffixes(path, version):
return path
-def split_url_extension(path):
- """Some URLs have a query string, e.g.:
- 1.
- 2.
- 3.
- In (1), the query string needs to be stripped to get at the
- extension, but in (2) & (3), the filename is IN a single final query
- argument.
- This strips the URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
- The suffix contains anything that was stripped off the URL to
- get at the file extension. In (1), it will be ``'?raw=true'``, but
- in (2), it will be empty. In (3) the suffix is a parameter that follows
- after the file extension, e.g.:
- 1. ``('', '.tgz', '?raw=true')``
- 2. ``('', '.tar.gz', None)``
- 3. ``('', '.tar.bz2', '?ref=v7.0.0')``
- """
- prefix, ext, suffix = path, "", ""
- # Strip off sourceforge download suffix.
- # e.g.
- prefix, suffix = spath.find_sourceforge_suffix(path)
- ext = comp.extension_from_path(prefix)
- if ext is not None:
- prefix = comp.strip_extension(prefix)
- else:
- prefix, suf = strip_query_and_fragment(prefix)
- ext = comp.extension_from_path(prefix)
- prefix = comp.strip_extension(prefix)
- suffix = suf + suffix
- if ext is None:
- ext = ""
- return prefix, ext, suffix
-def determine_url_file_extension(path):
- """This returns the type of archive a URL refers to. This is
- sometimes confusing because of URLs like:
- (1)
- Where the URL doesn't actually contain the filename. We need
- to know what type it is so that we can appropriately name files
- in mirrors.
- """
- match = re.search(r"github.com/.+/(zip|tar)ball/", path)
- if match:
- if match.group(1) == "zip":
- return "zip"
- elif match.group(1) == "tar":
- return "tar.gz"
- prefix, ext, suffix = split_url_extension(path)
- return ext
def parse_version_offset(path):
"""Try to extract a version string from a filename or URL.
@@ -426,13 +139,13 @@ def parse_version_offset(path):
# path: The prefix of the URL, everything before the ext and suffix
# ext: The file extension
# suffix: Any kind of query string that begins with a '?'
- path, ext, suffix = split_url_extension(path)
+ path, ext, suffix = llnl.url.split_url_extension(path)
# stem: Everything from path after the final '/'
original_stem = os.path.basename(path)
# Try to strip off anything after the version number
- stem = strip_version_suffixes(original_stem)
+ stem = llnl.url.strip_version_suffixes(original_stem)
# Assumptions:
@@ -620,7 +333,7 @@ def parse_name_offset(path, v=None):
# path: The prefix of the URL, everything before the ext and suffix
# ext: The file extension
# suffix: Any kind of query string that begins with a '?'
- path, ext, suffix = split_url_extension(path)
+ path, ext, suffix = llnl.url.split_url_extension(path)
# stem: Everything from path after the final '/'
original_stem = os.path.basename(path)
@@ -735,28 +448,6 @@ def parse_name_and_version(path):
return (name, ver)
-def insensitize(string):
- """Change upper and lowercase letters to be case insensitive in
- the provided string. e.g., 'a' becomes '[Aa]', 'B' becomes
- '[bB]', etc. Use for building regexes."""
- def to_ins(match):
- char = match.group(1)
- return "[%s%s]" % (char.lower(), char.upper())
- return re.sub(r"([a-zA-Z])", to_ins, string)
-def cumsum(elts, init=0, fn=lambda x: x):
- """Return cumulative sum of result of fn on each element in elts."""
- sums = []
- s = init
- for i, e in enumerate(elts):
- sums.append(s)
- s += fn(e)
- return sums
def find_all(substring, string):
"""Returns a list containing the indices of
every occurrence of substring in string."""
@@ -912,6 +603,122 @@ def color_url(path, **kwargs):
return colorize(out.getvalue())
+def find_versions_of_archive(
+ archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
+):
+ """Scrape web pages for new versions of a tarball. This function prefers URLs in the
+ following order: links found on the scraped page that match a url generated by the
+ reference package, found and in the archive_urls list, found and derived from those
+ in the archive_urls list, and if none are found for a version then the item in the
+ archive_urls list is included for the version.
+ Args:
+ archive_urls (str or list or tuple): URL or sequence of URLs for
+ different versions of a package. Typically these are just the
+ tarballs from the package file itself. By default, this searches
+ the parent directories of archives.
+ list_url (str or None): URL for a listing of archives.
+ Spack will scrape these pages for download links that look
+ like the archive URL.
+ list_depth (int): max depth to follow links on list_url pages.
+ Defaults to 0.
+ concurrency (int): maximum number of concurrent requests
+ reference_package (spack.package_base.PackageBase or None): a spack package
+ used as a reference for url detection. Uses the url_for_version
+ method on the package to produce reference urls which, if found,
+ are preferred.
+ """
+ if not isinstance(archive_urls, (list, tuple)):
+ archive_urls = [archive_urls]
+ # Generate a list of list_urls based on archive urls and any
+ # explicitly listed list_url in the package
+ list_urls = set()
+ if list_url is not None:
+ list_urls.add(list_url)
+ for aurl in archive_urls:
+ list_urls |= llnl.url.find_list_urls(aurl)
+ # Add '/' to the end of the URL. Some web servers require this.
+ additional_list_urls = set()
+ for lurl in list_urls:
+ if not lurl.endswith("/"):
+ additional_list_urls.add(lurl + "/")
+ list_urls |= additional_list_urls
+ # Grab some web pages to scrape.
+ pages, links = spack.util.web.spider(list_urls, depth=list_depth, concurrency=concurrency)
+ # Scrape them for archive URLs
+ regexes = []
+ for aurl in archive_urls:
+ # This creates a regex from the URL with a capture group for
+ # the version part of the URL. The capture group is converted
+ # to a generic wildcard, so we can use this to extract things
+ # on a page that look like archive URLs.
+ url_regex = wildcard_version(aurl)
+ # We'll be a bit more liberal and just look for the archive
+ # part, not the full path.
+ # this is a URL so it is a posixpath even on Windows
+ url_regex = pathlib.PurePosixPath(url_regex).name
+ # We need to add a / to the beginning of the regex to prevent
+ # Spack from picking up similarly named packages like:
+ #
+ #
+ #
+ #
+ url_regex = "/" + url_regex
+ # We need to add a $ anchor to the end of the regex to prevent
+ # Spack from picking up signature files like:
+ # .asc
+ # .md5
+ # .sha256
+ # .sig
+ # However, SourceForge downloads still need to end in '/download'.
+ url_regex += r"(\/download)?"
+ # PyPI adds #sha256=... to the end of the URL
+ url_regex += "(#sha256=.*)?"
+ url_regex += "$"
+ regexes.append(url_regex)
+ regexes = [re.compile(r) for r in regexes]
+ # Build a dict version -> URL from any links that match the wildcards.
+ # Walk through archive_url links first.
+ # Any conflicting versions will be overwritten by the list_url links.
+ versions = {}
+ matched = set()
+ for url in sorted(links):
+ url = convert_to_posix_path(url)
+ if any(r.search(url) for r in regexes):
+ try:
+ ver = parse_version(url)
+ if ver in matched:
+ continue
+ versions[ver] = url
+ # prevent this version from getting overwritten
+ if reference_package is not None:
+ if url == reference_package.url_for_version(ver):
+ matched.add(ver)
+ else:
+ extrapolated_urls = [substitute_version(u, ver) for u in archive_urls]
+ if url in extrapolated_urls:
+ matched.add(ver)
+ except UndetectableVersionError:
+ continue
+ for url in archive_urls:
+ url = convert_to_posix_path(url)
+ ver = parse_version(url)
+ if ver not in versions:
+ versions[ver] = url
+ return versions
class UrlParseError(spack.error.SpackError):
"""Raised when the URL module can't parse something correctly."""
diff --git a/lib/spack/spack/util/ b/lib/spack/spack/util/
index b8dcd032f4..25ccfdf0bb 100644
--- a/lib/spack/spack/util/
+++ b/lib/spack/spack/util/
@@ -9,27 +9,13 @@ import os
import re
import shutil
import sys
-from itertools import product
+import llnl.url
from llnl.util import tty
-import spack.util.path as spath
from spack.error import SpackError
from spack.util.executable import CommandNotFoundError, which
-# Supported archive extensions.
-PRE_EXTS = ["tar", "TAR"]
-EXTS = ["gz", "bz2", "xz", "Z"]
-NOTAR_EXTS = ["zip", "tgz", "tbz2", "tbz", "txz"]
-CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}
-# Add PRE_EXTS and EXTS last so that .tar.gz is matched *before* .tar or .gz
- [".".join(ext) for ext in product(PRE_EXTS, EXTS)] + PRE_EXTS + EXTS + NOTAR_EXTS
import bz2 # noqa
@@ -66,10 +52,6 @@ def is_bz2_supported():
return _bz2_support
-def allowed_archive(path):
- return False if not path else any(path.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
def _system_untar(archive_file, remove_archive_file=False):
"""Returns path to unarchived tar file.
Untars archive via system tar.
@@ -78,7 +60,7 @@ def _system_untar(archive_file, remove_archive_file=False):
archive_file (str): absolute path to the archive to be extracted.
Can be one of .tar(.[gz|bz2|xz|Z]) or .(tgz|tbz|tbz2|txz).
- archive_file_no_ext = strip_extension(archive_file)
+ archive_file_no_ext = llnl.url.strip_extension(archive_file)
outfile = os.path.basename(archive_file_no_ext)
if archive_file_no_ext == archive_file:
# the archive file has no extension. Tar on windows cannot untar onto itself
@@ -114,7 +96,7 @@ def _bunzip2(archive_file):
def _py_bunzip(archive_file):
"""Returns path to decompressed file.
Decompresses bz2 compressed archives/files via python's bz2 module"""
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
working_dir = os.getcwd()
archive_out = os.path.join(working_dir, decompressed_file)
f_bz = bz2.BZ2File(archive_file, mode="rb")
@@ -128,7 +110,7 @@ def _system_bunzip(archive_file):
"""Returns path to decompressed file.
Decompresses bz2 compressed archives/files via system bzip2 utility"""
compressed_file_name = os.path.basename(archive_file)
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
working_dir = os.getcwd()
archive_out = os.path.join(working_dir, decompressed_file)
copy_path = os.path.join(working_dir, compressed_file_name)
@@ -158,7 +140,7 @@ def _gunzip(archive_file):
def _py_gunzip(archive_file):
"""Returns path to gunzip'd file
Decompresses `.gz` compressed archvies via python gzip module"""
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "gz"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "gz"))
working_dir = os.getcwd()
destination_abspath = os.path.join(working_dir, decompressed_file)
f_in = gzip.open(archive_file, "rb")
@@ -171,7 +153,7 @@ def _py_gunzip(archive_file):
def _system_gunzip(archive_file):
"""Returns path to gunzip'd file
Decompresses `.gz` compressed files via system gzip"""
- archive_file_no_ext = strip_compression_extension(archive_file)
+ archive_file_no_ext = llnl.url.strip_compression_extension(archive_file)
if archive_file_no_ext == archive_file:
# the zip file has no extension. On Unix gunzip cannot unzip onto itself
archive_file = archive_file + ".gz"
@@ -196,7 +178,7 @@ def _unzip(archive_file):
archive_file (str): absolute path of the file to be decompressed
- extracted_file = os.path.basename(strip_extension(archive_file, "zip"))
+ extracted_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="zip"))
if sys.platform == "win32":
return _system_untar(archive_file)
@@ -259,7 +241,7 @@ def _win_compressed_tarball_handler(decompressor):
def _py_lzma(archive_file):
"""Returns path to decompressed .xz files
Decompress lzma compressed .xz files via python lzma module"""
- decompressed_file = os.path.basename(strip_compression_extension(archive_file, "xz"))
+ decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "xz"))
archive_out = os.path.join(os.getcwd(), decompressed_file)
with open(archive_out, "wb") as ar:
with lzma.open(archive_file) as lar:
@@ -272,7 +254,7 @@ def _xz(archive_file):
Decompress lzma compressed .xz files via xz command line
- decompressed_file = os.path.basename(strip_extension(archive_file, "xz"))
+ decompressed_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="xz"))
working_dir = os.getcwd()
destination_abspath = os.path.join(working_dir, decompressed_file)
compressed_file = os.path.basename(archive_file)
@@ -297,13 +279,13 @@ def _system_7zip(archive_file):
archive_file (str): absolute path of file to be unarchived
- outfile = os.path.basename(strip_compression_extension(archive_file))
+ outfile = os.path.basename(llnl.url.strip_compression_extension(archive_file))
_7z = which("7z")
if not _7z:
raise CommandNotFoundError(
"7z unavailable,\
unable to extract %s files. 7z can be installed via Spack"
- % extension_from_path(archive_file)
+ % llnl.url.extension_from_path(archive_file)
@@ -318,7 +300,7 @@ def decompressor_for(path, extension=None):
if not extension:
extension = extension_from_file(path, decompress=True)
- if not allowed_archive(extension):
+ if not llnl.url.allowed_archive(extension):
raise CommandNotFoundError(
"Cannot extract archive, \
unrecognized file extension: '%s'"
@@ -394,7 +376,7 @@ def decompressor_for_win(extension):
path (str): path of the archive file requiring decompression
extension (str): extension
- extension = expand_contracted_extension(extension)
+ extension = llnl.url.expand_contracted_extension(extension)
# Windows native tar can handle .zip extensions, use standard
# unzip method
if re.match(r"zip$", extension):
@@ -415,7 +397,7 @@ def decompressor_for_win(extension):
# python based decompression strategy
# Expand extension from contracted extension i.e. tar.gz from .tgz
# no-op on non contracted extensions
- compression_extension = compression_ext_from_compressed_archive(extension)
+ compression_extension = llnl.url.compression_ext_from_compressed_archive(extension)
decompressor = _determine_py_decomp_archive_strategy(compression_extension)
if not decompressor:
raise SpackError(
@@ -657,7 +639,7 @@ def extension_from_stream(stream, decompress=False):
"Cannot derive file extension from magic number;"
" falling back to regex path parsing."
- return extension_from_path(
+ return llnl.url.extension_from_path(
resultant_ext = suffix_ext if not prefix_ext else ".".join([prefix_ext, suffix_ext])
tty.debug("File extension %s successfully derived by magic number." % resultant_ext)
return resultant_ext
@@ -693,114 +675,11 @@ def extension_from_file(file, decompress=False):
if ext and ext.startswith("tar."):
suf = ext.split(".")[1]
abbr = "t" + suf
- if check_extension(file, abbr):
+ if llnl.url.has_extension(file, abbr):
return abbr
if not ext:
# If unable to parse extension from stream,
# attempt to fall back to string parsing
- ext = extension_from_path(file)
+ ext = llnl.url.extension_from_path(file)
return ext
return None
-def extension_from_path(path):
- """Returns the allowed archive extension for a path.
- If path does not include a valid archive extension
- (see`spack.util.compression.ALLOWED_ARCHIVE_TYPES`) return None
- """
- if path is None:
- raise ValueError("Can't call extension() on None")
- if check_extension(path, t):
- return t
- return None
-def strip_compression_extension(path, ext=None):
- """Returns path with last supported (can be combined with tar) or
- provided archive extension stripped"""
- path_ext = extension_from_path(path)
- if path_ext:
- path = expand_contracted_extension_in_path(path)
- exts_to_check = EXTS
- if ext:
- exts_to_check = [ext]
- for ext_check in exts_to_check:
- mod_path = check_and_remove_ext(path, ext_check)
- if mod_path != path:
- return mod_path
- return path
-def strip_extension(path, ext=None):
- """Returns the part of a path that does not include extension.
- If ext is given, only attempts to remove that extension. If no
- extension given, attempts to strip any valid extension from path"""
- if ext:
- return check_and_remove_ext(path, ext)
- mod_path = check_and_remove_ext(path, t)
- if mod_path != path:
- return mod_path
- return path
-def check_extension(path, ext):
- """Returns true if extension is present in path
- false otherwise"""
- # Strip sourceforge suffix.
- prefix, _ = spath.find_sourceforge_suffix(path)
- if not ext.startswith(r"\."):
- ext = r"\.%s$" % ext
- if re.search(ext, prefix):
- return True
- return False
-def reg_remove_ext(path, ext):
- """Returns path with ext remove via regex"""
- if path and ext:
- suffix = r"\.%s$" % ext
- return re.sub(suffix, "", path)
- return path
-def check_and_remove_ext(path, ext):
- """Returns path with extension removed if extension
- is present in path. Otherwise just returns path"""
- if check_extension(path, ext):
- return reg_remove_ext(path, ext)
- return path
-def _substitute_extension(path, old_ext, new_ext):
- """Returns path with old_ext replaced with new_ext.
- old_ext and new_ext can be extension strings or regexs"""
- return re.sub(rf"{old_ext}", rf"{new_ext}", path)
-def expand_contracted_extension_in_path(path, ext=None):
- """Returns path with any contraction extension (i.e. tgz) expanded
- (i.e. tar.gz). If ext is specified, only attempt to expand that extension"""
- if not ext:
- ext = extension_from_path(path)
- expanded_ext = expand_contracted_extension(ext)
- if expanded_ext != ext:
- return _substitute_extension(path, ext, expanded_ext)
- return path
-def expand_contracted_extension(extension):
- """Return expanded version of contracted extension
- i.e. .tgz -> .tar.gz, no op on non contracted extensions"""
- extension = extension.strip(".")
- return CONTRACTION_MAP.get(extension, extension)
-def compression_ext_from_compressed_archive(extension):
- """Returns compression extension for a compressed archive"""
- extension = expand_contracted_extension(extension)
- for ext in [*EXTS]:
- if ext in extension:
- return ext
diff --git a/lib/spack/spack/util/ b/lib/spack/spack/util/
index 856fe73001..4e997df52b 100644
--- a/lib/spack/spack/util/
+++ b/lib/spack/spack/util/
@@ -10,6 +10,10 @@ integrate GCS Blob storage with spack buildcache.
import os
import sys
+import urllib.parse
+import urllib.response
+from urllib.error import URLError
+from urllib.request import BaseHandler
import llnl.util.tty as tty
@@ -222,3 +226,21 @@ class GCSBlob:
return headers
+def gcs_open(req, *args, **kwargs):
+ """Open a reader stream to a blob object on GCS"""
+ url = urllib.parse.urlparse(req.get_full_url())
+ gcsblob = GCSBlob(url)
+ if not gcsblob.exists():
+ raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
+ stream = gcsblob.get_blob_byte_stream()
+ headers = gcsblob.get_blob_headers()
+ return urllib.response.addinfourl(stream, headers, url)
+class GCSHandler(BaseHandler):
+ def gs_open(self, req):
+ return gcs_open(req)
diff --git a/lib/spack/spack/util/ b/lib/spack/spack/util/
index ef6fb883c7..3dc0ea676c 100644
--- a/lib/spack/spack/util/
+++ b/lib/spack/spack/util/
@@ -109,15 +109,6 @@ def win_exe_ext():
return ".exe"
-def find_sourceforge_suffix(path):
- """find and match sourceforge filepath components
- Return match object"""
- match = re.search(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$", path)
- if match:
- return match.groups()
- return path, ""
def path_to_os_path(*pths):
Takes an arbitrary number of positional parameters
diff --git a/lib/spack/spack/util/ b/lib/spack/spack/util/
index c4d53d86b6..796c49a8c8 100644
--- a/lib/spack/spack/util/
+++ b/lib/spack/spack/util/
@@ -3,10 +3,13 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
+import urllib.error
import urllib.parse
+import urllib.request
+import urllib.response
+from io import BufferedReader, BytesIO, IOBase
from typing import Any, Dict, Tuple
-import spack
import spack.config
#: Map (mirror name, method) tuples to s3 client instances.
@@ -114,4 +117,72 @@ def get_mirror_s3_connection_info(mirror, method):
if endpoint_url:
s3_client_args["endpoint_url"] = _parse_s3_endpoint_url(endpoint_url)
- return (s3_connection, s3_client_args)
+ return s3_connection, s3_client_args
+# NOTE(opadron): Workaround issue in boto where its StreamingBody
+# implementation is missing several APIs expected from IOBase. These missing
+# APIs prevent the streams returned by boto from being passed as-are along to
+# urllib.
+class WrapStream(BufferedReader):
+ def __init__(self, raw):
+ # In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
+ # only add missing attributes in older versions.
+ #
+ if not isinstance(raw, IOBase):
+ raw.readable = lambda: True
+ raw.writable = lambda: False
+ raw.seekable = lambda: False
+ raw.closed = False
+ raw.flush = lambda: None
+ super().__init__(raw)
+ def detach(self):
+ self.raw = None
+ def read(self, *args, **kwargs):
+ return self.raw.read(*args, **kwargs)
+ def __getattr__(self, key):
+ return getattr(self.raw, key)
+def _s3_open(url, method="GET"):
+ parsed = urllib.parse.urlparse(url)
+ s3 = get_s3_session(url, method="fetch")
+ bucket = parsed.netloc
+ key = parsed.path
+ if key.startswith("/"):
+ key = key[1:]
+ if method not in ("GET", "HEAD"):
+ raise urllib.error.URLError(
+ "Only GET and HEAD verbs are currently supported for the s3:// scheme"
+ )
+ try:
+ if method == "GET":
+ obj = s3.get_object(Bucket=bucket, Key=key)
+ # NOTE(opadron): Apply workaround here (see above)
+ stream = WrapStream(obj["Body"])
+ elif method == "HEAD":
+ obj = s3.head_object(Bucket=bucket, Key=key)
+ stream = BytesIO()
+ except s3.ClientError as e:
+ raise urllib.error.URLError(e) from e
+ headers = obj["ResponseMetadata"]["HTTPHeaders"]
+ return url, headers, stream
+class UrllibS3Handler(urllib.request.BaseHandler):
+ def s3_open(self, req):
+ orig_url = req.get_full_url()
+ url, headers, stream = _s3_open(orig_url, method=req.get_method())
+ return urllib.response.addinfourl(stream, headers, url)
diff --git a/lib/spack/spack/util/ b/lib/spack/spack/util/
index 22309ba87f..79ad39ebd7 100644
--- a/lib/spack/spack/util/
+++ b/lib/spack/spack/util/
@@ -21,23 +21,17 @@ from typing import IO, Optional
from urllib.error import HTTPError, URLError
from urllib.request import HTTPSHandler, Request, build_opener
-import llnl.util.lang
-import llnl.util.tty as tty
+import llnl.url
+from llnl.util import lang, tty
from llnl.util.filesystem import mkdirp, rename, working_dir
-import spack
import spack.config
import spack.error
-import spack.gcs_handler
-import spack.s3_handler
-import spack.url
-import spack.util.crypto
-import spack.util.gcs as gcs_util
-import spack.util.s3 as s3_util
import spack.util.url as url_util
-from spack.util.compression import ALLOWED_ARCHIVE_TYPES
-from spack.util.executable import CommandNotFoundError, which
-from spack.util.path import convert_to_posix_path
+from .executable import CommandNotFoundError, which
+from .gcs import GCSBlob, GCSBucket, GCSHandler
+from .s3 import UrllibS3Handler, get_s3_session
class DetailedHTTPError(HTTPError):
@@ -66,8 +60,8 @@ class SpackHTTPDefaultErrorHandler(urllib.request.HTTPDefaultErrorHandler):
def _urlopen():
- s3 = spack.s3_handler.UrllibS3Handler()
- gcs = spack.gcs_handler.GCSHandler()
+ s3 = UrllibS3Handler()
+ gcs = GCSHandler()
error_handler = SpackHTTPDefaultErrorHandler()
# One opener with HTTPS ssl enabled
@@ -90,7 +84,7 @@ def _urlopen():
#: Dispatches to the correct, based on Spack configuration.
-urlopen = llnl.util.lang.Singleton(_urlopen)
+urlopen = lang.Singleton(_urlopen)
#: User-Agent used in Request objects
SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
@@ -190,14 +184,14 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non
while remote_path.startswith("/"):
remote_path = remote_path[1:]
- s3 = s3_util.get_s3_session(remote_url, method="push")
+ s3 = get_s3_session(remote_url, method="push")
s3.upload_file(local_file_path, remote_url.netloc, remote_path, ExtraArgs=extra_args)
if not keep_original:
elif remote_url.scheme == "gs":
- gcs = gcs_util.GCSBlob(remote_url)
+ gcs = GCSBlob(remote_url)
if not keep_original:
@@ -427,7 +421,7 @@ def remove_url(url, recursive=False):
if url.scheme == "s3":
# Try to find a mirror for potential connection information
- s3 = s3_util.get_s3_session(url, method="push")
+ s3 = get_s3_session(url, method="push")
bucket = url.netloc
if recursive:
# Because list_objects_v2 can only return up to 1000 items
@@ -460,10 +454,10 @@ def remove_url(url, recursive=False):
elif url.scheme == "gs":
if recursive:
- bucket = gcs_util.GCSBucket(url)
+ bucket = GCSBucket(url)
- blob = gcs_util.GCSBlob(url)
+ blob = GCSBlob(url)
@@ -538,14 +532,14 @@ def list_url(url, recursive=False):
if url.scheme == "s3":
- s3 = s3_util.get_s3_session(url, method="fetch")
+ s3 = get_s3_session(url, method="fetch")
if recursive:
return list(_iter_s3_prefix(s3, url))
return list(set(key.split("/", 1)[0] for key in _iter_s3_prefix(s3, url)))
elif url.scheme == "gs":
- gcs = gcs_util.GCSBucket(url)
+ gcs = GCSBucket(url)
return gcs.get_all_blobs(recursive=recursive)
@@ -636,7 +630,7 @@ def spider(root_urls, depth=0, concurrency=32):
# Skip stuff that looks like an archive
- if any(raw_link.endswith(s) for s in ALLOWED_ARCHIVE_TYPES):
+ if any(raw_link.endswith(s) for s in llnl.url.ALLOWED_ARCHIVE_TYPES):
# Skip already-visited links
@@ -696,7 +690,7 @@ def spider(root_urls, depth=0, concurrency=32):
current_depth, depth, len(spider_args)
- results = tp.map(llnl.util.lang.star(_spider), spider_args)
+ results = tp.map(lang.star(_spider), spider_args)
spider_args = []
collect = current_depth < depth
for sub_pages, sub_links, sub_spider_args in results:
@@ -713,123 +707,6 @@ def spider(root_urls, depth=0, concurrency=32):
return pages, links
-def find_versions_of_archive(
- archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
-):
- """Scrape web pages for new versions of a tarball. This function prefers URLs in the
- following order: links found on the scraped page that match a url generated by the
- reference package, found and in the archive_urls list, found and derived from those
- in the archive_urls list, and if none are found for a version then the item in the
- archive_urls list is included for the version.
- Args:
- archive_urls (str or list or tuple): URL or sequence of URLs for
- different versions of a package. Typically these are just the
- tarballs from the package file itself. By default, this searches
- the parent directories of archives.
- list_url (str or None): URL for a listing of archives.
- Spack will scrape these pages for download links that look
- like the archive URL.
- list_depth (int): max depth to follow links on list_url pages.
- Defaults to 0.
- concurrency (int): maximum number of concurrent requests
- reference_package (spack.package_base.PackageBase or None): a spack package
- used as a reference for url detection. Uses the url_for_version
- method on the package to produce reference urls which, if found,
- are preferred.
- """
- if not isinstance(archive_urls, (list, tuple)):
- archive_urls = [archive_urls]
- # Generate a list of list_urls based on archive urls and any
- # explicitly listed list_url in the package
- list_urls = set()
- if list_url is not None:
- list_urls.add(list_url)
- for aurl in archive_urls:
- list_urls |= spack.url.find_list_urls(aurl)
- # Add '/' to the end of the URL. Some web servers require this.
- additional_list_urls = set()
- for lurl in list_urls:
- if not lurl.endswith("/"):
- additional_list_urls.add(lurl + "/")
- list_urls |= additional_list_urls
- # Grab some web pages to scrape.
- pages, links = spider(list_urls, depth=list_depth, concurrency=concurrency)
- # Scrape them for archive URLs
- regexes = []
- for aurl in archive_urls:
- # This creates a regex from the URL with a capture group for
- # the version part of the URL. The capture group is converted
- # to a generic wildcard, so we can use this to extract things
- # on a page that look like archive URLs.
- url_regex = spack.url.wildcard_version(aurl)
- # We'll be a bit more liberal and just look for the archive
- # part, not the full path.
- # this is a URL so it is a posixpath even on Windows
- url_regex = PurePosixPath(url_regex).name
- # We need to add a / to the beginning of the regex to prevent
- # Spack from picking up similarly named packages like:
- #
- #
- #
- #
- url_regex = "/" + url_regex
- # We need to add a $ anchor to the end of the regex to prevent
- # Spack from picking up signature files like:
- # .asc
- # .md5
- # .sha256
- # .sig
- # However, SourceForge downloads still need to end in '/download'.
- url_regex += r"(\/download)?"
- # PyPI adds #sha256=... to the end of the URL
- url_regex += "(#sha256=.*)?"
- url_regex += "$"
- regexes.append(url_regex)
- # Build a dict version -> URL from any links that match the wildcards.
- # Walk through archive_url links first.
- # Any conflicting versions will be overwritten by the list_url links.
- versions = {}
- matched = set()
- for url in sorted(links):
- url = convert_to_posix_path(url)
- if any(re.search(r, url) for r in regexes):
- try:
- ver = spack.url.parse_version(url)
- if ver in matched:
- continue
- versions[ver] = url
- # prevent this version from getting overwritten
- if reference_package is not None:
- if url == reference_package.url_for_version(ver):
- matched.add(ver)
- else:
- extrapolated_urls = [
- spack.url.substitute_version(u, ver) for u in archive_urls
- ]
- if url in extrapolated_urls:
- matched.add(ver)
- except spack.url.UndetectableVersionError:
- continue
- for url in archive_urls:
- url = convert_to_posix_path(url)
- ver = spack.url.parse_version(url)
- if ver not in versions:
- versions[ver] = url
- return versions
def get_header(headers, header_name):
"""Looks up a dict of headers for the given header value.
diff --git a/var/spack/repos/builtin/packages/protobuf/package.py b/var/spack/repos/builtin/packages/protobuf/package.py
index a1a9a8e2d1..9a4ed84058 100644
--- a/var/spack/repos/builtin/packages/protobuf/package.py
+++ b/var/spack/repos/builtin/packages/protobuf/package.py
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import spack.util.web
+import spack.url
from spack.package import *
@@ -120,9 +120,7 @@ class Protobuf(CMakePackage):
return dict(
lambda u: (u, self.url_for_version(u)),
- spack.util.web.find_versions_of_archive(
- self.all_urls, self.list_url, self.list_depth
- ),
+ spack.url.find_versions_of_archive(self.all_urls, self.list_url, self.list_depth),