author    Harmen Stoppels <me@harmenstoppels.nl>  2024-09-15 20:26:02 +0200
committer GitHub <noreply@github.com>  2024-09-15 20:26:02 +0200
commit    b4e32706db0aba26b1fdf16755694444cb11e89c
tree      416977e2307031c5939997be18117d6e7e3e1a86
parent    bcde9a3afbff7bc542792f949d8e31f5424f2c70
fetch_strategy: show the effective URL on checksum validation failure (#46349)
 lib/spack/spack/fetch_strategy.py | 46
 1 file changed, 26 insertions(+), 20 deletions(-)
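
The patch below records the post-redirect ("effective") URL while fetching and threads it into the content-type warning and the checksum error. A minimal standalone sketch of the mechanism it relies on follows; the helper name and the direct use of urllib.request.urlopen are illustrative assumptions, not Spack's actual fetch path:

    # Sketch only: urlopen follows HTTP redirects, and the response it returns
    # reports the URL that was actually fetched via geturl().
    import http.client
    import shutil
    import urllib.request
    from typing import Optional, Tuple

    def fetch_and_record_redirect(url: str, save_file: str) -> Tuple[str, Optional[str]]:
        """Download url to save_file and return (requested_url, effective_url)."""
        effective_url: Optional[str] = None
        with urllib.request.urlopen(url) as response:
            with open(save_file, "wb") as f:
                shutil.copyfileobj(response, f)
            # Only HTTP(S) responses carry a redirect-aware URL.
            if isinstance(response, http.client.HTTPResponse):
                effective_url = response.geturl()
        return url, effective_url

If the two URLs differ, a later checksum failure can then name the mirror that actually served the file, which is the information the diff passes through to verify_checksum.
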
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
index f163f2cb34..82b0112173 100644
--- a/lib/spack/spack/fetch_strategy.py
+++ b/lib/spack/spack/fetch_strategy.py
@@ -24,6 +24,7 @@ in order to build it. They need to define the following methods:
"""
import copy
import functools
+import http.client
import os
import os.path
import re
@@ -59,19 +60,6 @@ from spack.util.executable import CommandNotFoundError, Executable, which
#: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = []
-CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = (
- "The contents of {subject} look like {content_type}. Either the URL"
- " you are trying to use does not exist or you have an internet gateway"
- " issue. You can remove the bad archive using 'spack clean"
- " <package>', then try again using the correct URL."
-)
-
-
-def warn_content_type_mismatch(subject, content_type="HTML"):
- tty.warn(
- CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(subject=subject, content_type=content_type)
- )
-
def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set
@@ -265,6 +253,7 @@ class URLFetchStrategy(FetchStrategy):
self.extra_options: dict = kwargs.get("fetch_options", {})
self._curl: Optional[Executable] = None
self.extension: Optional[str] = kwargs.get("extension", None)
+ self._effective_url: Optional[str] = None
@property
def curl(self) -> Executable:
@@ -320,7 +309,13 @@ class URLFetchStrategy(FetchStrategy):
# redirects properly.
content_types = re.findall(r"Content-Type:[^\r\n]+", headers, flags=re.IGNORECASE)
if content_types and "text/html" in content_types[-1]:
- warn_content_type_mismatch(self.archive_file or "the archive")
+ msg = (
+ f"The contents of {self.archive_file or 'the archive'} fetched from {self.url} "
+ "look like HTML. This can indicate a broken URL or an internet gateway issue."
+ )
+ if self._effective_url and self._effective_url != self.url:
+ msg += f" The URL redirected to {self._effective_url}."
+ tty.warn(msg)
@_needs_stage
def _fetch_urllib(self, url):
@@ -346,6 +341,12 @@ class URLFetchStrategy(FetchStrategy):
with open(save_file, "wb") as f:
shutil.copyfileobj(response, f)
+ # Save the redirected URL for error messages. Sometimes we're redirected to an arbitrary
+ # mirror that is broken, leading to spurious download failures. In that case it's helpful
+ # for users to know which URL was actually fetched.
+ if isinstance(response, http.client.HTTPResponse):
+ self._effective_url = response.geturl()
+
self._check_headers(str(response.headers))
@_needs_stage
@@ -465,7 +466,7 @@ class URLFetchStrategy(FetchStrategy):
if not self.digest:
raise NoDigestError(f"Attempt to check {self.__class__.__name__} with no digest.")
- verify_checksum(self.archive_file, self.digest)
+ verify_checksum(self.archive_file, self.digest, self.url, self._effective_url)
@_needs_stage
def reset(self):
@@ -1433,21 +1434,26 @@ class FetchAndVerifyExpandedFile(URLFetchStrategy):
if len(files) != 1:
raise ChecksumError(self, f"Expected a single file in {src_dir}.")
- verify_checksum(os.path.join(src_dir, files[0]), self.expanded_sha256)
+ verify_checksum(
+ os.path.join(src_dir, files[0]), self.expanded_sha256, self.url, self._effective_url
+ )
-def verify_checksum(file, digest):
+def verify_checksum(file: str, digest: str, url: str, effective_url: Optional[str]) -> None:
checker = crypto.Checker(digest)
if not checker.check(file):
# On failure, provide some information about the file size and
# contents, so that we can quickly see what the issue is (redirect
# was not followed, empty file, text instead of binary, ...)
size, contents = fs.filesummary(file)
- raise ChecksumError(
- f"{checker.hash_name} checksum failed for {file}",
+ long_msg = (
f"Expected {digest} but got {checker.sum}. "
- f"File size = {size} bytes. Contents = {contents!r}",
+ f"File size = {size} bytes. Contents = {contents!r}. "
+ f"URL = {url}"
)
+ if effective_url and effective_url != url:
+ long_msg += f", redirected to = {effective_url}"
+ raise ChecksumError(f"{checker.hash_name} checksum failed for {file}", long_msg)
def stable_target(fetcher):
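
For completeness, a self-contained approximation of the updated checksum reporting, using only the standard library; hashlib and ValueError stand in for Spack's crypto.Checker and ChecksumError, and the function name is illustrative:

    import hashlib
    from typing import Optional

    def check_sha256(path: str, expected: str, url: str, effective_url: Optional[str]) -> None:
        """Raise if path's sha256 differs from expected, naming both URLs involved."""
        with open(path, "rb") as f:
            actual = hashlib.sha256(f.read()).hexdigest()
        if actual != expected:
            msg = f"Expected {expected} but got {actual}. URL = {url}"
            # Mention the redirect only when one actually happened.
            if effective_url and effective_url != url:
                msg += f", redirected to = {effective_url}"
            raise ValueError(f"sha256 checksum failed for {path}: {msg}")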