diff options
author | Harmen Stoppels <me@harmenstoppels.nl> | 2024-09-15 20:26:02 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-15 20:26:02 +0200 |
commit | b4e32706db0aba26b1fdf16755694444cb11e89c (patch) | |
tree | 416977e2307031c5939997be18117d6e7e3e1a86 | |
parent | bcde9a3afbff7bc542792f949d8e31f5424f2c70 (diff) | |
download | spack-b4e32706db0aba26b1fdf16755694444cb11e89c.tar.gz spack-b4e32706db0aba26b1fdf16755694444cb11e89c.tar.bz2 spack-b4e32706db0aba26b1fdf16755694444cb11e89c.tar.xz spack-b4e32706db0aba26b1fdf16755694444cb11e89c.zip |
fetch_strategy: show the effective URL on checksum validation failure (#46349)
-rw-r--r-- | lib/spack/spack/fetch_strategy.py | 46 |
1 files changed, 26 insertions, 20 deletions
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index f163f2cb34..82b0112173 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -24,6 +24,7 @@ in order to build it. They need to define the following methods: """ import copy import functools +import http.client import os import os.path import re @@ -59,19 +60,6 @@ from spack.util.executable import CommandNotFoundError, Executable, which #: List of all fetch strategies, created by FetchStrategy metaclass. all_strategies = [] -CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = ( - "The contents of {subject} look like {content_type}. Either the URL" - " you are trying to use does not exist or you have an internet gateway" - " issue. You can remove the bad archive using 'spack clean" - " <package>', then try again using the correct URL." -) - - -def warn_content_type_mismatch(subject, content_type="HTML"): - tty.warn( - CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(subject=subject, content_type=content_type) - ) - def _needs_stage(fun): """Many methods on fetch strategies require a stage to be set @@ -265,6 +253,7 @@ class URLFetchStrategy(FetchStrategy): self.extra_options: dict = kwargs.get("fetch_options", {}) self._curl: Optional[Executable] = None self.extension: Optional[str] = kwargs.get("extension", None) + self._effective_url: Optional[str] = None @property def curl(self) -> Executable: @@ -320,7 +309,13 @@ class URLFetchStrategy(FetchStrategy): # redirects properly. content_types = re.findall(r"Content-Type:[^\r\n]+", headers, flags=re.IGNORECASE) if content_types and "text/html" in content_types[-1]: - warn_content_type_mismatch(self.archive_file or "the archive") + msg = ( + f"The contents of {self.archive_file or 'the archive'} fetched from {self.url} " + " looks like HTML. This can indicate a broken URL, or an internet gateway issue." + ) + if self._effective_url != self.url: + msg += f" The URL redirected to {self._effective_url}." + tty.warn(msg) @_needs_stage def _fetch_urllib(self, url): @@ -346,6 +341,12 @@ class URLFetchStrategy(FetchStrategy): with open(save_file, "wb") as f: shutil.copyfileobj(response, f) + # Save the redirected URL for error messages. Sometimes we're redirected to an arbitrary + # mirror that is broken, leading to spurious download failures. In that case it's helpful + # for users to know which URL was actually fetched. + if isinstance(response, http.client.HTTPResponse): + self._effective_url = response.geturl() + self._check_headers(str(response.headers)) @_needs_stage @@ -465,7 +466,7 @@ class URLFetchStrategy(FetchStrategy): if not self.digest: raise NoDigestError(f"Attempt to check {self.__class__.__name__} with no digest.") - verify_checksum(self.archive_file, self.digest) + verify_checksum(self.archive_file, self.digest, self.url, self._effective_url) @_needs_stage def reset(self): @@ -1433,21 +1434,26 @@ class FetchAndVerifyExpandedFile(URLFetchStrategy): if len(files) != 1: raise ChecksumError(self, f"Expected a single file in {src_dir}.") - verify_checksum(os.path.join(src_dir, files[0]), self.expanded_sha256) + verify_checksum( + os.path.join(src_dir, files[0]), self.expanded_sha256, self.url, self._effective_url + ) -def verify_checksum(file, digest): +def verify_checksum(file: str, digest: str, url: str, effective_url: Optional[str]) -> None: checker = crypto.Checker(digest) if not checker.check(file): # On failure, provide some information about the file size and # contents, so that we can quickly see what the issue is (redirect # was not followed, empty file, text instead of binary, ...) size, contents = fs.filesummary(file) - raise ChecksumError( - f"{checker.hash_name} checksum failed for {file}", + long_msg = ( f"Expected {digest} but got {checker.sum}. " - f"File size = {size} bytes. Contents = {contents!r}", + f"File size = {size} bytes. Contents = {contents!r}. " + f"URL = {url}" ) + if effective_url and effective_url != url: + long_msg += f", redirected to = {effective_url}" + raise ChecksumError(f"{checker.hash_name} checksum failed for {file}", long_msg) def stable_target(fetcher): |