Diffstat (limited to 'lib')
-rw-r--r--  lib/spack/spack/test/web.py |  23
-rw-r--r--  lib/spack/spack/util/web.py | 119
2 files changed, 78 insertions, 64 deletions
diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py
index 21c00e652c..491c155e51 100644
--- a/lib/spack/spack/test/web.py
+++ b/lib/spack/spack/test/web.py
@@ -6,6 +6,7 @@
 import collections
 import os
 import posixpath
 import sys
+from urllib.request import Request
 
 import pytest
 
@@ -222,7 +223,10 @@ class MockPaginator(object):
 
 class MockClientError(Exception):
     def __init__(self):
-        self.response = {"Error": {"Code": "NoSuchKey"}}
+        self.response = {
+            "Error": {"Code": "NoSuchKey"},
+            "ResponseMetadata": {"HTTPStatusCode": 404},
+        }
 
 
 class MockS3Client(object):
@@ -244,6 +248,12 @@ class MockS3Client(object):
             return True
         raise self.ClientError
 
+    def head_object(self, Bucket=None, Key=None):
+        self.ClientError = MockClientError
+        if Bucket == "my-bucket" and Key == "subdirectory/my-file":
+            return True
+        raise self.ClientError
+
 
 def test_gather_s3_information(monkeypatch, capfd):
     mock_connection_data = {
@@ -307,3 +317,14 @@ def test_s3_url_exists(monkeypatch, capfd):
 def test_s3_url_parsing():
     assert spack.util.s3._parse_s3_endpoint_url("example.com") == "https://example.com"
     assert spack.util.s3._parse_s3_endpoint_url("http://example.com") == "http://example.com"
+
+
+def test_head_requests_are_head_requests_after_redirection():
+    # Test whether our workaround for an issue in Python where HEAD requests get
+    # upgraded to GET requests upon redirect works.
+    handler = spack.util.web.BetterHTTPRedirectHandler()
+    initial_request = Request("http://example.com", method="HEAD")
+    redirected_request = handler.redirect_request(
+        initial_request, {}, 302, "Moved Permanently", {}, "http://www.example.com"
+    )
+    assert redirected_request.get_method() == "HEAD"
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index 543bb43c5c..da9c3a7125 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -17,7 +17,7 @@ import sys
 import traceback
 from html.parser import HTMLParser
 from urllib.error import URLError
-from urllib.request import Request, urlopen
+from urllib.request import HTTPRedirectHandler, HTTPSHandler, Request, build_opener
 
 import llnl.util.lang
 import llnl.util.tty as tty
@@ -35,6 +35,44 @@ from spack.util.compression import ALLOWED_ARCHIVE_TYPES
 from spack.util.executable import CommandNotFoundError, which
 from spack.util.path import convert_to_posix_path
 
+
+class BetterHTTPRedirectHandler(HTTPRedirectHandler):
+    """The same as HTTPRedirectHandler, except that it sticks to a HEAD
+    request on redirect. Somehow Python upgrades HEAD requests to GET
+    requests when following redirects, which makes no sense. This
+    handler makes Python's urllib compatible with ``curl -LI``"""
+
+    def redirect_request(self, old_request, fp, code, msg, headers, newurl):
+        new_request = super().redirect_request(old_request, fp, code, msg, headers, newurl)
+        if old_request.get_method() == "HEAD":
+            new_request.method = "HEAD"
+        return new_request
+
+
+def _urlopen():
+    # One opener when SSL is enabled
+    with_ssl = build_opener(
+        BetterHTTPRedirectHandler,
+        HTTPSHandler(context=ssl.create_default_context()),
+    )
+
+    # One opener when SSL is disabled
+    without_ssl = build_opener(
+        BetterHTTPRedirectHandler,
+        HTTPSHandler(context=ssl._create_unverified_context()),
+    )
+
+    # And dynamically dispatch based on the config:verify_ssl.
+    def dispatch_open(*args, **kwargs):
+        opener = with_ssl if spack.config.get("config:verify_ssl", True) else without_ssl
+        return opener.open(*args, **kwargs)
+
+    return dispatch_open
+
+
+#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
+urlopen = llnl.util.lang.Singleton(_urlopen)
+
 
 #: User-Agent used in Request objects
 SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
@@ -78,36 +116,12 @@
     return False
 
 
-__UNABLE_TO_VERIFY_SSL = (lambda pyver: ((pyver < (2, 7, 9)) or ((3,) < pyver < (3, 4, 3))))(
-    sys.version_info
-)
-
-
 def read_from_url(url, accept_content_type=None):
     url = url_util.parse(url)
-    context = None
-
-    verify_ssl = spack.config.get("config:verify_ssl")
 
     # Timeout in seconds for web requests
     timeout = spack.config.get("config:connect_timeout", 10)
 
-    # Don't even bother with a context unless the URL scheme is one that uses
-    # SSL certs.
-    if uses_ssl(url):
-        if verify_ssl:
-            if __UNABLE_TO_VERIFY_SSL:
-                # User wants SSL verification, but it cannot be provided.
-                warn_no_ssl_cert_checking()
-            else:
-                # User wants SSL verification, and it *can* be provided.
-                context = ssl.create_default_context()  # novm
-        else:
-            # User has explicitly indicated that they do not want SSL
-            # verification.
-            if not __UNABLE_TO_VERIFY_SSL:
-                context = ssl._create_unverified_context()
-
     url_scheme = url.scheme
     url = url_util.format(url)
     if sys.platform == "win32" and url_scheme == "file":
@@ -123,7 +137,7 @@
         # one round-trip. However, most servers seem to ignore the header
         # if you ask for a tarball with Accept: text/html.
         req.get_method = lambda: "HEAD"
-        resp = _urlopen(req, timeout=timeout, context=context)
+        resp = urlopen(req, timeout=timeout)
 
         content_type = get_header(resp.headers, "Content-type")
 
@@ -131,7 +145,7 @@
     req.get_method = lambda: "GET"
 
     try:
-        response = _urlopen(req, timeout=timeout, context=context)
+        response = urlopen(req, timeout=timeout)
     except URLError as err:
         raise SpackWebError("Download failed: {ERROR}".format(ERROR=str(err)))
 
@@ -154,22 +168,11 @@
     return response.geturl(), response.headers, response
 
 
-def warn_no_ssl_cert_checking():
-    tty.warn(
-        "Spack will not check SSL certificates. You need to update "
-        "your Python to enable certificate verification."
-    )
-
-
 def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=None):
     if sys.platform == "win32":
         if remote_path[1] == ":":
             remote_path = "file://" + remote_path
     remote_url = url_util.parse(remote_path)
-    verify_ssl = spack.config.get("config:verify_ssl")
-
-    if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
-        warn_no_ssl_cert_checking()
 
     remote_file_path = url_util.local_file_path(remote_url)
     if remote_file_path is not None:
@@ -405,12 +408,12 @@
         )  # noqa: E501
 
         try:
-            s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
+            s3.head_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
             return True
         except s3.ClientError as err:
-            if err.response["Error"]["Code"] == "NoSuchKey":
+            if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
                 return False
-            raise err
+            raise
 
     # Check if Google Storage .. urllib-based fetch
     if url_result.scheme == "gs":
@@ -432,12 +435,14 @@
         return curl_exe.returncode == 0
 
     # If we get here, then the only other fetch method option is urllib.
-    # So try to "read" from the URL and assume that *any* non-throwing
-    # response contains the resource represented by the URL.
+    # We try a HEAD request and expect a 200 return code.
     try:
-        read_from_url(url)
-        return True
-    except (SpackWebError, URLError) as e:
+        response = urlopen(
+            Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}),
+            timeout=spack.config.get("config:connect_timeout", 10),
+        )
+        return response.status == 200
+    except URLError as e:
         tty.debug("Failure reading URL: " + str(e))
         return False
 
@@ -720,36 +725,24 @@
     return pages, links
 
 
-def _urlopen(req, *args, **kwargs):
-    """Wrapper for compatibility with old versions of Python."""
+def _open(req, *args, **kwargs):
+    global open
     url = req
     try:
         url = url.get_full_url()
     except AttributeError:
         pass
 
-    # Note: 'context' parameter was only introduced starting
-    # with versions 2.7.9 and 3.4.3 of Python.
-    if __UNABLE_TO_VERIFY_SSL:
-        del kwargs["context"]
-
-    opener = urlopen
     if url_util.parse(url).scheme == "s3":
         import spack.s3_handler
 
-        opener = spack.s3_handler.open
+        return spack.s3_handler.open(req, *args, **kwargs)
    elif url_util.parse(url).scheme == "gs":
         import spack.gcs_handler
 
-        opener = spack.gcs_handler.gcs_open
+        return spack.gcs_handler.gcs_open(req, *args, **kwargs)
 
-    try:
-        return opener(req, *args, **kwargs)
-    except TypeError as err:
-        # If the above fails because of 'context', call without 'context'.
-        if "context" in kwargs and "context" in str(err):
-            del kwargs["context"]
-        return opener(req, *args, **kwargs)
+    return open(req, *args, **kwargs)
 
 
 def find_versions_of_archive(
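
Note: the stock urllib behavior this commit works around can be reproduced offline, because HTTPRedirectHandler.redirect_request only rebuilds the Request object and never touches the network. A minimal sketch (illustrative only, not part of the commit; the URLs are placeholders):

from urllib.request import HTTPRedirectHandler, Request

# Ask the stock handler to build the request that would follow a 302.
original = Request("http://example.com/old", method="HEAD")
redirected = HTTPRedirectHandler().redirect_request(
    original, None, 302, "Found", {}, "http://example.com/new"
)

# The rebuilt request carries no explicit method and no body, so
# get_method() falls back to "GET" -- the silent upgrade that
# BetterHTTPRedirectHandler reverts by reinstating "HEAD".
assert original.get_method() == "HEAD"
assert redirected.get_method() == "GET"

This is also why the new test above drives redirect_request directly instead of opening a connection.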
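
The switch from get_object to head_object in url_exists changes the error shape as well: a HEAD response has no body, so S3 cannot deliver a "NoSuchKey" error document, and boto3 reports a missing key only through the HTTP status code. A standalone sketch of the resulting pattern (bucket and key names are placeholders, and configured AWS credentials are assumed):

import boto3
from botocore.exceptions import ClientError

def s3_object_exists(bucket, key):
    """Probe an object with a HEAD request instead of downloading it."""
    s3 = boto3.client("s3")
    try:
        s3.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError as err:
        # A missing key surfaces here as a plain HTTP 404, not as an
        # Error/Code of "NoSuchKey" the way get_object reports it.
        if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
            return False
        raise  # let credential, permission, and throttling errors propagate

print(s3_object_exists("my-bucket", "subdirectory/my-file"))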