Diffstat (limited to 'lib')
-rw-r--r--  lib/spack/spack/test/web.py |  23
-rw-r--r--  lib/spack/spack/util/web.py | 119
2 files changed, 64 insertions, 78 deletions
diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py
index 491c155e51..21c00e652c 100644
--- a/lib/spack/spack/test/web.py
+++ b/lib/spack/spack/test/web.py
@@ -6,7 +6,6 @@ import collections
 import os
 import posixpath
 import sys
-from urllib.request import Request

 import pytest

@@ -223,10 +222,7 @@ class MockPaginator(object):

 class MockClientError(Exception):
     def __init__(self):
-        self.response = {
-            "Error": {"Code": "NoSuchKey"},
-            "ResponseMetadata": {"HTTPStatusCode": 404},
-        }
+        self.response = {"Error": {"Code": "NoSuchKey"}}


 class MockS3Client(object):
@@ -248,12 +244,6 @@ class MockS3Client(object):
             return True
         raise self.ClientError

-    def head_object(self, Bucket=None, Key=None):
-        self.ClientError = MockClientError
-        if Bucket == "my-bucket" and Key == "subdirectory/my-file":
-            return True
-        raise self.ClientError
-

 def test_gather_s3_information(monkeypatch, capfd):
     mock_connection_data = {
@@ -317,14 +307,3 @@ def test_s3_url_exists(monkeypatch, capfd):
 def test_s3_url_parsing():
     assert spack.util.s3._parse_s3_endpoint_url("example.com") == "https://example.com"
     assert spack.util.s3._parse_s3_endpoint_url("http://example.com") == "http://example.com"
-
-
-def test_head_requests_are_head_requests_after_redirection():
-    # Test whether our workaround for an issue in Python where HEAD requests get
-    # upgraded to GET requests upon redirect works.
-    handler = spack.util.web.BetterHTTPRedirectHandler()
-    initial_request = Request("http://example.com", method="HEAD")
-    redirected_request = handler.redirect_request(
-        initial_request, {}, 302, "Moved Permanently", {}, "http://www.example.com"
-    )
-    assert redirected_request.get_method() == "HEAD"
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index da9c3a7125..543bb43c5c 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -17,7 +17,7 @@ import sys
 import traceback
 from html.parser import HTMLParser
 from urllib.error import URLError
-from urllib.request import HTTPRedirectHandler, HTTPSHandler, Request, build_opener
+from urllib.request import Request, urlopen

 import llnl.util.lang
 import llnl.util.tty as tty
@@ -35,44 +35,6 @@ from spack.util.compression import ALLOWED_ARCHIVE_TYPES
 from spack.util.executable import CommandNotFoundError, which
 from spack.util.path import convert_to_posix_path

-
-class BetterHTTPRedirectHandler(HTTPRedirectHandler):
-    """The same as HTTPRedirectHandler, except that it sticks to a HEAD
-    request on redirect. Somehow Python upgrades HEAD requests to GET
-    requests when following redirects, which makes no sense. This
-    handler makes Python's urllib compatible with ``curl -LI``"""
-
-    def redirect_request(self, old_request, fp, code, msg, headers, newurl):
-        new_request = super().redirect_request(old_request, fp, code, msg, headers, newurl)
-        if old_request.get_method() == "HEAD":
-            new_request.method = "HEAD"
-        return new_request
-
-
-def _urlopen():
-    # One opener when SSL is enabled
-    with_ssl = build_opener(
-        BetterHTTPRedirectHandler,
-        HTTPSHandler(context=ssl.create_default_context()),
-    )
-
-    # One opener when SSL is disabled
-    without_ssl = build_opener(
-        BetterHTTPRedirectHandler,
-        HTTPSHandler(context=ssl._create_unverified_context()),
-    )
-
-    # And dynamically dispatch based on the config:verify_ssl.
-    def dispatch_open(*args, **kwargs):
-        opener = with_ssl if spack.config.get("config:verify_ssl", True) else without_ssl
-        return opener.open(*args, **kwargs)
-
-    return dispatch_open
-
-
-#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
-urlopen = llnl.util.lang.Singleton(_urlopen)
-
 #: User-Agent used in Request objects
 SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)

@@ -116,12 +78,36 @@ def uses_ssl(parsed_url):
     return False


+__UNABLE_TO_VERIFY_SSL = (lambda pyver: ((pyver < (2, 7, 9)) or ((3,) < pyver < (3, 4, 3))))(
+    sys.version_info
+)
+
+
 def read_from_url(url, accept_content_type=None):
     url = url_util.parse(url)
+    context = None
+
+    verify_ssl = spack.config.get("config:verify_ssl")

     # Timeout in seconds for web requests
     timeout = spack.config.get("config:connect_timeout", 10)

+    # Don't even bother with a context unless the URL scheme is one that uses
+    # SSL certs.
+    if uses_ssl(url):
+        if verify_ssl:
+            if __UNABLE_TO_VERIFY_SSL:
+                # User wants SSL verification, but it cannot be provided.
+                warn_no_ssl_cert_checking()
+            else:
+                # User wants SSL verification, and it *can* be provided.
+                context = ssl.create_default_context()  # novm
+        else:
+            # User has explicitly indicated that they do not want SSL
+            # verification.
+            if not __UNABLE_TO_VERIFY_SSL:
+                context = ssl._create_unverified_context()
+
     url_scheme = url.scheme
     url = url_util.format(url)
     if sys.platform == "win32" and url_scheme == "file":
@@ -137,7 +123,7 @@ def read_from_url(url, accept_content_type=None):
         # one round-trip.  However, most servers seem to ignore the header
         # if you ask for a tarball with Accept: text/html.
         req.get_method = lambda: "HEAD"
-        resp = urlopen(req, timeout=timeout)
+        resp = _urlopen(req, timeout=timeout, context=context)

         content_type = get_header(resp.headers, "Content-type")

@@ -145,7 +131,7 @@
     req.get_method = lambda: "GET"

     try:
-        response = urlopen(req, timeout=timeout)
+        response = _urlopen(req, timeout=timeout, context=context)
     except URLError as err:
         raise SpackWebError("Download failed: {ERROR}".format(ERROR=str(err)))

@@ -168,11 +154,22 @@ def read_from_url(url, accept_content_type=None):
     return response.geturl(), response.headers, response


+def warn_no_ssl_cert_checking():
+    tty.warn(
+        "Spack will not check SSL certificates. You need to update "
+        "your Python to enable certificate verification."
+    )
+
+
 def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=None):
     if sys.platform == "win32":
         if remote_path[1] == ":":
             remote_path = "file://" + remote_path
     remote_url = url_util.parse(remote_path)
+    verify_ssl = spack.config.get("config:verify_ssl")
+
+    if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
+        warn_no_ssl_cert_checking()

     remote_file_path = url_util.local_file_path(remote_url)
     if remote_file_path is not None:
@@ -408,12 +405,12 @@ def url_exists(url, curl=None):
         )  # noqa: E501

         try:
-            s3.head_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
+            s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
             return True
         except s3.ClientError as err:
-            if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
+            if err.response["Error"]["Code"] == "NoSuchKey":
                 return False
-            raise
+            raise err

     # Check if Google Storage .. urllib-based fetch
     if url_result.scheme == "gs":
@@ -435,14 +432,12 @@
         return curl_exe.returncode == 0

     # If we get here, then the only other fetch method option is urllib.
-    # We try a HEAD request and expect a 200 return code.
+    # So try to "read" from the URL and assume that *any* non-throwing
+    # response contains the resource represented by the URL.
     try:
-        response = urlopen(
-            Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}),
-            timeout=spack.config.get("config:connect_timeout", 10),
-        )
-        return response.status == 200
-    except URLError as e:
+        read_from_url(url)
+        return True
+    except (SpackWebError, URLError) as e:
         tty.debug("Failure reading URL: " + str(e))
         return False

@@ -725,24 +720,36 @@ def spider(root_urls, depth=0, concurrency=32):
     return pages, links


-def _open(req, *args, **kwargs):
-    global open
+def _urlopen(req, *args, **kwargs):
+    """Wrapper for compatibility with old versions of Python."""
     url = req
     try:
         url = url.get_full_url()
     except AttributeError:
         pass

+    # Note: 'context' parameter was only introduced starting
+    # with versions 2.7.9 and 3.4.3 of Python.
+    if __UNABLE_TO_VERIFY_SSL:
+        del kwargs["context"]
+
+    opener = urlopen
     if url_util.parse(url).scheme == "s3":
         import spack.s3_handler

-        return spack.s3_handler.open(req, *args, **kwargs)
+        opener = spack.s3_handler.open
     elif url_util.parse(url).scheme == "gs":
         import spack.gcs_handler

-        return spack.gcs_handler.gcs_open(req, *args, **kwargs)
+        opener = spack.gcs_handler.gcs_open

-    return open(req, *args, **kwargs)
+    try:
+        return opener(req, *args, **kwargs)
+    except TypeError as err:
+        # If the above fails because of 'context', call without 'context'.
+        if "context" in kwargs and "context" in str(err):
+            del kwargs["context"]
+        return opener(req, *args, **kwargs)


 def find_versions_of_archive(
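The compatibility pattern restored in _urlopen above — pass an SSL 'context' only when the interpreter supports it, and retry without it on TypeError — can be seen in isolation in the following sketch. This is not part of the commit; the fetch() helper and the UNABLE_TO_VERIFY_SSL name are hypothetical stand-ins for Spack's _urlopen and __UNABLE_TO_VERIFY_SSL:

import ssl
import sys
from urllib.request import urlopen

# Mirrors __UNABLE_TO_VERIFY_SSL: True on interpreters whose urlopen()
# predates the 'context' parameter (Python < 2.7.9, or 3.x < 3.4.3).
UNABLE_TO_VERIFY_SSL = sys.version_info < (2, 7, 9) or (3,) < sys.version_info < (3, 4, 3)


def fetch(url, verify_ssl=True, timeout=10):  # hypothetical helper, not Spack API
    kwargs = {"timeout": timeout}
    if not UNABLE_TO_VERIFY_SSL:
        # Build an SSL context only when the interpreter can accept one.
        kwargs["context"] = (
            ssl.create_default_context() if verify_ssl else ssl._create_unverified_context()
        )
    try:
        return urlopen(url, **kwargs)
    except TypeError as err:
        # Retry without 'context' if this urlopen() predates the parameter.
        if "context" in kwargs and "context" in str(err):
            del kwargs["context"]
        return urlopen(url, **kwargs)

On Python 3.5 and newer both guards are no-ops: the first urlopen() call accepts the context and the fallback branch never runs.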