Diffstat (limited to 'lib')
-rw-r--r--  lib/spack/spack/gcs_handler.py |  11
-rw-r--r--  lib/spack/spack/s3_handler.py  |  38
-rw-r--r--  lib/spack/spack/test/web.py    |  13
-rw-r--r--  lib/spack/spack/util/web.py    | 189
4 files changed, 92 insertions(+), 159 deletions(-)
diff --git a/lib/spack/spack/gcs_handler.py b/lib/spack/spack/gcs_handler.py
index 441eea6f80..4ee9f08896 100644
--- a/lib/spack/spack/gcs_handler.py
+++ b/lib/spack/spack/gcs_handler.py
@@ -4,8 +4,8 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 import urllib.parse
 import urllib.response
-
-import spack.util.web as web_util
+from urllib.error import URLError
+from urllib.request import BaseHandler
 
 
 def gcs_open(req, *args, **kwargs):
@@ -16,8 +16,13 @@ def gcs_open(req, *args, **kwargs):
     gcsblob = gcs_util.GCSBlob(url)
 
     if not gcsblob.exists():
-        raise web_util.SpackWebError("GCS blob {0} does not exist".format(gcsblob.blob_path))
+        raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
     stream = gcsblob.get_blob_byte_stream()
     headers = gcsblob.get_blob_headers()
 
     return urllib.response.addinfourl(stream, headers, url)
+
+
+class GCSHandler(BaseHandler):
+    def gs_open(self, req):
+        return gcs_open(req)
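Note: urllib's OpenerDirector dispatches on the URL scheme by looking for a
handler method named "<scheme>_open", so the gs_open method above is all
GCSHandler needs to serve gs:// URLs once registered with build_opener. A
minimal self-contained sketch of that dispatch convention, using a made-up
"echo" scheme rather than Spack's handlers:

    import io
    import urllib.request
    import urllib.response

    class EchoHandler(urllib.request.BaseHandler):
        def echo_open(self, req):
            # Answer any echo:// request with its own URL as the body.
            body = io.BytesIO(req.get_full_url().encode("utf-8"))
            return urllib.response.addinfourl(body, {}, req.get_full_url())

    opener = urllib.request.build_opener(EchoHandler())
    print(opener.open("echo://hello").read())  # b'echo://hello'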
diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py
index a3e0aa991b..77a8a2f7cc 100644
--- a/lib/spack/spack/s3_handler.py
+++ b/lib/spack/spack/s3_handler.py
@@ -7,7 +7,7 @@ import urllib.error
 import urllib.parse
 import urllib.request
 import urllib.response
-from io import BufferedReader, IOBase
+from io import BufferedReader, BytesIO, IOBase
 
 import spack.util.s3 as s3_util
@@ -42,7 +42,7 @@ class WrapStream(BufferedReader):
         return getattr(self.raw, key)
 
 
-def _s3_open(url):
+def _s3_open(url, method="GET"):
     parsed = urllib.parse.urlparse(url)
     s3 = s3_util.get_s3_session(url, method="fetch")
@@ -52,27 +52,29 @@ def _s3_open(url):
     if key.startswith("/"):
         key = key[1:]
 
-    obj = s3.get_object(Bucket=bucket, Key=key)
+    if method not in ("GET", "HEAD"):
+        raise urllib.error.URLError(
+            "Only GET and HEAD verbs are currently supported for the s3:// scheme"
+        )
+
+    try:
+        if method == "GET":
+            obj = s3.get_object(Bucket=bucket, Key=key)
+            # NOTE(opadron): Apply workaround here (see above)
+            stream = WrapStream(obj["Body"])
+        elif method == "HEAD":
+            obj = s3.head_object(Bucket=bucket, Key=key)
+            stream = BytesIO()
+    except s3.ClientError as e:
+        raise urllib.error.URLError(e) from e
 
-    # NOTE(opadron): Apply workaround here (see above)
-    stream = WrapStream(obj["Body"])
     headers = obj["ResponseMetadata"]["HTTPHeaders"]
 
     return url, headers, stream
 
 
-class UrllibS3Handler(urllib.request.HTTPSHandler):
+class UrllibS3Handler(urllib.request.BaseHandler):
     def s3_open(self, req):
         orig_url = req.get_full_url()
-        from botocore.exceptions import ClientError  # type: ignore[import]
-
-        try:
-            url, headers, stream = _s3_open(orig_url)
-            return urllib.response.addinfourl(stream, headers, url)
-        except ClientError as err:
-            raise urllib.error.URLError(err) from err
-
-
-S3OpenerDirector = urllib.request.build_opener(UrllibS3Handler())
-
-open = S3OpenerDirector.open
+        url, headers, stream = _s3_open(orig_url, method=req.get_method())
+        return urllib.response.addinfourl(stream, headers, url)
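The rewritten handler is driven like any other urllib handler; a sketch of
the intended use (assumes a Spack checkout on sys.path and valid S3
credentials; the bucket and key are made up). One opener now serves both
verbs, with req.get_method() choosing between get_object and head_object:

    import urllib.request

    import spack.s3_handler

    opener = urllib.request.build_opener(spack.s3_handler.UrllibS3Handler())

    # HEAD: headers only; the body is an empty BytesIO.
    head = opener.open(urllib.request.Request("s3://my-bucket/my-file", method="HEAD"))
    print(dict(head.headers))

    # GET: the body is the wrapped botocore stream.
    print(opener.open("s3://my-bucket/my-file").read())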
diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py
index 476ea01019..166a577c89 100644
--- a/lib/spack/spack/test/web.py
+++ b/lib/spack/spack/test/web.py
@@ -224,7 +224,10 @@ class MockPaginator(object):
 
 class MockClientError(Exception):
     def __init__(self):
-        self.response = {"Error": {"Code": "NoSuchKey"}}
+        self.response = {
+            "Error": {"Code": "NoSuchKey"},
+            "ResponseMetadata": {"HTTPStatusCode": 404},
+        }
 
 
 class MockS3Client(object):
@@ -243,7 +246,13 @@ class MockS3Client(object):
     def get_object(self, Bucket=None, Key=None):
         self.ClientError = MockClientError
         if Bucket == "my-bucket" and Key == "subdirectory/my-file":
-            return True
+            return {"ResponseMetadata": {"HTTPHeaders": {}}}
+        raise self.ClientError
+
+    def head_object(self, Bucket=None, Key=None):
+        self.ClientError = MockClientError
+        if Bucket == "my-bucket" and Key == "subdirectory/my-file":
+            return {"ResponseMetadata": {"HTTPHeaders": {}}}
         raise self.ClientError
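For context, the extra ResponseMetadata key makes the mock structurally match
what a real botocore ClientError carries. A sketch (requires boto3 and AWS
credentials; the bucket and key are illustrative):

    import boto3
    from botocore.exceptions import ClientError

    s3 = boto3.client("s3")
    try:
        s3.head_object(Bucket="my-bucket", Key="subdirectory/no-such-file")
    except ClientError as err:
        print(err.response["Error"]["Code"])                       # e.g. "404"
        print(err.response["ResponseMetadata"]["HTTPStatusCode"])  # e.g. 404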
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index 7c8964b3c9..9398a12dd8 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -18,7 +18,7 @@ import traceback
 import urllib.parse
 from html.parser import HTMLParser
 from urllib.error import URLError
-from urllib.request import Request, urlopen
+from urllib.request import HTTPSHandler, Request, build_opener
 
 import llnl.util.lang
 import llnl.util.tty as tty
@@ -27,6 +27,8 @@ from llnl.util.filesystem import mkdirp, rename, working_dir
 import spack
 import spack.config
 import spack.error
+import spack.gcs_handler
+import spack.s3_handler
 import spack.url
 import spack.util.crypto
 import spack.util.gcs as gcs_util
@@ -36,6 +38,28 @@ from spack.util.compression import ALLOWED_ARCHIVE_TYPES
 from spack.util.executable import CommandNotFoundError, which
 from spack.util.path import convert_to_posix_path
 
+
+def _urlopen():
+    s3 = spack.s3_handler.UrllibS3Handler()
+    gcs = spack.gcs_handler.GCSHandler()
+
+    # One opener with HTTPS ssl enabled
+    with_ssl = build_opener(s3, gcs, HTTPSHandler(context=ssl.create_default_context()))
+
+    # One opener with HTTPS ssl disabled
+    without_ssl = build_opener(s3, gcs, HTTPSHandler(context=ssl._create_unverified_context()))
+
+    # And dynamically dispatch based on the config:verify_ssl.
+    def dispatch_open(*args, **kwargs):
+        opener = with_ssl if spack.config.get("config:verify_ssl", True) else without_ssl
+        return opener.open(*args, **kwargs)
+
+    return dispatch_open
+
+
+#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
+urlopen = llnl.util.lang.Singleton(_urlopen)
+
+
 #: User-Agent used in Request objects
 SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
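A sketch of the intended behavior (the URL is illustrative): the Singleton
builds both openers once, on first use, and dispatch_open re-reads
config:verify_ssl on every call, so toggling the setting switches openers
without rebuilding them:

    import spack.config
    import spack.util.web as web_util

    # Default: config:verify_ssl is true, so the verified-SSL opener is used.
    web_util.urlopen("https://example.com")

    with spack.config.override("config:verify_ssl", False):
        # Same Singleton, but dispatch_open now picks the unverified opener.
        web_util.urlopen("https://example.com")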
@@ -60,86 +84,33 @@ class LinkParser(HTMLParser):
                     self.links.append(val)
 
 
-def uses_ssl(parsed_url):
-    if parsed_url.scheme == "https":
-        return True
-
-    if parsed_url.scheme == "s3":
-        endpoint_url = os.environ.get("S3_ENDPOINT_URL")
-        if not endpoint_url:
-            return True
-
-        if urllib.parse.urlparse(endpoint_url).scheme == "https":
-            return True
-
-    elif parsed_url.scheme == "gs":
-        tty.debug("(uses_ssl) GCS Blob is https")
-        return True
-
-    return False
-
-
 def read_from_url(url, accept_content_type=None):
     if isinstance(url, str):
         url = urllib.parse.urlparse(url)
-    context = None
 
     # Timeout in seconds for web requests
     timeout = spack.config.get("config:connect_timeout", 10)
-
-    # Don't even bother with a context unless the URL scheme is one that uses
-    # SSL certs.
-    if uses_ssl(url):
-        if spack.config.get("config:verify_ssl"):
-            # User wants SSL verification, and it *can* be provided.
-            context = ssl.create_default_context()
-        else:
-            # User has explicitly indicated that they do not want SSL
-            # verification.
-            context = ssl._create_unverified_context()
-
-    url_scheme = url.scheme
-    url = url_util.format(url)
-    if sys.platform == "win32" and url_scheme == "file":
-        url = convert_to_posix_path(url)
-    req = Request(url, headers={"User-Agent": SPACK_USER_AGENT})
-
-    content_type = None
-    is_web_url = url_scheme in ("http", "https")
-    if accept_content_type and is_web_url:
-        # Make a HEAD request first to check the content type. This lets
-        # us ignore tarballs and gigantic files.
-        # It would be nice to do this with the HTTP Accept header to avoid
-        # one round-trip. However, most servers seem to ignore the header
-        # if you ask for a tarball with Accept: text/html.
-        req.get_method = lambda: "HEAD"
-        resp = _urlopen(req, timeout=timeout, context=context)
-
-        content_type = get_header(resp.headers, "Content-type")
-
-        # Do the real GET request when we know it's just HTML.
-        req.get_method = lambda: "GET"
+
+    request = Request(url.geturl(), headers={"User-Agent": SPACK_USER_AGENT})
 
     try:
-        response = _urlopen(req, timeout=timeout, context=context)
+        response = urlopen(request, timeout=timeout)
     except URLError as err:
-        raise SpackWebError("Download failed: {ERROR}".format(ERROR=str(err)))
-
-    if accept_content_type and not is_web_url:
-        content_type = get_header(response.headers, "Content-type")
+        raise SpackWebError("Download failed: {}".format(str(err)))
 
-    reject_content_type = accept_content_type and (
-        content_type is None or not content_type.startswith(accept_content_type)
-    )
-
-    if reject_content_type:
-        tty.debug(
-            "ignoring page {0}{1}{2}".format(
-                url, " with content type " if content_type is not None else "", content_type or ""
-            )
-        )
-
-        return None, None, None
+    if accept_content_type:
+        try:
+            content_type = get_header(response.headers, "Content-type")
+            reject_content_type = not content_type.startswith(accept_content_type)
+        except KeyError:
+            content_type = None
+            reject_content_type = True
+
+        if reject_content_type:
+            msg = "ignoring page {}".format(url.geturl())
+            if content_type:
+                msg += " with content type {}".format(content_type)
+            tty.debug(msg)
+            return None, None, None
 
     return response.geturl(), response.headers, response
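The simplified contract, as a sketch (the URL is illustrative): a single GET,
with the content-type check applied to the response instead of a separate
HEAD probe, and a triple of Nones for pages that do not match:

    import spack.util.web as web_util

    url, headers, response = web_util.read_from_url(
        "https://spack.io", accept_content_type="text/html"
    )
    if response is None:
        print("page ignored: content type did not match")
    else:
        print(url, headers.get("Content-Type"))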
@@ -349,12 +320,6 @@ def url_exists(url, curl=None):
     Simple Storage Service (`s3`) URLs; otherwise, the configured fetch
     method defined by `config:url_fetch_method` is used.
 
-    If the method is `curl`, it also uses the following configuration option:
-
-    * config:verify_ssl (str): Perform SSL verification
-
-    Otherwise, `urllib` will be used.
-
     Arguments:
         url (str): URL whose existence is being checked
         curl (spack.util.executable.Executable or None): (optional) curl
@@ -365,31 +330,11 @@ def url_exists(url, curl=None):
     tty.debug("Checking existence of {0}".format(url))
     url_result = urllib.parse.urlparse(url)
 
-    # Check if a local file
-    local_path = url_util.local_file_path(url_result)
-    if local_path:
-        return os.path.exists(local_path)
-
-    # Check if Amazon Simple Storage Service (S3) .. urllib-based fetch
-    if url_result.scheme == "s3":
-        # Check for URL-specific connection information
-        s3 = s3_util.get_s3_session(url_result, method="fetch")
-
-        try:
-            s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
-            return True
-        except s3.ClientError as err:
-            if err.response["Error"]["Code"] == "NoSuchKey":
-                return False
-            raise err
-
-    # Check if Google Storage .. urllib-based fetch
-    if url_result.scheme == "gs":
-        gcs = gcs_util.GCSBlob(url_result)
-        return gcs.exists()
-
-    # Otherwise, use the configured fetch method
-    if spack.config.get("config:url_fetch_method") == "curl":
+    # Use curl if configured to do so
+    use_curl = spack.config.get(
+        "config:url_fetch_method", "urllib"
+    ) == "curl" and url_result.scheme not in ("gs", "s3")
+    if use_curl:
         curl_exe = _curl(curl)
         if not curl_exe:
             return False
@@ -402,13 +347,14 @@ def url_exists(url, curl=None):
         _ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull)
         return curl_exe.returncode == 0
 
-    # If we get here, then the only other fetch method option is urllib.
-    # So try to "read" from the URL and assume that *any* non-throwing
-    # response contains the resource represented by the URL.
+    # Otherwise use urllib.
     try:
-        read_from_url(url)
+        urlopen(
+            Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}),
+            timeout=spack.config.get("config:connect_timeout", 10),
+        )
         return True
-    except (SpackWebError, URLError) as e:
+    except URLError as e:
         tty.debug("Failure reading URL: " + str(e))
         return False
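Existence checks now take one code path for every scheme; a sketch (the
bucket name is made up):

    import spack.util.web as web_util

    web_util.url_exists("https://spack.io")            # True
    web_util.url_exists("s3://my-bucket/no-such-key")  # False: the handler's
                                                       # URLError is caught above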
@@ -691,35 +637,6 @@ def spider(root_urls, depth=0, concurrency=32):
     return pages, links
 
 
-def _urlopen(req, *args, **kwargs):
-    """Wrapper for compatibility with old versions of Python."""
-    url = req
-    try:
-        url = url.get_full_url()
-    except AttributeError:
-        pass
-
-    del kwargs["context"]
-
-    opener = urlopen
-    if urllib.parse.urlparse(url).scheme == "s3":
-        import spack.s3_handler
-
-        opener = spack.s3_handler.open
-    elif urllib.parse.urlparse(url).scheme == "gs":
-        import spack.gcs_handler
-
-        opener = spack.gcs_handler.gcs_open
-
-    try:
-        return opener(req, *args, **kwargs)
-    except TypeError as err:
-        # If the above fails because of 'context', call without 'context'.
-        if "context" in kwargs and "context" in str(err):
-            del kwargs["context"]
-            return opener(req, *args, **kwargs)
-
-
 def find_versions_of_archive(
     archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
 ):