author     loulawrence <72574166+loulawrence@users.noreply.github.com>  2021-06-22 16:38:37 -0400
committer  GitHub <noreply@github.com>  2021-06-22 13:38:37 -0700
commit     4da0561496c9230eab48a8c195ab49bb835cf8a7 (patch)
tree       20728118bb88be28efbc25e5bec1376c45e87b68
parent     477c8ce8205ec149fa897c9d83e530815c978d8b (diff)
Add config option to use urllib to fetch if curl missing (#21398)
* Use the Python urllib module to fetch when curl is missing
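* The new 'config:use_curl' option selects the implementation: when true,
  Spack shells out to curl as before; when false, URLFetchStrategy takes the
  urllib-based path (spack.util.web.read_from_url). A minimal sketch, modeled
  on the tests below (the URL and stage path are hypothetical placeholders,
  not part of this commit):

      import spack.config
      import spack.fetch_strategy as fs
      from spack.stage import Stage

      # Force the urllib code path even when curl is installed.
      with spack.config.override('config:use_curl', False):
          # Hypothetical archive URL; any file:// or http(s):// URL works.
          fetcher = fs.URLFetchStrategy(url='file:///tmp/example.tar.gz')
          with Stage(fetcher, path='/tmp/test-stage') as stage:
              stage.fetch()  # dispatches to URLFetchStrategy._fetch_urllib()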
-rw-r--r--  .gitignore                             2
-rw-r--r--  lib/spack/spack/fetch_strategy.py     80
-rw-r--r--  lib/spack/spack/test/cache_fetch.py   30
-rw-r--r--  lib/spack/spack/test/s3_fetch.py      47
-rw-r--r--  lib/spack/spack/test/url_fetch.py    184
-rw-r--r--  lib/spack/spack/util/web.py            3
6 files changed, 228 insertions, 118 deletions
diff --git a/.gitignore b/.gitignore
index dc85872f42..c3d2670af4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -508,4 +508,4 @@ $RECYCLE.BIN/
*.msp
# Windows shortcuts
-*.lnk
\ No newline at end of file
+*.lnk
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
index e98c9d8724..bfae7ed5a2 100644
--- a/lib/spack/spack/fetch_strategy.py
+++ b/lib/spack/spack/fetch_strategy.py
@@ -316,10 +316,10 @@ class URLFetchStrategy(FetchStrategy):
try:
partial_file, save_file = self._fetch_from_url(url)
- if save_file:
+ if save_file and (partial_file is not None):
os.rename(partial_file, save_file)
break
- except FetchError as e:
+ except FailedDownloadError as e:
errors.append(str(e))
for msg in errors:
@@ -330,16 +330,68 @@ class URLFetchStrategy(FetchStrategy):
def _existing_url(self, url):
tty.debug('Checking existence of {0}'.format(url))
- curl = self.curl
- # Telling curl to fetch the first byte (-r 0-0) is supposed to be
- # portable.
- curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url]
- if not spack.config.get('config:verify_ssl'):
- curl_args.append('-k')
- _ = curl(*curl_args, fail_on_error=False, output=os.devnull)
- return curl.returncode == 0
+
+ if spack.config.get('config:use_curl'):
+ curl = self.curl
+ # Telling curl to fetch the first byte (-r 0-0) is supposed to be
+ # portable.
+ curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url]
+ if not spack.config.get('config:verify_ssl'):
+ curl_args.append('-k')
+ _ = curl(*curl_args, fail_on_error=False, output=os.devnull)
+ return curl.returncode == 0
+ else:
+ # Use urllib to check whether the url is accessible
+ try:
+ url, headers, response = web_util.read_from_url(url)
+ except web_util.SpackWebError:
+ msg = "Urllib fetch failed to verify url {0}".format(url)
+ raise FailedDownloadError(url, msg)
+ return (response.getcode() is None or response.getcode() == 200)
def _fetch_from_url(self, url):
+ if spack.config.get('config:use_curl'):
+ return self._fetch_curl(url)
+ else:
+ return self._fetch_urllib(url)
+
+ def _check_headers(self, headers):
+ # Check if we somehow got an HTML file rather than the archive we
+ # asked for. We only look at the last content type, to handle
+ # redirects properly.
+ content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
+ flags=re.IGNORECASE)
+ if content_types and 'text/html' in content_types[-1]:
+ warn_content_type_mismatch(self.archive_file or "the archive")
+
+ @_needs_stage
+ def _fetch_urllib(self, url):
+ save_file = None
+ if self.stage.save_filename:
+ save_file = self.stage.save_filename
+ tty.msg('Fetching {0}'.format(url))
+
+ # Run urllib and grab the mime type from the http headers
+ try:
+ url, headers, response = web_util.read_from_url(url)
+ except web_util.SpackWebError as e:
+ # clean up archive on failure.
+ if self.archive_file:
+ os.remove(self.archive_file)
+ if save_file and os.path.exists(save_file):
+ os.remove(save_file)
+ msg = 'urllib failed to fetch with error {0}'.format(e)
+ raise FailedDownloadError(url, msg)
+ _data = response.read()
+ with open(save_file, 'wb') as _open_file:
+ _open_file.write(_data)
+ headers = _data.decode('utf-8', 'ignore')
+
+ self._check_headers(headers)
+ return None, save_file
+
+ @_needs_stage
+ def _fetch_curl(self, url):
save_file = None
partial_file = None
if self.stage.save_filename:
@@ -423,13 +475,7 @@ class URLFetchStrategy(FetchStrategy):
url,
"Curl failed with error %d" % curl.returncode)
- # Check if we somehow got an HTML file rather than the archive we
- # asked for. We only look at the last content type, to handle
- # redirects properly.
- content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
- flags=re.IGNORECASE)
- if content_types and 'text/html' in content_types[-1]:
- warn_content_type_mismatch(self.archive_file or "the archive")
+ self._check_headers(headers)
return partial_file, save_file
@property # type: ignore # decorated properties unsupported in mypy
diff --git a/lib/spack/spack/test/cache_fetch.py b/lib/spack/spack/test/cache_fetch.py
index 8771769307..94ef7ec58e 100644
--- a/lib/spack/spack/test/cache_fetch.py
+++ b/lib/spack/spack/test/cache_fetch.py
@@ -8,29 +8,33 @@ import pytest
from llnl.util.filesystem import mkdirp, touch
+import spack.config
+
from spack.stage import Stage
from spack.fetch_strategy import CacheURLFetchStrategy, NoCacheError
-def test_fetch_missing_cache(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_fetch_missing_cache(tmpdir, use_curl):
"""Ensure raise a missing cache file."""
testpath = str(tmpdir)
-
- fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file')
- with Stage(fetcher, path=testpath):
- with pytest.raises(NoCacheError, match=r'No cache'):
- fetcher.fetch()
+ with spack.config.override('config:use_curl', use_curl):
+ fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file')
+ with Stage(fetcher, path=testpath):
+ with pytest.raises(NoCacheError, match=r'No cache'):
+ fetcher.fetch()
-def test_fetch(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_fetch(tmpdir, use_curl):
"""Ensure a fetch after expanding is effectively a no-op."""
testpath = str(tmpdir)
cache = os.path.join(testpath, 'cache.tar.gz')
touch(cache)
url = 'file:///{0}'.format(cache)
-
- fetcher = CacheURLFetchStrategy(url=url)
- with Stage(fetcher, path=testpath) as stage:
- source_path = stage.source_path
- mkdirp(source_path)
- fetcher.fetch()
+ with spack.config.override('config:use_curl', use_curl):
+ fetcher = CacheURLFetchStrategy(url=url)
+ with Stage(fetcher, path=testpath) as stage:
+ source_path = stage.source_path
+ mkdirp(source_path)
+ fetcher.fetch()
diff --git a/lib/spack/spack/test/s3_fetch.py b/lib/spack/spack/test/s3_fetch.py
index e339a1c173..6f546ccc36 100644
--- a/lib/spack/spack/test/s3_fetch.py
+++ b/lib/spack/spack/test/s3_fetch.py
@@ -6,41 +6,48 @@
import os
import pytest
+import spack.config as spack_config
import spack.fetch_strategy as spack_fs
import spack.stage as spack_stage
-def test_s3fetchstrategy_sans_url():
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_s3fetchstrategy_sans_url(use_curl):
"""Ensure constructor with no URL fails."""
- with pytest.raises(ValueError):
- spack_fs.S3FetchStrategy(None)
+ with spack_config.override('config:use_curl', use_curl):
+ with pytest.raises(ValueError):
+ spack_fs.S3FetchStrategy(None)
-def test_s3fetchstrategy_bad_url(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_s3fetchstrategy_bad_url(tmpdir, use_curl):
"""Ensure fetch with bad URL fails as expected."""
testpath = str(tmpdir)
- fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
- assert fetcher is not None
+ with spack_config.override('config:use_curl', use_curl):
+ fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
+ assert fetcher is not None
- with spack_stage.Stage(fetcher, path=testpath) as stage:
- assert stage is not None
- assert fetcher.archive_file is None
- with pytest.raises(spack_fs.FetchError):
- fetcher.fetch()
+ with spack_stage.Stage(fetcher, path=testpath) as stage:
+ assert stage is not None
+ assert fetcher.archive_file is None
+ with pytest.raises(spack_fs.FetchError):
+ fetcher.fetch()
-def test_s3fetchstrategy_downloaded(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_s3fetchstrategy_downloaded(tmpdir, use_curl):
"""Ensure fetch with archive file already downloaded is a noop."""
testpath = str(tmpdir)
archive = os.path.join(testpath, 's3.tar.gz')
- class Archived_S3FS(spack_fs.S3FetchStrategy):
- @property
- def archive_file(self):
- return archive
+ with spack_config.override('config:use_curl', use_curl):
+ class Archived_S3FS(spack_fs.S3FetchStrategy):
+ @property
+ def archive_file(self):
+ return archive
- url = 's3:///{0}'.format(archive)
- fetcher = Archived_S3FS(url=url)
- with spack_stage.Stage(fetcher, path=testpath):
- fetcher.fetch()
+ url = 's3:///{0}'.format(archive)
+ fetcher = Archived_S3FS(url=url)
+ with spack_stage.Stage(fetcher, path=testpath):
+ fetcher.fetch()
diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py
index 9804e4eb72..0c8ee3cf17 100644
--- a/lib/spack/spack/test/url_fetch.py
+++ b/lib/spack/spack/test/url_fetch.py
@@ -19,6 +19,7 @@ from spack.stage import Stage
from spack.version import ver
import spack.util.crypto as crypto
import spack.util.executable
+from spack.util.executable import which
@pytest.fixture(params=list(crypto.hashes.keys()))
@@ -47,19 +48,36 @@ def pkg_factory():
return factory
-def test_urlfetchstrategy_sans_url():
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_urlfetchstrategy_sans_url(use_curl):
"""Ensure constructor with no URL fails."""
- with pytest.raises(ValueError):
- with fs.URLFetchStrategy(None):
- pass
+ with spack.config.override('config:use_curl', use_curl):
+ with pytest.raises(ValueError):
+ with fs.URLFetchStrategy(None):
+ pass
-def test_urlfetchstrategy_bad_url(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_urlfetchstrategy_bad_url(tmpdir, use_curl):
"""Ensure fetch with bad URL fails as expected."""
testpath = str(tmpdir)
+ with spack.config.override('config:use_curl', use_curl):
+ with pytest.raises(fs.FailedDownloadError):
+ fetcher = fs.URLFetchStrategy(url='file:///does-not-exist')
+ assert fetcher is not None
+
+ with Stage(fetcher, path=testpath) as stage:
+ assert stage is not None
+ assert fetcher.archive_file is None
+ fetcher.fetch()
- with pytest.raises(fs.FailedDownloadError):
- fetcher = fs.URLFetchStrategy(url='file:///does-not-exist')
+
+def test_fetch_options(tmpdir, mock_archive):
+ testpath = str(tmpdir)
+ with spack.config.override('config:use_curl', True):
+ fetcher = fs.URLFetchStrategy(url=mock_archive.url,
+ fetch_options={'cookie': 'True',
+ 'timeout': 10})
assert fetcher is not None
with Stage(fetcher, path=testpath) as stage:
@@ -68,7 +86,32 @@ def test_urlfetchstrategy_bad_url(tmpdir):
fetcher.fetch()
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_archive_file_errors(tmpdir, mock_archive, use_curl):
+ """Ensure FetchStrategy commands may only be used as intended"""
+ testpath = str(tmpdir)
+ with spack.config.override('config:use_curl', use_curl):
+ fetcher = fs.URLFetchStrategy(url=mock_archive.url)
+ assert fetcher is not None
+ with pytest.raises(fs.FailedDownloadError):
+ with Stage(fetcher, path=testpath) as stage:
+ assert stage is not None
+ assert fetcher.archive_file is None
+ with pytest.raises(fs.NoArchiveFileError):
+ fetcher.archive(testpath)
+ with pytest.raises(fs.NoArchiveFileError):
+ fetcher.expand()
+ with pytest.raises(fs.NoArchiveFileError):
+ fetcher.reset()
+ stage.fetch()
+ with pytest.raises(fs.NoDigestError):
+ fetcher.check()
+ assert fetcher.archive_file is not None
+ fetcher._fetch_from_url('file:///does-not-exist')
+
+
@pytest.mark.parametrize('secure', [True, False])
+@pytest.mark.parametrize('use_curl', [True, False])
@pytest.mark.parametrize('mock_archive',
[('.tar.gz', 'z'), ('.tgz', 'z'),
('.tar.bz2', 'j'), ('.tbz2', 'j'),
@@ -77,6 +120,7 @@ def test_urlfetchstrategy_bad_url(tmpdir):
def test_fetch(
mock_archive,
secure,
+ use_curl,
checksum_type,
config,
mutable_mock_repo
@@ -102,8 +146,8 @@ def test_fetch(
# Enter the stage directory and check some properties
with pkg.stage:
with spack.config.override('config:verify_ssl', secure):
- pkg.do_stage()
-
+ with spack.config.override('config:use_curl', use_curl):
+ pkg.do_stage()
with working_dir(pkg.stage.source_path):
assert os.path.exists('configure')
assert is_exe('configure')
@@ -123,37 +167,41 @@ def test_fetch(
('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'),
('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'),
])
-def test_from_list_url(mock_packages, config, spec, url, digest):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_from_list_url(mock_packages, config, spec, url, digest, use_curl):
"""
Test URLs in the url-list-test package, which means they should
have checksums in the package.
"""
- specification = Spec(spec).concretized()
- pkg = spack.repo.get(specification)
- fetch_strategy = fs.from_list_url(pkg)
- assert isinstance(fetch_strategy, fs.URLFetchStrategy)
- assert os.path.basename(fetch_strategy.url) == url
- assert fetch_strategy.digest == digest
- assert fetch_strategy.extra_options == {}
- pkg.fetch_options = {'timeout': 60}
- fetch_strategy = fs.from_list_url(pkg)
- assert fetch_strategy.extra_options == {'timeout': 60}
-
-
-def test_from_list_url_unspecified(mock_packages, config):
+ with spack.config.override('config:use_curl', use_curl):
+ specification = Spec(spec).concretized()
+ pkg = spack.repo.get(specification)
+ fetch_strategy = fs.from_list_url(pkg)
+ assert isinstance(fetch_strategy, fs.URLFetchStrategy)
+ assert os.path.basename(fetch_strategy.url) == url
+ assert fetch_strategy.digest == digest
+ assert fetch_strategy.extra_options == {}
+ pkg.fetch_options = {'timeout': 60}
+ fetch_strategy = fs.from_list_url(pkg)
+ assert fetch_strategy.extra_options == {'timeout': 60}
+
+
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_from_list_url_unspecified(mock_packages, config, use_curl):
"""Test non-specific URLs from the url-list-test package."""
- pkg = spack.repo.get('url-list-test')
+ with spack.config.override('config:use_curl', use_curl):
+ pkg = spack.repo.get('url-list-test')
- spec = Spec('url-list-test @2.0.0').concretized()
- pkg = spack.repo.get(spec)
- fetch_strategy = fs.from_list_url(pkg)
- assert isinstance(fetch_strategy, fs.URLFetchStrategy)
- assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz'
- assert fetch_strategy.digest is None
- assert fetch_strategy.extra_options == {}
- pkg.fetch_options = {'timeout': 60}
- fetch_strategy = fs.from_list_url(pkg)
- assert fetch_strategy.extra_options == {'timeout': 60}
+ spec = Spec('url-list-test @2.0.0').concretized()
+ pkg = spack.repo.get(spec)
+ fetch_strategy = fs.from_list_url(pkg)
+ assert isinstance(fetch_strategy, fs.URLFetchStrategy)
+ assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz'
+ assert fetch_strategy.digest is None
+ assert fetch_strategy.extra_options == {}
+ pkg.fetch_options = {'timeout': 60}
+ fetch_strategy = fs.from_list_url(pkg)
+ assert fetch_strategy.extra_options == {'timeout': 60}
def test_nosource_from_list_url(mock_packages, config):
@@ -176,6 +224,8 @@ def test_unknown_hash(checksum_type):
crypto.Checker('a')
+@pytest.mark.skipif(which('curl') is None,
+ reason='Urllib does not have built-in status bar')
def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd):
"""Ensure fetch with status bar option succeeds."""
def is_true():
@@ -185,26 +235,27 @@ def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd):
monkeypatch.setattr(sys.stdout, 'isatty', is_true)
monkeypatch.setattr(tty, 'msg_enabled', is_true)
+ with spack.config.override('config:use_curl', True):
+ fetcher = fs.URLFetchStrategy(mock_archive.url)
+ with Stage(fetcher, path=testpath) as stage:
+ assert fetcher.archive_file is None
+ stage.fetch()
- fetcher = fs.URLFetchStrategy(mock_archive.url)
- with Stage(fetcher, path=testpath) as stage:
- assert fetcher.archive_file is None
- stage.fetch()
-
- status = capfd.readouterr()[1]
- assert '##### 100' in status
+ status = capfd.readouterr()[1]
+ assert '##### 100' in status
-def test_url_extra_fetch(tmpdir, mock_archive):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_url_extra_fetch(tmpdir, mock_archive, use_curl):
"""Ensure a fetch after downloading is effectively a no-op."""
- testpath = str(tmpdir)
-
- fetcher = fs.URLFetchStrategy(mock_archive.url)
- with Stage(fetcher, path=testpath) as stage:
- assert fetcher.archive_file is None
- stage.fetch()
- assert fetcher.archive_file is not None
- fetcher.fetch()
+ with spack.config.override('config:use_curl', use_curl):
+ testpath = str(tmpdir)
+ fetcher = fs.URLFetchStrategy(mock_archive.url)
+ with Stage(fetcher, path=testpath) as stage:
+ assert fetcher.archive_file is None
+ stage.fetch()
+ assert fetcher.archive_file is not None
+ fetcher.fetch()
@pytest.mark.parametrize('url,urls,version,expected', [
@@ -215,17 +266,19 @@ def test_url_extra_fetch(tmpdir, mock_archive):
['https://ftpmirror.gnu.org/autoconf/autoconf-2.62.tar.gz',
'https://ftp.gnu.org/gnu/autoconf/autoconf-2.62.tar.gz'])
])
-def test_candidate_urls(pkg_factory, url, urls, version, expected):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_candidate_urls(pkg_factory, url, urls, version, expected, use_curl):
"""Tests that candidate urls include mirrors and that they go through
pattern matching and substitution for versions.
"""
- pkg = pkg_factory(url, urls)
- f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
- assert f.candidate_urls == expected
- assert f.extra_options == {}
- pkg = pkg_factory(url, urls, fetch_options={'timeout': 60})
- f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
- assert f.extra_options == {'timeout': 60}
+ with spack.config.override('config:use_curl', use_curl):
+ pkg = pkg_factory(url, urls)
+ f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
+ assert f.candidate_urls == expected
+ assert f.extra_options == {}
+ pkg = pkg_factory(url, urls, fetch_options={'timeout': 60})
+ f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
+ assert f.extra_options == {'timeout': 60}
@pytest.mark.regression('19673')
@@ -244,11 +297,10 @@ def test_missing_curl(tmpdir, monkeypatch):
testpath = str(tmpdir)
url = 'http://github.com/spack/spack'
- fetcher = fs.URLFetchStrategy(url=url)
- assert fetcher is not None
-
- with pytest.raises(TypeError, match='object is not callable'):
- with Stage(fetcher, path=testpath) as stage:
- out = stage.fetch()
-
- assert err_fmt.format('curl') in out
+ with spack.config.override('config:use_curl', True):
+ fetcher = fs.URLFetchStrategy(url=url)
+ assert fetcher is not None
+ with pytest.raises(TypeError, match='object is not callable'):
+ with Stage(fetcher, path=testpath) as stage:
+ out = stage.fetch()
+ assert err_fmt.format('curl') in out
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index 90c7c2b8cc..d4febac3e3 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -105,7 +105,8 @@ def read_from_url(url, accept_content_type=None):
else:
# User has explicitly indicated that they do not want SSL
# verification.
- context = ssl._create_unverified_context()
+ if not __UNABLE_TO_VERIFY_SSL:
+ context = ssl._create_unverified_context()
req = Request(url_util.format(url))
content_type = None