From 4da0561496c9230eab48a8c195ab49bb835cf8a7 Mon Sep 17 00:00:00 2001 From: loulawrence <72574166+loulawrence@users.noreply.github.com> Date: Tue, 22 Jun 2021 16:38:37 -0400 Subject: Add config option to use urllib to fetch if curl missing (#21398) * Use Python module urllib to fetch in the case that curl is missing --- .gitignore | 2 +- lib/spack/spack/fetch_strategy.py | 80 ++++++++++++---- lib/spack/spack/test/cache_fetch.py | 30 +++--- lib/spack/spack/test/s3_fetch.py | 47 +++++---- lib/spack/spack/test/url_fetch.py | 184 +++++++++++++++++++++++------------- lib/spack/spack/util/web.py | 3 +- 6 files changed, 228 insertions(+), 118 deletions(-) diff --git a/.gitignore b/.gitignore index dc85872f42..c3d2670af4 100644 --- a/.gitignore +++ b/.gitignore @@ -508,4 +508,4 @@ $RECYCLE.BIN/ *.msp # Windows shortcuts -*.lnk +*.lnk diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index e98c9d8724..bfae7ed5a2 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -316,10 +316,10 @@ class URLFetchStrategy(FetchStrategy): try: partial_file, save_file = self._fetch_from_url(url) - if save_file: + if save_file and (partial_file is not None): os.rename(partial_file, save_file) break - except FetchError as e: + except FailedDownloadError as e: errors.append(str(e)) for msg in errors: @@ -330,16 +330,68 @@ class URLFetchStrategy(FetchStrategy): def _existing_url(self, url): tty.debug('Checking existence of {0}'.format(url)) - curl = self.curl - # Telling curl to fetch the first byte (-r 0-0) is supposed to be - # portable. - curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url] - if not spack.config.get('config:verify_ssl'): - curl_args.append('-k') - _ = curl(*curl_args, fail_on_error=False, output=os.devnull) - return curl.returncode == 0 + + if spack.config.get('config:use_curl'): + curl = self.curl + # Telling curl to fetch the first byte (-r 0-0) is supposed to be + # portable. + curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url] + if not spack.config.get('config:verify_ssl'): + curl_args.append('-k') + _ = curl(*curl_args, fail_on_error=False, output=os.devnull) + return curl.returncode == 0 + else: + # Telling urllib to check if url is accessible + try: + url, headers, response = web_util.read_from_url(url) + except web_util.SpackWebError: + msg = "Urllib fetch failed to verify url {0}".format(url) + raise FailedDownloadError(url, msg) + return (response.getcode() is None or response.getcode() == 200) def _fetch_from_url(self, url): + if spack.config.get('config:use_curl'): + return self._fetch_curl(url) + else: + return self._fetch_urllib(url) + + def _check_headers(self, headers): + # Check if we somehow got an HTML file rather than the archive we + # asked for. We only look at the last content type, to handle + # redirects properly. + content_types = re.findall(r'Content-Type:[^\r\n]+', headers, + flags=re.IGNORECASE) + if content_types and 'text/html' in content_types[-1]: + warn_content_type_mismatch(self.archive_file or "the archive") + + @_needs_stage + def _fetch_urllib(self, url): + save_file = None + if self.stage.save_filename: + save_file = self.stage.save_filename + tty.msg('Fetching {0}'.format(url)) + + # Run urllib but grab the mime type from the http headers + try: + url, headers, response = web_util.read_from_url(url) + except web_util.SpackWebError as e: + # clean up archive on failure. + if self.archive_file: + os.remove(self.archive_file) + if save_file and os.path.exists(save_file): + os.remove(save_file) + msg = 'urllib failed to fetch with error {0}'.format(e) + raise FailedDownloadError(url, msg) + _data = response.read() + with open(save_file, 'wb') as _open_file: + _open_file.write(_data) + headers = _data.decode('utf-8', 'ignore') + + self._check_headers(headers) + return None, save_file + + @_needs_stage + def _fetch_curl(self, url): save_file = None partial_file = None if self.stage.save_filename: @@ -423,13 +475,7 @@ class URLFetchStrategy(FetchStrategy): url, "Curl failed with error %d" % curl.returncode) - # Check if we somehow got an HTML file rather than the archive we - # asked for. We only look at the last content type, to handle - # redirects properly. - content_types = re.findall(r'Content-Type:[^\r\n]+', headers, - flags=re.IGNORECASE) - if content_types and 'text/html' in content_types[-1]: - warn_content_type_mismatch(self.archive_file or "the archive") + self._check_headers(headers) return partial_file, save_file @property # type: ignore # decorated properties unsupported in mypy diff --git a/lib/spack/spack/test/cache_fetch.py b/lib/spack/spack/test/cache_fetch.py index 8771769307..94ef7ec58e 100644 --- a/lib/spack/spack/test/cache_fetch.py +++ b/lib/spack/spack/test/cache_fetch.py @@ -8,29 +8,33 @@ import pytest from llnl.util.filesystem import mkdirp, touch +import spack.config + from spack.stage import Stage from spack.fetch_strategy import CacheURLFetchStrategy, NoCacheError -def test_fetch_missing_cache(tmpdir): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_fetch_missing_cache(tmpdir, use_curl): """Ensure raise a missing cache file.""" testpath = str(tmpdir) - - fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file') - with Stage(fetcher, path=testpath): - with pytest.raises(NoCacheError, match=r'No cache'): - fetcher.fetch() + with spack.config.override('config:use_curl', use_curl): + fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file') + with Stage(fetcher, path=testpath): + with pytest.raises(NoCacheError, match=r'No cache'): + fetcher.fetch() -def test_fetch(tmpdir): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_fetch(tmpdir, use_curl): """Ensure a fetch after expanding is effectively a no-op.""" testpath = str(tmpdir) cache = os.path.join(testpath, 'cache.tar.gz') touch(cache) url = 'file:///{0}'.format(cache) - - fetcher = CacheURLFetchStrategy(url=url) - with Stage(fetcher, path=testpath) as stage: - source_path = stage.source_path - mkdirp(source_path) - fetcher.fetch() + with spack.config.override('config:use_curl', use_curl): + fetcher = CacheURLFetchStrategy(url=url) + with Stage(fetcher, path=testpath) as stage: + source_path = stage.source_path + mkdirp(source_path) + fetcher.fetch() diff --git a/lib/spack/spack/test/s3_fetch.py b/lib/spack/spack/test/s3_fetch.py index e339a1c173..6f546ccc36 100644 --- a/lib/spack/spack/test/s3_fetch.py +++ b/lib/spack/spack/test/s3_fetch.py @@ -6,41 +6,48 @@ import os import pytest +import spack.config as spack_config import spack.fetch_strategy as spack_fs import spack.stage as spack_stage -def test_s3fetchstrategy_sans_url(): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_s3fetchstrategy_sans_url(use_curl): """Ensure constructor with no URL fails.""" - with pytest.raises(ValueError): - spack_fs.S3FetchStrategy(None) + with spack_config.override('config:use_curl', use_curl): + with pytest.raises(ValueError): + spack_fs.S3FetchStrategy(None) -def test_s3fetchstrategy_bad_url(tmpdir): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_s3fetchstrategy_bad_url(tmpdir, use_curl): """Ensure fetch with bad URL fails as expected.""" testpath = str(tmpdir) - fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist') - assert fetcher is not None + with spack_config.override('config:use_curl', use_curl): + fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist') + assert fetcher is not None - with spack_stage.Stage(fetcher, path=testpath) as stage: - assert stage is not None - assert fetcher.archive_file is None - with pytest.raises(spack_fs.FetchError): - fetcher.fetch() + with spack_stage.Stage(fetcher, path=testpath) as stage: + assert stage is not None + assert fetcher.archive_file is None + with pytest.raises(spack_fs.FetchError): + fetcher.fetch() -def test_s3fetchstrategy_downloaded(tmpdir): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_s3fetchstrategy_downloaded(tmpdir, use_curl): """Ensure fetch with archive file already downloaded is a noop.""" testpath = str(tmpdir) archive = os.path.join(testpath, 's3.tar.gz') - class Archived_S3FS(spack_fs.S3FetchStrategy): - @property - def archive_file(self): - return archive + with spack_config.override('config:use_curl', use_curl): + class Archived_S3FS(spack_fs.S3FetchStrategy): + @property + def archive_file(self): + return archive - url = 's3:///{0}'.format(archive) - fetcher = Archived_S3FS(url=url) - with spack_stage.Stage(fetcher, path=testpath): - fetcher.fetch() + url = 's3:///{0}'.format(archive) + fetcher = Archived_S3FS(url=url) + with spack_stage.Stage(fetcher, path=testpath): + fetcher.fetch() diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py index 9804e4eb72..0c8ee3cf17 100644 --- a/lib/spack/spack/test/url_fetch.py +++ b/lib/spack/spack/test/url_fetch.py @@ -19,6 +19,7 @@ from spack.stage import Stage from spack.version import ver import spack.util.crypto as crypto import spack.util.executable +from spack.util.executable import which @pytest.fixture(params=list(crypto.hashes.keys())) @@ -47,19 +48,36 @@ def pkg_factory(): return factory -def test_urlfetchstrategy_sans_url(): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_urlfetchstrategy_sans_url(use_curl): """Ensure constructor with no URL fails.""" - with pytest.raises(ValueError): - with fs.URLFetchStrategy(None): - pass + with spack.config.override('config:use_curl', use_curl): + with pytest.raises(ValueError): + with fs.URLFetchStrategy(None): + pass -def test_urlfetchstrategy_bad_url(tmpdir): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_urlfetchstrategy_bad_url(tmpdir, use_curl): """Ensure fetch with bad URL fails as expected.""" testpath = str(tmpdir) + with spack.config.override('config:use_curl', use_curl): + with pytest.raises(fs.FailedDownloadError): + fetcher = fs.URLFetchStrategy(url='file:///does-not-exist') + assert fetcher is not None + + with Stage(fetcher, path=testpath) as stage: + assert stage is not None + assert fetcher.archive_file is None + fetcher.fetch() - with pytest.raises(fs.FailedDownloadError): - fetcher = fs.URLFetchStrategy(url='file:///does-not-exist') + +def test_fetch_options(tmpdir, mock_archive): + testpath = str(tmpdir) + with spack.config.override('config:use_curl', True): + fetcher = fs.URLFetchStrategy(url=mock_archive.url, + fetch_options={'cookie': 'True', + 'timeout': 10}) assert fetcher is not None with Stage(fetcher, path=testpath) as stage: @@ -68,7 +86,32 @@ def test_urlfetchstrategy_bad_url(tmpdir): fetcher.fetch() +@pytest.mark.parametrize('use_curl', [True, False]) +def test_archive_file_errors(tmpdir, mock_archive, use_curl): + """Ensure FetchStrategy commands may only be used as intended""" + testpath = str(tmpdir) + with spack.config.override('config:use_curl', use_curl): + fetcher = fs.URLFetchStrategy(url=mock_archive.url) + assert fetcher is not None + with pytest.raises(fs.FailedDownloadError): + with Stage(fetcher, path=testpath) as stage: + assert stage is not None + assert fetcher.archive_file is None + with pytest.raises(fs.NoArchiveFileError): + fetcher.archive(testpath) + with pytest.raises(fs.NoArchiveFileError): + fetcher.expand() + with pytest.raises(fs.NoArchiveFileError): + fetcher.reset() + stage.fetch() + with pytest.raises(fs.NoDigestError): + fetcher.check() + assert fetcher.archive_file is not None + fetcher._fetch_from_url('file:///does-not-exist') + + @pytest.mark.parametrize('secure', [True, False]) +@pytest.mark.parametrize('use_curl', [True, False]) @pytest.mark.parametrize('mock_archive', [('.tar.gz', 'z'), ('.tgz', 'z'), ('.tar.bz2', 'j'), ('.tbz2', 'j'), @@ -77,6 +120,7 @@ def test_urlfetchstrategy_bad_url(tmpdir): def test_fetch( mock_archive, secure, + use_curl, checksum_type, config, mutable_mock_repo @@ -102,8 +146,8 @@ def test_fetch( # Enter the stage directory and check some properties with pkg.stage: with spack.config.override('config:verify_ssl', secure): - pkg.do_stage() - + with spack.config.override('config:use_curl', use_curl): + pkg.do_stage() with working_dir(pkg.stage.source_path): assert os.path.exists('configure') assert is_exe('configure') @@ -123,37 +167,41 @@ def test_fetch( ('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'), ('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'), ]) -def test_from_list_url(mock_packages, config, spec, url, digest): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_from_list_url(mock_packages, config, spec, url, digest, use_curl): """ Test URLs in the url-list-test package, which means they should have checksums in the package. """ - specification = Spec(spec).concretized() - pkg = spack.repo.get(specification) - fetch_strategy = fs.from_list_url(pkg) - assert isinstance(fetch_strategy, fs.URLFetchStrategy) - assert os.path.basename(fetch_strategy.url) == url - assert fetch_strategy.digest == digest - assert fetch_strategy.extra_options == {} - pkg.fetch_options = {'timeout': 60} - fetch_strategy = fs.from_list_url(pkg) - assert fetch_strategy.extra_options == {'timeout': 60} - - -def test_from_list_url_unspecified(mock_packages, config): + with spack.config.override('config:use_curl', use_curl): + specification = Spec(spec).concretized() + pkg = spack.repo.get(specification) + fetch_strategy = fs.from_list_url(pkg) + assert isinstance(fetch_strategy, fs.URLFetchStrategy) + assert os.path.basename(fetch_strategy.url) == url + assert fetch_strategy.digest == digest + assert fetch_strategy.extra_options == {} + pkg.fetch_options = {'timeout': 60} + fetch_strategy = fs.from_list_url(pkg) + assert fetch_strategy.extra_options == {'timeout': 60} + + +@pytest.mark.parametrize('use_curl', [True, False]) +def test_from_list_url_unspecified(mock_packages, config, use_curl): """Test non-specific URLs from the url-list-test package.""" - pkg = spack.repo.get('url-list-test') + with spack.config.override('config:use_curl', use_curl): + pkg = spack.repo.get('url-list-test') - spec = Spec('url-list-test @2.0.0').concretized() - pkg = spack.repo.get(spec) - fetch_strategy = fs.from_list_url(pkg) - assert isinstance(fetch_strategy, fs.URLFetchStrategy) - assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz' - assert fetch_strategy.digest is None - assert fetch_strategy.extra_options == {} - pkg.fetch_options = {'timeout': 60} - fetch_strategy = fs.from_list_url(pkg) - assert fetch_strategy.extra_options == {'timeout': 60} + spec = Spec('url-list-test @2.0.0').concretized() + pkg = spack.repo.get(spec) + fetch_strategy = fs.from_list_url(pkg) + assert isinstance(fetch_strategy, fs.URLFetchStrategy) + assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz' + assert fetch_strategy.digest is None + assert fetch_strategy.extra_options == {} + pkg.fetch_options = {'timeout': 60} + fetch_strategy = fs.from_list_url(pkg) + assert fetch_strategy.extra_options == {'timeout': 60} def test_nosource_from_list_url(mock_packages, config): @@ -176,6 +224,8 @@ def test_unknown_hash(checksum_type): crypto.Checker('a') +@pytest.mark.skipif(which('curl') is None, + reason='Urllib does not have built-in status bar') def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd): """Ensure fetch with status bar option succeeds.""" def is_true(): @@ -185,26 +235,27 @@ def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd): monkeypatch.setattr(sys.stdout, 'isatty', is_true) monkeypatch.setattr(tty, 'msg_enabled', is_true) + with spack.config.override('config:use_curl', True): + fetcher = fs.URLFetchStrategy(mock_archive.url) + with Stage(fetcher, path=testpath) as stage: + assert fetcher.archive_file is None + stage.fetch() - fetcher = fs.URLFetchStrategy(mock_archive.url) - with Stage(fetcher, path=testpath) as stage: - assert fetcher.archive_file is None - stage.fetch() - - status = capfd.readouterr()[1] - assert '##### 100' in status + status = capfd.readouterr()[1] + assert '##### 100' in status -def test_url_extra_fetch(tmpdir, mock_archive): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_url_extra_fetch(tmpdir, mock_archive, use_curl): """Ensure a fetch after downloading is effectively a no-op.""" - testpath = str(tmpdir) - - fetcher = fs.URLFetchStrategy(mock_archive.url) - with Stage(fetcher, path=testpath) as stage: - assert fetcher.archive_file is None - stage.fetch() - assert fetcher.archive_file is not None - fetcher.fetch() + with spack.config.override('config:use_curl', use_curl): + testpath = str(tmpdir) + fetcher = fs.URLFetchStrategy(mock_archive.url) + with Stage(fetcher, path=testpath) as stage: + assert fetcher.archive_file is None + stage.fetch() + assert fetcher.archive_file is not None + fetcher.fetch() @pytest.mark.parametrize('url,urls,version,expected', [ @@ -215,17 +266,19 @@ def test_url_extra_fetch(tmpdir, mock_archive): ['https://ftpmirror.gnu.org/autoconf/autoconf-2.62.tar.gz', 'https://ftp.gnu.org/gnu/autoconf/autoconf-2.62.tar.gz']) ]) -def test_candidate_urls(pkg_factory, url, urls, version, expected): +@pytest.mark.parametrize('use_curl', [True, False]) +def test_candidate_urls(pkg_factory, url, urls, version, expected, use_curl): """Tests that candidate urls include mirrors and that they go through pattern matching and substitution for versions. """ - pkg = pkg_factory(url, urls) - f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version) - assert f.candidate_urls == expected - assert f.extra_options == {} - pkg = pkg_factory(url, urls, fetch_options={'timeout': 60}) - f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version) - assert f.extra_options == {'timeout': 60} + with spack.config.override('config:use_curl', use_curl): + pkg = pkg_factory(url, urls) + f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version) + assert f.candidate_urls == expected + assert f.extra_options == {} + pkg = pkg_factory(url, urls, fetch_options={'timeout': 60}) + f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version) + assert f.extra_options == {'timeout': 60} @pytest.mark.regression('19673') @@ -244,11 +297,10 @@ def test_missing_curl(tmpdir, monkeypatch): testpath = str(tmpdir) url = 'http://github.com/spack/spack' - fetcher = fs.URLFetchStrategy(url=url) - assert fetcher is not None - - with pytest.raises(TypeError, match='object is not callable'): - with Stage(fetcher, path=testpath) as stage: - out = stage.fetch() - - assert err_fmt.format('curl') in out + with spack.config.override('config:use_curl', True): + fetcher = fs.URLFetchStrategy(url=url) + assert fetcher is not None + with pytest.raises(TypeError, match='object is not callable'): + with Stage(fetcher, path=testpath) as stage: + out = stage.fetch() + assert err_fmt.format('curl') in out diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index 90c7c2b8cc..d4febac3e3 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -105,7 +105,8 @@ def read_from_url(url, accept_content_type=None): else: # User has explicitly indicated that they do not want SSL # verification. - context = ssl._create_unverified_context() + if not __UNABLE_TO_VERIFY_SSL: + context = ssl._create_unverified_context() req = Request(url_util.format(url)) content_type = None -- cgit v1.2.3-70-g09d2