From 4da0561496c9230eab48a8c195ab49bb835cf8a7 Mon Sep 17 00:00:00 2001
From: loulawrence <72574166+loulawrence@users.noreply.github.com>
Date: Tue, 22 Jun 2021 16:38:37 -0400
Subject: Add config option to use urllib to fetch if curl missing (#21398)

* Use Python module urllib to fetch in the case that curl is missing
---
 .gitignore                          |   2 +-
 lib/spack/spack/fetch_strategy.py   |  80 ++++++++++++----
 lib/spack/spack/test/cache_fetch.py |  30 +++---
 lib/spack/spack/test/s3_fetch.py    |  47 +++++----
 lib/spack/spack/test/url_fetch.py   | 184 +++++++++++++++++++++++-------------
 lib/spack/spack/util/web.py         |   3 +-
 6 files changed, 228 insertions(+), 118 deletions(-)

diff --git a/.gitignore b/.gitignore
index dc85872f42..c3d2670af4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -508,4 +508,4 @@ $RECYCLE.BIN/
 *.msp
 
 # Windows shortcuts
-*.lnk
+*.lnk
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
index e98c9d8724..bfae7ed5a2 100644
--- a/lib/spack/spack/fetch_strategy.py
+++ b/lib/spack/spack/fetch_strategy.py
@@ -316,10 +316,10 @@ class URLFetchStrategy(FetchStrategy):
 
             try:
                 partial_file, save_file = self._fetch_from_url(url)
-                if save_file:
+                if save_file and (partial_file is not None):
                     os.rename(partial_file, save_file)
                 break
-            except FetchError as e:
+            except FailedDownloadError as e:
                 errors.append(str(e))
 
         for msg in errors:
@@ -330,16 +330,68 @@ class URLFetchStrategy(FetchStrategy):
 
     def _existing_url(self, url):
         tty.debug('Checking existence of {0}'.format(url))
-        curl = self.curl
-        # Telling curl to fetch the first byte (-r 0-0) is supposed to be
-        # portable.
-        curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url]
-        if not spack.config.get('config:verify_ssl'):
-            curl_args.append('-k')
-        _ = curl(*curl_args, fail_on_error=False, output=os.devnull)
-        return curl.returncode == 0
+
+        if spack.config.get('config:use_curl'):
+            curl = self.curl
+            # Telling curl to fetch the first byte (-r 0-0) is supposed to be
+            # portable.
+            curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url]
+            if not spack.config.get('config:verify_ssl'):
+                curl_args.append('-k')
+            _ = curl(*curl_args, fail_on_error=False, output=os.devnull)
+            return curl.returncode == 0
+        else:
+            # Telling urllib to check if url is accessible
+            try:
+                url, headers, response = web_util.read_from_url(url)
+            except web_util.SpackWebError:
+                msg = "Urllib fetch failed to verify url {0}".format(url)
+                raise FailedDownloadError(url, msg)
+            return (response.getcode() is None or response.getcode() == 200)
 
     def _fetch_from_url(self, url):
+        if spack.config.get('config:use_curl'):
+            return self._fetch_curl(url)
+        else:
+            return self._fetch_urllib(url)
+
+    def _check_headers(self, headers):
+        # Check if we somehow got an HTML file rather than the archive we
+        # asked for.  We only look at the last content type, to handle
+        # redirects properly.
+        content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
+                                   flags=re.IGNORECASE)
+        if content_types and 'text/html' in content_types[-1]:
+            warn_content_type_mismatch(self.archive_file or "the archive")
+
+    @_needs_stage
+    def _fetch_urllib(self, url):
+        save_file = None
+        if self.stage.save_filename:
+            save_file = self.stage.save_filename
+        tty.msg('Fetching {0}'.format(url))
+
+        # Run urllib but grab the mime type from the http headers
+        try:
+            url, headers, response = web_util.read_from_url(url)
+        except web_util.SpackWebError as e:
+            # clean up archive on failure.
+            if self.archive_file:
+                os.remove(self.archive_file)
+            if save_file and os.path.exists(save_file):
+                os.remove(save_file)
+            msg = 'urllib failed to fetch with error {0}'.format(e)
+            raise FailedDownloadError(url, msg)
+        _data = response.read()
+        with open(save_file, 'wb') as _open_file:
+            _open_file.write(_data)
+        headers = _data.decode('utf-8', 'ignore')
+
+        self._check_headers(headers)
+        return None, save_file
+
+    @_needs_stage
+    def _fetch_curl(self, url):
         save_file = None
         partial_file = None
         if self.stage.save_filename:
@@ -423,13 +475,7 @@ class URLFetchStrategy(FetchStrategy):
                     url,
                     "Curl failed with error %d" % curl.returncode)
 
-        # Check if we somehow got an HTML file rather than the archive we
-        # asked for.  We only look at the last content type, to handle
-        # redirects properly.
-        content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
-                                   flags=re.IGNORECASE)
-        if content_types and 'text/html' in content_types[-1]:
-            warn_content_type_mismatch(self.archive_file or "the archive")
+        self._check_headers(headers)
         return partial_file, save_file
 
     @property  # type: ignore # decorated properties unsupported in mypy
diff --git a/lib/spack/spack/test/cache_fetch.py b/lib/spack/spack/test/cache_fetch.py
index 8771769307..94ef7ec58e 100644
--- a/lib/spack/spack/test/cache_fetch.py
+++ b/lib/spack/spack/test/cache_fetch.py
@@ -8,29 +8,33 @@ import pytest
 
 from llnl.util.filesystem import mkdirp, touch
 
+import spack.config
+
 from spack.stage import Stage
 from spack.fetch_strategy import CacheURLFetchStrategy, NoCacheError
 
 
-def test_fetch_missing_cache(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_fetch_missing_cache(tmpdir, use_curl):
     """Ensure raise a missing cache file."""
     testpath = str(tmpdir)
-
-    fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file')
-    with Stage(fetcher, path=testpath):
-        with pytest.raises(NoCacheError, match=r'No cache'):
-            fetcher.fetch()
+    with spack.config.override('config:use_curl', use_curl):
+        fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file')
+        with Stage(fetcher, path=testpath):
+            with pytest.raises(NoCacheError, match=r'No cache'):
+                fetcher.fetch()
 
 
-def test_fetch(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_fetch(tmpdir, use_curl):
     """Ensure a fetch after expanding is effectively a no-op."""
     testpath = str(tmpdir)
     cache = os.path.join(testpath, 'cache.tar.gz')
     touch(cache)
     url = 'file:///{0}'.format(cache)
-
-    fetcher = CacheURLFetchStrategy(url=url)
-    with Stage(fetcher, path=testpath) as stage:
-        source_path = stage.source_path
-        mkdirp(source_path)
-        fetcher.fetch()
+    with spack.config.override('config:use_curl', use_curl):
+        fetcher = CacheURLFetchStrategy(url=url)
+        with Stage(fetcher, path=testpath) as stage:
+            source_path = stage.source_path
+            mkdirp(source_path)
+            fetcher.fetch()
diff --git a/lib/spack/spack/test/s3_fetch.py b/lib/spack/spack/test/s3_fetch.py
index e339a1c173..6f546ccc36 100644
--- a/lib/spack/spack/test/s3_fetch.py
+++ b/lib/spack/spack/test/s3_fetch.py
@@ -6,41 +6,48 @@
 import os
 import pytest
 
+import spack.config as spack_config
 import spack.fetch_strategy as spack_fs
 import spack.stage as spack_stage
 
 
-def test_s3fetchstrategy_sans_url():
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_s3fetchstrategy_sans_url(use_curl):
     """Ensure constructor with no URL fails."""
-    with pytest.raises(ValueError):
-        spack_fs.S3FetchStrategy(None)
+    with spack_config.override('config:use_curl', use_curl):
+        with pytest.raises(ValueError):
+            spack_fs.S3FetchStrategy(None)
 
 
-def test_s3fetchstrategy_bad_url(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_s3fetchstrategy_bad_url(tmpdir, use_curl):
     """Ensure fetch with bad URL fails as expected."""
     testpath = str(tmpdir)
 
-    fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
-    assert fetcher is not None
+    with spack_config.override('config:use_curl', use_curl):
+        fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
+        assert fetcher is not None
 
-    with spack_stage.Stage(fetcher, path=testpath) as stage:
-        assert stage is not None
-        assert fetcher.archive_file is None
-        with pytest.raises(spack_fs.FetchError):
-            fetcher.fetch()
+        with spack_stage.Stage(fetcher, path=testpath) as stage:
+            assert stage is not None
+            assert fetcher.archive_file is None
+            with pytest.raises(spack_fs.FetchError):
+                fetcher.fetch()
 
 
-def test_s3fetchstrategy_downloaded(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_s3fetchstrategy_downloaded(tmpdir, use_curl):
     """Ensure fetch with archive file already downloaded is a noop."""
     testpath = str(tmpdir)
     archive = os.path.join(testpath, 's3.tar.gz')
 
-    class Archived_S3FS(spack_fs.S3FetchStrategy):
-        @property
-        def archive_file(self):
-            return archive
+    with spack_config.override('config:use_curl', use_curl):
+        class Archived_S3FS(spack_fs.S3FetchStrategy):
+            @property
+            def archive_file(self):
+                return archive
 
-    url = 's3:///{0}'.format(archive)
-    fetcher = Archived_S3FS(url=url)
-    with spack_stage.Stage(fetcher, path=testpath):
-        fetcher.fetch()
+        url = 's3:///{0}'.format(archive)
+        fetcher = Archived_S3FS(url=url)
+        with spack_stage.Stage(fetcher, path=testpath):
+            fetcher.fetch()
diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py
index 9804e4eb72..0c8ee3cf17 100644
--- a/lib/spack/spack/test/url_fetch.py
+++ b/lib/spack/spack/test/url_fetch.py
@@ -19,6 +19,7 @@ from spack.stage import Stage
 from spack.version import ver
 import spack.util.crypto as crypto
 import spack.util.executable
+from spack.util.executable import which
 
 
 @pytest.fixture(params=list(crypto.hashes.keys()))
@@ -47,19 +48,36 @@ def pkg_factory():
     return factory
 
 
-def test_urlfetchstrategy_sans_url():
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_urlfetchstrategy_sans_url(use_curl):
     """Ensure constructor with no URL fails."""
-    with pytest.raises(ValueError):
-        with fs.URLFetchStrategy(None):
-            pass
+    with spack.config.override('config:use_curl', use_curl):
+        with pytest.raises(ValueError):
+            with fs.URLFetchStrategy(None):
+                pass
 
 
-def test_urlfetchstrategy_bad_url(tmpdir):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_urlfetchstrategy_bad_url(tmpdir, use_curl):
     """Ensure fetch with bad URL fails as expected."""
     testpath = str(tmpdir)
+    with spack.config.override('config:use_curl', use_curl):
+        with pytest.raises(fs.FailedDownloadError):
+            fetcher = fs.URLFetchStrategy(url='file:///does-not-exist')
+            assert fetcher is not None
+
+            with Stage(fetcher, path=testpath) as stage:
+                assert stage is not None
+                assert fetcher.archive_file is None
+                fetcher.fetch()
 
-    with pytest.raises(fs.FailedDownloadError):
-        fetcher = fs.URLFetchStrategy(url='file:///does-not-exist')
+
+def test_fetch_options(tmpdir, mock_archive):
+    testpath = str(tmpdir)
+    with spack.config.override('config:use_curl', True):
+        fetcher = fs.URLFetchStrategy(url=mock_archive.url,
+                                      fetch_options={'cookie': 'True',
+                                                     'timeout': 10})
         assert fetcher is not None
 
         with Stage(fetcher, path=testpath) as stage:
@@ -68,7 +86,32 @@ def test_urlfetchstrategy_bad_url(tmpdir):
             fetcher.fetch()
 
 
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_archive_file_errors(tmpdir, mock_archive, use_curl):
+    """Ensure FetchStrategy commands may only be used as intended"""
+    testpath = str(tmpdir)
+    with spack.config.override('config:use_curl', use_curl):
+        fetcher = fs.URLFetchStrategy(url=mock_archive.url)
+        assert fetcher is not None
+        with pytest.raises(fs.FailedDownloadError):
+            with Stage(fetcher, path=testpath) as stage:
+                assert stage is not None
+                assert fetcher.archive_file is None
+                with pytest.raises(fs.NoArchiveFileError):
+                    fetcher.archive(testpath)
+                with pytest.raises(fs.NoArchiveFileError):
+                    fetcher.expand()
+                with pytest.raises(fs.NoArchiveFileError):
+                    fetcher.reset()
+                stage.fetch()
+                with pytest.raises(fs.NoDigestError):
+                    fetcher.check()
+                assert fetcher.archive_file is not None
+                fetcher._fetch_from_url('file:///does-not-exist')
+
+
 @pytest.mark.parametrize('secure', [True, False])
+@pytest.mark.parametrize('use_curl', [True, False])
 @pytest.mark.parametrize('mock_archive',
                          [('.tar.gz', 'z'), ('.tgz', 'z'),
                           ('.tar.bz2', 'j'), ('.tbz2', 'j'),
@@ -77,6 +120,7 @@ def test_urlfetchstrategy_bad_url(tmpdir):
 def test_fetch(
         mock_archive,
         secure,
+        use_curl,
         checksum_type,
         config,
         mutable_mock_repo
@@ -102,8 +146,8 @@ def test_fetch(
     # Enter the stage directory and check some properties
     with pkg.stage:
         with spack.config.override('config:verify_ssl', secure):
-            pkg.do_stage()
-
+            with spack.config.override('config:use_curl', use_curl):
+                pkg.do_stage()
         with working_dir(pkg.stage.source_path):
             assert os.path.exists('configure')
             assert is_exe('configure')
@@ -123,37 +167,41 @@ def test_fetch(
     ('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'),
     ('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'),
 ])
-def test_from_list_url(mock_packages, config, spec, url, digest):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_from_list_url(mock_packages, config, spec, url, digest, use_curl):
     """
     Test URLs in the url-list-test package, which means they should
     have checksums in the package.
     """
-    specification = Spec(spec).concretized()
-    pkg = spack.repo.get(specification)
-    fetch_strategy = fs.from_list_url(pkg)
-    assert isinstance(fetch_strategy, fs.URLFetchStrategy)
-    assert os.path.basename(fetch_strategy.url) == url
-    assert fetch_strategy.digest == digest
-    assert fetch_strategy.extra_options == {}
-    pkg.fetch_options = {'timeout': 60}
-    fetch_strategy = fs.from_list_url(pkg)
-    assert fetch_strategy.extra_options == {'timeout': 60}
-
-
-def test_from_list_url_unspecified(mock_packages, config):
+    with spack.config.override('config:use_curl', use_curl):
+        specification = Spec(spec).concretized()
+        pkg = spack.repo.get(specification)
+        fetch_strategy = fs.from_list_url(pkg)
+        assert isinstance(fetch_strategy, fs.URLFetchStrategy)
+        assert os.path.basename(fetch_strategy.url) == url
+        assert fetch_strategy.digest == digest
+        assert fetch_strategy.extra_options == {}
+        pkg.fetch_options = {'timeout': 60}
+        fetch_strategy = fs.from_list_url(pkg)
+        assert fetch_strategy.extra_options == {'timeout': 60}
+
+
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_from_list_url_unspecified(mock_packages, config, use_curl):
     """Test non-specific URLs from the url-list-test package."""
-    pkg = spack.repo.get('url-list-test')
+    with spack.config.override('config:use_curl', use_curl):
+        pkg = spack.repo.get('url-list-test')
 
-    spec = Spec('url-list-test @2.0.0').concretized()
-    pkg = spack.repo.get(spec)
-    fetch_strategy = fs.from_list_url(pkg)
-    assert isinstance(fetch_strategy, fs.URLFetchStrategy)
-    assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz'
-    assert fetch_strategy.digest is None
-    assert fetch_strategy.extra_options == {}
-    pkg.fetch_options = {'timeout': 60}
-    fetch_strategy = fs.from_list_url(pkg)
-    assert fetch_strategy.extra_options == {'timeout': 60}
+        spec = Spec('url-list-test @2.0.0').concretized()
+        pkg = spack.repo.get(spec)
+        fetch_strategy = fs.from_list_url(pkg)
+        assert isinstance(fetch_strategy, fs.URLFetchStrategy)
+        assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz'
+        assert fetch_strategy.digest is None
+        assert fetch_strategy.extra_options == {}
+        pkg.fetch_options = {'timeout': 60}
+        fetch_strategy = fs.from_list_url(pkg)
+        assert fetch_strategy.extra_options == {'timeout': 60}
 
 
 def test_nosource_from_list_url(mock_packages, config):
@@ -176,6 +224,8 @@ def test_unknown_hash(checksum_type):
         crypto.Checker('a')
 
 
+@pytest.mark.skipif(which('curl') is None,
+                    reason='Urllib does not have built-in status bar')
 def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd):
     """Ensure fetch with status bar option succeeds."""
     def is_true():
@@ -185,26 +235,27 @@ def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd):
 
     monkeypatch.setattr(sys.stdout, 'isatty', is_true)
     monkeypatch.setattr(tty, 'msg_enabled', is_true)
+    with spack.config.override('config:use_curl', True):
+        fetcher = fs.URLFetchStrategy(mock_archive.url)
+        with Stage(fetcher, path=testpath) as stage:
+            assert fetcher.archive_file is None
+            stage.fetch()
 
-    fetcher = fs.URLFetchStrategy(mock_archive.url)
-    with Stage(fetcher, path=testpath) as stage:
-        assert fetcher.archive_file is None
-        stage.fetch()
-
-    status = capfd.readouterr()[1]
-    assert '##### 100' in status
+        status = capfd.readouterr()[1]
+        assert '##### 100' in status
 
 
-def test_url_extra_fetch(tmpdir, mock_archive):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_url_extra_fetch(tmpdir, mock_archive, use_curl):
     """Ensure a fetch after downloading is effectively a no-op."""
-    testpath = str(tmpdir)
-
-    fetcher = fs.URLFetchStrategy(mock_archive.url)
-    with Stage(fetcher, path=testpath) as stage:
-        assert fetcher.archive_file is None
-        stage.fetch()
-        assert fetcher.archive_file is not None
-        fetcher.fetch()
+    with spack.config.override('config:use_curl', use_curl):
+        testpath = str(tmpdir)
+        fetcher = fs.URLFetchStrategy(mock_archive.url)
+        with Stage(fetcher, path=testpath) as stage:
+            assert fetcher.archive_file is None
+            stage.fetch()
+            assert fetcher.archive_file is not None
+            fetcher.fetch()
 
 
 @pytest.mark.parametrize('url,urls,version,expected', [
@@ -215,17 +266,19 @@ def test_url_extra_fetch(tmpdir, mock_archive):
      ['https://ftpmirror.gnu.org/autoconf/autoconf-2.62.tar.gz',
       'https://ftp.gnu.org/gnu/autoconf/autoconf-2.62.tar.gz'])
 ])
-def test_candidate_urls(pkg_factory, url, urls, version, expected):
+@pytest.mark.parametrize('use_curl', [True, False])
+def test_candidate_urls(pkg_factory, url, urls, version, expected, use_curl):
     """Tests that candidate urls include mirrors and that they go through
     pattern matching and substitution for versions.
     """
-    pkg = pkg_factory(url, urls)
-    f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
-    assert f.candidate_urls == expected
-    assert f.extra_options == {}
-    pkg = pkg_factory(url, urls, fetch_options={'timeout': 60})
-    f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
-    assert f.extra_options == {'timeout': 60}
+    with spack.config.override('config:use_curl', use_curl):
+        pkg = pkg_factory(url, urls)
+        f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
+        assert f.candidate_urls == expected
+        assert f.extra_options == {}
+        pkg = pkg_factory(url, urls, fetch_options={'timeout': 60})
+        f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
+        assert f.extra_options == {'timeout': 60}
 
 
 @pytest.mark.regression('19673')
@@ -244,11 +297,10 @@ def test_missing_curl(tmpdir, monkeypatch):
 
     testpath = str(tmpdir)
     url = 'http://github.com/spack/spack'
-    fetcher = fs.URLFetchStrategy(url=url)
-    assert fetcher is not None
-
-    with pytest.raises(TypeError, match='object is not callable'):
-        with Stage(fetcher, path=testpath) as stage:
-            out = stage.fetch()
-
-        assert err_fmt.format('curl') in out
+    with spack.config.override('config:use_curl', True):
+        fetcher = fs.URLFetchStrategy(url=url)
+        assert fetcher is not None
+        with pytest.raises(TypeError, match='object is not callable'):
+            with Stage(fetcher, path=testpath) as stage:
+                out = stage.fetch()
+            assert err_fmt.format('curl') in out
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index 90c7c2b8cc..d4febac3e3 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -105,7 +105,8 @@ def read_from_url(url, accept_content_type=None):
         else:
             # User has explicitly indicated that they do not want SSL
             # verification.
-            context = ssl._create_unverified_context()
+            if not __UNABLE_TO_VERIFY_SSL:
+                context = ssl._create_unverified_context()
 
     req = Request(url_util.format(url))
     content_type = None
-- 
cgit v1.2.3-70-g09d2