From c309adb4b3a3d278fef89b32e6ad6fe677748ecb Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Wed, 8 Sep 2021 07:59:06 -0700 Subject: url stats: add `--show-issues` option (#25792) * tests: make `spack url [stats|summary]` work on mock packages Mock packages have historically had mock hashes, but this means they're also invalid as far as Spack's hash detection is concerned. - [x] convert all hashes in mock package to md5 or sha256 - [x] ensure that all mock packages have a URL - [x] ignore some special cases with multiple VCS fetchers * url stats: add `--show-issues` option `spack url stats` tells us how many URLs are using what protocol, type of checksum, etc., but it previously did not tell us which packages and URLs had the issues. This adds a `--show-issues` option to show URLs with insecure (`http`) URLs or `md5` hashes (which are now deprecated by NIST). --- lib/spack/spack/cmd/url.py | 46 ++++++++++++++++++++++++++++++++++----- lib/spack/spack/test/cmd/url.py | 42 +++++++++++++++++++++-------------- lib/spack/spack/test/packages.py | 17 +++++++++------ lib/spack/spack/test/url_fetch.py | 22 +++++++++++++------ 4 files changed, 91 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py index 8d9232203f..e3182478ab 100644 --- a/lib/spack/spack/cmd/url.py +++ b/lib/spack/spack/cmd/url.py @@ -9,6 +9,7 @@ from collections import defaultdict import six.moves.urllib.parse as urllib_parse +import llnl.util.tty.color as color from llnl.util import tty import spack.fetch_strategy as fs @@ -80,9 +81,13 @@ def setup_parser(subparser): help='print a summary of how well we are parsing package urls') # Stats - sp.add_parser( + stats_parser = sp.add_parser( 'stats', help='print statistics on versions and checksums for all packages') + stats_parser.add_argument( + "--show-issues", action="store_true", + help="show packages with issues (md5 hashes, http urls)" + ) def url(parser, args): @@ -262,6 +267,9 @@ def url_summary(args): def url_stats(args): + # dictionary of issue type -> package -> descriptions + issues = defaultdict(lambda: defaultdict(lambda: [])) + class UrlStats(object): def __init__(self): self.total = 0 @@ -270,7 +278,7 @@ def url_stats(args): self.url_type = defaultdict(lambda: 0) self.git_type = defaultdict(lambda: 0) - def add(self, fetcher): + def add(self, pkg_name, fetcher): self.total += 1 url_type = fetcher.url_attr @@ -284,10 +292,18 @@ def url_stats(args): algo = 'no checksum' self.checksums[algo] += 1 + if algo == "md5": + md5_hashes = issues["md5 hashes"] + md5_hashes[pkg_name].append(fetcher.url) + # parse out the URL scheme (https/http/ftp/etc.) urlinfo = urllib_parse.urlparse(fetcher.url) self.schemes[urlinfo.scheme] += 1 + if urlinfo.scheme == "http": + http_urls = issues["http urls"] + http_urls[pkg_name].append(fetcher.url) + elif url_type == 'git': if getattr(fetcher, 'commit', None): self.git_type['commit'] += 1 @@ -305,13 +321,16 @@ def url_stats(args): for pkg in spack.repo.path.all_packages(): npkgs += 1 - for v, args in pkg.versions.items(): - fetcher = fs.for_package_version(pkg, v) - version_stats.add(fetcher) + for v in pkg.versions: + try: + fetcher = fs.for_package_version(pkg, v) + except (fs.InvalidArgsError, fs.FetcherConflict): + continue + version_stats.add(pkg.name, fetcher) for _, resources in pkg.resources.items(): for resource in resources: - resource_stats.add(resource.fetcher) + resource_stats.add(pkg.name, resource.fetcher) # print a nice summary table tty.msg("URL stats for %d packages:" % npkgs) @@ -361,6 +380,21 @@ def url_stats(args): print_stat(4, git_type, "git_type") print_line() + if args.show_issues: + total_issues = sum( + len(issues) + for _, pkg_issues in issues.items() + for _, issues in pkg_issues.items() + ) + print() + tty.msg("Found %d issues." % total_issues) + for issue_type, pkgs in issues.items(): + tty.msg("Package URLs with %s" % issue_type) + for pkg, pkg_issues in pkgs.items(): + color.cprint(" @*C{%s}" % pkg) + for issue in pkg_issues: + print(" %s" % issue) + def print_name_and_version(url): """Prints a URL. Underlines the detected name with dashes and diff --git a/lib/spack/spack/test/cmd/url.py b/lib/spack/spack/test/cmd/url.py index b5b2b9feba..f9179720b1 100644 --- a/lib/spack/spack/test/cmd/url.py +++ b/lib/spack/spack/test/cmd/url.py @@ -69,13 +69,15 @@ def test_url_with_no_version_fails(): url('parse', 'http://www.netlib.org/voronoi/triangle.zip') -@pytest.mark.maybeslow -@pytest.mark.skipif( +skip_python_26 = pytest.mark.skipif( sys.version_info < (2, 7), reason="Python 2.6 tests are run in a container, where " "networking is super slow" ) -def test_url_list(): + + +@skip_python_26 +def test_url_list(mock_packages): out = url('list') total_urls = len(out.split('\n')) @@ -104,13 +106,8 @@ def test_url_list(): assert 0 < correct_version_urls < total_urls -@pytest.mark.maybeslow -@pytest.mark.skipif( - sys.version_info < (2, 7), - reason="Python 2.6 tests are run in a container, where " - "networking is super slow" -) -def test_url_summary(): +@skip_python_26 +def test_url_summary(mock_packages): """Test the URL summary command.""" # test url_summary, the internal function that does the work (total_urls, correct_names, correct_versions, @@ -136,12 +133,8 @@ def test_url_summary(): assert out_correct_versions == correct_versions -@pytest.mark.skipif( - sys.version_info < (2, 7), - reason="Python 2.6 tests are run in a container, where " - "networking is super slow" -) -def test_url_stats(capfd): +@skip_python_26 +def test_url_stats(capfd, mock_packages): with capfd.disabled(): output = url('stats') npkgs = '%d packages' % len(spack.repo.all_package_names()) @@ -151,3 +144,20 @@ def test_url_stats(capfd): assert 'schemes' in output assert 'versions' in output assert 'resources' in output + + output = url('stats', '--show-issues') + npkgs = '%d packages' % len(spack.repo.all_package_names()) + assert npkgs in output + assert 'url' in output + assert 'git' in output + assert 'schemes' in output + assert 'versions' in output + assert 'resources' in output + + assert 'Package URLs with md5 hashes' in output + assert 'needs-relocation' in output + assert 'https://cmake.org/files/v3.4/cmake-0.0.0.tar.gz' in output + + assert 'Package URLs with http urls' in output + assert 'zmpi' in output + assert 'http://www.spack-fake-zmpi.org/downloads/zmpi-1.0.tar.gz' in output diff --git a/lib/spack/spack/test/packages.py b/lib/spack/spack/test/packages.py index 16e209c42c..4ebf1af69c 100644 --- a/lib/spack/spack/test/packages.py +++ b/lib/spack/spack/test/packages.py @@ -281,25 +281,28 @@ def test_git_url_top_level_url_versions(mock_packages, config): pkg = spack.repo.get('git-url-top-level') + # leading 62 zeros of sha256 hash + leading_zeros = '0' * 62 + fetcher = spack.fetch_strategy.for_package_version(pkg, '2.0') assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy) assert fetcher.url == 'https://example.com/some/tarball-2.0.tar.gz' - assert fetcher.digest == 'abc20' + assert fetcher.digest == leading_zeros + '20' fetcher = spack.fetch_strategy.for_package_version(pkg, '2.1') assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy) assert fetcher.url == 'https://example.com/some/tarball-2.1.tar.gz' - assert fetcher.digest == 'abc21' + assert fetcher.digest == leading_zeros + '21' fetcher = spack.fetch_strategy.for_package_version(pkg, '2.2') assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy) assert fetcher.url == 'https://www.example.com/foo2.2.tar.gz' - assert fetcher.digest == 'abc22' + assert fetcher.digest == leading_zeros + '22' fetcher = spack.fetch_strategy.for_package_version(pkg, '2.3') assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy) assert fetcher.url == 'https://www.example.com/foo2.3.tar.gz' - assert fetcher.digest == 'abc23' + assert fetcher.digest == leading_zeros + '23' def test_git_url_top_level_git_versions(mock_packages, config): @@ -409,15 +412,15 @@ def test_fetch_options(mock_packages, config): fetcher = spack.fetch_strategy.for_package_version(pkg, '1.0') assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy) - assert fetcher.digest == 'abc10' + assert fetcher.digest == '00000000000000000000000000000010' assert fetcher.extra_options == {'timeout': 42, 'cookie': 'foobar'} fetcher = spack.fetch_strategy.for_package_version(pkg, '1.1') assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy) - assert fetcher.digest == 'abc11' + assert fetcher.digest == '00000000000000000000000000000011' assert fetcher.extra_options == {'timeout': 65} fetcher = spack.fetch_strategy.for_package_version(pkg, '1.2') assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy) - assert fetcher.digest == 'abc12' + assert fetcher.digest == '00000000000000000000000000000012' assert fetcher.extra_options == {'cookie': 'baz'} diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py index b0bd53af72..11a282dc2c 100644 --- a/lib/spack/spack/test/url_fetch.py +++ b/lib/spack/spack/test/url_fetch.py @@ -160,13 +160,21 @@ def test_fetch( @pytest.mark.parametrize('spec,url,digest', [ - ('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', 'abc000'), - ('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', 'abc100'), - ('url-list-test @3.0', 'foo-3.0.tar.gz', 'abc30'), - ('url-list-test @4.5', 'foo-4.5.tar.gz', 'abc45'), - ('url-list-test @2.0.0b2', 'foo-2.0.0b2.tar.gz', 'abc200b2'), - ('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'), - ('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'), + ('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', '00000000000000000000000000000000'), + ('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', '00000000000000000000000000000100'), + ('url-list-test @3.0', 'foo-3.0.tar.gz', '00000000000000000000000000000030'), + ('url-list-test @4.5', 'foo-4.5.tar.gz', '00000000000000000000000000000450'), + ( + 'url-list-test @2.0.0b2', + 'foo-2.0.0b2.tar.gz', + '000000000000000000000000000200b2' + ), + ('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', '000000000000000000000000000030a1'), + ( + 'url-list-test @4.5-rc5', + 'foo-4.5-rc5.tar.gz', + '000000000000000000000000000045c5' + ), ]) @pytest.mark.parametrize('_fetch_method', ['curl', 'urllib']) def test_from_list_url(mock_packages, config, spec, url, digest, _fetch_method): -- cgit v1.2.3-70-g09d2