summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorTodd Gamblin <gamblin2@llnl.gov>2021-09-08 07:59:06 -0700
committerGitHub <noreply@github.com>2021-09-08 07:59:06 -0700
commitc309adb4b3a3d278fef89b32e6ad6fe677748ecb (patch)
tree789437e793a69b2272b4262f48279e03489e2212 /lib
parentc2a6ccbea843435d942f68ce60eb4c921d980150 (diff)
downloadspack-c309adb4b3a3d278fef89b32e6ad6fe677748ecb.tar.gz
spack-c309adb4b3a3d278fef89b32e6ad6fe677748ecb.tar.bz2
spack-c309adb4b3a3d278fef89b32e6ad6fe677748ecb.tar.xz
spack-c309adb4b3a3d278fef89b32e6ad6fe677748ecb.zip
url stats: add `--show-issues` option (#25792)
* tests: make `spack url [stats|summary]` work on mock packages Mock packages have historically had mock hashes, but this means they're also invalid as far as Spack's hash detection is concerned. - [x] convert all hashes in mock package to md5 or sha256 - [x] ensure that all mock packages have a URL - [x] ignore some special cases with multiple VCS fetchers * url stats: add `--show-issues` option `spack url stats` tells us how many URLs are using what protocol, type of checksum, etc., but it previously did not tell us which packages and URLs had the issues. This adds a `--show-issues` option to show URLs with insecure (`http`) URLs or `md5` hashes (which are now deprecated by NIST).
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/cmd/url.py46
-rw-r--r--lib/spack/spack/test/cmd/url.py42
-rw-r--r--lib/spack/spack/test/packages.py17
-rw-r--r--lib/spack/spack/test/url_fetch.py22
4 files changed, 91 insertions, 36 deletions
diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py
index 8d9232203f..e3182478ab 100644
--- a/lib/spack/spack/cmd/url.py
+++ b/lib/spack/spack/cmd/url.py
@@ -9,6 +9,7 @@ from collections import defaultdict
import six.moves.urllib.parse as urllib_parse
+import llnl.util.tty.color as color
from llnl.util import tty
import spack.fetch_strategy as fs
@@ -80,9 +81,13 @@ def setup_parser(subparser):
help='print a summary of how well we are parsing package urls')
# Stats
- sp.add_parser(
+ stats_parser = sp.add_parser(
'stats',
help='print statistics on versions and checksums for all packages')
+ stats_parser.add_argument(
+ "--show-issues", action="store_true",
+ help="show packages with issues (md5 hashes, http urls)"
+ )
def url(parser, args):
@@ -262,6 +267,9 @@ def url_summary(args):
def url_stats(args):
+ # dictionary of issue type -> package -> descriptions
+ issues = defaultdict(lambda: defaultdict(lambda: []))
+
class UrlStats(object):
def __init__(self):
self.total = 0
@@ -270,7 +278,7 @@ def url_stats(args):
self.url_type = defaultdict(lambda: 0)
self.git_type = defaultdict(lambda: 0)
- def add(self, fetcher):
+ def add(self, pkg_name, fetcher):
self.total += 1
url_type = fetcher.url_attr
@@ -284,10 +292,18 @@ def url_stats(args):
algo = 'no checksum'
self.checksums[algo] += 1
+ if algo == "md5":
+ md5_hashes = issues["md5 hashes"]
+ md5_hashes[pkg_name].append(fetcher.url)
+
# parse out the URL scheme (https/http/ftp/etc.)
urlinfo = urllib_parse.urlparse(fetcher.url)
self.schemes[urlinfo.scheme] += 1
+ if urlinfo.scheme == "http":
+ http_urls = issues["http urls"]
+ http_urls[pkg_name].append(fetcher.url)
+
elif url_type == 'git':
if getattr(fetcher, 'commit', None):
self.git_type['commit'] += 1
@@ -305,13 +321,16 @@ def url_stats(args):
for pkg in spack.repo.path.all_packages():
npkgs += 1
- for v, args in pkg.versions.items():
- fetcher = fs.for_package_version(pkg, v)
- version_stats.add(fetcher)
+ for v in pkg.versions:
+ try:
+ fetcher = fs.for_package_version(pkg, v)
+ except (fs.InvalidArgsError, fs.FetcherConflict):
+ continue
+ version_stats.add(pkg.name, fetcher)
for _, resources in pkg.resources.items():
for resource in resources:
- resource_stats.add(resource.fetcher)
+ resource_stats.add(pkg.name, resource.fetcher)
# print a nice summary table
tty.msg("URL stats for %d packages:" % npkgs)
@@ -361,6 +380,21 @@ def url_stats(args):
print_stat(4, git_type, "git_type")
print_line()
+ if args.show_issues:
+ total_issues = sum(
+ len(issues)
+ for _, pkg_issues in issues.items()
+ for _, issues in pkg_issues.items()
+ )
+ print()
+ tty.msg("Found %d issues." % total_issues)
+ for issue_type, pkgs in issues.items():
+ tty.msg("Package URLs with %s" % issue_type)
+ for pkg, pkg_issues in pkgs.items():
+ color.cprint(" @*C{%s}" % pkg)
+ for issue in pkg_issues:
+ print(" %s" % issue)
+
def print_name_and_version(url):
"""Prints a URL. Underlines the detected name with dashes and
diff --git a/lib/spack/spack/test/cmd/url.py b/lib/spack/spack/test/cmd/url.py
index b5b2b9feba..f9179720b1 100644
--- a/lib/spack/spack/test/cmd/url.py
+++ b/lib/spack/spack/test/cmd/url.py
@@ -69,13 +69,15 @@ def test_url_with_no_version_fails():
url('parse', 'http://www.netlib.org/voronoi/triangle.zip')
-@pytest.mark.maybeslow
-@pytest.mark.skipif(
+skip_python_26 = pytest.mark.skipif(
sys.version_info < (2, 7),
reason="Python 2.6 tests are run in a container, where "
"networking is super slow"
)
-def test_url_list():
+
+
+@skip_python_26
+def test_url_list(mock_packages):
out = url('list')
total_urls = len(out.split('\n'))
@@ -104,13 +106,8 @@ def test_url_list():
assert 0 < correct_version_urls < total_urls
-@pytest.mark.maybeslow
-@pytest.mark.skipif(
- sys.version_info < (2, 7),
- reason="Python 2.6 tests are run in a container, where "
- "networking is super slow"
-)
-def test_url_summary():
+@skip_python_26
+def test_url_summary(mock_packages):
"""Test the URL summary command."""
# test url_summary, the internal function that does the work
(total_urls, correct_names, correct_versions,
@@ -136,12 +133,8 @@ def test_url_summary():
assert out_correct_versions == correct_versions
-@pytest.mark.skipif(
- sys.version_info < (2, 7),
- reason="Python 2.6 tests are run in a container, where "
- "networking is super slow"
-)
-def test_url_stats(capfd):
+@skip_python_26
+def test_url_stats(capfd, mock_packages):
with capfd.disabled():
output = url('stats')
npkgs = '%d packages' % len(spack.repo.all_package_names())
@@ -151,3 +144,20 @@ def test_url_stats(capfd):
assert 'schemes' in output
assert 'versions' in output
assert 'resources' in output
+
+ output = url('stats', '--show-issues')
+ npkgs = '%d packages' % len(spack.repo.all_package_names())
+ assert npkgs in output
+ assert 'url' in output
+ assert 'git' in output
+ assert 'schemes' in output
+ assert 'versions' in output
+ assert 'resources' in output
+
+ assert 'Package URLs with md5 hashes' in output
+ assert 'needs-relocation' in output
+ assert 'https://cmake.org/files/v3.4/cmake-0.0.0.tar.gz' in output
+
+ assert 'Package URLs with http urls' in output
+ assert 'zmpi' in output
+ assert 'http://www.spack-fake-zmpi.org/downloads/zmpi-1.0.tar.gz' in output
diff --git a/lib/spack/spack/test/packages.py b/lib/spack/spack/test/packages.py
index 16e209c42c..4ebf1af69c 100644
--- a/lib/spack/spack/test/packages.py
+++ b/lib/spack/spack/test/packages.py
@@ -281,25 +281,28 @@ def test_git_url_top_level_url_versions(mock_packages, config):
pkg = spack.repo.get('git-url-top-level')
+ # leading 62 zeros of sha256 hash
+ leading_zeros = '0' * 62
+
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.0')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://example.com/some/tarball-2.0.tar.gz'
- assert fetcher.digest == 'abc20'
+ assert fetcher.digest == leading_zeros + '20'
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.1')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://example.com/some/tarball-2.1.tar.gz'
- assert fetcher.digest == 'abc21'
+ assert fetcher.digest == leading_zeros + '21'
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.2')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://www.example.com/foo2.2.tar.gz'
- assert fetcher.digest == 'abc22'
+ assert fetcher.digest == leading_zeros + '22'
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.3')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://www.example.com/foo2.3.tar.gz'
- assert fetcher.digest == 'abc23'
+ assert fetcher.digest == leading_zeros + '23'
def test_git_url_top_level_git_versions(mock_packages, config):
@@ -409,15 +412,15 @@ def test_fetch_options(mock_packages, config):
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.0')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
- assert fetcher.digest == 'abc10'
+ assert fetcher.digest == '00000000000000000000000000000010'
assert fetcher.extra_options == {'timeout': 42, 'cookie': 'foobar'}
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.1')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
- assert fetcher.digest == 'abc11'
+ assert fetcher.digest == '00000000000000000000000000000011'
assert fetcher.extra_options == {'timeout': 65}
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.2')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
- assert fetcher.digest == 'abc12'
+ assert fetcher.digest == '00000000000000000000000000000012'
assert fetcher.extra_options == {'cookie': 'baz'}
diff --git a/lib/spack/spack/test/url_fetch.py b/lib/spack/spack/test/url_fetch.py
index b0bd53af72..11a282dc2c 100644
--- a/lib/spack/spack/test/url_fetch.py
+++ b/lib/spack/spack/test/url_fetch.py
@@ -160,13 +160,21 @@ def test_fetch(
@pytest.mark.parametrize('spec,url,digest', [
- ('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', 'abc000'),
- ('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', 'abc100'),
- ('url-list-test @3.0', 'foo-3.0.tar.gz', 'abc30'),
- ('url-list-test @4.5', 'foo-4.5.tar.gz', 'abc45'),
- ('url-list-test @2.0.0b2', 'foo-2.0.0b2.tar.gz', 'abc200b2'),
- ('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'),
- ('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'),
+ ('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', '00000000000000000000000000000000'),
+ ('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', '00000000000000000000000000000100'),
+ ('url-list-test @3.0', 'foo-3.0.tar.gz', '00000000000000000000000000000030'),
+ ('url-list-test @4.5', 'foo-4.5.tar.gz', '00000000000000000000000000000450'),
+ (
+ 'url-list-test @2.0.0b2',
+ 'foo-2.0.0b2.tar.gz',
+ '000000000000000000000000000200b2'
+ ),
+ ('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', '000000000000000000000000000030a1'),
+ (
+ 'url-list-test @4.5-rc5',
+ 'foo-4.5-rc5.tar.gz',
+ '000000000000000000000000000045c5'
+ ),
])
@pytest.mark.parametrize('_fetch_method', ['curl', 'urllib'])
def test_from_list_url(mock_packages, config, spec, url, digest, _fetch_method):