From 3f0e6d04e048ff54882e9426500bd375cf590fe0 Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Fri, 24 Aug 2018 11:19:47 -0700
Subject: commands: add `spack url stats` command

This command prints out stats about all package versions, like so:

    $ spack url stats
    ==> 6070 total versions for 2827 packages:
    ------------------------------------
    url                    5411    89.1%
      schemes
        https              3822    70.6%
        http               1527    28.2%
        ftp                  27     0.5%
        file                 35     0.6%
      checksums
        sha512                6     0.1%
        sha256              163     3.0%
        sha1                 81     1.5%
        md5                5161    95.4%
    ------------------------------------
    svn                       3     0.0%
    ------------------------------------
    hg                        5     0.1%
    ------------------------------------
    go                        1     0.0%
    ------------------------------------
    git                     650    10.7%
      security
        no commit           384    59.1%
        commit              266    40.9%
    ------------------------------------
---
 lib/spack/spack/cmd/url.py      | 86 ++++++++++++++++++++++++++++++++++++++++-
 lib/spack/spack/test/cmd/url.py | 10 +++++
 2 files changed, 94 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/spack/spack/cmd/url.py b/lib/spack/spack/cmd/url.py
index f39e09de3e..b787cfc7b0 100644
--- a/lib/spack/spack/cmd/url.py
+++ b/lib/spack/spack/cmd/url.py
@@ -23,10 +23,15 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ##############################################################################
 from __future__ import division, print_function
-
 from collections import defaultdict
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
 
+import spack.fetch_strategy as fs
 import spack.repo
+import spack.util.crypto as crypto
 
 from llnl.util import tty
 from spack.url import parse_version_offset, parse_name_offset
@@ -87,12 +92,18 @@ def setup_parser(subparser):
         'summary',
         help='print a summary of how well we are parsing package urls')
 
+    # Stats
+    sp.add_parser(
+        'stats',
+        help='print statistics on versions and checksums for all packages')
+
 
 def url(parser, args):
     action = {
         'parse': url_parse,
         'list': url_list,
-        'summary': url_summary
+        'summary': url_summary,
+        'stats': url_stats,
     }
 
     action[args.subcommand](args)
@@ -246,6 +257,77 @@ def url_summary(args):
         name_count_dict, version_count_dict)
 
 
+def url_stats(args):
+    stats = {}  # stats about fetchers in packages.
+    nvers = 0   # total number of versions
+    npkgs = 0   # total number of packages
+
+    def inc(fstype, category, attr=None):
+        """Increment statistics in the stats dict."""
+        categories = stats.setdefault(fstype, {})
+        if attr:
+            cat_stats = categories.setdefault(category, {})
+            val = cat_stats.setdefault(attr, 0)
+            stats[fstype][category][attr] = val + 1
+        else:
+            val = categories.setdefault(category, 0)
+            stats[fstype][category] = val + 1
+
+    # over all packages
+    for pkg in spack.repo.path.all_packages():
+        npkgs += 1
+
+        # look at each version
+        for v, args in pkg.versions.items():
+            # figure out what type of fetcher it is
+            fetcher = fs.for_package_version(pkg, v)
+            nvers += 1
+
+            fstype = fetcher.url_attr
+            inc(fstype, 'total')
+
+            # put some special stats in for particular types of fetchers.
+            if fstype == 'git':
+                if 'commit' in args:
+                    inc('git', 'security', 'commit')
+                else:
+                    inc('git', 'security', 'no commit')
+            elif fstype == 'url':
+                for h in crypto.hashes:
+                    if h in args:
+                        inc('url', 'checksums', h)
+                        break
+                else:
+                    if 'checksum' in args:
+                        h = crypto.hash_algo_for_digest(args['checksum'])
+                        inc('url', 'checksums', h)
+                    else:
+                        inc('url', 'checksums', 'no checksum')
+
+                # parse out the URL scheme (https/http/ftp/etc.)
+                urlinfo = urlparse(fetcher.url)
+                inc('url', 'schemes', urlinfo.scheme)
+
+    # print a nice summary table
+    tty.msg("%d total versions for %d packages:" % (nvers, npkgs))
+    line_width = 36
+    print("-" * line_width)
+    for fetcher, fetcher_stats in sorted(stats.items(), reverse=True):
+        fs_total = fetcher_stats['total']
+        fs_pct = float(fs_total) / nvers * 100
+        print("%-22s%5d%8.1f%%" % (fetcher, fs_total, fs_pct))
+
+        for category, cat_stats in sorted(fetcher_stats.items(), reverse=True):
+            if category == 'total':
+                continue
+            print(" %s" % category)
+
+            for name, number in sorted(cat_stats.items(), reverse=True):
+                pct = float(number) / fs_total * 100
+                print(" %-18s%5d%8.1f%%" % (name, number, pct))
+        print("-" * line_width)
+
+
 def print_name_and_version(url):
     """Prints a URL. Underlines the detected name with dashes and
     the detected version with tildes.
diff --git a/lib/spack/spack/test/cmd/url.py b/lib/spack/spack/test/cmd/url.py
index 6347f636af..f96177d2e5 100644
--- a/lib/spack/spack/test/cmd/url.py
+++ b/lib/spack/spack/test/cmd/url.py
@@ -24,6 +24,8 @@
 ##############################################################################
 import re
 import pytest
+
+import spack.repo
 from spack.url import UndetectableVersionError
 from spack.main import SpackCommand
 from spack.cmd.url import name_parsed_correctly, version_parsed_correctly
@@ -141,3 +143,11 @@ def test_url_summary():
     out_correct_versions = int(
         re.search(r'Versions correctly parsed:\s*(\d+)', out).group(1))
     assert out_correct_versions == correct_versions
+
+
+def test_url_stats(capfd):
+    with capfd.disabled():
+        output = url('stats')
+        npkgs = '%d packages' % len(spack.repo.all_package_names())
+        assert npkgs in output
+        assert 'total versions' in output
-- 
cgit v1.2.3-60-g2f50