From a9ba40164ace81af7c7488bdcb693e42a3446b48 Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Tue, 22 Feb 2022 16:55:59 -0800 Subject: Checksum match (#28989) * cmd/checksum: prefer url matching url_for_version This is a minimal change toward getting the right archive from places like GitHub. The heuristic is: * if an archive url exists, take its version * generate a url from the package with pkg.url_for_version * if they match * stop considering other URLs for this version * otherwise, continue replacing the url for the version I doubt this will always work, but it should address a variety of versions of this bug. A good test right now is `spack checksum gh`, which checksums macOS binaries without this, and the correct source packages with it. fixes #15985 related to #14129 related to #13940 * add heuristics to help create as well Since create can't rely on an existing package, this commit adds another pair of heuristics: 1. if the current version is a specifically listed archive, don't replace it 2. if the current url matches the result of applying `spack.url.substitute_version(a, ver)` for any a in archive_urls, prefer it and don't replace it fixes #13940 * clean up style and a lingering debug import * ok flake8, you got me * document reference_package argument * Update lib/spack/spack/util/web.py Co-authored-by: Adam J. Stewart * try to appease sphinx Co-authored-by: Adam J. 
Stewart --- lib/spack/spack/package.py | 6 +++++- lib/spack/spack/util/web.py | 21 ++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index c10caf89fc..c30747edeb 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -2564,7 +2564,11 @@ class PackageBase(six.with_metaclass(PackageMeta, PackageViewMixin, object)): try: return spack.util.web.find_versions_of_archive( - self.all_urls, self.list_url, self.list_depth, concurrency + self.all_urls, + self.list_url, + self.list_depth, + concurrency, + reference_package=self, ) except spack.util.web.NoNetworkConnectionError as e: tty.die("Package.fetch_versions couldn't connect to:", e.url, diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index a47bd39ccb..0f148a88f5 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -562,7 +562,7 @@ def _urlopen(req, *args, **kwargs): def find_versions_of_archive( - archive_urls, list_url=None, list_depth=0, concurrency=32 + archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None ): """Scrape web pages for new versions of a tarball. @@ -577,6 +577,10 @@ def find_versions_of_archive( list_depth (int): max depth to follow links on list_url pages. Defaults to 0. concurrency (int): maximum number of concurrent requests + reference_package (spack.package.Package or None): a spack package + used as a reference for url detection. Uses the url_for_version + method on the package to produce reference urls which, if found, + are preferred. """ if not isinstance(archive_urls, (list, tuple)): archive_urls = [archive_urls] @@ -638,11 +642,26 @@ def find_versions_of_archive( # Walk through archive_url links first. # Any conflicting versions will be overwritten by the list_url links. 
versions = {} + matched = set() for url in archive_urls + sorted(links): if any(re.search(r, url) for r in regexes): try: ver = spack.url.parse_version(url) + if ver in matched: + continue versions[ver] = url + # prevent this version from getting overwritten + if url in archive_urls: + matched.add(ver) + elif reference_package is not None: + if url == reference_package.url_for_version(ver): + matched.add(ver) + else: + extrapolated_urls = [ + spack.url.substitute_version(u, ver) for u in archive_urls + ] + if url in extrapolated_urls: + matched.add(ver) except spack.url.UndetectableVersionError: continue -- cgit v1.2.3-60-g2f50