From 2b89d9b1db620c3b4a0e391ffc6621717aa03382 Mon Sep 17 00:00:00 2001
From: Todd Gamblin
Date: Tue, 22 Dec 2015 16:54:41 -0800
Subject: More consistent URL parsing when finding versions.

Versions found by wildcard URLs are different from versions found by
parse_version, etc. The wildcards are constructed more haphazardly
than the very specific URL patterns in url.py, so they can get things
wrong. e.g., for this URL:

    https://software.lanl.gov/MeshTools/trac/attachment/wiki/WikiStart/mstk-2.25rc1.tgz

We miss the 'rc' and only return 2.25r as the version if we ONLY use
URL wildcards.

Future: Maybe use the regexes from url.py to scrape web pages, and
then compare them for similarity with the original URL, instead of
trying to make a structured wildcard URL pattern? This might yield
better results.
---
 lib/spack/spack/package.py | 12 +++++++++---
 lib/spack/spack/url.py | 2 +-
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py
index b95afb073d..2e7678b4b0 100644
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -1200,6 +1200,8 @@ def find_versions_of_archive(*archive_urls, **kwargs):
     for aurl in archive_urls:
         list_urls.add(spack.url.find_list_url(aurl))
 
+    print list_urls
+
     # Grab some web pages to scrape.
     page_map = {}
     for lurl in list_urls:
@@ -1224,9 +1226,13 @@ def find_versions_of_archive(*archive_urls, **kwargs):
     for page_url, content in page_map.iteritems():
         # extract versions from matches.
         for regex in regexes:
-            versions.update(
-                (Version(m.group(1)), urljoin(page_url, m.group(0)))
-                for m in re.finditer(regex, content))
+            print regex
+            print
+
+            for m in re.finditer(regex, content):
+                url = urljoin(page_url, m.group(0))
+                ver = spack.url.parse_version(url)
+                versions[ver] = url
 
     return versions
 
diff --git a/lib/spack/spack/url.py b/lib/spack/spack/url.py
index 02c0b83e26..ed5a6d2a44 100644
--- a/lib/spack/spack/url.py
+++ b/lib/spack/spack/url.py
@@ -210,7 +210,7 @@ def parse_version_offset(path):
         (r'-((\d+\.)*\d+)$', stem),
 
         # e.g. foobar-4.5.1b, foobar4.5RC, foobar.v4.5.1b
-        (r'[-._]?v?((\d+\.)*\d+[-._]?([a-z]|rc|RC|tp|TP?)\d*)$', stem),
+        (r'[-._]?v?((\d+\.)*\d+[-._]?([a-z]|rc|RC|tp|TP)?\d*)$', stem),
 
         # e.g. foobar-4.5.0-beta1, or foobar-4.50-beta
         (r'-((\d+\.)*\d+-beta(\d+)?)$', stem),
-- 
cgit v1.2.3-60-g2f50