diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/cmd/create.py | 4 | ||||
-rw-r--r-- | lib/spack/spack/cmd/url-parse.py | 75 | ||||
-rw-r--r-- | lib/spack/spack/fetch_strategy.py | 4 | ||||
-rw-r--r-- | lib/spack/spack/package.py | 85 | ||||
-rw-r--r-- | lib/spack/spack/stage.py | 53 | ||||
-rw-r--r-- | lib/spack/spack/util/web.py | 140 |
6 files changed, 253 insertions, 108 deletions
diff --git a/lib/spack/spack/cmd/create.py b/lib/spack/spack/cmd/create.py index 5e42860f3e..9ecb709110 100644 --- a/lib/spack/spack/cmd/create.py +++ b/lib/spack/spack/cmd/create.py @@ -34,8 +34,8 @@ from llnl.util.filesystem import mkdirp import spack import spack.cmd import spack.cmd.checksum -import spack.package import spack.url +import spack.util.web from spack.util.naming import * import spack.util.crypto as crypto @@ -166,7 +166,7 @@ def create(parser, args): tty.msg("This looks like a URL for %s version %s." % (name, version)) tty.msg("Creating template for package %s" % name) - versions = spack.package.find_versions_of_archive(url) + versions = spack.util.web.find_versions_of_archive(url) rkeys = sorted(versions.keys(), reverse=True) versions = OrderedDict(zip(rkeys, (versions[v] for v in rkeys))) diff --git a/lib/spack/spack/cmd/url-parse.py b/lib/spack/spack/cmd/url-parse.py new file mode 100644 index 0000000000..077c793d2e --- /dev/null +++ b/lib/spack/spack/cmd/url-parse.py @@ -0,0 +1,75 @@ +############################################################################## +# Copyright (c) 2013, Lawrence Livermore National Security, LLC. +# Produced at the Lawrence Livermore National Laboratory. +# +# This file is part of Spack. +# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved. +# LLNL-CODE-647188 +# +# For details, see https://github.com/llnl/spack +# Please also see the LICENSE file for our notice and the LGPL. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License (as published by +# the Free Software Foundation) version 2.1 dated February 1999. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and +# conditions of the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +############################################################################## +import sys + +import llnl.util.tty as tty + +import spack +import spack.url +from spack.util.web import find_versions_of_archive + +description = "Show parsing of a URL, optionally spider web for other versions." + +def setup_parser(subparser): + subparser.add_argument('url', help="url of a package archive") + subparser.add_argument( + '-s', '--spider', action='store_true', help="Spider the source page for versions.") + + +def print_name_and_version(url): + name, ns, nl, ntup, ver, vs, vl, vtup = spack.url.substitution_offsets(url) + underlines = [" "] * max(ns+nl, vs+vl) + for i in range(ns, ns+nl): + underlines[i] = '-' + for i in range(vs, vs+vl): + underlines[i] = '~' + + print " %s" % url + print " %s" % ''.join(underlines) + + +def url_parse(parser, args): + url = args.url + + ver, vs, vl = spack.url.parse_version_offset(url) + name, ns, nl = spack.url.parse_name_offset(url, ver) + + tty.msg("Parsing URL:") + try: + print_name_and_version(url) + except spack.url.UrlParseError as e: + tty.error(str(e)) + + print + tty.msg("Substituting version 9.9.9b:") + newurl = spack.url.substitute_version(url, '9.9.9b') + print_name_and_version(newurl) + + if args.spider: + print + tty.msg("Spidering for versions:") + versions = find_versions_of_archive(url) + for v in sorted(versions): + print "%-20s%s" % (v, versions[v]) diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index a9374fb34b..0657146bf6 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -687,7 +687,7 @@ def for_package_version(pkg, version): class FetchError(spack.error.SpackError): - def __init__(self, msg, long_msg): + def __init__(self, msg, long_msg=None): super(FetchError, self).__init__(msg, long_msg) @@ -705,7 +705,7 @@ class NoArchiveFileError(FetchError): class NoDigestError(FetchError): - def __init__(self, msg, long_msg): + def __init__(self, msg, long_msg=None): super(NoDigestError, self).__init__(msg, long_msg) diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index b95afb073d..84bcb15f7f 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -733,9 +733,10 @@ class Package(object): # Construct paths to special files in the archive dir used to # keep track of whether patches were successfully applied. - archive_dir = self.stage.source_path - good_file = join_path(archive_dir, '.spack_patched') - bad_file = join_path(archive_dir, '.spack_patch_failed') + archive_dir = self.stage.source_path + good_file = join_path(archive_dir, '.spack_patched') + no_patches_file = join_path(archive_dir, '.spack_no_patches') + bad_file = join_path(archive_dir, '.spack_patch_failed') # If we encounter an archive that failed to patch, restage it # so that we can apply all the patches again. @@ -749,29 +750,46 @@ class Package(object): if os.path.isfile(good_file): tty.msg("Already patched %s" % self.name) return + elif os.path.isfile(no_patches_file): + tty.msg("No patches needed for %s." % self.name) + return # Apply all the patches for specs that match this one + patched = False for spec, patch_list in self.patches.items(): if self.spec.satisfies(spec): for patch in patch_list: - tty.msg('Applying patch %s' % patch.path_or_url) try: patch.apply(self.stage) + tty.msg('Applied patch %s' % patch.path_or_url) + patched = True except: # Touch bad file if anything goes wrong. + tty.msg('Patch %s failed.' % patch.path_or_url) touch(bad_file) raise - # patch succeeded. Get rid of failed file & touch good file so we - # don't try to patch again again next time. + if has_patch_fun: + try: + self.patch() + tty.msg("Ran patch() for %s." % self.name) + patched = True + except: + tty.msg("patch() function failed for %s." % self.name) + touch(bad_file) + raise + + # Get rid of any old failed file -- patches have either succeeded + # or are not needed. This is mostly defensive -- it's needed + # if the restage() method doesn't clean *everything* (e.g., for a repo) if os.path.isfile(bad_file): os.remove(bad_file) - touch(good_file) - - if has_patch_fun: - self.patch() - tty.msg("Patched %s" % self.name) + # touch good or no patches file so that we skip next time. + if patched: + touch(good_file) + else: + touch(no_patches_file) def do_fake_install(self): @@ -1164,7 +1182,7 @@ class Package(object): raise VersionFetchError(self.__class__) try: - return find_versions_of_archive( + return spack.util.web.find_versions_of_archive( *self.all_urls, list_url=self.list_url, list_depth=self.list_depth) except spack.error.NoNetworkConnectionError, e: tty.die("Package.fetch_versions couldn't connect to:", @@ -1188,49 +1206,6 @@ class Package(object): return " ".join("-Wl,-rpath=%s" % p for p in self.rpath) -def find_versions_of_archive(*archive_urls, **kwargs): - list_url = kwargs.get('list_url', None) - list_depth = kwargs.get('list_depth', 1) - - # Generate a list of list_urls based on archive urls and any - # explicitly listed list_url in the package - list_urls = set() - if list_url: - list_urls.add(list_url) - for aurl in archive_urls: - list_urls.add(spack.url.find_list_url(aurl)) - - # Grab some web pages to scrape. - page_map = {} - for lurl in list_urls: - pages = spack.util.web.get_pages(lurl, depth=list_depth) - page_map.update(pages) - - # Scrape them for archive URLs - regexes = [] - for aurl in archive_urls: - # This creates a regex from the URL with a capture group for - # the version part of the URL. The capture group is converted - # to a generic wildcard, so we can use this to extract things - # on a page that look like archive URLs. - url_regex = spack.url.wildcard_version(aurl) - - # We'll be a bit more liberal and just look for the archive - # part, not the full path. - regexes.append(os.path.basename(url_regex)) - - # Build a version list from all the matches we find - versions = {} - for page_url, content in page_map.iteritems(): - # extract versions from matches. - for regex in regexes: - versions.update( - (Version(m.group(1)), urljoin(page_url, m.group(0))) - for m in re.finditer(regex, content)) - - return versions - - def validate_package_url(url_string): """Determine whether spack can handle a particular URL or not.""" url = urlparse(url_string) diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 754344fc01..76ca7273cb 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -82,14 +82,18 @@ class Stage(object): stage object later). If name is not provided, then this stage will be given a unique name automatically. """ + # TODO: fetch/stage coupling needs to be reworked -- the logic + # TODO: here is convoluted and not modular enough. if isinstance(url_or_fetch_strategy, basestring): self.fetcher = fs.from_url(url_or_fetch_strategy) elif isinstance(url_or_fetch_strategy, fs.FetchStrategy): self.fetcher = url_or_fetch_strategy else: raise ValueError("Can't construct Stage without url or fetch strategy") - self.fetcher.set_stage(self) + self.default_fetcher = self.fetcher # self.fetcher can change with mirrors. + self.skip_checksum_for_mirror = True # used for mirrored archives of repositories. + self.name = kwargs.get('name') self.mirror_path = kwargs.get('mirror_path') @@ -198,17 +202,18 @@ class Stage(object): @property def archive_file(self): """Path to the source archive within this stage directory.""" - if not isinstance(self.fetcher, fs.URLFetchStrategy): - return None + paths = [] + if isinstance(self.fetcher, fs.URLFetchStrategy): + paths.append(os.path.join(self.path, os.path.basename(self.fetcher.url))) - paths = [os.path.join(self.path, os.path.basename(self.fetcher.url))] if self.mirror_path: paths.append(os.path.join(self.path, os.path.basename(self.mirror_path))) for path in paths: if os.path.exists(path): return path - return None + else: + return None @property @@ -238,23 +243,34 @@ class Stage(object): """Downloads an archive or checks out code from a repository.""" self.chdir() - fetchers = [self.fetcher] + fetchers = [self.default_fetcher] # TODO: move mirror logic out of here and clean it up! + # TODO: Or @alalazo may have some ideas about how to use a + # TODO: CompositeFetchStrategy here. + self.skip_checksum_for_mirror = True if self.mirror_path: urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()] + # If this archive is normally fetched from a tarball URL, + # then use the same digest. `spack mirror` ensures that + # the checksum will be the same. digest = None - if isinstance(self.fetcher, fs.URLFetchStrategy): - digest = self.fetcher.digest - fetchers = [fs.URLFetchStrategy(url, digest) - for url in urls] + fetchers - for f in fetchers: - f.set_stage(self) + if isinstance(self.default_fetcher, fs.URLFetchStrategy): + digest = self.default_fetcher.digest + + # Have to skip the checkesum for things archived from + # repositories. How can this be made safer? + self.skip_checksum_for_mirror = not bool(digest) + + for url in urls: + fetchers.insert(0, fs.URLFetchStrategy(url, digest)) for fetcher in fetchers: try: - fetcher.fetch() + fetcher.set_stage(self) + self.fetcher = fetcher + self.fetcher.fetch() break except spack.error.SpackError, e: tty.msg("Fetching from %s failed." % fetcher) @@ -262,13 +278,22 @@ class Stage(object): continue else: errMessage = "All fetchers failed for %s" % self.name + self.fetcher = self.default_fetcher raise fs.FetchError(errMessage, None) def check(self): """Check the downloaded archive against a checksum digest. No-op if this stage checks code out of a repository.""" - self.fetcher.check() + if self.fetcher is not self.default_fetcher and self.skip_checksum_for_mirror: + tty.warn("Fetching from mirror without a checksum!", + "This package is normally checked out from a version " + "control system, but it has been archived on a spack " + "mirror. This means we cannot know a checksum for the " + "tarball in advance. Be sure that your connection to " + "this mirror is secure!.") + else: + self.fetcher.check() def expand_archive(self): diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index 94384e9c86..e26daef296 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -23,6 +23,7 @@ # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ############################################################################## import re +import os import sys import subprocess import urllib2, cookielib @@ -70,7 +71,9 @@ def _spider(args): """ url, visited, root, opener, depth, max_depth, raise_on_error = args - pages = {} + pages = {} # dict from page URL -> text content. + links = set() # set of all links seen on visited pages. + try: # Make a HEAD request first to check the content type. This lets # us ignore tarballs and gigantic files. @@ -99,42 +102,45 @@ def _spider(args): page = response.read() pages[response_url] = page - # If we're not at max depth, parse out the links in the page - if depth < max_depth: - link_parser = LinkParser() - subcalls = [] - link_parser.feed(page) - - while link_parser.links: - raw_link = link_parser.links.pop() + # Parse out the links in the page + link_parser = LinkParser() + subcalls = [] + link_parser.feed(page) - # Skip stuff that looks like an archive - if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES): - continue + while link_parser.links: + raw_link = link_parser.links.pop() + abs_link = urlparse.urljoin(response_url, raw_link) - # Evaluate the link relative to the page it came from. - abs_link = urlparse.urljoin(response_url, raw_link) + links.add(abs_link) - # Skip things outside the root directory - if not abs_link.startswith(root): - continue + # Skip stuff that looks like an archive + if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES): + continue - # Skip already-visited links - if abs_link in visited: - continue + # Skip things outside the root directory + if not abs_link.startswith(root): + continue - subcalls.append((abs_link, visited, root, None, depth+1, max_depth, raise_on_error)) - visited.add(abs_link) + # Skip already-visited links + if abs_link in visited: + continue - if subcalls: - try: - pool = Pool(processes=len(subcalls)) - dicts = pool.map(_spider, subcalls) - for d in dicts: - pages.update(d) - finally: - pool.terminate() - pool.join() + # If we're not at max depth, follow links. + if depth < max_depth: + subcalls.append((abs_link, visited, root, None, + depth+1, max_depth, raise_on_error)) + visited.add(abs_link) + + if subcalls: + try: + pool = Pool(processes=len(subcalls)) + results = pool.map(_spider, subcalls) + for sub_pages, sub_links in results: + pages.update(sub_pages) + links.update(sub_links) + finally: + pool.terminate() + pool.join() except urllib2.URLError, e: tty.debug(e) @@ -155,10 +161,10 @@ def _spider(args): # Other types of errors are completely ignored, except in debug mode. tty.debug("Error in _spider: %s" % e) - return pages + return pages, links -def get_pages(root_url, **kwargs): +def spider(root_url, **kwargs): """Gets web pages from a root URL. If depth is specified (e.g., depth=2), then this will also fetches pages linked from the root and its children up to depth. @@ -167,5 +173,69 @@ def get_pages(root_url, **kwargs): performance over a sequential fetch. """ max_depth = kwargs.setdefault('depth', 1) - pages = _spider((root_url, set(), root_url, None, 1, max_depth, False)) - return pages + pages, links = _spider((root_url, set(), root_url, None, 1, max_depth, False)) + return pages, links + + +def find_versions_of_archive(*archive_urls, **kwargs): + """Scrape web pages for new versions of a tarball. + + Arguments: + archive_urls: + URLs for different versions of a package. Typically these + are just the tarballs from the package file itself. By + default, this searches the parent directories of archives. + + Keyword Arguments: + list_url: + + URL for a listing of archives. Spack wills scrape these + pages for download links that look like the archive URL. + + list_depth: + Max depth to follow links on list_url pages. + + """ + list_url = kwargs.get('list_url', None) + list_depth = kwargs.get('list_depth', 1) + + # Generate a list of list_urls based on archive urls and any + # explicitly listed list_url in the package + list_urls = set() + if list_url: + list_urls.add(list_url) + for aurl in archive_urls: + list_urls.add(spack.url.find_list_url(aurl)) + + # Grab some web pages to scrape. + pages = {} + links = set() + for lurl in list_urls: + p, l = spider(lurl, depth=list_depth) + pages.update(p) + links.update(l) + + # Scrape them for archive URLs + regexes = [] + for aurl in archive_urls: + # This creates a regex from the URL with a capture group for + # the version part of the URL. The capture group is converted + # to a generic wildcard, so we can use this to extract things + # on a page that look like archive URLs. + url_regex = spack.url.wildcard_version(aurl) + + # We'll be a bit more liberal and just look for the archive + # part, not the full path. + regexes.append(os.path.basename(url_regex)) + + # Build a dict version -> URL from any links that match the wildcards. + versions = {} + for url in links: + if any(re.search(r, url) for r in regexes): + try: + ver = spack.url.parse_version(url) + versions[ver] = url + except spack.url.UndetectableVersionError as e: + continue + + return versions |