diff options
author | Todd Gamblin <tgamblin@llnl.gov> | 2013-11-23 13:04:36 -0800 |
---|---|---|
committer | Todd Gamblin <tgamblin@llnl.gov> | 2013-11-23 13:04:36 -0800 |
commit | 389fa1792d8f0ac7945b80620c386f2d614a7921 (patch) | |
tree | 76f0da2238b8db2303b5b98f91f4d4a49cfd70fc | |
parent | fe7da0dcffebc6f8953905a0de5483ca977e3fe9 (diff) | |
download | spack-389fa1792d8f0ac7945b80620c386f2d614a7921.tar.gz spack-389fa1792d8f0ac7945b80620c386f2d614a7921.tar.bz2 spack-389fa1792d8f0ac7945b80620c386f2d614a7921.tar.xz spack-389fa1792d8f0ac7945b80620c386f2d614a7921.zip |
Added web spider capability for listing versions.
-rwxr-xr-x | bin/spack | 6 | ||||
-rw-r--r-- | lib/spack/spack/cmd/checksum.py | 63 | ||||
-rw-r--r-- | lib/spack/spack/cmd/spec.py | 10 | ||||
-rw-r--r-- | lib/spack/spack/cmd/versions.py | 6 | ||||
-rw-r--r-- | lib/spack/spack/package.py | 64 | ||||
-rw-r--r-- | lib/spack/spack/packages/__init__.py | 25 | ||||
-rw-r--r-- | lib/spack/spack/packages/dyninst.py | 1 | ||||
-rw-r--r-- | lib/spack/spack/relations.py | 19 | ||||
-rw-r--r-- | lib/spack/spack/spec.py | 8 | ||||
-rw-r--r-- | lib/spack/spack/test/mock_packages/callpath.py | 4 | ||||
-rw-r--r-- | lib/spack/spack/test/mock_packages/dyninst.py | 6 | ||||
-rw-r--r-- | lib/spack/spack/test/mock_packages/libdwarf.py | 2 | ||||
-rw-r--r-- | lib/spack/spack/test/mock_packages/libelf.py | 5 | ||||
-rw-r--r-- | lib/spack/spack/test/mock_packages/mpich.py | 3 | ||||
-rw-r--r-- | lib/spack/spack/test/mock_packages/mpileaks.py | 5 | ||||
-rw-r--r-- | lib/spack/spack/url.py | 4 | ||||
-rw-r--r-- | lib/spack/spack/util/crypto.py | 13 | ||||
-rw-r--r-- | lib/spack/spack/util/filesystem.py | 15 | ||||
-rw-r--r-- | lib/spack/spack/util/web.py | 113 |
19 files changed, 317 insertions, 55 deletions
@@ -30,6 +30,8 @@ parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', help="print additional output during builds") parser.add_argument('-d', '--debug', action='store_true', dest='debug', help="write out debug logs during compile") +parser.add_argument('-m', '--mock', action='store_true', dest='mock', + help="Use mock packages instead of real ones.") # each command module implements a parser() function, to which we pass its # subparser for setup. @@ -46,6 +48,10 @@ args = parser.parse_args() # Set up environment based on args. spack.verbose = args.verbose spack.debug = args.debug +if args.mock: + from spack.util.filesystem import new_path + mock_path = new_path(spack.module_path, 'test', 'mock_packages') + spack.packages_path = mock_path # Try to load the particular command asked for and run it command = spack.cmd.get_command(args.command) diff --git a/lib/spack/spack/cmd/checksum.py b/lib/spack/spack/cmd/checksum.py new file mode 100644 index 0000000000..7dd214e6aa --- /dev/null +++ b/lib/spack/spack/cmd/checksum.py @@ -0,0 +1,63 @@ +import os +import re +import argparse +from pprint import pprint +from subprocess import CalledProcessError + +import spack.tty as tty +import spack.packages as packages +from spack.stage import Stage +from spack.colify import colify +from spack.util.crypto import md5 +from spack.version import * + +group='foo' +description ="Checksum available versions of a package, print out checksums for addition to a package file." + +def setup_parser(subparser): + subparser.add_argument('package', metavar='PACKAGE', help='Package to list versions for') + subparser.add_argument('versions', nargs=argparse.REMAINDER, help='Versions to generate checksums for') + subparser.add_argument('-n', '--number', dest='number', type=int, + default=10, help='Number of versions to list') + + +def checksum(parser, args): + # get the package we're going to generate checksums for + pkg = packages.get(args.package) + + # If the user asked for specific versions, use those. + # Otherwise get the latest n, where n is from the -n/--number param + versions = [ver(v) for v in args.versions] + + if not all(type(v) == Version for v in versions): + tty.die("Cannot generate checksums for version lists or " + + "version ranges. Use unambiguous versions.") + + if not versions: + versions = pkg.fetch_available_versions()[:args.number] + if not versions: + tty.die("Could not fetch any available versions for %s." + % pkg.name) + + versions.sort() + versions.reverse() + urls = [pkg.url_for_version(v) for v in versions] + + tty.msg("Found %s versions to checksum." % len(urls)) + tty.msg("Downloading...") + + hashes = [] + for url, version in zip(urls, versions): + stage = Stage("checksum-%s-%s" % (pkg.name, version), url) + try: + stage.fetch() + hashes.append(md5(stage.archive_file)) + finally: + stage.destroy() + + dict_string = ["{"] + for i, (v, h) in enumerate(zip(versions, hashes)): + comma = "" if i == len(hashes) - 1 else "," + dict_string.append(" '%s' : '%s'%s" % (str(v), str(h), comma)) + dict_string.append("}") + tty.msg("Checksummed new versions of %s:" % pkg.name, *dict_string) diff --git a/lib/spack/spack/cmd/spec.py b/lib/spack/spack/cmd/spec.py index 36bfc5e154..d8146405c9 100644 --- a/lib/spack/spack/cmd/spec.py +++ b/lib/spack/spack/cmd/spec.py @@ -2,8 +2,10 @@ import argparse import spack.cmd import spack.tty as tty +import spack.url as url import spack + description = "parse specs and print them out to the command line." def setup_parser(subparser): @@ -13,7 +15,11 @@ def spec(parser, args): specs = spack.cmd.parse_specs(args.specs) for spec in specs: spec.normalize() - print spec.tree() + print spec.tree(color=True) spec.concretize() - print spec.tree() + print spec.tree(color=True) + + pkg = spec.package + wc = url.wildcard_version(pkg.url) + print wc diff --git a/lib/spack/spack/cmd/versions.py b/lib/spack/spack/cmd/versions.py index 9d0b1df55a..d18b1c1265 100644 --- a/lib/spack/spack/cmd/versions.py +++ b/lib/spack/spack/cmd/versions.py @@ -2,12 +2,8 @@ import os import re from subprocess import CalledProcessError -import spack import spack.packages as packages -import spack.url as url -import spack.tty as tty from spack.colify import colify -from spack.version import ver description ="List available versions of a package" @@ -17,4 +13,4 @@ def setup_parser(subparser): def versions(parser, args): pkg = packages.get(args.package) - colify(reversed(pkg.available_versions)) + colify(reversed(pkg.fetch_available_versions())) diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index d61ab4620d..d4e31d7326 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -29,6 +29,8 @@ from version import * from multi_function import platform from stage import Stage from spack.util.lang import memoized, list_modules +from spack.util.crypto import md5 +from spack.util.web import get_pages class Package(object): @@ -251,6 +253,9 @@ class Package(object): """By default a package has no dependencies.""" dependencies = {} + """List of specs of virtual packages provided by this package.""" + provided_virtual_packages = {} + # # These are default values for instance variables. # @@ -310,6 +315,9 @@ class Package(object): if not hasattr(self, 'list_url'): self.list_url = os.path.dirname(self.url) + if not hasattr(self, 'list_depth'): + self.list_depth = 1 + def add_commands_to_module(self): """Populate the module scope of install() with some useful functions. @@ -464,6 +472,11 @@ class Package(object): return str(version) + def url_for_version(self, version): + """Gives a URL that you can download a new version of this package from.""" + return url.substitute_version(self.url, self.url_version(version)) + + def remove_prefix(self): """Removes the prefix for a package along with any empty parent directories.""" if self.dirty: @@ -640,39 +653,44 @@ class Package(object): tty.msg("Successfully cleaned %s" % self.name) - @property - def available_versions(self): - # If the package overrode available_versions, then use that. - if self.versions is not None: - return self.versions - + def fetch_available_versions(self): # If not, then try to fetch using list_url if not self._available_versions: - self._available_versions = ver([self.version]) - try: - # Run curl but grab the mime type from the http headers - listing = spack.curl('-s', '-L', self.list_url, return_output=True) - url_regex = os.path.basename(url.wildcard_version(self.url)) - strings = re.findall(url_regex, listing) - wildcard = self.version.wildcard() + self._available_versions = VersionList() + url_regex = os.path.basename(url.wildcard_version(self.url)) + wildcard = self.version.wildcard() + + page_map = get_pages(self.list_url, depth=self.list_depth) + for site, page in page_map.iteritems(): + strings = re.findall(url_regex, page) + for s in strings: match = re.search(wildcard, s) if match: - self._available_versions.add(Version(match.group(0))) - - if not self._available_versions: - tty.warn("Found no versions for %s" % self.name, - "Packate.available_versions may require adding the list_url attribute", - "to the package to tell Spack where to look for versions.") + v = match.group(0) + self._available_versions.add(Version(v)) - except subprocess.CalledProcessError: - tty.warn("Could not connect to %s" % self.list_url, - "Package.available_versions requires an internet connection.", - "Version list may be incomplete.") + if not self._available_versions: + tty.warn("Found no versions for %s" % self.name, + "Check the list_url and list_depth attribute on the " + + self.name + " package.", + "Use them to tell Spack where to look for versions.") return self._available_versions + @property + def available_versions(self): + # If the package overrode available_versions, then use that. + if self.versions is not None: + return self.versions + else: + vlist = self.fetch_available_versions() + if not vlist: + vlist = ver([self.version]) + return vlist + + class MakeExecutable(Executable): """Special Executable for make so the user can specify parallel or not on a per-invocation basis. Using 'parallel' as a kwarg will diff --git a/lib/spack/spack/packages/__init__.py b/lib/spack/spack/packages/__init__.py index 8692dde5a8..40270afc99 100644 --- a/lib/spack/spack/packages/__init__.py +++ b/lib/spack/spack/packages/__init__.py @@ -19,6 +19,7 @@ valid_package_re = r'^\w[\w-]*$' invalid_package_re = r'[_-][_-]+' instances = {} +providers = {} def get(pkg_name): @@ -29,6 +30,24 @@ def get(pkg_name): return instances[pkg_name] +def get_providers(vpkg_name): + if not providers: + compute_providers() + + if not vpkg_name in providers: + raise UnknownPackageError("No such virtual package: %s" % vpkg_name) + + return providers[vpkg_name] + + +def compute_providers(): + for pkg in all_packages(): + for vpkg in pkg.provided_virtual_packages: + if vpkg not in providers: + providers[vpkg] = [] + providers[vpkg].append(pkg) + + def valid_package_name(pkg_name): return (re.match(valid_package_re, pkg_name) and not re.search(invalid_package_re, pkg_name)) @@ -75,6 +94,11 @@ def class_name_for_package_name(pkg_name): return class_name +def exists(pkg_name): + """Whether a package is concrete.""" + return os.path.exists(filename_for_package_name(pkg_name)) + + def get_class_for_package_name(pkg_name): file_name = filename_for_package_name(pkg_name) @@ -149,7 +173,6 @@ def graph_dependencies(out=sys.stdout): out.write('}\n') - class InvalidPackageNameError(spack.error.SpackError): """Raised when we encounter a bad package name.""" def __init__(self, name): diff --git a/lib/spack/spack/packages/dyninst.py b/lib/spack/spack/packages/dyninst.py index f550cde54f..3e5b4a5bd4 100644 --- a/lib/spack/spack/packages/dyninst.py +++ b/lib/spack/spack/packages/dyninst.py @@ -4,6 +4,7 @@ class Dyninst(Package): homepage = "https://paradyn.org" url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz" md5 = "bf03b33375afa66fe0efa46ce3f4b17a" + list_url = "http://www.dyninst.org/downloads/dyninst-8.x" depends_on("libelf") depends_on("libdwarf") diff --git a/lib/spack/spack/relations.py b/lib/spack/spack/relations.py index cc690ffc0f..101cdd8c83 100644 --- a/lib/spack/spack/relations.py +++ b/lib/spack/spack/relations.py @@ -45,16 +45,28 @@ provides spack install mpileaks ^mpich """ import sys +import inspect import spack.spec +def _caller_locals(): + """This will return the locals of the *parent* of the caller. + This allows a fucntion to insert variables into its caller's + scope. + """ + stack = inspect.stack() + try: + return stack[2][0].f_locals + finally: + del stack + + def depends_on(*specs): """Adds a dependencies local variable in the locals of the calling class, based on args. """ # Get the enclosing package's scope and add deps to it. - locals = sys._getframe(1).f_locals - dependencies = locals.setdefault("dependencies", {}) + dependencies = _caller_locals().setdefault("dependencies", {}) for string in specs: for spec in spack.spec.parse(string): dependencies[spec.name] = spec @@ -66,7 +78,6 @@ def provides(*args): can use the providing package to satisfy the dependency. """ # Get the enclosing package's scope and add deps to it. - locals = sys._getframe(1).f_locals - provides = locals.setdefault("provides", []) + provides = _caller_locals().setdefault("provides", []) for name in args: provides.append(name) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index e4203e85fb..31c504d4ca 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -322,8 +322,14 @@ class Spec(object): @property + def virtual(self): + return packages.exists(self.name) + + + @property def concrete(self): - return bool(self.versions.concrete + return bool(not self.virtual + and self.versions.concrete # TODO: support variants and self.architecture and self.compiler and self.compiler.concrete diff --git a/lib/spack/spack/test/mock_packages/callpath.py b/lib/spack/spack/test/mock_packages/callpath.py index 4ca1a57007..edc0833de4 100644 --- a/lib/spack/spack/test/mock_packages/callpath.py +++ b/lib/spack/spack/test/mock_packages/callpath.py @@ -5,7 +5,9 @@ class Callpath(Package): url = "http://github.com/tgamblin/callpath-0.2.tar.gz" md5 = "foobarbaz" - versions = [0.8, 0.9, 1.0] + versions = { 0.8 : 'bf03b33375afa66fe0efa46ce3f4b17a', + 0.9 : 'bf03b33375afa66fe0efa46ce3f4b17a', + 1.0 : 'bf03b33375afa66fe0efa46ce3f4b17a' } depends_on("dyninst") depends_on("mpich") diff --git a/lib/spack/spack/test/mock_packages/dyninst.py b/lib/spack/spack/test/mock_packages/dyninst.py index 99bbd1507b..2974e17f42 100644 --- a/lib/spack/spack/test/mock_packages/dyninst.py +++ b/lib/spack/spack/test/mock_packages/dyninst.py @@ -5,7 +5,11 @@ class Dyninst(Package): url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz" md5 = "bf03b33375afa66fe0efa46ce3f4b17a" - versions = '7.0, 7.0.1, 8.0, 8.1.1, 8.1.2' + list_url = "http://www.dyninst.org/downloads/dyninst-8.x" + + versions = { + '8.1.2' : 'bf03b33375afa66fe0efa46ce3f4b17a', + '8.1.1' : '1f8743e3a5662b25ce64a7edf647e77d' } depends_on("libelf") depends_on("libdwarf") diff --git a/lib/spack/spack/test/mock_packages/libdwarf.py b/lib/spack/spack/test/mock_packages/libdwarf.py index 05f792aae4..d3a5e8dc47 100644 --- a/lib/spack/spack/test/mock_packages/libdwarf.py +++ b/lib/spack/spack/test/mock_packages/libdwarf.py @@ -11,6 +11,8 @@ class Libdwarf(Package): md5 = "64b42692e947d5180e162e46c689dfbf" + versions = [20070703, 20111030, 20130207] + depends_on("libelf") diff --git a/lib/spack/spack/test/mock_packages/libelf.py b/lib/spack/spack/test/mock_packages/libelf.py index efc3ebc98b..f003eff010 100644 --- a/lib/spack/spack/test/mock_packages/libelf.py +++ b/lib/spack/spack/test/mock_packages/libelf.py @@ -5,7 +5,10 @@ class Libelf(Package): url = "http://www.mr511.de/software/libelf-0.8.13.tar.gz" md5 = "4136d7b4c04df68b686570afa26988ac" - versions = '0.8.10, 0.8.12, 0.8.13' + versions = { + '0.8.13' : '4136d7b4c04df68b686570afa26988ac', + '0.8.12' : 'e21f8273d9f5f6d43a59878dc274fec7', + '0.8.10' : '9db4d36c283d9790d8fa7df1f4d7b4d9' } def install(self, prefix): configure("--prefix=%s" % prefix, diff --git a/lib/spack/spack/test/mock_packages/mpich.py b/lib/spack/spack/test/mock_packages/mpich.py index ab235b1e43..337e7f6629 100644 --- a/lib/spack/spack/test/mock_packages/mpich.py +++ b/lib/spack/spack/test/mock_packages/mpich.py @@ -3,6 +3,9 @@ from spack import * class Mpich(Package): homepage = "http://www.mpich.org" url = "http://www.mpich.org/static/downloads/3.0.4/mpich-3.0.4.tar.gz" + + list_url = "http://www.mpich.org/static/downloads/" + list_depth = 2 md5 = "9c5d5d4fe1e17dd12153f40bc5b6dbc0" versions = '1.0.3, 1.3.2p1, 1.4.1p1, 3.0.4, 3.1b1' diff --git a/lib/spack/spack/test/mock_packages/mpileaks.py b/lib/spack/spack/test/mock_packages/mpileaks.py index d006ff61ed..c355bb226f 100644 --- a/lib/spack/spack/test/mock_packages/mpileaks.py +++ b/lib/spack/spack/test/mock_packages/mpileaks.py @@ -5,7 +5,10 @@ class Mpileaks(Package): url = "http://www.llnl.gov/mpileaks-1.0.tar.gz" md5 = "foobarbaz" - versions = [1.0, 2.1, 2.2, 2.3] + versions = { 1.0 : None, + 2.1 : None, + 2.2 : None, + 2.3 : None } depends_on("mpich") depends_on("callpath") diff --git a/lib/spack/spack/url.py b/lib/spack/spack/url.py index 59d05203b6..02872527c4 100644 --- a/lib/spack/spack/url.py +++ b/lib/spack/spack/url.py @@ -176,6 +176,8 @@ def wildcard_version(path): that will match this path with any version in its place. """ ver, start, end = parse_version_string_with_indices(path) + v = Version(ver) + parts = list(re.escape(p) for p in path.split(str(v))) - return re.escape(path[:start]) + v.wildcard() + re.escape(path[end:]) + return v.wildcard().join(parts) diff --git a/lib/spack/spack/util/crypto.py b/lib/spack/spack/util/crypto.py new file mode 100644 index 0000000000..da6597ef64 --- /dev/null +++ b/lib/spack/spack/util/crypto.py @@ -0,0 +1,13 @@ +import hashlib +from contextlib import closing + +def md5(filename, block_size=2**20): + """Computes the md5 hash of a file.""" + md5 = hashlib.md5() + with closing(open(filename)) as file: + while True: + data = file.read(block_size) + if not data: + break + md5.update(data) + return md5.hexdigest() diff --git a/lib/spack/spack/util/filesystem.py b/lib/spack/spack/util/filesystem.py index e051dc2b6f..8188946ccb 100644 --- a/lib/spack/spack/util/filesystem.py +++ b/lib/spack/spack/util/filesystem.py @@ -30,7 +30,7 @@ def mkdirp(*paths): def new_path(prefix, *args): - path=str(prefix) + path = str(prefix) for elt in args: path = os.path.join(path, str(elt)) @@ -56,16 +56,3 @@ def stem(path): if re.search(suffix, path): return re.sub(suffix, "", path) return path - - -def md5(filename, block_size=2**20): - """Computes the md5 hash of a file.""" - import hashlib - md5 = hashlib.md5() - with closing(open(filename)) as file: - while True: - data = file.read(block_size) - if not data: - break - md5.update(data) - return md5.hexdigest() diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py new file mode 100644 index 0000000000..32ffe6dbbe --- /dev/null +++ b/lib/spack/spack/util/web.py @@ -0,0 +1,113 @@ +import re +import subprocess +import urllib2 +import urlparse +from multiprocessing import Pool +from HTMLParser import HTMLParser + +import spack +import spack.tty as tty +from spack.util.compression import ALLOWED_ARCHIVE_TYPES + +# Timeout in seconds for web requests +TIMEOUT = 10 + + +class LinkParser(HTMLParser): + """This parser just takes an HTML page and strips out the hrefs on the + links. Good enough for a really simple spider. """ + def __init__(self): + HTMLParser.__init__(self) + self.links = [] + + def handle_starttag(self, tag, attrs): + if tag == 'a': + for attr, val in attrs: + if attr == 'href': + self.links.append(val) + + +def _spider(args): + """_spider(url, depth, max_depth) + + Fetches URL and any pages it links to up to max_depth. depth should + initially be 1, and max_depth includes the root. This function will + print out a warning only if the root can't be fetched; it ignores + errors with pages that the root links to. + + This will return a list of the pages fetched, in no particular order. + + Takes args as a tuple b/c it's intended to be used by a multiprocessing + pool. Firing off all the child links at once makes the fetch MUCH + faster for pages with lots of children. + """ + url, depth, max_depth = args + + pages = {} + try: + # Make a HEAD request first to check the content type. This lets + # us ignore tarballs and gigantic files. + # It would be nice to do this with the HTTP Accept header to avoid + # one round-trip. However, most servers seem to ignore the header + # if you ask for a tarball with Accept: text/html. + req = urllib2.Request(url) + req.get_method = lambda: "HEAD" + resp = urllib2.urlopen(req, timeout=TIMEOUT) + + if not resp.headers["Content-type"].startswith('text/html'): + print "ignoring page " + url + " with content type " + resp.headers["Content-type"] + return pages + + # Do the real GET request when we know it's just HTML. + req.get_method = lambda: "GET" + response = urllib2.urlopen(req, timeout=TIMEOUT) + response_url = response.geturl() + + # Read the page and and stick it in the map we'll return + page = response.read() + pages[response_url] = page + + # If we're not at max depth, parse out the links in the page + if depth < max_depth: + link_parser = LinkParser() + + subcalls = [] + link_parser.feed(page) + while link_parser.links: + raw_link = link_parser.links.pop() + + # Skip stuff that looks like an archive + if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES): + continue + + # Evaluate the link relative to the page it came from. + abs_link = urlparse.urljoin(response_url, raw_link) + subcalls.append((abs_link, depth+1, max_depth)) + + if subcalls: + pool = Pool(processes=len(subcalls)) + dicts = pool.map(_spider, subcalls) + for d in dicts: + pages.update(d) + + except urllib2.HTTPError, e: + # Only report it if it's the root page. We ignore errors when spidering. + if depth == 1: + tty.warn("Could not connect to %s" % url, e.reason, + "Package.available_versions requires an internet connection.", + "Version list may be incomplete.") + + return pages + + +def get_pages(root_url, **kwargs): + """Gets web pages from a root URL. + If depth is specified (e.g., depth=2), then this will also fetches pages + linked from the root and its children up to depth. + + This will spawn processes to fetch the children, for much improved + performance over a sequential fetch. + """ + max_depth = kwargs.setdefault('depth', 1) + pages = _spider((root_url, 1, max_depth)) + return pages |