summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTodd Gamblin <tgamblin@llnl.gov>2013-11-23 13:04:36 -0800
committerTodd Gamblin <tgamblin@llnl.gov>2013-11-23 13:04:36 -0800
commit389fa1792d8f0ac7945b80620c386f2d614a7921 (patch)
tree76f0da2238b8db2303b5b98f91f4d4a49cfd70fc
parentfe7da0dcffebc6f8953905a0de5483ca977e3fe9 (diff)
downloadspack-389fa1792d8f0ac7945b80620c386f2d614a7921.tar.gz
spack-389fa1792d8f0ac7945b80620c386f2d614a7921.tar.bz2
spack-389fa1792d8f0ac7945b80620c386f2d614a7921.tar.xz
spack-389fa1792d8f0ac7945b80620c386f2d614a7921.zip
Added web spider capability for listing versions.
-rwxr-xr-xbin/spack6
-rw-r--r--lib/spack/spack/cmd/checksum.py63
-rw-r--r--lib/spack/spack/cmd/spec.py10
-rw-r--r--lib/spack/spack/cmd/versions.py6
-rw-r--r--lib/spack/spack/package.py64
-rw-r--r--lib/spack/spack/packages/__init__.py25
-rw-r--r--lib/spack/spack/packages/dyninst.py1
-rw-r--r--lib/spack/spack/relations.py19
-rw-r--r--lib/spack/spack/spec.py8
-rw-r--r--lib/spack/spack/test/mock_packages/callpath.py4
-rw-r--r--lib/spack/spack/test/mock_packages/dyninst.py6
-rw-r--r--lib/spack/spack/test/mock_packages/libdwarf.py2
-rw-r--r--lib/spack/spack/test/mock_packages/libelf.py5
-rw-r--r--lib/spack/spack/test/mock_packages/mpich.py3
-rw-r--r--lib/spack/spack/test/mock_packages/mpileaks.py5
-rw-r--r--lib/spack/spack/url.py4
-rw-r--r--lib/spack/spack/util/crypto.py13
-rw-r--r--lib/spack/spack/util/filesystem.py15
-rw-r--r--lib/spack/spack/util/web.py113
19 files changed, 317 insertions, 55 deletions
diff --git a/bin/spack b/bin/spack
index a4cd169bf1..45a31fbeb0 100755
--- a/bin/spack
+++ b/bin/spack
@@ -30,6 +30,8 @@ parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
help="print additional output during builds")
parser.add_argument('-d', '--debug', action='store_true', dest='debug',
help="write out debug logs during compile")
+parser.add_argument('-m', '--mock', action='store_true', dest='mock',
+ help="Use mock packages instead of real ones.")
# each command module implements a parser() function, to which we pass its
# subparser for setup.
@@ -46,6 +48,10 @@ args = parser.parse_args()
# Set up environment based on args.
spack.verbose = args.verbose
spack.debug = args.debug
+if args.mock:
+ from spack.util.filesystem import new_path
+ mock_path = new_path(spack.module_path, 'test', 'mock_packages')
+ spack.packages_path = mock_path
# Try to load the particular command asked for and run it
command = spack.cmd.get_command(args.command)
diff --git a/lib/spack/spack/cmd/checksum.py b/lib/spack/spack/cmd/checksum.py
new file mode 100644
index 0000000000..7dd214e6aa
--- /dev/null
+++ b/lib/spack/spack/cmd/checksum.py
@@ -0,0 +1,63 @@
+import os
+import re
+import argparse
+from pprint import pprint
+from subprocess import CalledProcessError
+
+import spack.tty as tty
+import spack.packages as packages
+from spack.stage import Stage
+from spack.colify import colify
+from spack.util.crypto import md5
+from spack.version import *
+
+group='foo'
+description ="Checksum available versions of a package, print out checksums for addition to a package file."
+
+def setup_parser(subparser):
+ subparser.add_argument('package', metavar='PACKAGE', help='Package to list versions for')
+ subparser.add_argument('versions', nargs=argparse.REMAINDER, help='Versions to generate checksums for')
+ subparser.add_argument('-n', '--number', dest='number', type=int,
+ default=10, help='Number of versions to list')
+
+
+def checksum(parser, args):
+ # get the package we're going to generate checksums for
+ pkg = packages.get(args.package)
+
+ # If the user asked for specific versions, use those.
+ # Otherwise get the latest n, where n is from the -n/--number param
+ versions = [ver(v) for v in args.versions]
+
+ if not all(type(v) == Version for v in versions):
+ tty.die("Cannot generate checksums for version lists or " +
+ "version ranges. Use unambiguous versions.")
+
+ if not versions:
+ versions = pkg.fetch_available_versions()[:args.number]
+ if not versions:
+ tty.die("Could not fetch any available versions for %s."
+ % pkg.name)
+
+ versions.sort()
+ versions.reverse()
+ urls = [pkg.url_for_version(v) for v in versions]
+
+ tty.msg("Found %s versions to checksum." % len(urls))
+ tty.msg("Downloading...")
+
+ hashes = []
+ for url, version in zip(urls, versions):
+ stage = Stage("checksum-%s-%s" % (pkg.name, version), url)
+ try:
+ stage.fetch()
+ hashes.append(md5(stage.archive_file))
+ finally:
+ stage.destroy()
+
+ dict_string = ["{"]
+ for i, (v, h) in enumerate(zip(versions, hashes)):
+ comma = "" if i == len(hashes) - 1 else ","
+ dict_string.append(" '%s' : '%s'%s" % (str(v), str(h), comma))
+ dict_string.append("}")
+ tty.msg("Checksummed new versions of %s:" % pkg.name, *dict_string)
diff --git a/lib/spack/spack/cmd/spec.py b/lib/spack/spack/cmd/spec.py
index 36bfc5e154..d8146405c9 100644
--- a/lib/spack/spack/cmd/spec.py
+++ b/lib/spack/spack/cmd/spec.py
@@ -2,8 +2,10 @@ import argparse
import spack.cmd
import spack.tty as tty
+import spack.url as url
import spack
+
description = "parse specs and print them out to the command line."
def setup_parser(subparser):
@@ -13,7 +15,11 @@ def spec(parser, args):
specs = spack.cmd.parse_specs(args.specs)
for spec in specs:
spec.normalize()
- print spec.tree()
+ print spec.tree(color=True)
spec.concretize()
- print spec.tree()
+ print spec.tree(color=True)
+
+ pkg = spec.package
+ wc = url.wildcard_version(pkg.url)
+ print wc
diff --git a/lib/spack/spack/cmd/versions.py b/lib/spack/spack/cmd/versions.py
index 9d0b1df55a..d18b1c1265 100644
--- a/lib/spack/spack/cmd/versions.py
+++ b/lib/spack/spack/cmd/versions.py
@@ -2,12 +2,8 @@ import os
import re
from subprocess import CalledProcessError
-import spack
import spack.packages as packages
-import spack.url as url
-import spack.tty as tty
from spack.colify import colify
-from spack.version import ver
description ="List available versions of a package"
@@ -17,4 +13,4 @@ def setup_parser(subparser):
def versions(parser, args):
pkg = packages.get(args.package)
- colify(reversed(pkg.available_versions))
+ colify(reversed(pkg.fetch_available_versions()))
diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py
index d61ab4620d..d4e31d7326 100644
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -29,6 +29,8 @@ from version import *
from multi_function import platform
from stage import Stage
from spack.util.lang import memoized, list_modules
+from spack.util.crypto import md5
+from spack.util.web import get_pages
class Package(object):
@@ -251,6 +253,9 @@ class Package(object):
"""By default a package has no dependencies."""
dependencies = {}
+ """List of specs of virtual packages provided by this package."""
+ provided_virtual_packages = {}
+
#
# These are default values for instance variables.
#
@@ -310,6 +315,9 @@ class Package(object):
if not hasattr(self, 'list_url'):
self.list_url = os.path.dirname(self.url)
+ if not hasattr(self, 'list_depth'):
+ self.list_depth = 1
+
def add_commands_to_module(self):
"""Populate the module scope of install() with some useful functions.
@@ -464,6 +472,11 @@ class Package(object):
return str(version)
+ def url_for_version(self, version):
+ """Gives a URL that you can download a new version of this package from."""
+ return url.substitute_version(self.url, self.url_version(version))
+
+
def remove_prefix(self):
"""Removes the prefix for a package along with any empty parent directories."""
if self.dirty:
@@ -640,39 +653,44 @@ class Package(object):
tty.msg("Successfully cleaned %s" % self.name)
- @property
- def available_versions(self):
- # If the package overrode available_versions, then use that.
- if self.versions is not None:
- return self.versions
-
+ def fetch_available_versions(self):
# If not, then try to fetch using list_url
if not self._available_versions:
- self._available_versions = ver([self.version])
- try:
- # Run curl but grab the mime type from the http headers
- listing = spack.curl('-s', '-L', self.list_url, return_output=True)
- url_regex = os.path.basename(url.wildcard_version(self.url))
- strings = re.findall(url_regex, listing)
- wildcard = self.version.wildcard()
+ self._available_versions = VersionList()
+ url_regex = os.path.basename(url.wildcard_version(self.url))
+ wildcard = self.version.wildcard()
+
+ page_map = get_pages(self.list_url, depth=self.list_depth)
+ for site, page in page_map.iteritems():
+ strings = re.findall(url_regex, page)
+
for s in strings:
match = re.search(wildcard, s)
if match:
- self._available_versions.add(Version(match.group(0)))
-
- if not self._available_versions:
- tty.warn("Found no versions for %s" % self.name,
- "Packate.available_versions may require adding the list_url attribute",
- "to the package to tell Spack where to look for versions.")
+ v = match.group(0)
+ self._available_versions.add(Version(v))
- except subprocess.CalledProcessError:
- tty.warn("Could not connect to %s" % self.list_url,
- "Package.available_versions requires an internet connection.",
- "Version list may be incomplete.")
+ if not self._available_versions:
+ tty.warn("Found no versions for %s" % self.name,
+ "Check the list_url and list_depth attribute on the "
+ + self.name + " package.",
+ "Use them to tell Spack where to look for versions.")
return self._available_versions
+ @property
+ def available_versions(self):
+ # If the package overrode available_versions, then use that.
+ if self.versions is not None:
+ return self.versions
+ else:
+ vlist = self.fetch_available_versions()
+ if not vlist:
+ vlist = ver([self.version])
+ return vlist
+
+
class MakeExecutable(Executable):
"""Special Executable for make so the user can specify parallel or
not on a per-invocation basis. Using 'parallel' as a kwarg will
diff --git a/lib/spack/spack/packages/__init__.py b/lib/spack/spack/packages/__init__.py
index 8692dde5a8..40270afc99 100644
--- a/lib/spack/spack/packages/__init__.py
+++ b/lib/spack/spack/packages/__init__.py
@@ -19,6 +19,7 @@ valid_package_re = r'^\w[\w-]*$'
invalid_package_re = r'[_-][_-]+'
instances = {}
+providers = {}
def get(pkg_name):
@@ -29,6 +30,24 @@ def get(pkg_name):
return instances[pkg_name]
+def get_providers(vpkg_name):
+ if not providers:
+ compute_providers()
+
+ if not vpkg_name in providers:
+ raise UnknownPackageError("No such virtual package: %s" % vpkg_name)
+
+ return providers[vpkg_name]
+
+
+def compute_providers():
+ for pkg in all_packages():
+ for vpkg in pkg.provided_virtual_packages:
+ if vpkg not in providers:
+ providers[vpkg] = []
+ providers[vpkg].append(pkg)
+
+
def valid_package_name(pkg_name):
return (re.match(valid_package_re, pkg_name) and
not re.search(invalid_package_re, pkg_name))
@@ -75,6 +94,11 @@ def class_name_for_package_name(pkg_name):
return class_name
+def exists(pkg_name):
+    """Whether a package file exists for the given package name."""
+ return os.path.exists(filename_for_package_name(pkg_name))
+
+
def get_class_for_package_name(pkg_name):
file_name = filename_for_package_name(pkg_name)
@@ -149,7 +173,6 @@ def graph_dependencies(out=sys.stdout):
out.write('}\n')
-
class InvalidPackageNameError(spack.error.SpackError):
"""Raised when we encounter a bad package name."""
def __init__(self, name):
diff --git a/lib/spack/spack/packages/dyninst.py b/lib/spack/spack/packages/dyninst.py
index f550cde54f..3e5b4a5bd4 100644
--- a/lib/spack/spack/packages/dyninst.py
+++ b/lib/spack/spack/packages/dyninst.py
@@ -4,6 +4,7 @@ class Dyninst(Package):
homepage = "https://paradyn.org"
url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
md5 = "bf03b33375afa66fe0efa46ce3f4b17a"
+ list_url = "http://www.dyninst.org/downloads/dyninst-8.x"
depends_on("libelf")
depends_on("libdwarf")
diff --git a/lib/spack/spack/relations.py b/lib/spack/spack/relations.py
index cc690ffc0f..101cdd8c83 100644
--- a/lib/spack/spack/relations.py
+++ b/lib/spack/spack/relations.py
@@ -45,16 +45,28 @@ provides
spack install mpileaks ^mpich
"""
import sys
+import inspect
import spack.spec
+def _caller_locals():
+ """This will return the locals of the *parent* of the caller.
+    This allows a function to insert variables into its caller's
+ scope.
+ """
+ stack = inspect.stack()
+ try:
+ return stack[2][0].f_locals
+ finally:
+ del stack
+
+
def depends_on(*specs):
"""Adds a dependencies local variable in the locals of
the calling class, based on args.
"""
# Get the enclosing package's scope and add deps to it.
- locals = sys._getframe(1).f_locals
- dependencies = locals.setdefault("dependencies", {})
+ dependencies = _caller_locals().setdefault("dependencies", {})
for string in specs:
for spec in spack.spec.parse(string):
dependencies[spec.name] = spec
@@ -66,7 +78,6 @@ def provides(*args):
can use the providing package to satisfy the dependency.
"""
# Get the enclosing package's scope and add deps to it.
- locals = sys._getframe(1).f_locals
- provides = locals.setdefault("provides", [])
+ provides = _caller_locals().setdefault("provides", [])
for name in args:
provides.append(name)
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index e4203e85fb..31c504d4ca 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -322,8 +322,14 @@ class Spec(object):
@property
+ def virtual(self):
+ return packages.exists(self.name)
+
+
+ @property
def concrete(self):
- return bool(self.versions.concrete
+ return bool(not self.virtual
+ and self.versions.concrete
# TODO: support variants
and self.architecture
and self.compiler and self.compiler.concrete
diff --git a/lib/spack/spack/test/mock_packages/callpath.py b/lib/spack/spack/test/mock_packages/callpath.py
index 4ca1a57007..edc0833de4 100644
--- a/lib/spack/spack/test/mock_packages/callpath.py
+++ b/lib/spack/spack/test/mock_packages/callpath.py
@@ -5,7 +5,9 @@ class Callpath(Package):
url = "http://github.com/tgamblin/callpath-0.2.tar.gz"
md5 = "foobarbaz"
- versions = [0.8, 0.9, 1.0]
+ versions = { 0.8 : 'bf03b33375afa66fe0efa46ce3f4b17a',
+ 0.9 : 'bf03b33375afa66fe0efa46ce3f4b17a',
+ 1.0 : 'bf03b33375afa66fe0efa46ce3f4b17a' }
depends_on("dyninst")
depends_on("mpich")
diff --git a/lib/spack/spack/test/mock_packages/dyninst.py b/lib/spack/spack/test/mock_packages/dyninst.py
index 99bbd1507b..2974e17f42 100644
--- a/lib/spack/spack/test/mock_packages/dyninst.py
+++ b/lib/spack/spack/test/mock_packages/dyninst.py
@@ -5,7 +5,11 @@ class Dyninst(Package):
url = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
md5 = "bf03b33375afa66fe0efa46ce3f4b17a"
- versions = '7.0, 7.0.1, 8.0, 8.1.1, 8.1.2'
+ list_url = "http://www.dyninst.org/downloads/dyninst-8.x"
+
+ versions = {
+ '8.1.2' : 'bf03b33375afa66fe0efa46ce3f4b17a',
+ '8.1.1' : '1f8743e3a5662b25ce64a7edf647e77d' }
depends_on("libelf")
depends_on("libdwarf")
diff --git a/lib/spack/spack/test/mock_packages/libdwarf.py b/lib/spack/spack/test/mock_packages/libdwarf.py
index 05f792aae4..d3a5e8dc47 100644
--- a/lib/spack/spack/test/mock_packages/libdwarf.py
+++ b/lib/spack/spack/test/mock_packages/libdwarf.py
@@ -11,6 +11,8 @@ class Libdwarf(Package):
md5 = "64b42692e947d5180e162e46c689dfbf"
+ versions = [20070703, 20111030, 20130207]
+
depends_on("libelf")
diff --git a/lib/spack/spack/test/mock_packages/libelf.py b/lib/spack/spack/test/mock_packages/libelf.py
index efc3ebc98b..f003eff010 100644
--- a/lib/spack/spack/test/mock_packages/libelf.py
+++ b/lib/spack/spack/test/mock_packages/libelf.py
@@ -5,7 +5,10 @@ class Libelf(Package):
url = "http://www.mr511.de/software/libelf-0.8.13.tar.gz"
md5 = "4136d7b4c04df68b686570afa26988ac"
- versions = '0.8.10, 0.8.12, 0.8.13'
+ versions = {
+ '0.8.13' : '4136d7b4c04df68b686570afa26988ac',
+ '0.8.12' : 'e21f8273d9f5f6d43a59878dc274fec7',
+ '0.8.10' : '9db4d36c283d9790d8fa7df1f4d7b4d9' }
def install(self, prefix):
configure("--prefix=%s" % prefix,
diff --git a/lib/spack/spack/test/mock_packages/mpich.py b/lib/spack/spack/test/mock_packages/mpich.py
index ab235b1e43..337e7f6629 100644
--- a/lib/spack/spack/test/mock_packages/mpich.py
+++ b/lib/spack/spack/test/mock_packages/mpich.py
@@ -3,6 +3,9 @@ from spack import *
class Mpich(Package):
homepage = "http://www.mpich.org"
url = "http://www.mpich.org/static/downloads/3.0.4/mpich-3.0.4.tar.gz"
+
+ list_url = "http://www.mpich.org/static/downloads/"
+ list_depth = 2
md5 = "9c5d5d4fe1e17dd12153f40bc5b6dbc0"
versions = '1.0.3, 1.3.2p1, 1.4.1p1, 3.0.4, 3.1b1'
diff --git a/lib/spack/spack/test/mock_packages/mpileaks.py b/lib/spack/spack/test/mock_packages/mpileaks.py
index d006ff61ed..c355bb226f 100644
--- a/lib/spack/spack/test/mock_packages/mpileaks.py
+++ b/lib/spack/spack/test/mock_packages/mpileaks.py
@@ -5,7 +5,10 @@ class Mpileaks(Package):
url = "http://www.llnl.gov/mpileaks-1.0.tar.gz"
md5 = "foobarbaz"
- versions = [1.0, 2.1, 2.2, 2.3]
+ versions = { 1.0 : None,
+ 2.1 : None,
+ 2.2 : None,
+ 2.3 : None }
depends_on("mpich")
depends_on("callpath")
diff --git a/lib/spack/spack/url.py b/lib/spack/spack/url.py
index 59d05203b6..02872527c4 100644
--- a/lib/spack/spack/url.py
+++ b/lib/spack/spack/url.py
@@ -176,6 +176,8 @@ def wildcard_version(path):
that will match this path with any version in its place.
"""
ver, start, end = parse_version_string_with_indices(path)
+
v = Version(ver)
+ parts = list(re.escape(p) for p in path.split(str(v)))
- return re.escape(path[:start]) + v.wildcard() + re.escape(path[end:])
+ return v.wildcard().join(parts)
diff --git a/lib/spack/spack/util/crypto.py b/lib/spack/spack/util/crypto.py
new file mode 100644
index 0000000000..da6597ef64
--- /dev/null
+++ b/lib/spack/spack/util/crypto.py
@@ -0,0 +1,13 @@
+import hashlib
+from contextlib import closing
+
+def md5(filename, block_size=2**20):
+ """Computes the md5 hash of a file."""
+ md5 = hashlib.md5()
+ with closing(open(filename)) as file:
+ while True:
+ data = file.read(block_size)
+ if not data:
+ break
+ md5.update(data)
+ return md5.hexdigest()
diff --git a/lib/spack/spack/util/filesystem.py b/lib/spack/spack/util/filesystem.py
index e051dc2b6f..8188946ccb 100644
--- a/lib/spack/spack/util/filesystem.py
+++ b/lib/spack/spack/util/filesystem.py
@@ -30,7 +30,7 @@ def mkdirp(*paths):
def new_path(prefix, *args):
- path=str(prefix)
+ path = str(prefix)
for elt in args:
path = os.path.join(path, str(elt))
@@ -56,16 +56,3 @@ def stem(path):
if re.search(suffix, path):
return re.sub(suffix, "", path)
return path
-
-
-def md5(filename, block_size=2**20):
- """Computes the md5 hash of a file."""
- import hashlib
- md5 = hashlib.md5()
- with closing(open(filename)) as file:
- while True:
- data = file.read(block_size)
- if not data:
- break
- md5.update(data)
- return md5.hexdigest()
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
new file mode 100644
index 0000000000..32ffe6dbbe
--- /dev/null
+++ b/lib/spack/spack/util/web.py
@@ -0,0 +1,113 @@
+import re
+import subprocess
+import urllib2
+import urlparse
+from multiprocessing import Pool
+from HTMLParser import HTMLParser
+
+import spack
+import spack.tty as tty
+from spack.util.compression import ALLOWED_ARCHIVE_TYPES
+
+# Timeout in seconds for web requests
+TIMEOUT = 10
+
+
+class LinkParser(HTMLParser):
+ """This parser just takes an HTML page and strips out the hrefs on the
+ links. Good enough for a really simple spider. """
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.links = []
+
+ def handle_starttag(self, tag, attrs):
+ if tag == 'a':
+ for attr, val in attrs:
+ if attr == 'href':
+ self.links.append(val)
+
+
+def _spider(args):
+ """_spider(url, depth, max_depth)
+
+ Fetches URL and any pages it links to up to max_depth. depth should
+ initially be 1, and max_depth includes the root. This function will
+ print out a warning only if the root can't be fetched; it ignores
+ errors with pages that the root links to.
+
+ This will return a list of the pages fetched, in no particular order.
+
+ Takes args as a tuple b/c it's intended to be used by a multiprocessing
+ pool. Firing off all the child links at once makes the fetch MUCH
+ faster for pages with lots of children.
+ """
+ url, depth, max_depth = args
+
+ pages = {}
+ try:
+ # Make a HEAD request first to check the content type. This lets
+ # us ignore tarballs and gigantic files.
+ # It would be nice to do this with the HTTP Accept header to avoid
+ # one round-trip. However, most servers seem to ignore the header
+ # if you ask for a tarball with Accept: text/html.
+ req = urllib2.Request(url)
+ req.get_method = lambda: "HEAD"
+ resp = urllib2.urlopen(req, timeout=TIMEOUT)
+
+ if not resp.headers["Content-type"].startswith('text/html'):
+ print "ignoring page " + url + " with content type " + resp.headers["Content-type"]
+ return pages
+
+ # Do the real GET request when we know it's just HTML.
+ req.get_method = lambda: "GET"
+ response = urllib2.urlopen(req, timeout=TIMEOUT)
+ response_url = response.geturl()
+
+    # Read the page and stick it in the map we'll return
+ page = response.read()
+ pages[response_url] = page
+
+ # If we're not at max depth, parse out the links in the page
+ if depth < max_depth:
+ link_parser = LinkParser()
+
+ subcalls = []
+ link_parser.feed(page)
+ while link_parser.links:
+ raw_link = link_parser.links.pop()
+
+ # Skip stuff that looks like an archive
+ if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
+ continue
+
+ # Evaluate the link relative to the page it came from.
+ abs_link = urlparse.urljoin(response_url, raw_link)
+ subcalls.append((abs_link, depth+1, max_depth))
+
+ if subcalls:
+ pool = Pool(processes=len(subcalls))
+ dicts = pool.map(_spider, subcalls)
+ for d in dicts:
+ pages.update(d)
+
+ except urllib2.HTTPError, e:
+ # Only report it if it's the root page. We ignore errors when spidering.
+ if depth == 1:
+ tty.warn("Could not connect to %s" % url, e.reason,
+ "Package.available_versions requires an internet connection.",
+ "Version list may be incomplete.")
+
+ return pages
+
+
+def get_pages(root_url, **kwargs):
+ """Gets web pages from a root URL.
+    If depth is specified (e.g., depth=2), then this will also fetch pages
+ linked from the root and its children up to depth.
+
+ This will spawn processes to fetch the children, for much improved
+ performance over a sequential fetch.
+ """
+ max_depth = kwargs.setdefault('depth', 1)
+ pages = _spider((root_url, 1, max_depth))
+ return pages