Factor out URL fetching into URLFetchStrategy

- Added FetchStrategy class to Spack - Isolated pieces that need to be separate from Stage for git/svn/http - Added URLFetchStrategy for curl-based fetching.
author: Todd Gamblin <tgamblin@llnl.gov> 2014-08-25 01:11:12 -0700
committer: Todd Gamblin <tgamblin@llnl.gov> 2014-10-03 16:53:13 -0700
commit: 52d140c3370bfe42dafe4568d02fef9f34cc8622 (patch)
tree: 788bb3f9e586fbd8def6948b8077d467d59f9675 /lib
parent: 74a603dcd3a4b8dbcde8cc6caee6caba405e2c66 (diff)
download: spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.tar.gz
spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.tar.bz2
spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.tar.xz
spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.zip
5 files changed, 310 insertions, 117 deletions
diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
new file mode 100644
index 0000000000..2811c0e92b
--- /dev/null
+++ b/lib/spack/spack/fetch_strategy.py
@@ -0,0 +1,222 @@
+##############################################################################
+# Copyright (c) 2013, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory.
+#
+# This file is part of Spack.
+# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
+# LLNL-CODE-647188
+#
+# For details, see https://scalability-llnl.github.io/spack
+# Please also see the LICENSE file for our notice and the LGPL.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (as published by
+# the Free Software Foundation) version 2.1 dated February 1999.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
+# conditions of the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##############################################################################
+"""
+Fetch strategies are used to download source code into a staging area
+in order to build it.  They need to define the following methods:
+
+    * fetch()
+        This should attempt to download/check out source from somewhere.
+    * check()
+        Apply a checksum to the downloaded source code, e.g. for an archive.
+        May not do anything if the fetch method was safe to begin with.
+    * expand()
+        Expand (e.g., an archive) downloaded file to source.
+    * reset()
+        Restore original state of downloaded code.  Used by clean commands.
+        This may just remove the expanded source and re-expand an archive,
+        or it may run something like git reset --hard.
+"""
+import os
+import re
+import shutil
+
+import llnl.util.tty as tty
+
+import spack
+import spack.error
+import spack.util.crypto as crypto
+from spack.util.compression import decompressor_for
+
+
+class FetchStrategy(object):
+    def __init__(self):
+        # The stage is initialized late, so that fetch strategies can be constructed
+        # at package construction time.  This is where things will be fetched.
+        self.stage = None
+
+
+    def set_stage(self, stage):
+        """This is called by Stage before any of the fetching
+           methods are called on the stage."""
+        self.stage = stage
+
+
+    # Subclasses need to implement these methods
+    def fetch(self): pass    # Return True on success, False on fail (NOTE: URLFetchStrategy raises instead)
+    def check(self): pass    # Apply a checksum to the downloaded source, if applicable
+    def expand(self): pass   # Expand the downloaded file (e.g., an archive) to source
+    def reset(self): pass    # Restore the original state of the downloaded code
+    def __str__(self): pass  # NOTE(review): returns None here; str() needs a string from subclasses
+
+
+
+class URLFetchStrategy(FetchStrategy):
+    """Fetch strategy that downloads an archive from a URL using curl."""
+    def __init__(self, url, digest=None):
+        super(URLFetchStrategy, self).__init__()
+        self.url = url
+        self.digest = digest      # checksum used by check(); may be None
+
+
+    def fetch(self):
+        """Download self.url into the stage directory unless it is already there."""
+        assert(self.stage)
+        self.stage.chdir()
+
+        if self.archive_file:
+            tty.msg("Already downloaded %s." % self.archive_file)
+            return
+
+        tty.msg("Trying to fetch from %s" % self.url)
+
+        # Run curl but grab the mime type from the http headers
+        headers = spack.curl('-#',        # status bar
+                             '-O',        # save file to disk
+                             '-D', '-',   # print out HTTP headers
+                             '-L', self.url,
+                             return_output=True, fail_on_error=False)
+
+        if spack.curl.returncode != 0:
+            # clean up archive on failure.
+            if self.archive_file:
+                os.remove(self.archive_file)
+
+            if spack.curl.returncode == 60:
+                # This is a certificate error.  Suggest spack -k
+                raise FailedDownloadError(
+                    self.url,
+                    "Curl was unable to fetch due to invalid certificate. "
+                    "This is either an attack, or your cluster's SSL configuration "
+                    "is bad.  If you believe your SSL configuration is bad, you "
+                    "can try running spack -k, which will not check SSL certificates. "
+                    "Use this at your own risk.")
+
+        # Warn if we got an HTML page rather than the archive we asked for.
+        # Only the last content type counts, to handle redirects properly.
+        # Skipped when no archive exists, so the error below is raised instead.
+        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
+        if self.archive_file and content_types and 'text/html' in content_types[-1]:
+            tty.warn("The contents of " + self.archive_file + " look like HTML.",
+                     "The checksum will likely be bad.  If it is, you can use",
+                     "'spack clean --dist' to remove the bad archive, then fix",
+                     "your internet gateway issue and install again.")
+
+        if not self.archive_file:
+            raise FailedDownloadError(self.url)
+
+
+    @property
+    def archive_file(self):
+        """Path to the source archive within this stage directory."""
+        assert(self.stage)
+        path = os.path.join(self.stage.path, os.path.basename(self.url))
+        return path if os.path.exists(path) else None
+
+
+    def expand(self):
+        """Decompress the fetched archive inside the stage directory."""
+        assert(self.stage)
+
+        self.stage.chdir()
+        if not self.archive_file:
+            raise NoArchiveFileError("URLFetchStrategy couldn't find archive file",
+                                     "Failed on expand() for URL %s" % self.url)
+
+        tty.msg("Staging archive: %s" % self.archive_file)
+
+        decompress = decompressor_for(self.archive_file)
+        decompress(self.archive_file)
+
+
+    def check(self):
+        """Verify the downloaded archive against self.digest; raises on mismatch."""
+        assert(self.stage)
+        if not self.digest:
+            raise NoDigestError("Attempt to check URLFetchStrategy with no digest.", None)
+        checker = crypto.Checker(self.digest)
+        if not checker.check(self.archive_file):
+            from spack.stage import ChecksumError   # imported late: stage imports us
+            raise ChecksumError(
+                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
+                "Expected %s but got %s." % (self.digest, checker.sum))
+
+
+    def reset(self):
+        """Removes the source path if it exists, then re-expands the archive."""
+        assert(self.stage)
+        if not self.archive_file:
+            raise NoArchiveFileError("Tried to reset URLFetchStrategy before fetching",
+                                     "Failed on reset() for URL %s" % self.url)
+        if self.stage.source_path:
+            shutil.rmtree(self.stage.source_path, ignore_errors=True)
+        self.expand()
+
+
+    def __str__(self):
+        return self.url
+
+
+
+class GitFetchStrategy(FetchStrategy):
+    pass
+
+
+class SvnFetchStrategy(FetchStrategy):
+    pass
+
+
+def strategy_for_url(url):
+    """Given a URL, find an appropriate fetch strategy for it.
+       Currently just gives you a URLFetchStrategy that uses curl.
+
+       TODO: make this return appropriate fetch strategies for other
+             types of URLs.
+    """
+    return URLFetchStrategy(url)
+
+
+class FetchStrategyError(spack.error.SpackError):
+    def __init__(self, msg, long_msg):
+        super(FetchStrategyError, self).__init__(msg, long_msg)
+
+
+class FailedDownloadError(FetchStrategyError):
+    """Raised when a download fails."""
+    def __init__(self, url, msg=""):
+        super(FailedDownloadError, self).__init__(
+            "Failed to fetch file from URL: %s" % url, msg)
+        self.url = url
+
+
+class NoArchiveFileError(FetchStrategyError):
+    def __init__(self, msg, long_msg):
+        super(NoArchiveFileError, self).__init__(msg, long_msg)
+
+
+class NoDigestError(FetchStrategyError):
+    def __init__(self, msg, long_msg=None):  # long_msg is optional: some callers pass only msg
+        super(NoDigestError, self).__init__(msg, long_msg)
+
+
diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py
index 9644aa43d3..f5f1c9dec6 100644
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -337,7 +337,7 @@ class Package(object):
 
         # Sanity check some required variables that could be
         # overridden by package authors.
-        def sanity_check_dict(attr_name):
+        def ensure_has_dict(attr_name):
             if not hasattr(self, attr_name):
                 raise PackageError("Package %s must define %s" % attr_name)
 
@@ -345,10 +345,10 @@ class Package(object):
             if not isinstance(attr, dict):
                 raise PackageError("Package %s has non-dict %s attribute!"
                                    % (self.name, attr_name))
-        sanity_check_dict('versions')
-        sanity_check_dict('dependencies')
-        sanity_check_dict('conflicted')
-        sanity_check_dict('patches')
+        ensure_has_dict('versions')
+        ensure_has_dict('dependencies')
+        ensure_has_dict('conflicted')
+        ensure_has_dict('patches')
 
         # Check versions in the versions dict.
         for v in self.versions:
@@ -362,9 +362,8 @@ class Package(object):
         # Version-ize the keys in versions dict
         try:
             self.versions = dict((Version(v), h) for v,h in self.versions.items())
-        except ValueError:
-            raise ValueError("Keys of versions dict in package %s must be versions!"
-                             % self.name)
+        except ValueError, e:
+            raise ValueError("In package %s: %s" % (self.name, e.message))
 
         # stage used to build this package.
         self._stage = None
@@ -600,9 +599,8 @@ class Package(object):
 
         self.do_fetch()
 
-        archive_dir = self.stage.expanded_archive_path
+        archive_dir = self.stage.source_path
         if not archive_dir:
-            tty.msg("Staging archive: %s" % self.stage.archive_file)
             self.stage.expand_archive()
             tty.msg("Created stage directory in %s." % self.stage.path)
         else:
@@ -620,7 +618,7 @@ class Package(object):
 
         # Construct paths to special files in the archive dir used to
         # keep track of whether patches were successfully applied.
-        archive_dir = self.stage.expanded_archive_path
+        archive_dir = self.stage.source_path
         good_file = join_path(archive_dir, '.spack_patched')
         bad_file  = join_path(archive_dir, '.spack_patch_failed')
 
diff --git a/lib/spack/spack/relations.py b/lib/spack/spack/relations.py
index 5afb7e7624..f7e1cfd925 100644
--- a/lib/spack/spack/relations.py
+++ b/lib/spack/spack/relations.py
@@ -95,7 +95,7 @@ class VersionDescriptor(object):
 
 
 def version(ver, checksum, **kwargs):
-    """Adds a version and associated metadata to the package."""
+    """Adds a version and metadata describing how to fetch it."""
     pkg = caller_locals()
 
     versions = pkg.setdefault('versions', {})
diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py
index 3dac798396..0fa315051f 100644
--- a/lib/spack/spack/stage.py
+++ b/lib/spack/spack/stage.py
@@ -32,18 +32,20 @@ from llnl.util.filesystem import *
 
 import spack
 import spack.config
+from spack.fetch_strategy import strategy_for_url, URLFetchStrategy
 import spack.error
-import spack.util.crypto as crypto
-from spack.util.compression import decompressor_for
+
 
 
 STAGE_PREFIX = 'spack-stage-'
 
 
 class Stage(object):
-    """A Stage object manaages a directory where an archive is downloaded,
-       expanded, and built before being installed.  It also handles downloading
-       the archive.  A stage's lifecycle looks like this:
+    """A Stage object manages a directory where some source code is
+       downloaded and built before being installed.  It handles
+       fetching the source code, either as an archive to be expanded
+       or by checking it out of a repository.  A stage's lifecycle
+       looks like this:
 
        Stage()
          Constructor creates the stage directory.
@@ -71,18 +73,24 @@ class Stage(object):
     def __init__(self, url, **kwargs):
         """Create a stage object.
            Parameters:
-             url     URL of the archive to be downloaded into this stage.
-
-             name    If a name is provided, then this stage is a named stage
-                     and will persist between runs (or if you construct another
-                     stage object later).  If name is not provided, then this
-                     stage will be given a unique name automatically.
+             url_or_fetch_strategy
+                 URL of the archive to be downloaded into this stage, OR
+                 a valid FetchStrategy.
+
+             name
+                 If a name is provided, then this stage is a named stage
+                 and will persist between runs (or if you construct another
+                 stage object later).  If name is not provided, then this
+                 stage will be given a unique name automatically.
         """
+        if isinstance(url, basestring):
+            self.fetcher = strategy_for_url(url)
+            self.fetcher.set_stage(self)
+
         self.name = kwargs.get('name')
         self.mirror_path = kwargs.get('mirror_path')
 
         self.tmp_root = find_tmp_root()
-        self.url = url
 
         self.path = None
         self._setup()
@@ -198,17 +206,17 @@ class Stage(object):
 
 
     @property
-    def expanded_archive_path(self):
-        """Returns the path to the expanded archive directory if it's expanded;
-           None if the archive hasn't been expanded.
-        """
-        if not self.archive_file:
-            return None
+    def source_path(self):
+        """Returns the path to the expanded/checked out source code
+           within this fetch strategy's path.
 
-        for file in os.listdir(self.path):
-            archive_path = join_path(self.path, file)
-            if os.path.isdir(archive_path):
-                return archive_path
+           This assumes nothing else is going to be put in the
+           FetchStrategy's path.  It searches for the first
+           subdirectory of the path it can find, then returns that.
+        """
+        for p in [os.path.join(self.path, f) for f in os.listdir(self.path)]:
+            if os.path.isdir(p):
+                return p
         return None
 
 
@@ -220,71 +228,35 @@ class Stage(object):
             tty.die("Setup failed: no such directory: " + self.path)
 
 
-    def fetch_from_url(self, url):
-        # Run curl but grab the mime type from the http headers
-        headers = spack.curl('-#',        # status bar
-                             '-O',        # save file to disk
-                             '-D', '-',   # print out HTML headers
-                             '-L', url,
-                             return_output=True, fail_on_error=False)
-
-        if spack.curl.returncode != 0:
-            # clean up archive on failure.
-            if self.archive_file:
-                os.remove(self.archive_file)
-
-            if spack.curl.returncode == 60:
-                # This is a certificate error.  Suggest spack -k
-                raise FailedDownloadError(
-                    url,
-                    "Curl was unable to fetch due to invalid certificate. "
-                    "This is either an attack, or your cluster's SSL configuration "
-                    "is bad.  If you believe your SSL configuration is bad, you "
-                    "can try running spack -k, which will not check SSL certificates."
-                    "Use this at your own risk.")
-
-        # Check if we somehow got an HTML file rather than the archive we
-        # asked for.  We only look at the last content type, to handle
-        # redirects properly.
-        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
-        if content_types and 'text/html' in content_types[-1]:
-            tty.warn("The contents of " + self.archive_file + " look like HTML.",
-                     "The checksum will likely be bad.  If it is, you can use",
-                     "'spack clean --dist' to remove the bad archive, then fix",
-                     "your internet gateway issue and install again.")
-
-
     def fetch(self):
-        """Downloads the file at URL to the stage.  Returns true if it was downloaded,
-           false if it already existed."""
+        """Downloads an archive or checks out code from a repository."""
         self.chdir()
-        if self.archive_file:
-            tty.msg("Already downloaded %s." % self.archive_file)
-
-        else:
-            urls = [self.url]
-            if self.mirror_path:
-                urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()] + urls
 
-            for url in urls:
-                tty.msg("Trying to fetch from %s" % url)
-                self.fetch_from_url(url)
-                if self.archive_file:
-                    break
+        fetchers = [self.fetcher]
 
-        if not self.archive_file:
-            raise FailedDownloadError(url)
-
-        return self.archive_file
+        # TODO: move mirror logic out of here and clean it up!
+        if self.mirror_path:
+            urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()]
+            digest = None
+            if isinstance(self.fetcher, URLFetchStrategy):
+                digest = self.fetcher.digest
+            fetchers = [URLFetchStrategy(url, digest) for url in urls] + fetchers
+            for f in fetchers:
+                f.set_stage(self)
+
+        for fetcher in fetchers:
+            try:
+                fetcher.fetch()
+                break
+            except spack.error.SpackError, e:
+                tty.msg("Download from %s failed." % fetcher)
+                continue
 
 
     def check(self, digest):
-        """Check the downloaded archive against a checksum digest"""
-        checker = crypto.Checker(digest)
-        if not checker.check(self.archive_file):
-            raise ChecksumError(
-                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
-                "Expected %s but got %s." % (digest, checker.sum))
+        """Check the downloaded archive against a checksum digest.
+           No-op if this stage checks code out of a repository."""
+        self.fetcher.check()
 
 
     def expand_archive(self):
@@ -292,19 +264,14 @@ class Stage(object):
            archive.  Fail if the stage is not set up or if the archive is not yet
            downloaded.
         """
-        self.chdir()
-        if not self.archive_file:
-            tty.die("Attempt to expand archive before fetching.")
-
-        decompress = decompressor_for(self.archive_file)
-        decompress(self.archive_file)
+        self.fetcher.expand()
 
 
     def chdir_to_archive(self):
         """Changes directory to the expanded archive directory.
            Dies with an error if there was no expanded archive.
         """
-        path = self.expanded_archive_path
+        path = self.source_path
         if not path:
             tty.die("Attempt to chdir before expanding archive.")
         else:
@@ -317,12 +284,7 @@ class Stage(object):
         """Removes the expanded archive path if it exists, then re-expands
            the archive.
         """
-        if not self.archive_file:
-            tty.die("Attempt to restage when not staged.")
-
-        if self.expanded_archive_path:
-            shutil.rmtree(self.expanded_archive_path, True)
-        self.expand_archive()
+        self.fetcher.reset()
 
 
     def destroy(self):
@@ -393,15 +355,26 @@ def find_tmp_root():
     return None
 
 
-class FailedDownloadError(spack.error.SpackError):
-    """Raised wen a download fails."""
-    def __init__(self, url, msg=""):
-        super(FailedDownloadError, self).__init__(
-            "Failed to fetch file from URL: %s" % url, msg)
-        self.url = url
+class StageError(spack.error.SpackError):  # base class for stage-related errors
+    def __init__(self, message, long_message=None):
+        super(StageError, self).__init__(message, long_message)
 
 
-class ChecksumError(spack.error.SpackError):
+class ChecksumError(StageError):
     """Raised when archive fails to checksum."""
-    def __init__(self, message, long_msg):
+    def __init__(self, message, long_msg=None):
         super(ChecksumError, self).__init__(message, long_msg)
+
+
+class RestageError(StageError):
+    def __init__(self, message, long_msg=None):
+        super(RestageError, self).__init__(message, long_msg)
+
+
+class ChdirError(StageError):
+    def __init__(self, message, long_msg=None):
+        super(ChdirError, self).__init__(message, long_msg)
+
+
+# Keep this in namespace for convenience
+FailedDownloadError = spack.fetch_strategy.FailedDownloadError
diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py
index a412549dc7..8cb7ac772e 100644
--- a/lib/spack/spack/test/stage.py
+++ b/lib/spack/spack/test/stage.py
@@ -146,7 +146,7 @@ class StageTest(unittest.TestCase):
         stage_path = self.get_stage_path(stage, stage_name)
         self.assertTrue(archive_name in os.listdir(stage_path))
         self.assertEqual(join_path(stage_path, archive_name),
-                         stage.archive_file)
+                         stage.fetcher.archive_file)
 
 
     def check_expand_archive(self, stage, stage_name):
@@ -156,7 +156,7 @@ class StageTest(unittest.TestCase):
 
         self.assertEqual(
             join_path(stage_path, archive_dir),
-            stage.expanded_archive_path)
+            stage.source_path)
 
         readme = join_path(stage_path, archive_dir, readme_name)
         self.assertTrue(os.path.isfile(readme))
@@ -292,7 +292,7 @@ class StageTest(unittest.TestCase):
         with closing(open('foobar', 'w')) as file:
             file.write("this file is to be destroyed.")
 
-        self.assertTrue('foobar' in os.listdir(stage.expanded_archive_path))
+        self.assertTrue('foobar' in os.listdir(stage.source_path))
 
         # Make sure the file is not there after restage.
         stage.restage()
@@ -301,7 +301,7 @@ class StageTest(unittest.TestCase):
 
         stage.chdir_to_archive()
         self.check_chdir_to_archive(stage, stage_name)
-        self.assertFalse('foobar' in os.listdir(stage.expanded_archive_path))
+        self.assertFalse('foobar' in os.listdir(stage.source_path))
 
         stage.destroy()
         self.check_destroy(stage, stage_name)
author	Todd Gamblin <tgamblin@llnl.gov>	2014-08-25 01:11:12 -0700
committer	Todd Gamblin <tgamblin@llnl.gov>	2014-10-03 16:53:13 -0700
commit	52d140c3370bfe42dafe4568d02fef9f34cc8622 (patch)
tree	788bb3f9e586fbd8def6948b8077d467d59f9675 /lib
parent	74a603dcd3a4b8dbcde8cc6caee6caba405e2c66 (diff)
download	spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.tar.gz spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.tar.bz2 spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.tar.xz spack-52d140c3370bfe42dafe4568d02fef9f34cc8622.zip