From 52d140c3370bfe42dafe4568d02fef9f34cc8622 Mon Sep 17 00:00:00 2001
From: Todd Gamblin <tgamblin@llnl.gov>
Date: Mon, 25 Aug 2014 01:11:12 -0700
Subject: Factor out URL fetching into URLFetchStrategy

- Added FetchStrategy class to Spack
- Isolated pieces that need to be separate from Stage for git/svn/http
- Added URLFetchStrategy for curl-based fetching.
---
 lib/spack/spack/fetch_strategy.py | 222 ++++++++++++++++++++++++++++++++++++++
 lib/spack/spack/package.py        |  20 ++--
 lib/spack/spack/relations.py      |   2 +-
 lib/spack/spack/stage.py          | 175 +++++++++++++-----------------
 lib/spack/spack/test/stage.py     |   8 +-
 5 files changed, 310 insertions(+), 117 deletions(-)
 create mode 100644 lib/spack/spack/fetch_strategy.py

diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py
new file mode 100644
index 0000000000..2811c0e92b
--- /dev/null
+++ b/lib/spack/spack/fetch_strategy.py
@@ -0,0 +1,222 @@
+##############################################################################
+# Copyright (c) 2013, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory.
+#
+# This file is part of Spack.
+# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
+# LLNL-CODE-647188
+#
+# For details, see https://scalability-llnl.github.io/spack
+# Please also see the LICENSE file for our notice and the LGPL.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (as published by
+# the Free Software Foundation) version 2.1 dated February 1999.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
+# conditions of the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##############################################################################
+"""
+Fetch strategies are used to download source code into a staging area
+in order to build it.  They need to define the following methods:
+
+    * fetch()
+        This should attempt to download/check out source from somewhere.
+    * check()
+        Apply a checksum to the downloaded source code, e.g. for an archive.
+        May not do anything if the fetch method was safe to begin with.
+    * expand()
+        Expand the downloaded file (e.g., an archive) into source code.
+    * reset()
+        Restore original state of downloaded code.  Used by clean commands.
+        This may just remove the expanded source and re-expand an archive,
+        or it may run something like git reset --hard.
+"""
+import os
+import re
+import shutil
+
+import llnl.util.tty as tty
+
+import spack
+import spack.error
+import spack.util.crypto as crypto
+from spack.util.compression import decompressor_for
+
+
+class FetchStrategy(object):
+    def __init__(self):
+        # The stage is initialized late, so that fetch strategies can be
+        # constructed at package construction time.  This is where things
+        # will be fetched.
+        self.stage = None
+
+
+    def set_stage(self, stage):
+        """This is called by Stage before any of the fetching
+           methods are called on the stage."""
+        self.stage = stage
+
+
+    # Subclasses need to implement these methods
+    def fetch(self):    pass    # Return True on success, False on fail
+    def check(self):    pass
+    def expand(self):   pass
+    def reset(self):    pass
+    def __str__(self):  pass
+
+
+
+class URLFetchStrategy(FetchStrategy):
+
+    def __init__(self, url, digest=None):
+        super(URLFetchStrategy, self).__init__()
+        self.url = url
+        self.digest = digest
+
+
+    def fetch(self):
+        assert(self.stage)
+
+        self.stage.chdir()
+
+        if self.archive_file:
+            tty.msg("Already downloaded %s." % self.archive_file)
+            return
+
+        tty.msg("Trying to fetch from %s" % self.url)
+
+        # Run curl but grab the mime type from the http headers
+        headers = spack.curl('-#',        # status bar
+                             '-O',        # save file to disk
+                             '-D', '-',   # print out HTML headers
+                             '-L', self.url,
+                             return_output=True, fail_on_error=False)
+
+        if spack.curl.returncode != 0:
+            # clean up archive on failure.
+            if self.archive_file:
+                os.remove(self.archive_file)
+
+            if spack.curl.returncode == 60:
+                # This is a certificate error.  Suggest spack -k
+                raise FailedDownloadError(
+                    self.url,
+                    "Curl was unable to fetch due to an invalid certificate. "
+                    "This is either an attack, or your cluster's SSL configuration "
+                    "is bad.  If you believe your SSL configuration is bad, you "
+                    "can try running spack -k, which will not check SSL certificates. "
+                    "Use this at your own risk.")
+
+        # Check if we somehow got an HTML file rather than the archive we
+        # asked for.  We only look at the last content type, to handle
+        # redirects properly.
+        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
+        if content_types and 'text/html' in content_types[-1]:
+            tty.warn("The contents of " + self.archive_file + " look like HTML.",
+                     "The checksum will likely be bad.  If it is, you can use",
+                     "'spack clean --dist' to remove the bad archive, then fix",
+                     "your internet gateway issue and install again.")
+
+        if not self.archive_file:
+            raise FailedDownloadError(self.url)
+
+
+    @property
+    def archive_file(self):
+        """Path to the source archive within this stage directory."""
+        assert(self.stage)
+        path = os.path.join(self.stage.path, os.path.basename(self.url))
+        return path if os.path.exists(path) else None
+
+
+    def expand(self):
+        assert(self.stage)
+        tty.msg("Staging archive: %s" % self.archive_file)
+
+        self.stage.chdir()
+        if not self.archive_file:
+            raise NoArchiveFileError("URLFetchStrategy couldn't find archive file",
+                                     "Failed on expand() for URL %s" % self.url)
+
+        decompress = decompressor_for(self.archive_file)
+        decompress(self.archive_file)
+
+
+    def check(self):
+        """Check the downloaded archive against a checksum digest.
+           No-op if this stage checks code out of a repository."""
+        assert(self.stage)
+        if not self.digest:
+            raise NoDigestError("Attempt to check URLFetchStrategy with no digest.")
+        checker = crypto.Checker(self.digest)
+        if not checker.check(self.archive_file):
+            raise ChecksumError(
+                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
+                "Expected %s but got %s."
+                % (self.digest, checker.sum))
+
+
+    def reset(self):
+        """Removes the source path if it exists, then re-expands the archive."""
+        assert(self.stage)
+        if not self.archive_file:
+            raise NoArchiveFileError("Tried to reset URLFetchStrategy before fetching",
+                                     "Failed on reset() for URL %s" % self.url)
+        if self.stage.source_path:
+            shutil.rmtree(self.stage.source_path, ignore_errors=True)
+        self.expand()
+
+
+    def __str__(self):
+        return self.url
+
+
+
+class GitFetchStrategy(FetchStrategy):
+    pass
+
+
+class SvnFetchStrategy(FetchStrategy):
+    pass
+
+
+def strategy_for_url(url):
+    """Given a URL, find an appropriate fetch strategy for it.
+       Currently just gives you a URLFetchStrategy that uses curl.
+
+       TODO: make this return appropriate fetch strategies for other
+             types of URLs.
+    """
+    return URLFetchStrategy(url)
+
+
+class FetchStrategyError(spack.error.SpackError):
+    def __init__(self, msg, long_msg=None):
+        super(FetchStrategyError, self).__init__(msg, long_msg)
+
+
+class FailedDownloadError(FetchStrategyError):
+    """Raised when a download fails."""
+    def __init__(self, url, msg=""):
+        super(FailedDownloadError, self).__init__(
+            "Failed to fetch file from URL: %s" % url, msg)
+        self.url = url
+
+
+class ChecksumError(FetchStrategyError):
+    """Raised when an archive fails to checksum."""
+    def __init__(self, message, long_msg=None):
+        super(ChecksumError, self).__init__(message, long_msg)
+
+
+class NoArchiveFileError(FetchStrategyError):
+    def __init__(self, msg, long_msg=None):
+        super(NoArchiveFileError, self).__init__(msg, long_msg)
+
+
+class NoDigestError(FetchStrategyError):
+    def __init__(self, msg, long_msg=None):
+        super(NoDigestError, self).__init__(msg, long_msg)
+
+
diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py
index 9644aa43d3..f5f1c9dec6 100644
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -337,7 +337,7 @@ class Package(object):

         # Sanity check some required variables that could be
         # overridden by package authors.
-        def sanity_check_dict(attr_name):
+        def ensure_has_dict(attr_name):
             if not hasattr(self, attr_name):
                 raise PackageError("Package %s must define %s" % attr_name)

@@ -345,10 +345,10 @@ class Package(object):
             if not isinstance(attr, dict):
                 raise PackageError("Package %s has non-dict %s attribute!"
                                    % (self.name, attr_name))
-        sanity_check_dict('versions')
-        sanity_check_dict('dependencies')
-        sanity_check_dict('conflicted')
-        sanity_check_dict('patches')
+        ensure_has_dict('versions')
+        ensure_has_dict('dependencies')
+        ensure_has_dict('conflicted')
+        ensure_has_dict('patches')

         # Check versions in the versions dict.
         for v in self.versions:
@@ -362,9 +362,8 @@ class Package(object):
         # Version-ize the keys in versions dict
         try:
             self.versions = dict((Version(v), h) for v,h in self.versions.items())
-        except ValueError:
-            raise ValueError("Keys of versions dict in package %s must be versions!"
-                             % self.name)
+        except ValueError, e:
+            raise ValueError("In package %s: %s" % (self.name, e.message))

         # stage used to build this package.
         self._stage = None
@@ -600,9 +599,8 @@ class Package(object):

         self.do_fetch()

-        archive_dir = self.stage.expanded_archive_path
+        archive_dir = self.stage.source_path
         if not archive_dir:
-            tty.msg("Staging archive: %s" % self.stage.archive_file)
             self.stage.expand_archive()
             tty.msg("Created stage directory in %s." % self.stage.path)
         else:
@@ -620,7 +618,7 @@ class Package(object):

         # Construct paths to special files in the archive dir used to
         # keep track of whether patches were successfully applied.
-        archive_dir = self.stage.expanded_archive_path
+        archive_dir = self.stage.source_path
         good_file = join_path(archive_dir, '.spack_patched')
         bad_file  = join_path(archive_dir, '.spack_patch_failed')

diff --git a/lib/spack/spack/relations.py b/lib/spack/spack/relations.py
index 5afb7e7624..f7e1cfd925 100644
--- a/lib/spack/spack/relations.py
+++ b/lib/spack/spack/relations.py
@@ -95,7 +95,7 @@ class VersionDescriptor(object):


 def version(ver, checksum, **kwargs):
-    """Adds a version and associated metadata to the package."""
+    """Adds a version and metadata describing how to fetch it."""
     pkg = caller_locals()

     versions = pkg.setdefault('versions', {})

diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py
index 3dac798396..0fa315051f 100644
--- a/lib/spack/spack/stage.py
+++ b/lib/spack/spack/stage.py
@@ -32,18 +32,20 @@ from llnl.util.filesystem import *

 import spack
 import spack.config
+from spack.fetch_strategy import strategy_for_url, URLFetchStrategy
 import spack.error
-import spack.util.crypto as crypto
-from spack.util.compression import decompressor_for
+

 STAGE_PREFIX = 'spack-stage-'


 class Stage(object):
-    """A Stage object manaages a directory where an archive is downloaded,
-       expanded, and built before being installed.  It also handles downloading
-       the archive.  A stage's lifecycle looks like this:
+    """A Stage object manages a directory where some source code is
+       downloaded and built before being installed.  It handles
+       fetching the source code, either as an archive to be expanded
+       or by checking it out of a repository.  A stage's lifecycle
+       looks like this:

     Stage()
       Constructor creates the stage directory.
@@ -71,18 +73,24 @@ class Stage(object):
     def __init__(self, url, **kwargs):
         """Create a stage object.
           Parameters:
-             url      URL of the archive to be downloaded into this stage.
-
-             name     If a name is provided, then this stage is a named stage
-                      and will persist between runs (or if you construct another
-                      stage object later).  If name is not provided, then this
-                      stage will be given a unique name automatically.
+             url_or_fetch_strategy
+                 URL of the archive to be downloaded into this stage, OR
+                 a valid FetchStrategy.
+
+             name
+                 If a name is provided, then this stage is a named stage
+                 and will persist between runs (or if you construct another
+                 stage object later).  If name is not provided, then this
+                 stage will be given a unique name automatically.
        """
+        if isinstance(url, basestring):
+            self.fetcher = strategy_for_url(url)
+            self.fetcher.set_stage(self)
+
         self.name = kwargs.get('name')
         self.mirror_path = kwargs.get('mirror_path')
         self.tmp_root = find_tmp_root()

-        self.url = url
         self.path = None
         self._setup()

@@ -198,17 +206,17 @@ class Stage(object):


     @property
-    def expanded_archive_path(self):
-        """Returns the path to the expanded archive directory if it's expanded;
-           None if the archive hasn't been expanded.
-        """
-        if not self.archive_file:
-            return None
+    def source_path(self):
+        """Returns the path to the expanded/checked out source code
+           within this fetch strategy's path.

-        for file in os.listdir(self.path):
-            archive_path = join_path(self.path, file)
-            if os.path.isdir(archive_path):
-                return archive_path
+           This assumes nothing else is going to be put in the
+           FetchStrategy's path.  It searches for the first
+           subdirectory of the path it can find, then returns that.
+        """
+        for p in [os.path.join(self.path, f) for f in os.listdir(self.path)]:
+            if os.path.isdir(p):
+                return p
         return None


@@ -220,71 +228,35 @@ class Stage(object):
             tty.die("Setup failed: no such directory: " + self.path)


-    def fetch_from_url(self, url):
-        # Run curl but grab the mime type from the http headers
-        headers = spack.curl('-#',        # status bar
-                             '-O',        # save file to disk
-                             '-D', '-',   # print out HTML headers
-                             '-L', url,
-                             return_output=True, fail_on_error=False)
-
-        if spack.curl.returncode != 0:
-            # clean up archive on failure.
-            if self.archive_file:
-                os.remove(self.archive_file)
-
-            if spack.curl.returncode == 60:
-                # This is a certificate error.  Suggest spack -k
-                raise FailedDownloadError(
-                    url,
-                    "Curl was unable to fetch due to invalid certificate. "
-                    "This is either an attack, or your cluster's SSL configuration "
-                    "is bad.  If you believe your SSL configuration is bad, you "
-                    "can try running spack -k, which will not check SSL certificates."
-                    "Use this at your own risk.")
-
-        # Check if we somehow got an HTML file rather than the archive we
-        # asked for.  We only look at the last content type, to handle
-        # redirects properly.
-        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
-        if content_types and 'text/html' in content_types[-1]:
-            tty.warn("The contents of " + self.archive_file + " look like HTML.",
-                     "The checksum will likely be bad.  If it is, you can use",
-                     "'spack clean --dist' to remove the bad archive, then fix",
-                     "your internet gateway issue and install again.")
-
-
-    def fetch(self):
-        """Downloads the file at URL to the stage.  Returns true if it was downloaded,
-           false if it already existed."""
+    def fetch(self):
+        """Downloads an archive or checks out code from a repository."""
         self.chdir()

-        if self.archive_file:
-            tty.msg("Already downloaded %s." % self.archive_file)
-
-        else:
-            urls = [self.url]
-            if self.mirror_path:
-                urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()] + urls
-
-            for url in urls:
-                tty.msg("Trying to fetch from %s" % url)
-                self.fetch_from_url(url)
-                if self.archive_file:
-                    break
+        fetchers = [self.fetcher]

-            if not self.archive_file:
-                raise FailedDownloadError(url)
-
-        return self.archive_file
+        # TODO: move mirror logic out of here and clean it up!
+        if self.mirror_path:
+            urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()]
+            digest = None
+            if isinstance(self.fetcher, URLFetchStrategy):
+                digest = self.fetcher.digest
+            fetchers = [URLFetchStrategy(url, digest) for url in urls] + fetchers
+            for f in fetchers:
+                f.set_stage(self)
+
+        for fetcher in fetchers:
+            try:
+                fetcher.fetch()
+                break
+            except spack.error.SpackError, e:
+                tty.msg("Download from %s failed." % fetcher)
+                continue


     def check(self, digest):
-        """Check the downloaded archive against a checksum digest"""
-        checker = crypto.Checker(digest)
-        if not checker.check(self.archive_file):
-            raise ChecksumError(
-                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
-                "Expected %s but got %s." % (digest, checker.sum))
+        """Check the downloaded archive against a checksum digest.
+           No-op if this stage checks code out of a repository."""
+        self.fetcher.check()


     def expand_archive(self):
@@ -292,19 +264,14 @@ class Stage(object):
         archive.  Fail if the stage is not set up or if the
         archive is not yet downloaded.
         """
-        self.chdir()
-        if not self.archive_file:
-            tty.die("Attempt to expand archive before fetching.")
-
-        decompress = decompressor_for(self.archive_file)
-        decompress(self.archive_file)
+        self.fetcher.expand()


     def chdir_to_archive(self):
         """Changes directory to the expanded archive directory.
           Dies with an error if there was no expanded archive.
        """
-        path = self.expanded_archive_path
+        path = self.source_path
         if not path:
             tty.die("Attempt to chdir before expanding archive.")
         else:
@@ -317,12 +284,7 @@ class Stage(object):
         """Removes the expanded archive path if it exists, then re-expands
           the archive.
        """
-        if not self.archive_file:
-            tty.die("Attempt to restage when not staged.")
-
-        if self.expanded_archive_path:
-            shutil.rmtree(self.expanded_archive_path, True)
-        self.expand_archive()
+        self.fetcher.reset()


     def destroy(self):
@@ -393,15 +355,26 @@ def find_tmp_root():
     return None


-class FailedDownloadError(spack.error.SpackError):
-    """Raised wen a download fails."""
-    def __init__(self, url, msg=""):
-        super(FailedDownloadError, self).__init__(
-            "Failed to fetch file from URL: %s" % url, msg)
-        self.url = url
+class StageError(spack.error.SpackError):
+    def __init__(self, message, long_message=None):
+        super(StageError, self).__init__(message, long_message)


-class ChecksumError(spack.error.SpackError):
+class ChecksumError(StageError):
     """Raised when archive fails to checksum."""
-    def __init__(self, message, long_msg):
+    def __init__(self, message, long_msg=None):
         super(ChecksumError, self).__init__(message, long_msg)
+
+
+class RestageError(StageError):
+    def __init__(self, message, long_msg=None):
+        super(RestageError, self).__init__(message, long_msg)
+
+
+class ChdirError(StageError):
+    def __init__(self, message, long_msg=None):
+        super(ChdirError, self).__init__(message, long_msg)
+
+
+# Keep this in namespace for convenience
+FailedDownloadError = spack.fetch_strategy.FailedDownloadError
diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py
index a412549dc7..8cb7ac772e 100644
--- a/lib/spack/spack/test/stage.py
+++ b/lib/spack/spack/test/stage.py
@@ -146,7 +146,7 @@ class StageTest(unittest.TestCase):
         stage_path = self.get_stage_path(stage, stage_name)
         self.assertTrue(archive_name in os.listdir(stage_path))
         self.assertEqual(join_path(stage_path, archive_name),
-                         stage.archive_file)
+                         stage.fetcher.archive_file)


     def check_expand_archive(self, stage, stage_name):
@@ -156,7 +156,7 @@ class StageTest(unittest.TestCase):

         self.assertEqual(
             join_path(stage_path, archive_dir),
-            stage.expanded_archive_path)
+            stage.source_path)

         readme = join_path(stage_path, archive_dir, readme_name)
         self.assertTrue(os.path.isfile(readme))
@@ -292,7 +292,7 @@ class StageTest(unittest.TestCase):
         with closing(open('foobar', 'w')) as file:
             file.write("this file is to be destroyed.")

-        self.assertTrue('foobar' in os.listdir(stage.expanded_archive_path))
+        self.assertTrue('foobar' in os.listdir(stage.source_path))

         # Make sure the file is not there after restage.
         stage.restage()
@@ -301,7 +301,7 @@ class StageTest(unittest.TestCase):
         stage.chdir_to_archive()
         self.check_chdir_to_archive(stage, stage_name)

-        self.assertFalse('foobar' in os.listdir(stage.expanded_archive_path))
+        self.assertFalse('foobar' in os.listdir(stage.source_path))

         stage.destroy()
         self.check_destroy(stage, stage_name)
-- 
cgit v1.2.3-70-g09d2