From 4af448724f4f933b5306c4cdd2f7522d64dc8719 Mon Sep 17 00:00:00 2001
From: Peter Scheibel
Date: Fri, 25 Oct 2019 21:47:28 -0700
Subject: `mirror create --all` can mirror everything (#12940)

Support mirroring all packages with `spack mirror create --all`. In this
mode there is no concretization:

* Spack pulls every version of every package into the created mirror.
* It makes multiple attempts for each package/version combination (in
  case of temporary connection failures).
* It continues even if every attempt for a package fails, i.e., it makes
  a best effort to fetch everything rather than aborting on the first
  package that cannot be fetched.

This also changes the mirroring logic to prefer storing sources by their
hash, or by a unique name derived from the source. For example:

* Archives with checksums are named by their sha256 sum, e.g.
  `archive/f6/f6cf3bd233f9ea6147b21c7c02cac24e5363570ce4fd6be11dab9f499ed6a7d8.tar.gz`
  vs. the previous `<package>-<version>.tar.gz`
* VCS repositories are stored by a path derived from their URL, e.g.
  `git/google/leveldb.git/master.tar.gz`

The new mirror layout allows different packages to refer to the same
resource or source without duplicating that download in the
mirror/cache. This change is not essential to mirroring everything, but
it is expected to save space when mirrored packages share a resource.

The new structure of the mirror is:

```
<base directory>/
   _source-cache/    <-- the _source-cache directory is new
      archive/       <-- archives/resources/patches stored by hash
         00/         <-- 2-letter sha256 prefix
            002748bdd0319d5ab82606cf92dc210fc1c05d0607a2e1d5538f60512b029056.tar.gz
         01/
            0154c25c45b5506b6d618ca8e18d0ef093dac47946ac0df464fb21e77b504118.tar.gz
            0173a74a515211997a3117a47e7b9ea43594a04b865b69da5a71c0886fa829ea.tar.gz
         ...
      git/
         OpenFAST/
            openfast.git/
               master.tar.gz     <-- repo by branch name
         PHASTA/
            phasta.git/
               11f431f2d1a53a529dab4b0f079ab8aab7ca1109.tar.gz  <-- repo by commit
         ...
      svn/            <-- each fetch strategy has its own subdirectory
         ...
   openmpi/           <-- the remaining package directories have the old format
      openmpi-1.10.1.tar.gz      <-- human-readable name is a symlink to _source-cache
```

In addition to the hash-named archives described above, `mirror create`
now also creates symlinks with the old naming scheme, both to help users
see which package each mirrored archive belongs to and to keep mirrors
usable with older versions of Spack. The symlinks are relative, so the
mirror directory can still be archived as a whole.

Other improvements:

* `spack mirror create` will not re-download resources that have already
  been placed in the mirror.
* When creating a mirror, resources downloaded to the mirror are not
  also stored in the local download cache (sources are not stored
  twice).
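As an illustration of the new naming scheme, here is a minimal sketch of
how the content-addressed storage path and the human-readable symlink fit
together. The helper names below are hypothetical; the actual logic lives
in `URLFetchStrategy.mirror_id` and `MirrorCache.store` in the diff that
follows.

```python
import os


def storage_path(sha256_digest, extension='tar.gz'):
    # Content-addressed location inside the mirror:
    #   _source-cache/archive/<first 2 hex chars>/<digest>.<ext>
    # The 2-character prefix keeps any single directory from
    # accumulating too many entries.
    return os.path.join('_source-cache', 'archive', sha256_digest[:2],
                        '{0}.{1}'.format(sha256_digest, extension))


def add_cosmetic_symlink(mirror_root, pkg_name, version, sha256_digest):
    # Old-style human-readable name, e.g. openmpi/openmpi-1.10.1.tar.gz,
    # pointing at the hash-named archive. The link target is relative so
    # the mirror directory can be moved or archived without breaking it.
    target = os.path.join(mirror_root, storage_path(sha256_digest))
    link = os.path.join(mirror_root, pkg_name,
                        '{0}-{1}.tar.gz'.format(pkg_name, version))
    if not os.path.exists(link):
        os.makedirs(os.path.dirname(link), exist_ok=True)
        os.symlink(os.path.relpath(target, os.path.dirname(link)), link)
```

For example, `storage_path('f6cf3bd2...')` yields
`_source-cache/archive/f6/f6cf3bd2....tar.gz`, matching the tree above.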
--- lib/spack/spack/caches.py | 31 +++-- lib/spack/spack/cmd/mirror.py | 133 +++++++++++++-------- lib/spack/spack/fetch_strategy.py | 40 +++++++ lib/spack/spack/mirror.py | 234 +++++++++++++++++++++++++------------ lib/spack/spack/package.py | 68 ++++++++--- lib/spack/spack/patch.py | 22 +++- lib/spack/spack/stage.py | 69 +++++++---- lib/spack/spack/test/cmd/mirror.py | 2 +- lib/spack/spack/test/install.py | 6 +- lib/spack/spack/test/mirror.py | 84 ++++++------- lib/spack/spack/test/packaging.py | 4 +- 11 files changed, 465 insertions(+), 228 deletions(-) diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py index e2352b2fcc..bdece50421 100644 --- a/lib/spack/spack/caches.py +++ b/lib/spack/spack/caches.py @@ -15,7 +15,6 @@ import spack.config import spack.fetch_strategy import spack.util.file_cache import spack.util.path -import spack.util.url as url_util def _misc_cache(): @@ -52,25 +51,25 @@ def _fetch_cache(): class MirrorCache(object): def __init__(self, root): - self.root = url_util.local_file_path(root) - if not self.root: - raise spack.error.SpackError( - 'MirrorCaches only work with file:// URLs') + self.root = os.path.abspath(root) - self.new_resources = set() - self.existing_resources = set() - - def store(self, fetcher, relative_dest): + def store(self, fetcher, relative_dest, cosmetic_path=None): # Note this will archive package sources even if they would not # normally be cached (e.g. the current tip of an hg/git branch) dst = os.path.join(self.root, relative_dest) - - if os.path.exists(dst): - self.existing_resources.add(relative_dest) - else: - self.new_resources.add(relative_dest) - mkdirp(os.path.dirname(dst)) - fetcher.archive(dst) + mkdirp(os.path.dirname(dst)) + fetcher.archive(dst) + + # Add a symlink path that a human can read to understand what resource + # the archive path refers to + if not cosmetic_path: + return + cosmetic_path = os.path.join(self.root, cosmetic_path) + relative_dst = os.path.relpath( + dst, start=os.path.dirname(cosmetic_path)) + if not os.path.exists(cosmetic_path): + mkdirp(os.path.dirname(cosmetic_path)) + os.symlink(relative_dst, cosmetic_path) #: Spack's local cache for downloaded source archives diff --git a/lib/spack/spack/cmd/mirror.py b/lib/spack/spack/cmd/mirror.py index 91ed40a4c5..21723e4965 100644 --- a/lib/spack/spack/cmd/mirror.py +++ b/lib/spack/spack/cmd/mirror.py @@ -38,18 +38,25 @@ def setup_parser(subparser): create_parser = sp.add_parser('create', help=mirror_create.__doc__) create_parser.add_argument('-d', '--directory', default=None, help="directory in which to create mirror") + create_parser.add_argument( 'specs', nargs=argparse.REMAINDER, help="specs of packages to put in mirror") + create_parser.add_argument( + '-a', '--all', action='store_true', + help="mirror all versions of all packages in Spack, or all packages" + " in the current environment if there is an active environment" + " (this requires significant time and space)") create_parser.add_argument( '-f', '--file', help="file with specs of packages to put in mirror") + create_parser.add_argument( '-D', '--dependencies', action='store_true', help="also fetch all dependencies") create_parser.add_argument( - '-n', '--versions-per-spec', type=int, - default=1, - help="the number of versions to fetch for each spec") + '-n', '--versions-per-spec', + help="the number of versions to fetch for each spec, choose 'all' to" + " retrieve all versions of each package") # used to construct scope arguments below scopes = spack.config.scopes() @@ -225,6 +232,25 @@ 
def _read_specs_from_file(filename): def mirror_create(args): """Create a directory to be used as a spack mirror, and fill it with package archives.""" + if args.specs and args.all: + raise SpackError("Cannot specify specs on command line if you" + " chose to mirror all specs with '--all'") + elif args.file and args.all: + raise SpackError("Cannot specify specs with a file ('-f') if you" + " chose to mirror all specs with '--all'") + + if not args.versions_per_spec: + num_versions = 1 + elif args.versions_per_spec == 'all': + num_versions = 'all' + else: + try: + num_versions = int(args.versions_per_spec) + except ValueError: + raise SpackError( + "'--versions-per-spec' must be a number or 'all'," + " got '{0}'".format(args.versions_per_spec)) + # try to parse specs from the command line first. with spack.concretize.disable_compiler_existence_check(): specs = spack.cmd.parse_specs(args.specs, concretize=True) @@ -235,56 +261,67 @@ def mirror_create(args): tty.die("Cannot pass specs on the command line with --file.") specs = _read_specs_from_file(args.file) - # If nothing is passed, use environment or all if no active env if not specs: + # If nothing is passed, use environment or all if no active env + if not args.all: + tty.die("No packages were specified.", + "To mirror all packages, use the '--all' option" + " (this will require significant time and space).") + env = ev.get_env(args, 'mirror') if env: - specs = env.specs_by_hash.values() + mirror_specs = env.specs_by_hash.values() else: specs = [Spec(n) for n in spack.repo.all_package_names()] - specs.sort(key=lambda s: s.format("{name}{@version}").lower()) - - # If the user asked for dependencies, traverse spec DAG get them. - if args.dependencies: - new_specs = set() - for spec in specs: - spec.concretize() - for s in spec.traverse(): - new_specs.add(s) - specs = list(new_specs) - - # Skip external specs, as they are already installed - external_specs = [s for s in specs if s.external] - specs = [s for s in specs if not s.external] - - for spec in external_specs: - msg = 'Skipping {0} as it is an external spec.' - tty.msg(msg.format(spec.cshort_spec)) - - mirror = spack.mirror.Mirror( - args.directory or spack.config.get('config:source_cache')) - - directory = url_util.format(mirror.push_url) - - # Make sure nothing is in the way. - existed = web_util.url_exists(directory) - - # Actually do the work to create the mirror - present, mirrored, error = spack.mirror.create( - directory, specs, num_versions=args.versions_per_spec) - p, m, e = len(present), len(mirrored), len(error) - - verb = "updated" if existed else "created" - tty.msg( - "Successfully %s mirror in %s" % (verb, directory), - "Archive stats:", - " %-4d already present" % p, - " %-4d added" % m, - " %-4d failed to fetch." % e) - if error: - tty.error("Failed downloads:") - colify(s.cformat("{name}{@version}") for s in error) - sys.exit(1) + mirror_specs = spack.mirror.get_all_versions(specs) + mirror_specs.sort( + key=lambda s: (s.name, s.version)) + else: + # If the user asked for dependencies, traverse spec DAG get them. + if args.dependencies: + new_specs = set() + for spec in specs: + spec.concretize() + for s in spec.traverse(): + new_specs.add(s) + specs = list(new_specs) + + # Skip external specs, as they are already installed + external_specs = [s for s in specs if s.external] + specs = [s for s in specs if not s.external] + + for spec in external_specs: + msg = 'Skipping {0} as it is an external spec.' 
+ tty.msg(msg.format(spec.cshort_spec)) + + if num_versions == 'all': + mirror_specs = spack.mirror.get_all_versions(specs) + else: + mirror_specs = spack.mirror.get_matching_versions( + specs, num_versions=num_versions) + + mirror = spack.mirror.Mirror( + args.directory or spack.config.get('config:source_cache')) + + directory = url_util.format(mirror.push_url) + + existed = web_util.url_exists(directory) + + # Actually do the work to create the mirror + present, mirrored, error = spack.mirror.create(directory, mirror_specs) + p, m, e = len(present), len(mirrored), len(error) + + verb = "updated" if existed else "created" + tty.msg( + "Successfully %s mirror in %s" % (verb, directory), + "Archive stats:", + " %-4d already present" % p, + " %-4d added" % m, + " %-4d failed to fetch." % e) + if error: + tty.error("Failed downloads:") + colify(s.cformat("{name}{@version}") for s in error) + sys.exit(1) def mirror(parser, args): diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 8105402f99..393e3af9d1 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -166,11 +166,23 @@ class FetchStrategy(with_metaclass(FSMeta, object)): def source_id(self): """A unique ID for the source. + It is intended that a human could easily generate this themselves using + the information available to them in the Spack package. + The returned value is added to the content which determines the full hash for a package using `str()`. """ raise NotImplementedError + def mirror_id(self): + """This is a unique ID for a source that is intended to help identify + reuse of resources across packages. + + It is unique like source-id, but it does not include the package name + and is not necessarily easy for a human to create themselves. + """ + raise NotImplementedError + def __str__(self): # Should be human readable URL. return "FetchStrategy.__str___" @@ -273,6 +285,15 @@ class URLFetchStrategy(FetchStrategy): def source_id(self): return self.digest + def mirror_id(self): + if not self.digest: + return None + # The filename is the digest. 
A directory is also created based on + # truncating the digest to avoid creating a directory with too many + # entries + return os.path.sep.join( + ['archive', self.digest[:2], self.digest]) + @_needs_stage def fetch(self): if self.archive_file: @@ -727,6 +748,13 @@ class GitFetchStrategy(VCSFetchStrategy): def source_id(self): return self.commit or self.tag + def mirror_id(self): + repo_ref = self.commit or self.tag or self.branch + if repo_ref: + repo_path = url_util.parse(self.url).path + result = os.path.sep.join(['git', repo_path, repo_ref]) + return result + def get_source_id(self): if not self.branch: return @@ -908,6 +936,12 @@ class SvnFetchStrategy(VCSFetchStrategy): info = xml.etree.ElementTree.fromstring(output) return info.find('entry/commit').get('revision') + def mirror_id(self): + if self.revision: + repo_path = url_util.parse(self.url).path + result = os.path.sep.join(['svn', repo_path, self.revision]) + return result + @_needs_stage def fetch(self): if self.stage.expanded: @@ -1011,6 +1045,12 @@ class HgFetchStrategy(VCSFetchStrategy): def source_id(self): return self.revision + def mirror_id(self): + if self.revision: + repo_path = url_util.parse(self.url).path + result = os.path.sep.join(['hg', repo_path, self.revision]) + return result + def get_source_id(self): output = self.hg('id', self.url, output=str) if output: diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index e2329b6861..0c5e2a9cc2 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -13,6 +13,7 @@ to download packages directly from a mirror (e.g., on an intranet). """ import sys import os +import traceback import os.path import operator @@ -37,7 +38,6 @@ import spack.util.spack_yaml as syaml import spack.util.url as url_util import spack.spec from spack.version import VersionList -from spack.util.compression import allowed_archive from spack.util.spack_yaml import syaml_dict @@ -233,27 +233,16 @@ class MirrorCollection(Mapping): return len(self._mirrors) -def mirror_archive_filename(spec, fetcher, resource_id=None): - """Get the name of the spec's archive in the mirror.""" - if not spec.version.concrete: - raise ValueError("mirror.path requires spec with concrete version.") - +def _determine_extension(fetcher): if isinstance(fetcher, fs.URLFetchStrategy): if fetcher.expand_archive: # If we fetch with a URLFetchStrategy, use URL's archive type ext = url.determine_url_file_extension(fetcher.url) - # If the filename does not end with a normal suffix, - # see if the package explicitly declares the extension - if not ext: - ext = spec.package.versions[spec.package.version].get( - 'extension', None) - if ext: # Remove any leading dots ext = ext.lstrip('.') - - if not ext: + else: msg = """\ Unable to parse extension from {0}. @@ -276,21 +265,92 @@ Spack not to expand it with the following syntax: # Otherwise we'll make a .tar.gz ourselves ext = 'tar.gz' - if resource_id: - filename = "%s-%s" % (resource_id, spec.version) + ".%s" % ext - else: - filename = "%s-%s" % (spec.package.name, spec.version) + ".%s" % ext + return ext + + +class MirrorReference(object): + """A ``MirrorReference`` stores the relative paths where you can store a + package/resource in a mirror directory. - return filename + The appropriate storage location is given by ``storage_path``. The + ``cosmetic_path`` property provides a reference that a human could generate + themselves based on reading the details of the package. 
+ + A user can iterate over a ``MirrorReference`` object to get all the + possible names that might be used to refer to the resource in a mirror; + this includes names generated by previous naming schemes that are no longer + reported by ``storage_path`` or ``cosmetic_path``. + """ + def __init__(self, cosmetic_path, global_path=None): + self.global_path = global_path + self.cosmetic_path = cosmetic_path + + @property + def storage_path(self): + if self.global_path: + return self.global_path + else: + return self.cosmetic_path + def __iter__(self): + if self.global_path: + yield self.global_path + yield self.cosmetic_path + + +def mirror_archive_paths(fetcher, per_package_ref, spec=None): + """Returns a ``MirrorReference`` object which keeps track of the relative + storage path of the resource associated with the specified ``fetcher``.""" + ext = None + if spec: + ext = spec.package.versions[spec.package.version].get( + 'extension', None) + # If the spec does not explicitly specify an extension (the default case), + # then try to determine it automatically. An extension can only be + # specified for the primary source of the package (e.g. the source code + # identified in the 'version' declaration). Resources/patches don't have + # an option to specify an extension, so it must be inferred for those. + ext = ext or _determine_extension(fetcher) + + if ext: + per_package_ref += ".%s" % ext + + global_ref = fetcher.mirror_id() + if global_ref: + global_ref = os.path.join('_source-cache', global_ref) + if global_ref and ext: + global_ref += ".%s" % ext + + return MirrorReference(per_package_ref, global_ref) + + +def get_all_versions(specs): + """Given a set of initial specs, return a new set of specs that includes + each version of each package in the original set. + + Note that if any spec in the original set specifies properties other than + version, this information will be omitted in the new set; for example, the + new set of specs will not include variant settings. + """ + + version_specs = [] + for spec in specs: + pkg = spec.package + + # Skip any package that has no known versions. + if not pkg.versions: + tty.msg("No safe (checksummed) versions for package %s" % pkg.name) + continue -def mirror_archive_path(spec, fetcher, resource_id=None): - """Get the relative path to the spec's archive within a mirror.""" - return os.path.join( - spec.name, mirror_archive_filename(spec, fetcher, resource_id)) + for version in pkg.versions: + version_spec = spack.spec.Spec(pkg.name) + version_spec.versions = VersionList([version]) + version_specs.append(version_spec) + return version_specs -def get_matching_versions(specs, **kwargs): + +def get_matching_versions(specs, num_versions=1): """Get a spec for EACH known version matching any spec in the list. For concrete specs, this retrieves the concrete version and, if more than one version per spec is requested, retrieves the latest versions @@ -305,7 +365,7 @@ tty.msg("No safe (checksummed) versions for package %s" % pkg.name) continue - pkg_versions = kwargs.get('num_versions', 1) + pkg_versions = num_versions version_order = list(reversed(sorted(pkg.versions))) matching_spec = [] @@ -338,19 +398,7 @@ return matching -def suggest_archive_basename(resource): - """Return a tentative basename for an archive. - - Raises: - RuntimeError: if the name is not an allowed archive type.
- """ - basename = os.path.basename(resource.fetcher.url) - if not allowed_archive(basename): - raise RuntimeError("%s is not an allowed archive tye" % basename) - return basename - - -def create(path, specs, **kwargs): +def create(path, specs): """Create a directory to be used as a spack mirror, and fill it with package archives. @@ -359,10 +407,6 @@ def create(path, specs, **kwargs): specs: Any package versions matching these specs will be added \ to the mirror. - Keyword args: - num_versions: Max number of versions to fetch per spec, \ - (default is 1 each spec) - Return Value: Returns a tuple of lists: (present, mirrored, error) @@ -376,69 +420,107 @@ def create(path, specs, **kwargs): """ parsed = url_util.parse(path) mirror_root = url_util.local_file_path(parsed) - - # Make sure nothing is in the way. - if mirror_root and os.path.isfile(mirror_root): - raise MirrorError("%s already exists and is a file." % mirror_root) + if not mirror_root: + raise spack.error.SpackError( + 'MirrorCaches only work with file:// URLs') # automatically spec-ify anything in the specs array. specs = [ s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s) for s in specs] - # Get concrete specs for each matching version of these specs. - version_specs = get_matching_versions( - specs, num_versions=kwargs.get('num_versions', 1)) - for s in version_specs: - s.concretize() - # Get the absolute path of the root before we start jumping around. - if mirror_root and not os.path.isdir(mirror_root): + if not os.path.isdir(mirror_root): try: mkdirp(mirror_root) except OSError as e: raise MirrorError( "Cannot create directory '%s':" % mirror_root, str(e)) - # Things to keep track of while parsing specs. - categories = { - 'present': [], - 'mirrored': [], - 'error': [] - } - - mirror_cache = spack.caches.MirrorCache(parsed) + mirror_cache = spack.caches.MirrorCache(mirror_root) + mirror_stats = MirrorStats() try: spack.caches.mirror_cache = mirror_cache # Iterate through packages and download all safe tarballs for each - for spec in version_specs: - add_single_spec(spec, parsed, categories, **kwargs) + for spec in specs: + mirror_stats.next_spec(spec) + add_single_spec(spec, mirror_root, mirror_stats) finally: spack.caches.mirror_cache = None - categories['mirrored'] = list(mirror_cache.new_resources) - categories['present'] = list(mirror_cache.existing_resources) + return mirror_stats.stats() + - return categories['present'], categories['mirrored'], categories['error'] +class MirrorStats(object): + def __init__(self): + self.present = {} + self.new = {} + self.errors = set() + self.current_spec = None + self.added_resources = set() + self.existing_resources = set() -def add_single_spec(spec, mirror_root, categories, **kwargs): + def next_spec(self, spec): + self._tally_current_spec() + self.current_spec = spec + + def _tally_current_spec(self): + if self.current_spec: + if self.added_resources: + self.new[self.current_spec] = len(self.added_resources) + if self.existing_resources: + self.present[self.current_spec] = len(self.existing_resources) + self.added_resources = set() + self.existing_resources = set() + self.current_spec = None + + def stats(self): + self._tally_current_spec() + return list(self.present), list(self.new), list(self.errors) + + def already_existed(self, resource): + # If an error occurred after caching a subset of a spec's + # resources, a secondary attempt may consider them already added + if resource not in self.added_resources: + self.existing_resources.add(resource) + + def added(self, 
resource): + self.added_resources.add(resource) + + def error(self): + self.errors.add(self.current_spec) + + +def add_single_spec(spec, mirror_root, mirror_stats): tty.msg("Adding package {pkg} to mirror".format( pkg=spec.format("{name}{@version}") )) - try: - spec.package.do_fetch() - spec.package.do_clean() - - except Exception as e: - tty.debug(e) + num_retries = 3 + while num_retries > 0: + try: + with spec.package.stage as pkg_stage: + pkg_stage.cache_mirror(mirror_stats) + for patch in spec.package.all_patches(): + patch.fetch(pkg_stage) + if patch.cache(): + patch.cache().cache_mirror(mirror_stats) + patch.clean() + exception = None + break + except Exception as e: + exc_tuple = sys.exc_info() + exception = e + num_retries -= 1 + + if exception: if spack.config.get('config:debug'): - sys.excepthook(*sys.exc_info()) + traceback.print_exception(file=sys.stderr, *exc_tuple) else: tty.warn( "Error while fetching %s" % spec.cformat('{name}{@version}'), - e.message) - categories['error'].append(spec) + exception.message) + mirror_stats.error() class MirrorError(spack.error.SpackError): diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index 8a65d7b733..38631c7a0e 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -663,7 +663,8 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): @property def version(self): if not self.spec.versions.concrete: - raise ValueError("Can only get of package with concrete version.") + raise ValueError("Version requested for a package that" + " does not have a concrete version.") return self.spec.versions[0] @memoized @@ -741,19 +742,23 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): def _make_resource_stage(self, root_stage, fetcher, resource): resource_stage_folder = self._resource_stage(resource) - resource_mirror = spack.mirror.mirror_archive_path( - self.spec, fetcher, resource.name) + mirror_paths = spack.mirror.mirror_archive_paths( + fetcher, + os.path.join(self.name, "%s-%s" % (resource.name, self.version))) stage = ResourceStage(resource.fetcher, root=root_stage, resource=resource, name=resource_stage_folder, - mirror_path=resource_mirror, + mirror_paths=mirror_paths, path=self.path) return stage def _make_root_stage(self, fetcher): # Construct a mirror path (TODO: get this out of package.py) - mp = spack.mirror.mirror_archive_path(self.spec, fetcher) + mirror_paths = spack.mirror.mirror_archive_paths( + fetcher, + os.path.join(self.name, "%s-%s" % (self.name, self.version)), + self.spec) # Construct a path where the stage should build.. s = self.spec stage_name = "{0}{1}-{2}-{3}".format(stage_prefix, s.name, s.version, @@ -763,8 +768,8 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): dynamic_fetcher = fs.from_list_url(self) return [dynamic_fetcher] if dynamic_fetcher else [] - stage = Stage(fetcher, mirror_path=mp, name=stage_name, path=self.path, - search_fn=download_search) + stage = Stage(fetcher, mirror_paths=mirror_paths, name=stage_name, + path=self.path, search_fn=download_search) return stage def _make_stage(self): @@ -794,8 +799,9 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): doesn't have one yet, but it does not create the Stage directory on the filesystem. 
""" - if not self.spec.concrete: - raise ValueError("Can only get a stage for a concrete package.") + if not self.spec.versions.concrete: + raise ValueError( + "Cannot retrieve stage for package without concrete version.") if self._stage is None: self._stage = self._make_stage() return self._stage @@ -873,8 +879,8 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): @property def fetcher(self): if not self.spec.versions.concrete: - raise ValueError( - "Can only get a fetcher for a package with concrete versions.") + raise ValueError("Cannot retrieve fetcher for" + " package without concrete version.") if not self._fetcher: self._fetcher = self._make_fetcher() return self._fetcher @@ -1081,6 +1087,8 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): for patch in self.spec.patches: patch.fetch(self.stage) + if patch.cache(): + patch.cache().cache_local() def do_stage(self, mirror_only=False): """Unpacks and expands the fetched tarball.""" @@ -1193,6 +1201,26 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): else: touch(no_patches_file) + @classmethod + def all_patches(cls): + """Retrieve all patches associated with the package. + + Retrieves patches on the package itself as well as patches on the + dependencies of the package.""" + patches = [] + for _, patch_list in cls.patches.items(): + for patch in patch_list: + patches.append(patch) + + pkg_deps = cls.dependencies + for dep_name in pkg_deps: + for _, dependency in pkg_deps[dep_name].items(): + for _, patch_list in dependency.patches.items(): + for patch in patch_list: + patches.append(patch) + + return patches + def content_hash(self, content=None): """Create a hash based on the sources and logic used to build the package. This includes the contents of all applied patches and the @@ -1366,9 +1394,21 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): def _get_needed_resources(self): resources = [] # Select the resources that are needed for this build - for when_spec, resource_list in self.resources.items(): - if when_spec in self.spec: - resources.extend(resource_list) + if self.spec.concrete: + for when_spec, resource_list in self.resources.items(): + if when_spec in self.spec: + resources.extend(resource_list) + else: + for when_spec, resource_list in self.resources.items(): + # Note that variant checking is always strict for specs where + # the name is not specified. But with strict variant checking, + # only variants mentioned in 'other' are checked. Here we only + # want to make sure that no constraints in when_spec + # conflict with the spec, so we need to invoke + # when_spec.satisfies(self.spec) vs. + # self.spec.satisfies(when_spec) + if when_spec.satisfies(self.spec, strict=False): + resources.extend(resource_list) # Sorts the resources by the length of the string representing their # destination. 
Since any nested resource must contain another # resource's name in its path, it seems that should work diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py index cb012af7ff..79550538db 100644 --- a/lib/spack/spack/patch.py +++ b/lib/spack/spack/patch.py @@ -16,6 +16,7 @@ import spack.fetch_strategy as fs import spack.repo import spack.stage import spack.util.spack_json as sjson +import spack from spack.util.compression import allowed_archive from spack.util.crypto import checksum, Checker @@ -87,6 +88,9 @@ class Patch(object): apply_patch(stage, self.path, self.level, self.working_dir) + def cache(self): + return None + def to_dict(self): """Partial dictionary -- subclases should add to this.""" return { @@ -180,6 +184,7 @@ class UrlPatch(Patch): if not self.sha256: raise PatchDirectiveError("URL patches require a sha256 checksum") + # TODO: this function doesn't use the stage arg def fetch(self, stage): """Retrieve the patch in a temporary stage and compute self.path @@ -191,15 +196,19 @@ class UrlPatch(Patch): if self.archive_sha256: fetch_digest = self.archive_sha256 - fetcher = fs.URLFetchStrategy(self.url, fetch_digest) - mirror = os.path.join(os.path.dirname(stage.mirror_path), - os.path.basename(self.url)) + fetcher = fs.URLFetchStrategy(self.url, fetch_digest, + expand=bool(self.archive_sha256)) + + per_package_ref = os.path.join( + self.owner.split('.')[-1], os.path.basename(self.url)) + # Reference starting with "spack." is required to avoid cyclic imports + mirror_ref = spack.mirror.mirror_archive_paths( + fetcher, per_package_ref) - self.stage = spack.stage.Stage(fetcher, mirror_path=mirror) + self.stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref) self.stage.create() self.stage.fetch() self.stage.check() - self.stage.cache_local() root = self.stage.path if self.archive_sha256: @@ -230,6 +239,9 @@ class UrlPatch(Patch): "sha256 checksum failed for %s" % self.path, "Expected %s but got %s" % (self.sha256, checker.sum)) + def cache(self): + return self.stage + def clean(self): self.stage.destroy() diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 9621938bcd..7869c5f863 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -166,6 +166,14 @@ def get_stage_root(): return _stage_root +def _mirror_roots(): + mirrors = spack.config.get('mirrors') + return [ + sup.substitute_path_variables(root) if root.endswith(os.sep) + else sup.substitute_path_variables(root) + os.sep + for root in mirrors.values()] + + class Stage(object): """Manages a temporary stage directory for building. @@ -216,7 +224,7 @@ class Stage(object): def __init__( self, url_or_fetch_strategy, - name=None, mirror_path=None, keep=False, path=None, lock=True, + name=None, mirror_paths=None, keep=False, path=None, lock=True, search_fn=None): """Create a stage object. Parameters: @@ -230,10 +238,10 @@ class Stage(object): stage object later). If name is not provided, then this stage will be given a unique name automatically. - mirror_path + mirror_paths If provided, Stage will search Spack's mirrors for - this archive at the mirror_path, before using the - default fetch strategy. + this archive at each of the provided relative mirror paths + before using the default fetch strategy. 
keep By default, when used as a context manager, the Stage @@ -276,7 +284,7 @@ self.name = name if name is None: self.name = stage_prefix + next(tempfile._get_candidate_names()) - self.mirror_path = mirror_path + self.mirror_paths = mirror_paths # Use the provided path or construct an optionally named stage path. if path is not None: @@ -350,8 +358,8 @@ expanded = self.default_fetcher.expand_archive fnames.append(os.path.basename(self.default_fetcher.url)) - if self.mirror_path: - fnames.append(os.path.basename(self.mirror_path)) + if self.mirror_paths: + fnames.extend(os.path.basename(x) for x in self.mirror_paths) paths.extend(os.path.join(self.path, f) for f in fnames) if not expanded: @@ -399,10 +407,14 @@ # TODO: Or @alalazo may have some ideas about how to use a # TODO: CompositeFetchStrategy here. self.skip_checksum_for_mirror = True - if self.mirror_path: - urls = [ - url_util.join(mirror.fetch_url, self.mirror_path) - for mirror in spack.mirror.MirrorCollection().values()] + if self.mirror_paths: + # Join URLs of mirror roots with mirror paths. Note that + # urljoin() will strip everything past the final '/' in + # the root, so we add a '/' if it is not present. + urls = [] + for mirror in spack.mirror.MirrorCollection().values(): + for rel_path in self.mirror_paths: + urls.append(url_util.join(mirror.fetch_url, rel_path)) # If this archive is normally fetched from a tarball URL, # then use the same digest. `spack mirror` ensures that @@ -428,10 +440,11 @@ # url, digest, expand=expand, extension=extension)) if self.default_fetcher.cachable: - fetchers.insert( - 0, spack.caches.fetch_cache.fetcher( - self.mirror_path, digest, expand=expand, - extension=extension)) + for rel_path in reversed(list(self.mirror_paths)): + cache_fetcher = spack.caches.fetch_cache.fetcher( + rel_path, digest, expand=expand, + extension=extension) + fetchers.insert(0, cache_fetcher) def generate_fetchers(): for fetcher in fetchers: @@ -476,10 +489,24 @@ self.fetcher.check() def cache_local(self): - spack.caches.fetch_cache.store(self.fetcher, self.mirror_path) + spack.caches.fetch_cache.store( + self.fetcher, self.mirror_paths.storage_path) + + def cache_mirror(self, stats): + """Perform a fetch if the resource is not already cached""" + dst_root = spack.caches.mirror_cache.root + absolute_storage_path = os.path.join( + dst_root, self.mirror_paths.storage_path) - if spack.caches.mirror_cache: - spack.caches.mirror_cache.store(self.fetcher, self.mirror_path) + if os.path.exists(absolute_storage_path): + stats.already_existed(absolute_storage_path) + return + + self.fetch() + spack.caches.mirror_cache.store( + self.fetcher, self.mirror_paths.storage_path, + self.mirror_paths.cosmetic_path) + stats.added(absolute_storage_path) def expand_archive(self): """Changes to the stage directory and attempt to expand the downloaded @@ -591,7 +618,7 @@ class ResourceStage(Stage): @pattern.composite(method_list=[ 'fetch', 'create', 'created', 'check', 'expand_archive', 'restage', - 'destroy', 'cache_local', 'managed_by_spack']) + 'destroy', 'cache_local', 'cache_mirror', 'managed_by_spack']) class StageComposite: """Composite for Stage type objects.
The first item in this composite is considered to be the root package, and operations that return a value are @@ -629,10 +656,6 @@ class StageComposite: def archive_file(self): return self[0].archive_file - @property - def mirror_path(self): - return self[0].mirror_path - class DIYStage(object): """ diff --git a/lib/spack/spack/test/cmd/mirror.py b/lib/spack/spack/test/cmd/mirror.py index 266aa36a4b..889d81f98b 100644 --- a/lib/spack/spack/test/cmd/mirror.py +++ b/lib/spack/spack/test/cmd/mirror.py @@ -37,7 +37,7 @@ def test_mirror_from_env(tmpdir, mock_packages, mock_fetch, config, add('git-test') concretize() with spack.config.override('config:checksum', False): - mirror('create', '-d', mirror_dir) + mirror('create', '-d', mirror_dir, '--all') e = ev.read(env_name) assert set(os.listdir(mirror_dir)) == set([s.name for s in e.user_specs]) diff --git a/lib/spack/spack/test/install.py b/lib/spack/spack/test/install.py index 571e9fcd58..4bed12456a 100644 --- a/lib/spack/spack/test/install.py +++ b/lib/spack/spack/test/install.py @@ -460,11 +460,11 @@ def test_unconcretized_install(install_mockery, mock_fetch, mock_packages): with pytest.raises(ValueError, match="only install concrete packages"): spec.package.do_install() - with pytest.raises(ValueError, match="fetch concrete packages"): + with pytest.raises(ValueError, match="only fetch concrete packages"): spec.package.do_fetch() - with pytest.raises(ValueError, match="stage concrete packages"): + with pytest.raises(ValueError, match="only stage concrete packages"): spec.package.do_stage() - with pytest.raises(ValueError, match="patch concrete packages"): + with pytest.raises(ValueError, match="only patch concrete packages"): spec.package.do_patch() diff --git a/lib/spack/spack/test/mirror.py b/lib/spack/spack/test/mirror.py index 2f80bae408..da40cfac37 100644 --- a/lib/spack/spack/test/mirror.py +++ b/lib/spack/spack/test/mirror.py @@ -52,46 +52,50 @@ def check_mirror(): mirrors = {'spack-mirror-test': 'file://' + mirror_root} spack.config.set('mirrors', mirrors) with spack.config.override('config:checksum', False): - spack.mirror.create(mirror_root, repos) + specs = [Spec(x).concretized() for x in repos] + spack.mirror.create(mirror_root, specs) # Stage directory exists assert os.path.isdir(mirror_root) - # check that there are subdirs for each package - for name in repos: - subdir = os.path.join(mirror_root, name) - assert os.path.isdir(subdir) - - files = os.listdir(subdir) - assert len(files) == 1 - - # Now try to fetch each package. - for name, mock_repo in repos.items(): - spec = Spec(name).concretized() - pkg = spec.package - - with spack.config.override('config:checksum', False): - with pkg.stage: - pkg.do_stage(mirror_only=True) - - # Compare the original repo with the expanded archive - original_path = mock_repo.path - if 'svn' in name: - # have to check out the svn repo to compare. - original_path = os.path.join( - mock_repo.path, 'checked_out') - - svn = which('svn', required=True) - svn('checkout', mock_repo.url, original_path) - - dcmp = filecmp.dircmp( - original_path, pkg.stage.source_path) - - # make sure there are no new files in the expanded - # tarball - assert not dcmp.right_only - # and that all original files are present. 
- assert all(l in exclude for l in dcmp.left_only) + for spec in specs: + fetcher = spec.package.fetcher[0] + per_package_ref = os.path.join( + spec.name, '-'.join([spec.name, str(spec.version)])) + mirror_paths = spack.mirror.mirror_archive_paths( + fetcher, + per_package_ref) + expected_path = os.path.join( + mirror_root, mirror_paths.storage_path) + assert os.path.exists(expected_path) + + # Now try to fetch each package. + for name, mock_repo in repos.items(): + spec = Spec(name).concretized() + pkg = spec.package + + with spack.config.override('config:checksum', False): + with pkg.stage: + pkg.do_stage(mirror_only=True) + + # Compare the original repo with the expanded archive + original_path = mock_repo.path + if 'svn' in name: + # have to check out the svn repo to compare. + original_path = os.path.join( + mock_repo.path, 'checked_out') + + svn = which('svn', required=True) + svn('checkout', mock_repo.url, original_path) + + dcmp = filecmp.dircmp( + original_path, pkg.stage.source_path) + + # make sure there are no new files in the expanded + # tarball + assert not dcmp.right_only + # and that all original files are present. + assert all(l in exclude for l in dcmp.left_only) def test_url_mirror(mock_archive): @@ -148,7 +152,7 @@ def test_mirror_with_url_patches(mock_packages, config, monkeypatch): files_cached_in_mirror = set() - def record_store(_class, fetcher, relative_dst): + def record_store(_class, fetcher, relative_dst, cosmetic_path=None): files_cached_in_mirror.add(os.path.basename(relative_dst)) def successful_fetch(_class): @@ -178,5 +182,7 @@ def test_mirror_with_url_patches(mock_packages, config, monkeypatch): with spack.config.override('config:checksum', False): spack.mirror.create(mirror_root, list(spec.traverse())) - assert not (set(['urlpatch.patch', 'urlpatch2.patch.gz']) - - files_cached_in_mirror) + assert not (set([ + 'abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234', + 'abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd.gz' # NOQA: ignore=E501 + ]) - files_cached_in_mirror) diff --git a/lib/spack/spack/test/packaging.py b/lib/spack/spack/test/packaging.py index e865cac32e..9c0396ef5e 100644 --- a/lib/spack/spack/test/packaging.py +++ b/lib/spack/spack/test/packaging.py @@ -92,9 +92,7 @@ echo $PATH""" # Create the build cache and # put it directly into the mirror mirror_path = os.path.join(str(tmpdir), 'test-mirror') - spack.mirror.create( - mirror_path, specs=[], no_checksum=True - ) + spack.mirror.create(mirror_path, specs=[]) # register mirror with spack config mirrors = {'spack-mirror-test': 'file://' + mirror_path} -- cgit v1.2.3-70-g09d2
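The best-effort behavior described in the commit message reduces to the
retry loop in `add_single_spec` above. Here is a self-contained sketch of
that pattern, assuming the per-spec staging and caching work is passed in
as a callable; the real code retries three times and tallies persistent
failures via `MirrorStats.error()` instead of aborting.

```python
import sys
import traceback


def best_effort(action, retries=3, debug=False):
    # Run `action` up to `retries` times, swallowing exceptions so that
    # one bad package cannot abort an entire `mirror create --all` run.
    exc_info = None
    for _ in range(retries):
        try:
            action()
            return True  # success; no further attempts needed
        except Exception:
            exc_info = sys.exc_info()  # remember the most recent failure
    if debug and exc_info:
        traceback.print_exception(*exc_info, file=sys.stderr)
    return False  # caller records the failure and moves on
```

Returning a flag instead of re-raising is what lets the mirroring loop
visit every spec even when one download persistently fails, which is the
point of the `--all` mode.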