diff options
Diffstat (limited to 'lib/spack/spack/repository.py')
-rw-r--r-- | lib/spack/spack/repository.py | 437 |
1 files changed, 308 insertions, 129 deletions
diff --git a/lib/spack/spack/repository.py b/lib/spack/spack/repository.py index 1536ecb0e6..e38af165f9 100644 --- a/lib/spack/spack/repository.py +++ b/lib/spack/spack/repository.py @@ -1,13 +1,13 @@ ############################################################################## -# Copyright (c) 2013-2016, Lawrence Livermore National Security, LLC. +# Copyright (c) 2013-2017, Lawrence Livermore National Security, LLC. # Produced at the Lawrence Livermore National Laboratory. # # This file is part of Spack. # Created by Todd Gamblin, tgamblin@llnl.gov, All rights reserved. # LLNL-CODE-647188 # -# For details, see https://github.com/llnl/spack -# Please also see the LICENSE file for our notice and the LGPL. +# For details, see https://github.com/spack/spack +# Please also see the NOTICE and LICENSE files for our notice and the LGPL. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License (as @@ -22,30 +22,38 @@ # License along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ############################################################################## +import collections import os import stat import shutil import errno -import exceptions import sys import inspect import imp import re import traceback -from bisect import bisect_left +import json + +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping + from types import ModuleType import yaml +import llnl.util.lang import llnl.util.tty as tty -from llnl.util.filesystem import * +from llnl.util.filesystem import mkdirp, join_path, install import spack import spack.error import spack.spec from spack.provider_index import ProviderIndex from spack.util.path import canonicalize_path -from spack.util.naming import * +from spack.util.naming import NamespaceTrie, valid_module_name +from spack.util.naming import mod_to_class, possible_spack_module_names # # Super-namespace for all packages. @@ -94,6 +102,242 @@ class SpackNamespace(ModuleType): return getattr(self, name) +class FastPackageChecker(Mapping): + """Cache that maps package names to the stats obtained on the + 'package.py' files associated with them. + + For each repository a cache is maintained at class level, and shared among + all instances referring to it. Update of the global cache is done lazily + during instance initialization. + """ + #: Global cache, reused by every instance + _paths_cache = {} + + def __init__(self, packages_path): + + #: The path of the repository managed by this instance + self.packages_path = packages_path + + # If the cache we need is not there yet, then build it appropriately + if packages_path not in self._paths_cache: + self._paths_cache[packages_path] = self._create_new_cache() + + #: Reference to the appropriate entry in the global cache + self._packages_to_stats = self._paths_cache[packages_path] + + def _create_new_cache(self): + """Create a new cache for packages in a repo. + + The implementation here should try to minimize filesystem + calls. At the moment, it is O(number of packages) and makes + about one stat call per package. This is reasonably fast, and + avoids actually importing packages in Spack, which is slow. + """ + # Create a dictionary that will store the mapping between a + # package name and its stat info + cache = {} + for pkg_name in os.listdir(self.packages_path): + # Skip non-directories in the package root. + pkg_dir = join_path(self.packages_path, pkg_name) + + # Warn about invalid names that look like packages. + if not valid_module_name(pkg_name): + msg = 'Skipping package at {0}. ' + msg += '"{1}" is not a valid Spack module name.' + tty.warn(msg.format(pkg_dir, pkg_name)) + continue + + # Construct the file name from the directory + pkg_file = os.path.join( + self.packages_path, pkg_name, package_file_name + ) + + # Use stat here to avoid lots of calls to the filesystem. + try: + sinfo = os.stat(pkg_file) + except OSError as e: + if e.errno == errno.ENOENT: + # No package.py file here. + continue + elif e.errno == errno.EACCES: + tty.warn("Can't read package file %s." % pkg_file) + continue + raise e + + # If it's not a file, skip it. + if stat.S_ISDIR(sinfo.st_mode): + continue + + # If it is a file, then save the stats under the + # appropriate key + cache[pkg_name] = sinfo + + return cache + + def __getitem__(self, item): + return self._packages_to_stats[item] + + def __iter__(self): + return iter(self._packages_to_stats) + + def __len__(self): + return len(self._packages_to_stats) + + +class TagIndex(Mapping): + """Maps tags to list of packages.""" + + def __init__(self): + self._tag_dict = collections.defaultdict(list) + + def to_json(self, stream): + json.dump({'tags': self._tag_dict}, stream) + + @staticmethod + def from_json(stream): + d = json.load(stream) + + r = TagIndex() + + for tag, list in d['tags'].items(): + r[tag].extend(list) + + return r + + def __getitem__(self, item): + return self._tag_dict[item] + + def __iter__(self): + return iter(self._tag_dict) + + def __len__(self): + return len(self._tag_dict) + + def update_package(self, pkg_name): + """Updates a package in the tag index. + + Args: + pkg_name (str): name of the package to be removed from the index + + """ + + package = spack.repo.get(pkg_name) + + # Remove the package from the list of packages, if present + for pkg_list in self._tag_dict.values(): + if pkg_name in pkg_list: + pkg_list.remove(pkg_name) + + # Add it again under the appropriate tags + for tag in getattr(package, 'tags', []): + self._tag_dict[tag].append(package.name) + + +@llnl.util.lang.memoized +def make_provider_index_cache(packages_path, namespace): + """Lazily updates the provider index cache associated with a repository, + if need be, then returns it. Caches results for later look-ups. + + Args: + packages_path: path of the repository + namespace: namespace of the repository + + Returns: + instance of ProviderIndex + """ + # Map that goes from package names to stat info + fast_package_checker = FastPackageChecker(packages_path) + + # Filename of the provider index cache + cache_filename = 'providers/{0}-index.yaml'.format(namespace) + + # Compute which packages needs to be updated in the cache + index_mtime = spack.misc_cache.mtime(cache_filename) + + needs_update = [ + x for x, sinfo in fast_package_checker.items() + if sinfo.st_mtime > index_mtime + ] + + # Read the old ProviderIndex, or make a new one. + index_existed = spack.misc_cache.init_entry(cache_filename) + + if index_existed and not needs_update: + + # If the provider index exists and doesn't need an update + # just read from it + with spack.misc_cache.read_transaction(cache_filename) as f: + index = ProviderIndex.from_yaml(f) + + else: + + # Otherwise we need a write transaction to update it + with spack.misc_cache.write_transaction(cache_filename) as (old, new): + + index = ProviderIndex.from_yaml(old) if old else ProviderIndex() + + for pkg_name in needs_update: + namespaced_name = '{0}.{1}'.format(namespace, pkg_name) + index.remove_provider(namespaced_name) + index.update(namespaced_name) + + index.to_yaml(new) + + return index + + +@llnl.util.lang.memoized +def make_tag_index_cache(packages_path, namespace): + """Lazily updates the tag index cache associated with a repository, + if need be, then returns it. Caches results for later look-ups. + + Args: + packages_path: path of the repository + namespace: namespace of the repository + + Returns: + instance of TagIndex + """ + # Map that goes from package names to stat info + fast_package_checker = FastPackageChecker(packages_path) + + # Filename of the provider index cache + cache_filename = 'tags/{0}-index.json'.format(namespace) + + # Compute which packages needs to be updated in the cache + index_mtime = spack.misc_cache.mtime(cache_filename) + + needs_update = [ + x for x, sinfo in fast_package_checker.items() + if sinfo.st_mtime > index_mtime + ] + + # Read the old ProviderIndex, or make a new one. + index_existed = spack.misc_cache.init_entry(cache_filename) + + if index_existed and not needs_update: + + # If the provider index exists and doesn't need an update + # just read from it + with spack.misc_cache.read_transaction(cache_filename) as f: + index = TagIndex.from_json(f) + + else: + + # Otherwise we need a write transaction to update it + with spack.misc_cache.write_transaction(cache_filename) as (old, new): + + index = TagIndex.from_json(old) if old else TagIndex() + + for pkg_name in needs_update: + namespaced_name = '{0}.{1}'.format(namespace, pkg_name) + index.update_package(namespaced_name) + + index.to_json(new) + + return index + + class RepoPath(object): """A RepoPath is a list of repos that function as one. @@ -221,6 +465,12 @@ class RepoPath(object): self._all_package_names = sorted(all_pkgs, key=lambda n: n.lower()) return self._all_package_names + def packages_with_tags(self, *tags): + r = set() + for repo in self.repos: + r |= set(repo.packages_with_tags(*tags)) + return sorted(r) + def all_packages(self): for name in self.all_package_names(): yield self.get(name) @@ -288,20 +538,29 @@ class RepoPath(object): sys.modules[fullname] = module return module - @_autospec def repo_for_pkg(self, spec): """Given a spec, get the repository for its package.""" + # We don't @_autospec this function b/c it's called very frequently + # and we want to avoid parsing str's into Specs unnecessarily. + namespace = None + if isinstance(spec, spack.spec.Spec): + namespace = spec.namespace + name = spec.name + else: + # handle strings directly for speed instead of @_autospec'ing + namespace, _, name = spec.rpartition('.') + # If the spec already has a namespace, then return the # corresponding repo if we know about it. - if spec.namespace: - fullspace = '%s.%s' % (self.super_namespace, spec.namespace) + if namespace: + fullspace = '%s.%s' % (self.super_namespace, namespace) if fullspace not in self.by_namespace: raise UnknownNamespaceError(spec.namespace) return self.by_namespace[fullspace] # If there's no namespace, search in the RepoPath. for repo in self.repos: - if spec.name in repo: + if name in repo: return repo # If the package isn't in any repo, return the one with @@ -423,21 +682,18 @@ class Repo(object): self._classes = {} self._instances = {} - # list of packages that are newer than the index. - self._needs_update = [] + # Maps that goes from package name to corresponding file stat + self._fast_package_checker = FastPackageChecker(self.packages_path) - # Index of virtual dependencies + # Index of virtual dependencies, computed lazily self._provider_index = None - # Cached list of package names. - self._all_package_names = None + # Index of tags, computed lazily + self._tag_index = None # make sure the namespace for packages in this repo exists. self._create_namespace() - # Unique filename for cache of virtual dependency providers - self._cache_file = 'providers/%s-index.yaml' % self.namespace - def _create_namespace(self): """Create this repo's namespace module and insert it into sys.modules. @@ -558,13 +814,13 @@ class Repo(object): return yaml_data['repo'] - except exceptions.IOError: + except IOError: tty.die("Error reading %s when opening %s" % (self.config_file, self.root)) @_autospec def get(self, spec, new=False): - if spec.virtual: + if not self.exists(spec.name): raise UnknownPackageError(spec.name) if spec.namespace and spec.namespace != self.namespace: @@ -618,41 +874,28 @@ class Repo(object): """Clear entire package instance cache.""" self._instances.clear() - def _update_provider_index(self): - # Check modification dates of all packages - self._fast_package_check() - - def read(): - with open(self.index_file) as f: - self._provider_index = ProviderIndex.from_yaml(f) - - # Read the old ProviderIndex, or make a new one. - key = self._cache_file - index_existed = spack.misc_cache.init_entry(key) - if index_existed and not self._needs_update: - with spack.misc_cache.read_transaction(key) as f: - self._provider_index = ProviderIndex.from_yaml(f) - else: - with spack.misc_cache.write_transaction(key) as (old, new): - if old: - self._provider_index = ProviderIndex.from_yaml(old) - else: - self._provider_index = ProviderIndex() - - for pkg_name in self._needs_update: - namespaced_name = '%s.%s' % (self.namespace, pkg_name) - self._provider_index.remove_provider(namespaced_name) - self._provider_index.update(namespaced_name) - - self._provider_index.to_yaml(new) - @property def provider_index(self): """A provider index with names *specific* to this repo.""" + if self._provider_index is None: - self._update_provider_index() + self._provider_index = make_provider_index_cache( + self.packages_path, self.namespace + ) + return self._provider_index + @property + def tag_index(self): + """A provider index with names *specific* to this repo.""" + + if self._tag_index is None: + self._tag_index = make_tag_index_cache( + self.packages_path, self.namespace + ) + + return self._tag_index + @_autospec def providers_for(self, vpkg_spec): providers = self.provider_index.providers_for(vpkg_spec) @@ -690,73 +933,18 @@ class Repo(object): pkg_dir = self.dirname_for_package_name(spec.name) return join_path(pkg_dir, package_file_name) - def _fast_package_check(self): - """List packages in the repo and check whether index is up to date. - - Both of these opreations require checking all `package.py` - files so we do them at the same time. We list the repo - directory and look at package.py files, and we compare the - index modification date with the ost recently modified package - file, storing the result. - - The implementation here should try to minimize filesystem - calls. At the moment, it is O(number of packages) and makes - about one stat call per package. This is resonably fast, and - avoids actually importing packages in Spack, which is slow. - - """ - if self._all_package_names is None: - self._all_package_names = [] - - # Get index modification time. - index_mtime = spack.misc_cache.mtime(self._cache_file) - - for pkg_name in os.listdir(self.packages_path): - # Skip non-directories in the package root. - pkg_dir = join_path(self.packages_path, pkg_name) - - # Warn about invalid names that look like packages. - if not valid_module_name(pkg_name): - msg = ("Skipping package at %s. " - "'%s' is not a valid Spack module name.") - tty.warn(msg % (pkg_dir, pkg_name)) - continue - - # construct the file name from the directory - pkg_file = join_path( - self.packages_path, pkg_name, package_file_name) - - # Use stat here to avoid lots of calls to the filesystem. - try: - sinfo = os.stat(pkg_file) - except OSError as e: - if e.errno == errno.ENOENT: - # No package.py file here. - continue - elif e.errno == errno.EACCES: - tty.warn("Can't read package file %s." % pkg_file) - continue - raise e - - # if it's not a file, skip it. - if stat.S_ISDIR(sinfo.st_mode): - continue - - # All checks passed. Add it to the list. - self._all_package_names.append(pkg_name) - - # record the package if it is newer than the index. - if sinfo.st_mtime > index_mtime: - self._needs_update.append(pkg_name) + def all_package_names(self): + """Returns a sorted list of all package names in the Repo.""" + return sorted(self._fast_package_checker.keys()) - self._all_package_names.sort() + def packages_with_tags(self, *tags): + v = set(self.all_package_names()) + index = self.tag_index - return self._all_package_names + for t in tags: + v &= set(index[t]) - def all_package_names(self): - """Returns a sorted list of all package names in the Repo.""" - self._fast_package_check() - return self._all_package_names + return sorted(v) def all_packages(self): """Iterator over all packages in the repository. @@ -769,16 +957,7 @@ class Repo(object): def exists(self, pkg_name): """Whether a package with the supplied name exists.""" - if self._all_package_names: - # This does a binary search in the sorted list. - idx = bisect_left(self.all_package_names(), pkg_name) - return (idx < len(self._all_package_names) and - self._all_package_names[idx] == pkg_name) - - # If we haven't generated the full package list, don't. - # Just check whether the file exists. - filename = self.filename_for_package_name(pkg_name) - return os.path.exists(filename) + return pkg_name in self._fast_package_checker def is_virtual(self, pkg_name): """True if the package with this name is virtual, False otherwise.""" @@ -925,11 +1104,11 @@ class DuplicateRepoError(RepoError): """Raised when duplicate repos are added to a RepoPath.""" -class PackageLoadError(spack.error.SpackError): - """Superclass for errors related to loading packages.""" +class UnknownEntityError(RepoError): + """Raised when we encounter a package spack doesn't have.""" -class UnknownPackageError(PackageLoadError): +class UnknownPackageError(UnknownEntityError): """Raised when we encounter a package spack doesn't have.""" def __init__(self, name, repo=None): @@ -942,7 +1121,7 @@ class UnknownPackageError(PackageLoadError): self.name = name -class UnknownNamespaceError(PackageLoadError): +class UnknownNamespaceError(UnknownEntityError): """Raised when we encounter an unknown namespace""" def __init__(self, namespace): @@ -950,7 +1129,7 @@ class UnknownNamespaceError(PackageLoadError): "Unknown namespace: %s" % namespace) -class FailedConstructorError(PackageLoadError): +class FailedConstructorError(RepoError): """Raised when a package's class constructor fails.""" def __init__(self, name, exc_type, exc_obj, exc_tb): |