From d0e22b22406c8fb064031dbd4ac887b7a9abbc95 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 18 Sep 2015 11:40:05 -0700 Subject: Add ref counting to database. This does not handle removal properly yet. --- lib/spack/spack/cmd/__init__.py | 2 +- lib/spack/spack/cmd/find.py | 35 ++++++-- lib/spack/spack/cmd/module.py | 4 +- lib/spack/spack/cmd/uninstall.py | 2 +- lib/spack/spack/database.py | 188 ++++++++++++++++++++++++++++++--------- lib/spack/spack/package.py | 7 +- 6 files changed, 183 insertions(+), 55 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/cmd/__init__.py b/lib/spack/spack/cmd/__init__.py index a8e8b1a48b..d4778b1375 100644 --- a/lib/spack/spack/cmd/__init__.py +++ b/lib/spack/spack/cmd/__init__.py @@ -125,7 +125,7 @@ def elide_list(line_list, max_num=10): def disambiguate_spec(spec): with spack.installed_db.read_lock(): - matching_specs = spack.installed_db.get_installed(spec) + matching_specs = spack.installed_db.query(spec) if not matching_specs: tty.die("Spec '%s' matches no installed packages." 
% spec) diff --git a/lib/spack/spack/cmd/find.py b/lib/spack/spack/cmd/find.py index e2edd454f4..6a0c3d11ff 100644 --- a/lib/spack/spack/cmd/find.py +++ b/lib/spack/spack/cmd/find.py @@ -54,6 +54,16 @@ def setup_parser(subparser): '-L', '--very-long', action='store_true', dest='very_long', help='Show dependency hashes as well as versions.') + subparser.add_argument( + '-u', '--unknown', action='store_true', dest='unknown', + help='Show only specs Spack does not have a package for.') + subparser.add_argument( + '-m', '--missing', action='store_true', dest='missing', + help='Show missing dependencies as well as installed specs.') + subparser.add_argument( + '-M', '--only-missing', action='store_true', dest='only_missing', + help='Show only missing dependencies.') + subparser.add_argument( 'query_specs', nargs=argparse.REMAINDER, help='optional specs to filter results') @@ -113,6 +123,7 @@ def display_specs(specs, **kwargs): if hashes: string += gray_hash(s, hlen) + ' ' string += s.format('$-_$@$+', color=True) + return string colify(fmt(s) for s in specs) @@ -136,15 +147,23 @@ def find(parser, args): if not query_specs: return + # Set up query arguments. 
+ installed, known = True, any + if args.only_missing: + installed = False + elif args.missing: + installed = any + if args.unknown: + known = False + q_args = { 'installed' : installed, 'known' : known } + # Get all the specs the user asked for - if not query_specs: - with spack.installed_db.read_lock(): - specs = set(spack.installed_db.installed_package_specs()) - - else: - with spack.installed_db.read_lock(): - results = [set(spack.installed_db.get_installed(qs)) for qs in query_specs] - specs = set.union(*results) + with spack.installed_db.read_lock(): + if not query_specs: + specs = set(spack.installed_db.query(**q_args)) + else: + results = [set(spack.installed_db.query(qs, **q_args)) for qs in query_specs] + specs = set.union(*results) if not args.mode: args.mode = 'short' diff --git a/lib/spack/spack/cmd/module.py b/lib/spack/spack/cmd/module.py index 215d877bd0..654b0cb2fa 100644 --- a/lib/spack/spack/cmd/module.py +++ b/lib/spack/spack/cmd/module.py @@ -65,7 +65,7 @@ def module_find(mtype, spec_array): tty.die("You can only pass one spec.") spec = specs[0] - specs = [s for s in spack.installed_db.installed_package_specs() if s.satisfies(spec)] + specs = spack.installed_db.query(spec) if len(specs) == 0: tty.die("No installed packages match spec %s" % spec) @@ -86,7 +86,7 @@ def module_find(mtype, spec_array): def module_refresh(): """Regenerate all module files for installed packages known to spack (some packages may no longer exist).""" - specs = [s for s in spack.installed_db.installed_known_package_specs()] + specs = [s for s in spack.installed_db.query(installed=True, known=True)] for name, cls in module_types.items(): tty.msg("Regenerating %s module files." 
% name) diff --git a/lib/spack/spack/cmd/uninstall.py b/lib/spack/spack/cmd/uninstall.py index 7425db3ca3..7b7c32c065 100644 --- a/lib/spack/spack/cmd/uninstall.py +++ b/lib/spack/spack/cmd/uninstall.py @@ -60,7 +60,7 @@ def uninstall(parser, args): # Fail and ask user to be unambiguous if it doesn't pkgs = [] for spec in specs: - matching_specs = spack.installed_db.get_installed(spec) + matching_specs = spack.installed_db.query(spec) if not args.all and len(matching_specs) > 1: tty.error("%s matches multiple packages:" % spec) print diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index e74217a262..1d1c640d66 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -22,6 +22,23 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ############################################################################## +"""Spack's installation tracking database. + +The database serves two purposes: + + 1. It implements a cache on top of a potentially very large Spack + directory hierarchy, speeding up many operations that would + otherwise require filesystem access. + + 2. It will allow us to track external installations as well as lost + packages and their dependencies. + +Prior to the implementation of this store, a directory layout served +as the authoritative database of packages in Spack. This module +provides a cache and a sanity checking mechanism for what is in the +filesystem. + +""" import os import time import socket @@ -58,18 +75,37 @@ def _autospec(function): class InstallRecord(object): - """A record represents one installation in the DB.""" - def __init__(self, spec, path): + """A record represents one installation in the DB. + + The record keeps track of the spec for the installation, its + install path, AND whether or not it is installed. 
We need the + installed flag in case a user either: + + a) blew away a directory, or + b) used spack uninstall -f to get rid of it + + If, in either case, the package was removed but others still + depend on it, we still need to track its spec, so we don't + actually remove from the database until a spec has no installed + dependents left. + + """ + def __init__(self, spec, path, installed): self.spec = spec self.path = path + self.installed = installed + self.ref_count = 0 def to_dict(self): - return { 'spec' : self.spec.to_node_dict(), - 'path' : self.path } + return { 'spec' : self.spec.to_node_dict(), + 'path' : self.path, + 'installed' : self.installed, + 'ref_count' : self.ref_count } @classmethod def from_dict(cls, d): - return InstallRecord(d['spec'], d['path']) + # TODO: check the dict more rigorously. + return InstallRecord(d['spec'], d['path'], d['installed'], d['ref_count']) class Database(object): @@ -136,9 +172,11 @@ class Database(object): raise SpackYAMLError("error writing YAML database:", str(e)) - def _read_spec_from_yaml(self, hash_key, installs): + def _read_spec_from_yaml(self, hash_key, installs, parent_key=None): """Recursively construct a spec from a hash in a YAML database.""" - # TODO: check validity of hash_key records here. + if hash_key not in installs: + parent = read_spec(installs[parent_key]['path']) + spec_dict = installs[hash_key]['spec'] # Build spec from dict first. @@ -147,7 +185,8 @@ class Database(object): # Add dependencies from other records in the install DB to # form a full spec. for dep_hash in spec_dict[spec.name]['dependencies'].values(): - spec._add_dependency(self._read_spec_from_yaml(dep_hash, installs)) + child = self._read_spec_from_yaml(dep_hash, installs, hash_key) + spec._add_dependency(child) return spec @@ -175,12 +214,12 @@ class Database(object): check('database' in yfile, "No 'database' attribute in YAML.") - # High-level file checks. 
+ # High-level file checks db = yfile['database'] check('installs' in db, "No 'installs' in YAML DB.") check('version' in db, "No 'version' in YAML DB.") - # TODO: better version check. + # TODO: better version checking semantics. version = Version(db['version']) if version != _db_version: raise InvalidDatabaseVersionError(_db_version, version) @@ -190,14 +229,21 @@ class Database(object): data = {} for hash_key, rec in installs.items(): try: + # This constructs a spec DAG from the list of all installs spec = self._read_spec_from_yaml(hash_key, installs) + + # Validate the spec by ensuring the stored and actual + # hashes are the same. spec_hash = spec.dag_hash() if not spec_hash == hash_key: tty.warn("Hash mismatch in database: %s -> spec with hash %s" % (hash_key, spec_hash)) - continue + continue # TODO: is skipping the right thing to do? - data[hash_key] = InstallRecord(spec, rec['path']) + # Insert the brand new spec in the database. Each + # spec has its own copies of its dependency specs. + # TODO: would a more immutable spec implementation simplify this? + data[hash_key] = InstallRecord(spec, rec['path'], rec['installed']) except Exception as e: tty.warn("Invalid database reecord:", @@ -213,12 +259,29 @@ class Database(object): """Build database index from scratch based from a directory layout.""" with self.write_lock(): data = {} + + # Ask the directory layout to traverse the filesystem. for spec in directory_layout.all_specs(): + # Create a spec for each known package and add it. path = directory_layout.path_for_spec(spec) hash_key = spec.dag_hash() - data[hash_key] = InstallRecord(spec, path) + data[hash_key] = InstallRecord(spec, path, True) + + # Recursively examine dependencies and add them, even + # if they are NOT installed. This ensures we know + # about missing dependencies. 
+ for dep in spec.traverse(root=False): + dep_hash = dep.dag_hash() + if dep_hash not in data: + path = directory_layout.path_for_spec(dep) + installed = os.path.isdir(path) + data[dep_hash] = InstallRecord(dep.copy(), path, installed) + data[dep_hash].ref_count += 1 + + # Assuming everything went ok, replace this object's data. self._data = data + # write out, blowing away the old version if necessary self.write() @@ -274,22 +337,37 @@ class Database(object): @_autospec def add(self, spec, path): """Read the database from the set location - Add the specified entry as a dict - Write the database back to memory + + Add the specified entry as a dict, then write the database + back to memory. This assumes that ALL dependencies are already in + the database. Should not be called otherwise. + """ # Should always already be locked with self.write_lock(): self.read() - self._data[spec.dag_hash()] = InstallRecord(spec, path) + self._data[spec.dag_hash()] = InstallRecord(spec, path, True) + + # sanity check the dependencies in case something went + # wrong during install() + # TODO: ensure no races during distributed install. + for dep in spec.traverse(root=False): + assert dep.dag_hash() in self._data + self.write() @_autospec def remove(self, spec): - """ - Reads the database from the set location - Searches for and removes the specified spec - Writes the database back to memory + """Removes a spec from the database. To be called on uninstall. + + Reads the database, then: + + 1. Marks the spec as not installed. + 2. Removes the spec if it has no more dependents. + 3. If removed, recursively updates dependencies' ref counts + and removes them if they are no longer needed. 
+ """ # Should always already be locked with self.write_lock(): @@ -300,19 +378,13 @@ class Database(object): self.write() - @_autospec - def get_installed(self, spec): - """Get installed specs that satisfy the provided spec constraint.""" - return [s for s in self.installed_package_specs() if s.satisfies(spec)] - - @_autospec def installed_extensions_for(self, extendee_spec): """ Return the specs of all packages that extend the given spec """ - for s in self.installed_package_specs(): + for s in self.query(): try: if s.package.extends(extendee_spec): yield s.package @@ -322,25 +394,59 @@ class Database(object): # TODO: conditional way to do this instead of catching exceptions - def installed_package_specs(self): - """ - Read installed package names from the database - and return their specs + def query(self, query_spec=any, known=any, installed=True): + """Run a query on the database. + + ``query_spec`` + Queries iterate through specs in the database and return + those that satisfy the supplied ``query_spec``. If + query_spec is `any`, This will match all specs in the + database. If it is a spec, we'll evaluate + ``spec.satisfies(query_spec)``. + + The query can be constrained by two additional attributes: + + ``known`` + Possible values: True, False, any + + Specs that are "known" are those for which Spack can + locate a ``package.py`` file -- i.e., Spack "knows" how to + install them. Specs that are unknown may represent + packages that existed in a previous version of Spack, but + have since either changed their name or been removed. + + ``installed`` + Possible values: True, False, any + + Specs for which a prefix exists are "installed". A spec + that is NOT installed will be in the database if some + other spec depends on it but its installation has gone + away since Spack installed it. + + TODO: Specs are a lot like queries. Should there be a + wildcard spec object, and should specs have attributes + like installed and known that can be queried? 
Or are + these really special cases that only belong here? + """ - # Should always already be locked with self.read_lock(): self.read() - return sorted(rec.spec for rec in self._data.values()) + results = [] + for key, rec in self._data.items(): + if installed is not any and rec.installed != installed: + continue + if known is not any and spack.db.exists(rec.spec.name) != known: + continue + if query_spec is any or rec.spec.satisfies(query_spec): + results.append(rec.spec) - def installed_known_package_specs(self): - """ - Read installed package names from the database. - Return only the specs for which the package is known - to this version of spack - """ - return [s for s in self.installed_package_specs() - if spack.db.exists(s.name)] + return sorted(results) + + + def missing(self, spec): + key = spec.dag_hash() + return key in self._data and not self._data[key].installed class CorruptDatabaseError(SpackError): diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index e64b427852..e6944ce40c 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -563,9 +563,12 @@ class Package(object): @property def installed_dependents(self): """Return a list of the specs of all installed packages that depend - on this one.""" + on this one. + + TODO: move this method to database.py? + """ dependents = [] - for spec in spack.installed_db.installed_package_specs(): + for spec in spack.installed_db.query(): if self.name == spec.name: continue for dep in spec.traverse(): -- cgit v1.2.3-60-g2f50