author     Todd Gamblin <tgamblin@llnl.gov>   2016-09-01 13:00:21 -0700
committer  GitHub <noreply@github.com>        2016-09-01 13:00:21 -0700
commit     f5bc0cbb65c95249b3f1fd1b5c63da5a03acbded (patch)
tree       db888fa0a05e3265549efdc95737c7bf9b6ae192
parent     8d755c010dadc510a2f01a2c4e621cf32a262d72 (diff)
parent     69b68153a173f6a6f273755c9f56da452cb5970e (diff)
download   spack-f5bc0cbb65c95249b3f1fd1b5c63da5a03acbded.tar.gz
           spack-f5bc0cbb65c95249b3f1fd1b5c63da5a03acbded.tar.bz2
           spack-f5bc0cbb65c95249b3f1fd1b5c63da5a03acbded.tar.xz
           spack-f5bc0cbb65c95249b3f1fd1b5c63da5a03acbded.zip
Merge pull request #1535 from LLNL/bugfix/faster-install-db-gh1521
[WIP] Faster database loading, faster in-memory hashing
-rw-r--r--  lib/spack/spack/database.py                113
-rw-r--r--  lib/spack/spack/spec.py                     70
-rw-r--r--  lib/spack/spack/test/directory_layout.py    25
3 files changed, 145 insertions, 63 deletions
diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py
index 1240e9a658..043e3b953d 100644
--- a/lib/spack/spack/database.py
+++ b/lib/spack/spack/database.py
@@ -164,6 +164,9 @@ class Database(object):
self.lock = Lock(self._lock_path)
self._data = {}
+ # whether there was an error at the start of a read transaction
+ self._error = None
+
def write_transaction(self, timeout=_db_lock_timeout):
"""Get a write lock context manager for use in a `with` block."""
return WriteTransaction(self.lock, self._read, self._write, timeout)
@@ -198,7 +201,7 @@ class Database(object):
except YAMLError as e:
raise SpackYAMLError("error writing YAML database:", str(e))
- def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
+ def _read_spec_from_yaml(self, hash_key, installs):
"""Recursively construct a spec from a hash in a YAML database.
Does not do any locking.
@@ -212,19 +215,27 @@ class Database(object):
# Build spec from dict first.
spec = Spec.from_node_dict(spec_dict)
+ return spec
+ def _assign_dependencies(self, hash_key, installs, data):
# Add dependencies from other records in the install DB to
# form a full spec.
+ spec = data[hash_key].spec
+ spec_dict = installs[hash_key]['spec']
+
if 'dependencies' in spec_dict[spec.name]:
yaml_deps = spec_dict[spec.name]['dependencies']
for dname, dhash, dtypes in Spec.read_yaml_dep_specs(yaml_deps):
- child = self._read_spec_from_yaml(dhash, installs, hash_key)
- spec._add_dependency(child, dtypes)
+ if dhash not in data:
+ tty.warn("Missing dependency not in database: ",
+ "%s needs %s-%s" % (
+ spec.format('$_$#'), dname, dhash[:7]))
+ continue
- # Specs from the database need to be marked concrete because
- # they represent actual installations.
- spec._mark_concrete()
- return spec
+ # defensive copy (not sure everything handles extra
+ # parent links yet)
+ child = data[dhash].spec
+ spec._add_dependency(child, dtypes)
def _read_from_yaml(self, stream):
"""
@@ -248,7 +259,8 @@ class Database(object):
def check(cond, msg):
if not cond:
- raise CorruptDatabaseError(self._index_path, msg)
+ raise CorruptDatabaseError(
+ "Spack database is corrupt: %s" % msg, self._index_path)
check('database' in yfile, "No 'database' attribute in YAML.")
@@ -267,22 +279,28 @@ class Database(object):
self.reindex(spack.install_layout)
installs = dict((k, v.to_dict()) for k, v in self._data.items())
- # Iterate through database and check each record.
+ def invalid_record(hash_key, error):
+ msg = ("Invalid record in Spack database: "
+ "hash: %s, cause: %s: %s")
+ msg %= (hash_key, type(error).__name__, str(error))
+ raise CorruptDatabaseError(msg, self._index_path)
+
+ # Build up the database in three passes:
+ #
+ # 1. Read in all specs without dependencies.
+ # 2. Hook dependencies up among specs.
+ # 3. Mark all specs concrete.
+ #
+ # The database is built up so that ALL specs in it share nodes
+ # (i.e., its specs are a true Merkle DAG, unlike most specs.)
+
+ # Pass 1: Iterate through database and build specs w/o dependencies
data = {}
for hash_key, rec in installs.items():
try:
# This constructs a spec DAG from the list of all installs
spec = self._read_spec_from_yaml(hash_key, installs)
- # Validate the spec by ensuring the stored and actual
- # hashes are the same.
- spec_hash = spec.dag_hash()
- if not spec_hash == hash_key:
- tty.warn(
- "Hash mismatch in database: %s -> spec with hash %s" %
- (hash_key, spec_hash))
- continue # TODO: is skipping the right thing to do?
-
# Insert the brand new spec in the database. Each
# spec has its own copies of its dependency specs.
# TODO: would a more immutable spec implementation simplify
@@ -290,11 +308,22 @@ class Database(object):
data[hash_key] = InstallRecord.from_dict(spec, rec)
except Exception as e:
- tty.warn("Invalid database reecord:",
- "file: %s" % self._index_path,
- "hash: %s" % hash_key,
- "cause: %s: %s" % (type(e).__name__, str(e)))
- raise
+ invalid_record(hash_key, e)
+
+ # Pass 2: Assign dependencies once all specs are created.
+ for hash_key in data:
+ try:
+ self._assign_dependencies(hash_key, installs, data)
+ except Exception as e:
+ invalid_record(hash_key, e)
+
+ # Pass 3: Mark all specs concrete. Specs representing real
+ # installations must be explicitly marked.
+ # We do this *after* all dependencies are connected because if we
+ # do it *while* we're constructing specs, it causes hashes to be
+ # cached prematurely.
+ for hash_key, rec in data.items():
+ rec.spec._mark_concrete()
self._data = data
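
The three passes above can be illustrated with a minimal, self-contained sketch.
Plain dicts stand in for Spack's Spec and InstallRecord classes; the record
layout and the load_database name are invented for illustration, not Spack APIs.

    def load_database(installs):
        # installs maps hash -> {'name': ..., 'deps': [(dep_name, dep_hash, deptypes), ...]}
        # Pass 1: build one node per record, with no dependency links yet.
        nodes = {h: {'name': rec['name'], 'deps': {}, 'concrete': False}
                 for h, rec in installs.items()}

        # Pass 2: hook dependencies up to the *shared* nodes, so every spec in
        # the database points at the same object for a given hash (a true
        # Merkle DAG).  A missing hash is warned about and skipped, as above.
        for h, rec in installs.items():
            for dname, dhash, dtypes in rec['deps']:
                if dhash not in nodes:
                    print("warning: %s needs missing %s-%s" % (rec['name'], dname, dhash[:7]))
                    continue
                nodes[h]['deps'][dname] = (nodes[dhash], dtypes)

        # Pass 3: only now mark everything concrete, so nothing caches a hash
        # before its dependency links are in place.
        for node in nodes.values():
            node['concrete'] = True

        return nodes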
@@ -304,7 +333,26 @@ class Database(object):
Locks the DB if it isn't locked already.
"""
- with self.write_transaction():
+ # Special transaction to avoid recursive reindex calls and to
+ # ignore errors if we need to rebuild a corrupt database.
+ def _read_suppress_error():
+ try:
+ if os.path.isfile(self._index_path):
+ self._read_from_yaml(self._index_path)
+ except CorruptDatabaseError as e:
+ self._error = e
+ self._data = {}
+
+ transaction = WriteTransaction(
+ self.lock, _read_suppress_error, self._write, _db_lock_timeout)
+
+ with transaction:
+ if self._error:
+ tty.warn(
+ "Spack database was corrupt. Will rebuild. Error was:",
+ str(self._error))
+ self._error = None
+
old_data = self._data
try:
self._data = {}
@@ -313,10 +361,15 @@ class Database(object):
for spec in directory_layout.all_specs():
# Create a spec for each known package and add it.
path = directory_layout.path_for_spec(spec)
- old_info = old_data.get(spec.dag_hash())
+
+ # Try to recover explicit value from old DB, but
+ # default it to False if DB was corrupt.
explicit = False
- if old_info is not None:
- explicit = old_info.explicit
+ if old_data is not None:
+ old_info = old_data.get(spec.dag_hash())
+ if old_info is not None:
+ explicit = old_info.explicit
+
self._add(spec, path, directory_layout, explicit=explicit)
self._check_ref_counts()
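
The recovery pattern in this hunk, in isolation: read inside the write lock,
stash a corruption error instead of raising, then rebuild from what is actually
installed.  ToyDatabase, _parse, and the record layout below are invented
stand-ins, not Spack's classes.

    class CorruptDatabaseError(Exception):
        pass

    class ToyDatabase(object):
        def __init__(self):
            self._data = {}
            self._error = None

        def _parse(self, text):
            # Stand-in for _read_from_yaml(); raises on a corrupt index.
            if not text.startswith('database:'):
                raise CorruptDatabaseError("no 'database' attribute in YAML")
            return {}   # a real parser would return hash -> record here

        def _read_suppress_error(self, text):
            # Remember the error instead of raising so reindex() can rebuild.
            try:
                self._data = self._parse(text)
            except CorruptDatabaseError as e:
                self._error = e
                self._data = {}

        def reindex(self, text, installed):
            # In Spack this body runs inside a WriteTransaction on the DB lock.
            self._read_suppress_error(text)
            if self._error:
                print("Database was corrupt. Will rebuild. Error was: %s" % self._error)
                self._error = None
            old_data, self._data = self._data, {}
            for hash_key, name in installed:
                old = old_data.get(hash_key)
                explicit = old['explicit'] if old else False   # default when corrupt
                self._data[hash_key] = {'name': name, 'explicit': explicit}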
@@ -601,11 +654,7 @@ class Database(object):
class CorruptDatabaseError(SpackError):
-
- def __init__(self, path, msg=''):
- super(CorruptDatabaseError, self).__init__(
- "Spack database is corrupt: %s. %s." % (path, msg),
- "Try running `spack reindex` to fix.")
+ """Raised when errors are found while reading the database."""
class InvalidDatabaseVersionError(SpackError):
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index d8a7cf9d7b..99446d21dd 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -504,6 +504,7 @@ class Spec(object):
self.variants.spec = self
self.namespace = other.namespace
self._hash = other._hash
+ self._cmp_key_cache = other._cmp_key_cache
# Specs are by default not assumed to be normal, but in some
# cases we've read them from a file want to assume normal.
@@ -858,9 +859,10 @@ class Spec(object):
# Edge traversal yields but skips children of visited nodes
if not (key in visited and cover == 'edges'):
# This code determines direction and yields the children/parents
+
successors = deps
if direction == 'parents':
- successors = self.dependents_dict()
+ successors = self.dependents_dict() # TODO: deptype?
visited.add(key)
for name in sorted(successors):
@@ -1278,15 +1280,15 @@ class Spec(object):
# Mark everything in the spec as concrete, as well.
self._mark_concrete()
- def _mark_concrete(self):
+ def _mark_concrete(self, value=True):
"""Mark this spec and its dependencies as concrete.
Only for internal use -- client code should use "concretize"
unless there is a need to force a spec to be concrete.
"""
for s in self.traverse(deptype_query=alldeps):
- s._normal = True
- s._concrete = True
+ s._normal = value
+ s._concrete = value
def concretized(self):
"""This is a non-destructive version of concretize(). First clones,
@@ -1533,6 +1535,10 @@ class Spec(object):
if self._normal and not force:
return False
+ # avoid any assumptions about concreteness when forced
+ if force:
+ self._mark_concrete(False)
+
# Ensure first that all packages & compilers in the DAG exist.
self.validate_names()
# Get all the dependencies into one DependencyMap
@@ -1865,7 +1871,7 @@ class Spec(object):
"""Return list of any virtual deps in this spec."""
return [spec for spec in self.traverse() if spec.virtual]
- def _dup(self, other, **kwargs):
+ def _dup(self, other, deps=True, cleardeps=True):
"""Copy the spec other into self. This is an overwriting
copy. It does not copy any dependents (parents), but by default
copies dependencies.
@@ -1896,7 +1902,7 @@ class Spec(object):
self.versions = other.versions.copy()
self.architecture = other.architecture
self.compiler = other.compiler.copy() if other.compiler else None
- if kwargs.get('cleardeps', True):
+ if cleardeps:
self._dependents = DependencyMap()
self._dependencies = DependencyMap()
self.compiler_flags = other.compiler_flags.copy()
@@ -1906,19 +1912,15 @@ class Spec(object):
self.external_module = other.external_module
self.namespace = other.namespace
self._hash = other._hash
+ self._cmp_key_cache = other._cmp_key_cache
# If we copy dependencies, preserve DAG structure in the new spec
- if kwargs.get('deps', True):
+ if deps:
# This copies the deps from other using _dup(deps=False)
- # XXX(deptype): We can keep different instances of specs here iff
- # it is only a 'build' dependency (from its parent).
- # All other instances must be shared (due to symbol
- # and PATH contention). These should probably search
- # for any existing installation which can satisfy the
- # build and latch onto that because if 3 things need
- # the same build dependency and it is *not*
- # available, we only want to build it once.
- new_nodes = other.flat_dependencies(deptype_query=alldeps)
+ deptypes = alldeps
+ if isinstance(deps, (tuple, list)):
+ deptypes = deps
+ new_nodes = other.flat_dependencies(deptypes=deptypes)
new_nodes[self.name] = self
stack = [other]
@@ -1927,6 +1929,9 @@ class Spec(object):
new_spec = new_nodes[cur_spec.name]
for depspec in cur_spec._dependencies.values():
+ if not any(d in deptypes for d in depspec.deptypes):
+ continue
+
stack.append(depspec.spec)
# XXX(deptype): add any new deptypes that may have appeared
@@ -1942,13 +1947,22 @@ class Spec(object):
self.external_module = other.external_module
return changed
- def copy(self, **kwargs):
+ def copy(self, deps=True):
"""Return a copy of this spec.
- By default, returns a deep copy. Supply dependencies=False
- to get a shallow copy.
+
+ By default, returns a deep copy. To control how dependencies are
+ copied, supply:
+
+ deps=True: deep copy
+
+ deps=False: shallow copy (no dependencies)
+
+ deps=('link', 'build'):
+ only build and link dependencies. Similar for other deptypes.
+
"""
clone = Spec.__new__(Spec)
- clone._dup(self, **kwargs)
+ clone._dup(self, deps=deps)
return clone
@property
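
A brief usage sketch of the new copy() signature.  This assumes a working Spack
checkout; the package names are only for illustration.

    from spack.spec import Spec

    s = Spec('mpileaks ^mpich')
    s.concretize()

    full    = s.copy(deps=True)              # deep copy with all dependencies
    shallow = s.copy(deps=False)             # just this node, no dependencies
    runtime = s.copy(deps=('link', 'run'))   # drop build-only dependencies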
@@ -2059,10 +2073,17 @@ class Spec(object):
1. A tuple describing this node in the DAG.
2. The hash of each of this node's dependencies' cmp_keys.
"""
- dep_dict = self.dependencies_dict(deptype=('link', 'run'))
- return self._cmp_node() + (
- tuple(hash(dep_dict[name])
- for name in sorted(dep_dict)),)
+ if self._cmp_key_cache:
+ return self._cmp_key_cache
+
+ dep_tuple = tuple(
+ (d.spec.name, hash(d.spec), tuple(sorted(d.deptypes)))
+ for name, d in sorted(self._dependencies.items()))
+
+ key = (self._cmp_node(), dep_tuple)
+ if self._concrete:
+ self._cmp_key_cache = key
+ return key
def colorized(self):
return colorize_spec(self)
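
The caching idea in the _cmp_key hunk above, in miniature: memoize the
expensive key only once a node is concrete, because a concrete DAG can no
longer change and the cache cannot go stale.  Node is an invented stand-in for
Spec.

    class Node(object):
        def __init__(self, name, deps=()):
            self.name = name
            self.deps = list(deps)          # list of (Node, deptypes) pairs
            self.concrete = False
            self._cmp_key_cache = None

        def _cmp_key(self):
            if self._cmp_key_cache:
                return self._cmp_key_cache
            dep_tuple = tuple(
                (d.name, hash(d._cmp_key()), tuple(sorted(types)))
                for d, types in sorted(self.deps, key=lambda p: p[0].name))
            key = (self.name, dep_tuple)
            if self.concrete:               # cache only once the DAG is frozen
                self._cmp_key_cache = key
            return key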
@@ -2457,6 +2478,7 @@ class SpecParser(spack.parse.Parser):
spec._dependencies = DependencyMap()
spec.namespace = spec_namespace
spec._hash = None
+ spec._cmp_key_cache = None
spec._normal = False
spec._concrete = False
diff --git a/lib/spack/spack/test/directory_layout.py b/lib/spack/spack/test/directory_layout.py
index 2d0565acae..fdaa43464b 100644
--- a/lib/spack/spack/test/directory_layout.py
+++ b/lib/spack/spack/test/directory_layout.py
@@ -91,22 +91,33 @@ class DirectoryLayoutTest(MockPackagesTest):
# Make sure spec file can be read back in to get the original spec
spec_from_file = self.layout.read_spec(spec_path)
- self.assertEqual(spec, spec_from_file)
- self.assertTrue(spec.eq_dag, spec_from_file)
+
+ # currently we don't store build dependency information when
+ # we write out specs to the filesystem.
+
+ # TODO: fix this when we can concretize more loosely based on
+ # TODO: what is installed. We currently omit these to
+ # TODO: increase reuse of build dependencies.
+ stored_deptypes = ('link', 'run')
+ expected = spec.copy(deps=stored_deptypes)
+ self.assertEqual(expected, spec_from_file)
+ self.assertTrue(expected.eq_dag(spec_from_file))
self.assertTrue(spec_from_file.concrete)
# Ensure that specs that come out "normal" are really normal.
with open(spec_path) as spec_file:
read_separately = Spec.from_yaml(spec_file.read())
- read_separately.normalize()
- self.assertEqual(read_separately, spec_from_file)
+ # TODO: revise this when build deps are in dag_hash
+ norm = read_separately.normalized().copy(deps=stored_deptypes)
+ self.assertEqual(norm, spec_from_file)
- read_separately.concretize()
- self.assertEqual(read_separately, spec_from_file)
+ # TODO: revise this when build deps are in dag_hash
+ conc = read_separately.concretized().copy(deps=stored_deptypes)
+ self.assertEqual(conc, spec_from_file)
# Make sure the hash of the read-in spec is the same
- self.assertEqual(spec.dag_hash(), spec_from_file.dag_hash())
+ self.assertEqual(expected.dag_hash(), spec_from_file.dag_hash())
# Ensure directories are properly removed
self.layout.remove_install_directory(spec)
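
Roughly what the revised assertions check, in isolation (this assumes spec is a
concretized Spec and spec_from_file was read back from its stored spec.yaml):

    stored_deptypes = ('link', 'run')          # build deps are not written to disk
    expected = spec.copy(deps=stored_deptypes)

    assert expected == spec_from_file
    assert expected.eq_dag(spec_from_file)
    assert expected.dag_hash() == spec_from_file.dag_hash()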