diff options
author | Todd Gamblin <tgamblin@llnl.gov> | 2015-12-11 12:29:32 -0800 |
---|---|---|
committer | Todd Gamblin <tgamblin@llnl.gov> | 2015-12-11 12:40:27 -0800 |
commit | 6ee2eb21dd2c3c32d115ebf28becc735311bb1a9 (patch) | |
tree | 9115b07d74626ceccc57569e3a3eb2e084718afb /lib | |
parent | 7526a89463c6f4c61cd4ae725fc096e14fc063f3 (diff) | |
download | spack-6ee2eb21dd2c3c32d115ebf28becc735311bb1a9.tar.gz spack-6ee2eb21dd2c3c32d115ebf28becc735311bb1a9.tar.bz2 spack-6ee2eb21dd2c3c32d115ebf28becc735311bb1a9.tar.xz spack-6ee2eb21dd2c3c32d115ebf28becc735311bb1a9.zip |
Fix #217: Use MUCH faster hashing, reduce number of DAG copies.
This changes the hash algorithm so that it does much less object
allocation and copying, and so that it is correct.
The old version of `_cmp_key()` would call `sorted_deps`, which would
call `flat_dependencies` to get a list of dependencies so that it
could sort them in alphabetical order. This isn't necessary in the
`_cmp_key()`, and in fact we want more DAG structure than that to be
included in the `_cmp_key()`.
The new version constructs a tuple without copying the Spec DAG, and
the tuple contains hashes of sub-DAGs that are computed recursively
in-place. This is way faster than the previous algorithm and reduces
the numebr of copies significantly. It is also a correct DAG hash.
Example timing and copy counts for the different hashing algorithms
we've tried:
Original (wrong) Spec hash:
```
106,170 copies
real 0m5.024s
user 0m4.949s
sys 0m0.104s
```
Spec hash using YAML `dag_hash()`:
```
3,794 copies
real 0m5.024s
user 0m4.949s
sys 0m0.104s
New no-copy, no-YAML hash:
```
3,594 copies
real 0m2.543s
user 0m2.435s
sys 0m0.104s
```
So now we have a hash that is correct AND faster.
The remaining ~3k copies happen mostly during concretization, and as
all packages are initially loaded. I believe this is because Spack
currently has to load all packages to figure out virtual dependency
information; it could also be becasue there ar a lot of lookups of
partial specs in concretize. I can investigate this further.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/packages.py | 12 | ||||
-rw-r--r-- | lib/spack/spack/spec.py | 18 |
2 files changed, 18 insertions, 12 deletions
diff --git a/lib/spack/spack/packages.py b/lib/spack/spack/packages.py index 9d87c3a94b..080644fb90 100644 --- a/lib/spack/spack/packages.py +++ b/lib/spack/spack/packages.py @@ -67,22 +67,22 @@ class PackageDB(object): if spec.virtual: raise UnknownPackageError(spec.name) - dhash = spec.dag_hash() + key = hash(spec) if kwargs.get('new', False): - if dhash in self.instances: - del self.instances[dhash] + if key in self.instances: + del self.instances[key] - if not dhash in self.instances: + if not key in self.instances: package_class = self.get_class_for_package_name(spec.name) try: copy = spec.copy() # defensive copy. Package owns its spec. - self.instances[dhash] = package_class(copy) + self.instances[key] = package_class(copy) except Exception, e: if spack.debug: sys.excepthook(*sys.exc_info()) raise FailedConstructorError(spec.name, e) - return self.instances[dhash] + return self.instances[key] @_autospec diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index f62182ce76..483da03c47 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1481,8 +1481,11 @@ class Spec(object): def _cmp_node(self): """Comparison key for just *this node* and not its deps.""" - return (self.name, self.versions, self.variants, - self.architecture, self.compiler) + return (self.name, + self.versions, + self.variants, + self.architecture, + self.compiler) def eq_node(self, other): @@ -1496,11 +1499,14 @@ class Spec(object): def _cmp_key(self): - """Comparison key for this node and all dependencies *without* - considering structure. This is the default, as - normalization will restore structure. + """This returns a key for the spec *including* DAG structure. + + The key is the concatenation of: + 1. A tuple describing this node in the DAG. + 2. The hash of each of this node's dependencies' cmp_keys. """ - return self._cmp_node() + (self.sorted_deps(),) + return self._cmp_node() + ( + tuple(sorted(hash(d) for d in self.dependencies.values())),) def colorized(self): |