From 7526a89463c6f4c61cd4ae725fc096e14fc063f3 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 11 Dec 2015 02:16:24 -0800 Subject: Fix #217: Make package cache use DAG hash instead of sorted deps. - Gets rid of last vestige of old-style specs. - Uses new hashing for lookup --- lib/spack/spack/packages.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/packages.py b/lib/spack/spack/packages.py index f6f4cbf025..9d87c3a94b 100644 --- a/lib/spack/spack/packages.py +++ b/lib/spack/spack/packages.py @@ -67,27 +67,28 @@ class PackageDB(object): if spec.virtual: raise UnknownPackageError(spec.name) + dhash = spec.dag_hash() if kwargs.get('new', False): - if spec in self.instances: - del self.instances[spec] + if dhash in self.instances: + del self.instances[dhash] - if not spec in self.instances: + if not dhash in self.instances: package_class = self.get_class_for_package_name(spec.name) try: - copy = spec.copy() - self.instances[copy] = package_class(copy) + copy = spec.copy() # defensive copy. Package owns its spec. + self.instances[dhash] = package_class(copy) except Exception, e: if spack.debug: sys.excepthook(*sys.exc_info()) raise FailedConstructorError(spec.name, e) - return self.instances[spec] + return self.instances[dhash] @_autospec def delete(self, spec): """Force a package to be recreated.""" - del self.instances[spec] + del self.instances[spec.dag_hash()] def purge(self): -- cgit v1.2.3-70-g09d2 From 6ee2eb21dd2c3c32d115ebf28becc735311bb1a9 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 11 Dec 2015 12:29:32 -0800 Subject: Fix #217: Use MUCH faster hashing, reduce number of DAG copies. This changes the hash algorithm so that it does much less object allocation and copying, and so that it is correct. The old version of `_cmp_key()` would call `sorted_deps`, which would call `flat_dependencies` to get a list of dependencies so that it could sort them in alphabetical order. This isn't necessary in the `_cmp_key()`, and in fact we want more DAG structure than that to be included in the `_cmp_key()`. The new version constructs a tuple without copying the Spec DAG, and the tuple contains hashes of sub-DAGs that are computed recursively in-place. This is way faster than the previous algorithm and reduces the numebr of copies significantly. It is also a correct DAG hash. Example timing and copy counts for the different hashing algorithms we've tried: Original (wrong) Spec hash: ``` 106,170 copies real 0m5.024s user 0m4.949s sys 0m0.104s ``` Spec hash using YAML `dag_hash()`: ``` 3,794 copies real 0m5.024s user 0m4.949s sys 0m0.104s New no-copy, no-YAML hash: ``` 3,594 copies real 0m2.543s user 0m2.435s sys 0m0.104s ``` So now we have a hash that is correct AND faster. The remaining ~3k copies happen mostly during concretization, and as all packages are initially loaded. I believe this is because Spack currently has to load all packages to figure out virtual dependency information; it could also be becasue there ar a lot of lookups of partial specs in concretize. I can investigate this further. --- lib/spack/spack/packages.py | 12 ++++++------ lib/spack/spack/spec.py | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/packages.py b/lib/spack/spack/packages.py index 9d87c3a94b..080644fb90 100644 --- a/lib/spack/spack/packages.py +++ b/lib/spack/spack/packages.py @@ -67,22 +67,22 @@ class PackageDB(object): if spec.virtual: raise UnknownPackageError(spec.name) - dhash = spec.dag_hash() + key = hash(spec) if kwargs.get('new', False): - if dhash in self.instances: - del self.instances[dhash] + if key in self.instances: + del self.instances[key] - if not dhash in self.instances: + if not key in self.instances: package_class = self.get_class_for_package_name(spec.name) try: copy = spec.copy() # defensive copy. Package owns its spec. - self.instances[dhash] = package_class(copy) + self.instances[key] = package_class(copy) except Exception, e: if spack.debug: sys.excepthook(*sys.exc_info()) raise FailedConstructorError(spec.name, e) - return self.instances[dhash] + return self.instances[key] @_autospec diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index f62182ce76..483da03c47 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1481,8 +1481,11 @@ class Spec(object): def _cmp_node(self): """Comparison key for just *this node* and not its deps.""" - return (self.name, self.versions, self.variants, - self.architecture, self.compiler) + return (self.name, + self.versions, + self.variants, + self.architecture, + self.compiler) def eq_node(self, other): @@ -1496,11 +1499,14 @@ class Spec(object): def _cmp_key(self): - """Comparison key for this node and all dependencies *without* - considering structure. This is the default, as - normalization will restore structure. + """This returns a key for the spec *including* DAG structure. + + The key is the concatenation of: + 1. A tuple describing this node in the DAG. + 2. The hash of each of this node's dependencies' cmp_keys. """ - return self._cmp_node() + (self.sorted_deps(),) + return self._cmp_node() + ( + tuple(sorted(hash(d) for d in self.dependencies.values())),) def colorized(self): -- cgit v1.2.3-70-g09d2 From 3dd6cbc556458ff5776988d12927b9d93c326b64 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 11 Dec 2015 13:07:20 -0800 Subject: Fix #217: update spec_dag test for new `_cmp_key`. --- lib/spack/spack/test/spec_dag.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/test/spec_dag.py b/lib/spack/spack/test/spec_dag.py index 94438b6a0c..d3a4d77b32 100644 --- a/lib/spack/spack/test/spec_dag.py +++ b/lib/spack/spack/test/spec_dag.py @@ -340,16 +340,18 @@ class SpecDagTest(MockPackagesTest): self.assertEqual(spec, expected_flat) self.assertTrue(spec.eq_dag(expected_flat)) - self.assertEqual(spec, expected_normalized) + # Normalized has different DAG structure, so NOT equal. + self.assertNotEqual(spec, expected_normalized) self.assertFalse(spec.eq_dag(expected_normalized)) - self.assertEqual(spec, non_unique_nodes) + # Again, different DAG structure so not equal. + self.assertNotEqual(spec, non_unique_nodes) self.assertFalse(spec.eq_dag(non_unique_nodes)) spec.normalize() # After normalizing, spec_dag_equal should match the normalized spec. - self.assertEqual(spec, expected_flat) + self.assertNotEqual(spec, expected_flat) self.assertFalse(spec.eq_dag(expected_flat)) self.assertEqual(spec, expected_normalized) -- cgit v1.2.3-70-g09d2 From afbd0e77d0754227b799ae3872d8775ceec0ccd0 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 11 Dec 2015 16:47:34 -0800 Subject: Make internal hash dep sort order match external one. --- lib/spack/spack/spec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 483da03c47..fb5ea5d02b 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1506,7 +1506,8 @@ class Spec(object): 2. The hash of each of this node's dependencies' cmp_keys. """ return self._cmp_node() + ( - tuple(sorted(hash(d) for d in self.dependencies.values())),) + tuple(hash(self.dependencies[name]) + for name in sorted(self.dependencies)),) def colorized(self): -- cgit v1.2.3-70-g09d2