From ebf2076755beabea00de0ff1588c6eec6af2b304 Mon Sep 17 00:00:00 2001 From: Greg Becker Date: Fri, 9 Jul 2021 18:16:48 -0700 Subject: spec.splice: properly handle cached hash invalidations (#24795) * spec.splice: properly handle cached hash invalidations * make package_hash a cached hash on the spec --- lib/spack/spack/hash_types.py | 13 +++++-- lib/spack/spack/spec.py | 66 +++++++++++++++++++++++++--------- lib/spack/spack/test/spec_semantics.py | 30 ++++++++++++++++ 3 files changed, 91 insertions(+), 18 deletions(-) diff --git a/lib/spack/spack/hash_types.py b/lib/spack/spack/hash_types.py index 554665e24a..5e122c308b 100644 --- a/lib/spack/spack/hash_types.py +++ b/lib/spack/spack/hash_types.py @@ -19,12 +19,15 @@ class SpecHashDescriptor(object): We currently use different hashes for different use cases. """ - hash_types = ('_dag_hash', '_build_hash', '_full_hash') + hash_types = ('_dag_hash', '_build_hash', '_full_hash', '_package_hash') - def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None): + def __init__(self, deptype=('link', 'run'), package_hash=False, attr=None, + override=None): self.deptype = dp.canonical_deptype(deptype) self.package_hash = package_hash self.attr = attr + # Allow spec hashes to have an alternate computation method + self.override = override #: Default Hash descriptor, used by Spec.dag_hash() and stored in the DB. @@ -40,3 +43,9 @@ build_hash = SpecHashDescriptor( #: Full hash used in build pipelines to determine when to rebuild packages. full_hash = SpecHashDescriptor( deptype=('build', 'link', 'run'), package_hash=True, attr='_full_hash') + + +#: Package hash used as part of full hash +package_hash = SpecHashDescriptor( + deptype=(), package_hash=True, attr='_package_hash', + override=lambda s: s.package.content_hash()) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 7a69dad881..934d3317fd 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1030,9 +1030,8 @@ class Spec(object): #: Cache for spec's prefix, computed lazily in the corresponding property _prefix = None - def __init__(self, spec_like=None, - normal=False, concrete=False, external_path=None, - external_modules=None, full_hash=None): + def __init__(self, spec_like=None, normal=False, + concrete=False, external_path=None, external_modules=None): """Create a new Spec. Arguments: @@ -1046,8 +1045,6 @@ class Spec(object): self._concrete = concrete self.external_path = external_path self.external_module = external_module - self._full_hash = full_hash - """ # Copy if spec_like is a Spec. @@ -1068,6 +1065,8 @@ class Spec(object): self._hash = None self._build_hash = None + self._full_hash = None + self._package_hash = None self._dunder_hash = None self._package = None @@ -1082,7 +1081,6 @@ class Spec(object): self._concrete = concrete self.external_path = external_path self.external_modules = Spec._format_module_list(external_modules) - self._full_hash = full_hash # Older spack versions did not compute full_hash or build_hash, # and we may not have the necessary information to recompute them @@ -1501,6 +1499,8 @@ class Spec(object): """ # TODO: curently we strip build dependencies by default. Rethink # this when we move to using package hashing on all specs. + if hash.override is not None: + return hash.override(self) node_dict = self.to_node_dict(hash=hash) yaml_text = syaml.dump(node_dict, default_flow_style=True) return spack.util.hash.b32_hash(yaml_text) @@ -1528,6 +1528,10 @@ class Spec(object): return hash_string[:length] + def package_hash(self): + """Compute the hash of the contents of the package for this node""" + return self._cached_hash(ht.package_hash) + def dag_hash(self, length=None): """This is Spack's default hash, used to identify installations. @@ -1653,7 +1657,7 @@ class Spec(object): d['patches'] = variant._patches_in_order_of_appearance if hash.package_hash: - package_hash = self.package.content_hash() + package_hash = self.package_hash() # Full hashes are in bytes if (not isinstance(package_hash, six.text_type) @@ -1822,6 +1826,7 @@ class Spec(object): spec._hash = node.get('hash', None) spec._build_hash = node.get('build_hash', None) spec._full_hash = node.get('full_hash', None) + spec._package_hash = node.get('package_hash', None) if 'version' in node or 'versions' in node: spec.versions = vn.VersionList.from_dict(node) @@ -3441,12 +3446,14 @@ class Spec(object): self._dunder_hash = other._dunder_hash self._normal = other._normal self._full_hash = other._full_hash + self._package_hash = other._package_hash else: self._hash = None self._build_hash = None self._dunder_hash = None self._normal = False self._full_hash = None + self._package_hash = None return changed @@ -4281,19 +4288,22 @@ class Spec(object): # _dependents of these specs should not be trusted. # Variants may also be ignored here for now... + # Keep all cached hashes because we will invalidate the ones that need + # invalidating later, and we don't want to invalidate unnecessarily + if transitive: - self_nodes = dict((s.name, s.copy(deps=False)) + self_nodes = dict((s.name, s.copy(deps=False, caches=True)) for s in self.traverse(root=True) if s.name not in other) - other_nodes = dict((s.name, s.copy(deps=False)) + other_nodes = dict((s.name, s.copy(deps=False, caches=True)) for s in other.traverse(root=True)) else: # If we're not doing a transitive splice, then we only want the # root of other. - self_nodes = dict((s.name, s.copy(deps=False)) + self_nodes = dict((s.name, s.copy(deps=False, caches=True)) for s in self.traverse(root=True) if s.name != other.name) - other_nodes = {other.name: other.copy(deps=False)} + other_nodes = {other.name: other.copy(deps=False, caches=True)} nodes = other_nodes.copy() nodes.update(self_nodes) @@ -4314,17 +4324,41 @@ class Spec(object): if any(dep not in other_nodes for dep in dependencies): nodes[name].build_spec = other[name].build_spec - # Clear cached hashes - nodes[self.name].clear_cached_hashes() + ret = nodes[self.name] + + # Clear cached hashes for all affected nodes + # Do not touch unaffected nodes + for dep in ret.traverse(root=True, order='post'): + opposite = other_nodes if dep.name in self_nodes else self_nodes + if any(name in dep for name in opposite.keys()): + # Record whether hashes are already cached + # So we don't try to compute a hash from insufficient + # provenance later + has_build_hash = getattr(dep, ht.build_hash.attr, None) + has_full_hash = getattr(dep, ht.full_hash.attr, None) + + # package hash cannot be affected by splice + dep.clear_cached_hashes(ignore=['_package_hash']) + + # Since this is a concrete spec, we want to make sure hashes + # are cached writing specs only writes cached hashes in case + # the spec is too old to have full provenance for these hashes, + # so we can't rely on doing it at write time. + if has_build_hash: + _ = dep.build_hash() + if has_full_hash: + _ = dep.full_hash() + return nodes[self.name] - def clear_cached_hashes(self): + def clear_cached_hashes(self, ignore=()): """ Clears all cached hashes in a Spec, while preserving other properties. """ for attr in ht.SpecHashDescriptor.hash_types: - if hasattr(self, attr): - setattr(self, attr, None) + if attr not in ignore: + if hasattr(self, attr): + setattr(self, attr, None) def __hash__(self): # If the spec is concrete, we leverage the DAG hash and just use diff --git a/lib/spack/spack/test/spec_semantics.py b/lib/spack/spack/test/spec_semantics.py index c21be38491..acf0116cfb 100644 --- a/lib/spack/spack/test/spec_semantics.py +++ b/lib/spack/spack/test/spec_semantics.py @@ -1027,6 +1027,36 @@ class TestSpecSematics(object): # Finally, the spec should know it's been spliced: assert out.spliced + @pytest.mark.parametrize('transitive', [True, False]) + def test_splice_with_cached_hashes(self, transitive): + spec = Spec('splice-t') + dep = Spec('splice-h+foo') + spec.concretize() + dep.concretize() + + # monkeypatch hashes so we can test that they are cached + spec._full_hash = 'aaaaaa' + spec._build_hash = 'aaaaaa' + dep._full_hash = 'bbbbbb' + dep._build_hash = 'bbbbbb' + spec['splice-h']._full_hash = 'cccccc' + spec['splice-h']._build_hash = 'cccccc' + spec['splice-z']._full_hash = 'dddddd' + spec['splice-z']._build_hash = 'dddddd' + dep['splice-z']._full_hash = 'eeeeee' + dep['splice-z']._build_hash = 'eeeeee' + + out = spec.splice(dep, transitive=transitive) + out_z_expected = (dep if transitive else spec)['splice-z'] + + assert out.full_hash() != spec.full_hash() + assert (out['splice-h'].full_hash() == dep.full_hash()) == transitive + assert out['splice-z'].full_hash() == out_z_expected.full_hash() + + assert out.build_hash() != spec.build_hash() + assert (out['splice-h'].build_hash() == dep.build_hash()) == transitive + assert out['splice-z'].build_hash() == out_z_expected.build_hash() + @pytest.mark.parametrize('transitive', [True, False]) def test_splice_input_unchanged(self, transitive): spec = Spec('splice-t').concretized() -- cgit v1.2.3-70-g09d2