summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPeter Josef Scheibel <scheibel1@llnl.gov>2019-07-12 19:06:17 -0700
committerGreg Becker <becker33@llnl.gov>2019-07-22 13:45:34 -0500
commit7ec1d320a82f28e465557046e4b75385bebda77e (patch)
tree31ba52e70f690bbe6e796ec63899807daeca6bed /lib
parent0715b512a19d5d966f596559274ca7b8a3406701 (diff)
downloadspack-7ec1d320a82f28e465557046e4b75385bebda77e.tar.gz
spack-7ec1d320a82f28e465557046e4b75385bebda77e.tar.bz2
spack-7ec1d320a82f28e465557046e4b75385bebda77e.tar.xz
spack-7ec1d320a82f28e465557046e4b75385bebda77e.zip
hashes: consolidate and make hashing logic more consistent
Spack has evolved to have three types of hash functions, and it's becoming hard to tell when each one is called. While we aren't yet ready to get rid of them, we can refactor them so that the code is clearer and easier to track. - Add a `hash_types` module with concise descriptors for hashes. - Consolidate hashing logic in a private `Spec._spec_hash()` function. - `dag_hash()`, `build_hash()`, and `full_hash()` all call `_spec_hash()` - `to_node_dict()`, `to_dict()`, `to_yaml()` and `to_json()` now take a `hash` parameter consistent with the one that `_spec_hash()` requires. Co-authored-by: Todd Gamblin <tgamblin@llnl.gov>
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/cmd/buildcache.py7
-rw-r--r--lib/spack/spack/environment.py12
-rw-r--r--lib/spack/spack/hash_types.py36
-rw-r--r--lib/spack/spack/spec.py265
-rw-r--r--lib/spack/spack/test/cmd/env.py9
-rw-r--r--lib/spack/spack/test/cmd/install.py3
-rw-r--r--lib/spack/spack/test/spec_dag.py2
-rw-r--r--lib/spack/spack/test/spec_yaml.py4
8 files changed, 254 insertions, 84 deletions
diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py
index 9824f96cb4..c6c97ddfc7 100644
--- a/lib/spack/spack/cmd/buildcache.py
+++ b/lib/spack/spack/cmd/buildcache.py
@@ -12,15 +12,16 @@ import spack.binary_distribution as bindist
import spack.cmd
import spack.cmd.common.arguments as arguments
import spack.environment as ev
+import spack.hash_types as ht
import spack.relocate
import spack.repo
import spack.spec
import spack.store
-
-from spack.error import SpecError
import spack.config
import spack.repo
import spack.store
+
+from spack.error import SpecError
from spack.paths import etc_path
from spack.spec import Spec, save_dependency_spec_yamls
from spack.spec_set import CombinatorialSpecSet
@@ -543,7 +544,7 @@ def save_spec_yamls(args):
root_spec = Spec(args.root_spec)
root_spec.concretize()
- root_spec_as_yaml = root_spec.to_yaml(all_deps=True)
+ root_spec_as_yaml = root_spec.to_yaml(hash=ht.build_hash)
save_dependency_spec_yamls(
root_spec_as_yaml, args.yaml_dir, args.specs.split())
diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py
index c7f57cc61d..824f60d338 100644
--- a/lib/spack/spack/environment.py
+++ b/lib/spack/spack/environment.py
@@ -20,11 +20,13 @@ import llnl.util.tty as tty
from llnl.util.tty.color import colorize
import spack.error
+import spack.hash_types as ht
import spack.repo
import spack.schema.env
import spack.spec
import spack.util.spack_json as sjson
import spack.config
+
from spack.filesystem_view import YamlFilesystemView
from spack.util.environment import EnvironmentModifications
import spack.architecture as architecture
@@ -884,7 +886,7 @@ class Environment(object):
# spec might be in the user_specs, but not installed.
# TODO: Redo name-based comparison for old style envs
spec = next(s for s in self.user_specs if s.satisfies(user_spec))
- concrete = self.specs_by_hash.get(spec.dag_hash(all_deps=True))
+ concrete = self.specs_by_hash.get(spec.build_hash())
if not concrete:
concrete = spec.concretized()
self._add_concrete_spec(spec, concrete)
@@ -996,7 +998,7 @@ class Environment(object):
# update internal lists of specs
self.concretized_user_specs.append(spec)
- h = concrete.dag_hash(all_deps=True)
+ h = concrete.build_hash()
self.concretized_order.append(h)
self.specs_by_hash[h] = concrete
@@ -1111,9 +1113,9 @@ class Environment(object):
concrete_specs = {}
for spec in self.specs_by_hash.values():
for s in spec.traverse():
- dag_hash_all = s.dag_hash(all_deps=True)
+ dag_hash_all = s.build_hash()
if dag_hash_all not in concrete_specs:
- spec_dict = s.to_node_dict(all_deps=True)
+ spec_dict = s.to_node_dict(hash=ht.build_hash)
spec_dict[s.name]['hash'] = s.dag_hash()
concrete_specs[dag_hash_all] = spec_dict
@@ -1172,7 +1174,7 @@ class Environment(object):
self.specs_by_hash = {}
for _, spec in specs_by_hash.items():
dag_hash = spec.dag_hash()
- build_hash = spec.dag_hash(all_deps=True)
+ build_hash = spec.build_hash()
if dag_hash in root_hashes:
old_hash_to_new[dag_hash] = build_hash
diff --git a/lib/spack/spack/hash_types.py b/lib/spack/spack/hash_types.py
new file mode 100644
index 0000000000..01e31bb465
--- /dev/null
+++ b/lib/spack/spack/hash_types.py
@@ -0,0 +1,36 @@
+# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+"""Definitions that control how Spack creates Spec hashes."""
+
+import spack.dependency as dp
+
+
+class SpecHashDescriptor(object):
+ """This class defines how hashes are generated on Spec objects.
+
+ Spec hashes in Spack are generated from a serialized (e.g., with
+ YAML) representation of the Spec graph. The representation may only
+ include certain dependency types, and it may optionally include a
+ canonicalized hash of the package.py for each node in the graph.
+
+ We currently use different hashes for different use cases.
+ """
+ def __init__(self, deptype=('link', 'run'), package_hash=False):
+ self.deptype = dp.canonical_deptype(deptype)
+ self.package_hash = package_hash
+
+
+#: Default Hash descriptor, used by Spec.dag_hash() and stored in the DB.
+dag_hash = SpecHashDescriptor(deptype=('link', 'run'), package_hash=False)
+
+
+#: Hash descriptor that includes build dependencies.
+build_hash = SpecHashDescriptor(
+ deptype=('build', 'link', 'run'), package_hash=False)
+
+
+#: Full hash used in build pipelines to determine when to rebuild packages.
+full_hash = SpecHashDescriptor(deptype=('link', 'run'), package_hash=True)
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 1a8d9016c0..009775acf4 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -100,14 +100,15 @@ import spack.paths
import spack.architecture
import spack.compiler
import spack.compilers as compilers
+import spack.dependency as dp
import spack.error
+import spack.hash_types as ht
import spack.parse
import spack.repo
import spack.store
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
-from spack.dependency import Dependency, all_deptypes, canonical_deptype
from spack.util.module_cmd import get_path_from_module, load_module
from spack.error import NoLibrariesError, NoHeadersError
from spack.error import SpecError, UnsatisfiableSpecError
@@ -966,7 +967,7 @@ class Spec(object):
self.name + " does not depend on " + comma_or(name))
def _find_deps(self, where, deptype):
- deptype = canonical_deptype(deptype)
+ deptype = dp.canonical_deptype(deptype)
return [dep for dep in where.values()
if deptype and (not dep.deptypes or
@@ -1192,7 +1193,7 @@ class Spec(object):
cover = kwargs.get('cover', 'nodes')
direction = kwargs.get('direction', 'children')
order = kwargs.get('order', 'pre')
- deptype = canonical_deptype(deptype)
+ deptype = dp.canonical_deptype(deptype)
# Make sure kwargs have legal values; raise ValueError if not.
def validate(name, val, allowed_values):
@@ -1286,60 +1287,129 @@ class Spec(object):
def prefix(self, value):
self._prefix = Prefix(value)
- def dag_hash(self, length=None, all_deps=False):
- """Return a hash of the entire spec DAG, including connectivity."""
- if not self.concrete:
- h = self._dag_hash(all_deps=all_deps)
- # An upper bound of None is equivalent to len(h). An upper bound of
- # 0 produces the empty string
- return h[:length]
-
- if not self._hash:
- self._hash = self._dag_hash(all_deps=False)
-
- if not self._build_hash:
- self._build_hash = self._dag_hash(all_deps=True)
-
- h = self._build_hash if all_deps else self._hash
- return h[:length]
+ def _spec_hash(self, hash):
+ """Utility method for computing different types of Spec hashes.
- def _dag_hash(self, all_deps=False):
- yaml_text = syaml.dump(
- self.to_node_dict(all_deps=all_deps),
- default_flow_style=True,
- width=maxint)
+ Arguments:
+ hash (SpecHashDescriptor): type of hash to generate.
+ """
+ # TODO: currently we strip build dependencies by default. Rethink
+ # this when we move to using package hashing on all specs.
+ yaml_text = syaml.dump(self.to_node_dict(hash=hash),
+ default_flow_style=True, width=maxint)
sha = hashlib.sha1(yaml_text.encode('utf-8'))
-
b32_hash = base64.b32encode(sha.digest()).lower()
+
if sys.version_info[0] >= 3:
b32_hash = b32_hash.decode('utf-8')
return b32_hash
- def dag_hash_bit_prefix(self, bits):
- """Get the first <bits> bits of the DAG hash as an integer type."""
- return base32_prefix_bits(self.dag_hash(), bits)
+ def _cached_hash(self, length, attr, hash):
+ """Helper function for storing a cached hash on the spec.
+
+ This will run _spec_hash() with the deptype and package_hash
+ parameters, and if this spec is concrete, it will store the value
+ in the supplied attribute on this spec.
+
+ Arguments:
+ hash (SpecHashDescriptor): type of hash to generate.
+ """
+ hash_string = getattr(self, attr, None)
+ if hash_string:
+ return hash_string[:length]
+ else:
+ hash_string = self._spec_hash(hash)
+ if self.concrete:
+ setattr(self, attr, hash_string)
+
+ return hash_string[:length]
+
+ def dag_hash(self, length=None):
+ """This is Spack's default hash, used to identify installations.
+
+ At the moment, it excludes build dependencies to avoid rebuilding
+ packages whenever build dependency versions change. We will
+ revise this to include more detailed provenance when the
+ concretizer can more aggressively reuse installed dependencies.
+ """
+ return self._cached_hash(length, '_hash', ht.dag_hash)
+
+ def build_hash(self, length=None):
+ """Hash used to store specs in environments.
+
+ This hash includes build dependencies, and we need to preserve
+ them to be able to rebuild an entire environment for a user.
+ """
+ return self._cached_hash(length, '_build_hash', ht.build_hash)
def full_hash(self, length=None):
- if not self.concrete:
- raise SpecError("Spec is not concrete: " + str(self))
+ """Hash to determine when to rebuild packages in the build pipeline.
+
+ This hash includes the package hash, so that we know when package
+ files have changed between builds. It does not currently include
+ build dependencies, though it likely should.
- if not self._full_hash:
- yaml_text = syaml.dump(
- self.to_node_dict(hash_function=lambda s: s.full_hash()),
- default_flow_style=True, width=maxint)
- package_hash = self.package.content_hash()
- sha = hashlib.sha1(yaml_text.encode('utf-8') + package_hash)
+ TODO: investigate whether to include build deps here.
+ """
+ return self._cached_hash(length, '_full_hash', ht.full_hash)
- b32_hash = base64.b32encode(sha.digest()).lower()
- if sys.version_info[0] >= 3:
- b32_hash = b32_hash.decode('utf-8')
+ def dag_hash_bit_prefix(self, bits):
+ """Get the first <bits> bits of the DAG hash as an integer type."""
+ return base32_prefix_bits(self.dag_hash(), bits)
+
+ def to_node_dict(self, hash=ht.dag_hash):
+ """Create a dictionary representing the state of this Spec.
+
+ ``to_node_dict`` creates the content that is eventually hashed by
+ Spack to create identifiers like the DAG hash (see
+ ``dag_hash()``). Example result of ``to_node_dict`` for the
+ ``sqlite`` package::
+
+ {
+ 'sqlite': {
+ 'version': '3.28.0',
+ 'arch': {
+ 'platform': 'darwin',
+ 'platform_os': 'mojave',
+ 'target': 'x86_64',
+ },
+ 'compiler': {
+ 'name': 'clang',
+ 'version': '10.0.0-apple',
+ },
+ 'namespace': 'builtin',
+ 'parameters': {
+ 'fts': 'true',
+ 'functions': 'false',
+ 'cflags': [],
+ 'cppflags': [],
+ 'cxxflags': [],
+ 'fflags': [],
+ 'ldflags': [],
+ 'ldlibs': [],
+ },
+ 'dependencies': {
+ 'readline': {
+ 'hash': 'zvaa4lhlhilypw5quj3akyd3apbq5gap',
+ 'type': ['build', 'link'],
+ }
+ },
+ }
+ }
- self._full_hash = b32_hash
+ Note that the dictionary returned does *not* include the hash of
+ the *root* of the spec, though it does include hashes for each
+ dependency, and (optionally) the package file corresponding to
+ each node.
- return self._full_hash[:length]
+ See ``to_dict()`` for a "complete" spec hash, with hashes for
+ each node and nodes for each dependency (instead of just their
+ hashes).
- def to_node_dict(self, hash_function=None, all_deps=False):
+ Arguments:
+ hash (SpecHashDescriptor): type of hash to generate.
+ """
d = syaml_dict()
if self.versions:
@@ -1378,47 +1448,102 @@ class Spec(object):
if hasattr(variant, '_patches_in_order_of_appearance'):
d['patches'] = variant._patches_in_order_of_appearance
- # TODO: restore build dependencies here once we have less picky
- # TODO: concretization.
- if all_deps:
- deptypes = ('link', 'run', 'build')
- else:
- deptypes = ('link', 'run')
- deps = self.dependencies_dict(deptype=deptypes)
+ if hash.package_hash:
+ d['package_hash'] = self.package.content_hash()
+
+ deps = self.dependencies_dict(deptype=hash.deptype)
if deps:
- if hash_function is None:
- hash_function = lambda s: s.dag_hash(all_deps=all_deps)
d['dependencies'] = syaml_dict([
(name,
syaml_dict([
- ('hash', hash_function(dspec.spec)),
+ ('hash', dspec.spec._spec_hash(hash)),
('type', sorted(str(s) for s in dspec.deptypes))])
) for name, dspec in sorted(deps.items())
])
return syaml_dict([(self.name, d)])
- def to_dict(self, all_deps=False):
- if all_deps:
- deptypes = ('link', 'run', 'build')
- else:
- deptypes = ('link', 'run')
+ def to_dict(self, hash=ht.dag_hash):
+ """Create a dictionary suitable for writing this spec to YAML or JSON.
+
+ This dictionary is like the one that is ultimately written to a
+ ``spec.yaml`` file in each Spack installation directory. For
+ example, for sqlite::
+
+ {
+ 'spec': [
+ {
+ 'sqlite': {
+ 'version': '3.28.0',
+ 'arch': {
+ 'platform': 'darwin',
+ 'platform_os': 'mojave',
+ 'target': 'x86_64',
+ },
+ 'compiler': {
+ 'name': 'clang',
+ 'version': '10.0.0-apple',
+ },
+ 'namespace': 'builtin',
+ 'parameters': {
+ 'fts': 'true',
+ 'functions': 'false',
+ 'cflags': [],
+ 'cppflags': [],
+ 'cxxflags': [],
+ 'fflags': [],
+ 'ldflags': [],
+ 'ldlibs': [],
+ },
+ 'dependencies': {
+ 'readline': {
+ 'hash': 'zvaa4lhlhilypw5quj3akyd3apbq5gap',
+ 'type': ['build', 'link'],
+ }
+ },
+ 'hash': '722dzmgymxyxd6ovjvh4742kcetkqtfs'
+ }
+ },
+ # ... more node dicts for readline and its dependencies ...
+ ]
+ }
+
+ Note that this dictionary starts with the 'spec' key, and what
+ follows is a list starting with the root spec, followed by its
+ dependencies in preorder. Each node in the list also has a
+ 'hash' key that contains the hash of the node *without* the hash
+ field included.
+
+ In the example, the package content hash is not included in the
+ spec, but if ``package_hash`` were true there would be an
+ additional field on each node called ``package_hash``.
+
+ ``from_dict()`` can be used to read back in a spec that has been
+ converted to a dictionary, serialized, and read back in.
+
+ Arguments:
+ hash (SpecHashDescriptor): type of hash to generate; its
+ ``deptype`` selects the dependency types to traverse and
+ its ``package_hash`` flag controls whether package content
+ hashes are included in the dictionary.
+
+ """
node_list = []
- for s in self.traverse(order='pre', deptype=deptypes):
- node = s.to_node_dict(all_deps=all_deps)
+ for s in self.traverse(order='pre', deptype=hash.deptype):
+ node = s.to_node_dict(hash)
node[s.name]['hash'] = s.dag_hash()
- if all_deps:
- node[s.name]['build_hash'] = s.dag_hash(all_deps=True)
+ if 'build' in hash.deptype:
+ node[s.name]['build_hash'] = s.build_hash()
node_list.append(node)
return syaml_dict([('spec', node_list)])
- def to_yaml(self, stream=None, all_deps=False):
+ def to_yaml(self, stream=None, hash=ht.dag_hash):
return syaml.dump(
- self.to_dict(all_deps), stream=stream, default_flow_style=False)
+ self.to_dict(hash), stream=stream, default_flow_style=False)
- def to_json(self, stream=None):
- return sjson.dump(self.to_dict(), stream)
+ def to_json(self, stream=None, hash=ht.dag_hash):
+ return sjson.dump(self.to_dict(hash), stream)
@staticmethod
def from_node_dict(node):
@@ -2135,7 +2260,7 @@ class Spec(object):
for when_spec, dependency in conditions.items():
if self.satisfies(when_spec, strict=True):
if dep is None:
- dep = Dependency(self.name, Spec(name), type=())
+ dep = dp.Dependency(self.name, Spec(name), type=())
try:
dep.merge(dependency)
except UnsatisfiableSpecError as e:
@@ -2814,13 +2939,13 @@ class Spec(object):
# If we preserved the original structure, we can copy them
# safely. If not, they need to be recomputed.
if caches is None:
- caches = (deps is True or deps == all_deptypes)
+ caches = (deps is True or deps == dp.all_deptypes)
# If we copy dependencies, preserve DAG structure in the new spec
if deps:
# If caller restricted deptypes to be copied, adjust that here.
# By default, just copy all deptypes
- deptypes = all_deptypes
+ deptypes = dp.all_deptypes
if isinstance(deps, (tuple, list)):
deptypes = deps
self._dup_deps(other, deptypes, caches)
@@ -3621,7 +3746,7 @@ class Spec(object):
types = set(dep_spec.deptypes)
out += '['
- for t in all_deptypes:
+ for t in dp.all_deptypes:
out += ''.join(t[0] if t in types else ' ')
out += '] '
@@ -3980,7 +4105,7 @@ def save_dependency_spec_yamls(
yaml_path = os.path.join(output_directory, '{0}.yaml'.format(dep_name))
with open(yaml_path, 'w') as fd:
- fd.write(dep_spec.to_yaml(all_deps=True))
+ fd.write(dep_spec.to_yaml(hash=ht.build_hash))
def base32_prefix_bits(hash_string, bits):
diff --git a/lib/spack/spack/test/cmd/env.py b/lib/spack/spack/test/cmd/env.py
index 23db0e429d..2e610e6a8f 100644
--- a/lib/spack/spack/test/cmd/env.py
+++ b/lib/spack/spack/test/cmd/env.py
@@ -10,8 +10,10 @@ import pytest
import llnl.util.filesystem as fs
+import spack.hash_types as ht
import spack.modules
import spack.environment as ev
+
from spack.cmd.env import _env_create
from spack.spec import Spec
from spack.main import SpackCommand
@@ -643,7 +645,8 @@ def create_v1_lockfile_dict(roots, all_specs):
# Version one lockfiles use the dag hash without build deps as keys,
# but they write out the full node dict (including build deps)
"concrete_specs": dict(
- (s.dag_hash(), s.to_node_dict(all_deps=True)) for s in all_specs
+ (s.dag_hash(), s.to_node_dict(hash=ht.build_hash))
+ for s in all_specs
)
}
return test_lockfile_dict
@@ -676,8 +679,8 @@ def test_read_old_lock_and_write_new(tmpdir):
# When the lockfile is rewritten, it should adopt the new hash scheme
# which accounts for all dependencies, including build dependencies
assert hashes == set([
- x.dag_hash(all_deps=True),
- y.dag_hash(all_deps=True)])
+ x.build_hash(),
+ y.build_hash()])
@pytest.mark.usefixtures('config')
diff --git a/lib/spack/spack/test/cmd/install.py b/lib/spack/spack/test/cmd/install.py
index 05971da904..5b5ee1bc49 100644
--- a/lib/spack/spack/test/cmd/install.py
+++ b/lib/spack/spack/test/cmd/install.py
@@ -15,6 +15,7 @@ import pytest
import llnl.util.filesystem as fs
import spack.config
+import spack.hash_types as ht
import spack.package
import spack.cmd.install
from spack.error import SpackError
@@ -540,7 +541,7 @@ def test_cdash_install_from_spec_yaml(tmpdir, mock_fetch, install_mockery,
pkg_spec.concretize()
with open(spec_yaml_path, 'w') as fd:
- fd.write(pkg_spec.to_yaml(all_deps=True))
+ fd.write(pkg_spec.to_yaml(hash=ht.build_hash))
install(
'--log-format=cdash',
diff --git a/lib/spack/spack/test/spec_dag.py b/lib/spack/spack/test/spec_dag.py
index 773faeb1fd..628f67948f 100644
--- a/lib/spack/spack/test/spec_dag.py
+++ b/lib/spack/spack/test/spec_dag.py
@@ -114,7 +114,7 @@ def test_installed_deps():
c_spec.concretize()
assert c_spec['d'].version == spack.version.Version('2')
- c_installed = spack.spec.Spec.from_dict(c_spec.to_dict(all_deps=False))
+ c_installed = spack.spec.Spec.from_dict(c_spec.to_dict())
for spec in c_installed.traverse():
setattr(spec.package, 'installed', True)
diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py
index 1a0ef94f57..5c274797a4 100644
--- a/lib/spack/spack/test/spec_yaml.py
+++ b/lib/spack/spack/test/spec_yaml.py
@@ -12,8 +12,10 @@ import os
from collections import Iterable, Mapping
+import spack.hash_types as ht
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
+
from spack import repo
from spack.spec import Spec, save_dependency_spec_yamls
from spack.util.spack_yaml import syaml_dict
@@ -231,7 +233,7 @@ def test_save_dependency_spec_yamls_subset(tmpdir, config):
spec_a.concretize()
b_spec = spec_a['b']
c_spec = spec_a['c']
- spec_a_yaml = spec_a.to_yaml(all_deps=True)
+ spec_a_yaml = spec_a.to_yaml(hash=ht.build_hash)
save_dependency_spec_yamls(spec_a_yaml, output_path, ['b', 'c'])