From 7769367747db1aaede84956e30c1ccd4b11a5ce0 Mon Sep 17 00:00:00 2001 From: Mayeul d'Avezac Date: Mon, 1 Aug 2016 09:48:18 +0100 Subject: dag_hash stabilised by depending on sorted dict Spec.to_node_dict uses OrderedDict This is to try and ensure that the dag_hash is stable across python version and processes. --- lib/spack/spack/spec.py | 38 +++++++++++++++++++++++--------------- lib/spack/spack/test/spec_dag.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 0d72d454c6..b664c702b3 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -119,6 +119,7 @@ from spack.build_environment import get_path_from_module, load_module from spack.util.naming import mod_to_class from spack.util.prefix import Prefix from spack.util.string import * +from spack.util.spack_yaml import syaml_dict from spack.version import * from spack.provider_index import ProviderIndex @@ -910,22 +911,29 @@ class Spec(object): return b32_hash def to_node_dict(self): - d = {} + ordered_dict = lambda d: syaml_dict(sorted(d.items())) - params = dict((name, v.value) for name, v in self.variants.items()) - params.update(dict((name, value) - for name, value in self.compiler_flags.items())) + d = syaml_dict() + + params = syaml_dict(sorted( + (name, v.value) for name, v in self.variants.items())) + params.update(ordered_dict(self.compiler_flags)) if params: d['parameters'] = params - if self.dependencies(): - deps = self.dependencies_dict(deptype=('link', 'run')) - d['dependencies'] = dict( - (name, { - 'hash': dspec.spec.dag_hash(), - 'type': [str(s) for s in dspec.deptypes]}) - for name, dspec in deps.items()) + deps = self.dependencies_dict(deptype=('link', 'run')) + if deps: + d['dependencies'] = syaml_dict(sorted(( + ( + name, + ordered_dict({ + 'hash': dspec.spec.dag_hash(), + 'type': sorted([str(s) for s in dspec.deptypes]) + }) + ) + for name, dspec in deps.items() + ))) if self.namespace: d['namespace'] = self.namespace @@ -933,15 +941,15 @@ class Spec(object): if self.architecture: # TODO: Fix the target.to_dict to account for the tuple # Want it to be a dict of dicts - d['arch'] = self.architecture.to_dict() + d['arch'] = ordered_dict(self.architecture.to_dict()) if self.compiler: - d.update(self.compiler.to_dict()) + d['compiler'] = syaml_dict(self.compiler.to_dict()['compiler']) if self.versions: - d.update(self.versions.to_dict()) + d.update(ordered_dict(self.versions.to_dict())) - return {self.name: d} + return syaml_dict({self.name: d}) def to_yaml(self, stream=None): node_list = [] diff --git a/lib/spack/spack/test/spec_dag.py b/lib/spack/spack/test/spec_dag.py index 8f61c7ac76..5c2731041c 100644 --- a/lib/spack/spack/test/spec_dag.py +++ b/lib/spack/spack/test/spec_dag.py @@ -495,3 +495,31 @@ class SpecDagTest(MockPackagesTest): traversal = dag.traverse(deptype='run') self.assertEqual([x.name for x in traversal], names) + + def test_using_ordered_dict(self): + """ Checks that dicts are ordered + + Necessary to make sure that dag_hash is stable across python + versions and processes. + """ + def descend_and_check(iterable, level=0): + from spack.util.spack_yaml import syaml_dict + from collections import Iterable, Mapping + if isinstance(iterable, Mapping): + self.assertTrue(isinstance(iterable, syaml_dict)) + return descend_and_check(iterable.values(), level=level + 1) + max_level = level + for value in iterable: + if isinstance(value, Iterable) and not isinstance(value, str): + nlevel = descend_and_check(value, level=level + 1) + if nlevel > max_level: + max_level = nlevel + return max_level + + specs = ['mpileaks ^zmpi', 'dttop', 'dtuse'] + for spec in specs: + dag = Spec(spec) + dag.normalize() + level = descend_and_check(dag.to_node_dict()) + # level just makes sure we are doing something here + self.assertTrue(level >= 5) -- cgit v1.2.3-70-g09d2 From 69d45b49e9b9a589b1e8ae782a3c726eb2cad343 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 2 Sep 2016 01:26:01 -0700 Subject: Fix hash handling in directory layout - Currently, build dependencies are not currently hashed; we are waiting to hash these until we have smarter concretization that can reuse more installed specs. The layout needs to account for this when checking whethert things are installed. --- lib/spack/spack/directory_layout.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/spack/spack/directory_layout.py b/lib/spack/spack/directory_layout.py index 0ae6f765f4..73286483ef 100644 --- a/lib/spack/spack/directory_layout.py +++ b/lib/spack/spack/directory_layout.py @@ -268,6 +268,13 @@ class YamlDirectoryLayout(DirectoryLayout): if installed_spec == spec: return path + # DAG hashes currently do not include build dependencies. + # + # TODO: remove this when we do better concretization and don't + # ignore build-only deps in hashes. + elif installed_spec == spec.copy(deps=('link', 'run')): + return path + if spec.dag_hash() == installed_spec.dag_hash(): raise SpecHashCollisionError(spec, installed_spec) else: -- cgit v1.2.3-70-g09d2 From 9268b7aa7c6dee44433c1cbf6780fe9f84158011 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 2 Sep 2016 01:26:19 -0700 Subject: Fix hash copying in _dup. - Spec._dup() incorrectly copied cached hashes and normal/concrete values even when dependency structure was not preserved. - Now these are only copied when *all* dependencies are copied. --- lib/spack/spack/spec.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 99446d21dd..f742ca4616 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1911,8 +1911,9 @@ class Spec(object): self.external = other.external self.external_module = other.external_module self.namespace = other.namespace - self._hash = other._hash - self._cmp_key_cache = other._cmp_key_cache + + self.external = other.external + self.external_module = other.external_module # If we copy dependencies, preserve DAG structure in the new spec if deps: @@ -1940,11 +1941,20 @@ class Spec(object): new_spec._add_dependency( new_nodes[depspec.spec.name], depspec.deptypes) - # Since we preserved structure, we can copy _normal safely. - self._normal = other._normal - self._concrete = other._concrete - self.external = other.external - self.external_module = other.external_module + # These fields are all cached results of expensive operations. + # If we preserved the original structure, we can copy them + # safely. If not, they need to be recomputed. + if deps == True or deps == alldeps: + self._hash = other._hash + self._cmp_key_cache = other._cmp_key_cache + self._normal = other._normal + self._concrete = other._concrete + else: + self._hash = None + self._cmp_key_cache = None + self._normal = False + self._concrete = False + return changed def copy(self, deps=True): -- cgit v1.2.3-70-g09d2 From c8b4f978e14110f903ca17b4c77aa60547b5bce5 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 2 Sep 2016 02:54:03 -0700 Subject: Clean up stable hashing so that specs don't contain !!python/object/apply - only output basic lists, dicts, etc. - spec and database now parse and write specs as ordered data. --- lib/spack/spack/architecture.py | 14 +++++---- lib/spack/spack/database.py | 10 +++--- lib/spack/spack/spec.py | 68 +++++++++++++++++++---------------------- lib/spack/spack/version.py | 9 ++++-- 4 files changed, 51 insertions(+), 50 deletions(-) diff --git a/lib/spack/spack/architecture.py b/lib/spack/spack/architecture.py index 0d210f9741..57e266722e 100644 --- a/lib/spack/spack/architecture.py +++ b/lib/spack/spack/architecture.py @@ -87,6 +87,7 @@ import spack.compilers from spack.util.naming import mod_to_class from spack.util.environment import get_path from spack.util.multiproc import parmap +from spack.util.spack_yaml import syaml_dict import spack.error as serr @@ -407,12 +408,13 @@ class Arch(object): return (platform, platform_os, target) def to_dict(self): - d = {} - d['platform'] = str(self.platform) if self.platform else None - d['platform_os'] = str(self.platform_os) if self.platform_os else None - d['target'] = str(self.target) if self.target else None - - return d + return syaml_dict(( + ('platform', + str(self.platform) if self.platform else None), + ('platform_os', + str(self.platform_os) if self.platform_os else None), + ('target', + str(self.target) if self.target else None))) def _target_from_dict(target_name, plat=None): diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index 043e3b953d..e450b4d424 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -42,7 +42,6 @@ filesystem. import os import socket -import yaml from yaml.error import MarkedYAMLError, YAMLError import llnl.util.tty as tty @@ -54,7 +53,7 @@ from spack.version import Version from spack.spec import Spec from spack.error import SpackError from spack.repository import UnknownPackageError - +import spack.util.spack_yaml as syaml # DB goes in this directory underneath the root _db_dirname = '.spack-db' @@ -197,7 +196,8 @@ class Database(object): } try: - return yaml.dump(database, stream=stream, default_flow_style=False) + return syaml.dump( + database, stream=stream, default_flow_style=False) except YAMLError as e: raise SpackYAMLError("error writing YAML database:", str(e)) @@ -247,9 +247,9 @@ class Database(object): try: if isinstance(stream, basestring): with open(stream, 'r') as f: - yfile = yaml.load(f) + yfile = syaml.load(f) else: - yfile = yaml.load(stream) + yfile = syaml.load(stream) except MarkedYAMLError as e: raise SpackYAMLError("error parsing YAML database:", str(e)) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index ac77abd57b..8b0486c4da 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -102,7 +102,6 @@ import sys from StringIO import StringIO from operator import attrgetter -import yaml from yaml.error import MarkedYAMLError import llnl.util.tty as tty @@ -119,6 +118,7 @@ from spack.build_environment import get_path_from_module, load_module from spack.util.naming import mod_to_class from spack.util.prefix import Prefix from spack.util.string import * +import spack.util.spack_yaml as syaml from spack.util.spack_yaml import syaml_dict from spack.version import * from spack.provider_index import ProviderIndex @@ -267,9 +267,10 @@ class CompilerSpec(object): return (self.name, self.versions) def to_dict(self): - d = {'name': self.name} + d = syaml_dict([('name', self.name)]) d.update(self.versions.to_dict()) - return {'compiler': d} + + return syaml_dict([('compiler', d)]) @staticmethod def from_dict(d): @@ -903,7 +904,7 @@ class Spec(object): return self._hash[:length] else: # XXX(deptype): ignore 'build' dependencies here - yaml_text = yaml.dump( + yaml_text = syaml.dump( self.to_node_dict(), default_flow_style=True, width=sys.maxint) sha = hashlib.sha1(yaml_text) b32_hash = base64.b32encode(sha.digest()).lower()[:length] @@ -912,45 +913,37 @@ class Spec(object): return b32_hash def to_node_dict(self): - ordered_dict = lambda d: syaml_dict(sorted(d.items())) - d = syaml_dict() - params = syaml_dict(sorted( - (name, v.value) for name, v in self.variants.items())) - params.update(ordered_dict(self.compiler_flags)) - - if params: - d['parameters'] = params + if self.versions: + d.update(self.versions.to_dict()) - deps = self.dependencies_dict(deptype=('link', 'run')) - if deps: - d['dependencies'] = syaml_dict(sorted(( - ( - name, - ordered_dict({ - 'hash': dspec.spec.dag_hash(), - 'type': sorted([str(s) for s in dspec.deptypes]) - }) - ) - for name, dspec in deps.items() - ))) + if self.compiler: + d.update(self.compiler.to_dict()) if self.namespace: d['namespace'] = self.namespace - if self.architecture: - # TODO: Fix the target.to_dict to account for the tuple - # Want it to be a dict of dicts - d['arch'] = ordered_dict(self.architecture.to_dict()) + params = syaml_dict(sorted( + (name, v.value) for name, v in self.variants.items())) + params.update(sorted(self.compiler_flags.items())) + if params: + d['parameters'] = params - if self.compiler: - d['compiler'] = syaml_dict(self.compiler.to_dict()['compiler']) + if self.architecture: + d['arch'] = self.architecture.to_dict() - if self.versions: - d.update(ordered_dict(self.versions.to_dict())) + deps = self.dependencies_dict(deptype=('link', 'run')) + if deps: + d['dependencies'] = syaml_dict([ + (name, + syaml_dict([ + ('hash', dspec.spec.dag_hash()), + ('type', sorted(str(s) for s in dspec.deptypes))]) + ) for name, dspec in sorted(deps.items()) + ]) - return syaml_dict({self.name: d}) + return syaml_dict([(self.name, d)]) def to_yaml(self, stream=None): node_list = [] @@ -958,8 +951,9 @@ class Spec(object): node = s.to_node_dict() node[s.name]['hash'] = s.dag_hash() node_list.append(node) - return yaml.dump({'spec': node_list}, - stream=stream, default_flow_style=False) + return syaml.dump( + syaml_dict([('spec', node_list)]), + stream=stream, default_flow_style=False) @staticmethod def from_node_dict(node): @@ -1033,7 +1027,7 @@ class Spec(object): """ try: - yfile = yaml.load(stream) + yfile = syaml.load(stream) except MarkedYAMLError as e: raise SpackYAMLError("error parsing YAML spec:", str(e)) @@ -1952,7 +1946,7 @@ class Spec(object): # These fields are all cached results of expensive operations. # If we preserved the original structure, we can copy them # safely. If not, they need to be recomputed. - if deps == True or deps == alldeps: + if deps is True or deps == alldeps: self._hash = other._hash self._cmp_key_cache = other._cmp_key_cache self._normal = other._normal diff --git a/lib/spack/spack/version.py b/lib/spack/spack/version.py index bc96dcd716..e3efa6c87a 100644 --- a/lib/spack/spack/version.py +++ b/lib/spack/spack/version.py @@ -49,6 +49,7 @@ from bisect import bisect_left from functools import wraps from functools_backport import total_ordering +from spack.util.spack_yaml import syaml_dict __all__ = ['Version', 'VersionRange', 'VersionList', 'ver'] @@ -593,9 +594,13 @@ class VersionList(object): def to_dict(self): """Generate human-readable dict for YAML.""" if self.concrete: - return {'version': str(self[0])} + return syaml_dict([ + ('version', str(self[0])) + ]) else: - return {'versions': [str(v) for v in self]} + return syaml_dict([ + ('versions', [str(v) for v in self]) + ]) @staticmethod def from_dict(dictionary): -- cgit v1.2.3-70-g09d2