From fd3bb5177b325a6c8b49420a5440feb99073263a Mon Sep 17 00:00:00 2001 From: Scott Wittenburg Date: Mon, 7 Mar 2022 13:16:33 -0700 Subject: env: Use order of roots to resolve DAG hash conflicts in legacy lockfiles --- lib/spack/spack/environment/environment.py | 32 +++++++++++++++++++++-------- lib/spack/spack/test/cmd/env.py | 33 ++++++++++++++---------------- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py index 18a39dc1ad..c3d4c67c49 100644 --- a/lib/spack/spack/environment/environment.py +++ b/lib/spack/spack/environment/environment.py @@ -1817,31 +1817,47 @@ class Environment(object): roots = d['roots'] self.concretized_user_specs = [Spec(r['spec']) for r in roots] self.concretized_order = [r['hash'] for r in roots] - json_specs_by_hash = collections.OrderedDict() - for lockfile_key in sorted(d['concrete_specs']): - json_specs_by_hash[lockfile_key] = d['concrete_specs'][lockfile_key] + json_specs_by_hash = d['concrete_specs'] - specs_by_hash = collections.OrderedDict() + # Track specs by their lockfile key. Currently spack uses the finest + # grained hash as the lockfile key, while older formats used the build + # hash or a previous incarnation of the DAG hash (one that did not + # include build deps or package hash). + specs_by_hash = {} + # Track specs by their DAG hash, allows handling DAG hash collisions + first_seen = {} + + # First pass: Put each spec in the map ignoring dependencies for lockfile_key, node_dict in json_specs_by_hash.items(): specs_by_hash[lockfile_key] = Spec.from_node_dict(node_dict) + # Second pass: For each spec, get its dependencies from the node dict + # and add them to the spec for lockfile_key, node_dict in json_specs_by_hash.items(): for _, dep_hash, deptypes, _ in ( Spec.dependencies_from_node_dict(node_dict)): specs_by_hash[lockfile_key]._add_dependency( specs_by_hash[dep_hash], deptypes) + # Traverse the root specs one at a time in the order they appear. + # The first time we see each DAG hash, that's the one we want to + # keep. This is only required as long as we support older lockfile + # formats where the mapping from DAG hash to lockfile key is possibly + # one-to-many. + for lockfile_key in self.concretized_order: + for s in specs_by_hash[lockfile_key].traverse(): + if s.dag_hash() not in first_seen: + first_seen[s.dag_hash()] = s + # Now make sure concretized_order and our internal specs dict # contains the keys used by modern spack (i.e. the dag_hash # that includes build deps and package hash). self.concretized_order = [specs_by_hash[h_key].dag_hash() for h_key in self.concretized_order] - for _, env_spec in specs_by_hash.items(): - spec_dag_hash = env_spec.dag_hash() - if spec_dag_hash in self.concretized_order: - self.specs_by_hash[spec_dag_hash] = env_spec + for spec_dag_hash in self.concretized_order: + self.specs_by_hash[spec_dag_hash] = first_seen[spec_dag_hash] def write(self, regenerate=True): """Writes an in-memory environment to its location on disk. diff --git a/lib/spack/spack/test/cmd/env.py b/lib/spack/spack/test/cmd/env.py index 48ef699f33..32c7f08f32 100644 --- a/lib/spack/spack/test/cmd/env.py +++ b/lib/spack/spack/test/cmd/env.py @@ -2866,32 +2866,29 @@ def test_environment_query_spec_by_hash(mock_stage, mock_fetch, install_mockery) def test_read_legacy_lockfile_and_reconcretize(mock_stage, mock_fetch, install_mockery): - """Make sure that when we read a legacy environment lock file with a hash - conflict (one from before we switched to full hash), the behavior as to - which of the conflicting specs we pick is deterministic and reproducible. - When we read the lockfile, we (somewhat arbitrarily) process specs in - alphabetical order of their lockfile key. Consequently, when reading an - old lockfile where two specs have a dag hash conflict we expect to keep the - second one we encounter. After we force reconcretization, we should both of - the specs that originally conflicted present in the environment again.""" + """In legacy lockfiles there is possibly a one-to-many relationship between + DAG hash lockfile keys. In the case of DAG hash conflicts, we always keep + the spec associated with whichever root spec came first in "roots". After + we force reconcretization, there should no longer be conflicts, i.e. all + specs that originally conflicted should be present in the environment + again.""" legacy_lockfile_path = os.path.join( spack.paths.test_path, 'data', 'legacy_env', 'spack.lock' ) - # In the legacy lockfile, we have two conflicting specs that differ only - # in a build-only dependency. The lockfile keys and conflicting specs - # are: - # wci7a3a -> dttop ^dtbuild1@0.5 - # 5zg6wxw -> dttop ^dtbuild1@1.0 - # So when we initially read the legacy lockfile, we expect to have kept - # the version of dttop that depends on dtbuild1@0.5 + # The order of the root specs in this environment is: + # [ + # wci7a3a -> dttop ^dtbuild1@0.5, + # 5zg6wxw -> dttop ^dtbuild1@1.0 + # ] + # So in the legacy lockfile we have two versions of dttop with the same DAG + # hash (in the past DAG hash did not take build deps into account). Make + # sure we keep the correct instance of each spec, i.e. the one that appeared + # first. env('create', 'test', legacy_lockfile_path) test = ev.read('test') - # Before we forcefully reconcretize, we expect there will be only a - # single actual spec in the environment, and it should depend on - # dtbuild1@1.0, since that root appears last in the list. assert len(test.specs_by_hash) == 1 single_root = next(iter(test.specs_by_hash.values())) -- cgit v1.2.3-60-g2f50