From c44e854d05de6b4cea4736546317748d05e5d833 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Sat, 3 Feb 2024 11:05:45 +0100 Subject: Environment views: dependents before dependencies, resolve identical file conflicts (#42350) Fix two separate problems: 1. We want to always visit parents before children while creating views (when it comes to ignoring conflicts, the first instance generated in the view is chosen, and we want the parent instance to have precedence). Our preorder traversal does not guarantee that, but our topological- order traversal does. 2. For copy style views with packages x depending on y, where /foo is a symlink to /foo, we want to guarantee that: * A conflict is not registered * /foo is chosen (otherwise, the "foo" symlink would become self-referential if relocated relative to the view root) Note that * This is an exception to [1] (in this case the dependency instance overrides the dependent) * Prior to this change, if "foo" was ignored as a conflict, it was possible to create this self-referential symlink Add tests for each of these cases --- lib/spack/llnl/util/link_tree.py | 50 +++++++++++++++++-------- lib/spack/spack/environment/environment.py | 10 ++--- lib/spack/spack/filesystem_view.py | 3 +- lib/spack/spack/test/cmd/env.py | 49 ++++++++++++++++++++++-- lib/spack/spack/test/cmd/view.py | 4 +- lib/spack/spack/test/llnl/util/link_tree.py | 58 +++++++++++++++++++++++++++++ 6 files changed, 146 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/spack/llnl/util/link_tree.py b/lib/spack/llnl/util/link_tree.py index 175b3ca433..e6299e08e6 100644 --- a/lib/spack/llnl/util/link_tree.py +++ b/lib/spack/llnl/util/link_tree.py @@ -58,9 +58,10 @@ class SourceMergeVisitor(BaseDirectoryVisitor): # bit to the relative path in the destination dir. self.projection: str = "" - # When a file blocks another file, the conflict can sometimes be resolved / ignored - # (e.g. /LICENSE or //__init__.py conflicts can be - # ignored). + # Two files f and g conflict if they are not os.path.samefile(f, g) and they are both + # projected to the same destination file. These conflicts are not necessarily fatal, and + # can be resolved or ignored. For example /LICENSE or + # //__init__.py conflicts can be ignored). self.file_conflicts: List[MergeConflict] = [] # When we have to create a dir where a file is, or a file where a dir is, we have fatal @@ -71,7 +72,8 @@ class SourceMergeVisitor(BaseDirectoryVisitor): # and can run mkdir in order. self.directories: Dict[str, Tuple[str, str]] = {} - # Files to link. Maps dst_rel to (src_root, src_rel) + # Files to link. Maps dst_rel to (src_root, src_rel). This is an ordered dict, where files + # are guaranteed to be grouped by src_root in the order they were visited. self.files: Dict[str, Tuple[str, str]] = {} def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool: @@ -134,38 +136,54 @@ class SourceMergeVisitor(BaseDirectoryVisitor): self.visit_file(root, rel_path, depth) return False - def visit_file(self, root: str, rel_path: str, depth: int) -> None: + def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = False) -> None: proj_rel_path = os.path.join(self.projection, rel_path) if self.ignore(rel_path): pass elif proj_rel_path in self.directories: # Can't create a file where a dir is; fatal error - src_a_root, src_a_relpath = self.directories[proj_rel_path] self.fatal_conflicts.append( MergeConflict( dst=proj_rel_path, - src_a=os.path.join(src_a_root, src_a_relpath), + src_a=os.path.join(*self.directories[proj_rel_path]), src_b=os.path.join(root, rel_path), ) ) elif proj_rel_path in self.files: - # In some cases we can resolve file-file conflicts - src_a_root, src_a_relpath = self.files[proj_rel_path] - self.file_conflicts.append( - MergeConflict( - dst=proj_rel_path, - src_a=os.path.join(src_a_root, src_a_relpath), - src_b=os.path.join(root, rel_path), + # When two files project to the same path, they conflict iff they are distinct. + # If they are the same (i.e. one links to the other), register regular files rather + # than symlinks. The reason is that in copy-type views, we need a copy of the actual + # file, not the symlink. + + src_a = os.path.join(*self.files[proj_rel_path]) + src_b = os.path.join(root, rel_path) + + try: + samefile = os.path.samefile(src_a, src_b) + except OSError: + samefile = False + + if not samefile: + # Distinct files produce a conflict. + self.file_conflicts.append( + MergeConflict(dst=proj_rel_path, src_a=src_a, src_b=src_b) ) - ) + return + + if not symlink: + # Remove the link in favor of the actual file. The del is necessary to maintain the + # order of the files dict, which is grouped by root. + del self.files[proj_rel_path] + self.files[proj_rel_path] = (root, rel_path) + else: # Otherwise register this file to be linked. self.files[proj_rel_path] = (root, rel_path) def visit_symlinked_file(self, root: str, rel_path: str, depth: int) -> None: # Treat symlinked files as ordinary files (without "dereferencing") - self.visit_file(root, rel_path, depth) + self.visit_file(root, rel_path, depth, symlink=True) def set_projection(self, projection: str) -> None: self.projection = os.path.normpath(projection) diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py index 4320667274..e54ff5709c 100644 --- a/lib/spack/spack/environment/environment.py +++ b/lib/spack/spack/environment/environment.py @@ -663,10 +663,8 @@ class ViewDescriptor: return True def specs_for_view(self, concrete_roots: List[Spec]) -> List[Spec]: - """ - From the list of concretized user specs in the environment, flatten - the dags, and filter selected, installed specs, remove duplicates on dag hash. - """ + """Flatten the DAGs of the concrete roots, keep only unique, selected, and installed specs + in topological order from root to leaf.""" if self.link == "all": deptype = dt.LINK | dt.RUN elif self.link == "run": @@ -674,7 +672,9 @@ class ViewDescriptor: else: deptype = dt.NONE - specs = traverse.traverse_nodes(concrete_roots, deptype=deptype, key=traverse.by_dag_hash) + specs = traverse.traverse_nodes( + concrete_roots, order="topo", deptype=deptype, key=traverse.by_dag_hash + ) # Filter selected, installed specs with spack.store.STORE.db.read_transaction(): diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py index 5c346f1f67..21d001fc9f 100644 --- a/lib/spack/spack/filesystem_view.py +++ b/lib/spack/spack/filesystem_view.py @@ -655,7 +655,8 @@ class SimpleFilesystemView(FilesystemView): raise ConflictingSpecsError(current_spec, conflicting_spec) seen[metadata_dir] = current_spec - def add_specs(self, *specs, **kwargs): + def add_specs(self, *specs: spack.spec.Spec) -> None: + """Link a root-to-leaf topologically ordered list of specs into the view.""" assert all((s.concrete for s in specs)) if len(specs) == 0: return diff --git a/lib/spack/spack/test/cmd/env.py b/lib/spack/spack/test/cmd/env.py index cfb167320e..e22502f5d0 100644 --- a/lib/spack/spack/test/cmd/env.py +++ b/lib/spack/spack/test/cmd/env.py @@ -1471,8 +1471,8 @@ def test_env_view_fails_dir_file(tmpdir, mock_packages, mock_stage, mock_fetch, view_dir = tmpdir.join("view") env("create", "--with-view=%s" % view_dir, "test") with ev.read("test"): - add("view-dir-file") - add("view-dir-dir") + add("view-file") + add("view-dir") with pytest.raises( llnl.util.link_tree.MergeConflictSummary, match=os.path.join("bin", "x") ): @@ -1486,8 +1486,8 @@ def test_env_view_succeeds_symlinked_dir_file( view_dir = tmpdir.join("view") env("create", "--with-view=%s" % view_dir, "test") with ev.read("test"): - add("view-dir-symlinked-dir") - add("view-dir-dir") + add("view-symlinked-dir") + add("view-dir") install() x_dir = os.path.join(str(view_dir), "bin", "x") assert os.path.exists(os.path.join(x_dir, "file_in_dir")) @@ -3891,3 +3891,44 @@ spack: assert not os.path.exists(env_spec_dir) else: assert os.path.exists(env_spec_dir) + + +def test_env_view_resolves_identical_file_conflicts(tmp_path, install_mockery, mock_fetch): + """When files clash in a view, but refer to the same file on disk (for example, the dependent + symlinks to a file in the dependency at the same relative path), Spack links the first regular + file instead of symlinks. This is important for copy type views where we need the underlying + file to be copied instead of the symlink (when a symlink would be copied, it would become a + self-referencing symlink after relocation). The test uses a symlink type view though, since + that keeps track of the original file path.""" + with ev.create("env", with_view=tmp_path / "view") as e: + add("view-resolve-conflict-top") + install() + top = e.matching_spec("view-resolve-conflict-top").prefix + bottom = e.matching_spec("view-file").prefix + + # In this example we have `./bin/x` in 3 prefixes, two links, one regular file. We expect the + # regular file to be linked into the view. There are also 2 links at `./bin/y`, but no regular + # file, so we expect standard behavior: first entry is linked into the view. + + # view-resolve-conflict-top/bin/ + # x -> view-file/bin/x + # y -> view-resolve-conflict-middle/bin/y # expect this y to be linked + # view-resolve-conflict-middle/bin/ + # x -> view-file/bin/x + # y -> view-file/bin/x + # view-file/bin/ + # x # expect this x to be linked + + assert os.readlink(tmp_path / "view" / "bin" / "x") == bottom.bin.x + assert os.readlink(tmp_path / "view" / "bin" / "y") == top.bin.y + + +def test_env_view_ignores_different_file_conflicts(tmp_path, install_mockery, mock_fetch): + """Test that file-file conflicts for two unique files in environment views are ignored, and + that the dependent's file is linked into the view, not the dependency's file.""" + with ev.create("env", with_view=tmp_path / "view") as e: + add("view-ignore-conflict") + install() + prefix_dependent = e.matching_spec("view-ignore-conflict").prefix + # The dependent's file is linked into the view + assert os.readlink(tmp_path / "view" / "bin" / "x") == prefix_dependent.bin.x diff --git a/lib/spack/spack/test/cmd/view.py b/lib/spack/spack/test/cmd/view.py index eaf6999868..530b998a42 100644 --- a/lib/spack/spack/test/cmd/view.py +++ b/lib/spack/spack/test/cmd/view.py @@ -191,7 +191,7 @@ def test_view_files_not_ignored( pkg.do_install() pkg.assert_installed(spec.prefix) - install("view-dir-file") # Arbitrary package to add noise + install("view-file") # Arbitrary package to add noise viewpath = str(tmpdir.mkdir("view_{0}".format(cmd))) @@ -205,7 +205,7 @@ def test_view_files_not_ignored( prefix_in_view = viewpath args = [] - view(cmd, *(args + [viewpath, "view-not-ignored", "view-dir-file"])) + view(cmd, *(args + [viewpath, "view-not-ignored", "view-file"])) pkg.assert_installed(prefix_in_view) view("remove", viewpath, "view-not-ignored") diff --git a/lib/spack/spack/test/llnl/util/link_tree.py b/lib/spack/spack/test/llnl/util/link_tree.py index febad1d773..1d57922be2 100644 --- a/lib/spack/spack/test/llnl/util/link_tree.py +++ b/lib/spack/spack/test/llnl/util/link_tree.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import os +import pathlib import sys import pytest @@ -339,3 +340,60 @@ def test_destination_merge_visitor_file_dir_clashes(tmpdir): visit_directory_tree(str(tmpdir.join("a")), DestinationMergeVisitor(b_to_a)) assert b_to_a.fatal_conflicts assert b_to_a.fatal_conflicts[0].dst == "example" + + +def test_source_merge_visitor_does_not_register_identical_file_conflicts(tmp_path: pathlib.Path): + """Tests whether the SourceMergeVisitor does not register identical file conflicts. + but instead registers the file that triggers the potential conflict.""" + (tmp_path / "dir_bottom").mkdir() + (tmp_path / "dir_bottom" / "file").write_bytes(b"hello") + + (tmp_path / "dir_top").mkdir() + (tmp_path / "dir_top" / "file").symlink_to(tmp_path / "dir_bottom" / "file") + (tmp_path / "dir_top" / "zzzz").write_bytes(b"hello") + + visitor = SourceMergeVisitor() + visitor.set_projection(str(tmp_path / "view")) + + visit_directory_tree(str(tmp_path / "dir_top"), visitor) + + # After visiting the top dir, we should have `file` and `zzzz` listed, in that order. Using + # .items() to test order. + assert list(visitor.files.items()) == [ + (str(tmp_path / "view" / "file"), (str(tmp_path / "dir_top"), "file")), + (str(tmp_path / "view" / "zzzz"), (str(tmp_path / "dir_top"), "zzzz")), + ] + + # Then after visiting the bottom dir, the "conflict" should be resolved, and `file` should + # come from the bottom dir. + visit_directory_tree(str(tmp_path / "dir_bottom"), visitor) + assert not visitor.file_conflicts + assert list(visitor.files.items()) == [ + (str(tmp_path / "view" / "zzzz"), (str(tmp_path / "dir_top"), "zzzz")), + (str(tmp_path / "view" / "file"), (str(tmp_path / "dir_bottom"), "file")), + ] + + +def test_source_merge_visitor_does_deals_with_dangling_symlinks(tmp_path: pathlib.Path): + """When a file and a dangling symlink conflict, this should be handled like a file conflict.""" + (tmp_path / "dir_a").mkdir() + os.symlink("non-existent", str(tmp_path / "dir_a" / "file")) + + (tmp_path / "dir_b").mkdir() + (tmp_path / "dir_b" / "file").write_bytes(b"data") + + visitor = SourceMergeVisitor() + visitor.set_projection(str(tmp_path / "view")) + + visit_directory_tree(str(tmp_path / "dir_a"), visitor) + visit_directory_tree(str(tmp_path / "dir_b"), visitor) + + # Check that a conflict was registered. + assert len(visitor.file_conflicts) == 1 + conflict = visitor.file_conflicts[0] + assert conflict.src_a == str(tmp_path / "dir_a" / "file") + assert conflict.src_b == str(tmp_path / "dir_b" / "file") + assert conflict.dst == str(tmp_path / "view" / "file") + + # The first file encountered should be listed. + assert visitor.files == {str(tmp_path / "view" / "file"): (str(tmp_path / "dir_a"), "file")} -- cgit v1.2.3-70-g09d2