From 519deac54458068d69775d9a1ceb89a91a9b2731 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Mon, 12 Feb 2024 19:52:52 +0100 Subject: Fix multiple issues with Python in views (#42601) This fixes bugs, performance issues, and removes no longer necessary code. Short version: 1. Creating views from Python extensions would error if the Spack `opt` dir itself was in some symlinked directory. Use of `realpath` would expand those, and keying into `merge_map` would fail. 2. Creating views from Python extensions (and Python itself, potentially) could fail if the `bin/` dir contains symlinks pointing outside the package prefix -- Spack keyed into `merge_map[target_of_symlink]` incorrectly. 3. In the `python` package the `remove_files_from_view` function was broken after a breaking API change two years ago (#24355). However, the entire function body was redundant anyways, so solved it by removing it. 4. Notions of "global view" (i.e. python extensions being linked into Python's own prefix instead of into a view) are completely outdated, and removed. It used to be supported but was removed years ago. 5. Views for Python extension would _always_ copy non-symlinks in `./bin/*`, which is a big mistake, since all we care about is rewriting shebangs of scripts; we don't want to copy binaries. Now we first check if the file is executable, and then read two bytes to check if it has a shebang, and only if so, copy the entire file and patch up shebangs. The bug fixes for (1) and (2) basically consist of getting rid of `realpath` entirely, and instead simply keep track of file identifiers of files that are copied/modified in the view. Only after patching up regular files do we iterate over symlinks and check if they target one of those. If so, retarget it to the modified file in the view. --- lib/spack/llnl/util/filesystem.py | 34 ++++++++++++--------- lib/spack/spack/build_systems/python.py | 54 ++++++++++++++++++++++----------- 2 files changed, 57 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py index cff512a167..b13fc00bd0 100644 --- a/lib/spack/llnl/util/filesystem.py +++ b/lib/spack/llnl/util/filesystem.py @@ -920,28 +920,34 @@ def get_filetype(path_name): return output.strip() +def has_shebang(path): + """Returns whether a path has a shebang line. Returns False if the file cannot be opened.""" + try: + with open(path, "rb") as f: + return f.read(2) == b"#!" + except OSError: + return False + + @system_path_filter def is_nonsymlink_exe_with_shebang(path): - """ - Returns whether the path is an executable script with a shebang. - Return False when the path is a *symlink* to an executable script. - """ + """Returns whether the path is an executable regular file with a shebang. Returns False too + when the path is a symlink to a script, and also when the file cannot be opened.""" try: st = os.lstat(path) - # Should not be a symlink - if stat.S_ISLNK(st.st_mode): - return False + except OSError: + return False - # Should be executable - if not st.st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH): - return False + # Should not be a symlink + if stat.S_ISLNK(st.st_mode): + return False - # Should start with a shebang - with open(path, "rb") as f: - return f.read(2) == b"#!" - except (IOError, OSError): + # Should be executable + if not st.st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH): return False + return has_shebang(path) + @system_path_filter(arg_slice=slice(1)) def chgrp_if_not_world_writable(path, group): diff --git a/lib/spack/spack/build_systems/python.py b/lib/spack/spack/build_systems/python.py index 39dc7519e8..824d634b3e 100644 --- a/lib/spack/spack/build_systems/python.py +++ b/lib/spack/spack/build_systems/python.py @@ -6,7 +6,8 @@ import inspect import os import re import shutil -from typing import Iterable, List, Mapping, Optional +import stat +from typing import Dict, Iterable, List, Mapping, Optional, Tuple import archspec @@ -136,31 +137,50 @@ class PythonExtension(spack.package_base.PackageBase): return conflicts def add_files_to_view(self, view, merge_map, skip_if_exists=True): - if not self.extendee_spec: + # Patch up shebangs to the python linked in the view only if python is built by Spack. + if not self.extendee_spec or self.extendee_spec.external: return super().add_files_to_view(view, merge_map, skip_if_exists) + # We only patch shebangs in the bin directory. + copied_files: Dict[Tuple[int, int], str] = {} # File identifier -> source + delayed_links: List[Tuple[str, str]] = [] # List of symlinks from merge map + bin_dir = self.spec.prefix.bin python_prefix = self.extendee_spec.prefix - python_is_external = self.extendee_spec.external - global_view = fs.same_path(python_prefix, view.get_projection_for_spec(self.spec)) for src, dst in merge_map.items(): - if os.path.exists(dst): + if skip_if_exists and os.path.lexists(dst): continue - elif global_view or not fs.path_contains_subdirectory(src, bin_dir): + + if not fs.path_contains_subdirectory(src, bin_dir): view.link(src, dst) - elif not os.path.islink(src): + continue + + s = os.lstat(src) + + # Symlink is delayed because we may need to re-target if its target is copied in view + if stat.S_ISLNK(s.st_mode): + delayed_links.append((src, dst)) + continue + + # If it's executable and has a shebang, copy and patch it. + if (s.st_mode & 0b111) and fs.has_shebang(src): + copied_files[(s.st_dev, s.st_ino)] = dst shutil.copy2(src, dst) - is_script = fs.is_nonsymlink_exe_with_shebang(src) - if is_script and not python_is_external: - fs.filter_file( - python_prefix, - os.path.abspath(view.get_projection_for_spec(self.spec)), - dst, - ) + fs.filter_file( + python_prefix, os.path.abspath(view.get_projection_for_spec(self.spec)), dst + ) + + # Finally re-target the symlinks that point to copied files. + for src, dst in delayed_links: + try: + s = os.stat(src) + target = copied_files[(s.st_dev, s.st_ino)] + except (OSError, KeyError): + target = None + if target: + os.symlink(target, dst) else: - orig_link_target = os.path.realpath(src) - new_link_target = os.path.abspath(merge_map[orig_link_target]) - view.link(new_link_target, dst) + view.link(src, dst, spec=self.spec) def remove_files_from_view(self, view, merge_map): ignore_namespace = False -- cgit v1.2.3-70-g09d2