diff options
author | James Smillie <83249606+jamessmillie@users.noreply.github.com> | 2023-08-25 13:18:19 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-25 12:18:19 -0700 |
commit | 349ba83bc6a41dc5c641afc355fe9aebfeb505ed (patch) | |
tree | 8bc0a5a7215bc96a0f1f6cd18b353bdf99378814 /lib/spack/llnl/util | |
parent | ecfd9ef12bc4bf39ff92df01921c8f641b54e43e (diff) | |
download | spack-349ba83bc6a41dc5c641afc355fe9aebfeb505ed.tar.gz spack-349ba83bc6a41dc5c641afc355fe9aebfeb505ed.tar.bz2 spack-349ba83bc6a41dc5c641afc355fe9aebfeb505ed.tar.xz spack-349ba83bc6a41dc5c641afc355fe9aebfeb505ed.zip |
Windows symlinking support (#38599)
This reapplies 66f7540, which adds support for hardlinks/junctions on
Windows systems where developer mode is not enabled.
The commit was reverted on account of multiple issues:
* Checks added to prevent dangling symlinks were interfering with
existing CI builds on Linux (i.e. builds that otherwise succeed were
failing for creating dangling symlinks).
* The logic also updated symlinking to perform redirection of relative
paths, which led to malformed symlinks.
This commit fixes these issues.
Diffstat (limited to 'lib/spack/llnl/util')
-rw-r--r-- | lib/spack/llnl/util/filesystem.py | 98 | ||||
-rw-r--r-- | lib/spack/llnl/util/symlink.py | 339 |
2 files changed, 349 insertions, 88 deletions
diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py index bbe83de340..a23053df9c 100644 --- a/lib/spack/llnl/util/filesystem.py +++ b/lib/spack/llnl/util/filesystem.py @@ -18,11 +18,13 @@ import stat import sys import tempfile from contextlib import contextmanager +from itertools import accumulate from typing import Callable, Iterable, List, Match, Optional, Tuple, Union +import llnl.util.symlink from llnl.util import tty from llnl.util.lang import dedupe, memoized -from llnl.util.symlink import islink, symlink +from llnl.util.symlink import islink, readlink, resolve_link_target_relative_to_the_link, symlink from spack.util.executable import Executable, which from spack.util.path import path_to_os_path, system_path_filter @@ -101,7 +103,7 @@ if sys.version_info < (3, 7, 4): pass # follow symlinks (aka don't not follow symlinks) - follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst)) + follow = follow_symlinks or not (islink(src) and islink(dst)) if follow: # use the real function if it exists def lookup(name): @@ -169,7 +171,7 @@ def rename(src, dst): if sys.platform == "win32": # Windows path existence checks will sometimes fail on junctions/links/symlinks # so check for that case - if os.path.exists(dst) or os.path.islink(dst): + if os.path.exists(dst) or islink(dst): os.remove(dst) os.rename(src, dst) @@ -566,7 +568,7 @@ def set_install_permissions(path): # If this points to a file maintained in a Spack prefix, it is assumed that # this function will be invoked on the target. If the file is outside a # Spack-maintained prefix, the permissions should not be modified. 
- if os.path.islink(path): + if islink(path): return if os.path.isdir(path): os.chmod(path, 0o755) @@ -635,7 +637,7 @@ def chmod_x(entry, perms): @system_path_filter def copy_mode(src, dest): """Set the mode of dest to that of src unless it is a link.""" - if os.path.islink(dest): + if islink(dest): return src_mode = os.stat(src).st_mode dest_mode = os.stat(dest).st_mode @@ -722,25 +724,11 @@ def install(src, dest): @system_path_filter -def resolve_link_target_relative_to_the_link(link): - """ - os.path.isdir uses os.path.exists, which for links will check - the existence of the link target. If the link target is relative to - the link, we need to construct a pathname that is valid from - our cwd (which may not be the same as the link's directory) - """ - target = os.readlink(link) - if os.path.isabs(target): - return target - link_dir = os.path.dirname(os.path.abspath(link)) - return os.path.join(link_dir, target) - - -@system_path_filter def copy_tree( src: str, dest: str, symlinks: bool = True, + allow_broken_symlinks: bool = sys.platform != "win32", ignore: Optional[Callable[[str], bool]] = None, _permissions: bool = False, ): @@ -763,6 +751,8 @@ def copy_tree( src (str): the directory to copy dest (str): the destination directory symlinks (bool): whether or not to preserve symlinks + allow_broken_symlinks (bool): whether or not to allow broken (dangling) symlinks, + On Windows, setting this to True will raise an exception. Defaults to true on unix. 
ignore (typing.Callable): function indicating which files to ignore _permissions (bool): for internal use only @@ -770,6 +760,8 @@ def copy_tree( IOError: if *src* does not match any files or directories ValueError: if *src* is a parent directory of *dest* """ + if allow_broken_symlinks and sys.platform == "win32": + raise llnl.util.symlink.SymlinkError("Cannot allow broken symlinks on Windows!") if _permissions: tty.debug("Installing {0} to {1}".format(src, dest)) else: @@ -783,6 +775,11 @@ def copy_tree( if not files: raise IOError("No such file or directory: '{0}'".format(src)) + # For Windows hard-links and junctions, the source path must exist to make a symlink. Add + # all symlinks to this list while traversing the tree, then when finished, make all + # symlinks at the end. + links = [] + for src in files: abs_src = os.path.abspath(src) if not abs_src.endswith(os.path.sep): @@ -805,7 +802,7 @@ def copy_tree( ignore=ignore, follow_nonexisting=True, ): - if os.path.islink(s): + if islink(s): link_target = resolve_link_target_relative_to_the_link(s) if symlinks: target = os.readlink(s) @@ -819,7 +816,9 @@ def copy_tree( tty.debug("Redirecting link {0} to {1}".format(target, new_target)) target = new_target - symlink(target, d) + links.append((target, d, s)) + continue + elif os.path.isdir(link_target): mkdirp(d) else: @@ -834,9 +833,17 @@ def copy_tree( set_install_permissions(d) copy_mode(s, d) + for target, d, s in links: + symlink(target, d, allow_broken_symlinks=allow_broken_symlinks) + if _permissions: + set_install_permissions(d) + copy_mode(s, d) + @system_path_filter -def install_tree(src, dest, symlinks=True, ignore=None): +def install_tree( + src, dest, symlinks=True, ignore=None, allow_broken_symlinks=sys.platform != "win32" +): """Recursively install an entire directory tree rooted at *src*. 
Same as :py:func:`copy_tree` with the addition of setting proper @@ -847,12 +854,21 @@ def install_tree(src, dest, symlinks=True, ignore=None): dest (str): the destination directory symlinks (bool): whether or not to preserve symlinks ignore (typing.Callable): function indicating which files to ignore + allow_broken_symlinks (bool): whether or not to allow broken (dangling) symlinks, + On Windows, setting this to True will raise an exception. Raises: IOError: if *src* does not match any files or directories ValueError: if *src* is a parent directory of *dest* """ - copy_tree(src, dest, symlinks=symlinks, ignore=ignore, _permissions=True) + copy_tree( + src, + dest, + symlinks=symlinks, + allow_broken_symlinks=allow_broken_symlinks, + ignore=ignore, + _permissions=True, + ) @system_path_filter @@ -1256,7 +1272,12 @@ def traverse_tree( Keyword Arguments: order (str): Whether to do pre- or post-order traversal. Accepted values are 'pre' and 'post' - ignore (typing.Callable): function indicating which files to ignore + ignore (typing.Callable): function indicating which files to ignore. This will also + ignore symlinks if they point to an ignored file (regardless of whether the symlink + is explicitly ignored); note this only supports one layer of indirection (i.e. if + you have x -> y -> z, and z is ignored but x/y are not, then y would be ignored + but not x). To avoid this, make sure the ignore function also ignores the symlink + paths too. follow_nonexisting (bool): Whether to descend into directories in ``src`` that do not exit in ``dest``. Default is True follow_links (bool): Whether to descend into symlinks in ``src`` @@ -1283,11 +1304,24 @@ def traverse_tree( dest_child = os.path.join(dest_path, f) rel_child = os.path.join(rel_path, f) + # If the source path is a link and the link's source is ignored, then ignore the link too, + # but only do this if the ignore is defined. 
+ if ignore is not None: + if islink(source_child) and not follow_links: + target = readlink(source_child) + all_parents = accumulate(target.split(os.sep), lambda x, y: os.path.join(x, y)) + if any(map(ignore, all_parents)): + tty.warn( + f"Skipping {source_path} because the source or a part of the source's " + f"path is included in the ignores." + ) + continue + # Treat as a directory # TODO: for symlinks, os.path.isdir looks for the link target. If the # target is relative to the link, then that may not resolve properly # relative to our cwd - see resolve_link_target_relative_to_the_link - if os.path.isdir(source_child) and (follow_links or not os.path.islink(source_child)): + if os.path.isdir(source_child) and (follow_links or not islink(source_child)): # When follow_nonexisting isn't set, don't descend into dirs # in source that do not exist in dest if follow_nonexisting or os.path.exists(dest_child): @@ -1313,7 +1347,11 @@ def traverse_tree( def lexists_islink_isdir(path): """Computes the tuple (lexists(path), islink(path), isdir(path)) in a minimal - number of stat calls.""" + number of stat calls on unix. Use os.path and symlink.islink methods for windows.""" + if sys.platform == "win32": + if not os.path.lexists(path): + return False, False, False + return os.path.lexists(path), islink(path), os.path.isdir(path) # First try to lstat, so we know if it's a link or not. 
try: lst = os.lstat(path) @@ -1528,7 +1566,7 @@ def remove_if_dead_link(path): Parameters: path (str): The potential dead link """ - if os.path.islink(path) and not os.path.exists(path): + if islink(path) and not os.path.exists(path): os.unlink(path) @@ -1587,7 +1625,7 @@ def remove_linked_tree(path): kwargs["onerror"] = readonly_file_handler(ignore_errors=True) if os.path.exists(path): - if os.path.islink(path): + if islink(path): shutil.rmtree(os.path.realpath(path), **kwargs) os.unlink(path) else: @@ -2693,7 +2731,7 @@ def remove_directory_contents(dir): """Remove all contents of a directory.""" if os.path.exists(dir): for entry in [os.path.join(dir, entry) for entry in os.listdir(dir)]: - if os.path.isfile(entry) or os.path.islink(entry): + if os.path.isfile(entry) or islink(entry): os.unlink(entry) else: shutil.rmtree(entry) diff --git a/lib/spack/llnl/util/symlink.py b/lib/spack/llnl/util/symlink.py index 69aacaf9f0..d1084a13fe 100644 --- a/lib/spack/llnl/util/symlink.py +++ b/lib/spack/llnl/util/symlink.py @@ -2,77 +2,188 @@ # Spack Project Developers. See the top-level COPYRIGHT file for details. # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -import errno import os +import re import shutil +import subprocess import sys import tempfile -from os.path import exists, join -from llnl.util import lang +from llnl.util import lang, tty + +from spack.error import SpackError +from spack.util.path import system_path_filter if sys.platform == "win32": from win32file import CreateHardLink +is_windows = sys.platform == "win32" + -def symlink(real_path, link_path): +def symlink(source_path: str, link_path: str, allow_broken_symlinks: bool = not is_windows): """ - Create a symbolic link. + Create a link. + + On non-Windows and Windows with System Administrator + privleges this will be a normal symbolic link via + os.symlink. + + On Windows without privledges the link will be a + junction for a directory and a hardlink for a file. 
+ On Windows the various link types are: + + Symbolic Link: A link to a file or directory on the + same or different volume (drive letter) or even to + a remote file or directory (using UNC in its path). + Need System Administrator privileges to make these. - On Windows, use junctions if os.symlink fails. + Hard Link: A link to a file on the same volume (drive + letter) only. Every file (file's data) has at least 1 + hard link (file's name). But when this method creates + a new hard link there will be 2. Deleting all hard + links effectively deletes the file. Don't need System + Administrator privileges. + + Junction: A link to a directory on the same or different + volume (drive letter) but not to a remote directory. Don't + need System Administrator privileges. + + Parameters: + source_path (str): The real file or directory that the link points to. + Must be absolute OR relative to the link. + link_path (str): The path where the link will exist. + allow_broken_symlinks (bool): On Linux or Mac, don't raise an exception if the source_path + doesn't exist. This will still raise an exception on Windows. """ - if sys.platform != "win32": - os.symlink(real_path, link_path) - elif _win32_can_symlink(): - # Windows requires target_is_directory=True when the target is a dir. - os.symlink(real_path, link_path, target_is_directory=os.path.isdir(real_path)) - else: - try: - # Try to use junctions - _win32_junction(real_path, link_path) - except OSError as e: - if e.errno == errno.EEXIST: - # EEXIST error indicates that file we're trying to "link" - # is already present, don't bother trying to copy which will also fail - # just raise - raise + source_path = os.path.normpath(source_path) + win_source_path = source_path + link_path = os.path.normpath(link_path) + + # Never allow broken links on Windows. 
+ if sys.platform == "win32" and allow_broken_symlinks: + raise ValueError("allow_broken_symlinks parameter cannot be True on Windows.") + + if not allow_broken_symlinks: + # Perform basic checks to make sure symlinking will succeed + if os.path.lexists(link_path): + raise SymlinkError(f"Link path ({link_path}) already exists. Cannot create link.") + + if not os.path.exists(source_path): + if os.path.isabs(source_path) and not allow_broken_symlinks: + # An absolute source path that does not exist will result in a broken link. + raise SymlinkError( + f"Source path ({source_path}) is absolute but does not exist. Resulting " + f"link would be broken so not making link." + ) else: - # If all else fails, fall back to copying files - shutil.copyfile(real_path, link_path) + # os.symlink can create a link when the given source path is relative to + # the link path. Emulate this behavior and check to see if the source exists + # relative to the link patg ahead of link creation to prevent broken + # links from being made. + link_parent_dir = os.path.dirname(link_path) + relative_path = os.path.join(link_parent_dir, source_path) + if os.path.exists(relative_path): + # In order to work on windows, the source path needs to be modified to be + # relative because hardlink/junction dont resolve relative paths the same + # way as os.symlink. This is ignored on other operating systems. + win_source_path = relative_path + elif not allow_broken_symlinks: + raise SymlinkError( + f"The source path ({source_path}) is not relative to the link path " + f"({link_path}). Resulting link would be broken so not making link." + ) + + # Create the symlink + if sys.platform == "win32" and not _windows_can_symlink(): + _windows_create_link(win_source_path, link_path) + else: + os.symlink(source_path, link_path, target_is_directory=os.path.isdir(source_path)) + + +def islink(path: str) -> bool: + """Override os.islink to give correct answer for spack logic. 
+ + For Non-Windows: a link can be determined with the os.path.islink method. + Windows-only methods will return false for other operating systems. + For Windows: spack considers symlinks, hard links, and junctions to + all be links, so if any of those are True, return True. -def islink(path): - return os.path.islink(path) or _win32_is_junction(path) + Args: + path (str): path to check if it is a link. + Returns: + bool - whether the path is any kind link or not. + """ + return any([os.path.islink(path), _windows_is_junction(path), _windows_is_hardlink(path)]) + + +def _windows_is_hardlink(path: str) -> bool: + """Determines if a path is a windows hard link. This is accomplished + by looking at the number of links using os.stat. A non-hard-linked file + will have a st_nlink value of 1, whereas a hard link will have a value + larger than 1. Note that both the original and hard-linked file will + return True because they share the same inode. -# '_win32' functions based on -# https://github.com/Erotemic/ubelt/blob/master/ubelt/util_links.py -def _win32_junction(path, link): - # junctions require absolute paths - if not os.path.isabs(link): - link = os.path.abspath(link) + Args: + path (str): Windows path to check for a hard link - # os.symlink will fail if link exists, emulate the behavior here - if exists(link): - raise OSError(errno.EEXIST, "File exists: %s -> %s" % (link, path)) + Returns: + bool - Whether the path is a hard link or not. + """ + if sys.platform != "win32" or os.path.islink(path) or not os.path.exists(path): + return False - if not os.path.isabs(path): - parent = os.path.join(link, os.pardir) - path = os.path.join(parent, path) - path = os.path.abspath(path) + return os.stat(path).st_nlink > 1 - CreateHardLink(link, path) + +def _windows_is_junction(path: str) -> bool: + """Determines if a path is a windows junction. 
A junction can be + determined using a bitwise AND operation between the file's + attribute bitmask and the known junction bitmask (0x400). + + Args: + path (str): A non-file path + + Returns: + bool - whether the path is a junction or not. + """ + if sys.platform != "win32" or os.path.islink(path) or os.path.isfile(path): + return False + + import ctypes.wintypes + + get_file_attributes = ctypes.windll.kernel32.GetFileAttributesW # type: ignore[attr-defined] + get_file_attributes.argtypes = (ctypes.wintypes.LPWSTR,) + get_file_attributes.restype = ctypes.wintypes.DWORD + + invalid_file_attributes = 0xFFFFFFFF + reparse_point = 0x400 + file_attr = get_file_attributes(str(path)) + + if file_attr == invalid_file_attributes: + return False + + return file_attr & reparse_point > 0 @lang.memoized -def _win32_can_symlink(): +def _windows_can_symlink() -> bool: + """ + Determines if windows is able to make a symlink depending on + the system configuration and the level of the user's permissions. + """ + if sys.platform != "win32": + tty.warn("windows_can_symlink method can't be used on non-Windows OS.") + return False + tempdir = tempfile.mkdtemp() - dpath = join(tempdir, "dpath") - fpath = join(tempdir, "fpath.txt") + dpath = os.path.join(tempdir, "dpath") + fpath = os.path.join(tempdir, "fpath.txt") - dlink = join(tempdir, "dlink") - flink = join(tempdir, "flink.txt") + dlink = os.path.join(tempdir, "dlink") + flink = os.path.join(tempdir, "flink.txt") import llnl.util.filesystem as fs @@ -96,24 +207,136 @@ def _win32_can_symlink(): return can_symlink_directories and can_symlink_files -def _win32_is_junction(path): +def _windows_create_link(source: str, link: str): """ - Determines if a path is a win32 junction + Attempts to create a Hard Link or Junction as an alternative + to a symbolic link. This is called when symbolic links cannot + be created. 
""" - if os.path.islink(path): - return False + if sys.platform != "win32": + raise SymlinkError("windows_create_link method can't be used on non-Windows OS.") + elif os.path.isdir(source): + _windows_create_junction(source=source, link=link) + elif os.path.isfile(source): + _windows_create_hard_link(path=source, link=link) + else: + raise SymlinkError( + f"Cannot create link from {source}. It is neither a file nor a directory." + ) - if sys.platform == "win32": - import ctypes.wintypes - GetFileAttributes = ctypes.windll.kernel32.GetFileAttributesW - GetFileAttributes.argtypes = (ctypes.wintypes.LPWSTR,) - GetFileAttributes.restype = ctypes.wintypes.DWORD +def _windows_create_junction(source: str, link: str): + """Duly verify that the path and link are eligible to create a junction, + then create the junction. + """ + if sys.platform != "win32": + raise SymlinkError("windows_create_junction method can't be used on non-Windows OS.") + elif not os.path.exists(source): + raise SymlinkError("Source path does not exist, cannot create a junction.") + elif os.path.lexists(link): + raise SymlinkError("Link path already exists, cannot create a junction.") + elif not os.path.isdir(source): + raise SymlinkError("Source path is not a directory, cannot create a junction.") - INVALID_FILE_ATTRIBUTES = 0xFFFFFFFF - FILE_ATTRIBUTE_REPARSE_POINT = 0x400 + import subprocess - res = GetFileAttributes(path) - return res != INVALID_FILE_ATTRIBUTES and bool(res & FILE_ATTRIBUTE_REPARSE_POINT) + cmd = ["cmd", "/C", "mklink", "/J", link, source] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = proc.communicate() + tty.debug(out.decode()) + if proc.returncode != 0: + err = err.decode() + tty.error(err) + raise SymlinkError("Make junction command returned a non-zero return code.", err) + + +def _windows_create_hard_link(path: str, link: str): + """Duly verify that the path and link are eligible to create a hard + link, then create the hard link. 
+ """ + if sys.platform != "win32": + raise SymlinkError("windows_create_hard_link method can't be used on non-Windows OS.") + elif not os.path.exists(path): + raise SymlinkError(f"File path {path} does not exist. Cannot create hard link.") + elif os.path.lexists(link): + raise SymlinkError(f"Link path ({link}) already exists. Cannot create hard link.") + elif not os.path.isfile(path): + raise SymlinkError(f"File path ({link}) is not a file. Cannot create hard link.") + else: + tty.debug(f"Creating hard link {link} pointing to {path}") + CreateHardLink(link, path) + + +def readlink(path: str): + """Spack utility to override of os.readlink method to work cross platform""" + if _windows_is_hardlink(path): + return _windows_read_hard_link(path) + elif _windows_is_junction(path): + return _windows_read_junction(path) + else: + return os.readlink(path) - return False + +def _windows_read_hard_link(link: str) -> str: + """Find all of the files that point to the same inode as the link""" + if sys.platform != "win32": + raise SymlinkError("Can't read hard link on non-Windows OS.") + link = os.path.abspath(link) + fsutil_cmd = ["fsutil", "hardlink", "list", link] + proc = subprocess.Popen(fsutil_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + out, err = proc.communicate() + if proc.returncode != 0: + raise SymlinkError(f"An error occurred while reading hard link: {err.decode()}") + + # fsutil response does not include the drive name, so append it back to each linked file. + drive, link_tail = os.path.splitdrive(os.path.abspath(link)) + links = set([os.path.join(drive, p) for p in out.decode().splitlines()]) + links.remove(link) + if len(links) == 1: + return links.pop() + elif len(links) > 1: + # TODO: How best to handle the case where 3 or more paths point to a single inode? 
+ raise SymlinkError(f"Found multiple paths pointing to the same inode {links}") + else: + raise SymlinkError("Cannot determine hard link source path.") + + +def _windows_read_junction(link: str): + """Find the path that a junction points to.""" + if sys.platform != "win32": + raise SymlinkError("Can't read junction on non-Windows OS.") + + link = os.path.abspath(link) + link_basename = os.path.basename(link) + link_parent = os.path.dirname(link) + fsutil_cmd = ["dir", "/a:l", link_parent] + proc = subprocess.Popen(fsutil_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + out, err = proc.communicate() + if proc.returncode != 0: + raise SymlinkError(f"An error occurred while reading junction: {err.decode()}") + matches = re.search(rf"<JUNCTION>\s+{link_basename} \[(.*)]", out.decode()) + if matches: + return matches.group(1) + else: + raise SymlinkError("Could not find junction path.") + + +@system_path_filter +def resolve_link_target_relative_to_the_link(link): + """ + os.path.isdir uses os.path.exists, which for links will check + the existence of the link target. If the link target is relative to + the link, we need to construct a pathname that is valid from + our cwd (which may not be the same as the link's directory) + """ + target = readlink(link) + if os.path.isabs(target): + return target + link_dir = os.path.dirname(os.path.abspath(link)) + return os.path.join(link_dir, target) + + +class SymlinkError(SpackError): + """Exception class for errors raised while creating symlinks, + junctions and hard links + """ |