diff options
author | Harmen Stoppels <me@harmenstoppels.nl> | 2024-11-08 22:55:53 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-08 13:55:53 -0800 |
commit | 907a37145f9434d36b238ef8f435f157a7fb129b (patch) | |
tree | ec33e4c12de2f2a1ef38d5f39b0ec96951e95c0d /lib | |
parent | 4778d2d332d36c3db0054746d75531d6e357effb (diff) | |
download | spack-907a37145f9434d36b238ef8f435f157a7fb129b.tar.gz spack-907a37145f9434d36b238ef8f435f157a7fb129b.tar.bz2 spack-907a37145f9434d36b238ef8f435f157a7fb129b.tar.xz spack-907a37145f9434d36b238ef8f435f157a7fb129b.zip |
llnl.util.filesystem: multiple entrypoints and max_depth (#47495)
If a package `foo` doesn't implement `libs`, the default was to search recursively for `libfoo` whenever asking for `spec[foo].libs` (this also happens automatically if a package includes `foo` as a link dependency).
This can lead to some strange behavior:
1. A package that is normally used only as a build dependency (e.g. `cmake` at one point) is declared as
`depends_on("cmake")` without an explicit `type`, so it is also treated as a link dependency; this leads to a fully-recursive search for `libcmake` (which can take
"forever" when CMake is registered as an external with a prefix like `/usr`, particularly on NFS mounts).
2. A similar hang can occur if a package is registered as an external with an incorrect prefix, because the recursive search then walks an unrelated directory tree.
- [x] Update the default library search to stop after a maximum depth (by default, search
the root prefix and each directory in it, but no lower).
The following is a list of known changes to `find` compared to `develop`:
1. Matching directories are no longer returned -- `find` consistently only finds non-dirs,
even at `max_depth`
2. Symlinked directories are followed (needed to support max_depth)
3. `find(..., "dir/*.txt")` is allowed, for finding files inside certain dirs. These "complex"
patterns are delegated to `glob`, like they are on `develop`.
4. The `root` and `files` arguments both accept generic sequences, and each `root` entry
may be either a `str` or a `pathlib.Path`. This allows us to specify multiple entry points to `find`.
---------
Co-authored-by: Peter Scheibel <scheibel1@llnl.gov>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/llnl/util/filesystem.py | 265 | ||||
-rw-r--r-- | lib/spack/llnl/util/lang.py | 17 | ||||
-rw-r--r-- | lib/spack/spack/test/llnl/util/file_list.py | 32 | ||||
-rw-r--r-- | lib/spack/spack/test/llnl/util/filesystem.py | 228 | ||||
-rw-r--r-- | lib/spack/spack/test/llnl/util/lang.py | 16 | ||||
-rw-r--r-- | lib/spack/spack/test/test_suite.py | 8 |
6 files changed, 455 insertions, 111 deletions
diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py index 00bb270151..83cbe45104 100644 --- a/lib/spack/llnl/util/filesystem.py +++ b/lib/spack/llnl/util/filesystem.py @@ -20,11 +20,23 @@ import sys import tempfile from contextlib import contextmanager from itertools import accumulate -from typing import Callable, Iterable, List, Match, Optional, Tuple, Union +from typing import ( + Callable, + Deque, + Dict, + Iterable, + List, + Match, + Optional, + Sequence, + Set, + Tuple, + Union, +) import llnl.util.symlink from llnl.util import tty -from llnl.util.lang import dedupe, memoized +from llnl.util.lang import dedupe, fnmatch_translate_multiple, memoized from llnl.util.symlink import islink, readlink, resolve_link_target_relative_to_the_link, symlink from ..path import path_to_os_path, system_path_filter @@ -85,6 +97,8 @@ __all__ = [ "visit_directory_tree", ] +Path = Union[str, pathlib.Path] + if sys.version_info < (3, 7, 4): # monkeypatch shutil.copystat to fix PermissionError when copying read-only # files on Lustre when using Python < 3.7.4 @@ -1673,105 +1687,199 @@ def find_first(root: str, files: Union[Iterable[str], str], bfs_depth: int = 2) return FindFirstFile(root, *files, bfs_depth=bfs_depth).find() -def find(root, files, recursive=True): - """Search for ``files`` starting from the ``root`` directory. - - Like GNU/BSD find but written entirely in Python. - - Examples: - - .. code-block:: console - - $ find /usr -name python - - is equivalent to: - - >>> find('/usr', 'python') - - .. code-block:: console - - $ find /usr/local/bin -maxdepth 1 -name python - - is equivalent to: - - >>> find('/usr/local/bin', 'python', recursive=False) +def find( + root: Union[Path, Sequence[Path]], + files: Union[str, Sequence[str]], + recursive: bool = True, + max_depth: Optional[int] = None, +) -> List[str]: + """Finds all non-directory files matching the patterns from ``files`` starting from ``root``. 
+ This function returns a deterministic result for the same input and directory structure when + run multiple times. Symlinked directories are followed, and unique directories are searched + only once. Each matching file is returned only once at lowest depth in case multiple paths + exist due to symlinked directories. Accepts any glob characters accepted by fnmatch: ========== ==================================== Pattern Meaning ========== ==================================== - ``*`` matches everything + ``*`` matches one or more characters ``?`` matches any single character ``[seq]`` matches any character in ``seq`` ``[!seq]`` matches any character not in ``seq`` ========== ==================================== + Examples: + + >>> find("/usr", "*.txt", recursive=True, max_depth=2) + + finds all files with the extension ``.txt`` in the directory ``/usr`` and subdirectories up to + depth 2. + + >>> find(["/usr", "/var"], ["*.txt", "*.log"], recursive=True) + + finds all files with the extension ``.txt`` or ``.log`` in the directories ``/usr`` and + ``/var`` at any depth. + + >>> find("/usr", "GL/*.h", recursive=True) + + finds all header files in a directory GL at any depth in the directory ``/usr``. + Parameters: - root (str): The root directory to start searching from - files (str or collections.abc.Sequence): Library name(s) to search for - recursive (bool): if False search only root folder, - if True descends top-down from the root. Defaults to True. + root: One or more root directories to start searching from + files: One or more filename patterns to search for + recursive: if False search only root, if True descends from roots. Defaults to True. + max_depth: if set, don't search below this depth. Cannot be set if recursive is False - Returns: - list: The files that have been found + Returns a list of absolute, matching file paths. 
""" + if isinstance(root, (str, pathlib.Path)): + root = [root] + elif not isinstance(root, collections.abc.Sequence): + raise TypeError(f"'root' arg must be a path or a sequence of paths, not '{type(root)}']") + if isinstance(files, str): files = [files] + elif not isinstance(files, collections.abc.Sequence): + raise TypeError(f"'files' arg must be str or a sequence of str, not '{type(files)}']") - if recursive: - tty.debug(f"Find (recursive): {root} {str(files)}") - result = _find_recursive(root, files) - else: - tty.debug(f"Find (not recursive): {root} {str(files)}") - result = _find_non_recursive(root, files) + # If recursive is false, max_depth can only be None or 0 + if max_depth and not recursive: + raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False") - tty.debug(f"Find complete: {root} {str(files)}") + tty.debug(f"Find (max depth = {max_depth}): {root} {files}") + if not recursive: + max_depth = 0 + elif max_depth is None: + max_depth = sys.maxsize + result = _find_max_depth(root, files, max_depth) + tty.debug(f"Find complete: {root} {files}") return result -@system_path_filter -def _find_recursive(root, search_files): - # The variable here is **on purpose** a defaultdict. The idea is that - # we want to poke the filesystem as little as possible, but still maintain - # stability in the order of the answer. Thus we are recording each library - # found in a key, and reconstructing the stable order later. 
- found_files = collections.defaultdict(list) - - # Make the path absolute to have os.walk also return an absolute path - root = os.path.abspath(root) - for path, _, list_files in os.walk(root): - for search_file in search_files: - matches = glob.glob(os.path.join(path, search_file)) - matches = [os.path.join(path, x) for x in matches] - found_files[search_file].extend(matches) +def _log_file_access_issue(e: OSError, path: str) -> None: + errno_name = errno.errorcode.get(e.errno, "UNKNOWN") + tty.debug(f"find must skip {path}: {errno_name} {e}") - answer = [] - for search_file in search_files: - answer.extend(found_files[search_file]) - return answer +def _file_id(s: os.stat_result) -> Tuple[int, int]: + # Note: on windows, st_ino is the file index and st_dev is the volume serial number. See + # https://github.com/python/cpython/blob/3.9/Python/fileutils.c + return (s.st_ino, s.st_dev) -@system_path_filter -def _find_non_recursive(root, search_files): - # The variable here is **on purpose** a defaultdict as os.list_dir - # can return files in any order (does not preserve stability) - found_files = collections.defaultdict(list) +def _dedupe_files(paths: List[str]) -> List[str]: + """Deduplicate files by inode and device, dropping files that cannot be accessed.""" + unique_files: List[str] = [] + # tuple of (inode, device) for each file without following symlinks + visited: Set[Tuple[int, int]] = set() + for path in paths: + try: + stat_info = os.lstat(path) + except OSError as e: + _log_file_access_issue(e, path) + continue + file_id = _file_id(stat_info) + if file_id not in visited: + unique_files.append(path) + visited.add(file_id) + return unique_files + + +def _find_max_depth( + roots: Sequence[Path], globs: Sequence[str], max_depth: int = sys.maxsize +) -> List[str]: + """See ``find`` for the public API.""" + # We optimize for the common case of simple filename only patterns: a single, combined regex + # is used. 
For complex patterns that include path components, we use a slower glob call from + # every directory we visit within max_depth. + filename_only_patterns = { + f"pattern_{i}": os.path.normcase(x) for i, x in enumerate(globs) if "/" not in x + } + complex_patterns = {f"pattern_{i}": x for i, x in enumerate(globs) if "/" in x} + regex = re.compile(fnmatch_translate_multiple(filename_only_patterns)) + # Ordered dictionary that keeps track of what pattern found which files + matched_paths: Dict[str, List[str]] = {f"pattern_{i}": [] for i, _ in enumerate(globs)} + # Ensure returned paths are always absolute + roots = [os.path.abspath(r) for r in roots] + # Breadth-first search queue. Each element is a tuple of (depth, dir) + dir_queue: Deque[Tuple[int, str]] = collections.deque() + # Set of visited directories. Each element is a tuple of (inode, device) + visited_dirs: Set[Tuple[int, int]] = set() + + for root in roots: + try: + stat_root = os.stat(root) + except OSError as e: + _log_file_access_issue(e, root) + continue + dir_id = _file_id(stat_root) + if dir_id not in visited_dirs: + dir_queue.appendleft((0, root)) + visited_dirs.add(dir_id) - # Make the path absolute to have absolute path returned - root = os.path.abspath(root) + while dir_queue: + depth, curr_dir = dir_queue.pop() + try: + dir_iter = os.scandir(curr_dir) + except OSError as e: + _log_file_access_issue(e, curr_dir) + continue - for search_file in search_files: - matches = glob.glob(os.path.join(root, search_file)) - matches = [os.path.join(root, x) for x in matches] - found_files[search_file].extend(matches) + # Use glob.glob for complex patterns. 
+ for pattern_name, pattern in complex_patterns.items(): + matched_paths[pattern_name].extend( + path + for path in glob.glob(os.path.join(curr_dir, pattern)) + if not os.path.isdir(path) + ) - answer = [] - for search_file in search_files: - answer.extend(found_files[search_file]) + with dir_iter: + ordered_entries = sorted(dir_iter, key=lambda x: x.name) + for dir_entry in ordered_entries: + try: + it_is_a_dir = dir_entry.is_dir(follow_symlinks=True) + except OSError as e: + # Possible permission issue, or a symlink that cannot be resolved (ELOOP). + _log_file_access_issue(e, dir_entry.path) + continue - return answer + if it_is_a_dir: + if depth >= max_depth: + continue + try: + # The stat should be performed in a try/except block. We repeat that here + # vs. moving to the above block because we only want to call `stat` if we + # haven't exceeded our max_depth + if sys.platform == "win32": + # Note: st_ino/st_dev on DirEntry.stat are not set on Windows, so we + # have to call os.stat + stat_info = os.stat(dir_entry.path, follow_symlinks=True) + else: + stat_info = dir_entry.stat(follow_symlinks=True) + except OSError as e: + _log_file_access_issue(e, dir_entry.path) + continue + + dir_id = _file_id(stat_info) + if dir_id not in visited_dirs: + dir_queue.appendleft((depth + 1, dir_entry.path)) + visited_dirs.add(dir_id) + elif filename_only_patterns: + m = regex.match(os.path.normcase(dir_entry.name)) + if not m: + continue + for pattern_name in filename_only_patterns: + if m.group(pattern_name): + matched_paths[pattern_name].append(dir_entry.path) + break + + all_matching_paths = [path for paths in matched_paths.values() for path in paths] + + # we only dedupe files if we have any complex patterns, since only they can match the same file + # multiple times + return _dedupe_files(all_matching_paths) if complex_patterns else all_matching_paths # Utilities for libraries and headers @@ -2210,7 +2318,9 @@ def find_system_libraries(libraries, shared=True): return 
libraries_found -def find_libraries(libraries, root, shared=True, recursive=False, runtime=True): +def find_libraries( + libraries, root, shared=True, recursive=False, runtime=True, max_depth: Optional[int] = None +): """Returns an iterable of full paths to libraries found in a root dir. Accepts any glob characters accepted by fnmatch: @@ -2231,6 +2341,8 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True): otherwise for static. Defaults to True. recursive (bool): if False search only root folder, if True descends top-down from the root. Defaults to False. + max_depth (int): if set, don't search below this depth. Cannot be set + if recursive is False runtime (bool): Windows only option, no-op elsewhere. If true, search for runtime shared libs (.DLL), otherwise, search for .Lib files. If shared is false, this has no meaning. @@ -2239,6 +2351,7 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True): Returns: LibraryList: The libraries that have been found """ + if isinstance(libraries, str): libraries = [libraries] elif not isinstance(libraries, collections.abc.Sequence): @@ -2271,8 +2384,10 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True): libraries = ["{0}.{1}".format(lib, suffix) for lib in libraries for suffix in suffixes] if not recursive: + if max_depth: + raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False") # If not recursive, look for the libraries directly in root - return LibraryList(find(root, libraries, False)) + return LibraryList(find(root, libraries, recursive=False)) # To speedup the search for external packages configured e.g. 
in /usr, # perform first non-recursive search in root/lib then in root/lib64 and @@ -2290,7 +2405,7 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True): if found_libs: break else: - found_libs = find(root, libraries, True) + found_libs = find(root, libraries, recursive=True, max_depth=max_depth) return LibraryList(found_libs) diff --git a/lib/spack/llnl/util/lang.py b/lib/spack/llnl/util/lang.py index f43773346a..4913a50fad 100644 --- a/lib/spack/llnl/util/lang.py +++ b/lib/spack/llnl/util/lang.py @@ -5,15 +5,17 @@ import collections.abc import contextlib +import fnmatch import functools import itertools import os import re import sys import traceback +import typing import warnings from datetime import datetime, timedelta -from typing import Callable, Iterable, List, Tuple, TypeVar +from typing import Callable, Dict, Iterable, List, Tuple, TypeVar # Ignore emacs backups when listing modules ignore_modules = r"^\.#|~$" @@ -859,6 +861,19 @@ def elide_list(line_list: List[str], max_num: int = 10) -> List[str]: return line_list +if sys.version_info >= (3, 9): + PatternStr = re.Pattern[str] +else: + PatternStr = typing.Pattern[str] + + +def fnmatch_translate_multiple(named_patterns: Dict[str, str]) -> str: + """Similar to ``fnmatch.translate``, but takes an ordered dictionary where keys are pattern + names, and values are filename patterns. The output is a regex that matches any of the + patterns in order, and named capture groups are used to identify which pattern matched.""" + return "|".join(f"(?P<{n}>{fnmatch.translate(p)})" for n, p in named_patterns.items()) + + @contextlib.contextmanager def nullcontext(*args, **kwargs): """Empty context manager. 
diff --git a/lib/spack/spack/test/llnl/util/file_list.py b/lib/spack/spack/test/llnl/util/file_list.py index 75ba3ae89d..e2ff5a8210 100644 --- a/lib/spack/spack/test/llnl/util/file_list.py +++ b/lib/spack/spack/test/llnl/util/file_list.py @@ -9,7 +9,7 @@ import sys import pytest -from llnl.util.filesystem import HeaderList, LibraryList, find, find_headers, find_libraries +from llnl.util.filesystem import HeaderList, LibraryList, find_headers, find_libraries import spack.paths @@ -324,33 +324,3 @@ def test_searching_order(search_fn, search_list, root, kwargs): # List should be empty here assert len(rlist) == 0 - - -@pytest.mark.parametrize( - "root,search_list,kwargs,expected", - [ - ( - search_dir, - "*/*bar.tx?", - {"recursive": False}, - [ - os.path.join(search_dir, os.path.join("a", "foobar.txt")), - os.path.join(search_dir, os.path.join("b", "bar.txp")), - os.path.join(search_dir, os.path.join("c", "bar.txt")), - ], - ), - ( - search_dir, - "*/*bar.tx?", - {"recursive": True}, - [ - os.path.join(search_dir, os.path.join("a", "foobar.txt")), - os.path.join(search_dir, os.path.join("b", "bar.txp")), - os.path.join(search_dir, os.path.join("c", "bar.txt")), - ], - ), - ], -) -def test_find_with_globbing(root, search_list, kwargs, expected): - matches = find(root, search_list, **kwargs) - assert sorted(matches) == sorted(expected) diff --git a/lib/spack/spack/test/llnl/util/filesystem.py b/lib/spack/spack/test/llnl/util/filesystem.py index a0c9874769..fd801295f4 100644 --- a/lib/spack/spack/test/llnl/util/filesystem.py +++ b/lib/spack/spack/test/llnl/util/filesystem.py @@ -6,6 +6,7 @@ """Tests for ``llnl/util/filesystem.py``""" import filecmp import os +import pathlib import shutil import stat import sys @@ -14,7 +15,8 @@ from contextlib import contextmanager import pytest import llnl.util.filesystem as fs -from llnl.util.symlink import islink, readlink, symlink +import llnl.util.symlink +from llnl.util.symlink import _windows_can_symlink, islink, readlink, symlink 
import spack.paths @@ -1035,3 +1037,227 @@ def test_windows_sfn(tmpdir): assert "d\\LONGER~1" in fs.windows_sfn(d) assert "d\\LONGER~2" in fs.windows_sfn(e) shutil.rmtree(tmpdir.join("d")) + + +@pytest.fixture +def dir_structure_with_things_to_find(tmpdir): + """ + <root>/ + dir_one/ + file_one + dir_two/ + dir_three/ + dir_four/ + file_two + file_three + file_four + """ + dir_one = tmpdir.join("dir_one").ensure(dir=True) + tmpdir.join("dir_two").ensure(dir=True) + dir_three = tmpdir.join("dir_three").ensure(dir=True) + dir_four = dir_three.join("dir_four").ensure(dir=True) + + locations = {} + locations["file_one"] = str(dir_one.join("file_one").ensure()) + locations["file_two"] = str(dir_four.join("file_two").ensure()) + locations["file_three"] = str(dir_three.join("file_three").ensure()) + locations["file_four"] = str(tmpdir.join("file_four").ensure()) + + return str(tmpdir), locations + + +def test_find_path_glob_matches(dir_structure_with_things_to_find): + root, locations = dir_structure_with_things_to_find + # both file name and path match + assert ( + fs.find(root, "file_two") + == fs.find(root, "*/*/file_two") + == fs.find(root, "dir_t*/*/*two") + == [locations["file_two"]] + ) + # ensure that * does not match directory separators + assert fs.find(root, "dir*file_two") == [] + # ensure that file name matches after / are matched from the start of the file name + assert fs.find(root, "*/ile_two") == [] + # file name matches exist, but not with these paths + assert fs.find(root, "dir_one/*/*two") == fs.find(root, "*/*/*/*/file_two") == [] + + +def test_find_max_depth(dir_structure_with_things_to_find): + root, locations = dir_structure_with_things_to_find + + # Make sure the paths we use to verify are absolute + assert os.path.isabs(locations["file_one"]) + + assert set(fs.find(root, "file_*", max_depth=0)) == {locations["file_four"]} + assert set(fs.find(root, "file_*", max_depth=1)) == { + locations["file_one"], + locations["file_three"], + 
locations["file_four"], + } + assert set(fs.find(root, "file_two", max_depth=2)) == {locations["file_two"]} + assert not set(fs.find(root, "file_two", max_depth=1)) + assert set(fs.find(root, "file_two")) == {locations["file_two"]} + assert set(fs.find(root, "file_*")) == set(locations.values()) + + +def test_find_max_depth_relative(dir_structure_with_things_to_find): + """find_max_depth should return absolute paths even if the provided path is relative.""" + root, locations = dir_structure_with_things_to_find + with fs.working_dir(root): + assert set(fs.find(".", "file_*", max_depth=0)) == {locations["file_four"]} + assert set(fs.find(".", "file_two", max_depth=2)) == {locations["file_two"]} + + +@pytest.mark.parametrize("recursive,max_depth", [(False, -1), (False, 1)]) +def test_max_depth_and_recursive_errors(tmpdir, recursive, max_depth): + root = str(tmpdir) + error_str = "cannot be set if recursive is False" + with pytest.raises(ValueError, match=error_str): + fs.find(root, ["some_file"], recursive=recursive, max_depth=max_depth) + + with pytest.raises(ValueError, match=error_str): + fs.find_libraries(["some_lib"], root, recursive=recursive, max_depth=max_depth) + + +@pytest.fixture(params=[True, False]) +def complex_dir_structure(request, tmpdir): + """ + "lx-dy" means "level x, directory y" + "lx-fy" means "level x, file y" + "lx-sy" means "level x, symlink y" + + <root>/ + l1-d1/ + l2-d1/ + l3-s1 -> l1-d2 # points to directory above l2-d1 + l3-d2/ + l4-f1 + l3-s3 -> l1-d1 # cyclic link + l3-d4/ + l4-f2 + l1-d2/ + l2-f1 + l2-d2/ + l3-f3 + l2-s3 -> l2-d2 + l1-s3 -> l3-d4 # a link that "skips" a directory level + l1-s4 -> l2-s3 # a link to a link to a dir + """ + use_junctions = request.param + if sys.platform == "win32" and not use_junctions and not _windows_can_symlink(): + pytest.skip("This Windows instance is not configured with symlink support") + elif sys.platform != "win32" and use_junctions: + pytest.skip("Junctions are a Windows-only feature") + + 
l1_d1 = tmpdir.join("l1-d1").ensure(dir=True) + l2_d1 = l1_d1.join("l2-d1").ensure(dir=True) + l3_d2 = l2_d1.join("l3-d2").ensure(dir=True) + l3_d4 = l2_d1.join("l3-d4").ensure(dir=True) + l1_d2 = tmpdir.join("l1-d2").ensure(dir=True) + l2_d2 = l1_d2.join("l1-d2").ensure(dir=True) + + if use_junctions: + link_fn = llnl.util.symlink._windows_create_junction + else: + link_fn = os.symlink + + link_fn(l1_d2, pathlib.Path(l2_d1) / "l3-s1") + link_fn(l1_d1, pathlib.Path(l2_d1) / "l3-s3") + link_fn(l3_d4, pathlib.Path(tmpdir) / "l1-s3") + l2_s3 = pathlib.Path(l1_d2) / "l2-s3" + link_fn(l2_d2, l2_s3) + link_fn(l2_s3, pathlib.Path(tmpdir) / "l1-s4") + + locations = { + "l4-f1": str(l3_d2.join("l4-f1").ensure()), + "l4-f2-full": str(l3_d4.join("l4-f2").ensure()), + "l4-f2-link": str(pathlib.Path(tmpdir) / "l1-s3" / "l4-f2"), + "l2-f1": str(l1_d2.join("l2-f1").ensure()), + "l2-f1-link": str(pathlib.Path(tmpdir) / "l1-d1" / "l2-d1" / "l3-s1" / "l2-f1"), + "l3-f3-full": str(l2_d2.join("l3-f3").ensure()), + "l3-f3-link-l1": str(pathlib.Path(tmpdir) / "l1-s4" / "l3-f3"), + } + + return str(tmpdir), locations + + +def test_find_max_depth_symlinks(complex_dir_structure): + root, locations = complex_dir_structure + root = pathlib.Path(root) + assert set(fs.find(root, "l4-f1")) == {locations["l4-f1"]} + assert set(fs.find(root / "l1-s3", "l4-f2", max_depth=0)) == {locations["l4-f2-link"]} + assert set(fs.find(root / "l1-d1", "l2-f1")) == {locations["l2-f1-link"]} + # File is accessible via symlink and subdir, the link path will be + # searched first, and the directory will not be searched again when + # it is encountered the second time (via not-link) in the traversal + assert set(fs.find(root, "l4-f2")) == {locations["l4-f2-link"]} + # File is accessible only via the dir, so the full file path should + # be reported + assert set(fs.find(root / "l1-d1", "l4-f2")) == {locations["l4-f2-full"]} + # Check following links to links + assert set(fs.find(root, "l3-f3")) == 
{locations["l3-f3-link-l1"]} + + +def test_find_max_depth_multiple_and_repeated_entry_points(complex_dir_structure): + root, locations = complex_dir_structure + + fst = str(pathlib.Path(root) / "l1-d1" / "l2-d1") + snd = str(pathlib.Path(root) / "l1-d2") + nonexistent = str(pathlib.Path(root) / "nonexistent") + + assert set(fs.find([fst, snd, fst, snd, nonexistent], ["l*-f*"], max_depth=1)) == { + locations["l2-f1"], + locations["l4-f1"], + locations["l4-f2-full"], + locations["l3-f3-full"], + } + + +def test_multiple_patterns(complex_dir_structure): + root, _ = complex_dir_structure + paths = fs.find(root, ["l2-f1", "l*-d*/l3-f3", "*", "*/*"]) + # There shouldn't be duplicate results with multiple, overlapping patterns + assert len(set(paths)) == len(paths) + # All files should be found + filenames = [os.path.basename(p) for p in paths] + assert set(filenames) == {"l2-f1", "l3-f3", "l4-f1", "l4-f2"} + # They are ordered by first matching pattern (this is a bit of an implementation detail, + # and we could decide to change the exact order in the future) + assert filenames[0] == "l2-f1" + assert filenames[1] == "l3-f3" + + +def test_find_input_types(tmp_path: pathlib.Path): + """test that find only accepts sequences and instances of pathlib.Path and str for root, and + only sequences and instances of str for patterns. 
In principle mypy catches these issues, but + it is not enabled on all call-sites.""" + (tmp_path / "file.txt").write_text("") + assert ( + fs.find(tmp_path, "file.txt") + == fs.find(str(tmp_path), "file.txt") + == fs.find([tmp_path, str(tmp_path)], "file.txt") + == fs.find((tmp_path, str(tmp_path)), "file.txt") + == fs.find(tmp_path, "file.txt") + == fs.find(tmp_path, ["file.txt"]) + == fs.find(tmp_path, ("file.txt",)) + == [str(tmp_path / "file.txt")] + ) + + with pytest.raises(TypeError): + fs.find(tmp_path, pathlib.Path("file.txt")) # type: ignore + + with pytest.raises(TypeError): + fs.find(1, "file.txt") # type: ignore + + +def test_find_only_finds_files(tmp_path: pathlib.Path): + """ensure that find only returns files even at max_depth""" + (tmp_path / "subdir").mkdir() + (tmp_path / "subdir" / "dir").mkdir() + (tmp_path / "subdir" / "file.txt").write_text("") + assert ( + fs.find(tmp_path, "*", max_depth=1) + == fs.find(tmp_path, "*/*", max_depth=1) + == [str(tmp_path / "subdir" / "file.txt")] + ) diff --git a/lib/spack/spack/test/llnl/util/lang.py b/lib/spack/spack/test/llnl/util/lang.py index 52dcf3950a..6926c50cd8 100644 --- a/lib/spack/spack/test/llnl/util/lang.py +++ b/lib/spack/spack/test/llnl/util/lang.py @@ -373,3 +373,19 @@ def test_deprecated_property(): _SomeClass.deprecated.error_lvl = 2 with pytest.raises(AttributeError): _ = s.deprecated + + +def test_fnmatch_multiple(): + named_patterns = {"a": "libf*o.so", "b": "libb*r.so"} + regex = re.compile(llnl.util.lang.fnmatch_translate_multiple(named_patterns)) + + a = regex.match("libfoo.so") + assert a and a.group("a") == "libfoo.so" + + b = regex.match("libbar.so") + assert b and b.group("b") == "libbar.so" + + assert not regex.match("libfoo.so.1") + assert not regex.match("libbar.so.1") + assert not regex.match("libfoo.solibbar.so") + assert not regex.match("libbaz.so") diff --git a/lib/spack/spack/test/test_suite.py b/lib/spack/spack/test/test_suite.py index 60a54e7171..3ed4e30d42 100644 --- 
a/lib/spack/spack/test/test_suite.py +++ b/lib/spack/spack/test/test_suite.py @@ -501,18 +501,20 @@ def test_find_required_file(tmpdir): # First just find a single path results = spack.install_test.find_required_file( - tmpdir.join("c"), filename, expected=1, recursive=True + str(tmpdir.join("c")), filename, expected=1, recursive=True ) assert isinstance(results, str) # Ensure none file if do not recursively search that directory with pytest.raises(spack.install_test.SkipTest, match="Expected 1"): spack.install_test.find_required_file( - tmpdir.join("c"), filename, expected=1, recursive=False + str(tmpdir.join("c")), filename, expected=1, recursive=False ) # Now make sure we get all of the files - results = spack.install_test.find_required_file(tmpdir, filename, expected=3, recursive=True) + results = spack.install_test.find_required_file( + str(tmpdir), filename, expected=3, recursive=True + ) assert isinstance(results, list) and len(results) == 3 |