diff options
author | Harmen Stoppels <harmenstoppels@gmail.com> | 2024-11-07 20:53:29 +0100 |
---|---|---|
committer | Peter Scheibel <scheibel1@llnl.gov> | 2024-11-07 13:09:10 -0800 |
commit | ed916ffe6ce9bd6af94cbd4538b06f258a8d766c (patch) | |
tree | c6a63975e34657aee75078d484c263b1a728afdb | |
parent | 4fbdf2f2c08e52d354bc22e1838d9e5dc6d1d332 (diff) | |
download | spack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.tar.gz spack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.tar.bz2 spack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.tar.xz spack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.zip |
Revert "filesystem.py: add `max_depth` argument to `find` (#41945)"
This reverts commit 38c8069ab42f44aa9f4779968937fc6842dc2109.
-rw-r--r-- | lib/spack/llnl/util/filesystem.py | 175 | ||||
-rw-r--r-- | lib/spack/spack/test/llnl/util/file_list.py | 32 | ||||
-rw-r--r-- | lib/spack/spack/test/llnl/util/filesystem.py | 158 | ||||
-rw-r--r-- | var/spack/repos/builtin.mock/packages/attributes-foo/package.py | 4 |
4 files changed, 77 insertions, 292 deletions
diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py index b63b6e94b3..00bb270151 100644 --- a/lib/spack/llnl/util/filesystem.py +++ b/lib/spack/llnl/util/filesystem.py @@ -1673,20 +1673,16 @@ def find_first(root: str, files: Union[Iterable[str], str], bfs_depth: int = 2) return FindFirstFile(root, *files, bfs_depth=bfs_depth).find() -def find(root, files, recursive=True, max_depth: Optional[int] = None): +def find(root, files, recursive=True): """Search for ``files`` starting from the ``root`` directory. Like GNU/BSD find but written entirely in Python. - Specifically this behaves like `find -type f`: it only returns - results that are files. When searching recursively, this behaves - as `find` with the `-L` option (follows symlinks). - Examples: .. code-block:: console - $ find -L /usr -name python + $ find /usr -name python is equivalent to: @@ -1716,8 +1712,6 @@ def find(root, files, recursive=True, max_depth: Optional[int] = None): files (str or collections.abc.Sequence): Library name(s) to search for recursive (bool): if False search only root folder, if True descends top-down from the root. Defaults to True. - max_depth (int): if set, don't search below this depth. Cannot be set - if recursive is False Returns: list: The files that have been found @@ -1725,135 +1719,59 @@ def find(root, files, recursive=True, max_depth: Optional[int] = None): if isinstance(files, str): files = [files] - # If recursive is false, max_depth can only be None or 0 - if max_depth and not recursive: - raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False") - - if not recursive: - max_depth = 0 - elif max_depth is None: - max_depth = sys.maxsize - - tty.debug(f"Find (max depth = {max_depth}): {root} {str(files)}") - result = find_max_depth(root, files, max_depth) + if recursive: + tty.debug(f"Find (recursive): {root} {str(files)}") + result = _find_recursive(root, files) + else: + tty.debug(f"Find (not recursive): {root} {str(files)}") + result = _find_non_recursive(root, files) tty.debug(f"Find complete: {root} {str(files)}") return result -@system_path_filter(arg_slice=slice(1)) -def find_max_depth(root, globs, max_depth: Optional[int] = None): - """Given a set of non-recursive glob file patterns, finds all - files matching those patterns up to a maximum specified depth. - - If a directory has a name which matches an input pattern, it will - not be included in the results. - - If ``max_depth`` is specified, does not search below that depth. +@system_path_filter +def _find_recursive(root, search_files): + # The variable here is **on purpose** a defaultdict. The idea is that + # we want to poke the filesystem as little as possible, but still maintain + # stability in the order of the answer. Thus we are recording each library + # found in a key, and reconstructing the stable order later. + found_files = collections.defaultdict(list) - If ``globs`` is a list, files matching earlier entries are placed - in the return value before files matching later entries. - """ - # If root doesn't exist, then we say we found nothing. If it - # exists but is not a dir, we assume the user would want to - # know; likewise if it exists but we do not have permission to - # access it. - try: - stat_root = os.stat(root) - except OSError as e: - if e.errno == errno.ENOENT: - return [] - else: - raise - if not stat.S_ISDIR(stat_root.st_mode): - raise ValueError(f"{root} is not a directory") + # Make the path absolute to have os.walk also return an absolute path + root = os.path.abspath(root) + for path, _, list_files in os.walk(root): + for search_file in search_files: + matches = glob.glob(os.path.join(path, search_file)) + matches = [os.path.join(path, x) for x in matches] + found_files[search_file].extend(matches) - if max_depth is None: - max_depth = sys.maxsize + answer = [] + for search_file in search_files: + answer.extend(found_files[search_file]) - if isinstance(globs, str): - globs = [globs] - # Apply normcase to regular expressions and to the filenames: - # this respects case-sensitivity semantics of different OSes - # (e.g. file search is typically case-insensitive on Windows) - regexes = [re.compile(fnmatch.translate(os.path.normcase(x))) for x in globs] + return answer - # Note later calls to os.scandir etc. return abspaths if the - # input is absolute, see https://docs.python.org/3/library/os.html#os.DirEntry.path - root = os.path.abspath(root) +@system_path_filter +def _find_non_recursive(root, search_files): + # The variable here is **on purpose** a defaultdict as os.list_dir + # can return files in any order (does not preserve stability) found_files = collections.defaultdict(list) - def _dir_id(stat_info): - # Note: on windows, st_ino is the file index and st_dev - # is the volume serial number. See - # https://github.com/python/cpython/blob/3.9/Python/fileutils.c - return (stat_info.st_ino, stat_info.st_dev) - - def _log_file_access_issue(e): - errno_name = errno.errorcode.get(e.errno, "UNKNOWN") - tty.debug(f"find must skip {dir_entry.path}: {errno_name} {str(e)}") - - visited_dirs = set([_dir_id(stat_root)]) - - # Each queue item stores the depth and path - # This achieves a consistent traversal order by iterating through - # each directory in alphabetical order. - # This also traverses in BFS order to ensure finding the shortest - # path to any file (or one of the shortest paths, if there are - # several - the one returned will be consistent given the prior - # point). - dir_queue = collections.deque([(0, root)]) - while dir_queue: - depth, next_dir = dir_queue.pop() - try: - dir_iter = os.scandir(next_dir) - except OSError: - # Most commonly, this would be a permissions issue, for - # example if we are scanning an external directory like /usr - continue - - with dir_iter: - ordered_entries = sorted(dir_iter, key=lambda x: x.name) - for dir_entry in ordered_entries: - try: - it_is_a_dir = dir_entry.is_dir(follow_symlinks=True) - except OSError as e: - # Possible permission issue, or a symlink that cannot - # be resolved (ELOOP). - _log_file_access_issue(e) - continue + # Make the path absolute to have absolute path returned + root = os.path.abspath(root) - if it_is_a_dir and (depth < max_depth): - try: - # The stat should be performed in a try/except block. - # We repeat that here vs. moving to the above block - # because we only want to call `stat` if we haven't - # exceeded our max_depth - if sys.platform == "win32": - # Note: st_ino/st_dev on DirEntry.stat are not set on - # Windows, so we have to call os.stat - stat_info = os.stat(dir_entry.path, follow_symlinks=True) - else: - stat_info = dir_entry.stat(follow_symlinks=True) - except OSError as e: - _log_file_access_issue(e) - continue - - dir_id = _dir_id(stat_info) - if dir_id not in visited_dirs: - dir_queue.appendleft((depth + 1, dir_entry.path)) - visited_dirs.add(dir_id) - else: - fname = os.path.basename(dir_entry.path) - for pattern in regexes: - if pattern.match(os.path.normcase(fname)): - found_files[pattern].append(os.path.join(next_dir, fname)) + for search_file in search_files: + matches = glob.glob(os.path.join(root, search_file)) + matches = [os.path.join(root, x) for x in matches] + found_files[search_file].extend(matches) - # TODO: for fully-recursive searches, we can print a warning after - # after having searched everything up to some fixed depth + answer = [] + for search_file in search_files: + answer.extend(found_files[search_file]) - return list(itertools.chain(*[found_files[x] for x in regexes])) + return answer # Utilities for libraries and headers @@ -2292,9 +2210,7 @@ def find_system_libraries(libraries, shared=True): return libraries_found -def find_libraries( - libraries, root, shared=True, recursive=False, runtime=True, max_depth: Optional[int] = None -): +def find_libraries(libraries, root, shared=True, recursive=False, runtime=True): """Returns an iterable of full paths to libraries found in a root dir. Accepts any glob characters accepted by fnmatch: @@ -2315,8 +2231,6 @@ def find_libraries( otherwise for static. Defaults to True. recursive (bool): if False search only root folder, if True descends top-down from the root. Defaults to False. - max_depth (int): if set, don't search below this depth. Cannot be set - if recursive is False runtime (bool): Windows only option, no-op elsewhere. If true, search for runtime shared libs (.DLL), otherwise, search for .Lib files. If shared is false, this has no meaning. @@ -2325,7 +2239,6 @@ def find_libraries( Returns: LibraryList: The libraries that have been found """ - if isinstance(libraries, str): libraries = [libraries] elif not isinstance(libraries, collections.abc.Sequence): @@ -2358,10 +2271,8 @@ def find_libraries( libraries = ["{0}.{1}".format(lib, suffix) for lib in libraries for suffix in suffixes] if not recursive: - if max_depth: - raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False") # If not recursive, look for the libraries directly in root - return LibraryList(find(root, libraries, recursive=False)) + return LibraryList(find(root, libraries, False)) # To speedup the search for external packages configured e.g. in /usr, # perform first non-recursive search in root/lib then in root/lib64 and @@ -2379,7 +2290,7 @@ def find_libraries( if found_libs: break else: - found_libs = find(root, libraries, recursive=True, max_depth=max_depth) + found_libs = find(root, libraries, True) return LibraryList(found_libs) diff --git a/lib/spack/spack/test/llnl/util/file_list.py b/lib/spack/spack/test/llnl/util/file_list.py index e2ff5a8210..75ba3ae89d 100644 --- a/lib/spack/spack/test/llnl/util/file_list.py +++ b/lib/spack/spack/test/llnl/util/file_list.py @@ -9,7 +9,7 @@ import sys import pytest -from llnl.util.filesystem import HeaderList, LibraryList, find_headers, find_libraries +from llnl.util.filesystem import HeaderList, LibraryList, find, find_headers, find_libraries import spack.paths @@ -324,3 +324,33 @@ def test_searching_order(search_fn, search_list, root, kwargs): # List should be empty here assert len(rlist) == 0 + + +@pytest.mark.parametrize( + "root,search_list,kwargs,expected", + [ + ( + search_dir, + "*/*bar.tx?", + {"recursive": False}, + [ + os.path.join(search_dir, os.path.join("a", "foobar.txt")), + os.path.join(search_dir, os.path.join("b", "bar.txp")), + os.path.join(search_dir, os.path.join("c", "bar.txt")), + ], + ), + ( + search_dir, + "*/*bar.tx?", + {"recursive": True}, + [ + os.path.join(search_dir, os.path.join("a", "foobar.txt")), + os.path.join(search_dir, os.path.join("b", "bar.txp")), + os.path.join(search_dir, os.path.join("c", "bar.txt")), + ], + ), + ], +) +def test_find_with_globbing(root, search_list, kwargs, expected): + matches = find(root, search_list, **kwargs) + assert sorted(matches) == sorted(expected) diff --git a/lib/spack/spack/test/llnl/util/filesystem.py b/lib/spack/spack/test/llnl/util/filesystem.py index 01379be94c..a0c9874769 100644 --- a/lib/spack/spack/test/llnl/util/filesystem.py +++ b/lib/spack/spack/test/llnl/util/filesystem.py @@ -6,7 +6,6 @@ """Tests for ``llnl/util/filesystem.py``""" import filecmp import os -import pathlib import shutil import stat import sys @@ -15,8 +14,7 @@ from contextlib import contextmanager import pytest import llnl.util.filesystem as fs -import llnl.util.symlink -from llnl.util.symlink import _windows_can_symlink, islink, readlink, symlink +from llnl.util.symlink import islink, readlink, symlink import spack.paths @@ -1037,157 +1035,3 @@ def test_windows_sfn(tmpdir): assert "d\\LONGER~1" in fs.windows_sfn(d) assert "d\\LONGER~2" in fs.windows_sfn(e) shutil.rmtree(tmpdir.join("d")) - - -@pytest.fixture -def dir_structure_with_things_to_find(tmpdir): - """ - <root>/ - dir_one/ - file_one - dir_two/ - dir_three/ - dir_four/ - file_two - file_three - file_four - """ - dir_one = tmpdir.join("dir_one").ensure(dir=True) - tmpdir.join("dir_two").ensure(dir=True) - dir_three = tmpdir.join("dir_three").ensure(dir=True) - dir_four = dir_three.join("dir_four").ensure(dir=True) - - locations = {} - locations["file_one"] = str(dir_one.join("file_one").ensure()) - locations["file_two"] = str(dir_four.join("file_two").ensure()) - locations["file_three"] = str(dir_three.join("file_three").ensure()) - locations["file_four"] = str(tmpdir.join("file_four").ensure()) - - return str(tmpdir), locations - - -def test_find_max_depth(dir_structure_with_things_to_find): - root, locations = dir_structure_with_things_to_find - - # Make sure the paths we use to verify are absolute - assert os.path.isabs(locations["file_one"]) - - assert set(fs.find_max_depth(root, "file_*", 0)) == {locations["file_four"]} - assert set(fs.find_max_depth(root, "file_*", 1)) == { - locations["file_one"], - locations["file_three"], - locations["file_four"], - } - assert set(fs.find_max_depth(root, "file_two", 2)) == {locations["file_two"]} - assert not set(fs.find_max_depth(root, "file_two", 1)) - assert set(fs.find_max_depth(root, "file_two")) == {locations["file_two"]} - assert set(fs.find_max_depth(root, "file_*")) == set(locations.values()) - - -def test_find_max_depth_relative(dir_structure_with_things_to_find): - """find_max_depth should return absolute paths even if - the provided path is relative. - """ - root, locations = dir_structure_with_things_to_find - with fs.working_dir(root): - assert set(fs.find_max_depth(".", "file_*", 0)) == {locations["file_four"]} - assert set(fs.find_max_depth(".", "file_two", 2)) == {locations["file_two"]} - - -@pytest.mark.parametrize("recursive,max_depth", [(False, -1), (False, 1)]) -def test_max_depth_and_recursive_errors(tmpdir, recursive, max_depth): - root = str(tmpdir) - error_str = "cannot be set if recursive is False" - with pytest.raises(ValueError, match=error_str): - fs.find(root, ["some_file"], recursive=recursive, max_depth=max_depth) - - with pytest.raises(ValueError, match=error_str): - fs.find_libraries(["some_lib"], root, recursive=recursive, max_depth=max_depth) - - -def dir_structure_with_things_to_find_links(tmpdir, use_junctions=False): - """ - "lx-dy" means "level x, directory y" - "lx-fy" means "level x, file y" - "lx-sy" means "level x, symlink y" - - <root>/ - l1-d1/ - l2-d1/ - l3-s1 -> l1-d2 # points to directory above l2-d1 - l3-d2/ - l4-f1 - l3-s3 -> l1-d1 # cyclic link - l3-d4/ - l4-f2 - l1-d2/ - l2-f1 - l2-d2/ - l3-f3 - l2-s3 -> l2-d2 - l1-s3 -> l3-d4 # a link that "skips" a directory level - l1-s4 -> l2-s3 # a link to a link to a dir - """ - if sys.platform == "win32" and (not use_junctions) and (not _windows_can_symlink()): - pytest.skip("This Windows instance is not configured with symlink support") - - l1_d1 = tmpdir.join("l1-d1").ensure(dir=True) - l2_d1 = l1_d1.join("l2-d1").ensure(dir=True) - l3_d2 = l2_d1.join("l3-d2").ensure(dir=True) - l3_d4 = l2_d1.join("l3-d4").ensure(dir=True) - l1_d2 = tmpdir.join("l1-d2").ensure(dir=True) - l2_d2 = l1_d2.join("l1-d2").ensure(dir=True) - - if use_junctions: - link_fn = llnl.util.symlink._windows_create_junction - else: - link_fn = os.symlink - - link_fn(l1_d2, pathlib.Path(l2_d1) / "l3-s1") - link_fn(l1_d1, pathlib.Path(l2_d1) / "l3-s3") - link_fn(l3_d4, pathlib.Path(tmpdir) / "l1-s3") - l2_s3 = pathlib.Path(l1_d2) / "l2-s3" - link_fn(l2_d2, l2_s3) - link_fn(l2_s3, pathlib.Path(tmpdir) / "l1-s4") - - locations = {} - locations["l4-f1"] = str(l3_d2.join("l4-f1").ensure()) - locations["l4-f2-full"] = str(l3_d4.join("l4-f2").ensure()) - locations["l4-f2-link"] = str(pathlib.Path(tmpdir) / "l1-s3" / "l4-f2") - locations["l2-f1"] = str(l1_d2.join("l2-f1").ensure()) - locations["l2-f1-link"] = str(pathlib.Path(tmpdir) / "l1-d1" / "l2-d1" / "l3-s1" / "l2-f1") - locations["l3-f3-full"] = str(l2_d2.join("l3-f3").ensure()) - locations["l3-f3-link-l1"] = str(pathlib.Path(tmpdir) / "l1-s4" / "l3-f3") - - return str(tmpdir), locations - - -def _check_find_links(root, locations): - root = pathlib.Path(root) - assert set(fs.find_max_depth(root, "l4-f1")) == {locations["l4-f1"]} - assert set(fs.find_max_depth(root / "l1-s3", "l4-f2", 0)) == {locations["l4-f2-link"]} - assert set(fs.find_max_depth(root / "l1-d1", "l2-f1")) == {locations["l2-f1-link"]} - # File is accessible via symlink and subdir, the link path will be - # searched first, and the directory will not be searched again when - # it is encountered the second time (via not-link) in the traversal - assert set(fs.find_max_depth(root, "l4-f2")) == {locations["l4-f2-link"]} - # File is accessible only via the dir, so the full file path should - # be reported - assert set(fs.find_max_depth(root / "l1-d1", "l4-f2")) == {locations["l4-f2-full"]} - # Check following links to links - assert set(fs.find_max_depth(root, "l3-f3")) == {locations["l3-f3-link-l1"]} - - -@pytest.mark.parametrize( - "use_junctions", - [ - False, - pytest.param( - True, - marks=pytest.mark.skipif(sys.platform != "win32", reason="Only Windows has junctions"), - ), - ], -) -def test_find_max_depth_symlinks(tmpdir, use_junctions): - root, locations = dir_structure_with_things_to_find_links(tmpdir, use_junctions=use_junctions) - _check_find_links(root, locations) diff --git a/var/spack/repos/builtin.mock/packages/attributes-foo/package.py b/var/spack/repos/builtin.mock/packages/attributes-foo/package.py index b882fc9b65..31c88f4b08 100644 --- a/var/spack/repos/builtin.mock/packages/attributes-foo/package.py +++ b/var/spack/repos/builtin.mock/packages/attributes-foo/package.py @@ -44,7 +44,7 @@ class AttributesFoo(BundlePackage): # Header provided by the bar virutal package @property def bar_headers(self): - return find_headers("bar", root=self.home.include, recursive=True) + return find_headers("bar/bar", root=self.home.include, recursive=False) # Libary provided by the bar virtual package @property @@ -59,7 +59,7 @@ class AttributesFoo(BundlePackage): # Header provided by the baz virtual package @property def baz_headers(self): - return find_headers("baz", root=self.baz_home.include, recursive=True) + return find_headers("baz/baz", root=self.baz_home.include, recursive=False) # Library provided by the baz virtual package @property |