summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2024-11-07 20:53:29 +0100
committerPeter Scheibel <scheibel1@llnl.gov>2024-11-07 13:09:10 -0800
commited916ffe6ce9bd6af94cbd4538b06f258a8d766c (patch)
treec6a63975e34657aee75078d484c263b1a728afdb
parent4fbdf2f2c08e52d354bc22e1838d9e5dc6d1d332 (diff)
downloadspack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.tar.gz
spack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.tar.bz2
spack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.tar.xz
spack-ed916ffe6ce9bd6af94cbd4538b06f258a8d766c.zip
Revert "filesystem.py: add `max_depth` argument to `find` (#41945)"
This reverts commit 38c8069ab42f44aa9f4779968937fc6842dc2109.
-rw-r--r--lib/spack/llnl/util/filesystem.py175
-rw-r--r--lib/spack/spack/test/llnl/util/file_list.py32
-rw-r--r--lib/spack/spack/test/llnl/util/filesystem.py158
-rw-r--r--var/spack/repos/builtin.mock/packages/attributes-foo/package.py4
4 files changed, 77 insertions, 292 deletions
diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py
index b63b6e94b3..00bb270151 100644
--- a/lib/spack/llnl/util/filesystem.py
+++ b/lib/spack/llnl/util/filesystem.py
@@ -1673,20 +1673,16 @@ def find_first(root: str, files: Union[Iterable[str], str], bfs_depth: int = 2)
return FindFirstFile(root, *files, bfs_depth=bfs_depth).find()
-def find(root, files, recursive=True, max_depth: Optional[int] = None):
+def find(root, files, recursive=True):
"""Search for ``files`` starting from the ``root`` directory.
Like GNU/BSD find but written entirely in Python.
- Specifically this behaves like `find -type f`: it only returns
- results that are files. When searching recursively, this behaves
- as `find` with the `-L` option (follows symlinks).
-
Examples:
.. code-block:: console
- $ find -L /usr -name python
+ $ find /usr -name python
is equivalent to:
@@ -1716,8 +1712,6 @@ def find(root, files, recursive=True, max_depth: Optional[int] = None):
files (str or collections.abc.Sequence): Library name(s) to search for
recursive (bool): if False search only root folder,
if True descends top-down from the root. Defaults to True.
- max_depth (int): if set, don't search below this depth. Cannot be set
- if recursive is False
Returns:
list: The files that have been found
@@ -1725,135 +1719,59 @@ def find(root, files, recursive=True, max_depth: Optional[int] = None):
if isinstance(files, str):
files = [files]
- # If recursive is false, max_depth can only be None or 0
- if max_depth and not recursive:
- raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False")
-
- if not recursive:
- max_depth = 0
- elif max_depth is None:
- max_depth = sys.maxsize
-
- tty.debug(f"Find (max depth = {max_depth}): {root} {str(files)}")
- result = find_max_depth(root, files, max_depth)
+ if recursive:
+ tty.debug(f"Find (recursive): {root} {str(files)}")
+ result = _find_recursive(root, files)
+ else:
+ tty.debug(f"Find (not recursive): {root} {str(files)}")
+ result = _find_non_recursive(root, files)
tty.debug(f"Find complete: {root} {str(files)}")
return result
-@system_path_filter(arg_slice=slice(1))
-def find_max_depth(root, globs, max_depth: Optional[int] = None):
- """Given a set of non-recursive glob file patterns, finds all
- files matching those patterns up to a maximum specified depth.
-
- If a directory has a name which matches an input pattern, it will
- not be included in the results.
-
- If ``max_depth`` is specified, does not search below that depth.
+@system_path_filter
+def _find_recursive(root, search_files):
+ # The variable here is **on purpose** a defaultdict. The idea is that
+ # we want to poke the filesystem as little as possible, but still maintain
+ # stability in the order of the answer. Thus we are recording each library
+ # found in a key, and reconstructing the stable order later.
+ found_files = collections.defaultdict(list)
- If ``globs`` is a list, files matching earlier entries are placed
- in the return value before files matching later entries.
- """
- # If root doesn't exist, then we say we found nothing. If it
- # exists but is not a dir, we assume the user would want to
- # know; likewise if it exists but we do not have permission to
- # access it.
- try:
- stat_root = os.stat(root)
- except OSError as e:
- if e.errno == errno.ENOENT:
- return []
- else:
- raise
- if not stat.S_ISDIR(stat_root.st_mode):
- raise ValueError(f"{root} is not a directory")
+ # Make the path absolute to have os.walk also return an absolute path
+ root = os.path.abspath(root)
+ for path, _, list_files in os.walk(root):
+ for search_file in search_files:
+ matches = glob.glob(os.path.join(path, search_file))
+ matches = [os.path.join(path, x) for x in matches]
+ found_files[search_file].extend(matches)
- if max_depth is None:
- max_depth = sys.maxsize
+ answer = []
+ for search_file in search_files:
+ answer.extend(found_files[search_file])
- if isinstance(globs, str):
- globs = [globs]
- # Apply normcase to regular expressions and to the filenames:
- # this respects case-sensitivity semantics of different OSes
- # (e.g. file search is typically case-insensitive on Windows)
- regexes = [re.compile(fnmatch.translate(os.path.normcase(x))) for x in globs]
+ return answer
- # Note later calls to os.scandir etc. return abspaths if the
- # input is absolute, see https://docs.python.org/3/library/os.html#os.DirEntry.path
- root = os.path.abspath(root)
+@system_path_filter
+def _find_non_recursive(root, search_files):
+ # The variable here is **on purpose** a defaultdict as os.list_dir
+ # can return files in any order (does not preserve stability)
found_files = collections.defaultdict(list)
- def _dir_id(stat_info):
- # Note: on windows, st_ino is the file index and st_dev
- # is the volume serial number. See
- # https://github.com/python/cpython/blob/3.9/Python/fileutils.c
- return (stat_info.st_ino, stat_info.st_dev)
-
- def _log_file_access_issue(e):
- errno_name = errno.errorcode.get(e.errno, "UNKNOWN")
- tty.debug(f"find must skip {dir_entry.path}: {errno_name} {str(e)}")
-
- visited_dirs = set([_dir_id(stat_root)])
-
- # Each queue item stores the depth and path
- # This achieves a consistent traversal order by iterating through
- # each directory in alphabetical order.
- # This also traverses in BFS order to ensure finding the shortest
- # path to any file (or one of the shortest paths, if there are
- # several - the one returned will be consistent given the prior
- # point).
- dir_queue = collections.deque([(0, root)])
- while dir_queue:
- depth, next_dir = dir_queue.pop()
- try:
- dir_iter = os.scandir(next_dir)
- except OSError:
- # Most commonly, this would be a permissions issue, for
- # example if we are scanning an external directory like /usr
- continue
-
- with dir_iter:
- ordered_entries = sorted(dir_iter, key=lambda x: x.name)
- for dir_entry in ordered_entries:
- try:
- it_is_a_dir = dir_entry.is_dir(follow_symlinks=True)
- except OSError as e:
- # Possible permission issue, or a symlink that cannot
- # be resolved (ELOOP).
- _log_file_access_issue(e)
- continue
+ # Make the path absolute to have absolute path returned
+ root = os.path.abspath(root)
- if it_is_a_dir and (depth < max_depth):
- try:
- # The stat should be performed in a try/except block.
- # We repeat that here vs. moving to the above block
- # because we only want to call `stat` if we haven't
- # exceeded our max_depth
- if sys.platform == "win32":
- # Note: st_ino/st_dev on DirEntry.stat are not set on
- # Windows, so we have to call os.stat
- stat_info = os.stat(dir_entry.path, follow_symlinks=True)
- else:
- stat_info = dir_entry.stat(follow_symlinks=True)
- except OSError as e:
- _log_file_access_issue(e)
- continue
-
- dir_id = _dir_id(stat_info)
- if dir_id not in visited_dirs:
- dir_queue.appendleft((depth + 1, dir_entry.path))
- visited_dirs.add(dir_id)
- else:
- fname = os.path.basename(dir_entry.path)
- for pattern in regexes:
- if pattern.match(os.path.normcase(fname)):
- found_files[pattern].append(os.path.join(next_dir, fname))
+ for search_file in search_files:
+ matches = glob.glob(os.path.join(root, search_file))
+ matches = [os.path.join(root, x) for x in matches]
+ found_files[search_file].extend(matches)
- # TODO: for fully-recursive searches, we can print a warning after
- # after having searched everything up to some fixed depth
+ answer = []
+ for search_file in search_files:
+ answer.extend(found_files[search_file])
- return list(itertools.chain(*[found_files[x] for x in regexes]))
+ return answer
# Utilities for libraries and headers
@@ -2292,9 +2210,7 @@ def find_system_libraries(libraries, shared=True):
return libraries_found
-def find_libraries(
- libraries, root, shared=True, recursive=False, runtime=True, max_depth: Optional[int] = None
-):
+def find_libraries(libraries, root, shared=True, recursive=False, runtime=True):
"""Returns an iterable of full paths to libraries found in a root dir.
Accepts any glob characters accepted by fnmatch:
@@ -2315,8 +2231,6 @@ def find_libraries(
otherwise for static. Defaults to True.
recursive (bool): if False search only root folder,
if True descends top-down from the root. Defaults to False.
- max_depth (int): if set, don't search below this depth. Cannot be set
- if recursive is False
runtime (bool): Windows only option, no-op elsewhere. If true,
search for runtime shared libs (.DLL), otherwise, search
for .Lib files. If shared is false, this has no meaning.
@@ -2325,7 +2239,6 @@ def find_libraries(
Returns:
LibraryList: The libraries that have been found
"""
-
if isinstance(libraries, str):
libraries = [libraries]
elif not isinstance(libraries, collections.abc.Sequence):
@@ -2358,10 +2271,8 @@ def find_libraries(
libraries = ["{0}.{1}".format(lib, suffix) for lib in libraries for suffix in suffixes]
if not recursive:
- if max_depth:
- raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False")
# If not recursive, look for the libraries directly in root
- return LibraryList(find(root, libraries, recursive=False))
+ return LibraryList(find(root, libraries, False))
# To speedup the search for external packages configured e.g. in /usr,
# perform first non-recursive search in root/lib then in root/lib64 and
@@ -2379,7 +2290,7 @@ def find_libraries(
if found_libs:
break
else:
- found_libs = find(root, libraries, recursive=True, max_depth=max_depth)
+ found_libs = find(root, libraries, True)
return LibraryList(found_libs)
diff --git a/lib/spack/spack/test/llnl/util/file_list.py b/lib/spack/spack/test/llnl/util/file_list.py
index e2ff5a8210..75ba3ae89d 100644
--- a/lib/spack/spack/test/llnl/util/file_list.py
+++ b/lib/spack/spack/test/llnl/util/file_list.py
@@ -9,7 +9,7 @@ import sys
import pytest
-from llnl.util.filesystem import HeaderList, LibraryList, find_headers, find_libraries
+from llnl.util.filesystem import HeaderList, LibraryList, find, find_headers, find_libraries
import spack.paths
@@ -324,3 +324,33 @@ def test_searching_order(search_fn, search_list, root, kwargs):
# List should be empty here
assert len(rlist) == 0
+
+
+@pytest.mark.parametrize(
+ "root,search_list,kwargs,expected",
+ [
+ (
+ search_dir,
+ "*/*bar.tx?",
+ {"recursive": False},
+ [
+ os.path.join(search_dir, os.path.join("a", "foobar.txt")),
+ os.path.join(search_dir, os.path.join("b", "bar.txp")),
+ os.path.join(search_dir, os.path.join("c", "bar.txt")),
+ ],
+ ),
+ (
+ search_dir,
+ "*/*bar.tx?",
+ {"recursive": True},
+ [
+ os.path.join(search_dir, os.path.join("a", "foobar.txt")),
+ os.path.join(search_dir, os.path.join("b", "bar.txp")),
+ os.path.join(search_dir, os.path.join("c", "bar.txt")),
+ ],
+ ),
+ ],
+)
+def test_find_with_globbing(root, search_list, kwargs, expected):
+ matches = find(root, search_list, **kwargs)
+ assert sorted(matches) == sorted(expected)
diff --git a/lib/spack/spack/test/llnl/util/filesystem.py b/lib/spack/spack/test/llnl/util/filesystem.py
index 01379be94c..a0c9874769 100644
--- a/lib/spack/spack/test/llnl/util/filesystem.py
+++ b/lib/spack/spack/test/llnl/util/filesystem.py
@@ -6,7 +6,6 @@
"""Tests for ``llnl/util/filesystem.py``"""
import filecmp
import os
-import pathlib
import shutil
import stat
import sys
@@ -15,8 +14,7 @@ from contextlib import contextmanager
import pytest
import llnl.util.filesystem as fs
-import llnl.util.symlink
-from llnl.util.symlink import _windows_can_symlink, islink, readlink, symlink
+from llnl.util.symlink import islink, readlink, symlink
import spack.paths
@@ -1037,157 +1035,3 @@ def test_windows_sfn(tmpdir):
assert "d\\LONGER~1" in fs.windows_sfn(d)
assert "d\\LONGER~2" in fs.windows_sfn(e)
shutil.rmtree(tmpdir.join("d"))
-
-
-@pytest.fixture
-def dir_structure_with_things_to_find(tmpdir):
- """
- <root>/
- dir_one/
- file_one
- dir_two/
- dir_three/
- dir_four/
- file_two
- file_three
- file_four
- """
- dir_one = tmpdir.join("dir_one").ensure(dir=True)
- tmpdir.join("dir_two").ensure(dir=True)
- dir_three = tmpdir.join("dir_three").ensure(dir=True)
- dir_four = dir_three.join("dir_four").ensure(dir=True)
-
- locations = {}
- locations["file_one"] = str(dir_one.join("file_one").ensure())
- locations["file_two"] = str(dir_four.join("file_two").ensure())
- locations["file_three"] = str(dir_three.join("file_three").ensure())
- locations["file_four"] = str(tmpdir.join("file_four").ensure())
-
- return str(tmpdir), locations
-
-
-def test_find_max_depth(dir_structure_with_things_to_find):
- root, locations = dir_structure_with_things_to_find
-
- # Make sure the paths we use to verify are absolute
- assert os.path.isabs(locations["file_one"])
-
- assert set(fs.find_max_depth(root, "file_*", 0)) == {locations["file_four"]}
- assert set(fs.find_max_depth(root, "file_*", 1)) == {
- locations["file_one"],
- locations["file_three"],
- locations["file_four"],
- }
- assert set(fs.find_max_depth(root, "file_two", 2)) == {locations["file_two"]}
- assert not set(fs.find_max_depth(root, "file_two", 1))
- assert set(fs.find_max_depth(root, "file_two")) == {locations["file_two"]}
- assert set(fs.find_max_depth(root, "file_*")) == set(locations.values())
-
-
-def test_find_max_depth_relative(dir_structure_with_things_to_find):
- """find_max_depth should return absolute paths even if
- the provided path is relative.
- """
- root, locations = dir_structure_with_things_to_find
- with fs.working_dir(root):
- assert set(fs.find_max_depth(".", "file_*", 0)) == {locations["file_four"]}
- assert set(fs.find_max_depth(".", "file_two", 2)) == {locations["file_two"]}
-
-
-@pytest.mark.parametrize("recursive,max_depth", [(False, -1), (False, 1)])
-def test_max_depth_and_recursive_errors(tmpdir, recursive, max_depth):
- root = str(tmpdir)
- error_str = "cannot be set if recursive is False"
- with pytest.raises(ValueError, match=error_str):
- fs.find(root, ["some_file"], recursive=recursive, max_depth=max_depth)
-
- with pytest.raises(ValueError, match=error_str):
- fs.find_libraries(["some_lib"], root, recursive=recursive, max_depth=max_depth)
-
-
-def dir_structure_with_things_to_find_links(tmpdir, use_junctions=False):
- """
- "lx-dy" means "level x, directory y"
- "lx-fy" means "level x, file y"
- "lx-sy" means "level x, symlink y"
-
- <root>/
- l1-d1/
- l2-d1/
- l3-s1 -> l1-d2 # points to directory above l2-d1
- l3-d2/
- l4-f1
- l3-s3 -> l1-d1 # cyclic link
- l3-d4/
- l4-f2
- l1-d2/
- l2-f1
- l2-d2/
- l3-f3
- l2-s3 -> l2-d2
- l1-s3 -> l3-d4 # a link that "skips" a directory level
- l1-s4 -> l2-s3 # a link to a link to a dir
- """
- if sys.platform == "win32" and (not use_junctions) and (not _windows_can_symlink()):
- pytest.skip("This Windows instance is not configured with symlink support")
-
- l1_d1 = tmpdir.join("l1-d1").ensure(dir=True)
- l2_d1 = l1_d1.join("l2-d1").ensure(dir=True)
- l3_d2 = l2_d1.join("l3-d2").ensure(dir=True)
- l3_d4 = l2_d1.join("l3-d4").ensure(dir=True)
- l1_d2 = tmpdir.join("l1-d2").ensure(dir=True)
- l2_d2 = l1_d2.join("l1-d2").ensure(dir=True)
-
- if use_junctions:
- link_fn = llnl.util.symlink._windows_create_junction
- else:
- link_fn = os.symlink
-
- link_fn(l1_d2, pathlib.Path(l2_d1) / "l3-s1")
- link_fn(l1_d1, pathlib.Path(l2_d1) / "l3-s3")
- link_fn(l3_d4, pathlib.Path(tmpdir) / "l1-s3")
- l2_s3 = pathlib.Path(l1_d2) / "l2-s3"
- link_fn(l2_d2, l2_s3)
- link_fn(l2_s3, pathlib.Path(tmpdir) / "l1-s4")
-
- locations = {}
- locations["l4-f1"] = str(l3_d2.join("l4-f1").ensure())
- locations["l4-f2-full"] = str(l3_d4.join("l4-f2").ensure())
- locations["l4-f2-link"] = str(pathlib.Path(tmpdir) / "l1-s3" / "l4-f2")
- locations["l2-f1"] = str(l1_d2.join("l2-f1").ensure())
- locations["l2-f1-link"] = str(pathlib.Path(tmpdir) / "l1-d1" / "l2-d1" / "l3-s1" / "l2-f1")
- locations["l3-f3-full"] = str(l2_d2.join("l3-f3").ensure())
- locations["l3-f3-link-l1"] = str(pathlib.Path(tmpdir) / "l1-s4" / "l3-f3")
-
- return str(tmpdir), locations
-
-
-def _check_find_links(root, locations):
- root = pathlib.Path(root)
- assert set(fs.find_max_depth(root, "l4-f1")) == {locations["l4-f1"]}
- assert set(fs.find_max_depth(root / "l1-s3", "l4-f2", 0)) == {locations["l4-f2-link"]}
- assert set(fs.find_max_depth(root / "l1-d1", "l2-f1")) == {locations["l2-f1-link"]}
- # File is accessible via symlink and subdir, the link path will be
- # searched first, and the directory will not be searched again when
- # it is encountered the second time (via not-link) in the traversal
- assert set(fs.find_max_depth(root, "l4-f2")) == {locations["l4-f2-link"]}
- # File is accessible only via the dir, so the full file path should
- # be reported
- assert set(fs.find_max_depth(root / "l1-d1", "l4-f2")) == {locations["l4-f2-full"]}
- # Check following links to links
- assert set(fs.find_max_depth(root, "l3-f3")) == {locations["l3-f3-link-l1"]}
-
-
-@pytest.mark.parametrize(
- "use_junctions",
- [
- False,
- pytest.param(
- True,
- marks=pytest.mark.skipif(sys.platform != "win32", reason="Only Windows has junctions"),
- ),
- ],
-)
-def test_find_max_depth_symlinks(tmpdir, use_junctions):
- root, locations = dir_structure_with_things_to_find_links(tmpdir, use_junctions=use_junctions)
- _check_find_links(root, locations)
diff --git a/var/spack/repos/builtin.mock/packages/attributes-foo/package.py b/var/spack/repos/builtin.mock/packages/attributes-foo/package.py
index b882fc9b65..31c88f4b08 100644
--- a/var/spack/repos/builtin.mock/packages/attributes-foo/package.py
+++ b/var/spack/repos/builtin.mock/packages/attributes-foo/package.py
@@ -44,7 +44,7 @@ class AttributesFoo(BundlePackage):
# Header provided by the bar virutal package
@property
def bar_headers(self):
- return find_headers("bar", root=self.home.include, recursive=True)
+ return find_headers("bar/bar", root=self.home.include, recursive=False)
# Libary provided by the bar virtual package
@property
@@ -59,7 +59,7 @@ class AttributesFoo(BundlePackage):
# Header provided by the baz virtual package
@property
def baz_headers(self):
- return find_headers("baz", root=self.baz_home.include, recursive=True)
+ return find_headers("baz/baz", root=self.baz_home.include, recursive=False)
# Library provided by the baz virtual package
@property