summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorRobert Blackwell <rblackwell@flatironinstitute.org>2021-10-28 10:39:16 -0400
committerGitHub <noreply@github.com>2021-10-28 07:39:16 -0700
commit8fd94e31142e6eb49cf846e3dcedda17eba87cf1 (patch)
treead940ae95ddfe3c01f2447c94673958c0723dba1 /lib
parentc13f915735636a8285538b458d888742a3d27f4d (diff)
downloadspack-8fd94e31142e6eb49cf846e3dcedda17eba87cf1.tar.gz
spack-8fd94e31142e6eb49cf846e3dcedda17eba87cf1.tar.bz2
spack-8fd94e31142e6eb49cf846e3dcedda17eba87cf1.tar.xz
spack-8fd94e31142e6eb49cf846e3dcedda17eba87cf1.zip
YamlFilesystemView: improve file removal performance via batching (#24355)
* Drastically improve YamlFilesystemView file removal via batching The `remove_file` routine has to check if the file is owned by multiple packages, so it doesn't remove necessary files. This is done by the `get_all_specs` routine, which walks the entire package tree. With large numbers of packages on shared file systems, this can take seconds per file tree traversal, which adds up extremely quickly. For example, a single deactivate of a largish python package in our software stack on GPFS took approximately 40 minutes. This patch simply replaces `remove_file` with a batch `remove_files` routine. This routine removes a list of files rather than a single file, requiring only one traversal per batch. In practice this means a package can be removed in seconds time, rather than potentially hours, essentially a ~100x speedup (ignoring initial deactivation logic, which takes about 3 minutes in our test setup).
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/build_systems/python.py6
-rw-r--r--lib/spack/spack/filesystem_view.py31
-rw-r--r--lib/spack/spack/package.py3
3 files changed, 23 insertions, 17 deletions
diff --git a/lib/spack/spack/build_systems/python.py b/lib/spack/spack/build_systems/python.py
index fd772acfbb..a308e77cb9 100644
--- a/lib/spack/spack/build_systems/python.py
+++ b/lib/spack/spack/build_systems/python.py
@@ -393,11 +393,15 @@ class PythonPackage(PackageBase):
self.spec
)
)
+
+ to_remove = []
for src, dst in merge_map.items():
if ignore_namespace and namespace_init(dst):
continue
if global_view or not path_contains_subdirectory(src, bin_dir):
- view.remove_file(src, dst)
+ to_remove.append(dst)
else:
os.remove(dst)
+
+ view.remove_files(to_remove)
diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py
index 27a8708596..659ae550ae 100644
--- a/lib/spack/spack/filesystem_view.py
+++ b/lib/spack/spack/filesystem_view.py
@@ -442,11 +442,7 @@ class YamlFilesystemView(FilesystemView):
# now unmerge the directory tree
tree.unmerge_directories(view_dst, ignore_file)
- def remove_file(self, src, dest):
- if not os.path.lexists(dest):
- tty.warn("Tried to remove %s which does not exist" % dest)
- return
-
+ def remove_files(self, files):
def needs_file(spec, file):
# convert the file we want to remove to a source in this spec
projection = self.get_projection_for_spec(spec)
@@ -465,16 +461,23 @@ class YamlFilesystemView(FilesystemView):
manifest = {}
return test_path in manifest
- # remove if dest is not owned by any other package in the view
- # This will only be false if two packages are merged into a prefix
- # and have a conflicting file
+ specs = self.get_all_specs()
+
+ for file in files:
+ if not os.path.lexists(file):
+ tty.warn("Tried to remove %s which does not exist" % file)
+ continue
+
+ # remove if file is not owned by any other package in the view
+ # This will only be false if two packages are merged into a prefix
+ # and have a conflicting file
- # check all specs for whether they own the file. That include the spec
- # we are currently removing, as we remove files before unlinking the
- # metadata directory.
- if len([s for s in self.get_all_specs()
- if needs_file(s, dest)]) <= 1:
- os.remove(dest)
+ # check all specs for whether they own the file. That include the spec
+ # we are currently removing, as we remove files before unlinking the
+ # metadata directory.
+ if len([s for s in specs if needs_file(s, file)]) <= 1:
+ tty.debug("Removing file " + file)
+ os.remove(file)
def check_added(self, spec):
assert spec.concrete
diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py
index 00ba8a38d5..c22898c5a3 100644
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -469,8 +469,7 @@ class PackageViewMixin(object):
example if two packages include the same file, it should only be
removed when both packages are removed.
"""
- for src, dst in merge_map.items():
- view.remove_file(src, dst)
+ view.remove_files(merge_map.values())
def test_log_pathname(test_stage, spec):