summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Harmen Stoppels <harmenstoppels@gmail.com> 2023-09-28 15:24:56 +0200
committer: GitHub <noreply@github.com> 2023-09-28 13:24:56 +0000
commit: 6d55066b94a9669816fce5c22102997a1f7b21a5 (patch)
tree: 31a89427de7a59af12878dbc2a98ea50ce8e8d19
parent: 78132f2d6b0529cb2280f91d77026340c19f322e (diff)
download: spack-6d55066b94a9669816fce5c22102997a1f7b21a5.tar.gz
spack-6d55066b94a9669816fce5c22102997a1f7b21a5.tar.bz2
spack-6d55066b94a9669816fce5c22102997a1f7b21a5.tar.xz
spack-6d55066b94a9669816fce5c22102997a1f7b21a5.zip
Use st_nlink in hardlink tracking (#39328)
Only add potential hardlinks (files whose st_nlink is greater than 1) to the tracking set/dict, instead of adding every file. This should be much cheaper, since hardlinks are very rare.
-rw-r--r-- lib/spack/spack/binary_distribution.py 12
-rw-r--r-- lib/spack/spack/hooks/drop_redundant_rpaths.py 11
2 files changed, 12 insertions, 11 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 50043af762..fc6056e6be 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -647,8 +647,7 @@ class BuildManifestVisitor(BaseDirectoryVisitor):
directories."""
def __init__(self):
- # Save unique identifiers of files to avoid
- # relocating hardlink files for each path.
+ # Save unique identifiers of hardlinks to avoid relocating them multiple times
self.visited = set()
# Lists of files we will check
@@ -657,6 +656,8 @@ class BuildManifestVisitor(BaseDirectoryVisitor):
def seen_before(self, root, rel_path):
stat_result = os.lstat(os.path.join(root, rel_path))
+ if stat_result.st_nlink == 1:
+ return False
identifier = (stat_result.st_dev, stat_result.st_ino)
if identifier in self.visited:
return True
@@ -1581,9 +1582,10 @@ def dedupe_hardlinks_if_necessary(root, buildinfo):
for rel_path in buildinfo[key]:
stat_result = os.lstat(os.path.join(root, rel_path))
identifier = (stat_result.st_dev, stat_result.st_ino)
- if identifier in visited:
- continue
- visited.add(identifier)
+ if stat_result.st_nlink > 1:
+ if identifier in visited:
+ continue
+ visited.add(identifier)
new_list.append(rel_path)
buildinfo[key] = new_list
diff --git a/lib/spack/spack/hooks/drop_redundant_rpaths.py b/lib/spack/spack/hooks/drop_redundant_rpaths.py
index 4cbbf5359d..a32d435e38 100644
--- a/lib/spack/spack/hooks/drop_redundant_rpaths.py
+++ b/lib/spack/spack/hooks/drop_redundant_rpaths.py
@@ -79,8 +79,7 @@ class ElfFilesWithRPathVisitor(BaseDirectoryVisitor):
"""Visitor that collects all elf files that have an rpath"""
def __init__(self):
- # Map from (ino, dev) -> path. We need 1 path per file, if there are hardlinks,
- # we don't need to store the path multiple times.
+ # Keep track of what hardlinked files we've already visited.
self.visited = set()
def visit_file(self, root, rel_path, depth):
@@ -89,10 +88,10 @@ class ElfFilesWithRPathVisitor(BaseDirectoryVisitor):
identifier = (s.st_ino, s.st_dev)
# We're hitting a hardlink or symlink of an excluded lib, no need to parse.
- if identifier in self.visited:
- return
-
- self.visited.add(identifier)
+ if s.st_nlink > 1:
+ if identifier in self.visited:
+ return
+ self.visited.add(identifier)
result = drop_redundant_rpaths(filepath)