diff options
author | Harmen Stoppels <harmenstoppels@gmail.com> | 2023-09-28 15:24:56 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-28 13:24:56 +0000 |
commit | 6d55066b94a9669816fce5c22102997a1f7b21a5 (patch) | |
tree | 31a89427de7a59af12878dbc2a98ea50ce8e8d19 /lib | |
parent | 78132f2d6b0529cb2280f91d77026340c19f322e (diff) | |
download | spack-6d55066b94a9669816fce5c22102997a1f7b21a5.tar.gz spack-6d55066b94a9669816fce5c22102997a1f7b21a5.tar.bz2 spack-6d55066b94a9669816fce5c22102997a1f7b21a5.tar.xz spack-6d55066b94a9669816fce5c22102997a1f7b21a5.zip |
Use st_nlink in hardlink tracking (#39328)
Only add potential hardlinks (files whose st_nlink is greater than 1) to the
tracking set, instead of every file. This should be much cheaper, since hardlinks are very rare.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 12 | ||||
-rw-r--r-- | lib/spack/spack/hooks/drop_redundant_rpaths.py | 11 |
2 files changed, 12 insertions, 11 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 50043af762..fc6056e6be 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -647,8 +647,7 @@ class BuildManifestVisitor(BaseDirectoryVisitor): directories.""" def __init__(self): - # Save unique identifiers of files to avoid - # relocating hardlink files for each path. + # Save unique identifiers of hardlinks to avoid relocating them multiple times self.visited = set() # Lists of files we will check @@ -657,6 +656,8 @@ class BuildManifestVisitor(BaseDirectoryVisitor): def seen_before(self, root, rel_path): stat_result = os.lstat(os.path.join(root, rel_path)) + if stat_result.st_nlink == 1: + return False identifier = (stat_result.st_dev, stat_result.st_ino) if identifier in self.visited: return True @@ -1581,9 +1582,10 @@ def dedupe_hardlinks_if_necessary(root, buildinfo): for rel_path in buildinfo[key]: stat_result = os.lstat(os.path.join(root, rel_path)) identifier = (stat_result.st_dev, stat_result.st_ino) - if identifier in visited: - continue - visited.add(identifier) + if stat_result.st_nlink > 1: + if identifier in visited: + continue + visited.add(identifier) new_list.append(rel_path) buildinfo[key] = new_list diff --git a/lib/spack/spack/hooks/drop_redundant_rpaths.py b/lib/spack/spack/hooks/drop_redundant_rpaths.py index 4cbbf5359d..a32d435e38 100644 --- a/lib/spack/spack/hooks/drop_redundant_rpaths.py +++ b/lib/spack/spack/hooks/drop_redundant_rpaths.py @@ -79,8 +79,7 @@ class ElfFilesWithRPathVisitor(BaseDirectoryVisitor): """Visitor that collects all elf files that have an rpath""" def __init__(self): - # Map from (ino, dev) -> path. We need 1 path per file, if there are hardlinks, - # we don't need to store the path multiple times. + # Keep track of what hardlinked files we've already visited. 
self.visited = set() def visit_file(self, root, rel_path, depth): @@ -89,10 +88,10 @@ class ElfFilesWithRPathVisitor(BaseDirectoryVisitor): identifier = (s.st_ino, s.st_dev) # We're hitting a hardlink or symlink of an excluded lib, no need to parse. - if identifier in self.visited: - return - - self.visited.add(identifier) + if s.st_nlink > 1: + if identifier in self.visited: + return + self.visited.add(identifier) result = drop_redundant_rpaths(filepath) |