From 6d55066b94a9669816fce5c22102997a1f7b21a5 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Thu, 28 Sep 2023 15:24:56 +0200 Subject: Use st_nlink in hardlink tracking (#39328) Only add potential hardlinks to a set/dict, instead of each file. This should be much cheaper, since hardlinks are very rare. --- lib/spack/spack/binary_distribution.py | 12 +++++++----- lib/spack/spack/hooks/drop_redundant_rpaths.py | 11 +++++------ 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 50043af762..fc6056e6be 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -647,8 +647,7 @@ class BuildManifestVisitor(BaseDirectoryVisitor): directories.""" def __init__(self): - # Save unique identifiers of files to avoid - # relocating hardlink files for each path. + # Save unique identifiers of hardlinks to avoid relocating them multiple times self.visited = set() # Lists of files we will check @@ -657,6 +656,8 @@ class BuildManifestVisitor(BaseDirectoryVisitor): def seen_before(self, root, rel_path): stat_result = os.lstat(os.path.join(root, rel_path)) + if stat_result.st_nlink == 1: + return False identifier = (stat_result.st_dev, stat_result.st_ino) if identifier in self.visited: return True @@ -1581,9 +1582,10 @@ def dedupe_hardlinks_if_necessary(root, buildinfo): for rel_path in buildinfo[key]: stat_result = os.lstat(os.path.join(root, rel_path)) identifier = (stat_result.st_dev, stat_result.st_ino) - if identifier in visited: - continue - visited.add(identifier) + if stat_result.st_nlink > 1: + if identifier in visited: + continue + visited.add(identifier) new_list.append(rel_path) buildinfo[key] = new_list diff --git a/lib/spack/spack/hooks/drop_redundant_rpaths.py b/lib/spack/spack/hooks/drop_redundant_rpaths.py index 4cbbf5359d..a32d435e38 100644 --- a/lib/spack/spack/hooks/drop_redundant_rpaths.py +++ b/lib/spack/spack/hooks/drop_redundant_rpaths.py @@ -79,8 +79,7 @@ class ElfFilesWithRPathVisitor(BaseDirectoryVisitor): """Visitor that collects all elf files that have an rpath""" def __init__(self): - # Map from (ino, dev) -> path. We need 1 path per file, if there are hardlinks, - # we don't need to store the path multiple times. + # Keep track of what hardlinked files we've already visited. self.visited = set() def visit_file(self, root, rel_path, depth): @@ -89,10 +88,10 @@ class ElfFilesWithRPathVisitor(BaseDirectoryVisitor): identifier = (s.st_ino, s.st_dev) # We're hitting a hardlink or symlink of an excluded lib, no need to parse. - if identifier in self.visited: - return - - self.visited.add(identifier) + if s.st_nlink > 1: + if identifier in self.visited: + return + self.visited.add(identifier) result = drop_redundant_rpaths(filepath) -- cgit v1.2.3-60-g2f50