summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/spack/spack/binary_distribution.py178
-rw-r--r--lib/spack/spack/filesystem_view.py4
-rw-r--r--lib/spack/spack/relocate.py10
-rw-r--r--lib/spack/spack/rewiring.py4
-rw-r--r--lib/spack/spack/test/bindist.py46
-rw-r--r--lib/spack/spack/test/packaging.py6
-rw-r--r--lib/spack/spack/test/relocate.py4
7 files changed, 190 insertions, 62 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index b712e82cc0..7ae91dd6bf 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -23,7 +23,7 @@ from six.moves.urllib.error import HTTPError, URLError
import llnl.util.filesystem as fsys
import llnl.util.lang
import llnl.util.tty as tty
-from llnl.util.filesystem import mkdirp
+from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, visit_directory_tree
import spack.cmd
import spack.config as config
@@ -642,6 +642,51 @@ def read_buildinfo_file(prefix):
return buildinfo
+class BuildManifestVisitor(BaseDirectoryVisitor):
+ """Visitor that collects a list of files and symlinks
+ that can be checked for need of relocation. It knows how
+ to dedupe hardlinks and deal with symlinks to files and
+ directories."""
+
+ def __init__(self):
+ # Save unique identifiers of files to avoid
+ # relocating hardlink files for each path.
+ self.visited = set()
+
+ # Lists of files we will check
+ self.files = []
+ self.symlinks = []
+
+ def seen_before(self, root, rel_path):
+ stat_result = os.lstat(os.path.join(root, rel_path))
+ identifier = (stat_result.st_dev, stat_result.st_ino)
+ if identifier in self.visited:
+ return True
+ else:
+ self.visited.add(identifier)
+ return False
+
+ def visit_file(self, root, rel_path, depth):
+ if self.seen_before(root, rel_path):
+ return
+ self.files.append(rel_path)
+
+ def visit_symlinked_file(self, root, rel_path, depth):
+ # Note: symlinks *can* be hardlinked, but it is unclear if
+ # symlinks can be relinked in-place (preserving inode).
+ # Therefore, we do *not* de-dupe hardlinked symlinks.
+ self.symlinks.append(rel_path)
+
+ def before_visit_dir(self, root, rel_path, depth):
+ return os.path.basename(rel_path) not in (".spack", "man")
+
+ def before_visit_symlinked_dir(self, root, rel_path, depth):
+ # Treat symlinked directories simply as symlinks.
+ self.visit_symlinked_file(root, rel_path, depth)
+ # Never recurse into symlinked directories.
+ return False
+
+
def get_buildfile_manifest(spec):
"""
Return a data structure with information about a build, including
@@ -657,57 +702,52 @@ def get_buildfile_manifest(spec):
"link_to_relocate": [],
"other": [],
"binary_to_relocate_fullpath": [],
+ "hardlinks_deduped": True,
}
- exclude_list = (".spack", "man")
-
- # Do this at during tarball creation to save time when tarball unpacked.
- # Used by make_package_relative to determine binaries to change.
- for root, dirs, files in os.walk(spec.prefix, topdown=True):
- dirs[:] = [d for d in dirs if d not in exclude_list]
-
- # Directories may need to be relocated too.
- for directory in dirs:
- dir_path_name = os.path.join(root, directory)
- rel_path_name = os.path.relpath(dir_path_name, spec.prefix)
- if os.path.islink(dir_path_name):
- link = os.readlink(dir_path_name)
- if os.path.isabs(link) and link.startswith(spack.store.layout.root):
- data["link_to_relocate"].append(rel_path_name)
-
- for filename in files:
- path_name = os.path.join(root, filename)
- m_type, m_subtype = fsys.mime_type(path_name)
- rel_path_name = os.path.relpath(path_name, spec.prefix)
- added = False
-
- if os.path.islink(path_name):
- link = os.readlink(path_name)
- if os.path.isabs(link):
- # Relocate absolute links into the spack tree
- if link.startswith(spack.store.layout.root):
- data["link_to_relocate"].append(rel_path_name)
- added = True
-
- if relocate.needs_binary_relocation(m_type, m_subtype):
- if (
- (
- m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable")
- and sys.platform != "darwin"
- )
- or (m_subtype in ("x-mach-binary") and sys.platform == "darwin")
- or (not filename.endswith(".o"))
- ):
- data["binary_to_relocate"].append(rel_path_name)
- data["binary_to_relocate_fullpath"].append(path_name)
- added = True
+ # Guard against filesystem footguns of hardlinks and symlinks by using
+ # a visitor to retrieve a list of files and symlinks, so we don't have
+ # to worry about hardlinks of symlinked dirs and what not.
+ visitor = BuildManifestVisitor()
+ root = spec.prefix
+ visit_directory_tree(root, visitor)
+
+ # Symlinks.
+
+ # Obvious bugs:
+ # 1. relative links are not relocated.
+ # 2. paths are used as strings.
+ for rel_path in visitor.symlinks:
+ abs_path = os.path.join(root, rel_path)
+ link = os.readlink(abs_path)
+ if os.path.isabs(link) and link.startswith(spack.store.layout.root):
+ data["link_to_relocate"].append(rel_path)
+
+ # Non-symlinks.
+ for rel_path in visitor.files:
+ abs_path = os.path.join(root, rel_path)
+ m_type, m_subtype = fsys.mime_type(abs_path)
+
+ if relocate.needs_binary_relocation(m_type, m_subtype):
+ # Why is this branch not part of needs_binary_relocation? :(
+ if (
+ (
+ m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable")
+ and sys.platform != "darwin"
+ )
+ or (m_subtype in ("x-mach-binary") and sys.platform == "darwin")
+ or (not rel_path.endswith(".o"))
+ ):
+ data["binary_to_relocate"].append(rel_path)
+ data["binary_to_relocate_fullpath"].append(abs_path)
+ continue
+
+ elif relocate.needs_text_relocation(m_type, m_subtype):
+ data["text_to_relocate"].append(rel_path)
+ continue
- if relocate.needs_text_relocation(m_type, m_subtype):
- data["text_to_relocate"].append(rel_path_name)
- added = True
+ data["other"].append(abs_path)
- if not added:
- data["other"].append(path_name)
return data
@@ -734,6 +774,7 @@ def write_buildinfo_file(spec, workdir, rel=False):
buildinfo["relocate_textfiles"] = manifest["text_to_relocate"]
buildinfo["relocate_binaries"] = manifest["binary_to_relocate"]
buildinfo["relocate_links"] = manifest["link_to_relocate"]
+ buildinfo["hardlinks_deduped"] = manifest["hardlinks_deduped"]
buildinfo["prefix_to_hash"] = prefix_to_hash
filename = buildinfo_file_name(workdir)
with open(filename, "w") as outfile:
@@ -1441,6 +1482,38 @@ def check_package_relocatable(workdir, spec, allow_root):
relocate.raise_if_not_relocatable(cur_path_names, allow_root)
+def dedupe_hardlinks_if_necessary(root, buildinfo):
+ """Updates a buildinfo dict for old archives that did
+ not dedupe hardlinks. De-duping hardlinks is necessary
+ when relocating files in parallel and in-place. This
+ means we must preserve inodes when relocating."""
+
+ # New archives don't need this.
+ if buildinfo.get("hardlinks_deduped", False):
+ return
+
+ # Clearly we can assume that an inode is either in the
+ # textfile or binary group, but let's just stick to
+ # a single set of visited nodes.
+ visited = set()
+
+ # Note: we do *not* dedupe hardlinked symlinks, since
+ # it seems difficult or even impossible to relink
+ # symlinks while preserving inode.
+ for key in ("relocate_textfiles", "relocate_binaries"):
+ if key not in buildinfo:
+ continue
+ new_list = []
+ for rel_path in buildinfo[key]:
+ stat_result = os.lstat(os.path.join(root, rel_path))
+ identifier = (stat_result.st_dev, stat_result.st_ino)
+ if identifier in visited:
+ continue
+ visited.add(identifier)
+ new_list.append(rel_path)
+ buildinfo[key] = new_list
+
+
def relocate_package(spec, allow_root):
"""
Relocate the given package
@@ -1503,6 +1576,9 @@ def relocate_package(spec, allow_root):
tty.debug("Relocating package from", "%s to %s." % (old_layout_root, new_layout_root))
+ # Old archives maybe have hardlinks repeated.
+ dedupe_hardlinks_if_necessary(workdir, buildinfo)
+
def is_backup_file(file):
return file.endswith("~")
@@ -1548,7 +1624,7 @@ def relocate_package(spec, allow_root):
# For all buildcaches
# relocate the install prefixes in text files including dependencies
- relocate.relocate_text(text_names, prefix_to_prefix_text)
+ relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text)
paths_to_relocate = [old_prefix, old_layout_root]
paths_to_relocate.extend(prefix_to_hash.keys())
@@ -1564,13 +1640,13 @@ def relocate_package(spec, allow_root):
)
)
# relocate the install prefixes in binary files including dependencies
- relocate.relocate_text_bin(files_to_relocate, prefix_to_prefix_bin)
+ relocate.unsafe_relocate_text_bin(files_to_relocate, prefix_to_prefix_bin)
# If we are installing back to the same location
# relocate the sbang location if the spack directory changed
else:
if old_spack_prefix != new_spack_prefix:
- relocate.relocate_text(text_names, prefix_to_prefix_text)
+ relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text)
def _extract_inner_tarball(spec, filename, extract_to, unsigned, remote_checksum):
diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py
index 2373ec5e45..db2d6c9477 100644
--- a/lib/spack/spack/filesystem_view.py
+++ b/lib/spack/spack/filesystem_view.py
@@ -95,11 +95,11 @@ def view_copy(src, dst, view, spec=None):
prefix_to_projection[dep.prefix] = view.get_projection_for_spec(dep)
if spack.relocate.is_binary(dst):
- spack.relocate.relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection)
+ spack.relocate.unsafe_relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection)
else:
prefix_to_projection[spack.store.layout.root] = view._root
prefix_to_projection[orig_sbang] = new_sbang
- spack.relocate.relocate_text(files=[dst], prefixes=prefix_to_projection)
+ spack.relocate.unsafe_relocate_text(files=[dst], prefixes=prefix_to_projection)
try:
stat = os.stat(src)
os.chown(dst, stat.st_uid, stat.st_gid)
diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py
index 3ef332c204..3297965b6e 100644
--- a/lib/spack/spack/relocate.py
+++ b/lib/spack/spack/relocate.py
@@ -744,12 +744,15 @@ def relocate_links(links, orig_layout_root, orig_install_prefix, new_install_pre
tty.warn(msg.format(link_target, abs_link, new_install_prefix))
-def relocate_text(files, prefixes, concurrency=32):
+def unsafe_relocate_text(files, prefixes, concurrency=32):
"""Relocate text file from the original installation prefix to the
new prefix.
Relocation also affects the the path in Spack's sbang script.
+ Note: unsafe when files contains duplicates, such as repeated paths,
+ symlinks, hardlinks.
+
Args:
files (list): Text files to be relocated
prefixes (OrderedDict): String prefixes which need to be changed
@@ -786,11 +789,14 @@ def relocate_text(files, prefixes, concurrency=32):
tp.join()
-def relocate_text_bin(binaries, prefixes, concurrency=32):
+def unsafe_relocate_text_bin(binaries, prefixes, concurrency=32):
"""Replace null terminated path strings hard coded into binaries.
The new install prefix must be shorter than the original one.
+ Note: unsafe when files contains duplicates, such as repeated paths,
+ symlinks, hardlinks.
+
Args:
binaries (list): binaries to be relocated
prefixes (OrderedDict): String prefixes which need to be changed.
diff --git a/lib/spack/spack/rewiring.py b/lib/spack/spack/rewiring.py
index 8a2dcad035..6b6a37ddd1 100644
--- a/lib/spack/spack/rewiring.py
+++ b/lib/spack/spack/rewiring.py
@@ -70,7 +70,7 @@ def rewire_node(spec, explicit):
for rel_path in manifest.get("text_to_relocate", [])
]
if text_to_relocate:
- relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
+ relocate.unsafe_relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
bins_to_relocate = [
os.path.join(tempdir, spec.dag_hash(), rel_path)
@@ -97,7 +97,7 @@ def rewire_node(spec, explicit):
spec.build_spec.prefix,
spec.prefix,
)
- relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
+ relocate.unsafe_relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
# Copy package into place, except for spec.json (because spec.json
# describes the old spec and not the new spliced spec).
shutil.copytree(
diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py
index 7e9066ccbf..d0a8a0a4f0 100644
--- a/lib/spack/spack/test/bindist.py
+++ b/lib/spack/spack/test/bindist.py
@@ -11,6 +11,8 @@ import sys
import py
import pytest
+from llnl.util.filesystem import visit_directory_tree
+
import spack.binary_distribution as bindist
import spack.config
import spack.hooks.sbang as sbang
@@ -633,3 +635,47 @@ def test_FetchCacheError_pretty_printing_single():
assert "Multiple errors" not in str_e
assert "RuntimeError: Oops!" in str_e
assert str_e.rstrip() == str_e
+
+
+def test_build_manifest_visitor(tmpdir):
+ dir = "directory"
+ file = os.path.join("directory", "file")
+
+ with tmpdir.as_cwd():
+ # Create a file inside a directory
+ os.mkdir(dir)
+ with open(file, "wb") as f:
+ f.write(b"example file")
+
+ # Symlink the dir
+ os.symlink(dir, "symlink_to_directory")
+
+ # Symlink the file
+ os.symlink(file, "symlink_to_file")
+
+ # Hardlink the file
+ os.link(file, "hardlink_of_file")
+
+ # Hardlinked symlinks: seems like this is only a thing on Linux,
+ # on Darwin the symlink *target* is hardlinked, on Linux the
+ # symlink *itself* is hardlinked.
+ if sys.platform.startswith("linux"):
+ os.link("symlink_to_file", "hardlink_of_symlink_to_file")
+ os.link("symlink_to_directory", "hardlink_of_symlink_to_directory")
+
+ visitor = bindist.BuildManifestVisitor()
+ visit_directory_tree(str(tmpdir), visitor)
+
+ # We de-dupe hardlinks of files, so there should really be just one file
+ assert len(visitor.files) == 1
+
+ # We do not de-dupe symlinks, cause it's unclear how to update symlinks
+ # in-place, preserving inodes.
+ if sys.platform.startswith("linux"):
+ assert len(visitor.symlinks) == 4 # includes hardlinks of symlinks.
+ else:
+ assert len(visitor.symlinks) == 2
+
+ with tmpdir.as_cwd():
+ assert not any(os.path.islink(f) or os.path.isdir(f) for f in visitor.files)
+ assert all(os.path.islink(f) for f in visitor.symlinks)
diff --git a/lib/spack/spack/test/packaging.py b/lib/spack/spack/test/packaging.py
index f0d1c240e8..0ed4f943a3 100644
--- a/lib/spack/spack/test/packaging.py
+++ b/lib/spack/spack/test/packaging.py
@@ -36,7 +36,7 @@ from spack.relocate import (
needs_binary_relocation,
needs_text_relocation,
relocate_links,
- relocate_text,
+ unsafe_relocate_text,
)
from spack.spec import Spec
@@ -190,7 +190,7 @@ echo $PATH"""
@pytest.mark.usefixtures("install_mockery")
-def test_relocate_text(tmpdir):
+def test_unsafe_relocate_text(tmpdir):
spec = Spec("trivial-install-test-package")
spec.concretize()
with tmpdir.as_cwd():
@@ -203,7 +203,7 @@ def test_relocate_text(tmpdir):
filenames = [filename]
new_dir = "/opt/rh/devtoolset/"
# Singleton dict doesn't matter if Ordered
- relocate_text(filenames, {old_dir: new_dir})
+ unsafe_relocate_text(filenames, {old_dir: new_dir})
with open(filename, "r") as script:
for line in script:
assert new_dir in line
diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py
index b79f0ba1a4..ab306759cd 100644
--- a/lib/spack/spack/test/relocate.py
+++ b/lib/spack/spack/test/relocate.py
@@ -406,7 +406,7 @@ def test_relocate_text_bin(hello_world, copy_binary, tmpdir):
orig_path_bytes = str(orig_binary.dirpath()).encode("utf-8")
new_path_bytes = str(new_binary.dirpath()).encode("utf-8")
- spack.relocate.relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes})
+ spack.relocate.unsafe_relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes})
# Check original directory is not there anymore and it was
# substituted with the new one
@@ -421,7 +421,7 @@ def test_relocate_text_bin_raise_if_new_prefix_is_longer(tmpdir):
with open(fpath, "w") as f:
f.write("/short")
with pytest.raises(spack.relocate.BinaryTextReplaceError):
- spack.relocate.relocate_text_bin([fpath], {short_prefix: long_prefix})
+ spack.relocate.unsafe_relocate_text_bin([fpath], {short_prefix: long_prefix})
@pytest.mark.requires_executables("install_name_tool", "file", "cc")