diff options
author | Harmen Stoppels <harmenstoppels@gmail.com> | 2022-11-04 00:34:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-03 17:34:00 -0600 |
commit | b52be759788cf20f1458c80e6e005744f5856c73 (patch) | |
tree | a53db50ae7ca4ea9e779be099faee6581fc24eda | |
parent | 7fc49c42ee27bcd2629e3ac112e63da460d9a26c (diff) | |
download | spack-b52be759788cf20f1458c80e6e005744f5856c73.tar.gz spack-b52be759788cf20f1458c80e6e005744f5856c73.tar.bz2 spack-b52be759788cf20f1458c80e6e005744f5856c73.tar.xz spack-b52be759788cf20f1458c80e6e005744f5856c73.zip |
Experimental binding of shared ELF libraries (#31948)
Adds another post install hook that loops over the install prefix, looking for shared libraries type of ELF files, and sets the soname to their own absolute paths.
The idea being, whenever somebody links against those libraries, the linker copies the soname (which is the absolute path to the library) as a "needed" library, so that at runtime the dynamic loader realizes the needed library is a path which should be loaded directly without searching.
As a result:
1. rpaths are not used for the fixed/static list of needed libraries in the dynamic section (only for _actually_ dynamically loaded libraries through `dlopen`), which largely solves the issue that Spack's rpaths are a heuristic (`<prefix>/lib` and `<prefix>/lib64` might not be where libraries really are...)
2. improved startup times (no library search required)
-rw-r--r-- | etc/spack/defaults/config.yaml | 18 | ||||
-rw-r--r-- | lib/spack/docs/config_yaml.rst | 52 | ||||
-rw-r--r-- | lib/spack/llnl/util/lang.py | 8 | ||||
-rw-r--r-- | lib/spack/spack/bootstrap.py | 5 | ||||
-rw-r--r-- | lib/spack/spack/build_environment.py | 2 | ||||
-rw-r--r-- | lib/spack/spack/hooks/__init__.py | 11 | ||||
-rw-r--r-- | lib/spack/spack/hooks/absolutify_elf_sonames.py | 171 | ||||
-rw-r--r-- | lib/spack/spack/package_base.py | 9 | ||||
-rw-r--r-- | lib/spack/spack/schema/config.py | 20 | ||||
-rw-r--r-- | lib/spack/spack/test/build_environment.py | 2 | ||||
-rw-r--r-- | lib/spack/spack/test/cmd/config.py | 8 | ||||
-rw-r--r-- | lib/spack/spack/test/hooks/absolutify_elf_sonames.py | 81 | ||||
-rw-r--r-- | var/spack/repos/builtin/packages/cuda/package.py | 3 | ||||
-rw-r--r-- | var/spack/repos/builtin/packages/nvhpc/package.py | 3 | ||||
-rw-r--r-- | var/spack/repos/builtin/packages/openjdk/package.py | 5 |
15 files changed, 383 insertions, 15 deletions
diff --git a/etc/spack/defaults/config.yaml b/etc/spack/defaults/config.yaml index e72608248f..b3356428fe 100644 --- a/etc/spack/defaults/config.yaml +++ b/etc/spack/defaults/config.yaml @@ -187,10 +187,20 @@ config: package_lock_timeout: null - # Control whether Spack embeds RPATH or RUNPATH attributes in ELF binaries. - # Has no effect on macOS. DO NOT MIX these within the same install tree. - # See the Spack documentation for details. - shared_linking: 'rpath' + # Control how shared libraries are located at runtime on Linux. See the + # the Spack documentation for details. + shared_linking: + # Spack automatically embeds runtime search paths in ELF binaries for their + # dependencies. Their type can either be "rpath" or "runpath". For glibc, rpath is + # inherited and has precedence over LD_LIBRARY_PATH; runpath is not inherited + # and of lower precedence. DO NOT MIX these within the same install tree. + type: rpath + + + # (Experimental) Embed absolute paths of dependent libraries directly in ELF + # binaries to avoid runtime search. This can improve startup time of + # executables with many dependencies, in particular on slow filesystems. + bind: false # Set to 'false' to allow installation on filesystems that doesn't allow setgid bit diff --git a/lib/spack/docs/config_yaml.rst b/lib/spack/docs/config_yaml.rst index b93384e81b..f2159c64cc 100644 --- a/lib/spack/docs/config_yaml.rst +++ b/lib/spack/docs/config_yaml.rst @@ -224,9 +224,9 @@ them). Please note that we currently disable ccache's ``hash_dir`` feature to avoid an issue with the stage directory (see https://github.com/LLNL/spack/pull/3761#issuecomment-294352232). ------------------- -``shared_linking`` ------------------- +----------------------- +``shared_linking:type`` +----------------------- Control whether Spack embeds ``RPATH`` or ``RUNPATH`` attributes in ELF binaries so that they can find their dependencies. Has no effect on macOS. @@ -245,6 +245,52 @@ the loading object. DO NOT MIX the two options within the same install tree. +----------------------- +``shared_linking:bind`` +----------------------- + +This is an *experimental option* that controls whether Spack embeds absolute paths +to needed shared libraries in ELF executables and shared libraries on Linux. Setting +this option to ``true`` has two advantages: + +1. **Improved startup time**: when running an executable, the dynamic loader does not + have to perform a search for needed libraries, they are loaded directly. +2. **Reliability**: libraries loaded at runtime are those that were linked to. This + minimizes the risk of accidentally picking up system libraries. + +In the current implementation, Spack sets the soname (shared object name) of +libraries to their install path upon installation. This has two implications: + +1. binding does not apply to libraries installed *before* the option was enabled; +2. toggling the option off does *not* prevent binding of libraries installed when + the option was still enabled. + +It is also worth noting that: + +1. Applications relying on ``dlopen(3)`` will continue to work, even when they open + a library by name. This is because ``RPATH``\s are retained in binaries also + when ``bind`` is enabled. +2. ``LD_PRELOAD`` continues to work for the typical use case of overriding + symbols, such as preloading a library with a more efficient ``malloc``. + However, the preloaded library will be loaded *additionally to*, instead of + *in place of* another library with the same name --- this can be problematic + in very rare cases where libraries rely on a particular ``init`` or ``fini`` + order. + +.. note:: + + In some cases packages provide *stub libraries* that only contain an interface + for linking, but lack an implementation for runtime. An example of this is + ``libcuda.so``, provided by the CUDA toolkit; it can be used to link against, + but the library needed at runtime is the one installed with the CUDA driver. + To avoid binding those libraries, they can be marked as non-bindable using + a property in the package: + + .. code-block:: python + + class Example(Package): + non_bindable_shared_objects = ["libinterface.so"] + ---------------------- ``terminal_title`` ---------------------- diff --git a/lib/spack/llnl/util/lang.py b/lib/spack/llnl/util/lang.py index f1a0b7ba31..51bd710ddb 100644 --- a/lib/spack/llnl/util/lang.py +++ b/lib/spack/llnl/util/lang.py @@ -1022,6 +1022,14 @@ def stable_partition( return true_items, false_items +def ensure_last(lst, *elements): + """Performs a stable partition of lst, ensuring that ``elements`` + occur at the end of ``lst`` in specified order. Mutates ``lst``. + Raises ``ValueError`` if any ``elements`` are not already in ``lst``.""" + for elt in elements: + lst.append(lst.pop(lst.index(elt))) + + class TypedMutableSequence(MutableSequence): """Base class that behaves like a list, just with a different type. diff --git a/lib/spack/spack/bootstrap.py b/lib/spack/spack/bootstrap.py index d92609fdcd..60f8153ae2 100644 --- a/lib/spack/spack/bootstrap.py +++ b/lib/spack/spack/bootstrap.py @@ -675,6 +675,11 @@ def _add_externals_if_missing(): _REF_COUNT = 0 +def is_bootstrapping(): + global _REF_COUNT + return _REF_COUNT > 0 + + @contextlib.contextmanager def ensure_bootstrap_configuration(): # The context manager is reference counted to ensure we don't swap multiple diff --git a/lib/spack/spack/build_environment.py b/lib/spack/spack/build_environment.py index 463c4dcde7..1496e9359f 100644 --- a/lib/spack/spack/build_environment.py +++ b/lib/spack/spack/build_environment.py @@ -317,7 +317,7 @@ def set_compiler_environment_variables(pkg, env): env.set("SPACK_LINKER_ARG", compiler.linker_arg) # Check whether we want to force RPATH or RUNPATH - if spack.config.get("config:shared_linking") == "rpath": + if spack.config.get("config:shared_linking:type") == "rpath": env.set("SPACK_DTAGS_TO_STRIP", compiler.enable_new_dtags) env.set("SPACK_DTAGS_TO_ADD", compiler.disable_new_dtags) else: diff --git a/lib/spack/spack/hooks/__init__.py b/lib/spack/spack/hooks/__init__.py index 699464c913..f30082ccf6 100644 --- a/lib/spack/spack/hooks/__init__.py +++ b/lib/spack/spack/hooks/__init__.py @@ -27,7 +27,8 @@ This can be used to implement support for things like module systems (e.g. modules, lmod, etc.) or to add other custom features. """ -import llnl.util.lang + +from llnl.util.lang import ensure_last, list_modules import spack.paths @@ -44,11 +45,11 @@ class _HookRunner(object): def _populate_hooks(cls): # Lazily populate the list of hooks cls._hooks = [] - relative_names = list(llnl.util.lang.list_modules(spack.paths.hooks_path)) - # We want this hook to be the last registered - relative_names.sort(key=lambda x: x == "write_install_manifest") - assert relative_names[-1] == "write_install_manifest" + relative_names = list(list_modules(spack.paths.hooks_path)) + + # Ensure that write_install_manifest comes last + ensure_last(relative_names, "absolutify_elf_sonames", "write_install_manifest") for name in relative_names: module_name = __name__ + "." + name diff --git a/lib/spack/spack/hooks/absolutify_elf_sonames.py b/lib/spack/spack/hooks/absolutify_elf_sonames.py new file mode 100644 index 0000000000..d16de2ea39 --- /dev/null +++ b/lib/spack/spack/hooks/absolutify_elf_sonames.py @@ -0,0 +1,171 @@ +# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import os + +import llnl.util.tty as tty +from llnl.util.filesystem import BaseDirectoryVisitor, visit_directory_tree +from llnl.util.lang import elide_list + +import spack.bootstrap +import spack.config +import spack.relocate +from spack.util.elf import ElfParsingError, parse_elf +from spack.util.executable import Executable + + +def is_shared_library_elf(filepath): + """Return true if filepath is most a shared library. + Our definition of a shared library for ELF requires: + 1. a dynamic section, + 2. a soname OR lack of interpreter. + The problem is that PIE objects (default on Ubuntu) are + ET_DYN too, and not all shared libraries have a soname... + no interpreter is typically the best indicator then.""" + try: + with open(filepath, "rb") as f: + elf = parse_elf(f, interpreter=True, dynamic_section=True) + return elf.has_pt_dynamic and (elf.has_soname or not elf.has_pt_interp) + except (IOError, OSError, ElfParsingError): + return False + + +class SharedLibrariesVisitor(BaseDirectoryVisitor): + """Visitor that collects all shared libraries in a prefix, with the + exception of an exclude list.""" + + def __init__(self, exclude_list): + + # List of file and directory names to be excluded + self.exclude_list = frozenset(exclude_list) + + # Map from (ino, dev) -> path. We need 1 path per file, if there are hardlinks, + # we don't need to store the path multiple times. + self.libraries = dict() + + # Set of (ino, dev) pairs (excluded by symlinks). + self.excluded_through_symlink = set() + + def visit_file(self, root, rel_path, depth): + # Check if excluded + basename = os.path.basename(rel_path) + if basename in self.exclude_list: + return + + filepath = os.path.join(root, rel_path) + s = os.lstat(filepath) + identifier = (s.st_ino, s.st_dev) + + # We're hitting a hardlink or symlink of an excluded lib, no need to parse. + if identifier in self.libraries or identifier in self.excluded_through_symlink: + return + + # Register the file if it's a shared lib that needs to be patched. + if is_shared_library_elf(filepath): + self.libraries[identifier] = rel_path + + def visit_symlinked_file(self, root, rel_path, depth): + # We don't need to follow the symlink and parse the file, since we will hit + # it by recursing the prefix anyways. We only need to check if the target + # should be excluded based on the filename of the symlink. E.g. when excluding + # libf.so, which is a symlink to libf.so.1.2.3, we keep track of the stat data + # of the latter. + basename = os.path.basename(rel_path) + if basename not in self.exclude_list: + return + + # Register the (ino, dev) pair as ignored (if the symlink is not dangling) + filepath = os.path.join(root, rel_path) + try: + s = os.stat(filepath) + except OSError: + return + self.excluded_through_symlink.add((s.st_ino, s.st_dev)) + + def before_visit_dir(self, root, rel_path, depth): + # Allow skipping over directories. E.g. `<prefix>/lib/stubs` can be skipped by + # adding `"stubs"` to the exclude list. + return os.path.basename(rel_path) not in self.exclude_list + + def before_visit_symlinked_dir(self, root, rel_path, depth): + # Never enter symlinked dirs, since we don't want to leave the prefix, and + # we'll enter the target dir inside the prefix anyways since we're recursing + # everywhere. + return False + + def get_shared_libraries_relative_paths(self): + """Get the libraries that should be patched, with the excluded libraries + removed.""" + for identifier in self.excluded_through_symlink: + self.libraries.pop(identifier, None) + + return [rel_path for rel_path in self.libraries.values()] + + +def patch_sonames(patchelf, root, rel_paths): + """Set the soname to the file's own path for a list of + given shared libraries.""" + fixed = [] + for rel_path in rel_paths: + filepath = os.path.join(root, rel_path) + normalized = os.path.normpath(filepath) + args = ["--set-soname", normalized, normalized] + output = patchelf(*args, output=str, error=str, fail_on_error=False) + if patchelf.returncode == 0: + fixed.append(rel_path) + else: + # Note: treat as warning to avoid (long) builds to fail post-install. + tty.warn("patchelf: failed to set soname of {}: {}".format(normalized, output.strip())) + return fixed + + +def find_and_patch_sonames(prefix, exclude_list, patchelf): + # Locate all shared libraries in the prefix dir of the spec, excluding + # the ones set in the non_bindable_shared_objects property. + visitor = SharedLibrariesVisitor(exclude_list) + visit_directory_tree(prefix, visitor) + + # Patch all sonames. + relative_paths = visitor.get_shared_libraries_relative_paths() + return patch_sonames(patchelf, prefix, relative_paths) + + +def post_install(spec): + # Skip if disabled + if not spack.config.get("config:shared_linking:bind", False): + return + + # Skip externals + if spec.external: + return + + # Only enable on platforms using ELF. + if not spec.satisfies("platform=linux") and not spec.satisfies("platform=cray"): + return + + # Disable this hook when bootstrapping, to avoid recursion. + if spack.bootstrap.is_bootstrapping(): + return + + # Should failing to locate patchelf be a hard error? + patchelf_path = spack.relocate._patchelf() + if not patchelf_path: + return + patchelf = Executable(patchelf_path) + + fixes = find_and_patch_sonames(spec.prefix, spec.package.non_bindable_shared_objects, patchelf) + + if not fixes: + return + + # Unfortunately this does not end up in the build logs. + tty.info( + "{}: Patched {} {}: {}".format( + spec.name, + len(fixes), + "soname" if len(fixes) == 1 else "sonames", + ", ".join(elide_list(fixes, max_num=5)), + ) + ) diff --git a/lib/spack/spack/package_base.py b/lib/spack/spack/package_base.py index cf96beca58..1871e6785b 100644 --- a/lib/spack/spack/package_base.py +++ b/lib/spack/spack/package_base.py @@ -564,6 +564,15 @@ class PackageBase(six.with_metaclass(PackageMeta, WindowsRPathMeta, PackageViewM #: for immediate dependencies. transitive_rpaths = True + #: List of shared objects that should be replaced with a different library at + #: runtime. Typically includes stub libraries like libcuda.so. When linking + #: against a library listed here, the dependent will only record its soname + #: or filename, not its absolute path, so that the dynamic linker will search + #: for it. Note: accepts both file names and directory names, for example + #: ``["libcuda.so", "stubs"]`` will ensure libcuda.so and all libraries in the + #: stubs directory are not bound by path.""" + non_bindable_shared_objects = [] # type: List[str] + #: List of prefix-relative file paths (or a single path). If these do #: not exist after install, or if they exist but are not files, #: sanity checks fail. diff --git a/lib/spack/spack/schema/config.py b/lib/spack/spack/schema/config.py index c82990da5c..96e34a051a 100644 --- a/lib/spack/spack/schema/config.py +++ b/lib/spack/spack/schema/config.py @@ -20,7 +20,18 @@ properties = { "type": "object", "default": {}, "properties": { - "shared_linking": {"type": "string", "enum": ["rpath", "runpath"]}, + "shared_linking": { + "anyOf": [ + {"type": "string", "enum": ["rpath", "runpath"]}, + { + "type": "object", + "properties": { + "type": {"type": "string", "enum": ["rpath", "runpath"]}, + "bind": {"type": "boolean"}, + }, + }, + ] + }, "install_tree": { "anyOf": [ { @@ -136,4 +147,11 @@ def update(data): data["url_fetch_method"] = "curl" if use_curl else "urllib" changed = True + shared_linking = data.get("shared_linking", None) + if isinstance(shared_linking, six.string_types): + # deprecated short-form shared_linking: rpath/runpath + # add value as `type` in updated shared_linking + data["shared_linking"] = {"type": shared_linking, "bind": False} + changed = True + return changed diff --git a/lib/spack/spack/test/build_environment.py b/lib/spack/spack/test/build_environment.py index 6ba8fc056f..9b894a9351 100644 --- a/lib/spack/spack/test/build_environment.py +++ b/lib/spack/spack/test/build_environment.py @@ -465,7 +465,7 @@ def test_setting_dtags_based_on_config(config_setting, expected_flag, config, mo pkg = s.package env = EnvironmentModifications() - with spack.config.override("config:shared_linking", config_setting): + with spack.config.override("config:shared_linking", {"type": config_setting, "bind": False}): spack.build_environment.set_compiler_environment_variables(pkg, env) modifications = env.group_by_name() assert "SPACK_DTAGS_TO_STRIP" in modifications diff --git a/lib/spack/spack/test/cmd/config.py b/lib/spack/spack/test/cmd/config.py index e5ad82cf43..56943f5d8b 100644 --- a/lib/spack/spack/test/cmd/config.py +++ b/lib/spack/spack/test/cmd/config.py @@ -606,6 +606,14 @@ def check_config_updated(data): assert data["install_tree"]["projections"] == {"all": "{name}-{version}"} +def test_config_update_shared_linking(mutable_config): + # Old syntax: config:shared_linking:rpath/runpath + # New syntax: config:shared_linking:{type:rpath/runpath,bind:True/False} + with spack.config.override("config:shared_linking", "runpath"): + assert spack.config.get("config:shared_linking:type") == "runpath" + assert not spack.config.get("config:shared_linking:bind") + + def test_config_prefer_upstream( tmpdir_factory, install_mockery, mock_fetch, mutable_config, gen_mock_layout, monkeypatch ): diff --git a/lib/spack/spack/test/hooks/absolutify_elf_sonames.py b/lib/spack/spack/test/hooks/absolutify_elf_sonames.py new file mode 100644 index 0000000000..2163b776dc --- /dev/null +++ b/lib/spack/spack/test/hooks/absolutify_elf_sonames.py @@ -0,0 +1,81 @@ +# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + + +import os + +import pytest + +import llnl.util.filesystem as fs + +import spack.platforms +from spack.hooks.absolutify_elf_sonames import ( + SharedLibrariesVisitor, + find_and_patch_sonames, +) +from spack.util.executable import Executable + + +def skip_unless_linux(f): + return pytest.mark.skipif( + str(spack.platforms.real_host()) != "linux", reason="only tested on linux for now" + )(f) + + +class ExecutableIntercept: + def __init__(self): + self.calls = [] + + def __call__(self, *args, **kwargs): + self.calls.append(args) + + @property + def returncode(self): + return 0 + + +@pytest.mark.requires_executables("gcc") +@skip_unless_linux +def test_shared_libraries_visitor(tmpdir): + """Integration test for soname rewriting""" + gcc = Executable("gcc") + + # Create a directory structure like this: + # ./no-soname.so # just a shared library without a soname + # ./soname.so # a shared library with a soname + # ./executable.so # an executable masquerading as a shared lib + # ./libskipme.so # a shared library with a soname + # ./mydir/parent_dir -> .. # a symlinked dir, causing a cycle + # ./mydir/skip_symlink -> ../libskipme # a symlink to a library + + with fs.working_dir(str(tmpdir)): + with open("hello.c", "w") as f: + f.write("int main(){return 0;}") + gcc("hello.c", "-o", "no-soname.so", "--shared") + gcc("hello.c", "-o", "soname.so", "--shared", "-Wl,-soname,example.so") + gcc("hello.c", "-pie", "-o", "executable.so") + gcc("hello.c", "-o", "libskipme.so", "-Wl,-soname,libskipme.so") + os.mkdir("my_dir") + os.symlink("..", os.path.join("my_dir", "parent_dir")) + os.symlink(os.path.join("..", "libskipme.so"), os.path.join("my_dir", "skip_symlink")) + + # Visit the whole prefix, but exclude `skip_symlink` + visitor = SharedLibrariesVisitor(exclude_list=["skip_symlink"]) + fs.visit_directory_tree(str(tmpdir), visitor) + relative_paths = visitor.get_shared_libraries_relative_paths() + + assert "no-soname.so" in relative_paths + assert "soname.so" in relative_paths + assert "executable.so" not in relative_paths + assert "libskipme.so" not in relative_paths + + # Run the full hook of finding libs and setting sonames. + patchelf = ExecutableIntercept() + find_and_patch_sonames(str(tmpdir), ["skip_symlink"], patchelf) + assert len(patchelf.calls) == 2 + elf_1 = tmpdir.join("no-soname.so") + elf_2 = tmpdir.join("soname.so") + assert ("--set-soname", elf_1, elf_1) in patchelf.calls + assert ("--set-soname", elf_2, elf_2) in patchelf.calls diff --git a/var/spack/repos/builtin/packages/cuda/package.py b/var/spack/repos/builtin/packages/cuda/package.py index e8157b54ae..2375e318af 100644 --- a/var/spack/repos/builtin/packages/cuda/package.py +++ b/var/spack/repos/builtin/packages/cuda/package.py @@ -583,3 +583,6 @@ class Cuda(Package): if "compat" not in parts and "stubs" not in parts: filtered_libs.append(lib) return LibraryList(filtered_libs) + + # Avoid binding stub libraries by absolute path + non_bindable_shared_objects = ["stubs"] diff --git a/var/spack/repos/builtin/packages/nvhpc/package.py b/var/spack/repos/builtin/packages/nvhpc/package.py index b7e625b97c..2ade4bc9e6 100644 --- a/var/spack/repos/builtin/packages/nvhpc/package.py +++ b/var/spack/repos/builtin/packages/nvhpc/package.py @@ -415,3 +415,6 @@ class Nvhpc(Package): libs.append("libnvf") return find_libraries(libs, root=prefix, recursive=True) + + # Avoid binding stub libraries by absolute path + non_bindable_shared_objects = ["stubs"] diff --git a/var/spack/repos/builtin/packages/openjdk/package.py b/var/spack/repos/builtin/packages/openjdk/package.py index 6382dc3688..fab50a28f8 100644 --- a/var/spack/repos/builtin/packages/openjdk/package.py +++ b/var/spack/repos/builtin/packages/openjdk/package.py @@ -413,3 +413,8 @@ class Openjdk(Package): class_paths = find(dependent_spec.prefix, "*.jar") classpath = os.pathsep.join(class_paths) env.prepend_path("CLASSPATH", classpath) + + # Since we provide openjdk as a binary, we can't remove an obsolete glibc + # fix that prevents us from modifying the soname of libjvm.so. If we move + # to source builds this should be possible. + non_bindable_shared_objects = ["libjvm.so"] |