From 43f3a35150f162ec36de96022c2d4c58388ea01b Mon Sep 17 00:00:00 2001
From: Harmen Stoppels <me@harmenstoppels.nl>
Date: Sat, 27 Apr 2024 16:49:20 +0200
Subject: gcc: generate spec file and fix external libc default paths after
 install from cache (#43839)

Co-authored-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
---
 lib/spack/spack/compiler.py                       | 18 +------
 lib/spack/spack/detection/path.py                 | 19 ++-----
 lib/spack/spack/installer.py                      |  3 ++
 lib/spack/spack/test/util/libc.py                 | 26 ++++++++++
 lib/spack/spack/util/elf.py                       | 10 ++++
 lib/spack/spack/util/libc.py                      | 59 ++++++++++++++++++++++
 var/spack/repos/builtin/packages/gcc/package.py   | 61 +++++++++++++++++++++++
 var/spack/repos/builtin/packages/glibc/package.py |  4 ++
 var/spack/repos/builtin/packages/musl/package.py  |  4 ++
 9 files changed, 172 insertions(+), 32 deletions(-)
 create mode 100644 lib/spack/spack/test/util/libc.py

diff --git a/lib/spack/spack/compiler.py b/lib/spack/spack/compiler.py
index 4bd15a3219..251ce682c8 100644
--- a/lib/spack/spack/compiler.py
+++ b/lib/spack/spack/compiler.py
@@ -8,7 +8,6 @@ import itertools
 import os
 import platform
 import re
-import shlex
 import shutil
 import sys
 import tempfile
@@ -182,21 +181,6 @@ def _parse_non_system_link_dirs(string: str) -> List[str]:
     return list(p for p in link_dirs if not in_system_subdirectory(p))
 
 
-def _parse_dynamic_linker(output: str):
-    """Parse -dynamic-linker /path/to/ld.so from compiler output"""
-    for line in reversed(output.splitlines()):
-        if "-dynamic-linker" not in line:
-            continue
-        args = shlex.split(line)
-
-        for idx in reversed(range(1, len(args))):
-            arg = args[idx]
-            if arg == "-dynamic-linker" or args == "--dynamic-linker":
-                return args[idx + 1]
-            elif arg.startswith("--dynamic-linker=") or arg.startswith("-dynamic-linker="):
-                return arg.split("=", 1)[1]
-
-
 def in_system_subdirectory(path):
     system_dirs = [
         "/lib/",
@@ -452,7 +436,7 @@ class Compiler:
         if not output:
             return None
 
-        dynamic_linker = _parse_dynamic_linker(output)
+        dynamic_linker = spack.util.libc.parse_dynamic_linker(output)
 
         if not dynamic_linker:
             return None
diff --git a/lib/spack/spack/detection/path.py b/lib/spack/spack/detection/path.py
index 514d1c13e2..711e17467e 100644
--- a/lib/spack/spack/detection/path.py
+++ b/lib/spack/spack/detection/path.py
@@ -83,26 +83,15 @@ def executables_in_path(path_hints: List[str]) -> Dict[str, str]:
     return path_to_dict(search_paths)
 
 
-def get_elf_compat(path):
-    """For ELF files, get a triplet (EI_CLASS, EI_DATA, e_machine) and see if
-    it is host-compatible."""
-    # On ELF platforms supporting, we try to be a bit smarter when it comes to shared
-    # libraries, by dropping those that are not host compatible.
-    with open(path, "rb") as f:
-        elf = elf_utils.parse_elf(f, only_header=True)
-        return (elf.is_64_bit, elf.is_little_endian, elf.elf_hdr.e_machine)
-
-
 def accept_elf(path, host_compat):
-    """Accept an ELF file if the header matches the given compat triplet,
-    obtained with :py:func:`get_elf_compat`. In case it's not an ELF (e.g.
-    static library, or some arbitrary file, fall back to is_readable_file)."""
+    """Accept an ELF file if the header matches the given compat triplet. In case it's not an ELF
+    (e.g. a static library or some arbitrary file), fall back to is_readable_file."""
     # Fast path: assume libraries at least have .so in their basename.
     # Note: don't replace with splitext, because of libsmth.so.1.2.3 file names.
     if ".so" not in os.path.basename(path):
         return llnl.util.filesystem.is_readable_file(path)
     try:
-        return host_compat == get_elf_compat(path)
+        return host_compat == elf_utils.get_elf_compat(path)
     except (OSError, elf_utils.ElfParsingError):
         return llnl.util.filesystem.is_readable_file(path)
 
@@ -155,7 +144,7 @@ def libraries_in_ld_and_system_library_path(
     search_paths = list(llnl.util.lang.dedupe(search_paths, key=file_identifier))
 
     try:
-        host_compat = get_elf_compat(sys.executable)
+        host_compat = elf_utils.get_elf_compat(sys.executable)
         accept = lambda path: accept_elf(path, host_compat)
     except (OSError, elf_utils.ElfParsingError):
         accept = llnl.util.filesystem.is_readable_file
diff --git a/lib/spack/spack/installer.py b/lib/spack/spack/installer.py
index ee67c07fd3..1f33a7c6b0 100644
--- a/lib/spack/spack/installer.py
+++ b/lib/spack/spack/installer.py
@@ -489,6 +489,9 @@ def _process_binary_cache_tarball(
     with timer.measure("install"), spack.util.path.filter_padding():
         binary_distribution.extract_tarball(pkg.spec, download_result, force=False, timer=timer)
 
+        if hasattr(pkg, "_post_buildcache_install_hook"):
+            pkg._post_buildcache_install_hook()
+
         pkg.installed_from_binary_cache = True
         spack.store.STORE.db.add(pkg.spec, spack.store.STORE.layout, explicit=explicit)
         return True
diff --git a/lib/spack/spack/test/util/libc.py b/lib/spack/spack/test/util/libc.py
new file mode 100644
index 0000000000..f0ccc27a51
--- /dev/null
+++ b/lib/spack/spack/test/util/libc.py
@@ -0,0 +1,26 @@
+# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import pytest
+
+from spack.util import libc
+
+
+@pytest.mark.parametrize(
+    "libc_prefix,startfile_prefix,expected",
+    [
+        # Ubuntu-like: the multiarch subdir below lib maps to the same subdir below include
+        ("/usr", "/usr/lib/x86_64-linux-gnu", "/usr/include/x86_64-linux-gnu"),
+        ("/usr", "/usr/lib/x86_64-linux-musl", "/usr/include/x86_64-linux-musl"),
+        ("/usr", "/usr/lib/aarch64-linux-gnu", "/usr/include/aarch64-linux-gnu"),
+        ("/usr", "/usr/lib/aarch64-linux-musl", "/usr/include/aarch64-linux-musl"),
+        # rhel-like: no multiarch subdir, both lib and lib64 map to plain include
+        ("/usr", "/usr/lib64", "/usr/include"),
+        ("/usr", "/usr/lib", "/usr/include"),
+    ],
+)
+@pytest.mark.not_on_windows("The unit test deals with unix-like paths")
+def test_header_dir_computation(libc_prefix, startfile_prefix, expected):
+    """Tests that we compute the correct header directory from the prefix of the libc startfiles"""
+    assert libc.libc_include_dir_from_startfile_prefix(libc_prefix, startfile_prefix) == expected
diff --git a/lib/spack/spack/util/elf.py b/lib/spack/spack/util/elf.py
index 64577bf8fb..f0fda07787 100644
--- a/lib/spack/spack/util/elf.py
+++ b/lib/spack/spack/util/elf.py
@@ -655,6 +655,16 @@ def pt_interp(path: str) -> Optional[str]:
     return elf.pt_interp_str.decode("utf-8")
 
 
+def get_elf_compat(path):
+    """Get a triplet (EI_CLASS, EI_DATA, e_machine) from an ELF file, which can be used to see if
+    two ELF files are compatible."""
+    # Only the ELF header is parsed. Callers compare triplets, for instance to drop shared
+    # libraries that are not compatible with the host.
+    with open(path, "rb") as f:
+        elf = parse_elf(f, only_header=True)
+        return (elf.is_64_bit, elf.is_little_endian, elf.elf_hdr.e_machine)
+
+
 class ElfCStringUpdatesFailed(Exception):
     def __init__(
         self, rpath: Optional[UpdateCStringAction], pt_interp: Optional[UpdateCStringAction]
diff --git a/lib/spack/spack/util/libc.py b/lib/spack/spack/util/libc.py
index df0101bd46..a0bdfdd76f 100644
--- a/lib/spack/spack/util/libc.py
+++ b/lib/spack/spack/util/libc.py
@@ -4,7 +4,9 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 import os
+import os.path
 import re
+import shlex
 import sys
 from subprocess import PIPE, run
 from typing import Optional
@@ -115,3 +117,60 @@ def libc_from_current_python_process() -> Optional["spack.spec.Spec"]:
         return None
 
     return libc_from_dynamic_linker(dynamic_linker)
+
+
+def startfile_prefix(prefix: str, compatible_with: str = sys.executable) -> Optional[str]:
+    """Return the directory within max depth 2 of prefix containing a crt1.o compatible with the
+    ELF file compatible_with, or None. Useful to find external libc startfiles on multiarch."""
+    try:
+        compat = spack.util.elf.get_elf_compat(compatible_with)
+        accept = lambda path: spack.util.elf.get_elf_compat(path) == compat
+    except Exception:
+        accept = lambda path: True  # compatible_with could not be parsed: accept any crt1.o
+
+    queue = [(0, prefix)]
+    while queue:
+        depth, path = queue.pop()  # pops the most recently queued directory first
+        try:
+            iterator = os.scandir(path)
+        except OSError:
+            continue  # directory cannot be listed: skip it
+        with iterator:
+            for entry in iterator:
+                try:
+                    if entry.is_dir(follow_symlinks=True):
+                        if depth < 2:
+                            queue.append((depth + 1, entry.path))
+                    elif entry.name == "crt1.o" and accept(entry.path):
+                        return path
+                except Exception:
+                    continue  # e.g. accept() failed to parse the candidate; try the next entry
+    return None
+
+
+def parse_dynamic_linker(output: str) -> Optional[str]:
+    """Parse -dynamic-linker /path/to/ld.so from compiler output; None if it is absent."""
+    for line in reversed(output.splitlines()):
+        if "-dynamic-linker" not in line:
+            continue
+        args = shlex.split(line)
+        for idx in reversed(range(1, len(args))):
+            arg = args[idx]
+            if arg in ("-dynamic-linker", "--dynamic-linker") and idx + 1 < len(args):
+                return args[idx + 1]
+            elif arg.startswith(("--dynamic-linker=", "-dynamic-linker=")):
+                return arg.split("=", 1)[1]
+    return None
+
+
+def libc_include_dir_from_startfile_prefix(
+    libc_prefix: str, startfile_prefix: str
+) -> Optional[str]:
+    """Guess the libc header directory that belongs to a startfile prefix by mapping
+    $libc_prefix/lib*/<multiarch> to $libc_prefix/include/<multiarch>. This is a heuristic: the
+    resulting directory is not checked for existence or correctness."""
+    libdir, *multiarch = os.path.relpath(startfile_prefix, libc_prefix).split(os.path.sep)
+    # Only well-known library directory names are mapped; anything else yields no guess.
+    if libdir in ("lib", "lib64", "libx32", "lib32"):
+        return os.path.join(libc_prefix, "include", *multiarch)
+    return None
diff --git a/var/spack/repos/builtin/packages/gcc/package.py b/var/spack/repos/builtin/packages/gcc/package.py
index b70ff7586d..6d94e2785d 100644
--- a/var/spack/repos/builtin/packages/gcc/package.py
+++ b/var/spack/repos/builtin/packages/gcc/package.py
@@ -16,6 +16,7 @@ from llnl.util.symlink import readlink
 
 import spack.platforms
 import spack.util.executable
+import spack.util.libc
 from spack.operating_systems.mac_os import macos_sdk_path, macos_version
 from spack.package import *
 
@@ -1152,3 +1153,63 @@ class Gcc(AutotoolsPackage, GNUMirrorPackage):
             )
         # The version of gcc-runtime is the same as the %gcc used to "compile" it
         pkg("gcc-runtime").requires(f"@={str(spec.version)}", when=f"%{str(spec)}")
+
+    def _post_buildcache_install_hook(self):
+        """Rewrite the specs file after a binary cache install, adding external libc flags."""
+        if not self.spec.satisfies("platform=linux"):
+            return
+
+        # Setting up the runtime environment shouldn't be necessary here.
+        relocation_args = []
+        gcc = self.spec["gcc"].command
+        specs_file = os.path.join(self.spec_dir, "specs")
+        dryrun = gcc("test.c", "-###", output=os.devnull, error=str).strip()
+        dynamic_linker = spack.util.libc.parse_dynamic_linker(dryrun)  # None if dryrun is empty
+        if not dynamic_linker:
+            tty.warn(f"Cannot relocate {specs_file}, compiler might not be working properly")
+            return
+
+        libc = spack.util.libc.libc_from_dynamic_linker(dynamic_linker)
+        if libc is None:  # libc could not be identified from the dynamic linker
+            tty.warn(f"Cannot relocate {specs_file}, compiler might not be working properly")
+            return
+
+        # We search for crt1.o ourselves because `gcc -print-file-name=crt1.o` can give a rather
+        # convoluted relative path from a different prefix.
+        startfile_prefix = spack.util.libc.startfile_prefix(libc.external_path, dynamic_linker)
+
+        gcc_can_locate = lambda p: os.path.isabs(
+            gcc(f"-print-file-name={p}", output=str, error=os.devnull).strip()
+        )
+        if startfile_prefix and not gcc_can_locate("crt1.o"):
+            relocation_args.append(f"-B{startfile_prefix}")
+
+        # libc headers may also be in a multiarch subdir (no guess without a startfile prefix).
+        header_dir = startfile_prefix and spack.util.libc.libc_include_dir_from_startfile_prefix(
+            libc.external_path, startfile_prefix
+        )
+        if header_dir and all(
+            os.path.exists(os.path.join(header_dir, h))
+            for h in libc.package_class.representative_headers
+        ):
+            relocation_args.append(f"-isystem {header_dir}")
+        else:
+            tty.warn(
+                f"Cannot relocate {specs_file} include directories, "
+                f"compiler might not be working properly"
+            )
+
+        # Delete current spec files.
+        try:
+            os.unlink(specs_file)
+        except OSError:
+            pass
+
+        # Write a new one and append flags for libc
+        self.write_specs_file()
+
+        if relocation_args:
+            with open(specs_file, "a") as f:
+                print("*self_spec:", file=f)
+                print(f"+ {' '.join(relocation_args)}", file=f)
+                print(file=f)
diff --git a/var/spack/repos/builtin/packages/glibc/package.py b/var/spack/repos/builtin/packages/glibc/package.py
index 16315c4bbc..f09f455ab6 100644
--- a/var/spack/repos/builtin/packages/glibc/package.py
+++ b/var/spack/repos/builtin/packages/glibc/package.py
@@ -22,6 +22,10 @@ class Glibc(AutotoolsPackage, GNUMirrorPackage):
     build_directory = "build"
     tags = ["runtime"]
 
+    # This is used when the package is external and we need to find the actual default include path
+    # which may be in a multiarch subdir.
+    representative_headers = ["ieee754.h"]
+
     license("LGPL-2.1-or-later")
 
     provides("libc")
diff --git a/var/spack/repos/builtin/packages/musl/package.py b/var/spack/repos/builtin/packages/musl/package.py
index 4b503feb64..622bd004e8 100644
--- a/var/spack/repos/builtin/packages/musl/package.py
+++ b/var/spack/repos/builtin/packages/musl/package.py
@@ -29,6 +29,10 @@ class Musl(MakefilePackage):
 
     license("MIT")
 
+    # This is used when the package is external and we need to find the actual default include path
+    # which may be in a multiarch subdir.
+    representative_headers = ["iso646.h"]
+
     provides("libc")
 
     version("1.2.4", sha256="7a35eae33d5372a7c0da1188de798726f68825513b7ae3ebe97aaaa52114f039")
-- 
cgit v1.2.3-70-g09d2