diff options
author | Harmen Stoppels <me@harmenstoppels.nl> | 2024-01-10 13:21:15 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-10 13:21:15 +0100 |
commit | 8c1226ece898b3943413804a66cfeb8a23a488de (patch) | |
tree | 690e8a083b71474d13d0e97b7420d507425e2d61 | |
parent | feebd35f910c2925bdafe8f5460b67bc2db9cdcd (diff) | |
download | spack-8c1226ece898b3943413804a66cfeb8a23a488de.tar.gz spack-8c1226ece898b3943413804a66cfeb8a23a488de.tar.bz2 spack-8c1226ece898b3943413804a66cfeb8a23a488de.tar.xz spack-8c1226ece898b3943413804a66cfeb8a23a488de.zip |
binary_distribution.py: list parent dirs in binary tarball (#41773)
* Bump the build cache layout version from 1 to 2
* Version to lists parent directories of the prefix in the tarball too, which is required from some container runtimes
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 79 | ||||
-rw-r--r-- | lib/spack/spack/test/bindist.py | 83 |
2 files changed, 116 insertions, 46 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index b741e72501..751ec1ef7f 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -68,7 +68,10 @@ from spack.util.executable import which BUILD_CACHE_RELATIVE_PATH = "build_cache" BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp" -CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1 + +#: The build cache layout version that this version of Spack creates. +#: Version 2: includes parent directories of the package prefix in the tarball +CURRENT_BUILD_CACHE_LAYOUT_VERSION = 2 class BuildCacheDatabase(spack_db.Database): @@ -1174,6 +1177,16 @@ def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None: except OSError: files_to_skip = [] + # First add all directories leading up to `prefix` (Spack <= 0.21 did not do this, leading to + # issues when tarballs are used in runtimes like AWS lambda). Skip the file system root. + parent_dirs = reversed(pathlib.Path(prefix).parents) + next(parent_dirs) # skip the root: slices are supported from python 3.10 + for parent_dir in parent_dirs: + dir_info = tarfile.TarInfo(_tarinfo_name(str(parent_dir))) + dir_info.type = tarfile.DIRTYPE + dir_info.mode = 0o755 + tar.addfile(dir_info) + dir_stack = [prefix] while dir_stack: dir = dir_stack.pop() @@ -2047,11 +2060,12 @@ def _extract_inner_tarball(spec, filename, extract_to, signature_required: bool, def _tar_strip_component(tar: tarfile.TarFile, prefix: str): - """Strip the top-level directory `prefix` from the member names in a tarfile.""" + """Yield all members of tarfile that start with given prefix, and strip that prefix (including + symlinks)""" # Including trailing /, otherwise we end up with absolute paths. regex = re.compile(re.escape(prefix) + "/*") - # Remove the top-level directory from the member (link)names. + # Only yield members in the package prefix. # Note: when a tarfile is created, relative in-prefix symlinks are # expanded to matching member names of tarfile entries. So, we have # to ensure that those are updated too. @@ -2059,12 +2073,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str): # them. for m in tar.getmembers(): result = regex.match(m.name) - assert result is not None + if not result: + continue m.name = m.name[result.end() :] if m.linkname: result = regex.match(m.linkname) if result: m.linkname = m.linkname[result.end() :] + yield m def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): @@ -2110,7 +2126,7 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): _delete_staged_downloads(download_result) shutil.rmtree(tmpdir) raise e - elif layout_version == 1: + elif 1 <= layout_version <= 2: # Newer buildcache layout: the .spack file contains just # in the install tree, the signature, if it exists, is # wrapped around the spec.json at the root. If sig verify @@ -2138,8 +2154,10 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): try: with closing(tarfile.open(tarfile_path, "r")) as tar: # Remove install prefix from tarfil to extract directly into spec.prefix - _tar_strip_component(tar, prefix=_ensure_common_prefix(tar)) - tar.extractall(path=spec.prefix) + tar.extractall( + path=spec.prefix, + members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)), + ) except Exception: shutil.rmtree(spec.prefix, ignore_errors=True) _delete_staged_downloads(download_result) @@ -2174,20 +2192,47 @@ def extract_tarball(spec, download_result, force=False, timer=timer.NULL_TIMER): def _ensure_common_prefix(tar: tarfile.TarFile) -> str: - # Get the shortest length directory. - common_prefix = min((e.name for e in tar.getmembers() if e.isdir()), key=len, default=None) + # Find the lowest `binary_distribution` file (hard-coded forward slash is on purpose). + binary_distribution = min( + ( + e.name + for e in tar.getmembers() + if e.isfile() and e.name.endswith(".spack/binary_distribution") + ), + key=len, + default=None, + ) + + if binary_distribution is None: + raise ValueError("Tarball is not a Spack package, missing binary_distribution file") + + pkg_path = pathlib.PurePosixPath(binary_distribution).parent.parent + + # Even the most ancient Spack version has required to list the dir of the package itself, so + # guard against broken tarballs where `path.parent.parent` is empty. + if pkg_path == pathlib.PurePosixPath(): + raise ValueError("Invalid tarball, missing package prefix dir") - if common_prefix is None: - raise ValueError("Tarball does not contain a common prefix") + pkg_prefix = str(pkg_path) - # Validate that each file starts with the prefix + # Ensure all tar entries are in the pkg_prefix dir, and if they're not, they should be parent + # dirs of it. + has_prefix = False for member in tar.getmembers(): - if not member.name.startswith(common_prefix): - raise ValueError( - f"Tarball contains file {member.name} outside of prefix {common_prefix}" - ) + stripped = member.name.rstrip("/") + if not ( + stripped.startswith(pkg_prefix) or member.isdir() and pkg_prefix.startswith(stripped) + ): + raise ValueError(f"Tarball contains file {stripped} outside of prefix {pkg_prefix}") + if member.isdir() and stripped == pkg_prefix: + has_prefix = True + + # This is technically not required, but let's be defensive about the existence of the package + # prefix dir. + if not has_prefix: + raise ValueError(f"Tarball does not contain a common prefix {pkg_prefix}") - return common_prefix + return pkg_prefix def install_root_node(spec, unsigned=False, force=False, sha256=None): diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 5544c521d1..0f8b0d58ce 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -14,7 +14,7 @@ import tarfile import urllib.error import urllib.request import urllib.response -from pathlib import PurePath +from pathlib import Path, PurePath import py import pytest @@ -201,6 +201,9 @@ def dummy_prefix(tmpdir): with open(data, "w") as f: f.write("hello world") + with open(p.join(".spack", "binary_distribution"), "w") as f: + f.write("{}") + os.symlink("app", relative_app_link) os.symlink(app, absolute_app_link) @@ -887,24 +890,29 @@ def test_default_index_json_404(): fetcher.conditional_fetch() -def test_tarball_doesnt_include_buildinfo_twice(tmpdir): +def _all_parents(prefix): + parts = [p for p in prefix.split("/")] + return ["/".join(parts[: i + 1]) for i in range(len(parts))] + + +def test_tarball_doesnt_include_buildinfo_twice(tmp_path: Path): """When tarballing a package that was installed from a buildcache, make sure that the buildinfo file is not included twice in the tarball.""" - p = tmpdir.mkdir("prefix") - p.mkdir(".spack") + p = tmp_path / "prefix" + p.joinpath(".spack").mkdir(parents=True) # Create a binary_distribution file in the .spack folder - with open(p.join(".spack", "binary_distribution"), "w") as f: + with open(p / ".spack" / "binary_distribution", "w") as f: f.write(syaml.dump({"metadata", "old"})) # Now create a tarball, which should include a new binary_distribution file - tarball = str(tmpdir.join("prefix.tar.gz")) + tarball = str(tmp_path / "prefix.tar.gz") bindist._do_create_tarball( - tarfile_path=tarball, binaries_dir=p.strpath, buildinfo={"metadata": "new"} + tarfile_path=tarball, binaries_dir=str(p), buildinfo={"metadata": "new"} ) - expected_prefix = p.strpath.lstrip("/") + expected_prefix = str(p).lstrip("/") # Verify we don't have a repeated binary_distribution file, # and that the tarball contains the new one, not the old one. @@ -913,21 +921,20 @@ def test_tarball_doesnt_include_buildinfo_twice(tmpdir): "metadata": "new" } assert tar.getnames() == [ - f"{expected_prefix}", + *_all_parents(expected_prefix), f"{expected_prefix}/.spack", f"{expected_prefix}/.spack/binary_distribution", ] -def test_reproducible_tarball_is_reproducible(tmpdir): - p = tmpdir.mkdir("prefix") - p.mkdir("bin") - p.mkdir(".spack") - - app = p.join("bin", "app") +def test_reproducible_tarball_is_reproducible(tmp_path: Path): + p = tmp_path / "prefix" + p.joinpath("bin").mkdir(parents=True) + p.joinpath(".spack").mkdir(parents=True) + app = p / "bin" / "app" - tarball_1 = str(tmpdir.join("prefix-1.tar.gz")) - tarball_2 = str(tmpdir.join("prefix-2.tar.gz")) + tarball_1 = str(tmp_path / "prefix-1.tar.gz") + tarball_2 = str(tmp_path / "prefix-2.tar.gz") with open(app, "w") as f: f.write("hello world") @@ -936,16 +943,16 @@ def test_reproducible_tarball_is_reproducible(tmpdir): # Create a tarball with a certain mtime of bin/app os.utime(app, times=(0, 0)) - bindist._do_create_tarball(tarball_1, binaries_dir=p.strpath, buildinfo=buildinfo) + bindist._do_create_tarball(tarball_1, binaries_dir=str(p), buildinfo=buildinfo) # Do it another time with different mtime of bin/app os.utime(app, times=(10, 10)) - bindist._do_create_tarball(tarball_2, binaries_dir=p.strpath, buildinfo=buildinfo) + bindist._do_create_tarball(tarball_2, binaries_dir=str(p), buildinfo=buildinfo) # They should be bitwise identical: assert filecmp.cmp(tarball_1, tarball_2, shallow=False) - expected_prefix = p.strpath.lstrip("/") + expected_prefix = str(p).lstrip("/") # Sanity check for contents: with tarfile.open(tarball_1, mode="r") as f: @@ -954,7 +961,7 @@ def test_reproducible_tarball_is_reproducible(tmpdir): assert m.uname == m.gname == "" assert set(f.getnames()) == { - f"{expected_prefix}", + *_all_parents(expected_prefix), f"{expected_prefix}/bin", f"{expected_prefix}/bin/app", f"{expected_prefix}/.spack", @@ -1002,8 +1009,10 @@ def test_tarball_normalized_permissions(tmpdir): def test_tarball_common_prefix(dummy_prefix, tmpdir): - """Tests whether Spack can figure out the package directory - from the tarball contents, and strip them when extracting.""" + """Tests whether Spack can figure out the package directory from the tarball contents, and + strip them when extracting. This test creates a CURRENT_BUILD_CACHE_LAYOUT_VERSION=1 type + tarball where the parent directories of the package prefix are missing. Spack should be able + to figure out the common prefix and extract the files into the correct location.""" # When creating a tarball, Python (and tar) use relative paths, # Absolute paths become relative to `/`, so drop the leading `/`. @@ -1020,11 +1029,10 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir): common_prefix = bindist._ensure_common_prefix(tar) assert common_prefix == expected_prefix - # Strip the prefix from the tar entries - bindist._tar_strip_component(tar, common_prefix) - # Extract into prefix2 - tar.extractall(path="prefix2") + tar.extractall( + path="prefix2", members=bindist._tar_strip_component(tar, common_prefix) + ) # Verify files are all there at the correct level. assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"} @@ -1044,13 +1052,30 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir): ) +def test_tarfile_missing_binary_distribution_file(tmpdir): + """A tarfile that does not contain a .spack/binary_distribution file cannot be + used to install.""" + with tmpdir.as_cwd(): + # An empty .spack dir. + with tarfile.open("empty.tar", mode="w") as tar: + tarinfo = tarfile.TarInfo(name="example/.spack") + tarinfo.type = tarfile.DIRTYPE + tar.addfile(tarinfo) + + with pytest.raises(ValueError, match="missing binary_distribution file"): + bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r")) + + def test_tarfile_without_common_directory_prefix_fails(tmpdir): """A tarfile that only contains files without a common package directory should fail to extract, as we won't know where to put the files.""" with tmpdir.as_cwd(): # Create a broken tarball with just a file, no directories. with tarfile.open("empty.tar", mode="w") as tar: - tar.addfile(tarfile.TarInfo(name="example/file"), fileobj=io.BytesIO(b"hello")) + tar.addfile( + tarfile.TarInfo(name="example/.spack/binary_distribution"), + fileobj=io.BytesIO(b"hello"), + ) with pytest.raises(ValueError, match="Tarball does not contain a common prefix"): bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r")) @@ -1091,7 +1116,7 @@ def test_tarfile_of_spec_prefix(tmpdir): # Verify that entries are added in depth-first pre-order, files preceding dirs, # entries ordered alphabetically assert tar.getnames() == [ - f"{expected_prefix}", + *_all_parents(expected_prefix), f"{expected_prefix}/file", f"{expected_prefix}/hardlink", f"{expected_prefix}/symlink", |