From 03c0d7413907fc46b82bbc77dd9bd2276b5e070e Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Wed, 2 Aug 2023 17:06:13 +0200 Subject: buildcache extractall: extract directly into spec.prefix (#37441) - Run `mkdirp` on `spec.prefix` - Extract directly into `spec.prefix` 1. No need for `$store/tmp.xxx` where we extract the tarball directly, pray that it has one subdir `--`, and then `rm -rf` the package prefix followed by `mv`. 2. No need to clean up this temp dir in `spack clean`. 3. Instead figure out package directory prefix from the tarball contents, and strip the tarinfo entries accordingly (kinda like tar --strip-components but more strict) - Set package dir permissions - Don't error during error handling when files cannot removed - No need to "enrich" spec.json with this tarball-toplevel-path After this PR, we can in fact tarball packages relative to `/` instead of `spec.prefix/..`, which makes it possible to use Spack tarballs as container layers, where relocation is impossible, and rootfs tarballs are expected. --- lib/spack/docs/signing.rst | 8 +-- lib/spack/spack/binary_distribution.py | 96 +++++++++++++++++++------------ lib/spack/spack/cmd/clean.py | 6 +- lib/spack/spack/schema/buildcache_spec.py | 13 +---- lib/spack/spack/test/bindist.py | 96 +++++++++++++++++++++++++++++++ 5 files changed, 159 insertions(+), 60 deletions(-) (limited to 'lib') diff --git a/lib/spack/docs/signing.rst b/lib/spack/docs/signing.rst index f88b87f07e..dc44eaec1d 100644 --- a/lib/spack/docs/signing.rst +++ b/lib/spack/docs/signing.rst @@ -217,13 +217,7 @@ file would live in the ``build_cache`` directory of a binary mirror:: "binary_cache_checksum": { "hash_algorithm": "sha256", "hash": "4f1e46452c35a5e61bcacca205bae1bfcd60a83a399af201a29c95b7cc3e1423" - }, - - "buildinfo": { - "relative_prefix": - "linux-ubuntu18.04-haswell/gcc-7.5.0/zlib-1.2.12-llv2ysfdxnppzjrt5ldybb5c52qbmoow", - "relative_rpaths": false - } + } } -----BEGIN PGP SIGNATURE----- diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index b0c72b9740..339fafb2ea 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -52,6 +52,7 @@ import spack.util.spack_yaml as syaml import spack.util.url as url_util import spack.util.web as web_util from spack.caches import misc_cache_location +from spack.package_prefs import get_package_dir_permissions, get_package_group from spack.relocate_text import utf8_paths_to_single_binary_regex from spack.spec import Spec from spack.stage import Stage @@ -1312,15 +1313,7 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option else: raise ValueError("{0} not a valid spec file type".format(spec_file)) spec_dict["buildcache_layout_version"] = 1 - bchecksum = {} - bchecksum["hash_algorithm"] = "sha256" - bchecksum["hash"] = checksum - spec_dict["binary_cache_checksum"] = bchecksum - # Add original install prefix relative to layout root to spec.json. - # This will be used to determine is the directory layout has changed. - buildinfo = {} - buildinfo["relative_prefix"] = os.path.relpath(spec.prefix, spack.store.STORE.layout.root) - spec_dict["buildinfo"] = buildinfo + spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum} with open(specfile_path, "w") as outfile: # Note: when using gpg clear sign, we need to avoid long lines (19995 chars). @@ -1799,6 +1792,27 @@ def _extract_inner_tarball(spec, filename, extract_to, unsigned, remote_checksum return tarfile_path +def _tar_strip_component(tar: tarfile.TarFile, prefix: str): + """Strip the top-level directory `prefix` from the member names in a tarfile.""" + # Including trailing /, otherwise we end up with absolute paths. + regex = re.compile(re.escape(prefix) + "/*") + + # Remove the top-level directory from the member (link)names. + # Note: when a tarfile is created, relative in-prefix symlinks are + # expanded to matching member names of tarfile entries. So, we have + # to ensure that those are updated too. + # Absolute symlinks are copied verbatim -- relocation should take care of + # them. + for m in tar.getmembers(): + result = regex.match(m.name) + assert result is not None + m.name = m.name[result.end() :] + if m.linkname: + result = regex.match(m.linkname) + if result: + m.linkname = m.linkname[result.end() :] + + def extract_tarball(spec, download_result, unsigned=False, force=False): """ extract binary tarball for given package into install area @@ -1809,6 +1823,14 @@ def extract_tarball(spec, download_result, unsigned=False, force=False): else: raise NoOverwriteException(str(spec.prefix)) + # Create the install prefix + fsys.mkdirp( + spec.prefix, + mode=get_package_dir_permissions(spec), + group=get_package_group(spec), + default_perms="parents", + ) + specfile_path = download_result["specfile_stage"].save_filename with open(specfile_path, "r") as inputfile: @@ -1862,42 +1884,23 @@ def extract_tarball(spec, download_result, unsigned=False, force=False): tarfile_path, size, contents, "sha256", expected, local_checksum ) - new_relative_prefix = str(os.path.relpath(spec.prefix, spack.store.STORE.layout.root)) - # if the original relative prefix is in the spec file use it - buildinfo = spec_dict.get("buildinfo", {}) - old_relative_prefix = buildinfo.get("relative_prefix", new_relative_prefix) - rel = buildinfo.get("relative_rpaths") - info = "old relative prefix %s\nnew relative prefix %s\nrelative rpaths %s" - tty.debug(info % (old_relative_prefix, new_relative_prefix, rel), level=2) - - # Extract the tarball into the store root, presumably on the same filesystem. - # The directory created is the base directory name of the old prefix. - # Moving the old prefix name to the new prefix location should preserve - # hard links and symbolic links. - extract_tmp = os.path.join(spack.store.STORE.layout.root, ".tmp") - mkdirp(extract_tmp) - extracted_dir = os.path.join(extract_tmp, old_relative_prefix.split(os.path.sep)[-1]) - - with closing(tarfile.open(tarfile_path, "r")) as tar: - try: - tar.extractall(path=extract_tmp) - except Exception as e: - _delete_staged_downloads(download_result) - shutil.rmtree(extracted_dir) - raise e try: - shutil.move(extracted_dir, spec.prefix) - except Exception as e: + with closing(tarfile.open(tarfile_path, "r")) as tar: + # Remove install prefix from tarfil to extract directly into spec.prefix + _tar_strip_component(tar, prefix=_ensure_common_prefix(tar)) + tar.extractall(path=spec.prefix) + except Exception: + shutil.rmtree(spec.prefix, ignore_errors=True) _delete_staged_downloads(download_result) - shutil.rmtree(extracted_dir) - raise e + raise + os.remove(tarfile_path) os.remove(specfile_path) try: relocate_package(spec) except Exception as e: - shutil.rmtree(spec.prefix) + shutil.rmtree(spec.prefix, ignore_errors=True) raise e else: manifest_file = os.path.join( @@ -1910,12 +1913,29 @@ def extract_tarball(spec, download_result, unsigned=False, force=False): tty.warn("No manifest file in tarball for spec %s" % spec_id) finally: if tmpdir: - shutil.rmtree(tmpdir) + shutil.rmtree(tmpdir, ignore_errors=True) if os.path.exists(filename): os.remove(filename) _delete_staged_downloads(download_result) +def _ensure_common_prefix(tar: tarfile.TarFile) -> str: + # Get the shortest length directory. + common_prefix = min((e.name for e in tar.getmembers() if e.isdir()), key=len, default=None) + + if common_prefix is None: + raise ValueError("Tarball does not contain a common prefix") + + # Validate that each file starts with the prefix + for member in tar.getmembers(): + if not member.name.startswith(common_prefix): + raise ValueError( + f"Tarball contains file {member.name} outside of prefix {common_prefix}" + ) + + return common_prefix + + def install_root_node(spec, unsigned=False, force=False, sha256=None): """Install the root node of a concrete spec from a buildcache. diff --git a/lib/spack/spack/cmd/clean.py b/lib/spack/spack/cmd/clean.py index 44e7f8e49e..93edcd712f 100644 --- a/lib/spack/spack/cmd/clean.py +++ b/lib/spack/spack/cmd/clean.py @@ -114,11 +114,7 @@ def clean(parser, args): if args.stage: tty.msg("Removing all temporary build stages") spack.stage.purge() - # Temp directory where buildcaches are extracted - extract_tmp = os.path.join(spack.store.STORE.layout.root, ".tmp") - if os.path.exists(extract_tmp): - tty.debug("Removing {0}".format(extract_tmp)) - shutil.rmtree(extract_tmp) + if args.downloads: tty.msg("Removing cached downloads") spack.caches.fetch_cache.destroy() diff --git a/lib/spack/spack/schema/buildcache_spec.py b/lib/spack/spack/schema/buildcache_spec.py index eeeb6dc085..7c74c7f4eb 100644 --- a/lib/spack/spack/schema/buildcache_spec.py +++ b/lib/spack/spack/schema/buildcache_spec.py @@ -6,7 +6,7 @@ """Schema for a buildcache spec.yaml file .. literalinclude:: _spack_root/lib/spack/spack/schema/buildcache_spec.py - :lines: 14- + :lines: 13- """ import spack.schema.spec @@ -16,15 +16,8 @@ schema = { "type": "object", "additionalProperties": False, "properties": { - "buildinfo": { - "type": "object", - "additionalProperties": False, - "required": ["relative_prefix"], - "properties": { - "relative_prefix": {"type": "string"}, - "relative_rpaths": {"type": "boolean"}, - }, - }, + # `buildinfo` is no longer needed as of Spack 0.21 + "buildinfo": {"type": "object"}, "spec": { "type": "object", "additionalProperties": True, diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 4ef280469b..b778651455 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -12,6 +12,7 @@ import tarfile import urllib.error import urllib.request import urllib.response +from pathlib import PurePath import py import pytest @@ -177,6 +178,33 @@ def install_dir_non_default_layout(tmpdir): spack.store.STORE.layout = real_layout +@pytest.fixture(scope="function") +def dummy_prefix(tmpdir): + """Dummy prefix used for testing tarball creation, validation, extraction""" + p = tmpdir.mkdir("prefix") + assert os.path.isabs(p) + + p.mkdir("bin") + p.mkdir("share") + p.mkdir(".spack") + + app = p.join("bin", "app") + relative_app_link = p.join("bin", "relative_app_link") + absolute_app_link = p.join("bin", "absolute_app_link") + data = p.join("share", "file") + + with open(app, "w") as f: + f.write("hello world") + + with open(data, "w") as f: + f.write("hello world") + + os.symlink("app", relative_app_link) + os.symlink(app, absolute_app_link) + + return str(p) + + args = ["file"] if sys.platform == "darwin": args.extend(["/usr/bin/clang++", "install_name_tool"]) @@ -966,3 +994,71 @@ def test_tarball_normalized_permissions(tmpdir): # not-executable-by-user files should be 0o644 assert path_to_member["pkg/share/file"].mode == 0o644 + + +def test_tarball_common_prefix(dummy_prefix, tmpdir): + """Tests whether Spack can figure out the package directory + from the tarball contents, and strip them when extracting.""" + + # When creating a tarball, Python (and tar) use relative paths, + # Absolute paths become relative to `/`, so drop the leading `/`. + assert os.path.isabs(dummy_prefix) + expected_prefix = PurePath(dummy_prefix).as_posix().lstrip("/") + + with tmpdir.as_cwd(): + # Create a tarball (using absolute path for prefix dir) + with tarfile.open("example.tar", mode="w") as tar: + tar.add(name=dummy_prefix) + + # Open, verify common prefix, and extract it. + with tarfile.open("example.tar", mode="r") as tar: + common_prefix = bindist._ensure_common_prefix(tar) + assert common_prefix == expected_prefix + + # Strip the prefix from the tar entries + bindist._tar_strip_component(tar, common_prefix) + + # Extract into prefix2 + tar.extractall(path="prefix2") + + # Verify files are all there at the correct level. + assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"} + assert set(os.listdir(os.path.join("prefix2", "bin"))) == { + "app", + "relative_app_link", + "absolute_app_link", + } + assert set(os.listdir(os.path.join("prefix2", "share"))) == {"file"} + + # Relative symlink should still be correct + assert os.readlink(os.path.join("prefix2", "bin", "relative_app_link")) == "app" + + # Absolute symlink should remain absolute -- this is for relocation to fix up. + assert os.readlink(os.path.join("prefix2", "bin", "absolute_app_link")) == os.path.join( + dummy_prefix, "bin", "app" + ) + + +def test_tarfile_without_common_directory_prefix_fails(tmpdir): + """A tarfile that only contains files without a common package directory + should fail to extract, as we won't know where to put the files.""" + with tmpdir.as_cwd(): + # Create a broken tarball with just a file, no directories. + with tarfile.open("empty.tar", mode="w") as tar: + tar.addfile(tarfile.TarInfo(name="example/file"), fileobj=io.BytesIO(b"hello")) + + with pytest.raises(ValueError, match="Tarball does not contain a common prefix"): + bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r")) + + +def test_tarfile_with_files_outside_common_prefix(tmpdir, dummy_prefix): + """If a file is outside of the common prefix, we should fail.""" + with tmpdir.as_cwd(): + with tarfile.open("broken.tar", mode="w") as tar: + tar.add(name=dummy_prefix) + tar.addfile(tarfile.TarInfo(name="/etc/config_file"), fileobj=io.BytesIO(b"hello")) + + with pytest.raises( + ValueError, match="Tarball contains file /etc/config_file outside of prefix" + ): + bindist._ensure_common_prefix(tarfile.open("broken.tar", mode="r")) -- cgit v1.2.3-60-g2f50