diff options
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 147 | ||||
-rw-r--r-- | lib/spack/spack/test/bindist.py | 80 |
2 files changed, 177 insertions, 50 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 9758e8b9ea..38a3333475 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -6,6 +6,8 @@ import codecs import collections import hashlib +import io +import itertools import json import multiprocessing.pool import os @@ -20,7 +22,8 @@ import urllib.error import urllib.parse import urllib.request import warnings -from contextlib import closing +from contextlib import closing, contextmanager +from gzip import GzipFile from urllib.error import HTTPError, URLError import ruamel.yaml as yaml @@ -739,34 +742,31 @@ def get_buildfile_manifest(spec): return data -def write_buildinfo_file(spec, workdir, rel=False): - """ - Create a cache file containing information - required for the relocation - """ - manifest = get_buildfile_manifest(spec) +def prefixes_to_hashes(spec): + return { + str(s.prefix): s.dag_hash() + for s in itertools.chain( + spec.traverse(root=True, deptype="link"), spec.dependencies(deptype="run") + ) + } - prefix_to_hash = dict() - prefix_to_hash[str(spec.package.prefix)] = spec.dag_hash() - deps = spack.build_environment.get_rpath_deps(spec.package) - for d in deps + spec.dependencies(deptype="run"): - prefix_to_hash[str(d.prefix)] = d.dag_hash() - # Create buildinfo data and write it to disk - buildinfo = {} - buildinfo["sbang_install_path"] = spack.hooks.sbang.sbang_install_path() - buildinfo["relative_rpaths"] = rel - buildinfo["buildpath"] = spack.store.layout.root - buildinfo["spackprefix"] = spack.paths.prefix - buildinfo["relative_prefix"] = os.path.relpath(spec.prefix, spack.store.layout.root) - buildinfo["relocate_textfiles"] = manifest["text_to_relocate"] - buildinfo["relocate_binaries"] = manifest["binary_to_relocate"] - buildinfo["relocate_links"] = manifest["link_to_relocate"] - buildinfo["hardlinks_deduped"] = manifest["hardlinks_deduped"] - buildinfo["prefix_to_hash"] = prefix_to_hash - filename = buildinfo_file_name(workdir) - with open(filename, "w") as outfile: - outfile.write(syaml.dump(buildinfo, default_flow_style=True)) +def get_buildinfo_dict(spec, rel=False): + """Create metadata for a tarball""" + manifest = get_buildfile_manifest(spec) + + return { + "sbang_install_path": spack.hooks.sbang.sbang_install_path(), + "relative_rpaths": rel, + "buildpath": spack.store.layout.root, + "spackprefix": spack.paths.prefix, + "relative_prefix": os.path.relpath(spec.prefix, spack.store.layout.root), + "relocate_textfiles": manifest["text_to_relocate"], + "relocate_binaries": manifest["binary_to_relocate"], + "relocate_links": manifest["link_to_relocate"], + "hardlinks_deduped": manifest["hardlinks_deduped"], + "prefix_to_hash": prefixes_to_hashes(spec), + } def tarball_directory_name(spec): @@ -1139,6 +1139,68 @@ def generate_key_index(key_prefix, tmpdir=None): shutil.rmtree(tmpdir) +@contextmanager +def gzip_compressed_tarfile(path): + """Create a reproducible, compressed tarfile""" + # Create gzip compressed tarball of the install prefix + # 1) Use explicit empty filename and mtime 0 for gzip header reproducibility. + # If the filename="" is dropped, Python will use fileobj.name instead. + # This should effectively mimick `gzip --no-name`. + # 2) On AMD Ryzen 3700X and an SSD disk, we have the following on compression speed: + # compresslevel=6 gzip default: llvm takes 4mins, roughly 2.1GB + # compresslevel=9 python default: llvm takes 12mins, roughly 2.1GB + # So we follow gzip. + with open(path, "wb") as fileobj, closing( + GzipFile(filename="", mode="wb", compresslevel=6, mtime=0, fileobj=fileobj) + ) as gzip_file, tarfile.TarFile(name="", mode="w", fileobj=gzip_file) as tar: + yield tar + + +def deterministic_tarinfo(tarinfo: tarfile.TarInfo): + # We only add files, symlinks, hardlinks, and directories + # No character devices, block devices and FIFOs should ever enter a tarball. + if tarinfo.isdev(): + return None + + # For distribution, it makes no sense to user/group data; since (a) they don't exist + # on other machines, and (b) they lead to surprises as `tar x` run as root will change + # ownership if it can. We want to extract as the current user. By setting owner to root, + # root will extract as root, and non-privileged user will extract as themselves. + tarinfo.uid = 0 + tarinfo.gid = 0 + tarinfo.uname = "" + tarinfo.gname = "" + + # Reset mtime to epoch time, our prefixes are not truly immutable, so files may get + # touched; as long as the content does not change, this ensures we get stable tarballs. + tarinfo.mtime = 0 + + # Normalize mode + if tarinfo.isfile() or tarinfo.islnk(): + # If user can execute, use 0o755; else 0o644 + # This is to avoid potentially unsafe world writable & exeutable files that may get + # extracted when Python or tar is run with privileges + tarinfo.mode = 0o644 if tarinfo.mode & 0o100 == 0 else 0o755 + else: # symbolic link and directories + tarinfo.mode = 0o755 + + return tarinfo + + +def tar_add_metadata(tar: tarfile.TarFile, path: str, data: dict): + # Serialize buildinfo for the tarball + bstring = syaml.dump(data, default_flow_style=True).encode("utf-8") + tarinfo = tarfile.TarInfo(name=path) + tarinfo.size = len(bstring) + tar.addfile(deterministic_tarinfo(tarinfo), io.BytesIO(bstring)) + + +def _do_create_tarball(tarfile_path, binaries_dir, pkg_dir, buildinfo): + with gzip_compressed_tarfile(tarfile_path) as tar: + tar.add(name=binaries_dir, arcname=pkg_dir, filter=deterministic_tarinfo) + tar_add_metadata(tar, buildinfo_file_name(pkg_dir), buildinfo) + + def _build_tarball( spec, out_url, @@ -1217,39 +1279,26 @@ def _build_tarball( os.remove(temp_tarfile_path) else: binaries_dir = spec.prefix - mkdirp(os.path.join(workdir, ".spack")) # create info for later relocation and create tar - write_buildinfo_file(spec, workdir, relative) + buildinfo = get_buildinfo_dict(spec, relative) # optionally make the paths in the binaries relative to each other # in the spack install tree before creating tarball try: if relative: - make_package_relative(workdir, spec, allow_root) + make_package_relative(workdir, spec, buildinfo, allow_root) elif not allow_root: - ensure_package_relocatable(workdir, binaries_dir) + ensure_package_relocatable(buildinfo, binaries_dir) except Exception as e: - shutil.rmtree(workdir) - shutil.rmtree(tarfile_dir) shutil.rmtree(tmpdir) tty.die(e) - # create gzip compressed tarball of the install prefix - # On AMD Ryzen 3700X and an SSD disk, we have the following on compression speed: - # compresslevel=6 gzip default: llvm takes 4mins, roughly 2.1GB - # compresslevel=9 python default: llvm takes 12mins, roughly 2.1GB - # So we follow gzip. - with closing(tarfile.open(tarfile_path, "w:gz", compresslevel=6)) as tar: - tar.add(name=binaries_dir, arcname=pkg_dir) - if not relative: - # Add buildinfo file - buildinfo_path = buildinfo_file_name(workdir) - buildinfo_arcname = buildinfo_file_name(pkg_dir) - tar.add(name=buildinfo_path, arcname=buildinfo_arcname) + _do_create_tarball(tarfile_path, binaries_dir, pkg_dir, buildinfo) # remove copy of install directory - shutil.rmtree(workdir) + if relative: + shutil.rmtree(workdir) # get the sha256 checksum of the tarball checksum = checksum_tarball(tarfile_path) @@ -1536,13 +1585,12 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None): return None -def make_package_relative(workdir, spec, allow_root): +def make_package_relative(workdir, spec, buildinfo, allow_root): """ Change paths in binaries to relative paths. Change absolute symlinks to relative symlinks. """ prefix = spec.prefix - buildinfo = read_buildinfo_file(workdir) old_layout_root = buildinfo["buildpath"] orig_path_names = list() cur_path_names = list() @@ -1566,9 +1614,8 @@ def make_package_relative(workdir, spec, allow_root): relocate.make_link_relative(cur_path_names, orig_path_names) -def ensure_package_relocatable(workdir, binaries_dir): +def ensure_package_relocatable(buildinfo, binaries_dir): """Check if package binaries are relocatable.""" - buildinfo = read_buildinfo_file(workdir) binaries = [os.path.join(binaries_dir, f) for f in buildinfo["relocate_binaries"]] relocate.ensure_binaries_are_relocatable(binaries) diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 81f5f7377c..cde16c241e 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -2,11 +2,13 @@ # Spack Project Developers. See the top-level COPYRIGHT file for details. # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import filecmp import glob import io import os import platform import sys +import tarfile import urllib.error import urllib.request import urllib.response @@ -952,3 +954,81 @@ def test_correct_specs_are_pushed( bindist.push([spec], push_url, include_root=root, include_dependencies=deps) assert packages_to_push == expected + + +def test_reproducible_tarball_is_reproducible(tmpdir): + p = tmpdir.mkdir("prefix") + p.mkdir("bin") + p.mkdir(".spack") + + app = p.join("bin", "app") + + tarball_1 = str(tmpdir.join("prefix-1.tar.gz")) + tarball_2 = str(tmpdir.join("prefix-2.tar.gz")) + + with open(app, "w") as f: + f.write("hello world") + + buildinfo = {"metadata": "yes please"} + + # Create a tarball with a certain mtime of bin/app + os.utime(app, times=(0, 0)) + bindist._do_create_tarball(tarball_1, binaries_dir=p, pkg_dir="pkg", buildinfo=buildinfo) + + # Do it another time with different mtime of bin/app + os.utime(app, times=(10, 10)) + bindist._do_create_tarball(tarball_2, binaries_dir=p, pkg_dir="pkg", buildinfo=buildinfo) + + # They should be bitwise identical: + assert filecmp.cmp(tarball_1, tarball_2, shallow=False) + + # Sanity check for contents: + with tarfile.open(tarball_1, mode="r") as f: + for m in f.getmembers(): + assert m.uid == m.gid == m.mtime == 0 + assert m.uname == m.gname == "" + + assert set(f.getnames()) == { + "pkg", + "pkg/bin", + "pkg/bin/app", + "pkg/.spack", + "pkg/.spack/binary_distribution", + } + + +def test_tarball_normalized_permissions(tmpdir): + p = tmpdir.mkdir("prefix") + p.mkdir("bin") + p.mkdir("share") + p.mkdir(".spack") + + app = p.join("bin", "app") + data = p.join("share", "file") + tarball = str(tmpdir.join("prefix.tar.gz")) + + # Everyone can write & execute. This should turn into 0o755 when the tarball is + # extracted (on a different system). + with open(app, "w", opener=lambda path, flags: os.open(path, flags, 0o777)) as f: + f.write("hello world") + + # User doesn't have execute permissions, but group/world have; this should also + # turn into 0o644 (user read/write, group&world only read). + with open(data, "w", opener=lambda path, flags: os.open(path, flags, 0o477)) as f: + f.write("hello world") + + bindist._do_create_tarball(tarball, binaries_dir=p, pkg_dir="pkg", buildinfo={}) + + with tarfile.open(tarball) as tar: + path_to_member = {member.name: member for member in tar.getmembers()} + + # directories should have 0o755 + assert path_to_member["pkg"].mode == 0o755 + assert path_to_member["pkg/bin"].mode == 0o755 + assert path_to_member["pkg/.spack"].mode == 0o755 + + # executable-by-user files should be 0o755 + assert path_to_member["pkg/bin/app"].mode == 0o755 + + # not-executable-by-user files should be 0o644 + assert path_to_member["pkg/share/file"].mode == 0o644 |