summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/spack/spack/binary_distribution.py147
-rw-r--r--lib/spack/spack/test/bindist.py80
2 files changed, 177 insertions, 50 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 9758e8b9ea..38a3333475 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -6,6 +6,8 @@
import codecs
import collections
import hashlib
+import io
+import itertools
import json
import multiprocessing.pool
import os
@@ -20,7 +22,8 @@ import urllib.error
import urllib.parse
import urllib.request
import warnings
-from contextlib import closing
+from contextlib import closing, contextmanager
+from gzip import GzipFile
from urllib.error import HTTPError, URLError
import ruamel.yaml as yaml
@@ -739,34 +742,31 @@ def get_buildfile_manifest(spec):
return data
-def write_buildinfo_file(spec, workdir, rel=False):
- """
- Create a cache file containing information
- required for the relocation
- """
- manifest = get_buildfile_manifest(spec)
+def prefixes_to_hashes(spec):
+ return {
+ str(s.prefix): s.dag_hash()
+ for s in itertools.chain(
+ spec.traverse(root=True, deptype="link"), spec.dependencies(deptype="run")
+ )
+ }
- prefix_to_hash = dict()
- prefix_to_hash[str(spec.package.prefix)] = spec.dag_hash()
- deps = spack.build_environment.get_rpath_deps(spec.package)
- for d in deps + spec.dependencies(deptype="run"):
- prefix_to_hash[str(d.prefix)] = d.dag_hash()
- # Create buildinfo data and write it to disk
- buildinfo = {}
- buildinfo["sbang_install_path"] = spack.hooks.sbang.sbang_install_path()
- buildinfo["relative_rpaths"] = rel
- buildinfo["buildpath"] = spack.store.layout.root
- buildinfo["spackprefix"] = spack.paths.prefix
- buildinfo["relative_prefix"] = os.path.relpath(spec.prefix, spack.store.layout.root)
- buildinfo["relocate_textfiles"] = manifest["text_to_relocate"]
- buildinfo["relocate_binaries"] = manifest["binary_to_relocate"]
- buildinfo["relocate_links"] = manifest["link_to_relocate"]
- buildinfo["hardlinks_deduped"] = manifest["hardlinks_deduped"]
- buildinfo["prefix_to_hash"] = prefix_to_hash
- filename = buildinfo_file_name(workdir)
- with open(filename, "w") as outfile:
- outfile.write(syaml.dump(buildinfo, default_flow_style=True))
+def get_buildinfo_dict(spec, rel=False):
+ """Create metadata for a tarball"""
+ manifest = get_buildfile_manifest(spec)
+
+ return {
+ "sbang_install_path": spack.hooks.sbang.sbang_install_path(),
+ "relative_rpaths": rel,
+ "buildpath": spack.store.layout.root,
+ "spackprefix": spack.paths.prefix,
+ "relative_prefix": os.path.relpath(spec.prefix, spack.store.layout.root),
+ "relocate_textfiles": manifest["text_to_relocate"],
+ "relocate_binaries": manifest["binary_to_relocate"],
+ "relocate_links": manifest["link_to_relocate"],
+ "hardlinks_deduped": manifest["hardlinks_deduped"],
+ "prefix_to_hash": prefixes_to_hashes(spec),
+ }
def tarball_directory_name(spec):
@@ -1139,6 +1139,68 @@ def generate_key_index(key_prefix, tmpdir=None):
shutil.rmtree(tmpdir)
+@contextmanager
+def gzip_compressed_tarfile(path):
+ """Create a reproducible, compressed tarfile"""
+ # Create gzip compressed tarball of the install prefix
+ # 1) Use explicit empty filename and mtime 0 for gzip header reproducibility.
+ # If the filename="" is dropped, Python will use fileobj.name instead.
+ # This should effectively mimick `gzip --no-name`.
+ # 2) On AMD Ryzen 3700X and an SSD disk, we have the following on compression speed:
+ # compresslevel=6 gzip default: llvm takes 4mins, roughly 2.1GB
+ # compresslevel=9 python default: llvm takes 12mins, roughly 2.1GB
+ # So we follow gzip.
+ with open(path, "wb") as fileobj, closing(
+ GzipFile(filename="", mode="wb", compresslevel=6, mtime=0, fileobj=fileobj)
+ ) as gzip_file, tarfile.TarFile(name="", mode="w", fileobj=gzip_file) as tar:
+ yield tar
+
+
+def deterministic_tarinfo(tarinfo: tarfile.TarInfo):
+ # We only add files, symlinks, hardlinks, and directories
+ # No character devices, block devices and FIFOs should ever enter a tarball.
+ if tarinfo.isdev():
+ return None
+
+ # For distribution, it makes no sense to user/group data; since (a) they don't exist
+ # on other machines, and (b) they lead to surprises as `tar x` run as root will change
+ # ownership if it can. We want to extract as the current user. By setting owner to root,
+ # root will extract as root, and non-privileged user will extract as themselves.
+ tarinfo.uid = 0
+ tarinfo.gid = 0
+ tarinfo.uname = ""
+ tarinfo.gname = ""
+
+ # Reset mtime to epoch time, our prefixes are not truly immutable, so files may get
+ # touched; as long as the content does not change, this ensures we get stable tarballs.
+ tarinfo.mtime = 0
+
+ # Normalize mode
+ if tarinfo.isfile() or tarinfo.islnk():
+ # If user can execute, use 0o755; else 0o644
+ # This is to avoid potentially unsafe world writable & exeutable files that may get
+ # extracted when Python or tar is run with privileges
+ tarinfo.mode = 0o644 if tarinfo.mode & 0o100 == 0 else 0o755
+ else: # symbolic link and directories
+ tarinfo.mode = 0o755
+
+ return tarinfo
+
+
+def tar_add_metadata(tar: tarfile.TarFile, path: str, data: dict):
+ # Serialize buildinfo for the tarball
+ bstring = syaml.dump(data, default_flow_style=True).encode("utf-8")
+ tarinfo = tarfile.TarInfo(name=path)
+ tarinfo.size = len(bstring)
+ tar.addfile(deterministic_tarinfo(tarinfo), io.BytesIO(bstring))
+
+
+def _do_create_tarball(tarfile_path, binaries_dir, pkg_dir, buildinfo):
+ with gzip_compressed_tarfile(tarfile_path) as tar:
+ tar.add(name=binaries_dir, arcname=pkg_dir, filter=deterministic_tarinfo)
+ tar_add_metadata(tar, buildinfo_file_name(pkg_dir), buildinfo)
+
+
def _build_tarball(
spec,
out_url,
@@ -1217,39 +1279,26 @@ def _build_tarball(
os.remove(temp_tarfile_path)
else:
binaries_dir = spec.prefix
- mkdirp(os.path.join(workdir, ".spack"))
# create info for later relocation and create tar
- write_buildinfo_file(spec, workdir, relative)
+ buildinfo = get_buildinfo_dict(spec, relative)
# optionally make the paths in the binaries relative to each other
# in the spack install tree before creating tarball
try:
if relative:
- make_package_relative(workdir, spec, allow_root)
+ make_package_relative(workdir, spec, buildinfo, allow_root)
elif not allow_root:
- ensure_package_relocatable(workdir, binaries_dir)
+ ensure_package_relocatable(buildinfo, binaries_dir)
except Exception as e:
- shutil.rmtree(workdir)
- shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e)
- # create gzip compressed tarball of the install prefix
- # On AMD Ryzen 3700X and an SSD disk, we have the following on compression speed:
- # compresslevel=6 gzip default: llvm takes 4mins, roughly 2.1GB
- # compresslevel=9 python default: llvm takes 12mins, roughly 2.1GB
- # So we follow gzip.
- with closing(tarfile.open(tarfile_path, "w:gz", compresslevel=6)) as tar:
- tar.add(name=binaries_dir, arcname=pkg_dir)
- if not relative:
- # Add buildinfo file
- buildinfo_path = buildinfo_file_name(workdir)
- buildinfo_arcname = buildinfo_file_name(pkg_dir)
- tar.add(name=buildinfo_path, arcname=buildinfo_arcname)
+ _do_create_tarball(tarfile_path, binaries_dir, pkg_dir, buildinfo)
# remove copy of install directory
- shutil.rmtree(workdir)
+ if relative:
+ shutil.rmtree(workdir)
# get the sha256 checksum of the tarball
checksum = checksum_tarball(tarfile_path)
@@ -1536,13 +1585,12 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
return None
-def make_package_relative(workdir, spec, allow_root):
+def make_package_relative(workdir, spec, buildinfo, allow_root):
"""
Change paths in binaries to relative paths. Change absolute symlinks
to relative symlinks.
"""
prefix = spec.prefix
- buildinfo = read_buildinfo_file(workdir)
old_layout_root = buildinfo["buildpath"]
orig_path_names = list()
cur_path_names = list()
@@ -1566,9 +1614,8 @@ def make_package_relative(workdir, spec, allow_root):
relocate.make_link_relative(cur_path_names, orig_path_names)
-def ensure_package_relocatable(workdir, binaries_dir):
+def ensure_package_relocatable(buildinfo, binaries_dir):
"""Check if package binaries are relocatable."""
- buildinfo = read_buildinfo_file(workdir)
binaries = [os.path.join(binaries_dir, f) for f in buildinfo["relocate_binaries"]]
relocate.ensure_binaries_are_relocatable(binaries)
diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py
index 81f5f7377c..cde16c241e 100644
--- a/lib/spack/spack/test/bindist.py
+++ b/lib/spack/spack/test/bindist.py
@@ -2,11 +2,13 @@
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import filecmp
import glob
import io
import os
import platform
import sys
+import tarfile
import urllib.error
import urllib.request
import urllib.response
@@ -952,3 +954,81 @@ def test_correct_specs_are_pushed(
bindist.push([spec], push_url, include_root=root, include_dependencies=deps)
assert packages_to_push == expected
+
+
+def test_reproducible_tarball_is_reproducible(tmpdir):
+ p = tmpdir.mkdir("prefix")
+ p.mkdir("bin")
+ p.mkdir(".spack")
+
+ app = p.join("bin", "app")
+
+ tarball_1 = str(tmpdir.join("prefix-1.tar.gz"))
+ tarball_2 = str(tmpdir.join("prefix-2.tar.gz"))
+
+ with open(app, "w") as f:
+ f.write("hello world")
+
+ buildinfo = {"metadata": "yes please"}
+
+ # Create a tarball with a certain mtime of bin/app
+ os.utime(app, times=(0, 0))
+ bindist._do_create_tarball(tarball_1, binaries_dir=p, pkg_dir="pkg", buildinfo=buildinfo)
+
+ # Do it another time with different mtime of bin/app
+ os.utime(app, times=(10, 10))
+ bindist._do_create_tarball(tarball_2, binaries_dir=p, pkg_dir="pkg", buildinfo=buildinfo)
+
+ # They should be bitwise identical:
+ assert filecmp.cmp(tarball_1, tarball_2, shallow=False)
+
+ # Sanity check for contents:
+ with tarfile.open(tarball_1, mode="r") as f:
+ for m in f.getmembers():
+ assert m.uid == m.gid == m.mtime == 0
+ assert m.uname == m.gname == ""
+
+ assert set(f.getnames()) == {
+ "pkg",
+ "pkg/bin",
+ "pkg/bin/app",
+ "pkg/.spack",
+ "pkg/.spack/binary_distribution",
+ }
+
+
+def test_tarball_normalized_permissions(tmpdir):
+ p = tmpdir.mkdir("prefix")
+ p.mkdir("bin")
+ p.mkdir("share")
+ p.mkdir(".spack")
+
+ app = p.join("bin", "app")
+ data = p.join("share", "file")
+ tarball = str(tmpdir.join("prefix.tar.gz"))
+
+ # Everyone can write & execute. This should turn into 0o755 when the tarball is
+ # extracted (on a different system).
+ with open(app, "w", opener=lambda path, flags: os.open(path, flags, 0o777)) as f:
+ f.write("hello world")
+
+ # User doesn't have execute permissions, but group/world have; this should also
+ # turn into 0o644 (user read/write, group&world only read).
+ with open(data, "w", opener=lambda path, flags: os.open(path, flags, 0o477)) as f:
+ f.write("hello world")
+
+ bindist._do_create_tarball(tarball, binaries_dir=p, pkg_dir="pkg", buildinfo={})
+
+ with tarfile.open(tarball) as tar:
+ path_to_member = {member.name: member for member in tar.getmembers()}
+
+ # directories should have 0o755
+ assert path_to_member["pkg"].mode == 0o755
+ assert path_to_member["pkg/bin"].mode == 0o755
+ assert path_to_member["pkg/.spack"].mode == 0o755
+
+ # executable-by-user files should be 0o755
+ assert path_to_member["pkg/bin/app"].mode == 0o755
+
+ # not-executable-by-user files should be 0o644
+ assert path_to_member["pkg/share/file"].mode == 0o644