diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/binary_distribution.py | 143 | ||||
-rw-r--r-- | lib/spack/spack/test/bindist.py | 92 |
2 files changed, 162 insertions, 73 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 90cd577fe8..a9b1d6280b 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -23,7 +23,7 @@ import urllib.request import warnings from contextlib import closing, contextmanager from gzip import GzipFile -from typing import List, NamedTuple, Optional, Union +from typing import Dict, List, NamedTuple, Optional, Tuple, Union from urllib.error import HTTPError, URLError import llnl.util.filesystem as fsys @@ -625,8 +625,7 @@ def buildinfo_file_name(prefix): """ Filename of the binary package meta-data file """ - name = os.path.join(prefix, ".spack/binary_distribution") - return name + return os.path.join(prefix, ".spack/binary_distribution") def read_buildinfo_file(prefix): @@ -1158,57 +1157,99 @@ def gzip_compressed_tarfile(path): yield tar -def deterministic_tarinfo(tarinfo: tarfile.TarInfo): - # We only add files, symlinks, hardlinks, and directories - # No character devices, block devices and FIFOs should ever enter a tarball. - if tarinfo.isdev(): - return None +def _tarinfo_name(p: str): + return p.lstrip("/") - # For distribution, it makes no sense to user/group data; since (a) they don't exist - # on other machines, and (b) they lead to surprises as `tar x` run as root will change - # ownership if it can. We want to extract as the current user. By setting owner to root, - # root will extract as root, and non-privileged user will extract as themselves. - tarinfo.uid = 0 - tarinfo.gid = 0 - tarinfo.uname = "" - tarinfo.gname = "" - - # Reset mtime to epoch time, our prefixes are not truly immutable, so files may get - # touched; as long as the content does not change, this ensures we get stable tarballs. - tarinfo.mtime = 0 - - # Normalize mode - if tarinfo.isfile() or tarinfo.islnk(): - # If user can execute, use 0o755; else 0o644 - # This is to avoid potentially unsafe world writable & exeutable files that may get - # extracted when Python or tar is run with privileges - tarinfo.mode = 0o644 if tarinfo.mode & 0o100 == 0 else 0o755 - else: # symbolic link and directories - tarinfo.mode = 0o755 - - return tarinfo - - -def tar_add_metadata(tar: tarfile.TarFile, path: str, data: dict): - # Serialize buildinfo for the tarball - bstring = syaml.dump(data, default_flow_style=True).encode("utf-8") - tarinfo = tarfile.TarInfo(name=path) - tarinfo.size = len(bstring) - tar.addfile(deterministic_tarinfo(tarinfo), io.BytesIO(bstring)) - - -def deterministic_tarinfo_without_buildinfo(tarinfo: tarfile.TarInfo): - """Skip buildinfo file when creating a tarball, and normalize other tarinfo fields.""" - if tarinfo.name.endswith("/.spack/binary_distribution"): - return None - return deterministic_tarinfo(tarinfo) +def tarfile_of_spec_prefix(tar: tarfile.TarFile, prefix: str) -> None: + """Create a tarfile of an install prefix of a spec. Skips existing buildinfo file. + Only adds regular files, symlinks and dirs. Skips devices, fifos. Preserves hardlinks. + Normalizes permissions like git. Tar entries are added in depth-first pre-order, with + dir entries partitioned by file | dir, and sorted alphabetically, for reproducibility. + Partitioning ensures only one dir is in memory at a time, and sorting improves compression. + + Args: + tar: tarfile object to add files to + prefix: absolute install prefix of spec""" + if not os.path.isabs(prefix) or not os.path.isdir(prefix): + raise ValueError(f"prefix '{prefix}' must be an absolute path to a directory") + hardlink_to_tarinfo_name: Dict[Tuple[int, int], str] = dict() + stat_key = lambda stat: (stat.st_dev, stat.st_ino) + + try: # skip buildinfo file if it exists + files_to_skip = [stat_key(os.lstat(buildinfo_file_name(prefix)))] + except OSError: + files_to_skip = [] + + dir_stack = [prefix] + while dir_stack: + dir = dir_stack.pop() + + # Add the dir before its contents + dir_info = tarfile.TarInfo(_tarinfo_name(dir)) + dir_info.type = tarfile.DIRTYPE + dir_info.mode = 0o755 + tar.addfile(dir_info) + + # Sort by name: reproducible & improves compression + with os.scandir(dir) as it: + entries = sorted(it, key=lambda entry: entry.name) + + new_dirs = [] + for entry in entries: + if entry.is_dir(follow_symlinks=False): + new_dirs.append(entry.path) + continue + + file_info = tarfile.TarInfo(_tarinfo_name(entry.path)) + + s = entry.stat(follow_symlinks=False) + + # Skip existing binary distribution files. + id = stat_key(s) + if id in files_to_skip: + continue + + # Normalize the mode + file_info.mode = 0o644 if s.st_mode & 0o100 == 0 else 0o755 + + if entry.is_symlink(): + file_info.type = tarfile.SYMTYPE + file_info.linkname = os.readlink(entry.path) + tar.addfile(file_info) + + elif entry.is_file(follow_symlinks=False): + # Deduplicate hardlinks + if s.st_nlink > 1: + if id in hardlink_to_tarinfo_name: + file_info.type = tarfile.LNKTYPE + file_info.linkname = hardlink_to_tarinfo_name[id] + tar.addfile(file_info) + continue + hardlink_to_tarinfo_name[id] = file_info.name + + # If file not yet seen, copy it. + file_info.type = tarfile.REGTYPE + file_info.size = s.st_size + with open(entry.path, "rb") as f: + tar.addfile(file_info, f) -def _do_create_tarball(tarfile_path: str, binaries_dir: str, pkg_dir: str, buildinfo: dict): + dir_stack.extend(reversed(new_dirs)) # we pop, so reverse to stay alphabetical + + +def _do_create_tarball(tarfile_path: str, binaries_dir: str, buildinfo: dict): with gzip_compressed_tarfile(tarfile_path) as tar: - tar.add(name=binaries_dir, arcname=pkg_dir, filter=deterministic_tarinfo_without_buildinfo) - tar_add_metadata(tar, buildinfo_file_name(pkg_dir), buildinfo) + # Tarball the install prefix + tarfile_of_spec_prefix(tar, binaries_dir) + + # Serialize buildinfo for the tarball + bstring = syaml.dump(buildinfo, default_flow_style=True).encode("utf-8") + tarinfo = tarfile.TarInfo(name=_tarinfo_name(buildinfo_file_name(binaries_dir))) + tarinfo.type = tarfile.REGTYPE + tarinfo.size = len(bstring) + tarinfo.mode = 0o644 + tar.addfile(tarinfo, io.BytesIO(bstring)) class PushOptions(NamedTuple): @@ -1280,14 +1321,12 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option ): raise NoOverwriteException(url_util.format(remote_specfile_path)) - pkg_dir = os.path.basename(spec.prefix.rstrip(os.path.sep)) - binaries_dir = spec.prefix # create info for later relocation and create tar buildinfo = get_buildinfo_dict(spec) - _do_create_tarball(tarfile_path, binaries_dir, pkg_dir, buildinfo) + _do_create_tarball(tarfile_path, binaries_dir, buildinfo) # get the sha256 checksum of the tarball checksum = checksum_tarball(tarfile_path) diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index ea065c26a5..20802bbdd8 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -899,22 +899,21 @@ def test_tarball_doesnt_include_buildinfo_twice(tmpdir): tarball = str(tmpdir.join("prefix.tar.gz")) bindist._do_create_tarball( - tarfile_path=tarball, - binaries_dir=str(p), - pkg_dir="my-pkg-prefix", - buildinfo={"metadata": "new"}, + tarfile_path=tarball, binaries_dir=p.strpath, buildinfo={"metadata": "new"} ) + expected_prefix = p.strpath.lstrip("/") + # Verify we don't have a repeated binary_distribution file, # and that the tarball contains the new one, not the old one. with tarfile.open(tarball) as tar: - assert syaml.load(tar.extractfile("my-pkg-prefix/.spack/binary_distribution")) == { + assert syaml.load(tar.extractfile(f"{expected_prefix}/.spack/binary_distribution")) == { "metadata": "new" } assert tar.getnames() == [ - "my-pkg-prefix", - "my-pkg-prefix/.spack", - "my-pkg-prefix/.spack/binary_distribution", + f"{expected_prefix}", + f"{expected_prefix}/.spack", + f"{expected_prefix}/.spack/binary_distribution", ] @@ -935,15 +934,17 @@ def test_reproducible_tarball_is_reproducible(tmpdir): # Create a tarball with a certain mtime of bin/app os.utime(app, times=(0, 0)) - bindist._do_create_tarball(tarball_1, binaries_dir=p, pkg_dir="pkg", buildinfo=buildinfo) + bindist._do_create_tarball(tarball_1, binaries_dir=p.strpath, buildinfo=buildinfo) # Do it another time with different mtime of bin/app os.utime(app, times=(10, 10)) - bindist._do_create_tarball(tarball_2, binaries_dir=p, pkg_dir="pkg", buildinfo=buildinfo) + bindist._do_create_tarball(tarball_2, binaries_dir=p.strpath, buildinfo=buildinfo) # They should be bitwise identical: assert filecmp.cmp(tarball_1, tarball_2, shallow=False) + expected_prefix = p.strpath.lstrip("/") + # Sanity check for contents: with tarfile.open(tarball_1, mode="r") as f: for m in f.getmembers(): @@ -951,11 +952,11 @@ def test_reproducible_tarball_is_reproducible(tmpdir): assert m.uname == m.gname == "" assert set(f.getnames()) == { - "pkg", - "pkg/bin", - "pkg/bin/app", - "pkg/.spack", - "pkg/.spack/binary_distribution", + f"{expected_prefix}", + f"{expected_prefix}/bin", + f"{expected_prefix}/bin/app", + f"{expected_prefix}/.spack", + f"{expected_prefix}/.spack/binary_distribution", } @@ -979,21 +980,23 @@ def test_tarball_normalized_permissions(tmpdir): with open(data, "w", opener=lambda path, flags: os.open(path, flags, 0o477)) as f: f.write("hello world") - bindist._do_create_tarball(tarball, binaries_dir=p, pkg_dir="pkg", buildinfo={}) + bindist._do_create_tarball(tarball, binaries_dir=p.strpath, buildinfo={}) + + expected_prefix = p.strpath.lstrip("/") with tarfile.open(tarball) as tar: path_to_member = {member.name: member for member in tar.getmembers()} # directories should have 0o755 - assert path_to_member["pkg"].mode == 0o755 - assert path_to_member["pkg/bin"].mode == 0o755 - assert path_to_member["pkg/.spack"].mode == 0o755 + assert path_to_member[f"{expected_prefix}"].mode == 0o755 + assert path_to_member[f"{expected_prefix}/bin"].mode == 0o755 + assert path_to_member[f"{expected_prefix}/.spack"].mode == 0o755 # executable-by-user files should be 0o755 - assert path_to_member["pkg/bin/app"].mode == 0o755 + assert path_to_member[f"{expected_prefix}/bin/app"].mode == 0o755 # not-executable-by-user files should be 0o644 - assert path_to_member["pkg/share/file"].mode == 0o644 + assert path_to_member[f"{expected_prefix}/share/file"].mode == 0o644 def test_tarball_common_prefix(dummy_prefix, tmpdir): @@ -1062,3 +1065,50 @@ def test_tarfile_with_files_outside_common_prefix(tmpdir, dummy_prefix): ValueError, match="Tarball contains file /etc/config_file outside of prefix" ): bindist._ensure_common_prefix(tarfile.open("broken.tar", mode="r")) + + +def test_tarfile_of_spec_prefix(tmpdir): + """Tests whether hardlinks, symlinks, files and dirs are added correctly, + and that the order of entries is correct.""" + prefix = tmpdir.mkdir("prefix") + prefix.ensure("a_directory", dir=True).join("file").write("hello") + prefix.ensure("c_directory", dir=True).join("file").write("hello") + prefix.ensure("b_directory", dir=True).join("file").write("hello") + prefix.join("file").write("hello") + os.symlink(prefix.join("file"), prefix.join("symlink")) + os.link(prefix.join("file"), prefix.join("hardlink")) + + file = tmpdir.join("example.tar") + + with tarfile.open(file, mode="w") as tar: + bindist.tarfile_of_spec_prefix(tar, prefix.strpath) + + expected_prefix = prefix.strpath.lstrip("/") + + with tarfile.open(file, mode="r") as tar: + # Verify that entries are added in depth-first pre-order, files preceding dirs, + # entries ordered alphabetically + assert tar.getnames() == [ + f"{expected_prefix}", + f"{expected_prefix}/file", + f"{expected_prefix}/hardlink", + f"{expected_prefix}/symlink", + f"{expected_prefix}/a_directory", + f"{expected_prefix}/a_directory/file", + f"{expected_prefix}/b_directory", + f"{expected_prefix}/b_directory/file", + f"{expected_prefix}/c_directory", + f"{expected_prefix}/c_directory/file", + ] + + # Check that the types are all correct + assert tar.getmember(f"{expected_prefix}").isdir() + assert tar.getmember(f"{expected_prefix}/file").isreg() + assert tar.getmember(f"{expected_prefix}/hardlink").islnk() + assert tar.getmember(f"{expected_prefix}/symlink").issym() + assert tar.getmember(f"{expected_prefix}/a_directory").isdir() + assert tar.getmember(f"{expected_prefix}/a_directory/file").isreg() + assert tar.getmember(f"{expected_prefix}/b_directory").isdir() + assert tar.getmember(f"{expected_prefix}/b_directory/file").isreg() + assert tar.getmember(f"{expected_prefix}/c_directory").isdir() + assert tar.getmember(f"{expected_prefix}/c_directory/file").isreg() |