author     Scott Wittenburg <scott.wittenburg@kitware.com>  2023-11-09 06:30:41 -0700
committer  GitHub <noreply@github.com>                      2023-11-09 13:30:41 +0000
commit     1baed0d833e75a427467ef5686ea73a6c7700069 (patch)
tree       e7185bcf5f2e71afe312daa15fcd52471f3147a7 /lib
parent     cadc2a1aa59d6e1824d2bce8661a5cb1d444e80f (diff)
buildcache: skip unrecognized metadata files (#40941)
This commit improves forward compatibility of Spack with newer build cache metadata formats. Before this commit, invalid or unrecognized metadata was a fatal error; now it just causes the affected mirror to be skipped.

Co-authored-by: Harmen Stoppels <me@harmenstoppels.nl>
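The heart of the change is a single validation step applied before any metadata is trusted. Below is a minimal standalone sketch of that pattern, assuming hypothetical names (read_metadata and InvalidMetadata are illustrative, not Spack's API):

    import json

    MAX_SUPPORTED_LAYOUT = 1  # mirrors CURRENT_BUILD_CACHE_LAYOUT_VERSION


    class InvalidMetadata(Exception):
        """Unreadable, compressed, unparseable, or too-new metadata."""


    def read_metadata(path: str) -> dict:
        # Read raw bytes first so compression can be detected before decoding.
        try:
            with open(path, "rb") as f:
                raw = f.read()
        except OSError as e:
            raise InvalidMetadata(f"cannot read {path}: {e}") from e
        # Every gzip stream begins with the magic bytes 0x1f 0x8b.
        if raw[:2] == b"\x1f\x8b":
            raise InvalidMetadata("compressed metadata is not supported")
        try:
            meta = json.loads(raw.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError) as e:
            raise InvalidMetadata(f"cannot parse {path}: {e}") from e
        # A missing version field means the oldest, pre-versioned layout (0).
        try:
            version = int(meta.get("buildcache_layout_version", 0))
        except ValueError as e:
            raise InvalidMetadata("cannot parse layout version") from e
        if version > MAX_SUPPORTED_LAYOUT:
            raise InvalidMetadata(f"layout version {version} is too new")
        return meta

Callers catch the single exception type, warn, and move on to the next mirror instead of aborting the whole operation.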
Diffstat (limited to 'lib')
-rw-r--r--  lib/spack/spack/binary_distribution.py  118
-rw-r--r--  lib/spack/spack/test/bindist.py           76
2 files changed, 163 insertions(+), 31 deletions(-)
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 6a49ab445e..8cfb891640 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -66,8 +66,9 @@ from spack.spec import Spec
from spack.stage import Stage
from spack.util.executable import which
-_build_cache_relative_path = "build_cache"
-_build_cache_keys_relative_path = "_pgp"
+BUILD_CACHE_RELATIVE_PATH = "build_cache"
+BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
+CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1
class BuildCacheDatabase(spack_db.Database):
@@ -481,7 +482,7 @@ class BinaryCacheIndex:
scheme = urllib.parse.urlparse(mirror_url).scheme
if scheme != "oci" and not web_util.url_exists(
- url_util.join(mirror_url, _build_cache_relative_path, "index.json")
+ url_util.join(mirror_url, BUILD_CACHE_RELATIVE_PATH, "index.json")
):
return False
@@ -600,6 +601,10 @@ class NewLayoutException(spack.error.SpackError):
super().__init__(msg)
+class InvalidMetadataFile(spack.error.SpackError):
+ pass
+
+
class UnsignedPackageException(spack.error.SpackError):
"""
Raised if installation of unsigned package is attempted without
@@ -614,11 +619,11 @@ def compute_hash(data):
def build_cache_relative_path():
- return _build_cache_relative_path
+ return BUILD_CACHE_RELATIVE_PATH
def build_cache_keys_relative_path():
- return _build_cache_keys_relative_path
+ return BUILD_CACHE_KEYS_RELATIVE_PATH
def build_cache_prefix(prefix):
@@ -1401,7 +1406,7 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
spec_dict = sjson.load(content)
else:
raise ValueError("{0} not a valid spec file type".format(spec_file))
- spec_dict["buildcache_layout_version"] = 1
+ spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION
spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum}
with open(specfile_path, "w") as outfile:
@@ -1560,6 +1565,42 @@ def _delete_staged_downloads(download_result):
download_result["specfile_stage"].destroy()
+def _get_valid_spec_file(path: str, max_supported_layout: int) -> Tuple[Dict, int]:
+ """Read and validate a spec file, returning the spec dict with its layout version, or raising
+ InvalidMetadataFile if invalid."""
+ try:
+ with open(path, "rb") as f:
+ binary_content = f.read()
+ except OSError:
+ raise InvalidMetadataFile(f"No such file: {path}")
+
+ # In the future we may support transparently decompressing compressed spec files.
+ if binary_content[:2] == b"\x1f\x8b":
+ raise InvalidMetadataFile("Compressed spec files are not supported")
+
+ try:
+ as_string = binary_content.decode("utf-8")
+ if path.endswith(".json.sig"):
+ spec_dict = Spec.extract_json_from_clearsig(as_string)
+ else:
+ spec_dict = json.loads(as_string)
+ except Exception as e:
+ raise InvalidMetadataFile(f"Could not parse {path} due to: {e}") from e
+
+ # Ensure this version is not too new.
+ try:
+ layout_version = int(spec_dict.get("buildcache_layout_version", 0))
+ except ValueError as e:
+ raise InvalidMetadataFile("Could not parse layout version") from e
+
+ if layout_version > max_supported_layout:
+ raise InvalidMetadataFile(
+ f"Layout version {layout_version} is too new for this version of Spack"
+ )
+
+ return spec_dict, layout_version
+
+
def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
"""
Download binary tarball for given package into stage area, returning
@@ -1652,6 +1693,18 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
try:
local_specfile_stage.fetch()
local_specfile_stage.check()
+ try:
+ _get_valid_spec_file(
+ local_specfile_stage.save_filename,
+ CURRENT_BUILD_CACHE_LAYOUT_VERSION,
+ )
+ except InvalidMetadataFile as e:
+ tty.warn(
+ f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
+ f"from {mirror} due to invalid metadata file: {e}"
+ )
+ local_specfile_stage.destroy()
+ continue
except Exception:
continue
local_specfile_stage.cache_local()
@@ -1674,14 +1727,26 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
else:
ext = "json.sig" if try_signed else "json"
- specfile_path = url_util.join(mirror, _build_cache_relative_path, specfile_prefix)
+ specfile_path = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, specfile_prefix)
specfile_url = f"{specfile_path}.{ext}"
- spackfile_url = url_util.join(mirror, _build_cache_relative_path, tarball)
+ spackfile_url = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, tarball)
local_specfile_stage = try_fetch(specfile_url)
if local_specfile_stage:
local_specfile_path = local_specfile_stage.save_filename
signature_verified = False
+ try:
+ _get_valid_spec_file(
+ local_specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
+ )
+ except InvalidMetadataFile as e:
+ tty.warn(
+ f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
+ f"from {mirror} due to invalid metadata file: {e}"
+ )
+ local_specfile_stage.destroy()
+ continue
+
if try_signed and not unsigned:
# If we found a signed specfile at the root, try to verify
# the signature immediately. We will not download the
@@ -2001,24 +2066,16 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
)
specfile_path = download_result["specfile_stage"].save_filename
-
- with open(specfile_path, "r") as inputfile:
- content = inputfile.read()
- if specfile_path.endswith(".json.sig"):
- spec_dict = Spec.extract_json_from_clearsig(content)
- else:
- spec_dict = sjson.load(content)
-
+ spec_dict, layout_version = _get_valid_spec_file(
+ specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
+ )
bchecksum = spec_dict["binary_cache_checksum"]
filename = download_result["tarball_stage"].save_filename
signature_verified = download_result["signature_verified"]
tmpdir = None
- if (
- "buildcache_layout_version" not in spec_dict
- or int(spec_dict["buildcache_layout_version"]) < 1
- ):
+ if layout_version == 0:
# Handle the older buildcache layout where the .spack file
# contains a spec json, maybe an .asc file (signature),
# and another tarball containing the actual install tree.
@@ -2029,7 +2086,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
_delete_staged_downloads(download_result)
shutil.rmtree(tmpdir)
raise e
- else:
+ elif layout_version == 1:
# Newer buildcache layout: the .spack file contains just
# the install tree; the signature, if it exists, is
# wrapped around the spec.json at the root. If sig verify
@@ -2053,7 +2110,6 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
raise NoChecksumException(
tarfile_path, size, contents, "sha256", expected, local_checksum
)
-
try:
with closing(tarfile.open(tarfile_path, "r")) as tar:
# Remove install prefix from tarfile to extract directly into spec.prefix
@@ -2184,10 +2240,10 @@ def try_direct_fetch(spec, mirrors=None):
for mirror in binary_mirrors:
buildcache_fetch_url_json = url_util.join(
- mirror.fetch_url, _build_cache_relative_path, specfile_name
+ mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, specfile_name
)
buildcache_fetch_url_signed_json = url_util.join(
- mirror.fetch_url, _build_cache_relative_path, signed_specfile_name
+ mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, signed_specfile_name
)
try:
_, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json)
@@ -2292,7 +2348,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
for mirror in mirror_collection.values():
fetch_url = mirror.fetch_url
keys_url = url_util.join(
- fetch_url, _build_cache_relative_path, _build_cache_keys_relative_path
+ fetch_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
)
keys_index = url_util.join(keys_url, "index.json")
@@ -2357,7 +2413,7 @@ def push_keys(*mirrors, **kwargs):
for mirror in mirrors:
push_url = getattr(mirror, "push_url", mirror)
keys_url = url_util.join(
- push_url, _build_cache_relative_path, _build_cache_keys_relative_path
+ push_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
)
keys_local = url_util.local_file_path(keys_url)
@@ -2495,11 +2551,11 @@ def download_buildcache_entry(file_descriptions, mirror_url=None):
)
if mirror_url:
- mirror_root = os.path.join(mirror_url, _build_cache_relative_path)
+ mirror_root = os.path.join(mirror_url, BUILD_CACHE_RELATIVE_PATH)
return _download_buildcache_entry(mirror_root, file_descriptions)
for mirror in spack.mirror.MirrorCollection(binary=True).values():
- mirror_root = os.path.join(mirror.fetch_url, _build_cache_relative_path)
+ mirror_root = os.path.join(mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH)
if _download_buildcache_entry(mirror_root, file_descriptions):
return True
@@ -2590,7 +2646,7 @@ class DefaultIndexFetcher:
def get_remote_hash(self):
# Failure to fetch index.json.hash is not fatal
- url_index_hash = url_util.join(self.url, _build_cache_relative_path, "index.json.hash")
+ url_index_hash = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json.hash")
try:
response = self.urlopen(urllib.request.Request(url_index_hash, headers=self.headers))
except urllib.error.URLError:
@@ -2611,7 +2667,7 @@ class DefaultIndexFetcher:
return FetchIndexResult(etag=None, hash=None, data=None, fresh=True)
# Otherwise, download index.json
- url_index = url_util.join(self.url, _build_cache_relative_path, "index.json")
+ url_index = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
try:
response = self.urlopen(urllib.request.Request(url_index, headers=self.headers))
@@ -2655,7 +2711,7 @@ class EtagIndexFetcher:
def conditional_fetch(self) -> FetchIndexResult:
# Just do a conditional fetch immediately
- url = url_util.join(self.url, _build_cache_relative_path, "index.json")
+ url = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
headers = {
"User-Agent": web_util.SPACK_USER_AGENT,
"If-None-Match": '"{}"'.format(self.etag),
diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py
index 20802bbdd8..ea9caf7fc0 100644
--- a/lib/spack/spack/test/bindist.py
+++ b/lib/spack/spack/test/bindist.py
@@ -4,7 +4,9 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import filecmp
import glob
+import gzip
import io
+import json
import os
import platform
import sys
@@ -1112,3 +1114,77 @@ def test_tarfile_of_spec_prefix(tmpdir):
assert tar.getmember(f"{expected_prefix}/b_directory/file").isreg()
assert tar.getmember(f"{expected_prefix}/c_directory").isdir()
assert tar.getmember(f"{expected_prefix}/c_directory/file").isreg()
+
+
+@pytest.mark.parametrize("layout,expect_success", [(None, True), (1, True), (2, False)])
+def test_get_valid_spec_file(tmp_path, layout, expect_success):
+ # Test reading spec.json files with missing, supported, and too-new layout versions.
+ spec_dict = Spec("example").to_dict()
+ path = tmp_path / "spec.json"
+ effective_layout = layout or 0 # If not specified it should be 0
+
+ # Add a layout version
+ if layout is not None:
+ spec_dict["buildcache_layout_version"] = layout
+
+ # Save to file
+ with open(path, "w") as f:
+ json.dump(spec_dict, f)
+
+ try:
+ spec_dict_disk, layout_disk = bindist._get_valid_spec_file(
+ str(path), max_supported_layout=1
+ )
+ assert expect_success
+ assert spec_dict_disk == spec_dict
+ assert layout_disk == effective_layout
+ except bindist.InvalidMetadataFile:
+ assert not expect_success
+
+
+def test_get_valid_spec_file_doesnt_exist(tmp_path):
+ with pytest.raises(bindist.InvalidMetadataFile, match="No such file"):
+ bindist._get_valid_spec_file(str(tmp_path / "no-such-file"), max_supported_layout=1)
+
+
+def test_get_valid_spec_file_gzipped(tmp_path):
+ # Create a gzipped file, contents don't matter
+ path = tmp_path / "spec.json.gz"
+ with gzip.open(path, "wb") as f:
+ f.write(b"hello")
+ with pytest.raises(
+ bindist.InvalidMetadataFile, match="Compressed spec files are not supported"
+ ):
+ bindist._get_valid_spec_file(str(path), max_supported_layout=1)
+
+
+@pytest.mark.parametrize("filename", ["spec.json", "spec.json.sig"])
+def test_get_valid_spec_file_no_json(tmp_path, filename):
+ tmp_path.joinpath(filename).write_text("not json")
+ with pytest.raises(bindist.InvalidMetadataFile):
+ bindist._get_valid_spec_file(str(tmp_path / filename), max_supported_layout=1)
+
+
+def test_download_tarball_with_unsupported_layout_fails(tmp_path, mutable_config, capsys):
+ layout_version = bindist.CURRENT_BUILD_CACHE_LAYOUT_VERSION + 1
+ spec = Spec("gmake@4.4.1%gcc@13.1.0 arch=linux-ubuntu23.04-zen2")
+ spec._mark_concrete()
+ spec_dict = spec.to_dict()
+ spec_dict["buildcache_layout_version"] = layout_version
+
+ # Setup a basic local build cache structure
+ path = (
+ tmp_path / bindist.build_cache_relative_path() / bindist.tarball_name(spec, ".spec.json")
+ )
+ path.parent.mkdir(parents=True)
+ with open(path, "w") as f:
+ json.dump(spec_dict, f)
+
+ # Configure as a mirror.
+ mirror_cmd("add", "test-mirror", str(tmp_path))
+
+ # Shouldn't be able to "download" this.
+ assert bindist.download_tarball(spec, unsigned=True) is None
+
+ # And there should be a warning about an unsupported layout version.
+ assert f"Layout version {layout_version} is too new" in capsys.readouterr().err