author    Harmen Stoppels <harmenstoppels@gmail.com>  2023-01-07 12:22:40 +0100
committer GitHub <noreply@github.com>                 2023-01-07 12:22:40 +0100
commit    8a1b81797834dd04de883c767867e2f85d45c9c4 (patch)
tree      8eeb7a96974c72379313945db362d0f93eb417e1 /lib
parent    86e346e90695d5ceca3ec031d3d5470a4151e99d (diff)
Allow spec.json files to be clearsigned, use transparent compression for *.json (#34490)
This commit allows (remote) spec.json files to be clearsigned and gzipped. The idea is to reduce the number of requests and the number of bytes transferred.
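The mechanism is extension-agnostic: readers sniff the two-byte gzip magic number rather than trusting file names. A minimal standalone sketch of that idea, assuming nothing beyond the standard library (the function name is illustrative, not part of this commit):

import gzip
import io

def maybe_decompress(raw: bytes) -> bytes:
    """Return the gunzipped payload if raw starts with the gzip magic, else raw."""
    if raw[:2] == b"\x1f\x8b":  # gzip magic bytes, the same check the diff uses
        return gzip.GzipFile(fileobj=io.BytesIO(raw)).read()
    return raw

# Works for both compressed and plain payloads:
assert maybe_decompress(gzip.compress(b'{"spec": {}}')) == b'{"spec": {}}'
assert maybe_decompress(b'{"spec": {}}') == b'{"spec": {}}'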
Diffstat (limited to 'lib')
-rw-r--r--  lib/spack/spack/binary_distribution.py  252
-rw-r--r--  lib/spack/spack/spec.py                   31
-rw-r--r--  lib/spack/spack/test/bindist.py          105
-rw-r--r--  lib/spack/spack/test/cmd/ci.py             4
-rw-r--r--  lib/spack/spack/test/spec_yaml.py         21
5 files changed, 255 insertions, 158 deletions
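The diff below also teaches the JSON loaders to cope with PGP clearsigned content: skip the two header lines of the message, then cut the body at the last signature marker. A hedged sketch of just that parsing step, with no actual signature verification (names are local to the sketch):

import io
import json

def read_clearsigned_json(stream):
    stream.readline()  # -----BEGIN PGP SIGNED MESSAGE-----
    stream.readline()  # Hash: SHA512
    body = stream.read()
    footer = body.rfind("-----BEGIN PGP SIGNATURE-----")
    if footer == -1 or not body[footer - 1].isspace():
        raise ValueError("Could not find PGP signature")
    return json.loads(body[:footer])

msg = """-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

{"hello": "world"}
-----BEGIN PGP SIGNATURE-----
xyz
-----END PGP SIGNATURE-----"""
assert read_clearsigned_json(io.StringIO(msg)) == {"hello": "world"}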
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 8ae7207768..f629ebbae2 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -5,7 +5,9 @@
import codecs
import collections
+import gzip
import hashlib
+import io
import json
import multiprocessing.pool
import os
@@ -613,6 +615,28 @@ def read_buildinfo_file(prefix):
return buildinfo
+def transparently_decompress_bytes(binary_stream):
+ """Wrap stream in a decompress wrapper if gzip compressed"""
+ # Get magic bytes
+ if isinstance(binary_stream, io.BytesIO):
+ # Not peekable... Alternatively io.BufferedReader(io.BytesIO(...))
+ # but to add yet another wrapper just to read two bytes that are
+ # already in memory... sigh.
+ magic = binary_stream.read(2)
+ binary_stream.seek(0)
+ else:
+ magic = binary_stream.peek(2)
+
+ # Verify magic
+ if magic.startswith(b"\x1f\x8b"):
+ return gzip.GzipFile(fileobj=binary_stream)
+ return binary_stream
+
+
+def transparently_decompress_bytes_to_string(binary_stream, encoding="utf-8"):
+ return codecs.getreader(encoding)(transparently_decompress_bytes(binary_stream))
+
+
class BuildManifestVisitor(BaseDirectoryVisitor):
"""Visitor that collects a list of files and symlinks
that can be checked for need of relocation. It knows how
@@ -845,6 +869,42 @@ def sign_specfile(key, force, specfile_path):
spack.util.gpg.sign(key, specfile_path, signed_specfile_path, clearsign=True)
+def _load_clearsigned_json(stream):
+ # Skip the PGP header
+ stream.readline()
+ stream.readline()
+ json = stream.read()
+ footer_index = json.rfind("-----BEGIN PGP SIGNATURE-----")
+ if footer_index == -1 or not json[footer_index - 1].isspace():
+ raise ValueError("Could not find PGP signature in clearsigned json file.")
+ return sjson.load(json[:footer_index])
+
+
+def _load_possibly_clearsigned_json(stream):
+ if _is_clearsigned_stream(stream):
+ return _load_clearsigned_json(stream)
+ return sjson.load(stream)
+
+
+def _is_clearsigned_stream(stream):
+ curr = stream.tell()
+ header = stream.read(34)
+ stream.seek(curr)
+ return header == "-----BEGIN PGP SIGNED MESSAGE-----"
+
+
+def is_clearsigned_file(path):
+ with open(path, "r") as f:
+ return _is_clearsigned_stream(f)
+
+
+def load_possibly_clearsigned_json(s):
+ """Deserialize JSON from a string or stream s, removing any clearsign
+ header/footer."""
+ s = io.StringIO(s) if isinstance(s, str) else s
+ return _load_possibly_clearsigned_json(s)
+
+
def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_dir, concurrency):
"""Read all the specs listed in the provided list, using thread given thread parallelism,
generate the index, and push it to the mirror.
@@ -867,11 +927,7 @@ def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_dir, concurrency):
if spec_file_contents:
# Need full spec.json name or this gets confused with index.json.
- if spec_url.endswith(".json.sig"):
- specfile_json = Spec.extract_json_from_clearsig(spec_file_contents)
- return Spec.from_dict(specfile_json)
- if spec_url.endswith(".json"):
- return Spec.from_json(spec_file_contents)
+ return Spec.from_dict(load_possibly_clearsigned_json(spec_file_contents))
tp = multiprocessing.pool.ThreadPool(processes=concurrency)
try:
@@ -933,8 +989,8 @@ def _specs_from_cache_aws_cli(cache_prefix):
aws = which("aws")
def file_read_method(file_path):
- with open(file_path) as fd:
- return fd.read()
+ with open(file_path, "rb") as f:
+ return transparently_decompress_bytes_to_string(f).read()
tmpspecsdir = tempfile.mkdtemp()
sync_command_args = [
@@ -981,7 +1037,7 @@ def _specs_from_cache_fallback(cache_prefix):
contents = None
try:
_, _, spec_file = web_util.read_from_url(url)
- contents = codecs.getreader("utf-8")(spec_file).read()
+ contents = transparently_decompress_bytes_to_string(spec_file).read()
except (URLError, web_util.SpackWebError) as url_err:
tty.error("Error reading specfile: {0}".format(url))
tty.error(url_err)
@@ -1379,7 +1435,7 @@ def try_verify(specfile_path):
return True
-def try_fetch(url_to_fetch):
+def try_fetch(url_to_fetch, try_decompress=False):
"""Utility function to try and fetch a file from a url, stage it
locally, and return the path to the staged file.
@@ -1398,6 +1454,21 @@ def try_fetch(url_to_fetch):
stage.destroy()
return None
+ if not try_decompress:
+ return stage
+
+ # Stage has some logic for automatically expanding
+ # archives, but it is based on file extensions. So instead,
+ # we more or less repeat the logic.
+ try:
+ tmp = stage.save_filename + ".tmp"
+ with gzip.open(stage.save_filename, "rb") as compressed:
+ with open(tmp, "wb") as decompressed:
+ shutil.copyfileobj(compressed, decompressed)
+ os.rename(tmp, stage.save_filename)
+ except OSError:
+ pass
+
return stage
@@ -1467,61 +1538,45 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
}
)
- tried_to_verify_sigs = []
-
- # Assumes we care more about finding a spec file by preferred ext
- # than by mirrory priority. This can be made less complicated as
- # we remove support for deprecated spec formats and buildcache layouts.
+ verification_failure = False
for ext in ["json.sig", "json"]:
- for mirror_to_try in mirrors_to_try:
- specfile_url = "{0}.{1}".format(mirror_to_try["specfile"], ext)
- spackfile_url = mirror_to_try["spackfile"]
- local_specfile_stage = try_fetch(specfile_url)
- if local_specfile_stage:
- local_specfile_path = local_specfile_stage.save_filename
- signature_verified = False
-
- if ext.endswith(".sig") and not unsigned:
- # If we found a signed specfile at the root, try to verify
- # the signature immediately. We will not download the
- # tarball if we could not verify the signature.
- tried_to_verify_sigs.append(specfile_url)
- signature_verified = try_verify(local_specfile_path)
- if not signature_verified:
- tty.warn("Failed to verify: {0}".format(specfile_url))
-
- if unsigned or signature_verified or not ext.endswith(".sig"):
- # We will download the tarball in one of three cases:
- # 1. user asked for --no-check-signature
- # 2. user didn't ask for --no-check-signature, but we
- # found a spec.json.sig and verified the signature already
- # 3. neither of the first two cases are true, but this file
- # is *not* a signed json (not a spec.json.sig file). That
- # means we already looked at all the mirrors and either didn't
- # find any .sig files or couldn't verify any of them. But it
- # is still possible to find an old style binary package where
- # the signature is a detached .asc file in the outer archive
- # of the tarball, and in that case, the only way to know is to
- # download the tarball. This is a deprecated use case, so if
- # something goes wrong during the extraction process (can't
- # verify signature, checksum doesn't match) we will fail at
- # that point instead of trying to download more tarballs from
- # the remaining mirrors, looking for one we can use.
- tarball_stage = try_fetch(spackfile_url)
- if tarball_stage:
- return {
- "tarball_stage": tarball_stage,
- "specfile_stage": local_specfile_stage,
- "signature_verified": signature_verified,
- }
+ for mirror in mirrors_to_try:
+ # Try to download the specfile. For any legacy version of Spack's buildcache
+ # we definitely require this file.
+ specfile_url = "{0}.{1}".format(mirror["specfile"], ext)
+ specfile_stage = try_fetch(specfile_url, try_decompress=True)
+ if not specfile_stage:
+ continue
- local_specfile_stage.destroy()
+ specfile_path = specfile_stage.save_filename
+
+ # If it is a clearsign file, we must verify it (unless disabled)
+ should_verify = not unsigned and is_clearsigned_file(specfile_path)
+ if should_verify and not try_verify(specfile_path):
+ verification_failure = True
+ tty.warn("Failed to verify: {0}".format(specfile_url))
+ specfile_stage.destroy()
+ continue
+
+ # In case the spec.json is not clearsigned, it is a legacy format, where
+ # either the signature is in the tarball with the binaries, or the package
+ # is unsigned. Verification is then postponed.
+ spackfile_url = mirror["spackfile"]
+ tarball_stage = try_fetch(spackfile_url)
+ if tarball_stage:
+ return {
+ "tarball_stage": tarball_stage,
+ "specfile_stage": specfile_stage,
+ "signature_verified": should_verify, # should_verify implies it was verified
+ }
+ specfile_stage.destroy()
# Falling through the nested loops means we exhaustively searched
# for all known kinds of spec files on all mirrors and did not find
# an acceptable one for which we could download a tarball.
- if tried_to_verify_sigs:
+ if verification_failure:
raise NoVerifyException(
(
"Spack found new style signed binary packages, "
@@ -1802,11 +1857,7 @@ def extract_tarball(spec, download_result, allow_root=False, unsigned=False, for
specfile_path = download_result["specfile_stage"].save_filename
with open(specfile_path, "r") as inputfile:
- content = inputfile.read()
- if specfile_path.endswith(".json.sig"):
- spec_dict = Spec.extract_json_from_clearsig(content)
- else:
- spec_dict = sjson.load(content)
+ spec_dict = load_possibly_clearsigned_json(inputfile)
bchecksum = spec_dict["binary_cache_checksum"]
filename = download_result["tarball_stage"].save_filename
@@ -1971,54 +2022,35 @@ def try_direct_fetch(spec, mirrors=None):
"""
specfile_name = tarball_name(spec, ".spec.json")
signed_specfile_name = tarball_name(spec, ".spec.json.sig")
- specfile_is_signed = False
found_specs = []
for mirror in spack.mirror.MirrorCollection(mirrors=mirrors).values():
- buildcache_fetch_url_json = url_util.join(
- mirror.fetch_url, _build_cache_relative_path, specfile_name
- )
- buildcache_fetch_url_signed_json = url_util.join(
- mirror.fetch_url, _build_cache_relative_path, signed_specfile_name
- )
- try:
- _, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json)
- specfile_is_signed = True
- except (URLError, web_util.SpackWebError, HTTPError) as url_err:
+ for file in (specfile_name, signed_specfile_name):
+ url = url_util.join(mirror.fetch_url, _build_cache_relative_path, file)
try:
- _, _, fs = web_util.read_from_url(buildcache_fetch_url_json)
- except (URLError, web_util.SpackWebError, HTTPError) as url_err_x:
+ _, _, fs = web_util.read_from_url(url)
+ except (URLError, web_util.SpackWebError, HTTPError) as url_err:
tty.debug(
- "Did not find {0} on {1}".format(
- specfile_name, buildcache_fetch_url_signed_json
- ),
+ "Did not find {0} on {1}".format(specfile_name, url),
url_err,
level=2,
)
- tty.debug(
- "Did not find {0} on {1}".format(specfile_name, buildcache_fetch_url_json),
- url_err_x,
- level=2,
- )
continue
- specfile_contents = codecs.getreader("utf-8")(fs).read()
-
- # read the spec from the build cache file. All specs in build caches
- # are concrete (as they are built) so we need to mark this spec
- # concrete on read-in.
- if specfile_is_signed:
- specfile_json = Spec.extract_json_from_clearsig(specfile_contents)
- fetched_spec = Spec.from_dict(specfile_json)
- else:
- fetched_spec = Spec.from_json(specfile_contents)
- fetched_spec._mark_concrete()
- found_specs.append(
- {
- "mirror_url": mirror.fetch_url,
- "spec": fetched_spec,
- }
- )
+ # read the spec from the build cache file. All specs in build caches
+ # are concrete (as they are built) so we need to mark this spec
+ # concrete on read-in.
+ stream = transparently_decompress_bytes_to_string(fs)
+ fetched_spec = Spec.from_dict(load_possibly_clearsigned_json(stream))
+ fetched_spec._mark_concrete()
+
+ found_specs.append(
+ {
+ "mirror_url": mirror.fetch_url,
+ "spec": fetched_spec,
+ }
+ )
+ break
return found_specs
@@ -2097,7 +2129,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
try:
_, _, json_file = web_util.read_from_url(keys_index)
- json_index = sjson.load(codecs.getreader("utf-8")(json_file))
+ json_index = sjson.load(transparently_decompress_bytes_to_string(json_file))
except (URLError, web_util.SpackWebError) as url_err:
if web_util.url_exists(keys_index):
err_msg = [
@@ -2422,11 +2454,15 @@ class DefaultIndexFetcher:
raise FetchIndexError("Could not fetch index from {}".format(url_index), e)
try:
- result = codecs.getreader("utf-8")(response).read()
+ binary_result = response.read()
except ValueError as e:
return FetchCacheError("Remote index {} is invalid".format(url_index), e)
- computed_hash = compute_hash(result)
+ # The hash is computed on the raw bytes
+ computed_hash = compute_hash(binary_result)
+
+ # Only then decode as string, possibly decompress
+ result = transparently_decompress_bytes_to_string(io.BytesIO(binary_result)).read()
# We don't handle computed_hash != remote_hash here, which can happen
# when remote index.json and index.json.hash are out of sync, or if
@@ -2480,15 +2516,21 @@ class EtagIndexFetcher:
raise FetchIndexError("Could not fetch index {}".format(url), e) from e
try:
- result = codecs.getreader("utf-8")(response).read()
+ binary_result = response.read()
except ValueError as e:
raise FetchIndexError("Remote index {} is invalid".format(url), e) from e
+ # The hash is computed on the raw bytes
+ computed_hash = compute_hash(binary_result)
+
+ # Only then decode as string, possibly decompress
+ result = transparently_decompress_bytes_to_string(io.BytesIO(binary_result)).read()
+
headers = response.headers
etag_header_value = headers.get("Etag", None) or headers.get("etag", None)
return FetchIndexResult(
etag=web_util.parse_etag(etag_header_value),
- hash=compute_hash(result),
+ hash=computed_hash,
data=result,
fresh=False,
)
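Note the ordering the index fetchers above rely on: the content hash is computed over the raw bytes exactly as served, and only afterwards is the payload decompressed and decoded, so the hash stays comparable with the remote index.json.hash. A small illustration (sha256 here is an assumption of the sketch; the commit calls its own compute_hash):

import gzip
import hashlib
import io

raw = gzip.compress(b'{"database": {"installs": {}}}')  # bytes exactly as served

# Hash first, over the raw bytes...
computed_hash = hashlib.sha256(raw).hexdigest()

# ...then decompress and decode to get at the JSON text.
text = gzip.GzipFile(fileobj=io.BytesIO(raw)).read().decode("utf-8")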
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index e8306ae0b7..95f2951f2b 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -156,16 +156,6 @@ default_format = "{name}{@version}"
default_format += "{%compiler.name}{@compiler.version}{compiler_flags}"
default_format += "{variants}{arch=architecture}"
-#: Regular expression to pull spec contents out of clearsigned signature
-#: file.
-CLEARSIGN_FILE_REGEX = re.compile(
- (
- r"^-----BEGIN PGP SIGNED MESSAGE-----"
- r"\s+Hash:\s+[^\s]+\s+(.+)-----BEGIN PGP SIGNATURE-----"
- ),
- re.MULTILINE | re.DOTALL,
-)
-
#: specfile format version. Must increase monotonically
specfile_format_version = 3
@@ -2454,27 +2444,6 @@ class Spec(object):
raise sjson.SpackJSONError("error parsing JSON spec:", str(e)) from e
@staticmethod
- def extract_json_from_clearsig(data):
- m = CLEARSIGN_FILE_REGEX.search(data)
- if m:
- return sjson.load(m.group(1))
- return sjson.load(data)
-
- @staticmethod
- def from_signed_json(stream):
- """Construct a spec from clearsigned json spec file.
-
- Args:
- stream: string or file object to read from.
- """
- data = stream
- if hasattr(stream, "read"):
- data = stream.read()
-
- extracted_json = Spec.extract_json_from_clearsig(data)
- return Spec.from_dict(extracted_json)
-
- @staticmethod
def from_detection(spec_str, extra_attributes=None):
"""Construct a spec from a spec string determined during external
detection and attach extra attributes to it.
diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py
index dc6cb35177..9d801c6d5e 100644
--- a/lib/spack/spack/test/bindist.py
+++ b/lib/spack/spack/test/bindist.py
@@ -3,7 +3,9 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import glob
+import gzip
import io
+import json
import os
import platform
import sys
@@ -889,3 +891,106 @@ def test_default_index_json_404():
with pytest.raises(bindist.FetchIndexError, match="Could not fetch index"):
fetcher.conditional_fetch()
+
+
+def test_read_spec_from_signed_json():
+ spec_dir = os.path.join(test_path, "data", "mirrors", "signed_json")
+ file_name = (
+ "linux-ubuntu18.04-haswell-gcc-8.4.0-"
+ "zlib-1.2.12-g7otk5dra3hifqxej36m5qzm7uyghqgb.spec.json.sig"
+ )
+ spec_path = os.path.join(spec_dir, file_name)
+
+ def check_spec(spec_to_check):
+ assert spec_to_check.name == "zlib"
+ assert spec_to_check._hash == "g7otk5dra3hifqxej36m5qzm7uyghqgb"
+
+ with open(spec_path) as fd:
+ s = Spec.from_dict(bindist.load_possibly_clearsigned_json(fd))
+ check_spec(s)
+
+
+def test_load_clearsigned_json():
+ obj = {"hello": "world"}
+ clearsigned = """\
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA512
+
+{}
+-----BEGIN PGP SIGNATURE-----
+xyz
+-----END PGP SIGNATURE-----""".format(
+ json.dumps(obj)
+ )
+
+ # Should accept strings and streams
+ assert bindist.load_possibly_clearsigned_json(clearsigned) == obj
+ assert bindist.load_possibly_clearsigned_json(io.StringIO(clearsigned)) == obj
+
+
+def test_load_without_clearsigned_json():
+ obj = {"hello": "world"}
+ not_clearsigned = json.dumps(obj)
+
+ # Should accept strings and streams
+ assert bindist.load_possibly_clearsigned_json(not_clearsigned) == obj
+ assert bindist.load_possibly_clearsigned_json(io.StringIO(not_clearsigned)) == obj
+
+
+def test_json_containing_clearsigned_message_is_json():
+ # Test that we don't interpret json with a PGP signed message as a string somewhere
+ # as a clearsigned message. It should just deserialize the json contents.
+ val = """\
+"-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA512
+
+{}
+-----BEGIN PGP SIGNATURE-----
+signature
+-----END PGP SIGNATURE-----
+"""
+ input = json.dumps({"key": val})
+ assert bindist.load_possibly_clearsigned_json(input)["key"] == val
+
+
+def test_clearsign_signature_part_of_json_string():
+ # Check if we can deal with a string in json containing the string that is used
+ # at the start of a PGP signature.
+ obj = {"-----BEGIN PGP SIGNATURE-----": "-----BEGIN PGP SIGNATURE-----"}
+ input = """\
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA512
+
+{}
+-----BEGIN PGP SIGNATURE-----
+signature
+-----END PGP SIGNATURE-----
+""".format(
+ json.dumps(obj)
+ )
+ assert bindist.load_possibly_clearsigned_json(input) == obj
+
+
+def test_broken_clearsign_signature():
+ # In this test there is no PGP signature.
+ obj = {"-----BEGIN PGP SIGNATURE-----": "-----BEGIN PGP SIGNATURE-----"}
+ input = """\
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA512
+
+{}
+""".format(
+ json.dumps(obj)
+ )
+ with pytest.raises(ValueError, match="Could not find PGP signature"):
+ bindist.load_possibly_clearsigned_json(input)
+
+
+def test_transparent_decompression():
+ """Test a roundtrip of string -> gzip -> string."""
+ text = "this is utf8 that should be compressed"
+ gzipped = io.BytesIO()
+ with gzip.GzipFile(fileobj=gzipped, mode="w", compresslevel=1, mtime=0) as f:
+ f.write(text.encode("utf-8"))
+ gzipped.seek(0)
+ assert bindist.transparently_decompress_bytes_to_string(gzipped).read() == text
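The tests above exercise each mechanism in isolation; as a closing illustration, a hedged end-to-end sketch of both combined, reading a clearsigned JSON document that a mirror serves gzipped (all names are local to the sketch):

import gzip
import io
import json

clearsigned = """-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

{"spec": {"name": "zlib"}}
-----BEGIN PGP SIGNATURE-----
xyz
-----END PGP SIGNATURE-----"""

blob = gzip.compress(clearsigned.encode("utf-8"))  # what the mirror would serve

# Sniff the gzip magic, then decompress transparently.
buffered = io.BufferedReader(io.BytesIO(blob))
binary = gzip.GzipFile(fileobj=buffered) if buffered.peek(2)[:2] == b"\x1f\x8b" else buffered
text = io.TextIOWrapper(binary, encoding="utf-8")

# Strip the clearsign header, then cut the body at the signature marker.
text.readline()  # -----BEGIN PGP SIGNED MESSAGE-----
text.readline()  # Hash: SHA512
body = text.read()
spec = json.loads(body[: body.rfind("-----BEGIN PGP SIGNATURE-----")])
assert spec["spec"]["name"] == "zlib"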
diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py
index 3033862c9b..e27bd48a13 100644
--- a/lib/spack/spack/test/cmd/ci.py
+++ b/lib/spack/spack/test/cmd/ci.py
@@ -1324,7 +1324,9 @@ spack:
if file_name.endswith(".spec.json.sig"):
spec_json_path = os.path.join(buildcache_path, file_name)
with open(spec_json_path) as json_fd:
- json_object = Spec.extract_json_from_clearsig(json_fd.read())
+ json_object = spack.binary_distribution.load_possibly_clearsigned_json(
+ json_fd
+ )
jsonschema.validate(json_object, specfile_schema)
logs_dir = working_dir.join("logs_dir")
diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py
index f1adf1cad9..1bc2e4c7be 100644
--- a/lib/spack/spack/test/spec_yaml.py
+++ b/lib/spack/spack/test/spec_yaml.py
@@ -47,27 +47,6 @@ def test_simple_spec():
check_json_round_trip(spec)
-def test_read_spec_from_signed_json():
- spec_dir = os.path.join(spack.paths.test_path, "data", "mirrors", "signed_json")
- file_name = (
- "linux-ubuntu18.04-haswell-gcc-8.4.0-"
- "zlib-1.2.12-g7otk5dra3hifqxej36m5qzm7uyghqgb.spec.json.sig"
- )
- spec_path = os.path.join(spec_dir, file_name)
-
- def check_spec(spec_to_check):
- assert spec_to_check.name == "zlib"
- assert spec_to_check._hash == "g7otk5dra3hifqxej36m5qzm7uyghqgb"
-
- with open(spec_path) as fd:
- s = Spec.from_signed_json(fd)
- check_spec(s)
-
- with open(spec_path) as fd:
- s = Spec.from_signed_json(fd.read())
- check_spec(s)
-
-
def test_normal_spec(mock_packages):
spec = Spec("mpileaks+debug~opt")
spec.normalize()