summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2023-01-09 10:52:07 +0100
committerGitHub <noreply@github.com>2023-01-09 10:52:07 +0100
commit47011f594f6558b623af68f728e14894d5da0939 (patch)
tree76708f6f2decad25a1fa29cac8ec369bae208c08
parenta00f692f20cca8a77459c7539493d5f6b6f58d45 (diff)
downloadspack-47011f594f6558b623af68f728e14894d5da0939.tar.gz
spack-47011f594f6558b623af68f728e14894d5da0939.tar.bz2
spack-47011f594f6558b623af68f728e14894d5da0939.tar.xz
spack-47011f594f6558b623af68f728e14894d5da0939.zip
Revert "Allow spec.json files to be clearsigned, use transparent compression for *.json (#34490)" (#34856)
This reverts commit 8a1b81797834dd04de883c767867e2f85d45c9c4.
-rw-r--r--lib/spack/spack/binary_distribution.py252
-rw-r--r--lib/spack/spack/spec.py31
-rw-r--r--lib/spack/spack/test/bindist.py105
-rw-r--r--lib/spack/spack/test/cmd/ci.py4
-rw-r--r--lib/spack/spack/test/spec_yaml.py21
5 files changed, 158 insertions, 255 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index f629ebbae2..8ae7207768 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -5,9 +5,7 @@
import codecs
import collections
-import gzip
import hashlib
-import io
import json
import multiprocessing.pool
import os
@@ -615,28 +613,6 @@ def read_buildinfo_file(prefix):
return buildinfo
-def transparently_decompress_bytes(binary_stream):
- """Wrap stream in a decompress wrapper if gzip compressed"""
- # Get magic bytes
- if isinstance(binary_stream, io.BytesIO):
- # Not peekable... Alternatively io.BufferedReader(io.BytesIO(...))
- # but to add yet another wrapper just to read two bytes that are
- # already in memory... sigh.
- magic = binary_stream.read(2)
- binary_stream.seek(0)
- else:
- magic = binary_stream.peek(2)
-
- # Verify magic
- if magic.startswith(b"\x1f\x8b"):
- return gzip.GzipFile(fileobj=binary_stream)
- return binary_stream
-
-
-def transparently_decompress_bytes_to_string(binary_stream, encoding="utf-8"):
- return codecs.getreader(encoding)(transparently_decompress_bytes(binary_stream))
-
-
class BuildManifestVisitor(BaseDirectoryVisitor):
"""Visitor that collects a list of files and symlinks
that can be checked for need of relocation. It knows how
@@ -869,42 +845,6 @@ def sign_specfile(key, force, specfile_path):
spack.util.gpg.sign(key, specfile_path, signed_specfile_path, clearsign=True)
-def _load_clearsigned_json(stream):
- # Skip the PGP header
- stream.readline()
- stream.readline()
- json = stream.read()
- footer_index = json.rfind("-----BEGIN PGP SIGNATURE-----")
- if footer_index == -1 or not json[footer_index - 1].isspace():
- raise ValueError("Could not find PGP signature in clearsigned json file.")
- return sjson.load(json[:footer_index])
-
-
-def _load_possibly_clearsigned_json(stream):
- if _is_clearsigned_stream(stream):
- return _load_clearsigned_json(stream)
- return sjson.load(stream)
-
-
-def _is_clearsigned_stream(stream):
- curr = stream.tell()
- header = stream.read(34)
- stream.seek(curr)
- return header == "-----BEGIN PGP SIGNED MESSAGE-----"
-
-
-def is_clearsigned_file(path):
- with open(path, "r") as f:
- return _is_clearsigned_stream(f)
-
-
-def load_possibly_clearsigned_json(s):
- """Deserialize JSON from a string or stream s, removing any clearsign
- header/footer."""
- s = io.StringIO(s) if isinstance(s, str) else s
- return _load_possibly_clearsigned_json(s)
-
-
def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_dir, concurrency):
"""Read all the specs listed in the provided list, using thread given thread parallelism,
generate the index, and push it to the mirror.
@@ -927,7 +867,11 @@ def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_di
if spec_file_contents:
# Need full spec.json name or this gets confused with index.json.
- return Spec.from_dict(load_possibly_clearsigned_json(spec_file_contents))
+ if spec_url.endswith(".json.sig"):
+ specfile_json = Spec.extract_json_from_clearsig(spec_file_contents)
+ return Spec.from_dict(specfile_json)
+ if spec_url.endswith(".json"):
+ return Spec.from_json(spec_file_contents)
tp = multiprocessing.pool.ThreadPool(processes=concurrency)
try:
@@ -989,8 +933,8 @@ def _specs_from_cache_aws_cli(cache_prefix):
aws = which("aws")
def file_read_method(file_path):
- with open(file_path, "rb") as f:
- return transparently_decompress_bytes_to_string(f).read()
+ with open(file_path) as fd:
+ return fd.read()
tmpspecsdir = tempfile.mkdtemp()
sync_command_args = [
@@ -1037,7 +981,7 @@ def _specs_from_cache_fallback(cache_prefix):
contents = None
try:
_, _, spec_file = web_util.read_from_url(url)
- contents = transparently_decompress_bytes_to_string(spec_file).read()
+ contents = codecs.getreader("utf-8")(spec_file).read()
except (URLError, web_util.SpackWebError) as url_err:
tty.error("Error reading specfile: {0}".format(url))
tty.error(url_err)
@@ -1435,7 +1379,7 @@ def try_verify(specfile_path):
return True
-def try_fetch(url_to_fetch, try_decompress=False):
+def try_fetch(url_to_fetch):
"""Utility function to try and fetch a file from a url, stage it
locally, and return the path to the staged file.
@@ -1454,21 +1398,6 @@ def try_fetch(url_to_fetch, try_decompress=False):
stage.destroy()
return None
- if not try_decompress:
- return stage
-
- # Stage has some logic for automatically expanding
- # archives, but it is based on file extensions. So instead,
- # we more or less repeat the logic.
- try:
- tmp = stage.save_filename + ".tmp"
- with gzip.open(stage.save_filename, "rb") as compressed:
- with open(tmp, "wb") as decompressed:
- shutil.copyfileobj(compressed, decompressed)
- os.rename(tmp, stage.save_filename)
- except OSError:
- pass
-
return stage
@@ -1538,45 +1467,61 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
}
)
- verification_failure = False
- for ext in ["json.sig", "json"]:
- for mirror in mirrors_to_try:
- # Try to download the specfile. For any legacy version of Spack's buildcache
- # we definitely require this file.
- specfile_url = "{0}.{1}".format(mirror["specfile"], ext)
- specfile_stage = try_fetch(specfile_url, try_decompress=True)
- if not specfile_stage:
- continue
-
- specfile_path = specfile_stage.save_filename
+ tried_to_verify_sigs = []
- # If it is a clearsign file, we must verify it (unless disabled)
- should_verify = not unsigned and is_clearsigned_file(specfile_path)
- if should_verify and not try_verify(specfile_path):
- verification_failure = True
- tty.warn("Failed to verify: {0}".format(specfile_url))
- specfile_stage.destroy()
- continue
+ # Assumes we care more about finding a spec file by preferred ext
+ # than by mirrory priority. This can be made less complicated as
+ # we remove support for deprecated spec formats and buildcache layouts.
+ for ext in ["json.sig", "json"]:
+ for mirror_to_try in mirrors_to_try:
+ specfile_url = "{0}.{1}".format(mirror_to_try["specfile"], ext)
+ spackfile_url = mirror_to_try["spackfile"]
+ local_specfile_stage = try_fetch(specfile_url)
+ if local_specfile_stage:
+ local_specfile_path = local_specfile_stage.save_filename
+ signature_verified = False
+
+ if ext.endswith(".sig") and not unsigned:
+ # If we found a signed specfile at the root, try to verify
+ # the signature immediately. We will not download the
+ # tarball if we could not verify the signature.
+ tried_to_verify_sigs.append(specfile_url)
+ signature_verified = try_verify(local_specfile_path)
+ if not signature_verified:
+ tty.warn("Failed to verify: {0}".format(specfile_url))
+
+ if unsigned or signature_verified or not ext.endswith(".sig"):
+ # We will download the tarball in one of three cases:
+ # 1. user asked for --no-check-signature
+ # 2. user didn't ask for --no-check-signature, but we
+ # found a spec.json.sig and verified the signature already
+ # 3. neither of the first two cases are true, but this file
+ # is *not* a signed json (not a spec.json.sig file). That
+ # means we already looked at all the mirrors and either didn't
+ # find any .sig files or couldn't verify any of them. But it
+ # is still possible to find an old style binary package where
+ # the signature is a detached .asc file in the outer archive
+ # of the tarball, and in that case, the only way to know is to
+ # download the tarball. This is a deprecated use case, so if
+ # something goes wrong during the extraction process (can't
+ # verify signature, checksum doesn't match) we will fail at
+ # that point instead of trying to download more tarballs from
+ # the remaining mirrors, looking for one we can use.
+ tarball_stage = try_fetch(spackfile_url)
+ if tarball_stage:
+ return {
+ "tarball_stage": tarball_stage,
+ "specfile_stage": local_specfile_stage,
+ "signature_verified": signature_verified,
+ }
- # In case the spec.json is not clearsigned, it means it's a legacy
- # format, where either the signature is in the tarball with binaries, or
- # the package is unsigned. Verification
- # is then postponed.
- spackfile_url = mirror["spackfile"]
- tarball_stage = try_fetch(spackfile_url)
- if tarball_stage:
- return {
- "tarball_stage": tarball_stage,
- "specfile_stage": specfile_stage,
- "signature_verified": should_verify, # should_verify implies it was verified
- }
- specfile_stage.destroy()
+ local_specfile_stage.destroy()
# Falling through the nested loops meeans we exhaustively searched
# for all known kinds of spec files on all mirrors and did not find
# an acceptable one for which we could download a tarball.
- if verification_failure:
+ if tried_to_verify_sigs:
raise NoVerifyException(
(
"Spack found new style signed binary packages, "
@@ -1857,7 +1802,11 @@ def extract_tarball(spec, download_result, allow_root=False, unsigned=False, for
specfile_path = download_result["specfile_stage"].save_filename
with open(specfile_path, "r") as inputfile:
- spec_dict = load_possibly_clearsigned_json(inputfile)
+ content = inputfile.read()
+ if specfile_path.endswith(".json.sig"):
+ spec_dict = Spec.extract_json_from_clearsig(content)
+ else:
+ spec_dict = sjson.load(content)
bchecksum = spec_dict["binary_cache_checksum"]
filename = download_result["tarball_stage"].save_filename
@@ -2022,35 +1971,54 @@ def try_direct_fetch(spec, mirrors=None):
"""
specfile_name = tarball_name(spec, ".spec.json")
signed_specfile_name = tarball_name(spec, ".spec.json.sig")
+ specfile_is_signed = False
found_specs = []
for mirror in spack.mirror.MirrorCollection(mirrors=mirrors).values():
- for file in (specfile_name, signed_specfile_name):
- url = url_util.join(mirror.fetch_url, _build_cache_relative_path, file)
+ buildcache_fetch_url_json = url_util.join(
+ mirror.fetch_url, _build_cache_relative_path, specfile_name
+ )
+ buildcache_fetch_url_signed_json = url_util.join(
+ mirror.fetch_url, _build_cache_relative_path, signed_specfile_name
+ )
+ try:
+ _, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json)
+ specfile_is_signed = True
+ except (URLError, web_util.SpackWebError, HTTPError) as url_err:
try:
- _, _, fs = web_util.read_from_url(url)
- except (URLError, web_util.SpackWebError, HTTPError) as url_err:
+ _, _, fs = web_util.read_from_url(buildcache_fetch_url_json)
+ except (URLError, web_util.SpackWebError, HTTPError) as url_err_x:
tty.debug(
- "Did not find {0} on {1}".format(specfile_name, url),
+ "Did not find {0} on {1}".format(
+ specfile_name, buildcache_fetch_url_signed_json
+ ),
url_err,
level=2,
)
+ tty.debug(
+ "Did not find {0} on {1}".format(specfile_name, buildcache_fetch_url_json),
+ url_err_x,
+ level=2,
+ )
continue
+ specfile_contents = codecs.getreader("utf-8")(fs).read()
+
+ # read the spec from the build cache file. All specs in build caches
+ # are concrete (as they are built) so we need to mark this spec
+ # concrete on read-in.
+ if specfile_is_signed:
+ specfile_json = Spec.extract_json_from_clearsig(specfile_contents)
+ fetched_spec = Spec.from_dict(specfile_json)
+ else:
+ fetched_spec = Spec.from_json(specfile_contents)
+ fetched_spec._mark_concrete()
- # read the spec from the build cache file. All specs in build caches
- # are concrete (as they are built) so we need to mark this spec
- # concrete on read-in.
- stream = transparently_decompress_bytes_to_string(fs)
- fetched_spec = Spec.from_dict(load_possibly_clearsigned_json(stream))
- fetched_spec._mark_concrete()
-
- found_specs.append(
- {
- "mirror_url": mirror.fetch_url,
- "spec": fetched_spec,
- }
- )
- break
+ found_specs.append(
+ {
+ "mirror_url": mirror.fetch_url,
+ "spec": fetched_spec,
+ }
+ )
return found_specs
@@ -2129,7 +2097,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
try:
_, _, json_file = web_util.read_from_url(keys_index)
- json_index = sjson.load(transparently_decompress_bytes_to_string(json_file))
+ json_index = sjson.load(codecs.getreader("utf-8")(json_file))
except (URLError, web_util.SpackWebError) as url_err:
if web_util.url_exists(keys_index):
err_msg = [
@@ -2454,15 +2422,11 @@ class DefaultIndexFetcher:
raise FetchIndexError("Could not fetch index from {}".format(url_index), e)
try:
- binary_result = response.read()
+ result = codecs.getreader("utf-8")(response).read()
except ValueError as e:
return FetchCacheError("Remote index {} is invalid".format(url_index), e)
- # The hash is computed on the raw bytes
- computed_hash = compute_hash(binary_result)
-
- # Only then decode as string, possibly decompress
- result = transparently_decompress_bytes_to_string(io.BytesIO(binary_result)).read()
+ computed_hash = compute_hash(result)
# We don't handle computed_hash != remote_hash here, which can happen
# when remote index.json and index.json.hash are out of sync, or if
@@ -2516,21 +2480,15 @@ class EtagIndexFetcher:
raise FetchIndexError("Could not fetch index {}".format(url), e) from e
try:
- binary_result = response.read()
+ result = codecs.getreader("utf-8")(response).read()
except ValueError as e:
raise FetchIndexError("Remote index {} is invalid".format(url), e) from e
- # The hash is computed on the raw bytes
- computed_hash = compute_hash(binary_result)
-
- # Only then decode as string, possibly decompress
- result = transparently_decompress_bytes_to_string(io.BytesIO(binary_result)).read()
-
headers = response.headers
etag_header_value = headers.get("Etag", None) or headers.get("etag", None)
return FetchIndexResult(
etag=web_util.parse_etag(etag_header_value),
- hash=computed_hash,
+ hash=compute_hash(result),
data=result,
fresh=False,
)
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 95f2951f2b..e8306ae0b7 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -156,6 +156,16 @@ default_format = "{name}{@version}"
default_format += "{%compiler.name}{@compiler.version}{compiler_flags}"
default_format += "{variants}{arch=architecture}"
+#: Regular expression to pull spec contents out of clearsigned signature
+#: file.
+CLEARSIGN_FILE_REGEX = re.compile(
+ (
+ r"^-----BEGIN PGP SIGNED MESSAGE-----"
+ r"\s+Hash:\s+[^\s]+\s+(.+)-----BEGIN PGP SIGNATURE-----"
+ ),
+ re.MULTILINE | re.DOTALL,
+)
+
#: specfile format version. Must increase monotonically
specfile_format_version = 3
@@ -2444,6 +2454,27 @@ class Spec(object):
raise sjson.SpackJSONError("error parsing JSON spec:", str(e)) from e
@staticmethod
+ def extract_json_from_clearsig(data):
+ m = CLEARSIGN_FILE_REGEX.search(data)
+ if m:
+ return sjson.load(m.group(1))
+ return sjson.load(data)
+
+ @staticmethod
+ def from_signed_json(stream):
+ """Construct a spec from clearsigned json spec file.
+
+ Args:
+ stream: string or file object to read from.
+ """
+ data = stream
+ if hasattr(stream, "read"):
+ data = stream.read()
+
+ extracted_json = Spec.extract_json_from_clearsig(data)
+ return Spec.from_dict(extracted_json)
+
+ @staticmethod
def from_detection(spec_str, extra_attributes=None):
"""Construct a spec from a spec string determined during external
detection and attach extra attributes to it.
diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py
index 9d801c6d5e..dc6cb35177 100644
--- a/lib/spack/spack/test/bindist.py
+++ b/lib/spack/spack/test/bindist.py
@@ -3,9 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import glob
-import gzip
import io
-import json
import os
import platform
import sys
@@ -891,106 +889,3 @@ def test_default_index_json_404():
with pytest.raises(bindist.FetchIndexError, match="Could not fetch index"):
fetcher.conditional_fetch()
-
-
-def test_read_spec_from_signed_json():
- spec_dir = os.path.join(test_path, "data", "mirrors", "signed_json")
- file_name = (
- "linux-ubuntu18.04-haswell-gcc-8.4.0-"
- "zlib-1.2.12-g7otk5dra3hifqxej36m5qzm7uyghqgb.spec.json.sig"
- )
- spec_path = os.path.join(spec_dir, file_name)
-
- def check_spec(spec_to_check):
- assert spec_to_check.name == "zlib"
- assert spec_to_check._hash == "g7otk5dra3hifqxej36m5qzm7uyghqgb"
-
- with open(spec_path) as fd:
- s = Spec.from_dict(bindist.load_possibly_clearsigned_json(fd))
- check_spec(s)
-
-
-def test_load_clearsigned_json():
- obj = {"hello": "world"}
- clearsigned = """\
------BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA512
-
-{}
------BEGIN PGP SIGNATURE-----
-xyz
------END PGP SIGNATURE-----""".format(
- json.dumps(obj)
- )
-
- # Should accept strings and streams
- assert bindist.load_possibly_clearsigned_json(clearsigned) == obj
- assert bindist.load_possibly_clearsigned_json(io.StringIO(clearsigned)) == obj
-
-
-def test_load_without_clearsigned_json():
- obj = {"hello": "world"}
- not_clearsigned = json.dumps(obj)
-
- # Should accept strings and streams
- assert bindist.load_possibly_clearsigned_json(not_clearsigned) == obj
- assert bindist.load_possibly_clearsigned_json(io.StringIO(not_clearsigned)) == obj
-
-
-def test_json_containing_clearsigned_message_is_json():
- # Test that we don't interpret json with a PGP signed message as a string somewhere
- # as a clearsigned message. It should just deserialize the json contents.
- val = """\
-"-----BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA512
-
-{}
------BEGIN PGP SIGNATURE-----
-signature
------END PGP SIGNATURE-----
-"""
- input = json.dumps({"key": val})
- assert bindist.load_possibly_clearsigned_json(input)["key"] == val
-
-
-def test_clearsign_signature_part_of_json_string():
- # Check if we can deal with a string in json containing the string that is used
- # at the start of a PGP signature.
- obj = {"-----BEGIN PGP SIGNATURE-----": "-----BEGIN PGP SIGNATURE-----"}
- input = """\
------BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA512
-
-{}
------BEGIN PGP SIGNATURE-----
-signature
------END PGP SIGNATURE-----
-""".format(
- json.dumps(obj)
- )
- assert bindist.load_possibly_clearsigned_json(input) == obj
-
-
-def test_broken_clearsign_signature():
- # In this test there is no PGP signature.
- obj = {"-----BEGIN PGP SIGNATURE-----": "-----BEGIN PGP SIGNATURE-----"}
- input = """\
------BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA512
-
-{}
-""".format(
- json.dumps(obj)
- )
- with pytest.raises(ValueError, match="Could not find PGP signature"):
- bindist.load_possibly_clearsigned_json(input)
-
-
-def test_transparent_decompression():
- """Test a roundtrip of string -> gzip -> string."""
- text = "this is utf8 that should be compressed"
- gzipped = io.BytesIO()
- with gzip.GzipFile(fileobj=gzipped, mode="w", compresslevel=1, mtime=0) as f:
- f.write(text.encode("utf-8"))
- gzipped.seek(0)
- assert bindist.transparently_decompress_bytes_to_string(gzipped).read() == text
diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py
index e27bd48a13..3033862c9b 100644
--- a/lib/spack/spack/test/cmd/ci.py
+++ b/lib/spack/spack/test/cmd/ci.py
@@ -1324,9 +1324,7 @@ spack:
if file_name.endswith(".spec.json.sig"):
spec_json_path = os.path.join(buildcache_path, file_name)
with open(spec_json_path) as json_fd:
- json_object = spack.binary_distribution.load_possibly_clearsigned_json(
- json_fd
- )
+ json_object = Spec.extract_json_from_clearsig(json_fd.read())
jsonschema.validate(json_object, specfile_schema)
logs_dir = working_dir.join("logs_dir")
diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py
index 1bc2e4c7be..f1adf1cad9 100644
--- a/lib/spack/spack/test/spec_yaml.py
+++ b/lib/spack/spack/test/spec_yaml.py
@@ -47,6 +47,27 @@ def test_simple_spec():
check_json_round_trip(spec)
+def test_read_spec_from_signed_json():
+ spec_dir = os.path.join(spack.paths.test_path, "data", "mirrors", "signed_json")
+ file_name = (
+ "linux-ubuntu18.04-haswell-gcc-8.4.0-"
+ "zlib-1.2.12-g7otk5dra3hifqxej36m5qzm7uyghqgb.spec.json.sig"
+ )
+ spec_path = os.path.join(spec_dir, file_name)
+
+ def check_spec(spec_to_check):
+ assert spec_to_check.name == "zlib"
+ assert spec_to_check._hash == "g7otk5dra3hifqxej36m5qzm7uyghqgb"
+
+ with open(spec_path) as fd:
+ s = Spec.from_signed_json(fd)
+ check_spec(s)
+
+ with open(spec_path) as fd:
+ s = Spec.from_signed_json(fd.read())
+ check_spec(s)
+
+
def test_normal_spec(mock_packages):
spec = Spec("mpileaks+debug~opt")
spec.normalize()