From 47011f594f6558b623af68f728e14894d5da0939 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Mon, 9 Jan 2023 10:52:07 +0100 Subject: Revert "Allow spec.json files to be clearsigned, use transparent compression for *.json (#34490)" (#34856) This reverts commit 8a1b81797834dd04de883c767867e2f85d45c9c4. --- lib/spack/spack/binary_distribution.py | 252 ++++++++++++++------------------- lib/spack/spack/spec.py | 31 ++++ lib/spack/spack/test/bindist.py | 105 -------------- lib/spack/spack/test/cmd/ci.py | 4 +- lib/spack/spack/test/spec_yaml.py | 21 +++ 5 files changed, 158 insertions(+), 255 deletions(-) diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index f629ebbae2..8ae7207768 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -5,9 +5,7 @@ import codecs import collections -import gzip import hashlib -import io import json import multiprocessing.pool import os @@ -615,28 +613,6 @@ def read_buildinfo_file(prefix): return buildinfo -def transparently_decompress_bytes(binary_stream): - """Wrap stream in a decompress wrapper if gzip compressed""" - # Get magic bytes - if isinstance(binary_stream, io.BytesIO): - # Not peekable... Alternatively io.BufferedReader(io.BytesIO(...)) - # but to add yet another wrapper just to read two bytes that are - # already in memory... sigh. - magic = binary_stream.read(2) - binary_stream.seek(0) - else: - magic = binary_stream.peek(2) - - # Verify magic - if magic.startswith(b"\x1f\x8b"): - return gzip.GzipFile(fileobj=binary_stream) - return binary_stream - - -def transparently_decompress_bytes_to_string(binary_stream, encoding="utf-8"): - return codecs.getreader(encoding)(transparently_decompress_bytes(binary_stream)) - - class BuildManifestVisitor(BaseDirectoryVisitor): """Visitor that collects a list of files and symlinks that can be checked for need of relocation. It knows how @@ -869,42 +845,6 @@ def sign_specfile(key, force, specfile_path): spack.util.gpg.sign(key, specfile_path, signed_specfile_path, clearsign=True) -def _load_clearsigned_json(stream): - # Skip the PGP header - stream.readline() - stream.readline() - json = stream.read() - footer_index = json.rfind("-----BEGIN PGP SIGNATURE-----") - if footer_index == -1 or not json[footer_index - 1].isspace(): - raise ValueError("Could not find PGP signature in clearsigned json file.") - return sjson.load(json[:footer_index]) - - -def _load_possibly_clearsigned_json(stream): - if _is_clearsigned_stream(stream): - return _load_clearsigned_json(stream) - return sjson.load(stream) - - -def _is_clearsigned_stream(stream): - curr = stream.tell() - header = stream.read(34) - stream.seek(curr) - return header == "-----BEGIN PGP SIGNED MESSAGE-----" - - -def is_clearsigned_file(path): - with open(path, "r") as f: - return _is_clearsigned_stream(f) - - -def load_possibly_clearsigned_json(s): - """Deserialize JSON from a string or stream s, removing any clearsign - header/footer.""" - s = io.StringIO(s) if isinstance(s, str) else s - return _load_possibly_clearsigned_json(s) - - def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_dir, concurrency): """Read all the specs listed in the provided list, using thread given thread parallelism, generate the index, and push it to the mirror. @@ -927,7 +867,11 @@ def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_di if spec_file_contents: # Need full spec.json name or this gets confused with index.json. 
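
For context, the transparent-decompression helper this revert removes boils down to sniffing the two-byte gzip magic number before deciding how to read a stream. A minimal standalone sketch of that technique, outside of Spack (the name `maybe_decompress` is ours):

```python
import gzip
import io

GZIP_MAGIC = b"\x1f\x8b"  # the first two bytes of every gzip stream

def maybe_decompress(binary_stream):
    """Wrap a binary stream in a gzip reader if it starts with the magic."""
    if isinstance(binary_stream, io.BytesIO):
        # BytesIO has no peek(): read two bytes, then rewind.
        magic = binary_stream.read(2)
        binary_stream.seek(0)
    else:
        # Buffered file objects can peek without consuming input.
        magic = binary_stream.peek(2)
    if magic.startswith(GZIP_MAGIC):
        return gzip.GzipFile(fileobj=binary_stream)
    return binary_stream

# usage: works the same for plain and gzip-compressed files
# with open("index.json", "rb") as f:
#     text = maybe_decompress(f).read().decode("utf-8")
```

Sniffing magic bytes instead of trusting file extensions is what made the compression "transparent"; the revert trades that away for simpler, extension-driven code.
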
-            return Spec.from_dict(load_possibly_clearsigned_json(spec_file_contents))
+            if spec_url.endswith(".json.sig"):
+                specfile_json = Spec.extract_json_from_clearsig(spec_file_contents)
+                return Spec.from_dict(specfile_json)
+            if spec_url.endswith(".json"):
+                return Spec.from_json(spec_file_contents)
 
     tp = multiprocessing.pool.ThreadPool(processes=concurrency)
     try:
@@ -989,8 +933,8 @@ def _specs_from_cache_aws_cli(cache_prefix):
     aws = which("aws")
 
     def file_read_method(file_path):
-        with open(file_path, "rb") as f:
-            return transparently_decompress_bytes_to_string(f).read()
+        with open(file_path) as fd:
+            return fd.read()
 
     tmpspecsdir = tempfile.mkdtemp()
     sync_command_args = [
@@ -1037,7 +981,7 @@ def _specs_from_cache_fallback(cache_prefix):
         contents = None
         try:
             _, _, spec_file = web_util.read_from_url(url)
-            contents = transparently_decompress_bytes_to_string(spec_file).read()
+            contents = codecs.getreader("utf-8")(spec_file).read()
         except (URLError, web_util.SpackWebError) as url_err:
             tty.error("Error reading specfile: {0}".format(url))
             tty.error(url_err)
@@ -1435,7 +1379,7 @@ def try_verify(specfile_path):
     return True
 
 
-def try_fetch(url_to_fetch, try_decompress=False):
+def try_fetch(url_to_fetch):
     """Utility function to try and fetch a file from a url, stage it
     locally, and return the path to the staged file.
 
@@ -1454,21 +1398,6 @@
         stage.destroy()
         return None
 
-    if not try_decompress:
-        return stage
-
-    # Stage has some logic for automatically expanding
-    # archives, but it is based on file extensions. So instead,
-    # we more or less repeat the logic.
-    try:
-        tmp = stage.save_filename + ".tmp"
-        with gzip.open(stage.save_filename, "rb") as compressed:
-            with open(tmp, "wb") as decompressed:
-                shutil.copyfileobj(compressed, decompressed)
-        os.rename(tmp, stage.save_filename)
-    except OSError:
-        pass
-
     return stage
 
 
@@ -1538,45 +1467,61 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
         }
     )
 
-    verification_failure = False
-    for ext in ["json.sig", "json"]:
-        for mirror in mirrors_to_try:
-            # Try to download the specfile. For any legacy version of Spack's buildcache
-            # we definitely require this file.
-            specfile_url = "{0}.{1}".format(mirror["specfile"], ext)
-            specfile_stage = try_fetch(specfile_url, try_decompress=True)
-            if not specfile_stage:
-                continue
-
-            specfile_path = specfile_stage.save_filename
+    tried_to_verify_sigs = []
 
-            # If it is a clearsign file, we must verify it (unless disabled)
-            should_verify = not unsigned and is_clearsigned_file(specfile_path)
-            if should_verify and not try_verify(specfile_path):
-                verification_failure = True
-                tty.warn("Failed to verify: {0}".format(specfile_url))
-                specfile_stage.destroy()
-                continue
+    # Assumes we care more about finding a spec file by preferred ext
+    # than by mirror priority. This can be made less complicated as
+    # we remove support for deprecated spec formats and buildcache layouts.
+    for ext in ["json.sig", "json"]:
+        for mirror_to_try in mirrors_to_try:
+            specfile_url = "{0}.{1}".format(mirror_to_try["specfile"], ext)
+            spackfile_url = mirror_to_try["spackfile"]
+            local_specfile_stage = try_fetch(specfile_url)
+            if local_specfile_stage:
+                local_specfile_path = local_specfile_stage.save_filename
+                signature_verified = False
+
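
The restored reading path is purely extension-driven: `*.spec.json.sig` holds clearsigned JSON, `*.spec.json` plain JSON, and streams are decoded with `codecs.getreader` rather than sniffed for compression. A rough, self-contained sketch of that dispatch (`parse_specfile` and its deliberately naive `strip_clearsign` helper are illustrative stand-ins, not Spack API):

```python
import codecs
import json

def parse_specfile(url, binary_stream):
    """Decode a fetched specfile as UTF-8, then branch on its extension."""

    def strip_clearsign(text):
        # Keep what sits between the blank line ending the clearsign header
        # and the "-----BEGIN PGP SIGNATURE-----" footer.
        body = text.split("-----BEGIN PGP SIGNATURE-----", 1)[0]
        return body.split("\n\n", 1)[1]

    contents = codecs.getreader("utf-8")(binary_stream).read()
    if url.endswith(".json.sig"):
        return json.loads(strip_clearsign(contents))
    if url.endswith(".json"):
        return json.loads(contents)
    raise ValueError("unrecognized specfile extension: {0}".format(url))
```
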
+                if ext.endswith(".sig") and not unsigned:
+                    # If we found a signed specfile at the root, try to verify
+                    # the signature immediately. We will not download the
+                    # tarball if we could not verify the signature.
+                    tried_to_verify_sigs.append(specfile_url)
+                    signature_verified = try_verify(local_specfile_path)
+                    if not signature_verified:
+                        tty.warn("Failed to verify: {0}".format(specfile_url))
+
+                if unsigned or signature_verified or not ext.endswith(".sig"):
+                    # We will download the tarball in one of three cases:
+                    #     1. user asked for --no-check-signature
+                    #     2. user didn't ask for --no-check-signature, but we
+                    #     found a spec.json.sig and verified the signature already
+                    #     3. neither of the first two cases are true, but this file
+                    #     is *not* a signed json (not a spec.json.sig file). That
+                    #     means we already looked at all the mirrors and either didn't
+                    #     find any .sig files or couldn't verify any of them. But it
+                    #     is still possible to find an old style binary package where
+                    #     the signature is a detached .asc file in the outer archive
+                    #     of the tarball, and in that case, the only way to know is to
+                    #     download the tarball. This is a deprecated use case, so if
+                    #     something goes wrong during the extraction process (can't
+                    #     verify signature, checksum doesn't match) we will fail at
+                    #     that point instead of trying to download more tarballs from
+                    #     the remaining mirrors, looking for one we can use.
+                    tarball_stage = try_fetch(spackfile_url)
+                    if tarball_stage:
+                        return {
+                            "tarball_stage": tarball_stage,
+                            "specfile_stage": local_specfile_stage,
+                            "signature_verified": signature_verified,
+                        }
 
-            # In case the spec.json is not clearsigned, it means it's a legacy
-            # format, where either the signature is in the tarball with binaries, or
-            # the package is unsigned. Verification
-            # is then postponed.
-            spackfile_url = mirror["spackfile"]
-            tarball_stage = try_fetch(spackfile_url)
-            if tarball_stage:
-                return {
-                    "tarball_stage": tarball_stage,
-                    "specfile_stage": specfile_stage,
-                    "signature_verified": should_verify,  # should_verify implies it was verified
-                }
-            specfile_stage.destroy()
+                local_specfile_stage.destroy()
 
     # Falling through the nested loops means we exhaustively searched
    # for all known kinds of spec files on all mirrors and did not find
    # an acceptable one for which we could download a tarball.
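
The long comment block above compresses into one boolean. A compact restatement of the restored condition (the function name is hypothetical):

```python
def should_download_tarball(ext, unsigned, signature_verified):
    """Mirror of `unsigned or signature_verified or not ext.endswith(".sig")`."""
    # 1. the user passed --no-check-signature
    if unsigned:
        return True
    # 2. a spec.json.sig was found and its signature already verified
    if signature_verified:
        return True
    # 3. plain spec.json: a legacy tarball may still carry a detached .asc
    #    signature inside, so the only way to find out is to download it
    return not ext.endswith(".sig")
```
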
- if verification_failure: + if tried_to_verify_sigs: raise NoVerifyException( ( "Spack found new style signed binary packages, " @@ -1857,7 +1802,11 @@ def extract_tarball(spec, download_result, allow_root=False, unsigned=False, for specfile_path = download_result["specfile_stage"].save_filename with open(specfile_path, "r") as inputfile: - spec_dict = load_possibly_clearsigned_json(inputfile) + content = inputfile.read() + if specfile_path.endswith(".json.sig"): + spec_dict = Spec.extract_json_from_clearsig(content) + else: + spec_dict = sjson.load(content) bchecksum = spec_dict["binary_cache_checksum"] filename = download_result["tarball_stage"].save_filename @@ -2022,35 +1971,54 @@ def try_direct_fetch(spec, mirrors=None): """ specfile_name = tarball_name(spec, ".spec.json") signed_specfile_name = tarball_name(spec, ".spec.json.sig") + specfile_is_signed = False found_specs = [] for mirror in spack.mirror.MirrorCollection(mirrors=mirrors).values(): - for file in (specfile_name, signed_specfile_name): - url = url_util.join(mirror.fetch_url, _build_cache_relative_path, file) + buildcache_fetch_url_json = url_util.join( + mirror.fetch_url, _build_cache_relative_path, specfile_name + ) + buildcache_fetch_url_signed_json = url_util.join( + mirror.fetch_url, _build_cache_relative_path, signed_specfile_name + ) + try: + _, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json) + specfile_is_signed = True + except (URLError, web_util.SpackWebError, HTTPError) as url_err: try: - _, _, fs = web_util.read_from_url(url) - except (URLError, web_util.SpackWebError, HTTPError) as url_err: + _, _, fs = web_util.read_from_url(buildcache_fetch_url_json) + except (URLError, web_util.SpackWebError, HTTPError) as url_err_x: tty.debug( - "Did not find {0} on {1}".format(specfile_name, url), + "Did not find {0} on {1}".format( + specfile_name, buildcache_fetch_url_signed_json + ), url_err, level=2, ) + tty.debug( + "Did not find {0} on {1}".format(specfile_name, buildcache_fetch_url_json), + url_err_x, + level=2, + ) continue + specfile_contents = codecs.getreader("utf-8")(fs).read() + + # read the spec from the build cache file. All specs in build caches + # are concrete (as they are built) so we need to mark this spec + # concrete on read-in. + if specfile_is_signed: + specfile_json = Spec.extract_json_from_clearsig(specfile_contents) + fetched_spec = Spec.from_dict(specfile_json) + else: + fetched_spec = Spec.from_json(specfile_contents) + fetched_spec._mark_concrete() - # read the spec from the build cache file. All specs in build caches - # are concrete (as they are built) so we need to mark this spec - # concrete on read-in. 
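
Stepping out of the hunk for a moment: the restored `try_direct_fetch` in this hunk probes two URLs per mirror, the clearsigned spec first and the plain one as a fallback. A generic sketch of that first-hit-wins pattern (plain `urllib` here; Spack itself goes through `web_util.read_from_url` and its own error types):

```python
import urllib.error
import urllib.request

def fetch_first(urls):
    """Return (url, bytes) for the first URL that answers, else (None, None)."""
    for url in urls:
        try:
            with urllib.request.urlopen(url) as response:
                return url, response.read()
        except (urllib.error.HTTPError, urllib.error.URLError):
            continue  # try the next candidate
    return None, None

# e.g. prefer the signed spec, fall back to the plain one:
# url, body = fetch_first([prefix + ".spec.json.sig", prefix + ".spec.json"])
```
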
- stream = transparently_decompress_bytes_to_string(fs) - fetched_spec = Spec.from_dict(load_possibly_clearsigned_json(stream)) - fetched_spec._mark_concrete() - - found_specs.append( - { - "mirror_url": mirror.fetch_url, - "spec": fetched_spec, - } - ) - break + found_specs.append( + { + "mirror_url": mirror.fetch_url, + "spec": fetched_spec, + } + ) return found_specs @@ -2129,7 +2097,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None): try: _, _, json_file = web_util.read_from_url(keys_index) - json_index = sjson.load(transparently_decompress_bytes_to_string(json_file)) + json_index = sjson.load(codecs.getreader("utf-8")(json_file)) except (URLError, web_util.SpackWebError) as url_err: if web_util.url_exists(keys_index): err_msg = [ @@ -2454,15 +2422,11 @@ class DefaultIndexFetcher: raise FetchIndexError("Could not fetch index from {}".format(url_index), e) try: - binary_result = response.read() + result = codecs.getreader("utf-8")(response).read() except ValueError as e: return FetchCacheError("Remote index {} is invalid".format(url_index), e) - # The hash is computed on the raw bytes - computed_hash = compute_hash(binary_result) - - # Only then decode as string, possibly decompress - result = transparently_decompress_bytes_to_string(io.BytesIO(binary_result)).read() + computed_hash = compute_hash(result) # We don't handle computed_hash != remote_hash here, which can happen # when remote index.json and index.json.hash are out of sync, or if @@ -2516,21 +2480,15 @@ class EtagIndexFetcher: raise FetchIndexError("Could not fetch index {}".format(url), e) from e try: - binary_result = response.read() + result = codecs.getreader("utf-8")(response).read() except ValueError as e: raise FetchIndexError("Remote index {} is invalid".format(url), e) from e - # The hash is computed on the raw bytes - computed_hash = compute_hash(binary_result) - - # Only then decode as string, possibly decompress - result = transparently_decompress_bytes_to_string(io.BytesIO(binary_result)).read() - headers = response.headers etag_header_value = headers.get("Etag", None) or headers.get("etag", None) return FetchIndexResult( etag=web_util.parse_etag(etag_header_value), - hash=computed_hash, + hash=compute_hash(result), data=result, fresh=False, ) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 95f2951f2b..e8306ae0b7 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -156,6 +156,16 @@ default_format = "{name}{@version}" default_format += "{%compiler.name}{@compiler.version}{compiler_flags}" default_format += "{variants}{arch=architecture}" +#: Regular expression to pull spec contents out of clearsigned signature +#: file. +CLEARSIGN_FILE_REGEX = re.compile( + ( + r"^-----BEGIN PGP SIGNED MESSAGE-----" + r"\s+Hash:\s+[^\s]+\s+(.+)-----BEGIN PGP SIGNATURE-----" + ), + re.MULTILINE | re.DOTALL, +) + #: specfile format version. Must increase monotonically specfile_format_version = 3 @@ -2443,6 +2453,27 @@ class Spec(object): except Exception as e: raise sjson.SpackJSONError("error parsing JSON spec:", str(e)) from e + @staticmethod + def extract_json_from_clearsig(data): + m = CLEARSIGN_FILE_REGEX.search(data) + if m: + return sjson.load(m.group(1)) + return sjson.load(data) + + @staticmethod + def from_signed_json(stream): + """Construct a spec from clearsigned json spec file. + + Args: + stream: string or file object to read from. 
+ """ + data = stream + if hasattr(stream, "read"): + data = stream.read() + + extracted_json = Spec.extract_json_from_clearsig(data) + return Spec.from_dict(extracted_json) + @staticmethod def from_detection(spec_str, extra_attributes=None): """Construct a spec from a spec string determined during external diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 9d801c6d5e..dc6cb35177 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -3,9 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) import glob -import gzip import io -import json import os import platform import sys @@ -891,106 +889,3 @@ def test_default_index_json_404(): with pytest.raises(bindist.FetchIndexError, match="Could not fetch index"): fetcher.conditional_fetch() - - -def test_read_spec_from_signed_json(): - spec_dir = os.path.join(test_path, "data", "mirrors", "signed_json") - file_name = ( - "linux-ubuntu18.04-haswell-gcc-8.4.0-" - "zlib-1.2.12-g7otk5dra3hifqxej36m5qzm7uyghqgb.spec.json.sig" - ) - spec_path = os.path.join(spec_dir, file_name) - - def check_spec(spec_to_check): - assert spec_to_check.name == "zlib" - assert spec_to_check._hash == "g7otk5dra3hifqxej36m5qzm7uyghqgb" - - with open(spec_path) as fd: - s = Spec.from_dict(bindist.load_possibly_clearsigned_json(fd)) - check_spec(s) - - -def test_load_clearsigned_json(): - obj = {"hello": "world"} - clearsigned = """\ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA512 - -{} ------BEGIN PGP SIGNATURE----- -xyz ------END PGP SIGNATURE-----""".format( - json.dumps(obj) - ) - - # Should accept strings and streams - assert bindist.load_possibly_clearsigned_json(clearsigned) == obj - assert bindist.load_possibly_clearsigned_json(io.StringIO(clearsigned)) == obj - - -def test_load_without_clearsigned_json(): - obj = {"hello": "world"} - not_clearsigned = json.dumps(obj) - - # Should accept strings and streams - assert bindist.load_possibly_clearsigned_json(not_clearsigned) == obj - assert bindist.load_possibly_clearsigned_json(io.StringIO(not_clearsigned)) == obj - - -def test_json_containing_clearsigned_message_is_json(): - # Test that we don't interpret json with a PGP signed message as a string somewhere - # as a clearsigned message. It should just deserialize the json contents. - val = """\ -"-----BEGIN PGP SIGNED MESSAGE----- -Hash: SHA512 - -{} ------BEGIN PGP SIGNATURE----- -signature ------END PGP SIGNATURE----- -""" - input = json.dumps({"key": val}) - assert bindist.load_possibly_clearsigned_json(input)["key"] == val - - -def test_clearsign_signature_part_of_json_string(): - # Check if we can deal with a string in json containing the string that is used - # at the start of a PGP signature. - obj = {"-----BEGIN PGP SIGNATURE-----": "-----BEGIN PGP SIGNATURE-----"} - input = """\ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA512 - -{} ------BEGIN PGP SIGNATURE----- -signature ------END PGP SIGNATURE----- -""".format( - json.dumps(obj) - ) - assert bindist.load_possibly_clearsigned_json(input) == obj - - -def test_broken_clearsign_signature(): - # In this test there is no PGP signature. 
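
The `EtagIndexFetcher` changes earlier in this patch keep the conditional-download protocol: send the saved ETag, treat HTTP 304 as "index still fresh". A plain-`urllib` sketch of that exchange (Spack's real implementation sits behind its own web utilities and result types):

```python
import urllib.error
import urllib.request

def fetch_index_if_changed(url, etag=None):
    """Conditional GET; returns (body, etag, fresh)."""
    request = urllib.request.Request(url)
    if etag:
        request.add_header("If-None-Match", '"{0}"'.format(etag))
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        if e.code == 304:
            return None, etag, True  # server says our copy is current
        raise
    body = response.read().decode("utf-8")
    new_etag = response.headers.get("Etag", "").strip('"')
    return body, new_etag, False
```
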
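The `CLEARSIGN_FILE_REGEX` added to spec.py above is easy to sanity-check by hand. The sample message below is fabricated, but the pattern and flags are exactly the ones from the hunk:

```python
import json
import re

CLEARSIGN_FILE_REGEX = re.compile(
    r"^-----BEGIN PGP SIGNED MESSAGE-----"
    r"\s+Hash:\s+[^\s]+\s+(.+)-----BEGIN PGP SIGNATURE-----",
    re.MULTILINE | re.DOTALL,
)

clearsigned = """-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

{"name": "zlib", "version": "1.2.12"}
-----BEGIN PGP SIGNATURE-----
...base64 signature material...
-----END PGP SIGNATURE-----
"""

match = CLEARSIGN_FILE_REGEX.search(clearsigned)
assert match is not None
print(json.loads(match.group(1)))  # {'name': 'zlib', 'version': '1.2.12'}
```
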
- obj = {"-----BEGIN PGP SIGNATURE-----": "-----BEGIN PGP SIGNATURE-----"} - input = """\ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA512 - -{} -""".format( - json.dumps(obj) - ) - with pytest.raises(ValueError, match="Could not find PGP signature"): - bindist.load_possibly_clearsigned_json(input) - - -def test_transparent_decompression(): - """Test a roundtrip of string -> gzip -> string.""" - text = "this is utf8 that should be compressed" - gzipped = io.BytesIO() - with gzip.GzipFile(fileobj=gzipped, mode="w", compresslevel=1, mtime=0) as f: - f.write(text.encode("utf-8")) - gzipped.seek(0) - assert bindist.transparently_decompress_bytes_to_string(gzipped).read() == text diff --git a/lib/spack/spack/test/cmd/ci.py b/lib/spack/spack/test/cmd/ci.py index e27bd48a13..3033862c9b 100644 --- a/lib/spack/spack/test/cmd/ci.py +++ b/lib/spack/spack/test/cmd/ci.py @@ -1324,9 +1324,7 @@ spack: if file_name.endswith(".spec.json.sig"): spec_json_path = os.path.join(buildcache_path, file_name) with open(spec_json_path) as json_fd: - json_object = spack.binary_distribution.load_possibly_clearsigned_json( - json_fd - ) + json_object = Spec.extract_json_from_clearsig(json_fd.read()) jsonschema.validate(json_object, specfile_schema) logs_dir = working_dir.join("logs_dir") diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py index 1bc2e4c7be..f1adf1cad9 100644 --- a/lib/spack/spack/test/spec_yaml.py +++ b/lib/spack/spack/test/spec_yaml.py @@ -47,6 +47,27 @@ def test_simple_spec(): check_json_round_trip(spec) +def test_read_spec_from_signed_json(): + spec_dir = os.path.join(spack.paths.test_path, "data", "mirrors", "signed_json") + file_name = ( + "linux-ubuntu18.04-haswell-gcc-8.4.0-" + "zlib-1.2.12-g7otk5dra3hifqxej36m5qzm7uyghqgb.spec.json.sig" + ) + spec_path = os.path.join(spec_dir, file_name) + + def check_spec(spec_to_check): + assert spec_to_check.name == "zlib" + assert spec_to_check._hash == "g7otk5dra3hifqxej36m5qzm7uyghqgb" + + with open(spec_path) as fd: + s = Spec.from_signed_json(fd) + check_spec(s) + + with open(spec_path) as fd: + s = Spec.from_signed_json(fd.read()) + check_spec(s) + + def test_normal_spec(mock_packages): spec = Spec("mpileaks+debug~opt") spec.normalize() -- cgit v1.2.3-70-g09d2
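
Putting the restored API together: reading a clearsigned specfile end to end mirrors the test above. The path here is made up; both call forms are the ones the test exercises:

```python
from spack.spec import Spec

# hypothetical mirror-local file; any *.spec.json.sig works the same way
path = "zlib-1.2.12-g7otk5dra3hifqxej36m5qzm7uyghqgb.spec.json.sig"

# from_signed_json accepts an open stream...
with open(path) as fd:
    spec_a = Spec.from_signed_json(fd)

# ...or the already-read string
with open(path) as fd:
    spec_b = Spec.from_signed_json(fd.read())

assert spec_a.dag_hash() == spec_b.dag_hash()
```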