diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/ci.py | 9 | ||||
-rw-r--r-- | lib/spack/spack/test/spec_yaml.py | 49 | ||||
-rw-r--r-- | lib/spack/spack/util/spack_yaml.py | 25 |
3 files changed, 80 insertions, 3 deletions
def test_anchorify_1():
    """Duplicate lists collapse into one shared object, and dumping the result emits a YAML
    anchor for the first occurrence and an alias for the second."""
    original = {"a": [1, 2, 3], "b": [1, 2, 3]}
    deduped = {"a": [1, 2, 3], "b": [1, 2, 3]}
    syaml.anchorify(deduped)

    # Contents compare equal, but both keys now refer to the very same list object
    assert original == deduped
    assert deduped["a"] is deduped["b"]

    # Check if anchors are used
    stream = io.StringIO()
    ruamel.yaml.YAML().dump(deduped, stream)
    expected = """\
a: &id001
- 1
- 2
- 3
b: *id001
"""
    assert stream.getvalue() == expected


def test_anchorify_2():
    """Nested duplicates are shared at every level: equal top-level dicts become one object,
    and an equal inner dict is shared with a top-level entry."""
    original = {"a": {"b": {"c": True}}, "d": {"b": {"c": True}}, "e": {"c": True}}
    deduped = {"a": {"b": {"c": True}}, "d": {"b": {"c": True}}, "e": {"c": True}}
    syaml.anchorify(deduped)

    assert original == deduped
    assert deduped["a"] is deduped["d"]
    assert deduped["a"]["b"] is deduped["e"]

    # Check if anchors are used
    stream = io.StringIO()
    ruamel.yaml.YAML().dump(deduped, stream)
    expected = """\
a: &id001
  b: &id002
    c: true
d: *id001
e: *id002
"""
    assert stream.getvalue() == expected
def anchorify(data: Union[dict, list], identifier: Callable[[Any], str] = repr) -> None:
    """Replace identical dict/list branches in a tree with references to earlier instances.

    The tree is modified in place: whenever a nested dict or list yields the same identifier
    as a branch seen earlier in the walk, its slot in the parent container is rebound to that
    earlier object. The YAML serializer generates anchors for such shared objects, resulting
    in smaller YAML files.

    Args:
        data: root dict or list of the tree to deduplicate; mutated in place.
        identifier: maps a dict/list branch to a string key. Branches that produce equal keys
            are treated as identical. Defaults to ``repr``.
    """
    anchors: Dict[str, Union[dict, list]] = {}
    pending: List[Union[dict, list]] = [data]

    while pending:
        item = pending.pop()
        children = item.items() if isinstance(item, dict) else enumerate(item)

        for key, value in children:
            # Only containers can be shared; scalars are left untouched.
            if not isinstance(value, (dict, list)):
                continue

            # Named ``value_id`` rather than ``id`` so the builtin ``id()`` is not shadowed.
            value_id = identifier(value)
            anchor = anchors.get(value_id)

            if anchor is None:
                # First occurrence: remember it and visit its children later.
                anchors[value_id] = value
                pending.append(value)
            else:
                # Rebinding an existing key/index keeps the container's size unchanged, so
                # this is safe while iterating over it.
                item[key] = anchor  # replace with reference