summaryrefslogtreecommitdiff
path: root/lib/spack/spack/util/spack_yaml.py
blob: f4bec63f5cc64b39a79faa1152772023b7d97131 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

"""Enhanced YAML parsing for Spack.

- ``load()`` preserves YAML Marks on returned objects -- this allows
  us to access file and line information later.

- Our load methods use the ``OrderedDict`` class instead of YAML's
  default unordered dict.

"""
import collections
import collections.abc
import copy
import ctypes
import enum
import functools
import io
import re
from typing import IO, List, Optional

import ruamel.yaml
from ruamel.yaml import comments, constructor, emitter, error, representer

from llnl.util.tty.color import cextra, clen, colorize

import spack.error

# Only export load, dump, and SpackYAMLError
__all__ = ["load", "dump", "SpackYAMLError"]


# Make new classes so we can add custom attributes.
# Also, use OrderedDict instead of just dict.
class syaml_dict(collections.OrderedDict):
    """Ordered mapping whose ``repr`` looks like a plain ``dict`` literal."""

    def __repr__(self):
        # Render ``{key: value, ...}`` rather than OrderedDict's default form.
        rendered = [f"{key!r}: {value!r}" for key, value in self.items()]
        return "{" + ", ".join(rendered) + "}"


class syaml_list(list):
    """``list`` subclass, so that custom attributes can be set on instances."""

    # Reuse the built-in list repr unchanged.
    __repr__ = list.__repr__


class syaml_str(str):
    """``str`` subclass, so that custom attributes can be set on instances."""

    # Reuse the built-in str repr unchanged.
    __repr__ = str.__repr__


class syaml_int(int):
    """``int`` subclass, so that custom attributes can be set on instances."""

    # Reuse the built-in int repr unchanged.
    __repr__ = int.__repr__


#: mapping from syaml wrapper type -> the primitive type it wraps
syaml_types = {
    syaml_str: str,
    syaml_int: int,
    syaml_dict: dict,
    syaml_list: list,
}


#: exact types that ``markable()`` accepts: the syaml wrappers plus ruamel's
#: commented sequence/mapping containers
markable_types = {*syaml_types, comments.CommentedSeq, comments.CommentedMap}


def syaml_type(obj):
    """Wrap a primitive value in its matching syaml type, when one exists.

    Args:
        obj: value to wrap

    Returns:
        (object): ``obj`` itself if it already is exactly the matching syaml
        type, is a ``bool``, or has no wrapper in ``syaml_types``; otherwise
        a syaml-typed copy of ``obj``.
    """
    # bool is a subclass of int, but booleans are never wrapped.
    if type(obj) is bool:
        return obj

    for wrapper, primitive in syaml_types.items():
        if not isinstance(obj, primitive):
            continue
        if type(obj) is wrapper:
            return obj
        return wrapper(obj)

    return obj


def markable(obj):
    """Report whether the exact type of ``obj`` is one of ``markable_types``."""
    exact_type = type(obj)
    return exact_type in markable_types


def mark(obj, node):
    """Copy start and end markers from ``node`` onto ``obj``.

    For each of the two markers, the public attribute on ``node``
    (``start_mark`` / ``end_mark``) is preferred; the underscore-prefixed
    attribute is used as a fallback. The value is always stored on ``obj``
    under the underscore-prefixed name. Markers missing from ``node`` are
    simply not set.
    """
    for public_name in ("start_mark", "end_mark"):
        private_name = "_" + public_name
        for source_name in (public_name, private_name):
            if hasattr(node, source_name):
                setattr(obj, private_name, getattr(node, source_name))
                break


def marked(obj):
    """Whether an object has been marked by spack_yaml.

    The result is meant to be used for its truthiness: it is the object's
    ``_start_mark`` if that is set and truthy, otherwise its ``_end_mark``
    (or ``False`` when that attribute is absent).
    """
    start = getattr(obj, "_start_mark", False)
    return start or getattr(obj, "_end_mark", False)


class OrderedLineConstructor(constructor.RoundTripConstructor):
    """YAML constructor intended for reading Spack configuration files.

    It preserves order and line numbers: strings, sequences and mappings
    built by this class receive ``_start_mark`` and ``_end_mark``
    attributes copied from the YAML node they were built from.

    It also recognizes Spack config override keys: a string that ends with
    ``:`` and contains no ``@`` has the trailing colon removed and gets an
    ``override = True`` attribute.
    """

    #
    # The construct_yaml_* methods are overridden so that marks can be
    # applied to their results. The superclass returns
    # CommentedMap/CommentedSeq objects that accept new attributes (and
    # whose behavior we rely on to preserve comments).
    #
    # The inherited sequence/mapping constructors are generators: they yield
    # an empty container first and fill it in afterwards. That protocol is
    # kept intact here.
    #

    @staticmethod
    def _mark_then_finish(pending, node):
        """Yield the container produced by ``pending`` (marked if possible),
        then run ``pending`` to completion so the container gets filled in."""
        container = next(pending)
        if markable(container):
            mark(container, node)
        yield container
        for _ in pending:
            pass

    def construct_yaml_str(self, node):
        text = super().construct_yaml_str(node)
        # Nothing tells us whether we are parsing a key, so any string that
        # ends with ':' and has no '@' (which can appear in config values
        # that refer to Spack specs) is assumed to be a config override key.
        is_override = bool(text) and text.endswith(":") and "@" not in text
        result = syaml_str(text[:-1] if is_override else text)
        if is_override:
            result.override = True
        mark(result, node)
        return result

    def construct_yaml_seq(self, node):
        yield from self._mark_then_finish(super().construct_yaml_seq(node), node)

    def construct_yaml_map(self, node):
        yield from self._mark_then_finish(super().construct_yaml_map(node), node)


# Register the constructors defined above for the core YAML tags.
for _tag, _construct in (
    ("tag:yaml.org,2002:map", OrderedLineConstructor.construct_yaml_map),
    ("tag:yaml.org,2002:seq", OrderedLineConstructor.construct_yaml_seq),
    ("tag:yaml.org,2002:str", OrderedLineConstructor.construct_yaml_str),
):
    OrderedLineConstructor.add_constructor(_tag, _construct)
del _tag, _construct


class OrderedLineRepresenter(representer.RoundTripRepresenter):
    """Representer that preserves ordering and formats ``syaml_*`` objects.

    Insertion order of ``syaml_dict`` objects is kept when they are written
    out. ``None`` is written as an explicit ``null``, and strings flagged
    with a truthy ``override`` attribute get their trailing ``:`` back.
    """

    def ignore_aliases(self, _data):
        """Always answer ``True`` so the dumper NEVER prints YAML aliases."""
        return True

    def represent_data(self, data):
        node = super().represent_data(data)
        if data is None:
            # spell ``None`` out explicitly
            node.value = syaml_str("null")
        return node

    def represent_str(self, data):
        # Override keys had their trailing ':' stripped on load; restore it.
        if getattr(data, "override", False):
            data = data + ":"
        return super().represent_str(data)


class SafeRepresenter(representer.RoundTripRepresenter):
    """Round-trip representer whose only customization is disabling aliases."""

    def ignore_aliases(self, _data):
        """Make the dumper NEVER print YAML aliases."""
        return True


# Make our special objects look like normal YAML ones: each syaml wrapper is
# represented by the round-trip representer of the primitive it wraps.
for _syaml_cls, _represent in (
    (syaml_dict, representer.RoundTripRepresenter.represent_dict),
    (syaml_list, representer.RoundTripRepresenter.represent_list),
    (syaml_int, representer.RoundTripRepresenter.represent_int),
    (syaml_str, representer.RoundTripRepresenter.represent_str),
):
    representer.RoundTripRepresenter.add_representer(_syaml_cls, _represent)
del _syaml_cls, _represent

# Registered last, after the base-class registrations above.
OrderedLineRepresenter.add_representer(syaml_str, OrderedLineRepresenter.represent_str)


#: Largest value of a signed C ``int``. Using it as the maximum helps avoid
#: passing too large a value to cyaml.
maxint = (1 << (8 * ctypes.sizeof(ctypes.c_int) - 1)) - 1


def return_string_when_no_stream(func):
    """Decorate a dump-like ``func(data, stream=..., **kwargs)``.

    When the caller supplies a (truthy) stream, ``func`` is called with it and
    its result is returned. Otherwise ``func`` writes into a temporary
    ``io.StringIO`` and the accumulated text is returned instead.
    """

    @functools.wraps(func)
    def wrapper(data, stream=None, **kwargs):
        if not stream:
            buffer = io.StringIO()
            func(data, stream=buffer, **kwargs)
            return buffer.getvalue()
        return func(data, stream=stream, **kwargs)

    return wrapper


@return_string_when_no_stream
def dump(data, stream=None, default_flow_style=False):
    """Dump ``data`` as generic YAML.

    Args:
        data: data to be dumped
        stream: stream to write to; when omitted, the decorator supplies a
            temporary buffer and the dumped text is returned as a string
        default_flow_style: value assigned to the underlying YAML object's
            ``default_flow_style`` option

    Raises:
        SpackYAMLError: if anything goes wrong while dumping
    """
    handler = ConfigYAML(yaml_type=YAMLType.GENERIC_YAML)
    # These options belong to the wrapped ruamel ``YAML`` object
    # (``handler.yaml``), as in ``dump_config``. Setting them on the
    # ``ConfigYAML`` wrapper itself would silently have no effect.
    handler.yaml.default_flow_style = default_flow_style
    handler.yaml.width = maxint
    return handler.dump(data, stream=stream)


def file_line(mark):
    """Format a mark as <file>:<line> information.

    Args:
        mark: object with a ``name`` attribute and a ``line`` attribute;
            ``line`` may be ``None`` when no line information is available

    Returns:
        ``mark.name``, followed by ``:<line>`` whenever ``mark.line`` is set.
    """
    result = mark.name
    # Compare against None explicitly: line 0 is a real line and must not be
    # dropped just because it is falsy.
    if mark.line is not None:
        result += ":" + str(mark.line)
    return result


#: Global for interactions between LineAnnotationEmitter and _dump_annotated().
#: The emitter empties this list when it is constructed and appends one entry
#: for every line break it writes; _dump_annotated() then pairs the entries
#: with the dumped lines.
#: This is nasty but YAML doesn't give us many ways to pass arguments --
#: yaml.dump() takes a class (not an instance) and instantiates the dumper
#: itself, so we can't just pass an instance
_ANNOTATIONS: List[str] = []


class LineAnnotationRepresenter(OrderedLineRepresenter):
    """Representer that carries file/line marks over to the node values.

    String node values are converted to ``syaml_str`` and, when the value is
    markable, the marks of the data being represented are copied onto it.
    ``LineAnnotationEmitter`` later looks for those marks on the scalar
    values it processes in order to build the per-line annotations stored
    in ``_ANNOTATIONS``.

    For an example of how this is used, see ``_dump_annotated()``, which
    dumps to a ``StringIO`` and then joins the resulting lines with the
    annotations.
    """

    def represent_data(self, data):
        """Force syaml_str to be passed through with marks."""
        node = super().represent_data(data)

        if data is None:
            node.value = syaml_str("null")
        elif isinstance(node.value, str):
            node.value = syaml_str(data)

        value = node.value
        if markable(value):
            mark(value, data)
        return node


class LineAnnotationEmitter(emitter.Emitter):
    """Emitter that records one annotation in ``_ANNOTATIONS`` per line break.

    While scalars are processed, the most recent marked value is remembered
    in ``saved``. Each time a line break is written, an annotation built
    from that value's start mark (file name and 1-based line number) is
    appended to ``_ANNOTATIONS``. Comments are not emitted at all.
    """

    #: most recent marked scalar value seen; ``None`` until one is processed
    saved = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # every emitter starts from an empty annotation list
        del _ANNOTATIONS[:]
        # color codes for ``@<code>{...}`` markup, handed out one per file name
        self.colors = "KgrbmcyGRBMCY"
        self.filename_colors = {}

    def process_scalar(self):
        """Process the scalar as usual and remember it if it carries marks."""
        super().process_scalar()
        if marked(self.event.value):
            self.saved = self.event.value

    def write_line_break(self, data=None):
        """Write a line break and append the annotation for the finished line.

        Args:
            data: optional line-break text, forwarded unchanged to the base
                emitter. It is accepted so that this override stays
                call-compatible with the base method, which takes it too.
        """
        super().write_line_break(data)
        if self.saved is None:
            # nothing marked has been seen yet
            _ANNOTATIONS.append(colorize("@K{---}"))
            return

        # append annotations at the end of each line
        if self.saved:
            mark = self.saved._start_mark

            # assign each file name the next color, cycling when we run out
            color = self.filename_colors.get(mark.name)
            if not color:
                ncolors = len(self.colors)
                color = self.colors[len(self.filename_colors) % ncolors]
                self.filename_colors[mark.name] = color

            fmt = "@%s{%%s}" % color
            ann = fmt % mark.name
            if mark.line is not None:
                # the line is incremented by one for display
                ann += ":@c{%s}" % (mark.line + 1)
            _ANNOTATIONS.append(colorize(ann))
        else:
            _ANNOTATIONS.append("")

    def write_comment(self, comment, pre=False):
        """Deliberately write nothing, so comments never reach the output."""
        pass


class YAMLType(enum.Enum):
    """YAML configurations handled by Spack.

    ``ConfigYAML`` uses this value to decide which representer, emitter and
    constructor classes to install on its YAML object.
    """

    #: Generic YAML configuration
    GENERIC_YAML = enum.auto()
    #: A Spack config file with overrides
    SPACK_CONFIG_FILE = enum.auto()
    #: A Spack config file with line annotations
    ANNOTATED_SPACK_CONFIG_FILE = enum.auto()


class ConfigYAML:
    """Handles the loading and dumping of Spack's YAML files.

    Wraps a round-trip, pure-Python ``ruamel.yaml.YAML`` object (available as
    ``self.yaml``) and installs the representer/emitter/constructor classes
    that match the requested ``YAMLType``.
    """

    def __init__(self, yaml_type: YAMLType) -> None:
        self.yaml = ruamel.yaml.YAML(typ="rt", pure=True)
        if yaml_type == YAMLType.GENERIC_YAML:
            # generic YAML: only aliases are disabled, stock constructor
            self.yaml.Representer = SafeRepresenter
        elif yaml_type == YAMLType.ANNOTATED_SPACK_CONFIG_FILE:
            # config file with per-line annotations
            self.yaml.Representer = LineAnnotationRepresenter
            self.yaml.Emitter = LineAnnotationEmitter
            self.yaml.Constructor = OrderedLineConstructor
        else:
            # plain Spack config file
            self.yaml.Representer = OrderedLineRepresenter
            self.yaml.Constructor = OrderedLineConstructor

    def load(self, stream: IO):
        """Loads the YAML data from a stream and returns it.

        Args:
            stream: stream to load from.

        Raises:
            SpackYAMLError: if anything goes wrong while loading
        """
        try:
            return self.yaml.load(stream)

        except error.MarkedYAMLError as e:
            msg = "error parsing YAML"
            error_mark = e.context_mark if e.context_mark else e.problem_mark
            if error_mark:
                line, column = error_mark.line, error_mark.column
                msg += f": near {error_mark.name}, {str(line)}, {str(column)}"
            else:
                # The input may be a plain string or an unnamed buffer. A
                # missing ``name`` attribute must not raise here and mask
                # the real parse error.
                stream_name = getattr(stream, "name", None)
                if stream_name:
                    msg += f": {stream_name}"
            msg += f": {e.problem}"
            raise SpackYAMLError(msg, e) from e

        except Exception as e:
            msg = "cannot load Spack YAML configuration"
            raise SpackYAMLError(msg, e) from e

    def dump(self, data, stream: Optional[IO] = None, *, transform=None) -> None:
        """Dumps the YAML data to a stream.

        Args:
            data: data to be dumped
            stream: stream to dump the data into.
            transform: passed through unchanged to the underlying YAML
                object's ``dump`` call

        Raises:
            SpackYAMLError: if anything goes wrong while dumping
        """
        try:
            return self.yaml.dump(data, stream=stream, transform=transform)
        except Exception as e:
            msg = "cannot dump Spack YAML configuration"
            raise SpackYAMLError(msg, str(e)) from e

    def as_string(self, data) -> str:
        """Returns a string representing the YAML data passed as input."""
        result = io.StringIO()
        self.dump(data, stream=result)
        return result.getvalue()


def deepcopy(data):
    """Return a deep copy of the input YAML data.

    For ``CommentedMap`` results, the comments attached to ``data`` are
    additionally set on the copy.
    """
    duplicate = copy.deepcopy(data)
    if not isinstance(duplicate, comments.CommentedMap):
        return duplicate

    # HACK to fully copy ruamel CommentedMap that doesn't provide copy
    # method. Especially necessary for environments
    original_comments = extract_comments(data)
    if original_comments:
        set_comments(duplicate, data_comments=original_comments)
    return duplicate


def load_config(str_or_file):
    """Load YAML with the Spack config-file handler.

    The handler is a ``ConfigYAML`` of type ``YAMLType.SPACK_CONFIG_FILE``,
    whose constructor attaches ``_start_mark``/``_end_mark`` attributes to
    the loaded objects.
    """
    return ConfigYAML(yaml_type=YAMLType.SPACK_CONFIG_FILE).load(str_or_file)


def load(*args, **kwargs):
    """Load generic YAML; all arguments are forwarded to ``ConfigYAML.load``."""
    generic_handler = ConfigYAML(yaml_type=YAMLType.GENERIC_YAML)
    loaded = generic_handler.load(*args, **kwargs)
    return loaded


@return_string_when_no_stream
def dump_config(data, stream, *, default_flow_style=False, blame=False):
    """Dump ``data`` as a Spack config file.

    Args:
        data: data to be dumped
        stream: stream to write to
        default_flow_style: value assigned to the YAML object's
            ``default_flow_style`` option
        blame: if true, prefix each output line with its annotation
            (see ``_dump_annotated``)
    """
    if blame:
        yaml_type = YAMLType.ANNOTATED_SPACK_CONFIG_FILE
    else:
        yaml_type = YAMLType.SPACK_CONFIG_FILE

    handler = ConfigYAML(yaml_type=yaml_type)
    handler.yaml.default_flow_style = default_flow_style

    if blame:
        return _dump_annotated(handler, data, stream)
    return handler.dump(data, stream)


def _dump_annotated(handler, data, stream=None):
    """Dump ``data`` with ``handler`` and prefix every line with its annotation.

    Args:
        handler: ``ConfigYAML`` used for the dump
        data: data to be dumped
        stream: stream to write the annotated lines to

    Returns:
        The annotated text when ``stream`` is ``None``; otherwise ``None``.
    """
    raw = io.StringIO()
    handler.dump(data, raw)

    # write_line_break() is not called by YAML for empty lines, so we
    # skip empty lines here with \n+.
    lines = re.split(r"\n+", raw.getvalue().rstrip())

    return_text = stream is None
    if return_text:
        stream = io.StringIO()

    # write out annotations and lines, accounting for color
    width = max(clen(a) for a in _ANNOTATIONS)
    for annotation, line in zip(_ANNOTATIONS, lines):
        fmt = "%%-%ds  %%s\n" % (width + cextra(annotation))
        stream.write(fmt % (annotation, line))

    if return_text:
        return stream.getvalue()


def sorted_dict(dict_like):
    """Return an ordered dict with all the fields sorted recursively.

    Args:
        dict_like (dict): dictionary to be sorted

    Returns:
        ``syaml_dict`` whose items are sorted, with every nested mapping
        value sorted the same way
    """
    ordered = syaml_dict()
    for key, value in sorted(dict_like.items()):
        if isinstance(value, collections.abc.Mapping):
            value = sorted_dict(value)
        ordered[key] = value
    return ordered


def extract_comments(data):
    """Return the comments stored on some YAML data, or None if there are none"""
    comment_attribute = comments.Comment.attrib
    return getattr(data, comment_attribute, None)


def set_comments(data, *, data_comments):
    """Store ``data_comments`` as the comments of some YAML data"""
    comment_attribute = comments.Comment.attrib
    return setattr(data, comment_attribute, data_comments)


def name_mark(name):
    """Build a mark that carries only a name; every other field is None"""
    unset_fields = [None] * 5
    return error.StringMark(name, *unset_fields)


class SpackYAMLError(spack.error.SpackError):
    """Raised when there are issues with YAML parsing."""

    def __init__(self, msg, yaml_error):
        # The underlying error is passed on in its string form.
        details = str(yaml_error)
        super().__init__(msg, details)