From 1a677e30f3ac42b68dc8f2f6342ffd2eb3d97359 Mon Sep 17 00:00:00 2001
From: Todd Gamblin <tgamblin@llnl.gov>
Date: Wed, 30 May 2018 10:25:22 -0700
Subject: config: add `spack config blame` command (#8081)

- `spack config blame` is similar to `spack config get`, but it prints
  out the config file and line number that each line of the merged
  configuration came from.

- This is a debugging tool for understanding where Spack config settings
  come from.

- add tests for config blame
---
 lib/spack/spack/cmd/config.py           |  13 +++
 lib/spack/spack/config.py               |  50 +++++++++-
 lib/spack/spack/test/config.py          |  40 ++++++++
 lib/spack/spack/test/util/spack_yaml.py |  86 +++++++++++++++++
 lib/spack/spack/util/spack_yaml.py      | 160 +++++++++++++++++++++++++++++++-
 5 files changed, 339 insertions(+), 10 deletions(-)
 create mode 100644 lib/spack/spack/test/util/spack_yaml.py

(limited to 'lib')

diff --git a/lib/spack/spack/cmd/config.py b/lib/spack/spack/cmd/config.py
index 4d665c5bf6..10ab4c667b 100644
--- a/lib/spack/spack/cmd/config.py
+++ b/lib/spack/spack/cmd/config.py
@@ -48,6 +48,14 @@ def setup_parser(subparser):
                             metavar='SECTION',
                             choices=spack.config.section_schemas)
 
+    blame_parser = sp.add_parser(
+        'blame', help='print configuration annotated with source file:line')
+    blame_parser.add_argument('section',
+                              help="configuration section to print. "
+                              "options: %(choices)s",
+                              metavar='SECTION',
+                              choices=spack.config.section_schemas)
+
     edit_parser = sp.add_parser('edit', help='edit configuration file')
     edit_parser.add_argument('section',
                              help="configuration section to edit. "
@@ -60,6 +68,10 @@ def config_get(args):
     spack.config.config.print_section(args.section)
 
 
+def config_blame(args):
+    spack.config.config.print_section(args.section, blame=True)
+
+
 def config_edit(args):
     if not args.scope:
         if args.section == 'compilers':
@@ -76,5 +88,6 @@ def config_edit(args):
 
 def config(parser, args):
     action = {'get': config_get,
+              'blame': config_blame,
               'edit': config_edit}
     action[args.config_command](args)
diff --git a/lib/spack/spack/config.py b/lib/spack/spack/config.py
index 5157e918aa..73738c9ed3 100644
--- a/lib/spack/spack/config.py
+++ b/lib/spack/spack/config.py
@@ -219,11 +219,13 @@ class InternalConfigScope(ConfigScope):
     def __init__(self, name, data=None):
         self.name = name
         self.sections = syaml.syaml_dict()
+
         if data:
             for section in data:
                 dsec = data[section]
                 _validate_section({section: dsec}, section_schemas[section])
-                self.sections[section] = syaml.syaml_dict({section: dsec})
+                self.sections[section] = _mark_internal(
+                    syaml.syaml_dict({section: dsec}), name)
 
     def get_section_filename(self, section):
         raise NotImplementedError(
@@ -240,6 +242,7 @@ class InternalConfigScope(ConfigScope):
         data = self.get_section(section)
         if data is not None:
             _validate_section(data, section_schemas[section])
+        self.sections[section] = _mark_internal(data, self.name)
 
     def __repr__(self):
         return '<InternalConfigScope: %s>' % self.name
@@ -361,13 +364,13 @@ class Configuration(object):
 
         """
         _validate_section_name(section)
-        merged_section = syaml.syaml_dict()
 
         if scope is None:
             scopes = self.scopes.values()
         else:
             scopes = [self._validate_scope(scope)]
 
+        merged_section = syaml.syaml_dict()
         for scope in scopes:
             # read potentially cached data from the scope.
 
@@ -414,6 +417,7 @@ class Configuration(object):
         while parts:
             key = parts.pop(0)
             value = value.get(key, default)
+
         return value
 
     def set(self, path, value, scope=None):
@@ -442,12 +446,13 @@ class Configuration(object):
         for scope in self.scopes.values():
             yield scope
 
-    def print_section(self, section):
+    def print_section(self, section, blame=False):
         """Print a configuration to stdout."""
         try:
             data = syaml.syaml_dict()
             data[section] = self.get_config(section)
-            syaml.dump(data, stream=sys.stdout, default_flow_style=False)
+            syaml.dump(
+                data, stream=sys.stdout, default_flow_style=False, blame=blame)
         except (yaml.YAMLError, IOError):
             raise ConfigError("Error reading configuration: %s" % section)
 
@@ -593,7 +598,7 @@ def _override(string):
 
 def _mark_overrides(data):
     if isinstance(data, list):
-        return [_mark_overrides(elt) for elt in data]
+        return syaml.syaml_list(_mark_overrides(elt) for elt in data)
 
     elif isinstance(data, dict):
         marked = syaml.syaml_dict()
@@ -608,6 +613,26 @@ def _mark_overrides(data):
         return data
 
 
+def _mark_internal(data, name):
+    """Add a simple name mark to raw YAML/JSON data.
+
+    This is used by `spack config blame` to show where config lines came from.
+    """
+    if isinstance(data, dict):
+        d = syaml.syaml_dict((_mark_internal(k, name), _mark_internal(v, name))
+                             for k, v in data.items())
+    elif isinstance(data, list):
+        d = syaml.syaml_list(_mark_internal(e, name) for e in data)
+    else:
+        d = syaml.syaml_type(data)
+
+    if syaml.markable(d):
+        d._start_mark = yaml.Mark(name, None, None, None, None, None)
+        d._end_mark = yaml.Mark(name, None, None, None, None, None)
+
+    return d
+
+
 def _merge_yaml(dest, source):
     """Merges source into dest; entries in source take precedence over dest.
 
@@ -639,6 +664,9 @@ def _merge_yaml(dest, source):
 
     # Source dict is merged into dest.
     elif they_are(dict):
+        # track keys for marking
+        key_marks = {}
+
         for sk, sv in iteritems(source):
             if _override(sk) or sk not in dest:
                 # if sk ended with ::, or if it's new, completely override
@@ -646,6 +674,18 @@ def _merge_yaml(dest, source):
             else:
                 # otherwise, merge the YAML
                 dest[sk] = _merge_yaml(dest[sk], source[sk])
+
+            # this seems unintuitive, but see below. We need this because
+            # Python dicts do not overwrite keys on insert, and we want
+            # to copy mark information on source keys to dest.
+            key_marks[sk] = sk
+
+        # ensure that keys are marked in the destination.  the key_marks dict
+        # ensures we can get the actual source key objects from dest keys
+        for dk in dest.keys():
+            if dk in key_marks:
+                syaml.mark(dk, key_marks[dk])
+
         return dest
 
     # In any other case, overwrite with a copy of the source value.
diff --git a/lib/spack/spack/test/config.py b/lib/spack/spack/test/config.py
index 0d2359c1ab..7b1e7d0ee1 100644
--- a/lib/spack/spack/test/config.py
+++ b/lib/spack/spack/test/config.py
@@ -438,6 +438,46 @@ def test_internal_config_filename(config, write_config_file):
         config.get_config_filename('command_line', 'config')
 
 
+def test_mark_internal():
+    data = {
+        'config': {
+            'bool': False,
+            'int': 6,
+            'numbers': [1, 2, 3],
+            'string': 'foo',
+            'dict': {
+                'more_numbers': [1, 2, 3],
+                'another_string': 'foo',
+                'another_int': 7,
+            }
+        }
+    }
+
+    marked = spack.config._mark_internal(data, 'x')
+
+    # marked version should be equal to the original
+    assert data == marked
+
+    def assert_marked(obj):
+        if type(obj) is bool:
+            return  # can't subclass bool, so can't mark it
+
+        assert hasattr(obj, '_start_mark') and obj._start_mark.name == 'x'
+        assert hasattr(obj, '_end_mark') and obj._end_mark.name == 'x'
+
+    # everything in the marked version should have marks
+    checks = (marked.keys(), marked.values(),
+              marked['config'].keys(), marked['config'].values(),
+              marked['config']['numbers'],
+              marked['config']['dict'].keys(),
+              marked['config']['dict'].values(),
+              marked['config']['dict']['more_numbers'])
+
+    for seq in checks:
+        for obj in seq:
+            assert_marked(obj)
+
+
 def test_internal_config_from_data():
     config = spack.config.Configuration()
 
diff --git a/lib/spack/spack/test/util/spack_yaml.py b/lib/spack/spack/test/util/spack_yaml.py
new file mode 100644
index 0000000000..7790b74c1c
--- /dev/null
+++ b/lib/spack/spack/test/util/spack_yaml.py
@@ -0,0 +1,86 @@
+##############################################################################
+# Copyright (c) 2013-2018, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory.
+#
+# This file is part of Spack.
+# Created by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
+# LLNL-CODE-647188
+#
+# For details, see https://github.com/spack/spack
+# Please also see the NOTICE and LICENSE files for our notice and the LGPL.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License (as
+# published by the Free Software Foundation) version 2.1, February 1999.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
+# conditions of the GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##############################################################################
+import re
+
+import spack.config
+from spack.main import SpackCommand
+
+config_cmd = SpackCommand('config')
+
+
+def get_config_line(pattern, lines):
+    """Get a configuration line that matches a particular pattern."""
+    line = next((l for l in lines if re.search(pattern, l)), None)
+    assert line is not None, 'no such line!'
+    return line
+
+
+def check_blame(element, file_name, line=None):
+    """Check that `config blame config` gets right file/line for an element.
+
+    This runs `spack config blame config` and scrapes the output for a
+    particular YAML key. It then checks that the requested file/line info
+    is also on that line.
+
+    Line is optional; if it is ``None`` we just check for the
+    ``file_name``, which may just be a name for a special config scope
+    like ``_builtin`` or ``command_line``.
+    """
+    output = config_cmd('blame', 'config')
+
+    blame_lines = output.rstrip().split('\n')
+    element_line = get_config_line(element + ':', blame_lines)
+
+    annotation = file_name
+    if line is not None:
+        annotation += ':%d' % line
+
+    assert file_name in element_line
+
+
+def test_config_blame(config):
+    """check blame info for elements in mock configuration."""
+    config_file = config.get_config_filename('site', 'config')
+
+    check_blame('install_tree', config_file, 2)
+    check_blame('source_cache', config_file, 11)
+    check_blame('misc_cache', config_file, 12)
+    check_blame('verify_ssl', config_file, 13)
+    check_blame('checksum', config_file, 14)
+    check_blame('dirty', config_file, 15)
+
+
+def test_config_blame_with_override(config):
+    """check blame for an element from an override scope"""
+    config_file = config.get_config_filename('site', 'config')
+
+    with spack.config.override('config:install_tree', 'foobar'):
+        check_blame('install_tree', 'overrides')
+
+        check_blame('source_cache', config_file, 11)
+        check_blame('misc_cache', config_file, 12)
+        check_blame('verify_ssl', config_file, 13)
+        check_blame('checksum', config_file, 14)
+        check_blame('dirty', config_file, 15)
diff --git a/lib/spack/spack/util/spack_yaml.py b/lib/spack/spack/util/spack_yaml.py
index 152b57de19..f7adadb980 100644
--- a/lib/spack/spack/util/spack_yaml.py
+++ b/lib/spack/spack/util/spack_yaml.py
@@ -31,13 +31,17 @@
   default unorderd dict.
 
 """
+from six import string_types, StringIO
+
 import yaml
 from yaml import Loader, Dumper
 from yaml.nodes import MappingNode, SequenceNode, ScalarNode
 from yaml.constructor import ConstructorError
-from spack.util.ordereddict import OrderedDict
+
+from llnl.util.tty.color import colorize, clen, cextra
 
 import spack.error
+from spack.util.ordereddict import OrderedDict
 
 # Only export load and dump
 __all__ = ['load', 'dump', 'SpackYAMLError']
@@ -60,10 +64,56 @@ class syaml_str(str):
     __repr__ = str.__repr__
 
 
+class syaml_int(int):
+    __repr__ = str.__repr__
+
+
+#: mapping from syaml type -> primitive type
+syaml_types = {
+    syaml_str: string_types,
+    syaml_int: int,
+    syaml_dict: dict,
+    syaml_list: list,
+}
+
+
+def syaml_type(obj):
+    """Get the corresponding syaml wrapper type for a primitive type.
+
+    Return:
+        (object): syaml-typed copy of object, or the obj if no wrapper
+    """
+    for syaml_t, t in syaml_types.items():
+        if type(obj) is not bool and isinstance(obj, t):
+            return syaml_t(obj) if type(obj) != syaml_t else obj
+    return obj
+
+
+def markable(obj):
+    """Whether an object can be marked."""
+    return type(obj) in syaml_types
+
+
 def mark(obj, node):
     """Add start and end markers to an object."""
-    obj._start_mark = node.start_mark
-    obj._end_mark = node.end_mark
+    if not markable(obj):
+        return
+
+    if hasattr(node, 'start_mark'):
+        obj._start_mark = node.start_mark
+    elif hasattr(node, '_start_mark'):
+        obj._start_mark = node._start_mark
+
+    if hasattr(node, 'end_mark'):
+        obj._end_mark = node.end_mark
+    elif hasattr(node, '_end_mark'):
+        obj._end_mark = node._end_mark
+
+
+def marked(obj):
+    """Whether an object has been marked by spack_yaml."""
+    return (hasattr(obj, '_start_mark') and obj._start_mark or
+            hasattr(obj, '_end_mark') and obj._end_mark)
 
 
 class OrderedLineLoader(Loader):
@@ -193,6 +243,7 @@ class OrderedLineDumper(Dumper):
                 node.flow_style = self.default_flow_style
             else:
                 node.flow_style = best_style
+
         return node
 
     def ignore_aliases(self, _data):
@@ -204,6 +255,77 @@ class OrderedLineDumper(Dumper):
 OrderedLineDumper.add_representer(syaml_dict, OrderedLineDumper.represent_dict)
 OrderedLineDumper.add_representer(syaml_list, OrderedLineDumper.represent_list)
 OrderedLineDumper.add_representer(syaml_str, OrderedLineDumper.represent_str)
+OrderedLineDumper.add_representer(syaml_int, OrderedLineDumper.represent_int)
+
+
+def file_line(mark):
+    """Format a mark as <file>:<line> information."""
+    result = mark.name
+    if mark.line:
+        result += ':' + str(mark.line)
+    return result
+
+
+#: Global for interactions between LineAnnotationDumper and dump_annotated().
+#: This is nasty but YAML doesn't give us many ways to pass arguments --
+#: yaml.dump() takes a class (not an instance) and instantiates the dumper
+#: itself, so we can't just pass an instance
+_annotations = []
+
+
+class LineAnnotationDumper(OrderedLineDumper):
+    """Dumper that generates per-line annotations.
+
+    Annotations are stored in the ``_annotations`` global.  After one
+    dump pass, the strings in ``_annotations`` will correspond one-to-one
+    with the lines output by the dumper.
+
+    LineAnnotationDumper records blame information after each line is
+    generated. As each line is parsed, it saves file/line info for each
+    object printed. At the end of each line, it creates an annotation
+    based on the saved mark and stores it in ``_annotations``.
+
+    For an example of how to use this, see ``dump_annotated()``, which
+    writes to a ``StringIO`` then joins the lines from that with
+    annotations.
+    """
+    saved = None
+
+    def __init__(self, *args, **kwargs):
+        super(LineAnnotationDumper, self).__init__(*args, **kwargs)
+        del _annotations[:]
+
+    def process_scalar(self):
+        super(LineAnnotationDumper, self).process_scalar()
+        if marked(self.event.value):
+            self.saved = self.event.value
+
+    def represent_data(self, data):
+        """Force syaml_str to be passed through with marks."""
+        result = super(LineAnnotationDumper, self).represent_data(data)
+        if isinstance(result.value, string_types):
+            result.value = syaml_str(data)
+        mark(result.value, data)
+        return result
+
+    def write_stream_start(self):
+        super(LineAnnotationDumper, self).write_stream_start()
+        _annotations.append(colorize('@K{---}'))
+
+    def write_line_break(self):
+        super(LineAnnotationDumper, self).write_line_break()
+        if not self.saved:
+            return
+
+        # append annotations at the end of each line
+        if self.saved:
+            mark = self.saved._start_mark
+            ann = '@K{%s}' % mark.name
+            if mark.line is not None:
+                ann += ':@c{%s}' % (mark.line + 1)
+            _annotations.append(colorize(ann))
+        else:
+            _annotations.append('')
 
 
 def load(*args, **kwargs):
@@ -214,8 +336,36 @@ def load(*args, **kwargs):
 
 
 def dump(*args, **kwargs):
-    kwargs['Dumper'] = OrderedLineDumper
-    return yaml.dump(*args, **kwargs)
+    blame = kwargs.pop('blame', False)
+
+    if blame:
+        return dump_annotated(*args, **kwargs)
+    else:
+        kwargs['Dumper'] = OrderedLineDumper
+        return yaml.dump(*args, **kwargs)
+
+
+def dump_annotated(data, stream=None, *args, **kwargs):
+    kwargs['Dumper'] = LineAnnotationDumper
+
+    sio = StringIO()
+    yaml.dump(data, sio, *args, **kwargs)
+    lines = sio.getvalue().rstrip().split('\n')
+
+    getvalue = None
+    if stream is None:
+        stream = StringIO()
+        getvalue = stream.getvalue
+
+    # write out annotations and lines, accounting for color
+    width = max(clen(a) for a in _annotations)
+    formats = ['%%-%ds  %%s\n' % (width + cextra(a)) for a in _annotations]
+
+    for f, a, l in zip(formats, _annotations, lines):
+        stream.write(f % (a, l))
+
+    if getvalue:
+        return getvalue()
 
 
 class SpackYAMLError(spack.error.SpackError):
-- 
cgit v1.2.3-70-g09d2