From 3b8b13809e4f9e8d7456c4d2e27d7ff069c8e44e Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Tue, 1 Jan 2019 09:11:49 +0100 Subject: Improve validation of modules.yaml (#9878) This PR improves the validation of `modules.yaml` by introducing a custom validator that checks if an attribute listed in `properties` or `patternProperties` is a valid spec. This new check applied to the test case in #9857 gives: ```console $ spack install szip ==> Error: /home/mculpo/.spack/linux/modules.yaml:5: "^python@2.7@" is an invalid spec [Invalid version specifier] ``` Details: * Moved the set-up of a custom validator class to spack.schema * In Spack we use `jsonschema` to validate configuration files against a schema. We also need custom validators to enforce writing default values within "properties" or "patternProperties" attributes. * Currently, validators were customized at the place of use and with the recent introduction of environments that meant we were setting-up and using 2 different validator classes in two different modules. * This commit moves the set-up of a custom validator class in the `spack.schema` module and refactors the code in `spack.config` and `spack.environments` to use it. * Added a custom validator to check if an attribute is a valid spec * Added a custom validator that can be used on objects, which yields an error if the attribute is not a valid spec. * Updated the schema for modules.yaml * Updated modules.yaml to fix a few inconsistencies: - a few attributes were not tested properly using 'anyOf' - suffixes has been updated to also check that the attribute is a spec - hierarchical_scheme has been updated to hierarchy * Removed $ref from every schema * $ref is not composable or particularly legible * Use python dicts and regular old variables instead. --- lib/spack/llnl/util/lang.py | 3 + lib/spack/spack/config.py | 52 +-------- lib/spack/spack/environment.py | 10 +- lib/spack/spack/schema/__init__.py | 78 ++++++++++++++ lib/spack/spack/schema/env.py | 2 - lib/spack/spack/schema/merged.py | 3 +- lib/spack/spack/schema/modules.py | 213 ++++++++++++++++++------------------- lib/spack/spack/test/schema.py | 77 ++++++++++++++ 8 files changed, 269 insertions(+), 169 deletions(-) create mode 100644 lib/spack/spack/test/schema.py (limited to 'lib') diff --git a/lib/spack/llnl/util/lang.py b/lib/spack/llnl/util/lang.py index 0a86896f5b..b335d3d7d3 100644 --- a/lib/spack/llnl/util/lang.py +++ b/lib/spack/llnl/util/lang.py @@ -562,6 +562,9 @@ class Singleton(object): def __contains__(self, element): return element in self.instance + def __call__(self, *args, **kwargs): + return self.instance(*args, **kwargs) + def __iter__(self): return iter(self.instance) diff --git a/lib/spack/spack/config.py b/lib/spack/spack/config.py index 2ecd669cfe..dc98d8f0fc 100644 --- a/lib/spack/spack/config.py +++ b/lib/spack/spack/config.py @@ -32,7 +32,6 @@ schemas are in submodules of :py:mod:`spack.schema`. import copy import os -import re import sys import multiprocessing from contextlib import contextmanager @@ -49,6 +48,7 @@ from llnl.util.filesystem import mkdirp import spack.paths import spack.architecture +import spack.schema import spack.schema.compilers import spack.schema.mirrors import spack.schema.repos @@ -115,47 +115,6 @@ def first_existing(dictionary, keys): raise KeyError("None of %s is in dict!" % keys) -def _extend_with_default(validator_class): - """Add support for the 'default' attr for properties and patternProperties. - - jsonschema does not handle this out of the box -- it only - validates. This allows us to set default values for configs - where certain fields are `None` b/c they're deleted or - commented out. - - """ - import jsonschema - validate_properties = validator_class.VALIDATORS["properties"] - validate_pattern_properties = validator_class.VALIDATORS[ - "patternProperties"] - - def set_defaults(validator, properties, instance, schema): - for property, subschema in iteritems(properties): - if "default" in subschema: - instance.setdefault( - property, copy.deepcopy(subschema["default"])) - for err in validate_properties( - validator, properties, instance, schema): - yield err - - def set_pp_defaults(validator, properties, instance, schema): - for property, subschema in iteritems(properties): - if "default" in subschema: - if isinstance(instance, dict): - for key, val in iteritems(instance): - if re.match(property, key) and val is None: - instance[key] = copy.deepcopy(subschema["default"]) - - for err in validate_pattern_properties( - validator, properties, instance, schema): - yield err - - return jsonschema.validators.extend(validator_class, { - "properties": set_defaults, - "patternProperties": set_pp_defaults - }) - - class ConfigScope(object): """This class represents a configuration scope. @@ -697,17 +656,10 @@ def _validate(data, schema, set_defaults=True): This leverages the line information (start_mark, end_mark) stored on Spack YAML structures. - """ import jsonschema - - if not hasattr(_validate, 'validator'): - default_setting_validator = _extend_with_default( - jsonschema.Draft4Validator) - _validate.validator = default_setting_validator - try: - _validate.validator(schema).validate(data) + spack.schema.Validator(schema).validate(data) except jsonschema.ValidationError as e: raise ConfigFormatError(e, data) diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index ad95949d90..361e9fcd82 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -8,7 +8,6 @@ import re import sys import shutil -import jsonschema import ruamel.yaml import llnl.util.filesystem as fs @@ -67,9 +66,6 @@ lockfile_format_version = 1 #: legal first keys in the spack.yaml manifest file env_schema_keys = ('spack', 'env') -#: jsonschema validator for environments -_validator = None - def valid_env_name(name): return re.match(valid_environment_name_re, name) @@ -306,11 +302,9 @@ def all_environments(): def validate(data, filename=None): - global _validator - if _validator is None: - _validator = jsonschema.Draft4Validator(spack.schema.env.schema) + import jsonschema try: - _validator.validate(data) + spack.schema.Validator(spack.schema.env.schema).validate(data) except jsonschema.ValidationError as e: raise spack.config.ConfigFormatError( e, data, filename, e.instance.lc.line + 1) diff --git a/lib/spack/spack/schema/__init__.py b/lib/spack/spack/schema/__init__.py index b7a90827f2..a770c536dc 100644 --- a/lib/spack/spack/schema/__init__.py +++ b/lib/spack/spack/schema/__init__.py @@ -4,3 +4,81 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) """This module contains jsonschema files for all of Spack's YAML formats.""" + +import copy +import re + +import six + +import llnl.util.lang +import spack.spec + + +# jsonschema is imported lazily as it is heavy to import +# and increases the start-up time +def _make_validator(): + import jsonschema + _validate_properties = jsonschema.Draft4Validator.VALIDATORS["properties"] + _validate_pattern_properties = jsonschema.Draft4Validator.VALIDATORS[ + "patternProperties" + ] + + def _set_defaults(validator, properties, instance, schema): + """Adds support for the 'default' attribute in 'properties'. + + ``jsonschema`` does not handle this out of the box -- it only + validates. This allows us to set default values for configs + where certain fields are `None` b/c they're deleted or + commented out. + """ + for property, subschema in six.iteritems(properties): + if "default" in subschema: + instance.setdefault( + property, copy.deepcopy(subschema["default"])) + for err in _validate_properties( + validator, properties, instance, schema): + yield err + + def _set_pp_defaults(validator, properties, instance, schema): + """Adds support for the 'default' attribute in 'patternProperties'. + + ``jsonschema`` does not handle this out of the box -- it only + validates. This allows us to set default values for configs + where certain fields are `None` b/c they're deleted or + commented out. + """ + for property, subschema in six.iteritems(properties): + if "default" in subschema: + if isinstance(instance, dict): + for key, val in six.iteritems(instance): + if re.match(property, key) and val is None: + instance[key] = copy.deepcopy(subschema["default"]) + + for err in _validate_pattern_properties( + validator, properties, instance, schema): + yield err + + def _validate_spec(validator, is_spec, instance, schema): + """Check if the attributes on instance are valid specs.""" + import jsonschema + if not validator.is_type(instance, "object"): + return + + for spec_str in instance: + try: + spack.spec.parse(spec_str) + except spack.spec.SpecParseError as e: + yield jsonschema.ValidationError( + '"{0}" is an invalid spec [{1}]'.format(spec_str, str(e)) + ) + + return jsonschema.validators.extend( + jsonschema.Draft4Validator, { + "validate_spec": _validate_spec, + "properties": _set_defaults, + "patternProperties": _set_pp_defaults + } + ) + + +Validator = llnl.util.lang.Singleton(_make_validator) diff --git a/lib/spack/spack/schema/env.py b/lib/spack/spack/schema/env.py index 7b6ad602f7..e2c01b4c89 100644 --- a/lib/spack/spack/schema/env.py +++ b/lib/spack/spack/schema/env.py @@ -11,13 +11,11 @@ from llnl.util.lang import union_dicts import spack.schema.merged -import spack.schema.modules schema = { '$schema': 'http://json-schema.org/schema#', 'title': 'Spack environment file schema', - 'definitions': spack.schema.modules.definitions, 'type': 'object', 'additionalProperties': False, 'patternProperties': { diff --git a/lib/spack/spack/schema/merged.py b/lib/spack/spack/schema/merged.py index 4505728076..89a908720a 100644 --- a/lib/spack/spack/schema/merged.py +++ b/lib/spack/spack/schema/merged.py @@ -6,7 +6,7 @@ """Schema for configuration merged into one file. .. literalinclude:: ../spack/schema/merged.py - :lines: 40- + :lines: 39- """ from llnl.util.lang import union_dicts @@ -33,7 +33,6 @@ properties = union_dicts( schema = { '$schema': 'http://json-schema.org/schema#', 'title': 'Spack merged configuration file schema', - 'definitions': spack.schema.modules.definitions, 'type': 'object', 'additionalProperties': False, 'properties': properties, diff --git a/lib/spack/spack/schema/modules.py b/lib/spack/spack/schema/modules.py index 0a54856105..873c89f253 100644 --- a/lib/spack/spack/schema/modules.py +++ b/lib/spack/spack/schema/modules.py @@ -9,110 +9,110 @@ :lines: 13- """ +#: Matches a spec or a multi-valued variant but not another +#: valid keyword. +#: +#: THIS NEEDS TO BE UPDATED FOR EVERY NEW KEYWORD THAT +#: IS ADDED IMMEDIATELY BELOW THE MODULE TYPE ATTRIBUTE +spec_regex = r'(?!hierarchy|verbose|hash_length|whitelist|' \ + r'blacklist|naming_scheme|core_compilers|all)(^\w[\w-]*)' + +#: Matches an anonymous spec, i.e. a spec without a root name +anonymous_spec_regex = r'^[\^@%+~]' #: Definitions for parts of module schema -definitions = { - 'array_of_strings': { - 'type': 'array', - 'default': [], - 'items': { - 'type': 'string' - } - }, - 'dictionary_of_strings': { - 'type': 'object', - 'patternProperties': { - r'\w[\w-]*': { # key - 'type': 'string' - } - } - }, - 'dependency_selection': { - 'type': 'string', - 'enum': ['none', 'direct', 'all'] - }, - 'module_file_configuration': { - 'type': 'object', - 'default': {}, - 'additionalProperties': False, - 'properties': { - 'filter': { - 'type': 'object', - 'default': {}, - 'additionalProperties': False, - 'properties': { - 'environment_blacklist': { - 'type': 'array', - 'default': [], - 'items': { - 'type': 'string' - } +array_of_strings = { + 'type': 'array', 'default': [], 'items': {'type': 'string'} +} + +dictionary_of_strings = { + 'type': 'object', 'patternProperties': {r'\w[\w-]*': {'type': 'string'}} +} + +dependency_selection = {'type': 'string', 'enum': ['none', 'direct', 'all']} + +module_file_configuration = { + 'type': 'object', + 'default': {}, + 'additionalProperties': False, + 'properties': { + 'filter': { + 'type': 'object', + 'default': {}, + 'additionalProperties': False, + 'properties': { + 'environment_blacklist': { + 'type': 'array', + 'default': [], + 'items': { + 'type': 'string' } } - }, - 'template': { - 'type': 'string' - }, - 'autoload': { - '$ref': '#/definitions/dependency_selection'}, - 'prerequisites': { - '$ref': '#/definitions/dependency_selection'}, - 'conflict': { - '$ref': '#/definitions/array_of_strings'}, - 'load': { - '$ref': '#/definitions/array_of_strings'}, - 'suffixes': { - '$ref': '#/definitions/dictionary_of_strings'}, - 'environment': { - 'type': 'object', - 'default': {}, - 'additionalProperties': False, - 'properties': { - 'set': { - '$ref': '#/definitions/dictionary_of_strings'}, - 'unset': { - '$ref': '#/definitions/array_of_strings'}, - 'prepend_path': { - '$ref': '#/definitions/dictionary_of_strings'}, - 'append_path': { - '$ref': '#/definitions/dictionary_of_strings'} + } + }, + 'template': { + 'type': 'string' + }, + 'autoload': dependency_selection, + 'prerequisites': dependency_selection, + 'conflict': array_of_strings, + 'load': array_of_strings, + 'suffixes': { + 'type': 'object', + 'validate_spec': True, + 'patternProperties': { + r'\w[\w-]*': { # key + 'type': 'string' } } + }, + 'environment': { + 'type': 'object', + 'default': {}, + 'additionalProperties': False, + 'properties': { + 'set': dictionary_of_strings, + 'unset': array_of_strings, + 'prepend_path': dictionary_of_strings, + 'append_path': dictionary_of_strings + } } - }, - 'module_type_configuration': { - 'type': 'object', - 'default': {}, - 'anyOf': [ - {'properties': { - 'verbose': { - 'type': 'boolean', - 'default': False - }, - 'hash_length': { - 'type': 'integer', - 'minimum': 0, - 'default': 7 - }, - 'whitelist': { - '$ref': '#/definitions/array_of_strings'}, - 'blacklist': { - '$ref': '#/definitions/array_of_strings'}, - 'blacklist_implicits': { - 'type': 'boolean', - 'default': False - }, - 'naming_scheme': { - 'type': 'string' # Can we be more specific here? - } - }}, - {'patternProperties': { - r'\w[\w-]*': { - '$ref': '#/definitions/module_file_configuration' - } - }} - ] } +}, + +module_type_configuration = { + 'type': 'object', + 'default': {}, + 'allOf': [ + {'properties': { + 'verbose': { + 'type': 'boolean', + 'default': False + }, + 'hash_length': { + 'type': 'integer', + 'minimum': 0, + 'default': 7 + }, + 'whitelist': array_of_strings, + 'blacklist': array_of_strings, + 'blacklist_implicits': { + 'type': 'boolean', + 'default': False + }, + 'naming_scheme': { + 'type': 'string' # Can we be more specific here? + }, + 'all': module_file_configuration, + } + }, + {'validate_spec': True, + 'patternProperties': { + spec_regex: module_file_configuration, + anonymous_spec_regex: module_file_configuration, + } + } + ] } @@ -127,8 +127,9 @@ properties = { 'type': 'object', 'patternProperties': { # prefix-relative path to be inspected for existence - r'\w[\w-]*': { - '$ref': '#/definitions/array_of_strings'}}}, + r'\w[\w-]*': array_of_strings + } + }, 'enable': { 'type': 'array', 'default': [], @@ -138,28 +139,27 @@ properties = { 'lmod': { 'allOf': [ # Base configuration - {'$ref': '#/definitions/module_type_configuration'}, + module_type_configuration, { - 'core_compilers': { - '$ref': '#/definitions/array_of_strings' + 'type': 'object', + 'properties': { + 'core_compilers': array_of_strings, + 'hierarchy': array_of_strings }, - 'hierarchical_scheme': { - '$ref': '#/definitions/array_of_strings' - } } # Specific lmod extensions ] }, 'tcl': { 'allOf': [ # Base configuration - {'$ref': '#/definitions/module_type_configuration'}, + module_type_configuration, {} # Specific tcl extensions ] }, 'dotkit': { 'allOf': [ # Base configuration - {'$ref': '#/definitions/module_type_configuration'}, + module_type_configuration, {} # Specific dotkit extensions ] }, @@ -172,7 +172,6 @@ properties = { schema = { '$schema': 'http://json-schema.org/schema#', 'title': 'Spack module file configuration file schema', - 'definitions': definitions, 'type': 'object', 'additionalProperties': False, 'properties': properties, diff --git a/lib/spack/spack/test/schema.py b/lib/spack/spack/test/schema.py new file mode 100644 index 0000000000..b638647641 --- /dev/null +++ b/lib/spack/spack/test/schema.py @@ -0,0 +1,77 @@ +# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import jsonschema +import pytest + + +import spack.schema + + +@pytest.fixture() +def validate_spec_schema(): + return { + 'type': 'object', + 'validate_spec': True, + 'patternProperties': { + r'\w[\w-]*': { + 'type': 'string' + } + } + } + + +@pytest.fixture() +def module_suffixes_schema(): + return { + 'type': 'object', + 'properties': { + 'tcl': { + 'type': 'object', + 'patternProperties': { + r'\w[\w-]*': { + 'type': 'object', + 'properties': { + 'suffixes': { + 'validate_spec': True, + 'patternProperties': { + r'\w[\w-]*': { + 'type': 'string', + } + } + } + } + } + } + } + } + } + + +@pytest.mark.regression('9857') +def test_validate_spec(validate_spec_schema): + v = spack.schema.Validator(validate_spec_schema) + data = {'foo@3.7': 'bar'} + + # Validate good data (the key is a spec) + v.validate(data) + + # Check that invalid data throws + data['^python@3.7@'] = 'baz' + with pytest.raises(jsonschema.ValidationError) as exc_err: + v.validate(data) + + assert 'is an invalid spec' in str(exc_err.value) + + +@pytest.mark.regression('9857') +def test_module_suffixes(module_suffixes_schema): + v = spack.schema.Validator(module_suffixes_schema) + data = {'tcl': {'all': {'suffixes': {'^python@2.7@': 'py2.7'}}}} + + with pytest.raises(jsonschema.ValidationError) as exc_err: + v.validate(data) + + assert 'is an invalid spec' in str(exc_err.value) -- cgit v1.2.3-60-g2f50