summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTodd Gamblin <tgamblin@llnl.gov>2016-12-05 10:03:58 -0800
committerbecker33 <becker33@llnl.gov>2016-12-05 10:03:58 -0800
commit41b8f31bcd299d4ea7dd40138f725f77576278a3 (patch)
treed3f50e9e465860c651ee9856aa3cff53628dc077
parent161d6205b2dcc61296913701f1b088157fcea118 (diff)
downloadspack-41b8f31bcd299d4ea7dd40138f725f77576278a3.tar.gz
spack-41b8f31bcd299d4ea7dd40138f725f77576278a3.tar.bz2
spack-41b8f31bcd299d4ea7dd40138f725f77576278a3.tar.xz
spack-41b8f31bcd299d4ea7dd40138f725f77576278a3.zip
Use JSON for the database instead of YAML. (#2189)
* Use JSON for the database instead of YAML. - JSON is much faster than YAML *and* can preserve ordered keys. - 170x+ faster than Python YAML when using unordered dicts - 55x faster than Python YAML (both using OrderedDicts) - 8x faster than C YAML (with OrderedDicts) - JSON is built into Python, unlike C YAML, so doesn't add a dependency. - Don't need human readability for the package database. - JSON requires no major changes to the code -- same object model as YAML. - add to_json, from_json methods to spec. * Add tests to ensure JSON and YAML don't need to be ordered in DB. * Write index.json first time it's not found instead of requiring reindex. * flake8 bug.
-rwxr-xr-xbin/spack8
-rw-r--r--lib/spack/spack/database.py77
-rw-r--r--lib/spack/spack/spec.py60
-rw-r--r--lib/spack/spack/test/database.py2
-rw-r--r--lib/spack/spack/test/spec_dag.py28
-rw-r--r--lib/spack/spack/test/spec_yaml.py124
-rw-r--r--lib/spack/spack/util/spack_json.py56
-rw-r--r--lib/spack/spack/util/spack_yaml.py17
8 files changed, 282 insertions, 90 deletions
diff --git a/bin/spack b/bin/spack
index 7cc1b28a37..454a9a5b2d 100755
--- a/bin/spack
+++ b/bin/spack
@@ -41,14 +41,6 @@ SPACK_PREFIX = os.path.dirname(os.path.dirname(SPACK_FILE))
SPACK_LIB_PATH = os.path.join(SPACK_PREFIX, "lib", "spack")
sys.path.insert(0, SPACK_LIB_PATH)
-# Try to use system YAML if it is available, as it might have libyaml
-# support (for faster loading via C). Load it before anything in
-# lib/spack/external so it will take precedence over Spack's PyYAML.
-try:
- import yaml
-except ImportError:
- pass # ignore and use slow yaml
-
# Add external libs
SPACK_EXTERNAL_LIBS = os.path.join(SPACK_LIB_PATH, "external")
sys.path.insert(0, SPACK_EXTERNAL_LIBS)
diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py
index e8902ec024..a01795ca0f 100644
--- a/lib/spack/spack/database.py
+++ b/lib/spack/spack/database.py
@@ -55,6 +55,7 @@ from spack.version import Version
import spack.spec
from spack.error import SpackError
import spack.util.spack_yaml as syaml
+import spack.util.spack_json as sjson
# DB goes in this directory underneath the root
@@ -134,10 +135,12 @@ class Database(object):
under ``root/.spack-db``, which is created if it does not
exist. This is the ``db_dir``.
- The Database will attempt to read an ``index.yaml`` file in
- ``db_dir``. If it does not find one, it will be created when
- needed by scanning the entire Database root for ``spec.yaml``
- files according to Spack's ``DirectoryLayout``.
+ The Database will attempt to read an ``index.json`` file in
+ ``db_dir``. If it does not find one, it will fall back to read
+ an ``index.yaml`` if one is present. If that does not exist, it
+ will create a database when needed by scanning the entire
+ Database root for ``spec.yaml`` files according to Spack's
+ ``DirectoryLayout``.
Caller may optionally provide a custom ``db_dir`` parameter
where data will be stored. This is intended to be used for
@@ -154,7 +157,8 @@ class Database(object):
self._db_dir = db_dir
# Set up layout of database files within the db dir
- self._index_path = join_path(self._db_dir, 'index.yaml')
+ self._old_yaml_index_path = join_path(self._db_dir, 'index.yaml')
+ self._index_path = join_path(self._db_dir, 'index.json')
self._lock_path = join_path(self._db_dir, 'lock')
# This is for other classes to use to lock prefix directories.
@@ -179,8 +183,8 @@ class Database(object):
"""Get a read lock context manager for use in a `with` block."""
return ReadTransaction(self.lock, self._read, timeout=timeout)
- def _write_to_yaml(self, stream):
- """Write out the databsae to a YAML file.
+ def _write_to_file(self, stream):
+        """Write out the database to a JSON file.
This function does not do any locking or transactions.
"""
@@ -201,12 +205,12 @@ class Database(object):
}
try:
- return syaml.dump(
- database, stream=stream, default_flow_style=False)
+ sjson.dump(database, stream)
except YAMLError as e:
- raise SpackYAMLError("error writing YAML database:", str(e))
+ raise syaml.SpackYAMLError(
+ "error writing YAML database:", str(e))
- def _read_spec_from_yaml(self, hash_key, installs):
+ def _read_spec_from_dict(self, hash_key, installs):
"""Recursively construct a spec from a hash in a YAML database.
Does not do any locking.
@@ -241,24 +245,32 @@ class Database(object):
child = data[dhash].spec
spec._add_dependency(child, dtypes)
- def _read_from_yaml(self, stream):
+ def _read_from_file(self, stream, format='json'):
"""
- Fill database from YAML, do not maintain old data
+ Fill database from file, do not maintain old data
Translate the spec portions from node-dict form to spec form
Does not do any locking.
"""
+ if format.lower() == 'json':
+ load = sjson.load
+ elif format.lower() == 'yaml':
+ load = syaml.load
+ else:
+ raise ValueError("Invalid database format: %s" % format)
+
try:
if isinstance(stream, basestring):
with open(stream, 'r') as f:
- yfile = syaml.load(f)
+ fdata = load(f)
else:
- yfile = syaml.load(stream)
-
+ fdata = load(stream)
except MarkedYAMLError as e:
- raise SpackYAMLError("error parsing YAML database:", str(e))
+ raise syaml.SpackYAMLError("error parsing YAML database:", str(e))
+ except Exception as e:
+ raise CorruptDatabaseError("error parsing database:", str(e))
- if yfile is None:
+ if fdata is None:
return
def check(cond, msg):
@@ -266,10 +278,10 @@ class Database(object):
raise CorruptDatabaseError(
"Spack database is corrupt: %s" % msg, self._index_path)
- check('database' in yfile, "No 'database' attribute in YAML.")
+ check('database' in fdata, "No 'database' attribute in YAML.")
# High-level file checks
- db = yfile['database']
+ db = fdata['database']
check('installs' in db, "No 'installs' in YAML DB.")
check('version' in db, "No 'version' in YAML DB.")
@@ -303,7 +315,7 @@ class Database(object):
for hash_key, rec in installs.items():
try:
# This constructs a spec DAG from the list of all installs
- spec = self._read_spec_from_yaml(hash_key, installs)
+ spec = self._read_spec_from_dict(hash_key, installs)
# Insert the brand new spec in the database. Each
# spec has its own copies of its dependency specs.
@@ -342,7 +354,7 @@ class Database(object):
def _read_suppress_error():
try:
if os.path.isfile(self._index_path):
- self._read_from_yaml(self._index_path)
+ self._read_from_file(self._index_path)
except CorruptDatabaseError as e:
self._error = e
self._data = {}
@@ -426,7 +438,7 @@ class Database(object):
# Write a temporary database file them move it into place
try:
with open(temp_file, 'w') as f:
- self._write_to_yaml(f)
+ self._write_to_file(f)
os.rename(temp_file, self._index_path)
except:
# Clean up temp file if something goes wrong.
@@ -437,11 +449,24 @@ class Database(object):
def _read(self):
"""Re-read Database from the data in the set location.
- This does no locking.
+ This does no locking, with one exception: it will automatically
+ migrate an index.yaml to an index.json if possible. This requires
+ taking a write lock.
+
"""
if os.path.isfile(self._index_path):
- # Read from YAML file if a database exists
- self._read_from_yaml(self._index_path)
+ # Read from JSON file if a JSON database exists
+ self._read_from_file(self._index_path, format='json')
+
+ elif os.path.isfile(self._old_yaml_index_path):
+ if os.access(self._db_dir, os.R_OK | os.W_OK):
+ # if we can write, then read AND write a JSON file.
+ self._read_from_file(self._old_yaml_index_path, format='yaml')
+ with WriteTransaction(self.lock, timeout=_db_lock_timeout):
+ self._write(None, None, None)
+ else:
+                # Check for a YAML file if we can't find JSON.
+ self._read_from_file(self._old_yaml_index_path, format='yaml')
else:
# The file doesn't exist, try to traverse the directory.
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 86eaa3bb91..2ef3757689 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -117,6 +117,7 @@ from spack.build_environment import get_path_from_module, load_module
from spack.util.prefix import Prefix
from spack.util.string import *
import spack.util.spack_yaml as syaml
+import spack.util.spack_json as sjson
from spack.util.spack_yaml import syaml_dict
from spack.util.crypto import prefix_bits
from spack.version import *
@@ -153,7 +154,6 @@ __all__ = [
'UnsatisfiableArchitectureSpecError',
'UnsatisfiableProviderSpecError',
'UnsatisfiableDependencySpecError',
- 'SpackYAMLError',
'AmbiguousHashError']
# Valid pattern for an identifier in Spack
@@ -1174,15 +1174,21 @@ class Spec(object):
return syaml_dict([(self.name, d)])
- def to_yaml(self, stream=None):
+ def to_dict(self):
node_list = []
for s in self.traverse(order='pre', deptype=('link', 'run')):
node = s.to_node_dict()
node[s.name]['hash'] = s.dag_hash()
node_list.append(node)
+
+ return syaml_dict([('spec', node_list)])
+
+ def to_yaml(self, stream=None):
return syaml.dump(
- syaml_dict([('spec', node_list)]),
- stream=stream, default_flow_style=False)
+ self.to_dict(), stream=stream, default_flow_style=False)
+
+ def to_json(self, stream=None):
+ return sjson.dump(self.to_dict(), stream)
@staticmethod
def from_node_dict(node):
@@ -1245,22 +1251,13 @@ class Spec(object):
yield dep_name, dag_hash, list(deptypes)
@staticmethod
- def from_yaml(stream):
+ def from_dict(data):
"""Construct a spec from YAML.
Parameters:
- stream -- string or file object to read from.
-
- TODO: currently discards hashes. Include hashes when they
- represent more than the DAG does.
-
+ data -- a nested dict/list data structure read from YAML or JSON.
"""
- try:
- yfile = syaml.load(stream)
- except MarkedYAMLError as e:
- raise SpackYAMLError("error parsing YAML spec:", str(e))
-
- nodes = yfile['spec']
+ nodes = data['spec']
# Read nodes out of list. Root spec is the first element;
# dependencies are the following elements.
@@ -1285,6 +1282,32 @@ class Spec(object):
return spec
+ @staticmethod
+ def from_yaml(stream):
+ """Construct a spec from YAML.
+
+ Parameters:
+ stream -- string or file object to read from.
+ """
+ try:
+ data = syaml.load(stream)
+ return Spec.from_dict(data)
+ except MarkedYAMLError as e:
+ raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
+
+ @staticmethod
+ def from_json(stream):
+ """Construct a spec from JSON.
+
+ Parameters:
+ stream -- string or file object to read from.
+ """
+ try:
+ data = sjson.load(stream)
+ return Spec.from_dict(data)
+ except Exception as e:
+ raise sjson.SpackJSONError("error parsing JSON spec:", str(e))
+
def _concretize_helper(self, presets=None, visited=None):
"""Recursive helper function for concretize().
This concretizes everything bottom-up. As things are
@@ -3064,11 +3087,6 @@ class UnsatisfiableDependencySpecError(UnsatisfiableSpecError):
provided, required, "dependency")
-class SpackYAMLError(spack.error.SpackError):
- def __init__(self, msg, yaml_error):
- super(SpackYAMLError, self).__init__(msg, str(yaml_error))
-
-
class AmbiguousHashError(SpecError):
def __init__(self, msg, *specs):
super(AmbiguousHashError, self).__init__(msg)
diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py
index 1a1281e10e..55a1d84e20 100644
--- a/lib/spack/spack/test/database.py
+++ b/lib/spack/spack/test/database.py
@@ -75,7 +75,7 @@ class DatabaseTest(MockDatabase):
def test_005_db_exists(self):
"""Make sure db cache file exists after creating."""
- index_file = join_path(self.install_path, '.spack-db', 'index.yaml')
+ index_file = join_path(self.install_path, '.spack-db', 'index.json')
lock_file = join_path(self.install_path, '.spack-db', 'lock')
self.assertTrue(os.path.exists(index_file))
diff --git a/lib/spack/spack/test/spec_dag.py b/lib/spack/spack/test/spec_dag.py
index 0bc63bcf0f..dd536f945c 100644
--- a/lib/spack/spack/test/spec_dag.py
+++ b/lib/spack/spack/test/spec_dag.py
@@ -496,34 +496,6 @@ class SpecDagTest(MockPackagesTest):
traversal = dag.traverse(deptype='run')
self.assertEqual([x.name for x in traversal], names)
- def test_using_ordered_dict(self):
- """ Checks that dicts are ordered
-
- Necessary to make sure that dag_hash is stable across python
- versions and processes.
- """
- def descend_and_check(iterable, level=0):
- from spack.util.spack_yaml import syaml_dict
- from collections import Iterable, Mapping
- if isinstance(iterable, Mapping):
- self.assertTrue(isinstance(iterable, syaml_dict))
- return descend_and_check(iterable.values(), level=level + 1)
- max_level = level
- for value in iterable:
- if isinstance(value, Iterable) and not isinstance(value, str):
- nlevel = descend_and_check(value, level=level + 1)
- if nlevel > max_level:
- max_level = nlevel
- return max_level
-
- specs = ['mpileaks ^zmpi', 'dttop', 'dtuse']
- for spec in specs:
- dag = Spec(spec)
- dag.normalize()
- level = descend_and_check(dag.to_node_dict())
- # level just makes sure we are doing something here
- self.assertTrue(level >= 5)
-
def test_hash_bits(self):
"""Ensure getting first n bits of a base32-encoded DAG hash works."""
diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py
index 964aea9422..442c6e6e81 100644
--- a/lib/spack/spack/test/spec_yaml.py
+++ b/lib/spack/spack/test/spec_yaml.py
@@ -27,6 +27,10 @@
YAML format preserves DAG informatoin in the spec.
"""
+import spack.util.spack_yaml as syaml
+import spack.util.spack_json as sjson
+from spack.util.spack_yaml import syaml_dict
+
from spack.spec import Spec
from spack.test.mock_packages_test import *
@@ -64,3 +68,123 @@ class SpecYamlTest(MockPackagesTest):
for dep in ('callpath', 'mpich', 'dyninst', 'libdwarf', 'libelf'):
self.assertTrue(spec[dep].eq_dag(yaml_spec[dep]))
+
+ def test_using_ordered_dict(self):
+ """ Checks that dicts are ordered
+
+ Necessary to make sure that dag_hash is stable across python
+ versions and processes.
+ """
+ def descend_and_check(iterable, level=0):
+ from spack.util.spack_yaml import syaml_dict
+ from collections import Iterable, Mapping
+ if isinstance(iterable, Mapping):
+ self.assertTrue(isinstance(iterable, syaml_dict))
+ return descend_and_check(iterable.values(), level=level + 1)
+ max_level = level
+ for value in iterable:
+ if isinstance(value, Iterable) and not isinstance(value, str):
+ nlevel = descend_and_check(value, level=level + 1)
+ if nlevel > max_level:
+ max_level = nlevel
+ return max_level
+
+ specs = ['mpileaks ^zmpi', 'dttop', 'dtuse']
+ for spec in specs:
+ dag = Spec(spec)
+ dag.normalize()
+ level = descend_and_check(dag.to_node_dict())
+ # level just makes sure we are doing something here
+ self.assertTrue(level >= 5)
+
+ def test_ordered_read_not_required_for_consistent_dag_hash(self):
+ """Make sure ordered serialization isn't required to preserve hashes.
+
+ For consistent hashes, we require that YAML and json documents
+ have their keys serialized in a deterministic order. However, we
+ don't want to require them to be serialized in order. This
+        ensures that is not required.
+
+ """
+ specs = ['mpileaks ^zmpi', 'dttop', 'dtuse']
+ for spec in specs:
+ spec = Spec(spec)
+ spec.concretize()
+
+ #
+ # Dict & corresponding YAML & JSON from the original spec.
+ #
+ spec_dict = spec.to_dict()
+ spec_yaml = spec.to_yaml()
+ spec_json = spec.to_json()
+
+ #
+ # Make a spec with reversed OrderedDicts for every
+ # OrderedDict in the original.
+ #
+ reversed_spec_dict = reverse_all_dicts(spec.to_dict())
+
+ #
+ # Dump to YAML and JSON
+ #
+ yaml_string = syaml.dump(spec_dict, default_flow_style=False)
+ reversed_yaml_string = syaml.dump(reversed_spec_dict,
+ default_flow_style=False)
+ json_string = sjson.dump(spec_dict)
+ reversed_json_string = sjson.dump(reversed_spec_dict)
+
+ #
+ # Do many consistency checks
+ #
+
+ # spec yaml is ordered like the spec dict
+ self.assertEqual(yaml_string, spec_yaml)
+ self.assertEqual(json_string, spec_json)
+
+ # reversed string is different from the original, so it
+ # *would* generate a different hash
+ self.assertNotEqual(yaml_string, reversed_yaml_string)
+ self.assertNotEqual(json_string, reversed_json_string)
+
+ # build specs from the "wrongly" ordered data
+ round_trip_yaml_spec = Spec.from_yaml(yaml_string)
+ round_trip_json_spec = Spec.from_json(json_string)
+ round_trip_reversed_yaml_spec = Spec.from_yaml(
+ reversed_yaml_string)
+ round_trip_reversed_json_spec = Spec.from_yaml(
+ reversed_json_string)
+
+ # TODO: remove this when build deps are in provenance.
+ spec = spec.copy(deps=('link', 'run'))
+
+ # specs are equal to the original
+ self.assertEqual(spec, round_trip_yaml_spec)
+ self.assertEqual(spec, round_trip_json_spec)
+ self.assertEqual(spec, round_trip_reversed_yaml_spec)
+ self.assertEqual(spec, round_trip_reversed_json_spec)
+ self.assertEqual(round_trip_yaml_spec,
+ round_trip_reversed_yaml_spec)
+ self.assertEqual(round_trip_json_spec,
+ round_trip_reversed_json_spec)
+
+ # dag_hashes are equal
+ self.assertEqual(
+ spec.dag_hash(), round_trip_yaml_spec.dag_hash())
+ self.assertEqual(
+ spec.dag_hash(), round_trip_json_spec.dag_hash())
+ self.assertEqual(
+ spec.dag_hash(), round_trip_reversed_yaml_spec.dag_hash())
+ self.assertEqual(
+ spec.dag_hash(), round_trip_reversed_json_spec.dag_hash())
+
+
+def reverse_all_dicts(data):
+ """Descend into data and reverse all the dictionaries"""
+ if isinstance(data, dict):
+ return syaml_dict(reversed(
+ [(reverse_all_dicts(k), reverse_all_dicts(v))
+ for k, v in data.items()]))
+ elif isinstance(data, (list, tuple)):
+ return type(data)(reverse_all_dicts(elt) for elt in data)
+ else:
+ return data
diff --git a/lib/spack/spack/util/spack_json.py b/lib/spack/spack/util/spack_json.py
new file mode 100644
index 0000000000..240ce86c68
--- /dev/null
+++ b/lib/spack/spack/util/spack_json.py
@@ -0,0 +1,56 @@
+##############################################################################
+# Copyright (c) 2013-2016, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory.
+#
+# This file is part of Spack.
+# Created by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
+# LLNL-CODE-647188
+#
+# For details, see https://github.com/llnl/spack
+# Please also see the LICENSE file for our notice and the LGPL.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License (as
+# published by the Free Software Foundation) version 2.1, February 1999.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
+# conditions of the GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##############################################################################
+"""Simple wrapper around JSON to guarantee consistent use of load/dump. """
+import json
+import spack.error
+
+__all__ = ['load', 'dump', 'SpackJSONError']
+
+_json_dump_args = {
+ 'indent': True,
+ 'separators': (',', ': ')
+}
+
+
+def load(stream):
+ """Spack JSON needs to be ordered to support specs."""
+ if isinstance(stream, basestring):
+ return json.loads(stream)
+ else:
+ return json.load(stream)
+
+
+def dump(data, stream=None):
+ """Dump JSON with a reasonable amount of indentation and separation."""
+ if stream is None:
+ return json.dumps(data, **_json_dump_args)
+ else:
+ return json.dump(data, stream, **_json_dump_args)
+
+
+class SpackJSONError(spack.error.SpackError):
+ """Raised when there are issues with JSON parsing."""
+ def __init__(self, msg, yaml_error):
+ super(SpackJSONError, self).__init__(msg, str(yaml_error))
diff --git a/lib/spack/spack/util/spack_yaml.py b/lib/spack/spack/util/spack_yaml.py
index c27db52066..9d4c607908 100644
--- a/lib/spack/spack/util/spack_yaml.py
+++ b/lib/spack/spack/util/spack_yaml.py
@@ -32,23 +32,21 @@
"""
import yaml
-try:
- from yaml import CLoader as Loader, CDumper as Dumper
-except ImportError as e:
- from yaml import Loader, Dumper
+from yaml import Loader, Dumper
from yaml.nodes import *
from yaml.constructor import ConstructorError
from ordereddict_backport import OrderedDict
+import spack.error
+
# Only export load and dump
-__all__ = ['load', 'dump']
+__all__ = ['load', 'dump', 'SpackYAMLError']
# Make new classes so we can add custom attributes.
# Also, use OrderedDict instead of just dict.
class syaml_dict(OrderedDict):
-
def __repr__(self):
mappings = ('%r: %r' % (k, v) for k, v in self.items())
return '{%s}' % ', '.join(mappings)
@@ -153,6 +151,7 @@ class OrderedLineLoader(Loader):
mark(mapping, node)
return mapping
+
# register above new constructors
OrderedLineLoader.add_constructor(
u'tag:yaml.org,2002:map', OrderedLineLoader.construct_yaml_map)
@@ -223,3 +222,9 @@ def load(*args, **kwargs):
def dump(*args, **kwargs):
kwargs['Dumper'] = OrderedLineDumper
return yaml.dump(*args, **kwargs)
+
+
+class SpackYAMLError(spack.error.SpackError):
+ """Raised when there are issues with YAML parsing."""
+ def __init__(self, msg, yaml_error):
+ super(SpackYAMLError, self).__init__(msg, str(yaml_error))