From 6b559912c1deb47820cf8bce10028c770adaf5b8 Mon Sep 17 00:00:00 2001 From: Andrew W Elble Date: Fri, 20 Mar 2020 20:05:51 -0400 Subject: performance: add a verification file to the database (#14693) Reading the database repeatedly can be quite slow. We need a way to speed up Spack when it reads the DB multiple times, but the DB has not been modified between reads (which is nearly all the time). - [x] Add a file containing a unique uuid that is regenerated at database write time. Use this uuid to suppress re-parsing the database contents if we know a previous uuid and the uuid has not changed. - [x] Fix mutable_database fixture so that it resets the last seen verifier when it resets. - [x] Enable not rereading the database immediately after a write. Make the tests reset the last seen verifier in between tests that use the database fixture. - [x] make presence of uuid module optional --- lib/spack/spack/database.py | 29 ++++++++++++++++++++++++++--- lib/spack/spack/test/conftest.py | 2 ++ lib/spack/spack/test/database.py | 24 ++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index 4b889da425..ca9ed67fb5 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -26,6 +26,12 @@ import os import socket import sys import time +try: + import uuid + _use_uuid = True +except ImportError: + _use_uuid = False + pass import llnl.util.tty as tty import six @@ -294,6 +300,7 @@ class Database(object): # Set up layout of database files within the db dir self._index_path = os.path.join(self._db_dir, 'index.json') + self._verifier_path = os.path.join(self._db_dir, 'index_verifier') self._lock_path = os.path.join(self._db_dir, 'lock') # This is for other classes to use to lock prefix directories. @@ -315,6 +322,7 @@ class Database(object): mkdirp(self._failure_dir) self.is_upstream = is_upstream + self.last_seen_verifier = '' # initialize rest of state. self.db_lock_timeout = ( @@ -913,6 +921,11 @@ class Database(object): with open(temp_file, 'w') as f: self._write_to_file(f) os.rename(temp_file, self._index_path) + if _use_uuid: + with open(self._verifier_path, 'w') as f: + new_verifier = str(uuid.uuid4()) + f.write(new_verifier) + self.last_seen_verifier = new_verifier except BaseException as e: tty.debug(e) # Clean up temp file if something goes wrong. @@ -928,8 +941,18 @@ class Database(object): write lock. """ if os.path.isfile(self._index_path): - # Read from file if a database exists - self._read_from_file(self._index_path) + current_verifier = '' + if _use_uuid: + try: + with open(self._verifier_path, 'r') as f: + current_verifier = f.read() + except BaseException: + pass + if ((current_verifier != self.last_seen_verifier) or + (current_verifier == '')): + self.last_seen_verifier = current_verifier + # Read from file if a database exists + self._read_from_file(self._index_path) return elif self.is_upstream: raise UpstreamDatabaseLockingError( @@ -1339,7 +1362,7 @@ class Database(object): # TODO: handling of hashes restriction is not particularly elegant. hash_key = query_spec.dag_hash() if (hash_key in self._data and - (not hashes or hash_key in hashes)): + (not hashes or hash_key in hashes)): return [self._data[hash_key].spec] else: return [] diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index 04e870d336..6703742142 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -525,6 +525,8 @@ def database(mock_store, mock_packages, config): """This activates the mock store, packages, AND config.""" with use_store(mock_store): yield mock_store.db + # Force reading the database again between tests + mock_store.db.last_seen_verifier = '' @pytest.fixture(scope='function') diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index 88e6c42693..28311c7501 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -13,6 +13,12 @@ import multiprocessing import os import pytest import json +try: + import uuid + _use_uuid = True +except ImportError: + _use_uuid = False + pass import llnl.util.lock as lk from llnl.util.tty.colify import colify @@ -469,6 +475,21 @@ def test_015_write_and_read(mutable_database): assert new_rec.installed == rec.installed +def test_017_write_and_read_without_uuid(mutable_database, monkeypatch): + monkeypatch.setattr(spack.database, '_use_uuid', False) + # write and read DB + with spack.store.db.write_transaction(): + specs = spack.store.db.query() + recs = [spack.store.db.get_record(s) for s in specs] + + for spec, rec in zip(specs, recs): + new_rec = spack.store.db.get_record(spec) + assert new_rec.ref_count == rec.ref_count + assert new_rec.spec == rec.spec + assert new_rec.path == rec.path + assert new_rec.installed == rec.installed + + def test_020_db_sanity(database): """Make sure query() returns what's actually in the db.""" _check_db_sanity(database) @@ -703,6 +724,9 @@ def test_old_external_entries_prefix(mutable_database): with open(spack.store.db._index_path, 'w') as f: f.write(json.dumps(db_obj)) + if _use_uuid: + with open(spack.store.db._verifier_path, 'w') as f: + f.write(str(uuid.uuid4())) record = spack.store.db.get_record(s) -- cgit v1.2.3-70-g09d2