summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTodd Gamblin <tgamblin@llnl.gov>2023-11-06 17:00:37 -0800
committerGitHub <noreply@github.com>2023-11-07 01:00:37 +0000
commit910190f55bb5467305dd75a4dac8c60f1f51e283 (patch)
tree38d35cd1b434ee78b6df796cce3f16c3adab0ac1
parent4ce80b95f3cbb8b6e8ce6bb4546dee76b1f398dc (diff)
downloadspack-910190f55bb5467305dd75a4dac8c60f1f51e283.tar.gz
spack-910190f55bb5467305dd75a4dac8c60f1f51e283.tar.bz2
spack-910190f55bb5467305dd75a4dac8c60f1f51e283.tar.xz
spack-910190f55bb5467305dd75a4dac8c60f1f51e283.zip
database: optimize query() by skipping unnecessary virtual checks (#40898)
Most queries will end up calling `spec.satisfies(query)` on everything in the DB, which will cause Spack to ask whether the query spec is virtual if its name doesn't match the target spec's. This can be expensive, because it can cause Spack to check if any new virtuals showed up in *all* the packages it knows about. That can currently trigger thousands of `stat()` calls. We can avoid the virtual check for most successful queries if we consider that if there *is* a match by name, the query spec *can't* be virtual. This PR adds an optimization to the query loop to save any comparisons that would trigger a virtual check for last. - [x] Add a `deferred` list to the `query()` loop. - [x] First run through the `query()` loop *only* checks for name matches. - [x] Query loop now returns early if there's a name match, skipping most `satisfies()` calls. - [x] Second run through the `deferred()` list only runs if query spec is virtual. - [x] Fix up handling of concrete specs. - [x] Add test for querying virtuals in DB. - [x] Avoid allocating deferred if not necessary. --------- Co-authored-by: Harmen Stoppels <me@harmenstoppels.nl>
-rw-r--r--lib/spack/spack/database.py43
-rw-r--r--lib/spack/spack/test/database.py8
2 files changed, 42 insertions, 9 deletions
diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py
index f252fbc05d..ecda8c36b0 100644
--- a/lib/spack/spack/database.py
+++ b/lib/spack/spack/database.py
@@ -1522,14 +1522,18 @@ class Database:
# TODO: like installed and known that can be queried? Or are
# TODO: these really special cases that only belong here?
- # Just look up concrete specs with hashes; no fancy search.
- if isinstance(query_spec, spack.spec.Spec) and query_spec.concrete:
- # TODO: handling of hashes restriction is not particularly elegant.
- hash_key = query_spec.dag_hash()
- if hash_key in self._data and (not hashes or hash_key in hashes):
- return [self._data[hash_key].spec]
- else:
- return []
+ if query_spec is not any:
+ if not isinstance(query_spec, spack.spec.Spec):
+ query_spec = spack.spec.Spec(query_spec)
+
+ # Just look up concrete specs with hashes; no fancy search.
+ if query_spec.concrete:
+ # TODO: handling of hashes restriction is not particularly elegant.
+ hash_key = query_spec.dag_hash()
+ if hash_key in self._data and (not hashes or hash_key in hashes):
+ return [self._data[hash_key].spec]
+ else:
+ return []
# Abstract specs require more work -- currently we test
# against everything.
@@ -1537,6 +1541,9 @@ class Database:
start_date = start_date or datetime.datetime.min
end_date = end_date or datetime.datetime.max
+ # save specs whose name doesn't match for last, to avoid a virtual check
+ deferred = []
+
for key, rec in self._data.items():
if hashes is not None and rec.spec.dag_hash() not in hashes:
continue
@@ -1561,8 +1568,26 @@ class Database:
if not (start_date < inst_date < end_date):
continue
- if query_spec is any or rec.spec.satisfies(query_spec):
+ if query_spec is any:
results.append(rec.spec)
+ continue
+
+ # check anon specs and exact name matches first
+ if not query_spec.name or rec.spec.name == query_spec.name:
+ if rec.spec.satisfies(query_spec):
+ results.append(rec.spec)
+
+ # save potential virtual matches for later, but not if we already found a match
+ elif not results:
+ deferred.append(rec.spec)
+
+ # Checking for virtuals is expensive, so we save it for last and only if needed.
+ # If we get here, we didn't find anything in the DB that matched by name.
+ # If we did fine something, the query spec can't be virtual b/c we matched an actual
+ # package installation, so skip the virtual check entirely. If we *didn't* find anything,
+ # check all the deferred specs *if* the query is virtual.
+ if not results and query_spec is not any and deferred and query_spec.virtual:
+ results = [spec for spec in deferred if spec.satisfies(query_spec)]
return results
diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py
index 3033370ac6..ee3e5da81e 100644
--- a/lib/spack/spack/test/database.py
+++ b/lib/spack/spack/test/database.py
@@ -803,6 +803,14 @@ def test_query_spec_with_non_conditional_virtual_dependency(database):
assert len(results) == 1
+def test_query_virtual_spec(database):
+ """Make sure we can query for virtuals in the DB"""
+ results = spack.store.STORE.db.query_local("mpi")
+ assert len(results) == 3
+ names = [s.name for s in results]
+ assert all(name in names for name in ["mpich", "mpich2", "zmpi"])
+
+
def test_failed_spec_path_error(database):
"""Ensure spec not concrete check is covered."""
s = spack.spec.Spec("a")