From 6e0b7959a46ae2ff92af78524a50943179a12f0e Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Tue, 2 Aug 2022 20:43:14 +0200 Subject: Optimize reuse from buildcaches (#30806) * database: don't sort on return from query_local * ASP-based solver: don't build the hash-lookup dictionary twice Building this dictionary twice and traversing all the specs might be time-consuming for large buildcaches. --- lib/spack/spack/database.py | 9 +++++++-- lib/spack/spack/solver/asp.py | 15 ++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index c8943d70ad..df41459dff 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -1525,9 +1525,14 @@ class Database(object): _query.__doc__ += _query_docstring def query_local(self, *args, **kwargs): - """Query only the local Spack database.""" + """Query only the local Spack database. + + This function doesn't guarantee any sorting of the returned + data for performance reason, since comparing specs for __lt__ + may be an expensive operation. + """ with self.read_transaction(): - return sorted(self._query(*args, **kwargs)) + return self._query(*args, **kwargs) if query_local.__doc__ is None: query_local.__doc__ = "" diff --git a/lib/spack/spack/solver/asp.py b/lib/spack/spack/solver/asp.py index f8cba20adf..a50b10133f 100644 --- a/lib/spack/spack/solver/asp.py +++ b/lib/spack/spack/solver/asp.py @@ -670,7 +670,7 @@ class PyclingoDriver(object): if result.satisfiable: # build spec from the best model - builder = SpecBuilder(specs, reuse=reuse) + builder = SpecBuilder(specs, hash_lookup=setup.reusable_and_possible) min_cost, best_model = min(models) tuples = [(sym.name, [stringify(a) for a in sym.arguments]) for sym in best_model] answers = builder.build_specs(tuples) @@ -718,6 +718,7 @@ class SpackSolverSetup(object): # hashes we've already added facts for self.seen_hashes = set() + self.reusable_and_possible = {} # id for dummy variables self._condition_id_counter = itertools.count() @@ -1730,6 +1731,7 @@ class SpackSolverSetup(object): # be dependencies (don't tell it about the others) h = spec.dag_hash() if spec.name in possible and h not in self.seen_hashes: + self.reusable_and_possible[h] = spec try: # Only consider installed packages for repo we know spack.repo.path.get(spec) @@ -1887,7 +1889,7 @@ class SpecBuilder(object): #: Attributes that don't need actions ignored_attributes = ["opt_criterion"] - def __init__(self, specs, reuse=None): + def __init__(self, specs, hash_lookup=None): self._specs = {} self._result = None self._command_line_specs = specs @@ -1896,11 +1898,7 @@ class SpecBuilder(object): # Pass in as arguments reusable specs and plug them in # from this dictionary during reconstruction - self._hash_lookup = {} - if reuse is not None: - for spec in reuse: - for node in spec.traverse(): - self._hash_lookup.setdefault(node.dag_hash(), node) + self._hash_lookup = hash_lookup or {} def hash(self, pkg, h): if pkg not in self._specs: @@ -2205,8 +2203,7 @@ class Solver(object): # Specs from buildcaches try: index = spack.binary_distribution.update_cache_and_get_specs() - reusable_specs.extend([s for s in index if not s.satisfies("dev_path=*")]) - + reusable_specs.extend(index) except (spack.binary_distribution.FetchCacheError, IndexError): # this is raised when no mirrors had indices. # TODO: update mirror configuration so it can indicate that the -- cgit v1.2.3-70-g09d2