summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2023-08-17 08:08:50 +0200
committerGitHub <noreply@github.com>2023-08-17 08:08:50 +0200
commit190a1bf5238279a53b85fdad9e1b3f2a44689ae9 (patch)
tree171faa4f45e3783a3c10ed7ffc958abc52dc1a7e
parente381e166ec27bb56c2d3bc856e7020224a005d1d (diff)
downloadspack-190a1bf5238279a53b85fdad9e1b3f2a44689ae9.tar.gz
spack-190a1bf5238279a53b85fdad9e1b3f2a44689ae9.tar.bz2
spack-190a1bf5238279a53b85fdad9e1b3f2a44689ae9.tar.xz
spack-190a1bf5238279a53b85fdad9e1b3f2a44689ae9.zip
Delay abstract hashes lookup (#39251)
Delay lookup for abstract hashes until concretization time, instead of until Spec comparison. This has a few advantages: 1. `satisfies` / `intersects` etc don't always know where to resolve the abstract hash (in some cases it's wrong to look in the current env, db, buildcache, ...). Better to let the call site dictate it. 2. Allows search by abstract hash without triggering a database lookup, causing quadratic complexity issues (accidental nested loop during search) 3. Simplifies queries against the buildcache, they can now use Spec instances instead of strings. The rules are straightforward: 1. a satisfies b when b's hash is prefix of a's hash 2. a intersects b when either a's or b's hash is a prefix of b's or a's hash respectively
-rw-r--r--lib/spack/spack/binary_distribution.py16
-rw-r--r--lib/spack/spack/environment/environment.py6
-rw-r--r--lib/spack/spack/solver/asp.py2
-rw-r--r--lib/spack/spack/spec.py76
-rw-r--r--lib/spack/spack/spec_list.py4
-rw-r--r--lib/spack/spack/test/spec_semantics.py35
-rw-r--r--lib/spack/spack/test/spec_syntax.py21
7 files changed, 92 insertions, 68 deletions
diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py
index 339fafb2ea..7ba8b5ff07 100644
--- a/lib/spack/spack/binary_distribution.py
+++ b/lib/spack/spack/binary_distribution.py
@@ -2383,22 +2383,12 @@ class BinaryCacheQuery:
self.possible_specs = specs
- def __call__(self, spec, **kwargs):
+ def __call__(self, spec: Spec, **kwargs):
"""
Args:
- spec (str): The spec being searched for in its string representation or hash.
+ spec: The spec being searched for
"""
- matches = []
- if spec.startswith("/"):
- # Matching a DAG hash
- query_hash = spec.replace("/", "")
- for candidate_spec in self.possible_specs:
- if candidate_spec.dag_hash().startswith(query_hash):
- matches.append(candidate_spec)
- else:
- # Matching a spec constraint
- matches = [s for s in self.possible_specs if s.satisfies(spec)]
- return matches
+ return [s for s in self.possible_specs if s.satisfies(spec)]
class FetchIndexError(Exception):
diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py
index 176273c2a4..bd2a633980 100644
--- a/lib/spack/spack/environment/environment.py
+++ b/lib/spack/spack/environment/environment.py
@@ -1994,14 +1994,10 @@ class Environment:
def all_matching_specs(self, *specs: spack.spec.Spec) -> List[Spec]:
"""Returns all concretized specs in the environment satisfying any of the input specs"""
- # Look up abstract hashes ahead of time, to avoid O(n^2) traversal.
- specs = [s.lookup_hash() for s in specs]
-
- # Avoid double lookup by directly calling _satisfies.
return [
s
for s in traverse.traverse_nodes(self.concrete_roots(), key=traverse.by_dag_hash)
- if any(s._satisfies(t) for t in specs)
+ if any(s.satisfies(t) for t in specs)
]
@spack.repo.autospec
diff --git a/lib/spack/spack/solver/asp.py b/lib/spack/spack/solver/asp.py
index 5f0e7d767d..42f4814006 100644
--- a/lib/spack/spack/solver/asp.py
+++ b/lib/spack/spack/solver/asp.py
@@ -2953,6 +2953,7 @@ class Solver:
setup_only (bool): if True, stop after setup and don't solve (default False).
"""
# Check upfront that the variants are admissible
+ specs = [s.lookup_hash() for s in specs]
reusable_specs = self._check_input_and_extract_concrete_specs(specs)
reusable_specs.extend(self._reusable_specs(specs))
setup = SpackSolverSetup(tests=tests)
@@ -2976,6 +2977,7 @@ class Solver:
stats (bool): print internal statistics if set to True
tests (bool): add test dependencies to the solve
"""
+ specs = [s.lookup_hash() for s in specs]
reusable_specs = self._check_input_and_extract_concrete_specs(specs)
reusable_specs.extend(self._reusable_specs(specs))
setup = SpackSolverSetup(tests=tests)
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 3d8887a773..60841ed8e2 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -1925,19 +1925,15 @@ class Spec:
store, or finally, binary caches."""
import spack.environment
- matches = []
active_env = spack.environment.active_environment()
- if active_env:
- env_matches = active_env.get_by_hash(self.abstract_hash) or []
- matches = [m for m in env_matches if m._satisfies(self)]
- if not matches:
- db_matches = spack.store.STORE.db.get_by_hash(self.abstract_hash) or []
- matches = [m for m in db_matches if m._satisfies(self)]
- if not matches:
- query = spack.binary_distribution.BinaryCacheQuery(True)
- remote_matches = query("/" + self.abstract_hash) or []
- matches = [m for m in remote_matches if m._satisfies(self)]
+ # First env, then store, then binary cache
+ matches = (
+ (active_env.all_matching_specs(self) if active_env else [])
+ or spack.store.STORE.db.query(self, installed=any)
+ or spack.binary_distribution.BinaryCacheQuery(True)(self)
+ )
+
if not matches:
raise InvalidHashError(self, self.abstract_hash)
@@ -1958,19 +1954,17 @@ class Spec:
spec = self.copy(deps=False)
# root spec is replaced
if spec.abstract_hash:
- new = self._lookup_hash()
- spec._dup(new)
+ spec._dup(self._lookup_hash())
return spec
# Get dependencies that need to be replaced
for node in self.traverse(root=False):
if node.abstract_hash:
- new = node._lookup_hash()
- spec._add_dependency(new, deptypes=(), virtuals=())
+ spec._add_dependency(node._lookup_hash(), deptypes=(), virtuals=())
# reattach nodes that were not otherwise satisfied by new dependencies
for node in self.traverse(root=False):
- if not any(n._satisfies(node) for n in spec.traverse()):
+ if not any(n.satisfies(node) for n in spec.traverse()):
spec._add_dependency(node.copy(), deptypes=(), virtuals=())
return spec
@@ -1983,9 +1977,7 @@ class Spec:
if not any(node for node in self.traverse(order="post") if node.abstract_hash):
return
- spec_by_hash = self.lookup_hash()
-
- self._dup(spec_by_hash)
+ self._dup(self.lookup_hash())
def to_node_dict(self, hash=ht.dag_hash):
"""Create a dictionary representing the state of this Spec.
@@ -3721,15 +3713,19 @@ class Spec:
"""
other = self._autospec(other)
- lhs = self.lookup_hash() or self
- rhs = other.lookup_hash() or other
-
- return lhs._intersects(rhs, deps)
-
- def _intersects(self, other: "Spec", deps: bool = True) -> bool:
if other.concrete and self.concrete:
return self.dag_hash() == other.dag_hash()
+ self_hash = self.dag_hash() if self.concrete else self.abstract_hash
+ other_hash = other.dag_hash() if other.concrete else other.abstract_hash
+
+ if (
+ self_hash
+ and other_hash
+ and not (self_hash.startswith(other_hash) or other_hash.startswith(self_hash))
+ ):
+ return False
+
# If the names are different, we need to consider virtuals
if self.name != other.name and self.name and other.name:
if self.virtual and other.virtual:
@@ -3789,19 +3785,8 @@ class Spec:
# If we need to descend into dependencies, do it, otherwise we're done.
if deps:
return self._intersects_dependencies(other)
- else:
- return True
-
- def satisfies(self, other, deps=True):
- """
- This checks constraints on common dependencies against each other.
- """
- other = self._autospec(other)
- lhs = self.lookup_hash() or self
- rhs = other.lookup_hash() or other
-
- return lhs._satisfies(rhs, deps=deps)
+ return True
def _intersects_dependencies(self, other):
if not other._dependencies or not self._dependencies:
@@ -3838,7 +3823,7 @@ class Spec:
return True
- def _satisfies(self, other: "Spec", deps: bool = True) -> bool:
+ def satisfies(self, other: "Spec", deps: bool = True) -> bool:
"""Return True if all concrete specs matching self also match other, otherwise False.
Args:
@@ -3853,6 +3838,13 @@ class Spec:
# objects.
return self.concrete and self.dag_hash() == other.dag_hash()
+ # If the right-hand side has an abstract hash, make sure it's a prefix of the
+ # left-hand side's (abstract) hash.
+ if other.abstract_hash:
+ compare_hash = self.dag_hash() if self.concrete else self.abstract_hash
+ if not compare_hash or not compare_hash.startswith(other.abstract_hash):
+ return False
+
# If the names are different, we need to consider virtuals
if self.name != other.name and self.name and other.name:
# A concrete provider can satisfy a virtual dependency.
@@ -4229,9 +4221,7 @@ class Spec:
def _cmp_iter(self):
"""Lazily yield components of self for comparison."""
- cmp_spec = self.lookup_hash() or self
-
- for item in cmp_spec._cmp_node():
+ for item in self._cmp_node():
yield item
# This needs to be in _cmp_iter so that no specs with different process hashes
@@ -4242,10 +4232,10 @@ class Spec:
# TODO: they exist for speed. We should benchmark whether it's really worth
# TODO: having two types of hashing now that we use `json` instead of `yaml` for
# TODO: spec hashing.
- yield cmp_spec.process_hash() if cmp_spec.concrete else None
+ yield self.process_hash() if self.concrete else None
def deps():
- for dep in sorted(itertools.chain.from_iterable(cmp_spec._dependencies.values())):
+ for dep in sorted(itertools.chain.from_iterable(self._dependencies.values())):
yield dep.spec.name
yield tuple(sorted(dep.deptypes))
yield hash(dep.spec)
diff --git a/lib/spack/spack/spec_list.py b/lib/spack/spack/spec_list.py
index 23e3a7f056..6cad15e14e 100644
--- a/lib/spack/spack/spec_list.py
+++ b/lib/spack/spack/spec_list.py
@@ -197,7 +197,9 @@ def _expand_matrix_constraints(matrix_config):
for combo in itertools.product(*expanded_rows):
# Construct a combined spec to test against excludes
flat_combo = [constraint for constraint_list in combo for constraint in constraint_list]
- flat_combo = [Spec(x) for x in flat_combo]
+
+ # Resolve abstract hashes so we can exclude by their concrete properties
+ flat_combo = [Spec(x).lookup_hash() for x in flat_combo]
test_spec = flat_combo[0].copy()
for constraint in flat_combo[1:]:
diff --git a/lib/spack/spack/test/spec_semantics.py b/lib/spack/spack/test/spec_semantics.py
index efe788bce3..32c2d70009 100644
--- a/lib/spack/spack/test/spec_semantics.py
+++ b/lib/spack/spack/test/spec_semantics.py
@@ -1291,3 +1291,38 @@ def test_constrain(factory, lhs_str, rhs_str, result, constrained_str):
rhs = factory(rhs_str)
rhs.constrain(lhs)
assert rhs == factory(constrained_str)
+
+
+def test_abstract_hash_intersects_and_satisfies(default_mock_concretization):
+ concrete: Spec = default_mock_concretization("a")
+ hash = concrete.dag_hash()
+ hash_5 = hash[:5]
+ hash_6 = hash[:6]
+ # abstract hash that doesn't have a common prefix with the others.
+ hash_other = f"{'a' if hash_5[0] == 'b' else 'b'}{hash_5[1:]}"
+
+ abstract_5 = Spec(f"a/{hash_5}")
+ abstract_6 = Spec(f"a/{hash_6}")
+ abstract_none = Spec(f"a/{hash_other}")
+ abstract = Spec("a")
+
+ def assert_subset(a: Spec, b: Spec):
+ assert a.intersects(b) and b.intersects(a) and a.satisfies(b) and not b.satisfies(a)
+
+ def assert_disjoint(a: Spec, b: Spec):
+ assert (
+ not a.intersects(b)
+ and not b.intersects(a)
+ and not a.satisfies(b)
+ and not b.satisfies(a)
+ )
+
+ # left-hand side is more constrained, so its
+ # concretization space is a subset of the right-hand side's
+ assert_subset(concrete, abstract_5)
+ assert_subset(abstract_6, abstract_5)
+ assert_subset(abstract_5, abstract)
+
+ # disjoint concretization space
+ assert_disjoint(abstract_none, concrete)
+ assert_disjoint(abstract_none, abstract_5)
diff --git a/lib/spack/spack/test/spec_syntax.py b/lib/spack/spack/test/spec_syntax.py
index 4a55752fdd..b79b829f96 100644
--- a/lib/spack/spack/test/spec_syntax.py
+++ b/lib/spack/spack/test/spec_syntax.py
@@ -726,22 +726,31 @@ def test_multiple_specs_with_hash(database, config):
@pytest.mark.db
def test_ambiguous_hash(mutable_database, default_mock_concretization, config):
+ """Test that abstract hash ambiguity is delayed until concretization.
+ In the past this ambiguity error would happen during parse time."""
+
+ # This is a very sketchy as manually setting hashes easily breaks invariants
x1 = default_mock_concretization("a")
x2 = x1.copy()
x1._hash = "xyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
+ x1._process_hash = "xyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
x2._hash = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
- mutable_database.add(x1, spack.store.STORE.layout)
- mutable_database.add(x2, spack.store.STORE.layout)
+ x2._process_hash = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+ assert x1 != x2 # doesn't hold when only the dag hash is modified.
+
+ mutable_database.add(x1, directory_layout=None)
+ mutable_database.add(x2, directory_layout=None)
# ambiguity in first hash character
+ s1 = SpecParser("/x").next_spec()
with pytest.raises(spack.spec.AmbiguousHashError):
- parsed_spec = SpecParser("/x").next_spec()
- parsed_spec.replace_hash()
+ s1.lookup_hash()
# ambiguity in first hash character AND spec name
+ s2 = SpecParser("a/x").next_spec()
with pytest.raises(spack.spec.AmbiguousHashError):
- parsed_spec = SpecParser("a/x").next_spec()
- parsed_spec.replace_hash()
+ s2.lookup_hash()
@pytest.mark.db