summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorHarmen Stoppels <me@harmenstoppels.nl>2024-09-09 17:26:30 +0200
committerGitHub <noreply@github.com>2024-09-09 17:26:30 +0200
commit9059756a110106371c461390306c03cda942a533 (patch)
treee5a6a9a6956803ec1f307c5c88be31f0f83ca971 /lib
parent67089b967e34408c019ecbcb85ea437370939c2b (diff)
downloadspack-9059756a110106371c461390306c03cda942a533.tar.gz
spack-9059756a110106371c461390306c03cda942a533.tar.bz2
spack-9059756a110106371c461390306c03cda942a533.tar.xz
spack-9059756a110106371c461390306c03cda942a533.zip
reindex: do not assume fixed layout (#46170)
`spack reindex` relies on projections from configuration to locate installed specs and prefixes. This is problematic because config can change over time, and we have reasons to do so when turning compilers into dependencies (removing `{compiler.name}-{compiler.version}` from projections). This commit makes reindex recursively search for .spack/ metadirs.
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/docs/conf.py1
-rw-r--r--lib/spack/spack/database.py214
-rw-r--r--lib/spack/spack/directory_layout.py122
-rw-r--r--lib/spack/spack/test/cmd/reindex.py20
-rw-r--r--lib/spack/spack/test/database.py60
-rw-r--r--lib/spack/spack/traverse.py9
6 files changed, 274 insertions, 152 deletions
diff --git a/lib/spack/docs/conf.py b/lib/spack/docs/conf.py
index 9819d6f4d3..95b711db93 100644
--- a/lib/spack/docs/conf.py
+++ b/lib/spack/docs/conf.py
@@ -218,6 +218,7 @@ nitpick_ignore = [
("py:class", "spack.spec.SpecfileReaderBase"),
("py:class", "spack.install_test.Pb"),
("py:class", "spack.filesystem_view.SimpleFilesystemView"),
+ ("py:class", "spack.traverse.EdgeAndDepth"),
]
# The reST default role (used for this markup: `text`) to use for all documents.
diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py
index 2b0c4f4c37..404288ff83 100644
--- a/lib/spack/spack/database.py
+++ b/lib/spack/spack/database.py
@@ -207,7 +207,7 @@ class InstallRecord:
def __init__(
self,
spec: "spack.spec.Spec",
- path: str,
+ path: Optional[str],
installed: bool,
ref_count: int = 0,
explicit: bool = False,
@@ -845,7 +845,7 @@ class Database:
):
tty.warn(f"Spack database version changed from {version} to {_DB_VERSION}. Upgrading.")
- self.reindex(spack.store.STORE.layout)
+ self.reindex()
installs = dict(
(k, v.to_dict(include_fields=self._record_fields)) for k, v in self._data.items()
)
@@ -918,8 +918,6 @@ class Database:
if self.is_upstream:
raise UpstreamDatabaseLockingError("Cannot reindex an upstream database")
- error: Optional[CorruptDatabaseError] = None
-
# Special transaction to avoid recursive reindex calls and to
# ignore errors if we need to rebuild a corrupt database.
def _read_suppress_error():
@@ -927,99 +925,116 @@ class Database:
if os.path.isfile(self._index_path):
self._read_from_file(self._index_path)
except CorruptDatabaseError as e:
- nonlocal error
- error = e
+ tty.warn(f"Reindexing corrupt database, error was: {e}")
self._data = {}
self._installed_prefixes = set()
- transaction = lk.WriteTransaction(
- self.lock, acquire=_read_suppress_error, release=self._write
- )
-
- with transaction:
- if error is not None:
- tty.warn(f"Spack database was corrupt. Will rebuild. Error was: {error}")
-
- old_data = self._data
- old_installed_prefixes = self._installed_prefixes
+ with lk.WriteTransaction(self.lock, acquire=_read_suppress_error, release=self._write):
+ old_installed_prefixes, self._installed_prefixes = self._installed_prefixes, set()
+ old_data, self._data = self._data, {}
try:
- self._construct_from_directory_layout(old_data)
+ self._reindex(old_data)
except BaseException:
# If anything explodes, restore old data, skip write.
self._data = old_data
self._installed_prefixes = old_installed_prefixes
raise
- def _construct_entry_from_directory_layout(
- self,
- old_data: Dict[str, InstallRecord],
- spec: "spack.spec.Spec",
- deprecator: Optional["spack.spec.Spec"] = None,
- ):
- # Try to recover explicit value from old DB, but
- # default it to True if DB was corrupt. This is
- # just to be conservative in case a command like
- # "autoremove" is run by the user after a reindex.
- tty.debug(f"Reconstructing from spec file: {spec}")
- explicit = True
- inst_time = os.stat(spec.prefix).st_ctime
- if old_data is not None:
- old_info = old_data.get(spec.dag_hash())
- if old_info is not None:
- explicit = old_info.explicit
- inst_time = old_info.installation_time
-
- self._add(spec, explicit=explicit, installation_time=inst_time)
- if deprecator:
- self._deprecate(spec, deprecator)
-
- def _construct_from_directory_layout(self, old_data: Dict[str, InstallRecord]):
- # Read first the spec files in the prefixes. They should be considered authoritative with
- # respect to DB reindexing, as entries in the DB may be corrupted in a way that still makes
- # them readable. If we considered DB entries authoritative instead, we would perpetuate
- # errors over a reindex.
- assert self.layout is not None, "Cannot reindex a database without a known layout"
- with self.layout.disable_upstream_check():
- # Initialize data in the reconstructed DB
- self._data = {}
- self._installed_prefixes = set()
-
- # Start inspecting the installed prefixes
- processed_specs = set()
-
- for spec in self.layout.all_specs():
- self._construct_entry_from_directory_layout(old_data, spec)
- processed_specs.add(spec)
-
- for spec, deprecator in self.layout.all_deprecated_specs():
- self._construct_entry_from_directory_layout(old_data, spec, deprecator)
- processed_specs.add(spec)
-
- for entry in old_data.values():
- # We already took care of this spec using spec file from its prefix.
- if entry.spec in processed_specs:
- tty.debug(
- f"Skipping reconstruction from old db: {entry.spec}"
- " [already reconstructed from spec file]"
- )
- continue
+ def _reindex(self, old_data: Dict[str, InstallRecord]):
+ # Specs on the file system are the source of truth for record.spec. The old database values
+ # if available are the source of truth for the rest of the record.
+ assert self.layout, "Database layout must be set to reindex"
- # If we arrived here it very likely means that we have external specs that are not
- # dependencies of other specs. This may be the case for externally installed
- # compilers or externally installed applications.
- tty.debug(f"Reconstructing from old db: {entry.spec}")
- try:
- self._add(
- spec=entry.spec,
- explicit=entry.explicit,
- installation_time=entry.installation_time,
- )
- processed_specs.add(entry.spec)
- except Exception as e:
- # Something went wrong, so the spec was not restored from old data
- tty.debug(e)
+ specs_from_fs = self.layout.all_specs()
+ deprecated_for = self.layout.deprecated_for(specs_from_fs)
+
+ known_specs: List[spack.spec.Spec] = [
+ *specs_from_fs,
+ *(deprecated for _, deprecated in deprecated_for),
+ *(rec.spec for rec in old_data.values()),
+ ]
+
+ upstream_hashes = {
+ dag_hash for upstream in self.upstream_dbs for dag_hash in upstream._data
+ }
+ upstream_hashes.difference_update(spec.dag_hash() for spec in known_specs)
+
+ def create_node(edge: spack.spec.DependencySpec, is_upstream: bool):
+ if is_upstream:
+ return
+
+ self._data[edge.spec.dag_hash()] = InstallRecord(
+ spec=edge.spec.copy(deps=False),
+ path=edge.spec.external_path if edge.spec.external else None,
+ installed=edge.spec.external,
+ )
- self._check_ref_counts()
+ # Store all nodes of known specs, excluding ones found in upstreams
+ tr.traverse_breadth_first_with_visitor(
+ known_specs,
+ tr.CoverNodesVisitor(
+ NoUpstreamVisitor(upstream_hashes, create_node), key=tr.by_dag_hash
+ ),
+ )
+
+    # Store the prefix and other information for specs that were found on the file system
+ for s in specs_from_fs:
+ record = self._data[s.dag_hash()]
+ record.path = s.prefix
+ record.installed = True
+ record.explicit = True # conservative assumption
+ record.installation_time = os.stat(s.prefix).st_ctime
+
+ # Deprecate specs
+ for new, old in deprecated_for:
+ self._data[old.dag_hash()].deprecated_for = new.dag_hash()
+
+ # Copy data we have from the old database
+ for old_record in old_data.values():
+ record = self._data[old_record.spec.dag_hash()]
+ record.explicit = old_record.explicit
+ record.installation_time = old_record.installation_time
+ record.origin = old_record.origin
+ record.deprecated_for = old_record.deprecated_for
+
+ # Warn when the spec has been removed from the file system (i.e. it was not detected)
+ if not record.installed and old_record.installed:
+ tty.warn(
+ f"Spec {old_record.spec.short_spec} was marked installed in the database "
+ "but was not found on the file system. It is now marked as missing."
+ )
+
+ def create_edge(edge: spack.spec.DependencySpec, is_upstream: bool):
+ if not edge.parent:
+ return
+ parent_record = self._data[edge.parent.dag_hash()]
+ if is_upstream:
+ upstream, child_record = self.query_by_spec_hash(edge.spec.dag_hash())
+ assert upstream and child_record, "Internal error: upstream spec not found"
+ else:
+ child_record = self._data[edge.spec.dag_hash()]
+ parent_record.spec._add_dependency(
+ child_record.spec, depflag=edge.depflag, virtuals=edge.virtuals
+ )
+
+ # Then store edges
+ tr.traverse_breadth_first_with_visitor(
+ known_specs,
+ tr.CoverEdgesVisitor(
+ NoUpstreamVisitor(upstream_hashes, create_edge), key=tr.by_dag_hash
+ ),
+ )
+
+ # Finally update the ref counts
+ for record in self._data.values():
+ for dep in record.spec.dependencies(deptype=_TRACKED_DEPENDENCIES):
+ dep_record = self._data.get(dep.dag_hash())
+ if dep_record: # dep might be upstream
+ dep_record.ref_count += 1
+ if record.deprecated_for:
+ self._data[record.deprecated_for].ref_count += 1
+
+ self._check_ref_counts()
def _check_ref_counts(self):
"""Ensure consistency of reference counts in the DB.
@@ -1199,7 +1214,7 @@ class Database:
for dep in spec.edges_to_dependencies(depflag=_TRACKED_DEPENDENCIES):
dkey = dep.spec.dag_hash()
upstream, record = self.query_by_spec_hash(dkey)
- assert record, f"Missing dependency {dep.spec} in DB"
+ assert record, f"Missing dependency {dep.spec.short_spec} in DB"
new_spec._add_dependency(record.spec, depflag=dep.depflag, virtuals=dep.virtuals)
if not upstream:
record.ref_count += 1
@@ -1711,6 +1726,33 @@ class Database:
rec.explicit = explicit
+class NoUpstreamVisitor:
+    """Gives edges to upstream specs, but does not follow edges from upstream specs."""
+
+ def __init__(
+ self,
+ upstream_hashes: Set[str],
+ on_visit: Callable[["spack.spec.DependencySpec", bool], None],
+ ):
+ self.upstream_hashes = upstream_hashes
+ self.on_visit = on_visit
+
+ def accept(self, item: tr.EdgeAndDepth) -> bool:
+ self.on_visit(item.edge, self.is_upstream(item))
+ return True
+
+ def is_upstream(self, item: tr.EdgeAndDepth) -> bool:
+ return item.edge.spec.dag_hash() in self.upstream_hashes
+
+ def neighbors(self, item: tr.EdgeAndDepth):
+ # Prune edges from upstream nodes, only follow database tracked dependencies
+ return (
+ []
+ if self.is_upstream(item)
+ else item.edge.spec.edges_to_dependencies(depflag=_TRACKED_DEPENDENCIES)
+ )
+
+
class UpstreamDatabaseLockingError(SpackError):
"""Raised when an operation would need to lock an upstream database"""
diff --git a/lib/spack/spack/directory_layout.py b/lib/spack/spack/directory_layout.py
index b094012fa6..7b715d14d3 100644
--- a/lib/spack/spack/directory_layout.py
+++ b/lib/spack/spack/directory_layout.py
@@ -4,14 +4,12 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import errno
-import glob
import os
-import posixpath
import re
import shutil
import sys
-from contextlib import contextmanager
from pathlib import Path
+from typing import List, Optional, Tuple
import llnl.util.filesystem as fs
from llnl.util.symlink import readlink
@@ -33,6 +31,42 @@ def _check_concrete(spec):
raise ValueError("Specs passed to a DirectoryLayout must be concrete!")
+def _get_spec(prefix: str) -> Optional["spack.spec.Spec"]:
+ """Returns a spec if the prefix contains a spec file in the .spack subdir"""
+ for f in ("spec.json", "spec.yaml"):
+ try:
+ return spack.spec.Spec.from_specfile(os.path.join(prefix, ".spack", f))
+ except Exception:
+ continue
+ return None
+
+
+def specs_from_metadata_dirs(root: str) -> List["spack.spec.Spec"]:
+ stack = [root]
+ specs = []
+
+ while stack:
+ prefix = stack.pop()
+
+ spec = _get_spec(prefix)
+
+ if spec:
+ spec.prefix = prefix
+ specs.append(spec)
+ continue
+
+ try:
+ scandir = os.scandir(prefix)
+ except OSError:
+ continue
+
+ with scandir as entries:
+ for entry in entries:
+ if entry.is_dir(follow_symlinks=False):
+ stack.append(entry.path)
+ return specs
+
+
class DirectoryLayout:
"""A directory layout is used to associate unique paths with specs.
Different installations are going to want different layouts for their
@@ -184,12 +218,6 @@ class DirectoryLayout:
return yaml_path if os.path.exists(yaml_path) else json_path
- @contextmanager
- def disable_upstream_check(self):
- self.check_upstream = False
- yield
- self.check_upstream = True
-
def metadata_path(self, spec):
return os.path.join(spec.prefix, self.metadata_dir)
@@ -244,53 +272,6 @@ class DirectoryLayout:
"Spec file in %s does not match hash!" % spec_file_path
)
- def all_specs(self):
- if not os.path.isdir(self.root):
- return []
-
- specs = []
- for _, path_scheme in self.projections.items():
- path_elems = ["*"] * len(path_scheme.split(posixpath.sep))
- # NOTE: Does not validate filename extension; should happen later
- path_elems += [self.metadata_dir, "spec.json"]
- pattern = os.path.join(self.root, *path_elems)
- spec_files = glob.glob(pattern)
- if not spec_files: # we're probably looking at legacy yaml...
- path_elems += [self.metadata_dir, "spec.yaml"]
- pattern = os.path.join(self.root, *path_elems)
- spec_files = glob.glob(pattern)
- specs.extend([self.read_spec(s) for s in spec_files])
- return specs
-
- def all_deprecated_specs(self):
- if not os.path.isdir(self.root):
- return []
-
- deprecated_specs = set()
- for _, path_scheme in self.projections.items():
- path_elems = ["*"] * len(path_scheme.split(posixpath.sep))
- # NOTE: Does not validate filename extension; should happen later
- path_elems += [
- self.metadata_dir,
- self.deprecated_dir,
- "*_spec.*",
- ] # + self.spec_file_name]
- pattern = os.path.join(self.root, *path_elems)
- spec_files = glob.glob(pattern)
- get_depr_spec_file = lambda x: os.path.join(
- os.path.dirname(os.path.dirname(x)), self.spec_file_name
- )
- deprecated_specs |= set(
- (self.read_spec(s), self.read_spec(get_depr_spec_file(s))) for s in spec_files
- )
- return deprecated_specs
-
- def specs_by_hash(self):
- by_hash = {}
- for spec in self.all_specs():
- by_hash[spec.dag_hash()] = spec
- return by_hash
-
def path_for_spec(self, spec):
"""Return absolute path from the root to a directory for the spec."""
_check_concrete(spec)
@@ -356,6 +337,35 @@ class DirectoryLayout:
raise e
path = os.path.dirname(path)
+ def all_specs(self) -> List["spack.spec.Spec"]:
+        """Returns a list of all specs detected in self.root, identified by `.spack` directories.
+ Their prefix is set to the directory containing the `.spack` directory. Note that these
+ specs may follow a different layout than the current layout if it was changed after
+ installation."""
+ return specs_from_metadata_dirs(self.root)
+
+ def deprecated_for(
+ self, specs: List["spack.spec.Spec"]
+ ) -> List[Tuple["spack.spec.Spec", "spack.spec.Spec"]]:
+        """Returns a list of tuples of specs (new, old) where old is deprecated in favor of new"""
+ spec_with_deprecated = []
+ for spec in specs:
+ try:
+ deprecated = os.scandir(
+ os.path.join(str(spec.prefix), self.metadata_dir, self.deprecated_dir)
+ )
+ except OSError:
+ continue
+
+ with deprecated as entries:
+ for entry in entries:
+ try:
+ deprecated_spec = spack.spec.Spec.from_specfile(entry.path)
+ spec_with_deprecated.append((spec, deprecated_spec))
+ except Exception:
+ continue
+ return spec_with_deprecated
+
class DirectoryLayoutError(SpackError):
"""Superclass for directory layout errors."""
diff --git a/lib/spack/spack/test/cmd/reindex.py b/lib/spack/spack/test/cmd/reindex.py
index 6e48a3c21c..abaaf4530c 100644
--- a/lib/spack/spack/test/cmd/reindex.py
+++ b/lib/spack/spack/test/cmd/reindex.py
@@ -55,12 +55,24 @@ def test_reindex_with_deprecated_packages(
deprecate("-y", "libelf@0.8.12", "libelf@0.8.13")
- all_installed = spack.store.STORE.db.query(installed=any)
- non_deprecated = spack.store.STORE.db.query(installed=True)
+ db = spack.store.STORE.db
+
+ all_installed = db.query(installed=any)
+ non_deprecated = db.query(installed=True)
_clear_db(tmp_path)
reindex()
- assert spack.store.STORE.db.query(installed=any) == all_installed
- assert spack.store.STORE.db.query(installed=True) == non_deprecated
+ assert db.query(installed=any) == all_installed
+ assert db.query(installed=True) == non_deprecated
+
+ old_libelf = db.query_local_by_spec_hash(
+ db.query_local("libelf@0.8.12", installed=any)[0].dag_hash()
+ )
+ new_libelf = db.query_local_by_spec_hash(
+ db.query_local("libelf@0.8.13", installed=True)[0].dag_hash()
+ )
+ assert old_libelf.deprecated_for == new_libelf.spec.dag_hash()
+ assert new_libelf.deprecated_for is None
+ assert new_libelf.ref_count == 1
diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py
index d1d4917179..5e8e5abbc8 100644
--- a/lib/spack/spack/test/database.py
+++ b/lib/spack/spack/test/database.py
@@ -7,6 +7,7 @@ import datetime
import functools
import json
import os
+import re
import shutil
import sys
@@ -982,9 +983,12 @@ def test_reindex_removed_prefix_is_not_installed(mutable_database, mock_store, c
# Reindex should pick up libelf as a dependency of libdwarf
spack.store.STORE.reindex()
- # Reindexing should warn about libelf not being found on the filesystem
- err = capfd.readouterr()[1]
- assert "this directory does not contain an installation of the spec" in err
+ # Reindexing should warn about libelf not found on the filesystem
+ assert re.search(
+ "libelf@0.8.13.+ was marked installed in the database "
+ "but was not found on the file system",
+ capfd.readouterr().err,
+ )
# And we should still have libelf in the database, but not installed.
assert not mutable_database.query_one("libelf", installed=True)
@@ -1124,3 +1128,53 @@ def test_database_errors_with_just_a_version_key(tmp_path):
with pytest.raises(spack.database.InvalidDatabaseVersionError):
spack.database.Database(root).query_local()
+
+
+def test_reindex_with_upstreams(tmp_path, monkeypatch, mock_packages, config):
+ # Reindexing should not put install records of upstream entries into the local database. Here
+ # we install `mpileaks` locally with dependencies in the upstream. And we even install
+ # `mpileaks` with the same hash in the upstream. After reindexing, `mpileaks` should still be
+ # in the local db, and `callpath` should not.
+ mpileaks = spack.spec.Spec("mpileaks").concretized()
+ callpath = mpileaks.dependencies("callpath")[0]
+
+ upstream_store = spack.store.create(
+ {"config": {"install_tree": {"root": str(tmp_path / "upstream")}}}
+ )
+ monkeypatch.setattr(spack.store, "STORE", upstream_store)
+ callpath.package.do_install(fake=True)
+
+ local_store = spack.store.create(
+ {
+ "config": {"install_tree": {"root": str(tmp_path / "local")}},
+ "upstreams": {"my-upstream": {"install_tree": str(tmp_path / "upstream")}},
+ }
+ )
+ monkeypatch.setattr(spack.store, "STORE", local_store)
+ mpileaks.package.do_install(fake=True)
+
+ # Sanity check that callpath is from upstream.
+ assert not local_store.db.query_local("callpath")
+ assert local_store.db.query("callpath")
+
+ # Install mpileaks also upstream with the same hash to ensure that determining upstreamness
+ # checks local installs before upstream databases, even when the local database is being
+ # reindexed.
+ monkeypatch.setattr(spack.store, "STORE", upstream_store)
+ mpileaks.package.do_install(fake=True)
+
+ # Delete the local database
+ shutil.rmtree(local_store.db.database_directory)
+
+ # Create a new instance s.t. we don't have cached specs in memory
+ reindexed_local_store = spack.store.create(
+ {
+ "config": {"install_tree": {"root": str(tmp_path / "local")}},
+ "upstreams": {"my-upstream": {"install_tree": str(tmp_path / "upstream")}},
+ }
+ )
+ reindexed_local_store.db.reindex()
+
+ assert not reindexed_local_store.db.query_local("callpath")
+ assert reindexed_local_store.db.query("callpath") == [callpath]
+ assert reindexed_local_store.db.query_local("mpileaks") == [mpileaks]
diff --git a/lib/spack/spack/traverse.py b/lib/spack/spack/traverse.py
index 091ebfe193..f6c5589b2a 100644
--- a/lib/spack/spack/traverse.py
+++ b/lib/spack/spack/traverse.py
@@ -3,8 +3,8 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-from collections import defaultdict, namedtuple
-from typing import Union
+from collections import defaultdict
+from typing import NamedTuple, Union
import spack.deptypes as dt
import spack.spec
@@ -12,11 +12,14 @@ import spack.spec
# Export only the high-level API.
__all__ = ["traverse_edges", "traverse_nodes", "traverse_tree"]
+
#: Data class that stores a directed edge together with depth at
#: which the target vertex was found. It is passed to ``accept``
#: and ``neighbors`` of visitors, so they can decide whether to
#: follow the edge or not.
-EdgeAndDepth = namedtuple("EdgeAndDepth", ["edge", "depth"])
+class EdgeAndDepth(NamedTuple):
+ edge: "spack.spec.DependencySpec"
+ depth: int
def sort_edges(edges):