-rw-r--r--  lib/spack/docs/conf.py               |   1
-rw-r--r--  lib/spack/spack/database.py          | 214
-rw-r--r--  lib/spack/spack/directory_layout.py  | 122
-rw-r--r--  lib/spack/spack/test/cmd/reindex.py  |  20
-rw-r--r--  lib/spack/spack/test/database.py     |  60
-rw-r--r--  lib/spack/spack/traverse.py          |   9
6 files changed, 274 insertions, 152 deletions
diff --git a/lib/spack/docs/conf.py b/lib/spack/docs/conf.py
index 9819d6f4d3..95b711db93 100644
--- a/lib/spack/docs/conf.py
+++ b/lib/spack/docs/conf.py
@@ -218,6 +218,7 @@ nitpick_ignore = [
("py:class", "spack.spec.SpecfileReaderBase"),
("py:class", "spack.install_test.Pb"),
("py:class", "spack.filesystem_view.SimpleFilesystemView"),
+ ("py:class", "spack.traverse.EdgeAndDepth"),
]
# The reST default role (used for this markup: `text`) to use for all documents.
diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py
index 2b0c4f4c37..404288ff83 100644
--- a/lib/spack/spack/database.py
+++ b/lib/spack/spack/database.py
@@ -207,7 +207,7 @@ class InstallRecord:
def __init__(
self,
spec: "spack.spec.Spec",
- path: str,
+ path: Optional[str],
installed: bool,
ref_count: int = 0,
explicit: bool = False,
@@ -845,7 +845,7 @@ class Database:
):
tty.warn(f"Spack database version changed from {version} to {_DB_VERSION}. Upgrading.")
- self.reindex(spack.store.STORE.layout)
+ self.reindex()
installs = dict(
(k, v.to_dict(include_fields=self._record_fields)) for k, v in self._data.items()
)
@@ -918,8 +918,6 @@ class Database:
if self.is_upstream:
raise UpstreamDatabaseLockingError("Cannot reindex an upstream database")
- error: Optional[CorruptDatabaseError] = None
-
# Special transaction to avoid recursive reindex calls and to
# ignore errors if we need to rebuild a corrupt database.
def _read_suppress_error():
@@ -927,99 +925,116 @@ class Database:
if os.path.isfile(self._index_path):
self._read_from_file(self._index_path)
except CorruptDatabaseError as e:
- nonlocal error
- error = e
+ tty.warn(f"Reindexing corrupt database, error was: {e}")
self._data = {}
self._installed_prefixes = set()
- transaction = lk.WriteTransaction(
- self.lock, acquire=_read_suppress_error, release=self._write
- )
-
- with transaction:
- if error is not None:
- tty.warn(f"Spack database was corrupt. Will rebuild. Error was: {error}")
-
- old_data = self._data
- old_installed_prefixes = self._installed_prefixes
+ with lk.WriteTransaction(self.lock, acquire=_read_suppress_error, release=self._write):
+ old_installed_prefixes, self._installed_prefixes = self._installed_prefixes, set()
+ old_data, self._data = self._data, {}
try:
- self._construct_from_directory_layout(old_data)
+ self._reindex(old_data)
except BaseException:
# If anything explodes, restore old data, skip write.
self._data = old_data
self._installed_prefixes = old_installed_prefixes
raise
- def _construct_entry_from_directory_layout(
- self,
- old_data: Dict[str, InstallRecord],
- spec: "spack.spec.Spec",
- deprecator: Optional["spack.spec.Spec"] = None,
- ):
- # Try to recover explicit value from old DB, but
- # default it to True if DB was corrupt. This is
- # just to be conservative in case a command like
- # "autoremove" is run by the user after a reindex.
- tty.debug(f"Reconstructing from spec file: {spec}")
- explicit = True
- inst_time = os.stat(spec.prefix).st_ctime
- if old_data is not None:
- old_info = old_data.get(spec.dag_hash())
- if old_info is not None:
- explicit = old_info.explicit
- inst_time = old_info.installation_time
-
- self._add(spec, explicit=explicit, installation_time=inst_time)
- if deprecator:
- self._deprecate(spec, deprecator)
-
- def _construct_from_directory_layout(self, old_data: Dict[str, InstallRecord]):
- # Read first the spec files in the prefixes. They should be considered authoritative with
- # respect to DB reindexing, as entries in the DB may be corrupted in a way that still makes
- # them readable. If we considered DB entries authoritative instead, we would perpetuate
- # errors over a reindex.
- assert self.layout is not None, "Cannot reindex a database without a known layout"
- with self.layout.disable_upstream_check():
- # Initialize data in the reconstructed DB
- self._data = {}
- self._installed_prefixes = set()
-
- # Start inspecting the installed prefixes
- processed_specs = set()
-
- for spec in self.layout.all_specs():
- self._construct_entry_from_directory_layout(old_data, spec)
- processed_specs.add(spec)
-
- for spec, deprecator in self.layout.all_deprecated_specs():
- self._construct_entry_from_directory_layout(old_data, spec, deprecator)
- processed_specs.add(spec)
-
- for entry in old_data.values():
- # We already took care of this spec using spec file from its prefix.
- if entry.spec in processed_specs:
- tty.debug(
- f"Skipping reconstruction from old db: {entry.spec}"
- " [already reconstructed from spec file]"
- )
- continue
+ def _reindex(self, old_data: Dict[str, InstallRecord]):
+ # Specs on the file system are the source of truth for record.spec. The old database
+ # values, if available, are the source of truth for the rest of the record.
+ assert self.layout, "Database layout must be set to reindex"
- # If we arrived here it very likely means that we have external specs that are not
- # dependencies of other specs. This may be the case for externally installed
- # compilers or externally installed applications.
- tty.debug(f"Reconstructing from old db: {entry.spec}")
- try:
- self._add(
- spec=entry.spec,
- explicit=entry.explicit,
- installation_time=entry.installation_time,
- )
- processed_specs.add(entry.spec)
- except Exception as e:
- # Something went wrong, so the spec was not restored from old data
- tty.debug(e)
+ specs_from_fs = self.layout.all_specs()
+ deprecated_for = self.layout.deprecated_for(specs_from_fs)
+
+ known_specs: List[spack.spec.Spec] = [
+ *specs_from_fs,
+ *(deprecated for _, deprecated in deprecated_for),
+ *(rec.spec for rec in old_data.values()),
+ ]
+
+ upstream_hashes = {
+ dag_hash for upstream in self.upstream_dbs for dag_hash in upstream._data
+ }
+ upstream_hashes.difference_update(spec.dag_hash() for spec in known_specs)
+
+ def create_node(edge: spack.spec.DependencySpec, is_upstream: bool):
+ if is_upstream:
+ return
+
+ self._data[edge.spec.dag_hash()] = InstallRecord(
+ spec=edge.spec.copy(deps=False),
+ path=edge.spec.external_path if edge.spec.external else None,
+ installed=edge.spec.external,
+ )
- self._check_ref_counts()
+ # Store all nodes of known specs, excluding ones found in upstreams
+ tr.traverse_breadth_first_with_visitor(
+ known_specs,
+ tr.CoverNodesVisitor(
+ NoUpstreamVisitor(upstream_hashes, create_node), key=tr.by_dag_hash
+ ),
+ )
+
+ # Store the prefix and other information for specs that were found on the file system
+ for s in specs_from_fs:
+ record = self._data[s.dag_hash()]
+ record.path = s.prefix
+ record.installed = True
+ record.explicit = True # conservative assumption
+ record.installation_time = os.stat(s.prefix).st_ctime
+
+ # Deprecate specs
+ for new, old in deprecated_for:
+ self._data[old.dag_hash()].deprecated_for = new.dag_hash()
+
+ # Copy data we have from the old database
+ for old_record in old_data.values():
+ record = self._data[old_record.spec.dag_hash()]
+ record.explicit = old_record.explicit
+ record.installation_time = old_record.installation_time
+ record.origin = old_record.origin
+ record.deprecated_for = old_record.deprecated_for
+
+ # Warn when the spec has been removed from the file system (i.e. it was not detected)
+ if not record.installed and old_record.installed:
+ tty.warn(
+ f"Spec {old_record.spec.short_spec} was marked installed in the database "
+ "but was not found on the file system. It is now marked as missing."
+ )
+
+ def create_edge(edge: spack.spec.DependencySpec, is_upstream: bool):
+ if not edge.parent:
+ return
+ parent_record = self._data[edge.parent.dag_hash()]
+ if is_upstream:
+ upstream, child_record = self.query_by_spec_hash(edge.spec.dag_hash())
+ assert upstream and child_record, "Internal error: upstream spec not found"
+ else:
+ child_record = self._data[edge.spec.dag_hash()]
+ parent_record.spec._add_dependency(
+ child_record.spec, depflag=edge.depflag, virtuals=edge.virtuals
+ )
+
+ # Then store edges
+ tr.traverse_breadth_first_with_visitor(
+ known_specs,
+ tr.CoverEdgesVisitor(
+ NoUpstreamVisitor(upstream_hashes, create_edge), key=tr.by_dag_hash
+ ),
+ )
+
+ # Finally update the ref counts
+ for record in self._data.values():
+ for dep in record.spec.dependencies(deptype=_TRACKED_DEPENDENCIES):
+ dep_record = self._data.get(dep.dag_hash())
+ if dep_record: # dep might be upstream
+ dep_record.ref_count += 1
+ if record.deprecated_for:
+ self._data[record.deprecated_for].ref_count += 1
+
+ self._check_ref_counts()
def _check_ref_counts(self):
"""Ensure consistency of reference counts in the DB.
@@ -1199,7 +1214,7 @@ class Database:
for dep in spec.edges_to_dependencies(depflag=_TRACKED_DEPENDENCIES):
dkey = dep.spec.dag_hash()
upstream, record = self.query_by_spec_hash(dkey)
- assert record, f"Missing dependency {dep.spec} in DB"
+ assert record, f"Missing dependency {dep.spec.short_spec} in DB"
new_spec._add_dependency(record.spec, depflag=dep.depflag, virtuals=dep.virtuals)
if not upstream:
record.ref_count += 1
@@ -1711,6 +1726,33 @@ class Database:
rec.explicit = explicit
+class NoUpstreamVisitor:
+ """Gives edges to upstream specs, but does follow edges from upstream specs."""
+
+ def __init__(
+ self,
+ upstream_hashes: Set[str],
+ on_visit: Callable[["spack.spec.DependencySpec", bool], None],
+ ):
+ self.upstream_hashes = upstream_hashes
+ self.on_visit = on_visit
+
+ def accept(self, item: tr.EdgeAndDepth) -> bool:
+ self.on_visit(item.edge, self.is_upstream(item))
+ return True
+
+ def is_upstream(self, item: tr.EdgeAndDepth) -> bool:
+ return item.edge.spec.dag_hash() in self.upstream_hashes
+
+ def neighbors(self, item: tr.EdgeAndDepth):
+ # Prune edges from upstream nodes, only follow database tracked dependencies
+ return (
+ []
+ if self.is_upstream(item)
+ else item.edge.spec.edges_to_dependencies(depflag=_TRACKED_DEPENDENCIES)
+ )
+
+
class UpstreamDatabaseLockingError(SpackError):
"""Raised when an operation would need to lock an upstream database"""
diff --git a/lib/spack/spack/directory_layout.py b/lib/spack/spack/directory_layout.py
index b094012fa6..7b715d14d3 100644
--- a/lib/spack/spack/directory_layout.py
+++ b/lib/spack/spack/directory_layout.py
@@ -4,14 +4,12 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import errno
-import glob
import os
-import posixpath
import re
import shutil
import sys
-from contextlib import contextmanager
from pathlib import Path
+from typing import List, Optional, Tuple
import llnl.util.filesystem as fs
from llnl.util.symlink import readlink
@@ -33,6 +31,42 @@ def _check_concrete(spec):
raise ValueError("Specs passed to a DirectoryLayout must be concrete!")
+def _get_spec(prefix: str) -> Optional["spack.spec.Spec"]:
+ """Returns a spec if the prefix contains a spec file in the .spack subdir"""
+ for f in ("spec.json", "spec.yaml"):
+ try:
+ return spack.spec.Spec.from_specfile(os.path.join(prefix, ".spack", f))
+ except Exception:
+ continue
+ return None
+
+
+def specs_from_metadata_dirs(root: str) -> List["spack.spec.Spec"]:
+ stack = [root]
+ specs = []
+
+ while stack:
+ prefix = stack.pop()
+
+ spec = _get_spec(prefix)
+
+ if spec:
+ spec.prefix = prefix
+ specs.append(spec)
+ continue
+
+ try:
+ scandir = os.scandir(prefix)
+ except OSError:
+ continue
+
+ with scandir as entries:
+ for entry in entries:
+ if entry.is_dir(follow_symlinks=False):
+ stack.append(entry.path)
+ return specs
+
+
class DirectoryLayout:
"""A directory layout is used to associate unique paths with specs.
Different installations are going to want different layouts for their
@@ -184,12 +218,6 @@ class DirectoryLayout:
return yaml_path if os.path.exists(yaml_path) else json_path
- @contextmanager
- def disable_upstream_check(self):
- self.check_upstream = False
- yield
- self.check_upstream = True
-
def metadata_path(self, spec):
return os.path.join(spec.prefix, self.metadata_dir)
@@ -244,53 +272,6 @@ class DirectoryLayout:
"Spec file in %s does not match hash!" % spec_file_path
)
- def all_specs(self):
- if not os.path.isdir(self.root):
- return []
-
- specs = []
- for _, path_scheme in self.projections.items():
- path_elems = ["*"] * len(path_scheme.split(posixpath.sep))
- # NOTE: Does not validate filename extension; should happen later
- path_elems += [self.metadata_dir, "spec.json"]
- pattern = os.path.join(self.root, *path_elems)
- spec_files = glob.glob(pattern)
- if not spec_files: # we're probably looking at legacy yaml...
- path_elems += [self.metadata_dir, "spec.yaml"]
- pattern = os.path.join(self.root, *path_elems)
- spec_files = glob.glob(pattern)
- specs.extend([self.read_spec(s) for s in spec_files])
- return specs
-
- def all_deprecated_specs(self):
- if not os.path.isdir(self.root):
- return []
-
- deprecated_specs = set()
- for _, path_scheme in self.projections.items():
- path_elems = ["*"] * len(path_scheme.split(posixpath.sep))
- # NOTE: Does not validate filename extension; should happen later
- path_elems += [
- self.metadata_dir,
- self.deprecated_dir,
- "*_spec.*",
- ] # + self.spec_file_name]
- pattern = os.path.join(self.root, *path_elems)
- spec_files = glob.glob(pattern)
- get_depr_spec_file = lambda x: os.path.join(
- os.path.dirname(os.path.dirname(x)), self.spec_file_name
- )
- deprecated_specs |= set(
- (self.read_spec(s), self.read_spec(get_depr_spec_file(s))) for s in spec_files
- )
- return deprecated_specs
-
- def specs_by_hash(self):
- by_hash = {}
- for spec in self.all_specs():
- by_hash[spec.dag_hash()] = spec
- return by_hash
-
def path_for_spec(self, spec):
"""Return absolute path from the root to a directory for the spec."""
_check_concrete(spec)
@@ -356,6 +337,35 @@ class DirectoryLayout:
raise e
path = os.path.dirname(path)
+ def all_specs(self) -> List["spack.spec.Spec"]:
+ """Returns a list of all specs detected in self.root, detected by `.spack` directories.
+ Their prefix is set to the directory containing the `.spack` directory. Note that these
+ specs may follow a different layout than the current layout if it was changed after
+ installation."""
+ return specs_from_metadata_dirs(self.root)
+
+ def deprecated_for(
+ self, specs: List["spack.spec.Spec"]
+ ) -> List[Tuple["spack.spec.Spec", "spack.spec.Spec"]]:
+ """Returns a list of tuples of specs (new, old) where new is deprecated for old"""
+ spec_with_deprecated = []
+ for spec in specs:
+ try:
+ deprecated = os.scandir(
+ os.path.join(str(spec.prefix), self.metadata_dir, self.deprecated_dir)
+ )
+ except OSError:
+ continue
+
+ with deprecated as entries:
+ for entry in entries:
+ try:
+ deprecated_spec = spack.spec.Spec.from_specfile(entry.path)
+ spec_with_deprecated.append((spec, deprecated_spec))
+ except Exception:
+ continue
+ return spec_with_deprecated
+
class DirectoryLayoutError(SpackError):
"""Superclass for directory layout errors."""
diff --git a/lib/spack/spack/test/cmd/reindex.py b/lib/spack/spack/test/cmd/reindex.py
index 6e48a3c21c..abaaf4530c 100644
--- a/lib/spack/spack/test/cmd/reindex.py
+++ b/lib/spack/spack/test/cmd/reindex.py
@@ -55,12 +55,24 @@ def test_reindex_with_deprecated_packages(
deprecate("-y", "libelf@0.8.12", "libelf@0.8.13")
- all_installed = spack.store.STORE.db.query(installed=any)
- non_deprecated = spack.store.STORE.db.query(installed=True)
+ db = spack.store.STORE.db
+
+ all_installed = db.query(installed=any)
+ non_deprecated = db.query(installed=True)
_clear_db(tmp_path)
reindex()
- assert spack.store.STORE.db.query(installed=any) == all_installed
- assert spack.store.STORE.db.query(installed=True) == non_deprecated
+ assert db.query(installed=any) == all_installed
+ assert db.query(installed=True) == non_deprecated
+
+ old_libelf = db.query_local_by_spec_hash(
+ db.query_local("libelf@0.8.12", installed=any)[0].dag_hash()
+ )
+ new_libelf = db.query_local_by_spec_hash(
+ db.query_local("libelf@0.8.13", installed=True)[0].dag_hash()
+ )
+ assert old_libelf.deprecated_for == new_libelf.spec.dag_hash()
+ assert new_libelf.deprecated_for is None
+ assert new_libelf.ref_count == 1
diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py
index d1d4917179..5e8e5abbc8 100644
--- a/lib/spack/spack/test/database.py
+++ b/lib/spack/spack/test/database.py
@@ -7,6 +7,7 @@ import datetime
import functools
import json
import os
+import re
import shutil
import sys
@@ -982,9 +983,12 @@ def test_reindex_removed_prefix_is_not_installed(mutable_database, mock_store, c
# Reindex should pick up libelf as a dependency of libdwarf
spack.store.STORE.reindex()
- # Reindexing should warn about libelf not being found on the filesystem
- err = capfd.readouterr()[1]
- assert "this directory does not contain an installation of the spec" in err
+ # Reindexing should warn about libelf not being found on the filesystem
+ assert re.search(
+ "libelf@0.8.13.+ was marked installed in the database "
+ "but was not found on the file system",
+ capfd.readouterr().err,
+ )
# And we should still have libelf in the database, but not installed.
assert not mutable_database.query_one("libelf", installed=True)
@@ -1124,3 +1128,53 @@ def test_database_errors_with_just_a_version_key(tmp_path):
with pytest.raises(spack.database.InvalidDatabaseVersionError):
spack.database.Database(root).query_local()
+
+
+def test_reindex_with_upstreams(tmp_path, monkeypatch, mock_packages, config):
+ # Reindexing should not put install records of upstream entries into the local database. Here
+ # we install `mpileaks` locally with dependencies in the upstream. And we even install
+ # `mpileaks` with the same hash in the upstream. After reindexing, `mpileaks` should still be
+ # in the local db, and `callpath` should not.
+ mpileaks = spack.spec.Spec("mpileaks").concretized()
+ callpath = mpileaks.dependencies("callpath")[0]
+
+ upstream_store = spack.store.create(
+ {"config": {"install_tree": {"root": str(tmp_path / "upstream")}}}
+ )
+ monkeypatch.setattr(spack.store, "STORE", upstream_store)
+ callpath.package.do_install(fake=True)
+
+ local_store = spack.store.create(
+ {
+ "config": {"install_tree": {"root": str(tmp_path / "local")}},
+ "upstreams": {"my-upstream": {"install_tree": str(tmp_path / "upstream")}},
+ }
+ )
+ monkeypatch.setattr(spack.store, "STORE", local_store)
+ mpileaks.package.do_install(fake=True)
+
+ # Sanity check that callpath is from upstream.
+ assert not local_store.db.query_local("callpath")
+ assert local_store.db.query("callpath")
+
+ # Install mpileaks also upstream with the same hash to ensure that determining upstreamness
+ # checks local installs before upstream databases, even when the local database is being
+ # reindexed.
+ monkeypatch.setattr(spack.store, "STORE", upstream_store)
+ mpileaks.package.do_install(fake=True)
+
+ # Delete the local database
+ shutil.rmtree(local_store.db.database_directory)
+
+ # Create a new instance so that we don't have cached specs in memory
+ reindexed_local_store = spack.store.create(
+ {
+ "config": {"install_tree": {"root": str(tmp_path / "local")}},
+ "upstreams": {"my-upstream": {"install_tree": str(tmp_path / "upstream")}},
+ }
+ )
+ reindexed_local_store.db.reindex()
+
+ assert not reindexed_local_store.db.query_local("callpath")
+ assert reindexed_local_store.db.query("callpath") == [callpath]
+ assert reindexed_local_store.db.query_local("mpileaks") == [mpileaks]
diff --git a/lib/spack/spack/traverse.py b/lib/spack/spack/traverse.py
index 091ebfe193..f6c5589b2a 100644
--- a/lib/spack/spack/traverse.py
+++ b/lib/spack/spack/traverse.py
@@ -3,8 +3,8 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-from collections import defaultdict, namedtuple
-from typing import Union
+from collections import defaultdict
+from typing import NamedTuple, Union
import spack.deptypes as dt
import spack.spec
@@ -12,11 +12,14 @@ import spack.spec
# Export only the high-level API.
__all__ = ["traverse_edges", "traverse_nodes", "traverse_tree"]
+
#: Data class that stores a directed edge together with depth at
#: which the target vertex was found. It is passed to ``accept``
#: and ``neighbors`` of visitors, so they can decide whether to
#: follow the edge or not.
-EdgeAndDepth = namedtuple("EdgeAndDepth", ["edge", "depth"])
+class EdgeAndDepth(NamedTuple):
+ edge: "spack.spec.DependencySpec"
+ depth: int
def sort_edges(edges):