diff options
-rw-r--r-- | lib/spack/docs/conf.py | 1 | ||||
-rw-r--r-- | lib/spack/spack/database.py | 214 | ||||
-rw-r--r-- | lib/spack/spack/directory_layout.py | 122 | ||||
-rw-r--r-- | lib/spack/spack/test/cmd/reindex.py | 20 | ||||
-rw-r--r-- | lib/spack/spack/test/database.py | 60 | ||||
-rw-r--r-- | lib/spack/spack/traverse.py | 9 |
6 files changed, 274 insertions, 152 deletions
diff --git a/lib/spack/docs/conf.py b/lib/spack/docs/conf.py index 9819d6f4d3..95b711db93 100644 --- a/lib/spack/docs/conf.py +++ b/lib/spack/docs/conf.py @@ -218,6 +218,7 @@ nitpick_ignore = [ ("py:class", "spack.spec.SpecfileReaderBase"), ("py:class", "spack.install_test.Pb"), ("py:class", "spack.filesystem_view.SimpleFilesystemView"), + ("py:class", "spack.traverse.EdgeAndDepth"), ] # The reST default role (used for this markup: `text`) to use for all documents. diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index 2b0c4f4c37..404288ff83 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -207,7 +207,7 @@ class InstallRecord: def __init__( self, spec: "spack.spec.Spec", - path: str, + path: Optional[str], installed: bool, ref_count: int = 0, explicit: bool = False, @@ -845,7 +845,7 @@ class Database: ): tty.warn(f"Spack database version changed from {version} to {_DB_VERSION}. Upgrading.") - self.reindex(spack.store.STORE.layout) + self.reindex() installs = dict( (k, v.to_dict(include_fields=self._record_fields)) for k, v in self._data.items() ) @@ -918,8 +918,6 @@ class Database: if self.is_upstream: raise UpstreamDatabaseLockingError("Cannot reindex an upstream database") - error: Optional[CorruptDatabaseError] = None - # Special transaction to avoid recursive reindex calls and to # ignore errors if we need to rebuild a corrupt database. def _read_suppress_error(): @@ -927,99 +925,116 @@ class Database: if os.path.isfile(self._index_path): self._read_from_file(self._index_path) except CorruptDatabaseError as e: - nonlocal error - error = e + tty.warn(f"Reindexing corrupt database, error was: {e}") self._data = {} self._installed_prefixes = set() - transaction = lk.WriteTransaction( - self.lock, acquire=_read_suppress_error, release=self._write - ) - - with transaction: - if error is not None: - tty.warn(f"Spack database was corrupt. Will rebuild. Error was: {error}") - - old_data = self._data - old_installed_prefixes = self._installed_prefixes + with lk.WriteTransaction(self.lock, acquire=_read_suppress_error, release=self._write): + old_installed_prefixes, self._installed_prefixes = self._installed_prefixes, set() + old_data, self._data = self._data, {} try: - self._construct_from_directory_layout(old_data) + self._reindex(old_data) except BaseException: # If anything explodes, restore old data, skip write. self._data = old_data self._installed_prefixes = old_installed_prefixes raise - def _construct_entry_from_directory_layout( - self, - old_data: Dict[str, InstallRecord], - spec: "spack.spec.Spec", - deprecator: Optional["spack.spec.Spec"] = None, - ): - # Try to recover explicit value from old DB, but - # default it to True if DB was corrupt. This is - # just to be conservative in case a command like - # "autoremove" is run by the user after a reindex. - tty.debug(f"Reconstructing from spec file: {spec}") - explicit = True - inst_time = os.stat(spec.prefix).st_ctime - if old_data is not None: - old_info = old_data.get(spec.dag_hash()) - if old_info is not None: - explicit = old_info.explicit - inst_time = old_info.installation_time - - self._add(spec, explicit=explicit, installation_time=inst_time) - if deprecator: - self._deprecate(spec, deprecator) - - def _construct_from_directory_layout(self, old_data: Dict[str, InstallRecord]): - # Read first the spec files in the prefixes. They should be considered authoritative with - # respect to DB reindexing, as entries in the DB may be corrupted in a way that still makes - # them readable. If we considered DB entries authoritative instead, we would perpetuate - # errors over a reindex. - assert self.layout is not None, "Cannot reindex a database without a known layout" - with self.layout.disable_upstream_check(): - # Initialize data in the reconstructed DB - self._data = {} - self._installed_prefixes = set() - - # Start inspecting the installed prefixes - processed_specs = set() - - for spec in self.layout.all_specs(): - self._construct_entry_from_directory_layout(old_data, spec) - processed_specs.add(spec) - - for spec, deprecator in self.layout.all_deprecated_specs(): - self._construct_entry_from_directory_layout(old_data, spec, deprecator) - processed_specs.add(spec) - - for entry in old_data.values(): - # We already took care of this spec using spec file from its prefix. - if entry.spec in processed_specs: - tty.debug( - f"Skipping reconstruction from old db: {entry.spec}" - " [already reconstructed from spec file]" - ) - continue + def _reindex(self, old_data: Dict[str, InstallRecord]): + # Specs on the file system are the source of truth for record.spec. The old database values + # if available are the source of truth for the rest of the record. + assert self.layout, "Database layout must be set to reindex" - # If we arrived here it very likely means that we have external specs that are not - # dependencies of other specs. This may be the case for externally installed - # compilers or externally installed applications. - tty.debug(f"Reconstructing from old db: {entry.spec}") - try: - self._add( - spec=entry.spec, - explicit=entry.explicit, - installation_time=entry.installation_time, - ) - processed_specs.add(entry.spec) - except Exception as e: - # Something went wrong, so the spec was not restored from old data - tty.debug(e) + specs_from_fs = self.layout.all_specs() + deprecated_for = self.layout.deprecated_for(specs_from_fs) + + known_specs: List[spack.spec.Spec] = [ + *specs_from_fs, + *(deprecated for _, deprecated in deprecated_for), + *(rec.spec for rec in old_data.values()), + ] + + upstream_hashes = { + dag_hash for upstream in self.upstream_dbs for dag_hash in upstream._data + } + upstream_hashes.difference_update(spec.dag_hash() for spec in known_specs) + + def create_node(edge: spack.spec.DependencySpec, is_upstream: bool): + if is_upstream: + return + + self._data[edge.spec.dag_hash()] = InstallRecord( + spec=edge.spec.copy(deps=False), + path=edge.spec.external_path if edge.spec.external else None, + installed=edge.spec.external, + ) - self._check_ref_counts() + # Store all nodes of known specs, excluding ones found in upstreams + tr.traverse_breadth_first_with_visitor( + known_specs, + tr.CoverNodesVisitor( + NoUpstreamVisitor(upstream_hashes, create_node), key=tr.by_dag_hash + ), + ) + + # Store the prefix and other information for specs were found on the file system + for s in specs_from_fs: + record = self._data[s.dag_hash()] + record.path = s.prefix + record.installed = True + record.explicit = True # conservative assumption + record.installation_time = os.stat(s.prefix).st_ctime + + # Deprecate specs + for new, old in deprecated_for: + self._data[old.dag_hash()].deprecated_for = new.dag_hash() + + # Copy data we have from the old database + for old_record in old_data.values(): + record = self._data[old_record.spec.dag_hash()] + record.explicit = old_record.explicit + record.installation_time = old_record.installation_time + record.origin = old_record.origin + record.deprecated_for = old_record.deprecated_for + + # Warn when the spec has been removed from the file system (i.e. it was not detected) + if not record.installed and old_record.installed: + tty.warn( + f"Spec {old_record.spec.short_spec} was marked installed in the database " + "but was not found on the file system. It is now marked as missing." + ) + + def create_edge(edge: spack.spec.DependencySpec, is_upstream: bool): + if not edge.parent: + return + parent_record = self._data[edge.parent.dag_hash()] + if is_upstream: + upstream, child_record = self.query_by_spec_hash(edge.spec.dag_hash()) + assert upstream and child_record, "Internal error: upstream spec not found" + else: + child_record = self._data[edge.spec.dag_hash()] + parent_record.spec._add_dependency( + child_record.spec, depflag=edge.depflag, virtuals=edge.virtuals + ) + + # Then store edges + tr.traverse_breadth_first_with_visitor( + known_specs, + tr.CoverEdgesVisitor( + NoUpstreamVisitor(upstream_hashes, create_edge), key=tr.by_dag_hash + ), + ) + + # Finally update the ref counts + for record in self._data.values(): + for dep in record.spec.dependencies(deptype=_TRACKED_DEPENDENCIES): + dep_record = self._data.get(dep.dag_hash()) + if dep_record: # dep might be upstream + dep_record.ref_count += 1 + if record.deprecated_for: + self._data[record.deprecated_for].ref_count += 1 + + self._check_ref_counts() def _check_ref_counts(self): """Ensure consistency of reference counts in the DB. @@ -1199,7 +1214,7 @@ class Database: for dep in spec.edges_to_dependencies(depflag=_TRACKED_DEPENDENCIES): dkey = dep.spec.dag_hash() upstream, record = self.query_by_spec_hash(dkey) - assert record, f"Missing dependency {dep.spec} in DB" + assert record, f"Missing dependency {dep.spec.short_spec} in DB" new_spec._add_dependency(record.spec, depflag=dep.depflag, virtuals=dep.virtuals) if not upstream: record.ref_count += 1 @@ -1711,6 +1726,33 @@ class Database: rec.explicit = explicit +class NoUpstreamVisitor: + """Gives edges to upstream specs, but does follow edges from upstream specs.""" + + def __init__( + self, + upstream_hashes: Set[str], + on_visit: Callable[["spack.spec.DependencySpec", bool], None], + ): + self.upstream_hashes = upstream_hashes + self.on_visit = on_visit + + def accept(self, item: tr.EdgeAndDepth) -> bool: + self.on_visit(item.edge, self.is_upstream(item)) + return True + + def is_upstream(self, item: tr.EdgeAndDepth) -> bool: + return item.edge.spec.dag_hash() in self.upstream_hashes + + def neighbors(self, item: tr.EdgeAndDepth): + # Prune edges from upstream nodes, only follow database tracked dependencies + return ( + [] + if self.is_upstream(item) + else item.edge.spec.edges_to_dependencies(depflag=_TRACKED_DEPENDENCIES) + ) + + class UpstreamDatabaseLockingError(SpackError): """Raised when an operation would need to lock an upstream database""" diff --git a/lib/spack/spack/directory_layout.py b/lib/spack/spack/directory_layout.py index b094012fa6..7b715d14d3 100644 --- a/lib/spack/spack/directory_layout.py +++ b/lib/spack/spack/directory_layout.py @@ -4,14 +4,12 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import errno -import glob import os -import posixpath import re import shutil import sys -from contextlib import contextmanager from pathlib import Path +from typing import List, Optional, Tuple import llnl.util.filesystem as fs from llnl.util.symlink import readlink @@ -33,6 +31,42 @@ def _check_concrete(spec): raise ValueError("Specs passed to a DirectoryLayout must be concrete!") +def _get_spec(prefix: str) -> Optional["spack.spec.Spec"]: + """Returns a spec if the prefix contains a spec file in the .spack subdir""" + for f in ("spec.json", "spec.yaml"): + try: + return spack.spec.Spec.from_specfile(os.path.join(prefix, ".spack", f)) + except Exception: + continue + return None + + +def specs_from_metadata_dirs(root: str) -> List["spack.spec.Spec"]: + stack = [root] + specs = [] + + while stack: + prefix = stack.pop() + + spec = _get_spec(prefix) + + if spec: + spec.prefix = prefix + specs.append(spec) + continue + + try: + scandir = os.scandir(prefix) + except OSError: + continue + + with scandir as entries: + for entry in entries: + if entry.is_dir(follow_symlinks=False): + stack.append(entry.path) + return specs + + class DirectoryLayout: """A directory layout is used to associate unique paths with specs. Different installations are going to want different layouts for their @@ -184,12 +218,6 @@ class DirectoryLayout: return yaml_path if os.path.exists(yaml_path) else json_path - @contextmanager - def disable_upstream_check(self): - self.check_upstream = False - yield - self.check_upstream = True - def metadata_path(self, spec): return os.path.join(spec.prefix, self.metadata_dir) @@ -244,53 +272,6 @@ class DirectoryLayout: "Spec file in %s does not match hash!" % spec_file_path ) - def all_specs(self): - if not os.path.isdir(self.root): - return [] - - specs = [] - for _, path_scheme in self.projections.items(): - path_elems = ["*"] * len(path_scheme.split(posixpath.sep)) - # NOTE: Does not validate filename extension; should happen later - path_elems += [self.metadata_dir, "spec.json"] - pattern = os.path.join(self.root, *path_elems) - spec_files = glob.glob(pattern) - if not spec_files: # we're probably looking at legacy yaml... - path_elems += [self.metadata_dir, "spec.yaml"] - pattern = os.path.join(self.root, *path_elems) - spec_files = glob.glob(pattern) - specs.extend([self.read_spec(s) for s in spec_files]) - return specs - - def all_deprecated_specs(self): - if not os.path.isdir(self.root): - return [] - - deprecated_specs = set() - for _, path_scheme in self.projections.items(): - path_elems = ["*"] * len(path_scheme.split(posixpath.sep)) - # NOTE: Does not validate filename extension; should happen later - path_elems += [ - self.metadata_dir, - self.deprecated_dir, - "*_spec.*", - ] # + self.spec_file_name] - pattern = os.path.join(self.root, *path_elems) - spec_files = glob.glob(pattern) - get_depr_spec_file = lambda x: os.path.join( - os.path.dirname(os.path.dirname(x)), self.spec_file_name - ) - deprecated_specs |= set( - (self.read_spec(s), self.read_spec(get_depr_spec_file(s))) for s in spec_files - ) - return deprecated_specs - - def specs_by_hash(self): - by_hash = {} - for spec in self.all_specs(): - by_hash[spec.dag_hash()] = spec - return by_hash - def path_for_spec(self, spec): """Return absolute path from the root to a directory for the spec.""" _check_concrete(spec) @@ -356,6 +337,35 @@ class DirectoryLayout: raise e path = os.path.dirname(path) + def all_specs(self) -> List["spack.spec.Spec"]: + """Returns a list of all specs detected in self.root, detected by `.spack` directories. + Their prefix is set to the directory containing the `.spack` directory. Note that these + specs may follow a different layout than the current layout if it was changed after + installation.""" + return specs_from_metadata_dirs(self.root) + + def deprecated_for( + self, specs: List["spack.spec.Spec"] + ) -> List[Tuple["spack.spec.Spec", "spack.spec.Spec"]]: + """Returns a list of tuples of specs (new, old) where new is deprecated for old""" + spec_with_deprecated = [] + for spec in specs: + try: + deprecated = os.scandir( + os.path.join(str(spec.prefix), self.metadata_dir, self.deprecated_dir) + ) + except OSError: + continue + + with deprecated as entries: + for entry in entries: + try: + deprecated_spec = spack.spec.Spec.from_specfile(entry.path) + spec_with_deprecated.append((spec, deprecated_spec)) + except Exception: + continue + return spec_with_deprecated + class DirectoryLayoutError(SpackError): """Superclass for directory layout errors.""" diff --git a/lib/spack/spack/test/cmd/reindex.py b/lib/spack/spack/test/cmd/reindex.py index 6e48a3c21c..abaaf4530c 100644 --- a/lib/spack/spack/test/cmd/reindex.py +++ b/lib/spack/spack/test/cmd/reindex.py @@ -55,12 +55,24 @@ def test_reindex_with_deprecated_packages( deprecate("-y", "libelf@0.8.12", "libelf@0.8.13") - all_installed = spack.store.STORE.db.query(installed=any) - non_deprecated = spack.store.STORE.db.query(installed=True) + db = spack.store.STORE.db + + all_installed = db.query(installed=any) + non_deprecated = db.query(installed=True) _clear_db(tmp_path) reindex() - assert spack.store.STORE.db.query(installed=any) == all_installed - assert spack.store.STORE.db.query(installed=True) == non_deprecated + assert db.query(installed=any) == all_installed + assert db.query(installed=True) == non_deprecated + + old_libelf = db.query_local_by_spec_hash( + db.query_local("libelf@0.8.12", installed=any)[0].dag_hash() + ) + new_libelf = db.query_local_by_spec_hash( + db.query_local("libelf@0.8.13", installed=True)[0].dag_hash() + ) + assert old_libelf.deprecated_for == new_libelf.spec.dag_hash() + assert new_libelf.deprecated_for is None + assert new_libelf.ref_count == 1 diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index d1d4917179..5e8e5abbc8 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -7,6 +7,7 @@ import datetime import functools import json import os +import re import shutil import sys @@ -982,9 +983,12 @@ def test_reindex_removed_prefix_is_not_installed(mutable_database, mock_store, c # Reindex should pick up libelf as a dependency of libdwarf spack.store.STORE.reindex() - # Reindexing should warn about libelf not being found on the filesystem - err = capfd.readouterr()[1] - assert "this directory does not contain an installation of the spec" in err + # Reindexing should warn about libelf not found on the filesystem + assert re.search( + "libelf@0.8.13.+ was marked installed in the database " + "but was not found on the file system", + capfd.readouterr().err, + ) # And we should still have libelf in the database, but not installed. assert not mutable_database.query_one("libelf", installed=True) @@ -1124,3 +1128,53 @@ def test_database_errors_with_just_a_version_key(tmp_path): with pytest.raises(spack.database.InvalidDatabaseVersionError): spack.database.Database(root).query_local() + + +def test_reindex_with_upstreams(tmp_path, monkeypatch, mock_packages, config): + # Reindexing should not put install records of upstream entries into the local database. Here + # we install `mpileaks` locally with dependencies in the upstream. And we even install + # `mpileaks` with the same hash in the upstream. After reindexing, `mpileaks` should still be + # in the local db, and `callpath` should not. + mpileaks = spack.spec.Spec("mpileaks").concretized() + callpath = mpileaks.dependencies("callpath")[0] + + upstream_store = spack.store.create( + {"config": {"install_tree": {"root": str(tmp_path / "upstream")}}} + ) + monkeypatch.setattr(spack.store, "STORE", upstream_store) + callpath.package.do_install(fake=True) + + local_store = spack.store.create( + { + "config": {"install_tree": {"root": str(tmp_path / "local")}}, + "upstreams": {"my-upstream": {"install_tree": str(tmp_path / "upstream")}}, + } + ) + monkeypatch.setattr(spack.store, "STORE", local_store) + mpileaks.package.do_install(fake=True) + + # Sanity check that callpath is from upstream. + assert not local_store.db.query_local("callpath") + assert local_store.db.query("callpath") + + # Install mpileaks also upstream with the same hash to ensure that determining upstreamness + # checks local installs before upstream databases, even when the local database is being + # reindexed. + monkeypatch.setattr(spack.store, "STORE", upstream_store) + mpileaks.package.do_install(fake=True) + + # Delete the local database + shutil.rmtree(local_store.db.database_directory) + + # Create a new instance s.t. we don't have cached specs in memory + reindexed_local_store = spack.store.create( + { + "config": {"install_tree": {"root": str(tmp_path / "local")}}, + "upstreams": {"my-upstream": {"install_tree": str(tmp_path / "upstream")}}, + } + ) + reindexed_local_store.db.reindex() + + assert not reindexed_local_store.db.query_local("callpath") + assert reindexed_local_store.db.query("callpath") == [callpath] + assert reindexed_local_store.db.query_local("mpileaks") == [mpileaks] diff --git a/lib/spack/spack/traverse.py b/lib/spack/spack/traverse.py index 091ebfe193..f6c5589b2a 100644 --- a/lib/spack/spack/traverse.py +++ b/lib/spack/spack/traverse.py @@ -3,8 +3,8 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -from collections import defaultdict, namedtuple -from typing import Union +from collections import defaultdict +from typing import NamedTuple, Union import spack.deptypes as dt import spack.spec @@ -12,11 +12,14 @@ import spack.spec # Export only the high-level API. __all__ = ["traverse_edges", "traverse_nodes", "traverse_tree"] + #: Data class that stores a directed edge together with depth at #: which the target vertex was found. It is passed to ``accept`` #: and ``neighbors`` of visitors, so they can decide whether to #: follow the edge or not. -EdgeAndDepth = namedtuple("EdgeAndDepth", ["edge", "depth"]) +class EdgeAndDepth(NamedTuple): + edge: "spack.spec.DependencySpec" + depth: int def sort_edges(edges): |