From ba1d29502388ba85d9c45df4ac70c8b980a8720d Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Mon, 7 Aug 2023 12:47:52 +0200 Subject: Extract prefix locks and failure markers from Database (#39024) This PR extracts two responsibilities from the `Database` class: 1. Managing locks for prefixes during an installation 2. Marking installation failures and pushes them into their own classes (`SpecLocker` and `FailureTracker`). These responsibilities are also pushed up into the `Store`, leaving to `Database` only the duty to manage `index.json` files. `SpecLocker` classes no longer share a global list of locks, but locks are per instance. Their identifier is simply `(dag hash, package name)`, and not the spec prefix path, to avoid circular dependencies across Store / Database / Spec. --- lib/spack/spack/cmd/clean.py | 3 +- lib/spack/spack/database.py | 500 ++++++++++++++++++------------------ lib/spack/spack/installer.py | 23 +- lib/spack/spack/package_base.py | 2 +- lib/spack/spack/stage.py | 2 +- lib/spack/spack/store.py | 37 ++- lib/spack/spack/test/cmd/clean.py | 4 +- lib/spack/spack/test/cmd/install.py | 9 +- lib/spack/spack/test/conftest.py | 13 +- lib/spack/spack/test/database.py | 63 ++--- lib/spack/spack/test/install.py | 4 +- lib/spack/spack/test/installer.py | 80 +++--- 12 files changed, 357 insertions(+), 383 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/cmd/clean.py b/lib/spack/spack/cmd/clean.py index 93edcd712f..89c1a90fc3 100644 --- a/lib/spack/spack/cmd/clean.py +++ b/lib/spack/spack/cmd/clean.py @@ -17,6 +17,7 @@ import spack.cmd.test import spack.config import spack.repo import spack.stage +import spack.store import spack.util.path from spack.paths import lib_path, var_path @@ -121,7 +122,7 @@ def clean(parser, args): if args.failures: tty.msg("Removing install failure marks") - spack.installer.clear_failures() + spack.store.STORE.failure_tracker.clear_all() if args.misc_cache: tty.msg("Removing cached information on repositories") 
diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index cc3a586218..ebca0f4850 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -21,10 +21,11 @@ filesystem. import contextlib import datetime import os +import pathlib import socket import sys import time -from typing import Dict, List, NamedTuple, Set, Type, Union +from typing import Any, Callable, Dict, Generator, List, NamedTuple, Set, Type, Union try: import uuid @@ -141,22 +142,23 @@ class InstallStatuses: def canonicalize(cls, query_arg): if query_arg is True: return [cls.INSTALLED] - elif query_arg is False: + if query_arg is False: return [cls.MISSING] - elif query_arg is any: + if query_arg is any: return [cls.INSTALLED, cls.DEPRECATED, cls.MISSING] - elif isinstance(query_arg, InstallStatus): + if isinstance(query_arg, InstallStatus): return [query_arg] - else: - try: # Try block catches if it is not an iterable at all - if any(type(x) != InstallStatus for x in query_arg): - raise TypeError - except TypeError: - raise TypeError( - "installation query must be `any`, boolean, " - "InstallStatus, or iterable of InstallStatus" - ) - return query_arg + try: + statuses = list(query_arg) + if all(isinstance(x, InstallStatus) for x in statuses): + return statuses + except TypeError: + pass + + raise TypeError( + "installation query must be `any`, boolean, " + "InstallStatus, or iterable of InstallStatus" + ) class InstallRecord: @@ -306,15 +308,16 @@ _QUERY_DOCSTRING = """ """ -#: Data class to configure locks in Database objects -#: -#: Args: -#: enable (bool): whether to enable locks or not. -#: database_timeout (int or None): timeout for the database lock -#: package_timeout (int or None): timeout for the package lock - class LockConfiguration(NamedTuple): + """Data class to configure locks in Database objects + + Args: + enable: whether to enable locks or not. 
+ database_timeout: timeout for the database lock + package_timeout: timeout for the package lock + """ + enable: bool database_timeout: Optional[int] package_timeout: Optional[int] @@ -348,13 +351,230 @@ def lock_configuration(configuration): ) -class Database: - #: Per-process lock objects for each install prefix - _prefix_locks: Dict[str, lk.Lock] = {} +def prefix_lock_path(root_dir: Union[str, pathlib.Path]) -> pathlib.Path: + """Returns the path of the prefix lock file, given the root directory. + + Args: + root_dir: root directory containing the database directory + """ + return pathlib.Path(root_dir) / _DB_DIRNAME / "prefix_lock" + + +def failures_lock_path(root_dir: Union[str, pathlib.Path]) -> pathlib.Path: + """Returns the path of the failures lock file, given the root directory. + + Args: + root_dir: root directory containing the database directory + """ + return pathlib.Path(root_dir) / _DB_DIRNAME / "prefix_failures" + + +class SpecLocker: + """Manages acquiring and releasing read or write locks on concrete specs.""" + + def __init__(self, lock_path: Union[str, pathlib.Path], default_timeout: Optional[float]): + self.lock_path = pathlib.Path(lock_path) + self.default_timeout = default_timeout + + # Maps (spec.dag_hash(), spec.name) to the corresponding lock object + self.locks: Dict[Tuple[str, str], lk.Lock] = {} + + def lock(self, spec: "spack.spec.Spec", timeout: Optional[float] = None) -> lk.Lock: + """Returns a lock on a concrete spec. + + The lock is a byte range lock on the nth byte of a file. + + The lock file is ``self.lock_path``. + + n is the sys.maxsize-bit prefix of the DAG hash. This makes the likelihood of collision + very low AND it gives us readers-writer lock semantics with just a single lockfile, so + no cleanup required. 
+ """ + assert spec.concrete, "cannot lock a non-concrete spec" + timeout = timeout or self.default_timeout + key = self._lock_key(spec) + + if key not in self.locks: + self.locks[key] = self.raw_lock(spec, timeout=timeout) + else: + self.locks[key].default_timeout = timeout + + return self.locks[key] + + def raw_lock(self, spec: "spack.spec.Spec", timeout: Optional[float] = None) -> lk.Lock: + """Returns a raw lock for a Spec, but doesn't keep track of it.""" + return lk.Lock( + str(self.lock_path), + start=spec.dag_hash_bit_prefix(bit_length(sys.maxsize)), + length=1, + default_timeout=timeout, + desc=spec.name, + ) + + def has_lock(self, spec: "spack.spec.Spec") -> bool: + """Returns True if the spec is already managed by this spec locker""" + return self._lock_key(spec) in self.locks + + def _lock_key(self, spec: "spack.spec.Spec") -> Tuple[str, str]: + return (spec.dag_hash(), spec.name) + + @contextlib.contextmanager + def write_lock(self, spec: "spack.spec.Spec") -> Generator["SpecLocker", None, None]: + lock = self.lock(spec) + lock.acquire_write() + + try: + yield self + except lk.LockError: + # This addresses the case where a nested lock attempt fails inside + # of this context manager + raise + except (Exception, KeyboardInterrupt): + lock.release_write() + raise + else: + lock.release_write() + + def clear(self, spec: "spack.spec.Spec") -> Tuple[bool, Optional[lk.Lock]]: + key = self._lock_key(spec) + lock = self.locks.pop(key, None) + return bool(lock), lock + + def clear_all(self, clear_fn: Optional[Callable[[lk.Lock], Any]] = None) -> None: + if clear_fn is not None: + for lock in self.locks.values(): + clear_fn(lock) + self.locks.clear() + + +class FailureTracker: + """Tracks installation failures. 
+ + Prefix failure marking takes the form of a byte range lock on the nth + byte of a file for coordinating between concurrent parallel build + processes and a persistent file, named with the full hash and + containing the spec, in a subdirectory of the database to enable + persistence across overlapping but separate related build processes. + + The failure lock file lives alongside the install DB. + + ``n`` is the sys.maxsize-bit prefix of the associated DAG hash to make + the likelihood of collision very low with no cleanup required. + """ + + def __init__(self, root_dir: Union[str, pathlib.Path], default_timeout: Optional[float]): + #: Ensure a persistent location for dealing with parallel installation + #: failures (e.g., across near-concurrent processes). + self.dir = pathlib.Path(root_dir) / _DB_DIRNAME / "failures" + self.dir.mkdir(parents=True, exist_ok=True) + + self.locker = SpecLocker(failures_lock_path(root_dir), default_timeout=default_timeout) + + def clear(self, spec: "spack.spec.Spec", force: bool = False) -> None: + """Removes any persistent and cached failure tracking for the spec. + + see `mark()`. 
+ + Args: + spec: the spec whose failure indicators are being removed + force: True if the failure information should be cleared when a failure lock + exists for the file, or False if the failure should not be cleared (e.g., + it may be associated with a concurrent build) + """ + locked = self.lock_taken(spec) + if locked and not force: + tty.msg(f"Retaining failure marking for {spec.name} due to lock") + return + + if locked: + tty.warn(f"Removing failure marking despite lock for {spec.name}") + + succeeded, lock = self.locker.clear(spec) + if succeeded and lock is not None: + lock.release_write() + + if self.persistent_mark(spec): + path = self._path(spec) + tty.debug(f"Removing failure marking for {spec.name}") + try: + path.unlink() + except OSError as err: + tty.warn( + f"Unable to remove failure marking for {spec.name} ({str(path)}): {str(err)}" + ) + + def clear_all(self) -> None: + """Force remove install failure tracking files.""" + tty.debug("Releasing prefix failure locks") + self.locker.clear_all( + clear_fn=lambda x: x.release_write() if x.is_write_locked() else True + ) + + tty.debug("Removing prefix failure tracking files") + try: + for fail_mark in os.listdir(str(self.dir)): + try: + (self.dir / fail_mark).unlink() + except OSError as exc: + tty.warn(f"Unable to remove failure marking file {fail_mark}: {str(exc)}") + except OSError as exc: + tty.warn(f"Unable to remove failure marking files: {str(exc)}") + + def mark(self, spec: "spack.spec.Spec") -> lk.Lock: + """Marks a spec as failing to install. + + Args: + spec: spec that failed to install + """ + # Dump the spec to the failure file for (manual) debugging purposes + path = self._path(spec) + path.write_text(spec.to_json()) + + # Also ensure a failure lock is taken to prevent cleanup removal + # of failure status information during a concurrent parallel build. 
+ if not self.locker.has_lock(spec): + try: + mark = self.locker.lock(spec) + mark.acquire_write() + except lk.LockTimeoutError: + # Unlikely that another process failed to install at the same + # time but log it anyway. + tty.debug(f"PID {os.getpid()} failed to mark install failure for {spec.name}") + tty.warn(f"Unable to mark {spec.name} as failed.") + + return self.locker.lock(spec) + + def has_failed(self, spec: "spack.spec.Spec") -> bool: + """Return True if the spec is marked as failed.""" + # The failure was detected in this process. + if self.locker.has_lock(spec): + return True + + # The failure was detected by a concurrent process (e.g., an srun), + # which is expected to be holding a write lock if that is the case. + if self.lock_taken(spec): + return True + + # Determine if the spec may have been marked as failed by a separate + # spack build process running concurrently. + return self.persistent_mark(spec) + + def lock_taken(self, spec: "spack.spec.Spec") -> bool: + """Return True if another process has a failure lock on the spec.""" + check = self.locker.raw_lock(spec) + return check.is_write_locked() + + def persistent_mark(self, spec: "spack.spec.Spec") -> bool: + """Determine if the spec has a persistent failure marking.""" + return self._path(spec).exists() + + def _path(self, spec: "spack.spec.Spec") -> pathlib.Path: + """Return the path to the spec's failure file, which may not exist.""" + assert spec.concrete, "concrete spec required for failure path" + return self.dir / f"{spec.name}-{spec.dag_hash()}" - #: Per-process failure (lock) objects for each install prefix - _prefix_failures: Dict[str, lk.Lock] = {} +class Database: #: Fields written for each install record record_fields: Tuple[str, ...] 
= DEFAULT_INSTALL_RECORD_FIELDS @@ -392,24 +612,10 @@ class Database: self._verifier_path = os.path.join(self.database_directory, "index_verifier") self._lock_path = os.path.join(self.database_directory, "lock") - # This is for other classes to use to lock prefix directories. - self.prefix_lock_path = os.path.join(self.database_directory, "prefix_lock") - - # Ensure a persistent location for dealing with parallel installation - # failures (e.g., across near-concurrent processes). - self._failure_dir = os.path.join(self.database_directory, "failures") - - # Support special locks for handling parallel installation failures - # of a spec. - self.prefix_fail_path = os.path.join(self.database_directory, "prefix_failures") - # Create needed directories and files if not is_upstream and not os.path.exists(self.database_directory): fs.mkdirp(self.database_directory) - if not is_upstream and not os.path.exists(self._failure_dir): - fs.mkdirp(self._failure_dir) - self.is_upstream = is_upstream self.last_seen_verifier = "" # Failed write transactions (interrupted by exceptions) will alert @@ -423,15 +629,7 @@ class Database: # initialize rest of state. 
self.db_lock_timeout = lock_cfg.database_timeout - self.package_lock_timeout = lock_cfg.package_timeout - tty.debug("DATABASE LOCK TIMEOUT: {0}s".format(str(self.db_lock_timeout))) - timeout_format_str = ( - "{0}s".format(str(self.package_lock_timeout)) - if self.package_lock_timeout - else "No timeout" - ) - tty.debug("PACKAGE LOCK TIMEOUT: {0}".format(str(timeout_format_str))) self.lock: Union[ForbiddenLock, lk.Lock] if self.is_upstream: @@ -471,212 +669,6 @@ class Database: """Get a read lock context manager for use in a `with` block.""" return self._read_transaction_impl(self.lock, acquire=self._read) - def _failed_spec_path(self, spec): - """Return the path to the spec's failure file, which may not exist.""" - if not spec.concrete: - raise ValueError("Concrete spec required for failure path for {0}".format(spec.name)) - - return os.path.join(self._failure_dir, "{0}-{1}".format(spec.name, spec.dag_hash())) - - def clear_all_failures(self) -> None: - """Force remove install failure tracking files.""" - tty.debug("Releasing prefix failure locks") - for pkg_id in list(self._prefix_failures.keys()): - lock = self._prefix_failures.pop(pkg_id, None) - if lock: - lock.release_write() - - # Remove all failure markings (aka files) - tty.debug("Removing prefix failure tracking files") - for fail_mark in os.listdir(self._failure_dir): - try: - os.remove(os.path.join(self._failure_dir, fail_mark)) - except OSError as exc: - tty.warn( - "Unable to remove failure marking file {0}: {1}".format(fail_mark, str(exc)) - ) - - def clear_failure(self, spec: "spack.spec.Spec", force: bool = False) -> None: - """ - Remove any persistent and cached failure tracking for the spec. - - see `mark_failed()`. 
- - Args: - spec: the spec whose failure indicators are being removed - force: True if the failure information should be cleared when a prefix failure - lock exists for the file, or False if the failure should not be cleared (e.g., - it may be associated with a concurrent build) - """ - failure_locked = self.prefix_failure_locked(spec) - if failure_locked and not force: - tty.msg("Retaining failure marking for {0} due to lock".format(spec.name)) - return - - if failure_locked: - tty.warn("Removing failure marking despite lock for {0}".format(spec.name)) - - lock = self._prefix_failures.pop(spec.prefix, None) - if lock: - lock.release_write() - - if self.prefix_failure_marked(spec): - try: - path = self._failed_spec_path(spec) - tty.debug("Removing failure marking for {0}".format(spec.name)) - os.remove(path) - except OSError as err: - tty.warn( - "Unable to remove failure marking for {0} ({1}): {2}".format( - spec.name, path, str(err) - ) - ) - - def mark_failed(self, spec: "spack.spec.Spec") -> lk.Lock: - """ - Mark a spec as failing to install. - - Prefix failure marking takes the form of a byte range lock on the nth - byte of a file for coordinating between concurrent parallel build - processes and a persistent file, named with the full hash and - containing the spec, in a subdirectory of the database to enable - persistence across overlapping but separate related build processes. - - The failure lock file, ``spack.store.STORE.db.prefix_failures``, lives - alongside the install DB. ``n`` is the sys.maxsize-bit prefix of the - associated DAG hash to make the likelihood of collision very low with - no cleanup required. - """ - # Dump the spec to the failure file for (manual) debugging purposes - path = self._failed_spec_path(spec) - with open(path, "w") as f: - spec.to_json(f) - - # Also ensure a failure lock is taken to prevent cleanup removal - # of failure status information during a concurrent parallel build. - err = "Unable to mark {0.name} as failed." 
- - prefix = spec.prefix - if prefix not in self._prefix_failures: - mark = lk.Lock( - self.prefix_fail_path, - start=spec.dag_hash_bit_prefix(bit_length(sys.maxsize)), - length=1, - default_timeout=self.package_lock_timeout, - desc=spec.name, - ) - - try: - mark.acquire_write() - except lk.LockTimeoutError: - # Unlikely that another process failed to install at the same - # time but log it anyway. - tty.debug( - "PID {0} failed to mark install failure for {1}".format(os.getpid(), spec.name) - ) - tty.warn(err.format(spec)) - - # Whether we or another process marked it as a failure, track it - # as such locally. - self._prefix_failures[prefix] = mark - - return self._prefix_failures[prefix] - - def prefix_failed(self, spec: "spack.spec.Spec") -> bool: - """Return True if the prefix (installation) is marked as failed.""" - # The failure was detected in this process. - if spec.prefix in self._prefix_failures: - return True - - # The failure was detected by a concurrent process (e.g., an srun), - # which is expected to be holding a write lock if that is the case. - if self.prefix_failure_locked(spec): - return True - - # Determine if the spec may have been marked as failed by a separate - # spack build process running concurrently. - return self.prefix_failure_marked(spec) - - def prefix_failure_locked(self, spec: "spack.spec.Spec") -> bool: - """Return True if a process has a failure lock on the spec.""" - check = lk.Lock( - self.prefix_fail_path, - start=spec.dag_hash_bit_prefix(bit_length(sys.maxsize)), - length=1, - default_timeout=self.package_lock_timeout, - desc=spec.name, - ) - - return check.is_write_locked() - - def prefix_failure_marked(self, spec: "spack.spec.Spec") -> bool: - """Determine if the spec has a persistent failure marking.""" - return os.path.exists(self._failed_spec_path(spec)) - - def prefix_lock(self, spec: "spack.spec.Spec", timeout: Optional[float] = None) -> lk.Lock: - """Get a lock on a particular spec's installation directory. 
- - NOTE: The installation directory **does not** need to exist. - - Prefix lock is a byte range lock on the nth byte of a file. - - The lock file is ``spack.store.STORE.db.prefix_lock`` -- the DB - tells us what to call it and it lives alongside the install DB. - - n is the sys.maxsize-bit prefix of the DAG hash. This makes - likelihood of collision is very low AND it gives us - readers-writer lock semantics with just a single lockfile, so no - cleanup required. - """ - timeout = timeout or self.package_lock_timeout - prefix = spec.prefix - if prefix not in self._prefix_locks: - self._prefix_locks[prefix] = lk.Lock( - self.prefix_lock_path, - start=spec.dag_hash_bit_prefix(bit_length(sys.maxsize)), - length=1, - default_timeout=timeout, - desc=spec.name, - ) - elif timeout != self._prefix_locks[prefix].default_timeout: - self._prefix_locks[prefix].default_timeout = timeout - - return self._prefix_locks[prefix] - - @contextlib.contextmanager - def prefix_read_lock(self, spec): - prefix_lock = self.prefix_lock(spec) - prefix_lock.acquire_read() - - try: - yield self - except lk.LockError: - # This addresses the case where a nested lock attempt fails inside - # of this context manager - raise - except (Exception, KeyboardInterrupt): - prefix_lock.release_read() - raise - else: - prefix_lock.release_read() - - @contextlib.contextmanager - def prefix_write_lock(self, spec): - prefix_lock = self.prefix_lock(spec) - prefix_lock.acquire_write() - - try: - yield self - except lk.LockError: - # This addresses the case where a nested lock attempt fails inside - # of this context manager - raise - except (Exception, KeyboardInterrupt): - prefix_lock.release_write() - raise - else: - prefix_lock.release_write() - def _write_to_file(self, stream): """Write out the database in JSON format to the stream passed as argument. 
diff --git a/lib/spack/spack/installer.py b/lib/spack/spack/installer.py index 92d12e04cb..5b88808496 100644 --- a/lib/spack/spack/installer.py +++ b/lib/spack/spack/installer.py @@ -519,13 +519,6 @@ def _try_install_from_binary_cache( ) -def clear_failures() -> None: - """ - Remove all failure tracking markers for the Spack instance. - """ - spack.store.STORE.db.clear_all_failures() - - def combine_phase_logs(phase_log_files: List[str], log_path: str) -> None: """ Read set or list of logs and combine them into one file. @@ -1126,15 +1119,13 @@ class PackageInstaller: instance. """ - def __init__(self, installs: List[Tuple["spack.package_base.PackageBase", dict]] = []): + def __init__(self, installs: List[Tuple["spack.package_base.PackageBase", dict]] = []) -> None: """Initialize the installer. Args: installs (list): list of tuples, where each tuple consists of a package (PackageBase) and its associated install arguments (dict) - Return: - PackageInstaller: instance """ # List of build requests self.build_requests = [BuildRequest(pkg, install_args) for pkg, install_args in installs] @@ -1287,7 +1278,7 @@ class PackageInstaller: dep_id = package_id(dep_pkg) # Check for failure since a prefix lock is not required - if spack.store.STORE.db.prefix_failed(dep): + if spack.store.STORE.failure_tracker.has_failed(dep): action = "'spack install' the dependency" msg = "{0} is marked as an install failure: {1}".format(dep_id, action) raise InstallError(err.format(request.pkg_id, msg), pkg=dep_pkg) @@ -1502,7 +1493,7 @@ class PackageInstaller: if lock is None: tty.debug(msg.format("Acquiring", desc, pkg_id, pretty_seconds(timeout or 0))) op = "acquire" - lock = spack.store.STORE.db.prefix_lock(pkg.spec, timeout) + lock = spack.store.STORE.prefix_locker.lock(pkg.spec, timeout) if timeout != lock.default_timeout: tty.warn( "Expected prefix lock timeout {0}, not {1}".format( @@ -1627,12 +1618,12 @@ class PackageInstaller: # Clear any persistent failure markings _unless_ they are 
# associated with another process in this parallel build # of the spec. - spack.store.STORE.db.clear_failure(dep, force=False) + spack.store.STORE.failure_tracker.clear(dep, force=False) install_package = request.install_args.get("install_package") if install_package and request.pkg_id not in self.build_tasks: # Be sure to clear any previous failure - spack.store.STORE.db.clear_failure(request.spec, force=True) + spack.store.STORE.failure_tracker.clear(request.spec, force=True) # If not installing dependencies, then determine their # installation status before proceeding @@ -1888,7 +1879,7 @@ class PackageInstaller: err = "" if exc is None else ": {0}".format(str(exc)) tty.debug("Flagging {0} as failed{1}".format(pkg_id, err)) if mark: - self.failed[pkg_id] = spack.store.STORE.db.mark_failed(task.pkg.spec) + self.failed[pkg_id] = spack.store.STORE.failure_tracker.mark(task.pkg.spec) else: self.failed[pkg_id] = None task.status = STATUS_FAILED @@ -2074,7 +2065,7 @@ class PackageInstaller: # Flag a failed spec. Do not need an (install) prefix lock since # assume using a separate (failed) prefix lock file. - if pkg_id in self.failed or spack.store.STORE.db.prefix_failed(spec): + if pkg_id in self.failed or spack.store.STORE.failure_tracker.has_failed(spec): term_status.clear() tty.warn("{0} failed to install".format(pkg_id)) self._update_failed(task) diff --git a/lib/spack/spack/package_base.py b/lib/spack/spack/package_base.py index 6571f6d1dc..1d3dba5045 100644 --- a/lib/spack/spack/package_base.py +++ b/lib/spack/spack/package_base.py @@ -2209,7 +2209,7 @@ class PackageBase(WindowsRPath, PackageViewMixin, metaclass=PackageMeta): pkg = None # Pre-uninstall hook runs first. 
- with spack.store.STORE.db.prefix_write_lock(spec): + with spack.store.STORE.prefix_locker.write_lock(spec): if pkg is not None: try: spack.hooks.pre_uninstall(spec) diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 14754e1f52..1c932da4bb 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -326,7 +326,7 @@ class Stage: self.keep = keep # File lock for the stage directory. We use one file for all - # stage locks. See spack.database.Database.prefix_lock for + # stage locks. See spack.database.SpecLocker.lock for # details on this approach. self._lock = None if lock: diff --git a/lib/spack/spack/store.py b/lib/spack/spack/store.py index a923080e9f..a0ab11c4c1 100644 --- a/lib/spack/spack/store.py +++ b/lib/spack/spack/store.py @@ -25,13 +25,14 @@ import uuid from typing import Any, Callable, Dict, Generator, List, Optional, Union import llnl.util.lang -import llnl.util.tty as tty +from llnl.util import tty import spack.config import spack.database import spack.directory_layout import spack.error import spack.paths +import spack.spec import spack.util.path #: default installation root, relative to the Spack install path @@ -134,18 +135,21 @@ def parse_install_tree(config_dict): class Store: """A store is a path full of installed Spack packages. - Stores consist of packages installed according to a - ``DirectoryLayout``, along with an index, or _database_ of their - contents. The directory layout controls what paths look like and how - Spack ensures that each unique spec gets its own unique directory (or - not, though we don't recommend that). The database is a single file - that caches metadata for the entire Spack installation. It prevents - us from having to spider the install tree to figure out what's there. + Stores consist of packages installed according to a ``DirectoryLayout``, along with a database + of their contents. 
+ + The directory layout controls what paths look like and how Spack ensures that each unique spec + gets its own unique directory (or not, though we don't recommend that). + + The database is a single file that caches metadata for the entire Spack installation. It + prevents us from having to spider the install tree to figure out what's there. + + The store is also able to lock installation prefixes, and to mark installation failures. Args: root: path to the root of the install tree - unpadded_root: path to the root of the install tree without padding. - The sbang script has to be installed here to work with padded roots + unpadded_root: path to the root of the install tree without padding. The sbang script has + to be installed here to work with padded roots projections: expression according to guidelines that describes how to construct a path to a package prefix in this store hash_length: length of the hashes used in the directory layout. Spec hash suffixes will be @@ -170,6 +174,19 @@ class Store: self.upstreams = upstreams self.lock_cfg = lock_cfg self.db = spack.database.Database(root, upstream_dbs=upstreams, lock_cfg=lock_cfg) + + timeout_format_str = ( + f"{str(lock_cfg.package_timeout)}s" if lock_cfg.package_timeout else "No timeout" + ) + tty.debug("PACKAGE LOCK TIMEOUT: {0}".format(str(timeout_format_str))) + + self.prefix_locker = spack.database.SpecLocker( + spack.database.prefix_lock_path(root), default_timeout=lock_cfg.package_timeout + ) + self.failure_tracker = spack.database.FailureTracker( + self.root, default_timeout=lock_cfg.package_timeout + ) + self.layout = spack.directory_layout.DirectoryLayout( root, projections=projections, hash_length=hash_length ) diff --git a/lib/spack/spack/test/cmd/clean.py b/lib/spack/spack/test/cmd/clean.py index 8c711e4c12..12b1ca4991 100644 --- a/lib/spack/spack/test/cmd/clean.py +++ b/lib/spack/spack/test/cmd/clean.py @@ -10,9 +10,11 @@ import pytest import llnl.util.filesystem as fs import spack.caches +import 
spack.cmd.clean import spack.main import spack.package_base import spack.stage +import spack.store clean = spack.main.SpackCommand("clean") @@ -33,7 +35,7 @@ def mock_calls_for_clean(monkeypatch): monkeypatch.setattr(spack.stage, "purge", Counter("stages")) monkeypatch.setattr(spack.caches.fetch_cache, "destroy", Counter("downloads"), raising=False) monkeypatch.setattr(spack.caches.misc_cache, "destroy", Counter("caches")) - monkeypatch.setattr(spack.installer, "clear_failures", Counter("failures")) + monkeypatch.setattr(spack.store.STORE.failure_tracker, "clear_all", Counter("failures")) monkeypatch.setattr(spack.cmd.clean, "remove_python_cache", Counter("python_cache")) yield counts diff --git a/lib/spack/spack/test/cmd/install.py b/lib/spack/spack/test/cmd/install.py index e0518ab73d..872d91f0a8 100644 --- a/lib/spack/spack/test/cmd/install.py +++ b/lib/spack/spack/test/cmd/install.py @@ -23,6 +23,7 @@ import spack.config import spack.environment as ev import spack.hash_types as ht import spack.package_base +import spack.store import spack.util.executable from spack.error import SpackError from spack.main import SpackCommand @@ -705,9 +706,11 @@ def test_cache_only_fails(tmpdir, mock_fetch, install_mockery, capfd): assert "was not installed" in out # Check that failure prefix locks are still cached - failure_lock_prefixes = ",".join(spack.store.STORE.db._prefix_failures.keys()) - assert "libelf" in failure_lock_prefixes - assert "libdwarf" in failure_lock_prefixes + failed_packages = [ + pkg_name for dag_hash, pkg_name in spack.store.STORE.failure_tracker.locker.locks.keys() + ] + assert "libelf" in failed_packages + assert "libdwarf" in failed_packages def test_install_only_dependencies(tmpdir, mock_fetch, install_mockery): diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index e75585b0db..4f0632faa2 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -950,21 +950,14 @@ def 
disable_compiler_execution(monkeypatch, request): @pytest.fixture(scope="function") -def install_mockery(temporary_store, mutable_config, mock_packages): +def install_mockery(temporary_store: spack.store.Store, mutable_config, mock_packages): """Hooks a fake install directory, DB, and stage directory into Spack.""" # We use a fake package, so temporarily disable checksumming with spack.config.override("config:checksum", False): yield - # Also wipe out any cached prefix failure locks (associated with - # the session-scoped mock archive). - for pkg_id in list(temporary_store.db._prefix_failures.keys()): - lock = spack.store.STORE.db._prefix_failures.pop(pkg_id, None) - if lock: - try: - lock.release_write() - except Exception: - pass + # Wipe out any cached prefix failure locks (associated with the session-scoped mock archive) + temporary_store.failure_tracker.clear_all() @pytest.fixture(scope="function") diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index 3f5d7887b7..c1e92eedca 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -807,22 +807,22 @@ def test_query_spec_with_non_conditional_virtual_dependency(database): def test_failed_spec_path_error(database): """Ensure spec not concrete check is covered.""" s = spack.spec.Spec("a") - with pytest.raises(ValueError, match="Concrete spec required"): - spack.store.STORE.db._failed_spec_path(s) + with pytest.raises(AssertionError, match="concrete spec required"): + spack.store.STORE.failure_tracker.mark(s) @pytest.mark.db def test_clear_failure_keep(mutable_database, monkeypatch, capfd): """Add test coverage for clear_failure operation when to be retained.""" - def _is(db, spec): + def _is(self, spec): return True # Pretend the spec has been failure locked - monkeypatch.setattr(spack.database.Database, "prefix_failure_locked", _is) + monkeypatch.setattr(spack.database.FailureTracker, "lock_taken", _is) - s = spack.spec.Spec("a") - 
spack.store.STORE.db.clear_failure(s) + s = spack.spec.Spec("a").concretized() + spack.store.STORE.failure_tracker.clear(s) out = capfd.readouterr()[0] assert "Retaining failure marking" in out @@ -831,16 +831,16 @@ def test_clear_failure_keep(mutable_database, monkeypatch, capfd): def test_clear_failure_forced(default_mock_concretization, mutable_database, monkeypatch, capfd): """Add test coverage for clear_failure operation when force.""" - def _is(db, spec): + def _is(self, spec): return True # Pretend the spec has been failure locked - monkeypatch.setattr(spack.database.Database, "prefix_failure_locked", _is) + monkeypatch.setattr(spack.database.FailureTracker, "lock_taken", _is) # Ensure raise OSError when try to remove the non-existent marking - monkeypatch.setattr(spack.database.Database, "prefix_failure_marked", _is) + monkeypatch.setattr(spack.database.FailureTracker, "persistent_mark", _is) s = default_mock_concretization("a") - spack.store.STORE.db.clear_failure(s, force=True) + spack.store.STORE.failure_tracker.clear(s, force=True) out = capfd.readouterr()[1] assert "Removing failure marking despite lock" in out assert "Unable to remove failure marking" in out @@ -858,55 +858,34 @@ def test_mark_failed(default_mock_concretization, mutable_database, monkeypatch, with tmpdir.as_cwd(): s = default_mock_concretization("a") - spack.store.STORE.db.mark_failed(s) + spack.store.STORE.failure_tracker.mark(s) out = str(capsys.readouterr()[1]) assert "Unable to mark a as failed" in out - # Clean up the failure mark to ensure it does not interfere with other - # tests using the same spec. 
- del spack.store.STORE.db._prefix_failures[s.prefix] + spack.store.STORE.failure_tracker.clear_all() @pytest.mark.db def test_prefix_failed(default_mock_concretization, mutable_database, monkeypatch): - """Add coverage to prefix_failed operation.""" - - def _is(db, spec): - return True + """Add coverage to failed operation.""" s = default_mock_concretization("a") # Confirm the spec is not already marked as failed - assert not spack.store.STORE.db.prefix_failed(s) + assert not spack.store.STORE.failure_tracker.has_failed(s) # Check that a failure entry is sufficient - spack.store.STORE.db._prefix_failures[s.prefix] = None - assert spack.store.STORE.db.prefix_failed(s) + spack.store.STORE.failure_tracker.mark(s) + assert spack.store.STORE.failure_tracker.has_failed(s) # Remove the entry and check again - del spack.store.STORE.db._prefix_failures[s.prefix] - assert not spack.store.STORE.db.prefix_failed(s) + spack.store.STORE.failure_tracker.clear(s) + assert not spack.store.STORE.failure_tracker.has_failed(s) # Now pretend that the prefix failure is locked - monkeypatch.setattr(spack.database.Database, "prefix_failure_locked", _is) - assert spack.store.STORE.db.prefix_failed(s) - - -def test_prefix_read_lock_error(default_mock_concretization, mutable_database, monkeypatch): - """Cover the prefix read lock exception.""" - - def _raise(db, spec): - raise lk.LockError("Mock lock error") - - s = default_mock_concretization("a") - - # Ensure subsequent lock operations fail - monkeypatch.setattr(lk.Lock, "acquire_read", _raise) - - with pytest.raises(Exception): - with spack.store.STORE.db.prefix_read_lock(s): - assert False + monkeypatch.setattr(spack.database.FailureTracker, "lock_taken", lambda self, spec: True) + assert spack.store.STORE.failure_tracker.has_failed(s) def test_prefix_write_lock_error(default_mock_concretization, mutable_database, monkeypatch): @@ -921,7 +900,7 @@ def test_prefix_write_lock_error(default_mock_concretization, mutable_database, 
monkeypatch.setattr(lk.Lock, "acquire_write", _raise) with pytest.raises(Exception): - with spack.store.STORE.db.prefix_write_lock(s): + with spack.store.STORE.prefix_locker.write_lock(s): assert False diff --git a/lib/spack/spack/test/install.py b/lib/spack/spack/test/install.py index a6ac33fe96..76ab4d4698 100644 --- a/lib/spack/spack/test/install.py +++ b/lib/spack/spack/test/install.py @@ -159,7 +159,7 @@ def test_partial_install_delete_prefix_and_stage(install_mockery, mock_fetch, wo s.package.remove_prefix = rm_prefix_checker.remove_prefix # must clear failure markings for the package before re-installing it - spack.store.STORE.db.clear_failure(s, True) + spack.store.STORE.failure_tracker.clear(s, True) s.package.set_install_succeed() s.package.stage = MockStage(s.package.stage) @@ -354,7 +354,7 @@ def test_partial_install_keep_prefix(install_mockery, mock_fetch, monkeypatch, w assert os.path.exists(s.package.prefix) # must clear failure markings for the package before re-installing it - spack.store.STORE.db.clear_failure(s, True) + spack.store.STORE.failure_tracker.clear(s, True) s.package.set_install_succeed() s.package.stage = MockStage(s.package.stage) diff --git a/lib/spack/spack/test/installer.py b/lib/spack/spack/test/installer.py index a937f8f770..15888e683e 100644 --- a/lib/spack/spack/test/installer.py +++ b/lib/spack/spack/test/installer.py @@ -19,6 +19,7 @@ import spack.binary_distribution import spack.compilers import spack.concretize import spack.config +import spack.database import spack.installer as inst import spack.package_base import spack.package_prefs as prefs @@ -364,7 +365,7 @@ def test_ensure_locked_err(install_mockery, monkeypatch, tmpdir, capsys): """Test _ensure_locked when a non-lock exception is raised.""" mock_err_msg = "Mock exception error" - def _raise(lock, timeout): + def _raise(lock, timeout=None): raise RuntimeError(mock_err_msg) const_arg = installer_args(["trivial-install-test-package"], {}) @@ -432,7 +433,7 @@ def 
test_ensure_locked_new_lock(install_mockery, tmpdir, lock_type, reads, write def test_ensure_locked_new_warn(install_mockery, monkeypatch, tmpdir, capsys): - orig_pl = spack.database.Database.prefix_lock + orig_pl = spack.database.SpecLocker.lock def _pl(db, spec, timeout): lock = orig_pl(db, spec, timeout) @@ -444,7 +445,7 @@ def test_ensure_locked_new_warn(install_mockery, monkeypatch, tmpdir, capsys): installer = create_installer(const_arg) spec = installer.build_requests[0].pkg.spec - monkeypatch.setattr(spack.database.Database, "prefix_lock", _pl) + monkeypatch.setattr(spack.database.SpecLocker, "lock", _pl) lock_type = "read" ltype, lock = installer._ensure_locked(lock_type, spec.package) @@ -597,59 +598,50 @@ def test_dump_packages_deps_errs(install_mockery, tmpdir, monkeypatch, capsys): assert "Couldn't copy in provenance for cmake" in out -def test_clear_failures_success(install_mockery): +def test_clear_failures_success(tmpdir): """Test the clear_failures happy path.""" + failures = spack.database.FailureTracker(str(tmpdir), default_timeout=0.1) - # Set up a test prefix failure lock - lock = lk.Lock( - spack.store.STORE.db.prefix_fail_path, start=1, length=1, default_timeout=1e-9, desc="test" - ) - try: - lock.acquire_write() - except lk.LockTimeoutError: - tty.warn("Failed to write lock the test install failure") - spack.store.STORE.db._prefix_failures["test"] = lock + spec = spack.spec.Spec("a") + spec._mark_concrete() - # Set up a fake failure mark (or file) - fs.touch(os.path.join(spack.store.STORE.db._failure_dir, "test")) + # Set up a test prefix failure lock + failures.mark(spec) + assert failures.has_failed(spec) # Now clear failure tracking - inst.clear_failures() + failures.clear_all() # Ensure there are no cached failure locks or failure marks - assert len(spack.store.STORE.db._prefix_failures) == 0 - assert len(os.listdir(spack.store.STORE.db._failure_dir)) == 0 + assert len(failures.locker.locks) == 0 + assert len(os.listdir(failures.dir)) == 
0 # Ensure the core directory and failure lock file still exist - assert os.path.isdir(spack.store.STORE.db._failure_dir) + assert os.path.isdir(failures.dir) + # Locks on windows are a no-op if sys.platform != "win32": - assert os.path.isfile(spack.store.STORE.db.prefix_fail_path) + assert os.path.isfile(failures.locker.lock_path) -def test_clear_failures_errs(install_mockery, monkeypatch, capsys): +@pytest.mark.xfail(sys.platform == "win32", reason="chmod does not prevent removal on Win") +def test_clear_failures_errs(tmpdir, capsys): """Test the clear_failures exception paths.""" - orig_fn = os.remove - err_msg = "Mock os remove" - - def _raise_except(path): - raise OSError(err_msg) - - # Set up a fake failure mark (or file) - fs.touch(os.path.join(spack.store.STORE.db._failure_dir, "test")) + failures = spack.database.FailureTracker(str(tmpdir), default_timeout=0.1) + spec = spack.spec.Spec("a") + spec._mark_concrete() + failures.mark(spec) - monkeypatch.setattr(os, "remove", _raise_except) + # Make the file marker not writeable, so that clearing_failures fails + failures.dir.chmod(0o000) # Clear failure tracking - inst.clear_failures() + failures.clear_all() # Ensure expected warning generated out = str(capsys.readouterr()[1]) assert "Unable to remove failure" in out - assert err_msg in out - - # Restore remove for teardown - monkeypatch.setattr(os, "remove", orig_fn) + failures.dir.chmod(0o750) def test_combine_phase_logs(tmpdir): @@ -694,14 +686,18 @@ def test_combine_phase_logs_does_not_care_about_encoding(tmpdir): assert f.read() == data * 2 -def test_check_deps_status_install_failure(install_mockery, monkeypatch): +def test_check_deps_status_install_failure(install_mockery): + """Tests that checking the dependency status on a request to install + 'a' fails, if we mark the dependency as failed. 
+ """ + s = spack.spec.Spec("a").concretized() + for dep in s.traverse(root=False): + spack.store.STORE.failure_tracker.mark(dep) + const_arg = installer_args(["a"], {}) installer = create_installer(const_arg) request = installer.build_requests[0] - # Make sure the package is identified as failed - monkeypatch.setattr(spack.database.Database, "prefix_failed", _true) - with pytest.raises(inst.InstallError, match="install failure"): installer._check_deps_status(request) @@ -1006,7 +1002,7 @@ def test_install_failed(install_mockery, monkeypatch, capsys): installer = create_installer(const_arg) # Make sure the package is identified as failed - monkeypatch.setattr(spack.database.Database, "prefix_failed", _true) + monkeypatch.setattr(spack.database.FailureTracker, "has_failed", _true) with pytest.raises(inst.InstallError, match="request failed"): installer.install() @@ -1022,7 +1018,7 @@ def test_install_failed_not_fast(install_mockery, monkeypatch, capsys): installer = create_installer(const_arg) # Make sure the package is identified as failed - monkeypatch.setattr(spack.database.Database, "prefix_failed", _true) + monkeypatch.setattr(spack.database.FailureTracker, "has_failed", _true) with pytest.raises(inst.InstallError, match="request failed"): installer.install() @@ -1121,7 +1117,7 @@ def test_install_fail_fast_on_detect(install_mockery, monkeypatch, capsys): # # This will prevent b from installing, which will cause the build of a # to be skipped. - monkeypatch.setattr(spack.database.Database, "prefix_failed", _true) + monkeypatch.setattr(spack.database.FailureTracker, "has_failed", _true) with pytest.raises(inst.InstallError, match="after first install failure"): installer.install() -- cgit v1.2.3-60-g2f50