From d67b12eb7934637f119ed7ad6f80f3d5f574895b Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Mon, 24 Oct 2022 11:54:49 +0200 Subject: locks: improved errors (#33477) Instead of showing ``` ==> Error: Timed out waiting for a write lock. ``` show ``` ==> Error: Timed out waiting for a write lock after 1.200ms and 4 attempts on file: /some/file ``` s.t. we actually get to see where acquiring a lock failed even when not running in debug mode. And use pretty time units everywhere, so we don't get 1.45e-9 seconds but 1.450ns etc. --- lib/spack/llnl/util/lang.py | 20 ++++++++++++++++++++ lib/spack/llnl/util/lock.py | 22 ++++++++++++++++++---- lib/spack/spack/installer.py | 11 ++++++++--- lib/spack/spack/test/database.py | 2 +- lib/spack/spack/test/llnl/util/lang.py | 14 +++++++++++--- lib/spack/spack/test/llnl/util/lock.py | 2 +- 6 files changed, 59 insertions(+), 12 deletions(-) diff --git a/lib/spack/llnl/util/lang.py b/lib/spack/llnl/util/lang.py index 76b161cbbe..f1a0b7ba31 100644 --- a/lib/spack/llnl/util/lang.py +++ b/lib/spack/llnl/util/lang.py @@ -749,6 +749,26 @@ def pretty_string_to_date(date_str, now=None): raise ValueError(msg) +def pretty_seconds(seconds): + """Seconds to string with appropriate units + + Arguments: + seconds (float): Number of seconds + + Returns: + str: Time string with units + """ + if seconds >= 1: + value, unit = seconds, "s" + elif seconds >= 1e-3: + value, unit = seconds * 1e3, "ms" + elif seconds >= 1e-6: + value, unit = seconds * 1e6, "us" + else: + value, unit = seconds * 1e9, "ns" + return "%.3f%s" % (value, unit) + + class RequiredAttributeError(ValueError): def __init__(self, message): super(RequiredAttributeError, self).__init__(message) diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index 6dfba50abb..9c3bcd7a91 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -12,6 +12,7 @@ from datetime import datetime from typing import Dict, Tuple # novm import llnl.util.tty as tty +from llnl.util.lang import pretty_seconds import spack.util.string @@ -166,7 +167,7 @@ def _attempts_str(wait_time, nattempts): return "" attempts = spack.util.string.plural(nattempts, "attempt") - return " after {0:0.2f}s and {1}".format(wait_time, attempts) + return " after {} and {}".format(pretty_seconds(wait_time), attempts) class LockType(object): @@ -318,8 +319,8 @@ class Lock(object): raise LockROFileError(self.path) self._log_debug( - "{0} locking [{1}:{2}]: timeout {3} sec".format( - op_str.lower(), self._start, self._length, timeout + "{} locking [{}:{}]: timeout {}".format( + op_str.lower(), self._start, self._length, pretty_seconds(timeout or 0) ) ) @@ -340,7 +341,8 @@ class Lock(object): total_wait_time = time.time() - start_time return total_wait_time, num_attempts - raise LockTimeoutError("Timed out waiting for a {0} lock.".format(op_str.lower())) + total_wait_time = time.time() - start_time + raise LockTimeoutError(op_str.lower(), self.path, total_wait_time, num_attempts) def _poll_lock(self, op): """Attempt to acquire the lock in a non-blocking manner. Return whether @@ -780,6 +782,18 @@ class LockLimitError(LockError): class LockTimeoutError(LockError): """Raised when an attempt to acquire a lock times out.""" + def __init__(self, lock_type, path, time, attempts): + fmt = "Timed out waiting for a {} lock after {}.\n Made {} {} on file: {}" + super(LockTimeoutError, self).__init__( + fmt.format( + lock_type, + pretty_seconds(time), + attempts, + "attempt" if attempts == 1 else "attempts", + path, + ) + ) + class LockUpgradeError(LockError): """Raised when unable to upgrade from a read to a write lock.""" diff --git a/lib/spack/spack/installer.py b/lib/spack/spack/installer.py index ca53f87691..411c076aa8 100644 --- a/lib/spack/spack/installer.py +++ b/lib/spack/spack/installer.py @@ -42,6 +42,7 @@ import six import llnl.util.filesystem as fs import llnl.util.lock as lk import llnl.util.tty as tty +from llnl.util.lang import pretty_seconds from llnl.util.tty.color import colorize from llnl.util.tty.log import log_output @@ -1069,7 +1070,7 @@ class PackageInstaller(object): try: if lock is None: - tty.debug(msg.format("Acquiring", desc, pkg_id, timeout)) + tty.debug(msg.format("Acquiring", desc, pkg_id, pretty_seconds(timeout or 0))) op = "acquire" lock = spack.store.db.prefix_lock(pkg.spec, timeout) if timeout != lock.default_timeout: @@ -1088,14 +1089,18 @@ class PackageInstaller(object): # must be downgraded to be a read lock # Retain the original lock timeout, which is in the lock's # default_timeout setting. - tty.debug(msg.format("Downgrading to", desc, pkg_id, lock.default_timeout)) + tty.debug( + msg.format( + "Downgrading to", desc, pkg_id, pretty_seconds(lock.default_timeout or 0) + ) + ) op = "downgrade to" lock.downgrade_write_to_read() else: # read -> write # Only get here if the current lock is a read lock, which # must be upgraded to be a write lock - tty.debug(msg.format("Upgrading to", desc, pkg_id, timeout)) + tty.debug(msg.format("Upgrading to", desc, pkg_id, pretty_seconds(timeout or 0))) op = "upgrade to" lock.upgrade_read_to_write(timeout) tty.debug("{0} is now {1} locked".format(pkg_id, lock_type)) diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index fb22c6fec0..bd06e5eb2c 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -858,7 +858,7 @@ def test_mark_failed(mutable_database, monkeypatch, tmpdir, capsys): """Add coverage to mark_failed.""" def _raise_exc(lock): - raise lk.LockTimeoutError("Mock acquire_write failure") + raise lk.LockTimeoutError("write", "/mock-lock", 1.234, 10) # Ensure attempt to acquire write lock on the mark raises the exception monkeypatch.setattr(lk.Lock, "acquire_write", _raise_exc) diff --git a/lib/spack/spack/test/llnl/util/lang.py b/lib/spack/spack/test/llnl/util/lang.py index 94b067f3dc..bced121136 100644 --- a/lib/spack/spack/test/llnl/util/lang.py +++ b/lib/spack/spack/test/llnl/util/lang.py @@ -118,6 +118,14 @@ def test_pretty_string_to_date(format, pretty_string): assert t1 == t2 +def test_pretty_seconds(): + assert llnl.util.lang.pretty_seconds(2.1) == "2.100s" + assert llnl.util.lang.pretty_seconds(2.1 / 1000) == "2.100ms" + assert llnl.util.lang.pretty_seconds(2.1 / 1000 / 1000) == "2.100us" + assert llnl.util.lang.pretty_seconds(2.1 / 1000 / 1000 / 1000) == "2.100ns" + assert llnl.util.lang.pretty_seconds(2.1 / 1000 / 1000 / 1000 / 10) == "0.210ns" + + def test_match_predicate(): matcher = match_predicate(lambda x: True) assert matcher("foo") @@ -306,15 +314,15 @@ def test_grouped_exception(): due to the following failures: inner method raised ValueError: wow! File "{0}", \ -line 290, in test_grouped_exception +line 298, in test_grouped_exception inner() File "{0}", \ -line 287, in inner +line 295, in inner raise ValueError("wow!") top-level raised TypeError: ok File "{0}", \ -line 293, in test_grouped_exception +line 301, in test_grouped_exception raise TypeError("ok") """ ).format(__file__) diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index a9c6b64db8..bf812bcc1d 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -1294,7 +1294,7 @@ def test_lock_in_current_directory(tmpdir): def test_attempts_str(): assert lk._attempts_str(0, 0) == "" assert lk._attempts_str(0.12, 1) == "" - assert lk._attempts_str(12.345, 2) == " after 12.35s and 2 attempts" + assert lk._attempts_str(12.345, 2) == " after 12.345s and 2 attempts" def test_lock_str(): -- cgit v1.2.3-70-g09d2