diff options
author | Todd Gamblin <tgamblin@llnl.gov> | 2018-11-13 11:05:17 -0600 |
---|---|---|
committer | Todd Gamblin <tgamblin@llnl.gov> | 2018-11-13 11:05:17 -0600 |
commit | 42962f2409fe7b46543dc5974c45bb6392fcea99 (patch) | |
tree | 2063188dd6902f9b6ea7e8a2601f6131ac88046d /lib/spack/llnl/util/lock.py | |
parent | 041aa143db6964575625f1849de639541efb83a5 (diff) | |
parent | 8554e933d2a236df20d07a6e0416ab444790bd3d (diff) | |
download | spack-42962f2409fe7b46543dc5974c45bb6392fcea99.tar.gz spack-42962f2409fe7b46543dc5974c45bb6392fcea99.tar.bz2 spack-42962f2409fe7b46543dc5974c45bb6392fcea99.tar.xz spack-42962f2409fe7b46543dc5974c45bb6392fcea99.zip |
Merge branch 'releases/v0.12'v0.12.0
Diffstat (limited to 'lib/spack/llnl/util/lock.py')
-rw-r--r-- | lib/spack/llnl/util/lock.py | 284 |
1 files changed, 189 insertions, 95 deletions
diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index 5467838744..3beb219bd9 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -1,27 +1,8 @@ -############################################################################## -# Copyright (c) 2013-2017, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. +# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. # -# This file is part of Spack. -# Created by Todd Gamblin, tgamblin@llnl.gov, All rights reserved. -# LLNL-CODE-647188 -# -# For details, see https://github.com/spack/spack -# Please also see the NOTICE and LICENSE files for our notice and the LGPL. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License (as -# published by the Free Software Foundation) version 2.1, February 1999. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and -# conditions of the GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -############################################################################## +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + import os import fcntl import errno @@ -32,14 +13,8 @@ import llnl.util.tty as tty __all__ = ['Lock', 'LockTransaction', 'WriteTransaction', 'ReadTransaction', - 'LockError'] - - -# Default timeout in seconds, after which locks will raise exceptions. -_default_timeout = 60 - -# Sleep time per iteration in spin loop (in seconds) -_sleep_time = 1e-5 + 'LockError', 'LockTimeoutError', + 'LockPermissionError', 'LockROFileError', 'CantCreateLockError'] class Lock(object): @@ -49,9 +24,15 @@ class Lock(object): any filesystem implementation that supports locking through the fcntl calls. This includes distributed filesystems like Lustre (when flock is enabled) and recent NFS versions. + + Note that this is for managing contention over resources *between* + processes and not for managing contention between threads in a process: the + functions of this object are not thread-safe. A process also must not + maintain multiple locks on the same file. """ - def __init__(self, path, start=0, length=0): + def __init__(self, path, start=0, length=0, debug=False, + default_timeout=None): """Construct a new lock on the file at ``path``. By default, the lock applies to the whole file. Optionally, @@ -72,12 +53,44 @@ class Lock(object): self._start = start self._length = length - # PID and host of lock holder + # enable debug mode + self.debug = debug + + # If the user doesn't set a default timeout, or if they choose + # None, 0, etc. then lock attempts will not time out (unless the + # user sets a timeout for each attempt) + self.default_timeout = default_timeout or None + + # PID and host of lock holder (only used in debug mode) self.pid = self.old_pid = None self.host = self.old_host = None - def _lock(self, op, timeout=_default_timeout): - """This takes a lock using POSIX locks (``fnctl.lockf``). + @staticmethod + def _poll_interval_generator(_wait_times=None): + """This implements a backoff scheme for polling a contended resource + by suggesting a succession of wait times between polls. + + It suggests a poll interval of .1s until 2 seconds have passed, + then a poll interval of .2s until 10 seconds have passed, and finally + (for all requests after 10s) suggests a poll interval of .5s. + + This doesn't actually track elapsed time, it estimates the waiting + time as though the caller always waits for the full length of time + suggested by this function. + """ + num_requests = 0 + stage1, stage2, stage3 = _wait_times or (1e-1, 2e-1, 5e-1) + wait_time = stage1 + while True: + if num_requests >= 60: # 40 * .2 = 8 + wait_time = stage3 + elif num_requests >= 20: # 20 * .1 = 2 + wait_time = stage2 + num_requests += 1 + yield wait_time + + def _lock(self, op, timeout=None): + """This takes a lock using POSIX locks (``fcntl.lockf``). The lock is implemented as a spin lock using a nonblocking call to ``lockf()``. @@ -86,77 +99,113 @@ class Lock(object): pid and host to the lock file, in case the holding process needs to be killed later. - If the lock times out, it raises a ``LockError``. + If the lock times out, it raises a ``LockError``. If the lock is + successfully acquired, the total wait time and the number of attempts + is returned. """ - start_time = time.time() - while (time.time() - start_time) < timeout: - try: - # If we could write the file, we'd have opened it 'r+'. - # Raise an error when we attempt to upgrade to a write lock. - if op == fcntl.LOCK_EX: - if self._file and self._file.mode == 'r': - raise LockError( - "Can't take exclusive lock on read-only file: %s" - % self.path) - - # Create file and parent directories if they don't exist. - if self._file is None: - self._ensure_parent_directory() - - # Prefer to open 'r+' to allow upgrading to write - # lock later if possible. Open read-only if we can't - # write the lock file at all. - os_mode, fd_mode = (os.O_RDWR | os.O_CREAT), 'r+' - if os.path.exists(self.path) and not os.access( - self.path, os.W_OK): + assert op in (fcntl.LOCK_SH, fcntl.LOCK_EX) + + timeout = timeout or self.default_timeout + + # Create file and parent directories if they don't exist. + if self._file is None: + parent = self._ensure_parent_directory() + + # Open writable files as 'r+' so we can upgrade to write later + os_mode, fd_mode = (os.O_RDWR | os.O_CREAT), 'r+' + if os.path.exists(self.path): + if not os.access(self.path, os.W_OK): + if op == fcntl.LOCK_SH: + # can still lock read-only files if we open 'r' os_mode, fd_mode = os.O_RDONLY, 'r' + else: + raise LockROFileError(self.path) + + elif not os.access(parent, os.W_OK): + raise CantCreateLockError(self.path) + + fd = os.open(self.path, os_mode) + self._file = os.fdopen(fd, fd_mode) - fd = os.open(self.path, os_mode) - self._file = os.fdopen(fd, fd_mode) + elif op == fcntl.LOCK_EX and self._file.mode == 'r': + # Attempt to upgrade to write lock w/a read-only file. + # If the file were writable, we'd have opened it 'r+' + raise LockROFileError(self.path) - # Try to get the lock (will raise if not available.) - fcntl.lockf(self._file, op | fcntl.LOCK_NB, - self._length, self._start, os.SEEK_SET) + poll_intervals = iter(Lock._poll_interval_generator()) + start_time = time.time() + num_attempts = 0 + while (not timeout) or (time.time() - start_time) < timeout: + num_attempts += 1 + if self._poll_lock(op): + total_wait_time = time.time() - start_time + return total_wait_time, num_attempts + + time.sleep(next(poll_intervals)) + + num_attempts += 1 + if self._poll_lock(op): + total_wait_time = time.time() - start_time + return total_wait_time, num_attempts + + raise LockTimeoutError("Timed out waiting for lock.") + + def _poll_lock(self, op): + """Attempt to acquire the lock in a non-blocking manner. Return whether + the locking attempt succeeds + """ + try: + # Try to get the lock (will raise if not available.) + fcntl.lockf(self._file, op | fcntl.LOCK_NB, + self._length, self._start, os.SEEK_SET) + # help for debugging distributed locking + if self.debug: # All locks read the owner PID and host - self._read_lock_data() + self._read_debug_data() # Exclusive locks write their PID/host if op == fcntl.LOCK_EX: - self._write_lock_data() + self._write_debug_data() - return - - except IOError as e: - if e.errno in (errno.EAGAIN, errno.EACCES): - # EAGAIN and EACCES == locked by another process - pass - else: - raise - time.sleep(_sleep_time) + return True - raise LockError("Timed out waiting for lock.") + except IOError as e: + if e.errno in (errno.EAGAIN, errno.EACCES): + # EAGAIN and EACCES == locked by another process + pass + else: + raise def _ensure_parent_directory(self): parent = os.path.dirname(self.path) + + # relative paths to lockfiles in the current directory have no parent + if not parent: + return '.' + try: os.makedirs(parent) - return True except OSError as e: # makedirs can fail when diretory already exists. if not (e.errno == errno.EEXIST and os.path.isdir(parent) or e.errno == errno.EISDIR): raise + return parent - def _read_lock_data(self): + def _read_debug_data(self): """Read PID and host data out of the file if it is there.""" + self.old_pid = self.pid + self.old_host = self.host + line = self._file.read() if line: pid, host = line.strip().split(',') _, _, self.pid = pid.rpartition('=') _, _, self.host = host.rpartition('=') + self.pid = int(self.pid) - def _write_lock_data(self): + def _write_debug_data(self): """Write PID and host data to the file, recording old values.""" self.old_pid = self.pid self.old_host = self.host @@ -183,7 +232,7 @@ class Lock(object): self._file.close() self._file = None - def acquire_read(self, timeout=_default_timeout): + def acquire_read(self, timeout=None): """Acquires a recursive, shared lock for reading. Read and write locks can be acquired and released in arbitrary @@ -194,19 +243,22 @@ class Lock(object): the POSIX lock, False if it is a nested transaction. """ + timeout = timeout or self.default_timeout + if self._reads == 0 and self._writes == 0: - tty.debug('READ LOCK: {0.path}[{0._start}:{0._length}] [Acquiring]' - .format(self)) - self._lock(fcntl.LOCK_SH, timeout=timeout) # can raise LockError. - tty.debug('READ LOCK: {0.path}[{0._start}:{0._length}] [Acquired]' - .format(self)) + self._debug( + 'READ LOCK: {0.path}[{0._start}:{0._length}] [Acquiring]' + .format(self)) + # can raise LockError. + wait_time, nattempts = self._lock(fcntl.LOCK_SH, timeout=timeout) + self._acquired_debug('READ LOCK', wait_time, nattempts) self._reads += 1 return True else: self._reads += 1 return False - def acquire_write(self, timeout=_default_timeout): + def acquire_write(self, timeout=None): """Acquires a recursive, exclusive lock for writing. Read and write locks can be acquired and released in arbitrary @@ -217,13 +269,15 @@ class Lock(object): the POSIX lock, False if it is a nested transaction. """ + timeout = timeout or self.default_timeout + if self._writes == 0: - tty.debug( + self._debug( 'WRITE LOCK: {0.path}[{0._start}:{0._length}] [Acquiring]' .format(self)) - self._lock(fcntl.LOCK_EX, timeout=timeout) # can raise LockError. - tty.debug('WRITE LOCK: {0.path}[{0._start}:{0._length}] [Acquired]' - .format(self)) + # can raise LockError. + wait_time, nattempts = self._lock(fcntl.LOCK_EX, timeout=timeout) + self._acquired_debug('WRITE LOCK', wait_time, nattempts) self._writes += 1 return True else: @@ -243,8 +297,9 @@ class Lock(object): assert self._reads > 0 if self._reads == 1 and self._writes == 0: - tty.debug('READ LOCK: {0.path}[{0._start}:{0._length}] [Released]' - .format(self)) + self._debug( + 'READ LOCK: {0.path}[{0._start}:{0._length}] [Released]' + .format(self)) self._unlock() # can raise LockError. self._reads -= 1 return True @@ -265,8 +320,9 @@ class Lock(object): assert self._writes > 0 if self._writes == 1 and self._reads == 0: - tty.debug('WRITE LOCK: {0.path}[{0._start}:{0._length}] [Released]' - .format(self)) + self._debug( + 'WRITE LOCK: {0.path}[{0._start}:{0._length}] [Released]' + .format(self)) self._unlock() # can raise LockError. self._writes -= 1 return True @@ -274,6 +330,21 @@ class Lock(object): self._writes -= 1 return False + def _debug(self, *args): + tty.debug(*args) + + def _acquired_debug(self, lock_type, wait_time, nattempts): + attempts_format = 'attempt' if nattempts == 1 else 'attempt' + if nattempts > 1: + acquired_attempts_format = ' after {0:0.2f}s and {1:d} {2}'.format( + wait_time, nattempts, attempts_format) + else: + # Dont print anything if we succeeded immediately + acquired_attempts_format = '' + self._debug( + '{0}: {1.path}[{1._start}:{1._length}] [Acquired{2}]' + .format(lock_type, self, acquired_attempts_format)) + class LockTransaction(object): """Simple nested transaction context manager that uses a file lock. @@ -295,7 +366,7 @@ class LockTransaction(object): """ def __init__(self, lock, acquire_fn=None, release_fn=None, - timeout=_default_timeout): + timeout=None): self._lock = lock self._timeout = timeout self._acquire_fn = acquire_fn @@ -323,7 +394,7 @@ class LockTransaction(object): class ReadTransaction(LockTransaction): - + """LockTransaction context manager that does a read and releases it.""" def _enter(self): return self._lock.acquire_read(self._timeout) @@ -332,7 +403,7 @@ class ReadTransaction(LockTransaction): class WriteTransaction(LockTransaction): - + """LockTransaction context manager that does a write and releases it.""" def _enter(self): return self._lock.acquire_write(self._timeout) @@ -341,4 +412,27 @@ class WriteTransaction(LockTransaction): class LockError(Exception): + """Raised for any errors related to locks.""" + + +class LockTimeoutError(LockError): """Raised when an attempt to acquire a lock times out.""" + + +class LockPermissionError(LockError): + """Raised when there are permission issues with a lock.""" + + +class LockROFileError(LockPermissionError): + """Tried to take an exclusive lock on a read-only file.""" + def __init__(self, path): + msg = "Can't take write lock on read-only file: %s" % path + super(LockROFileError, self).__init__(msg) + + +class CantCreateLockError(LockPermissionError): + """Attempt to create a lock in an unwritable location.""" + def __init__(self, path): + msg = "cannot create lock '%s': " % path + msg += "file does not exist and location is not writable" + super(LockError, self).__init__(msg) |