From ab794fa7413d1650c1ba01ea18e74964877fdf99 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 30 Jun 2018 22:05:50 -0700 Subject: locks: llnl.util.lock now only writes host info when in debug mode - write locks previously wrote information about the lock holder (host and pid), and read locks woudl read this in. - This is really only for debugging, so only enable it then - add some tests that target debug info, and improve multiproc lock test output --- lib/spack/llnl/util/lock.py | 27 +++++++---- lib/spack/spack/test/llnl/util/lock.py | 84 ++++++++++++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index 0d072418cc..794b201668 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -51,7 +51,7 @@ class Lock(object): is enabled) and recent NFS versions. """ - def __init__(self, path, start=0, length=0): + def __init__(self, path, start=0, length=0, debug=False): """Construct a new lock on the file at ``path``. By default, the lock applies to the whole file. Optionally, @@ -72,7 +72,10 @@ class Lock(object): self._start = start self._length = length - # PID and host of lock holder + # enable debug mode + self.debug = debug + + # PID and host of lock holder (only used in debug mode) self.pid = self.old_pid = None self.host = self.old_host = None @@ -118,12 +121,14 @@ class Lock(object): fcntl.lockf(self._file, op | fcntl.LOCK_NB, self._length, self._start, os.SEEK_SET) - # All locks read the owner PID and host - self._read_lock_data() + # help for debugging distributed locking + if self.debug: + # All locks read the owner PID and host + self._read_debug_data() - # Exclusive locks write their PID/host - if op == fcntl.LOCK_EX: - self._write_lock_data() + # Exclusive locks write their PID/host + if op == fcntl.LOCK_EX: + self._write_debug_data() return @@ -148,15 +153,19 @@ class Lock(object): e.errno == errno.EISDIR): raise - def _read_lock_data(self): + def _read_debug_data(self): """Read PID and host data out of the file if it is there.""" + self.old_pid = self.pid + self.old_host = self.host + line = self._file.read() if line: pid, host = line.strip().split(',') _, _, self.pid = pid.rpartition('=') _, _, self.host = host.rpartition('=') + self.pid = int(self.pid) - def _write_lock_data(self): + def _write_debug_data(self): """Write PID and host data to the file, recording old values.""" self.old_pid = self.pid self.old_host = self.host diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 7c38a2547f..5127462c58 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -62,13 +62,14 @@ actually on a shared filesystem. """ import os +import socket import shutil import tempfile import traceback import glob import getpass from contextlib import contextmanager -from multiprocessing import Process +from multiprocessing import Process, Queue import pytest @@ -201,17 +202,21 @@ def lock_path(lock_dir): os.unlink(lock_file) -def local_multiproc_test(*functions): +def local_multiproc_test(*functions, **kwargs): """Order some processes using simple barrier synchronization.""" b = mp.Barrier(len(functions), timeout=barrier_timeout) - procs = [Process(target=f, args=(b,)) for f in functions] + + args = (b,) + tuple(kwargs.get('extra_args', ())) + procs = [Process(target=f, args=args, name=f.__name__) + for f in functions] for p in procs: p.start() for p in procs: p.join() - assert p.exitcode == 0 + + assert all(p.exitcode == 0 for p in procs) def mpi_multiproc_test(*functions): @@ -907,3 +912,74 @@ def test_transaction_with_context_manager_and_exception(lock_path): assert vals['exception'] assert not vals['exited_fn'] assert not vals['exception_fn'] + + +def test_lock_debug_output(lock_path): + host = socket.getfqdn() + + def p1(barrier, q1, q2): + # exchange pids + p1_pid = os.getpid() + q1.put(p1_pid) + p2_pid = q2.get() + + # set up lock + lock = lk.Lock(lock_path, debug=True) + + with lk.WriteTransaction(lock): + # p1 takes write lock and writes pid/host to file + barrier.wait() # ------------------------------------ 1 + + assert lock.pid == p1_pid + assert lock.host == host + + # wait for p2 to verify contents of file + barrier.wait() # ---------------------------------------- 2 + + # wait for p2 to take a write lock + barrier.wait() # ---------------------------------------- 3 + + # verify pid/host info again + with lk.ReadTransaction(lock): + assert lock.old_pid == p1_pid + assert lock.old_host == host + + assert lock.pid == p2_pid + assert lock.host == host + + barrier.wait() # ---------------------------------------- 4 + + def p2(barrier, q1, q2): + # exchange pids + p2_pid = os.getpid() + p1_pid = q1.get() + q2.put(p2_pid) + + # set up lock + lock = lk.Lock(lock_path, debug=True) + + # p1 takes write lock and writes pid/host to file + barrier.wait() # ---------------------------------------- 1 + + # verify that p1 wrote information to lock file + with lk.ReadTransaction(lock): + assert lock.pid == p1_pid + assert lock.host == host + + barrier.wait() # ---------------------------------------- 2 + + # take a write lock on the file and verify pid/host info + with lk.WriteTransaction(lock): + assert lock.old_pid == p1_pid + assert lock.old_host == host + + assert lock.pid == p2_pid + assert lock.host == host + + barrier.wait() # ------------------------------------ 3 + + # wait for p1 to verify pid/host info + barrier.wait() # ---------------------------------------- 4 + + q1, q2 = Queue(), Queue() + local_multiproc_test(p2, p1, extra_args=(q1, q2)) -- cgit v1.2.3-70-g09d2