From 4a7b0afde2e04305697da2d441a560c2f976c3c4 Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Mon, 31 May 2021 20:33:14 -0700 Subject: Log performance improvement (#23925) * util.tty.log: read up to 100 lines if ready Rework to read up to 100 lines from the captured stdin as long as data is ready to be read immediately. Adds a helper function to poll with `select` for ready data. This showed a roughly 5-10x perf improvement for high-rate writes through the logger with relatively short lines. * util.tty.log: Defer flushes to end of ready reads Rather than flush per line, flush per set of reads. Since this is a non-blocking loop, the total perceived wait is short. * util.tty.log: only scan each line once, usually Rather than always find all control characters then substitute them all, use `subn` to count the number of control characters replaced. Only if control characters exist find out what they are. This could be made truly single pass with sub with a function, but it's a more intrusive change and this got 99%ish of the performance improvement (roughly another 2x in some cases). * util.tty.log: remove check for `readable` Python < 3 does not support a readable check on streams, should not be necessary here since we control the only use and it's explicitly a stream to be read. --- lib/spack/llnl/util/tty/log.py | 59 ++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/spack/llnl/util/tty/log.py b/lib/spack/llnl/util/tty/log.py index 337eb0ac68..28f4f06edf 100644 --- a/lib/spack/llnl/util/tty/log.py +++ b/lib/spack/llnl/util/tty/log.py @@ -771,28 +771,39 @@ def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo, raise if in_pipe in rlist: - # Handle output from the calling process. - line = _retry(in_pipe.readline)() - if not line: - break - - # find control characters and strip them. - controls = control.findall(line) - line = control.sub('', line) - - # Echo to stdout if requested or forced. - if echo or force_echo: - sys.stdout.write(line) - sys.stdout.flush() - - # Stripped output to log file. - log_file.write(_strip(line)) - log_file.flush() - - if xon in controls: - force_echo = True - if xoff in controls: - force_echo = False + line_count = 0 + try: + while line_count < 100: + # Handle output from the calling process. + line = _retry(in_pipe.readline)() + if not line: + return + line_count += 1 + + # find control characters and strip them. + clean_line, num_controls = control.subn('', line) + + # Echo to stdout if requested or forced. + if echo or force_echo: + sys.stdout.write(clean_line) + + # Stripped output to log file. + log_file.write(_strip(clean_line)) + + if num_controls > 0: + controls = control.findall(line) + if xon in controls: + force_echo = True + if xoff in controls: + force_echo = False + + if not _input_available(in_pipe): + break + finally: + if line_count > 0: + if echo or force_echo: + sys.stdout.flush() + log_file.flush() except BaseException: tty.error("Exception occurred in writer daemon!") @@ -844,3 +855,7 @@ def _retry(function): continue raise return wrapped + + +def _input_available(f): + return f in select.select([f], [], [], 0)[0] -- cgit v1.2.3-70-g09d2