summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthew Whitlock <MatthewWhitlock.MW@gmail.com> 2024-06-27 09:07:30 -0400
committer: GitHub <noreply@github.com> 2024-06-27 07:07:30 -0600
commit: 22c86074c8c14791a6e345ddaf17c53a305b000c (patch)
tree: 58a3033702738b56ea9882d0411631d451dcb730
parent: ef9e449322b89ec9f5e3e8ef95db4375bea55501 (diff)
download: spack-22c86074c8c14791a6e345ddaf17c53a305b000c.tar.gz
download: spack-22c86074c8c14791a6e345ddaf17c53a305b000c.tar.bz2
download: spack-22c86074c8c14791a6e345ddaf17c53a305b000c.tar.xz
download: spack-22c86074c8c14791a6e345ddaf17c53a305b000c.zip
Fix bug in logfile parsing (#42706)
* Fix bug in logfile parsing
* Cite ECMA-48 standards for CSI

Co-authored-by: Wouter Deconinck <wdconinc@gmail.com>

---------

Co-authored-by: Wouter Deconinck <wdconinc@gmail.com>
-rw-r--r--  lib/spack/llnl/util/tty/log.py  17
-rw-r--r--  lib/spack/spack/test/llnl/util/tty/log.py  15
2 files changed, 31 insertions, 1 deletion
diff --git a/lib/spack/llnl/util/tty/log.py b/lib/spack/llnl/util/tty/log.py
index 16fe45edfc..aeb1114c5a 100644
--- a/lib/spack/llnl/util/tty/log.py
+++ b/lib/spack/llnl/util/tty/log.py
@@ -33,8 +33,23 @@ except ImportError:
pass
+esc, bell, lbracket, bslash, newline = r"\x1b", r"\x07", r"\[", r"\\", r"\n"
+# Ansi Control Sequence Introducers (CSI) are a well-defined format
+# Standard ECMA-48: Control Functions for Character-Imaging I/O Devices, section 5.4
+# https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf
+csi_pre = f"{esc}{lbracket}"
+csi_param, csi_inter, csi_post = r"[0-?]", r"[ -/]", r"[@-~]"
+ansi_csi = f"{csi_pre}{csi_param}*{csi_inter}*{csi_post}"
+# General ansi escape sequences have well-defined prefixes,
+# but content and suffixes are less reliable.
+# Conservatively assume they end with either "<ESC>\" or "<BELL>",
+# with no intervening "<ESC>"/"<BELL>" keys or newlines
+esc_pre = f"{esc}[@-_]"
+esc_content = f"[^{esc}{bell}{newline}]"
+esc_post = f"(?:{esc}{bslash}|{bell})"
+ansi_esc = f"{esc_pre}{esc_content}*{esc_post}"
# Use this to strip escape sequences
-_escape = re.compile(r"\x1b[^m]*m|\x1b\[?1034h|\x1b\][0-9]+;[^\x07]*\x07")
+_escape = re.compile(f"{ansi_csi}|{ansi_esc}")
# control characters for enabling/disabling echo
#
diff --git a/lib/spack/spack/test/llnl/util/tty/log.py b/lib/spack/spack/test/llnl/util/tty/log.py
index b2f8cd8a9e..3ff3ee995c 100644
--- a/lib/spack/spack/test/llnl/util/tty/log.py
+++ b/lib/spack/spack/test/llnl/util/tty/log.py
@@ -93,6 +93,21 @@ def test_log_python_output_and_echo_output(capfd, tmpdir):
assert capfd.readouterr()[0] == "force echo\n"
+def test_log_output_with_control_codes(capfd, tmpdir):
+ with tmpdir.as_cwd():
+ with log.log_output("foo.txt"):
+ # Print a sample of formatted GCC error output
+ # Line obtained from the file generated by running gcc on a nonexistent file:
+ # gcc -fdiagnostics-color=always ./test.cpp 2>test.log
+ csi = "\x1b["
+ print(
+ f"{csi}01m{csi}Kgcc:{csi}m{csi}K {csi}01;31m{csi}Kerror: {csi}m{csi}K./test.cpp:"
+ )
+
+ with open("foo.txt") as f:
+ assert f.read() == "gcc: error: ./test.cpp:\n"
+
+
def _log_filter_fn(string):
return string.replace("foo", "bar")