summary | refs | log | tree | commit | diff
path: root/lib/spack/external/ctest_log_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/spack/external/ctest_log_parser.py')
-rw-r--r-- lib/spack/external/ctest_log_parser.py 270
1 files changed, 214 insertions, 56 deletions
diff --git a/lib/spack/external/ctest_log_parser.py b/lib/spack/external/ctest_log_parser.py
index 6ef9642d85..0437b6e524 100644
--- a/lib/spack/external/ctest_log_parser.py
+++ b/lib/spack/external/ctest_log_parser.py
@@ -65,30 +65,72 @@ algorithms that duplicate the way CTest scrapes log files. To keep this
up to date with CTest, just make sure the ``*_matches`` and
``*_exceptions`` lists are kept up to date with CTest's build handler.
"""
+from __future__ import print_function
+from __future__ import division
+
import re
+import math
+import multiprocessing
+import time
+from contextlib import contextmanager
+
from six import StringIO
from six import string_types
+class prefilter(object):
+ """Make regular expressions faster with a simple prefiltering predicate.
+
+ Some regular expressions seem to be much more costly than others. In
+ most cases, we can evaluate a simple precondition, e.g.::
+
+ lambda x: "error" in x
+
+ to avoid evaluating expensive regexes on all lines in a file. This
+ can reduce parse time for large files by orders of magnitude when
+ evaluating lots of expressions.
+
+ A ``prefilter`` object is designed to act like a regex, but
+ ``search`` and ``match`` check the precondition before bothering to
+ evaluate the regular expression.
-error_matches = [
+ Note that ``match`` and ``search`` just return ``True`` and ``False``
+ at the moment. Make them return a ``MatchObject`` or ``None`` if it
+ becomes necessary.
+ """
+ def __init__(self, precondition, *patterns):
+ self.patterns = [re.compile(p) for p in patterns]
+ self.pre = precondition
+ self.pattern = "\n ".join(
+ ('MERGED:',) + patterns)
+
+ def search(self, text):
+ return self.pre(text) and any(p.search(text) for p in self.patterns)
+
+ def match(self, text):
+ return self.pre(text) and any(p.match(text) for p in self.patterns)
+
+
+_error_matches = [
+ prefilter(
+ lambda x: any(s in x for s in (
+ 'Error:', 'error', 'undefined reference', 'multiply defined')),
+ "([^:]+): error[ \\t]*[0-9]+[ \\t]*:",
+ "([^:]+): (Error:|error|undefined reference|multiply defined)",
+ "([^ :]+) ?: (error|fatal error|catastrophic error)",
+ "([^:]+)\\(([^\\)]+)\\) ?: (error|fatal error|catastrophic error)"),
"^[Bb]us [Ee]rror",
"^[Ss]egmentation [Vv]iolation",
"^[Ss]egmentation [Ff]ault",
":.*[Pp]ermission [Dd]enied",
- "([^ :]+):([0-9]+): ([^ \\t])",
- "([^:]+): error[ \\t]*[0-9]+[ \\t]*:",
"^Error ([0-9]+):",
"^Fatal",
- "^Error: ",
+ "^[Ee]rror: ",
"^Error ",
"[0-9] ERROR: ",
"^\"[^\"]+\", line [0-9]+: [^Ww]",
"^cc[^C]*CC: ERROR File = ([^,]+), Line = ([0-9]+)",
"^ld([^:])*:([ \\t])*ERROR([^:])*:",
"^ild:([ \\t])*\\(undefined symbol\\)",
- "([^ :]+) : (error|fatal error|catastrophic error)",
- "([^:]+): (Error:|error|undefined reference|multiply defined)",
- "([^:]+)\\(([^\\)]+)\\) ?: (error|fatal error|catastrophic error)",
"^fatal error C[0-9]+:",
": syntax error ",
"^collect2: ld returned 1 exit status",
@@ -128,7 +170,7 @@ error_matches = [
"^Command .* failed with exit code",
]
-error_exceptions = [
+_error_exceptions = [
"instantiated from ",
"candidates are:",
": warning",
@@ -143,32 +185,38 @@ error_exceptions = [
]
#: Regexes to match file/line numbers in error/warning messages
-warning_matches = [
- "([^ :]+):([0-9]+): warning:",
- "([^ :]+):([0-9]+): note:",
+_warning_matches = [
+ prefilter(
+ lambda x: 'warning' in x,
+ "([^ :]+):([0-9]+): warning:",
+ "([^:]+): warning ([0-9]+):",
+ "([^:]+): warning[ \\t]*[0-9]+[ \\t]*:",
+ "([^ :]+) : warning",
+ "([^:]+): warning"),
+ prefilter(
+ lambda x: 'note:' in x,
+ "^([^ :]+):([0-9]+): note:"),
+ prefilter(
+ lambda x: any(s in x for s in ('Warning', 'Warnung')),
+ "^(Warning|Warnung) ([0-9]+):",
+ "^(Warning|Warnung)[ :]",
+ "^cxx: Warning:",
+ "([^ :]+):([0-9]+): (Warning|Warnung)",
+ "^CMake Warning.*:"),
+ "file: .* has no symbols",
"^cc[^C]*CC: WARNING File = ([^,]+), Line = ([0-9]+)",
"^ld([^:])*:([ \\t])*WARNING([^:])*:",
- "([^:]+): warning ([0-9]+):",
"^\"[^\"]+\", line [0-9]+: [Ww](arning|arnung)",
- "([^:]+): warning[ \\t]*[0-9]+[ \\t]*:",
- "^(Warning|Warnung) ([0-9]+):",
- "^(Warning|Warnung)[ :]",
"WARNING: ",
- "([^ :]+) : warning",
- "([^:]+): warning",
"\", line [0-9]+\\.[0-9]+: [0-9]+-[0-9]+ \\([WI]\\)",
- "^cxx: Warning:",
- ".*file: .* has no symbols",
- "([^ :]+):([0-9]+): (Warning|Warnung)",
"\\([0-9]*\\): remark #[0-9]*",
"\".*\", line [0-9]+: remark\\([0-9]*\\):",
"cc-[0-9]* CC: REMARK File = .*, Line = [0-9]*",
- "^CMake Warning.*:",
"^\\[WARNING\\]",
]
#: Regexes to match file/line numbers in error/warning messages
-warning_exceptions = [
+_warning_exceptions = [
"/usr/.*/X11/Xlib\\.h:[0-9]+: war.*: ANSI C\\+\\+ forbids declaration",
"/usr/.*/X11/Xutil\\.h:[0-9]+: war.*: ANSI C\\+\\+ forbids declaration",
"/usr/.*/X11/XResource\\.h:[0-9]+: war.*: ANSI C\\+\\+ forbids declaration",
@@ -188,7 +236,7 @@ warning_exceptions = [
]
#: Regexes to match file/line numbers in error/warning messages
-file_line_matches = [
+_file_line_matches = [
"^Warning W[0-9]+ ([a-zA-Z.\\:/0-9_+ ~-]+) ([0-9]+):",
"^([a-zA-Z./0-9_+ ~-]+):([0-9]+):",
"^([a-zA-Z.\\:/0-9_+ ~-]+)\\(([0-9]+)\\)",
@@ -250,25 +298,120 @@ class BuildWarning(LogEvent):
"""LogEvent subclass for build warnings."""
+def chunks(l, n):
+ """Divide l into n approximately-even chunks."""
+ chunksize = int(math.ceil(len(l) / n))
+ return [l[i:i + chunksize] for i in range(0, len(l), chunksize)]
+
+
+@contextmanager
+def _time(times, i):
+ start = time.time()
+ yield
+ end = time.time()
+ times[i] += end - start
+
+
def _match(matches, exceptions, line):
"""True if line matches a regex in matches and none in exceptions."""
return (any(m.search(line) for m in matches) and
not any(e.search(line) for e in exceptions))
-class CTestLogParser(object):
- """Log file parser that extracts errors and warnings."""
- def __init__(self):
- def compile(regex_array):
- return [re.compile(regex) for regex in regex_array]
+def _profile_match(matches, exceptions, line, match_times, exc_times):
+ """Profiled version of match().
+
+ Timing is expensive so we have two whole functions. This is much
+ longer because we have to break up the ``any()`` calls.
+
+ """
+ for i, m in enumerate(matches):
+ with _time(match_times, i):
+ if m.search(line):
+ break
+ else:
+ return False
+
+ for i, m in enumerate(exceptions):
+ with _time(exc_times, i):
+ if m.search(line):
+ return False
+ else:
+ return True
+
+
+def _parse(lines, offset, profile):
+ def compile(regex_array):
+ return [regex if isinstance(regex, prefilter) else re.compile(regex)
+ for regex in regex_array]
+
+ error_matches = compile(_error_matches)
+ error_exceptions = compile(_error_exceptions)
+ warning_matches = compile(_warning_matches)
+ warning_exceptions = compile(_warning_exceptions)
+ file_line_matches = compile(_file_line_matches)
+
+ matcher, args = _match, []
+ timings = []
+ if profile:
+ matcher = _profile_match
+ timings = [
+ [0.0] * len(error_matches), [0.0] * len(error_exceptions),
+ [0.0] * len(warning_matches), [0.0] * len(warning_exceptions)]
+
+ errors = []
+ warnings = []
+ for i, line in enumerate(lines):
+ # use CTest's regular expressions to scrape the log for events
+ if matcher(error_matches, error_exceptions, line, *timings[:2]):
+ event = BuildError(line.strip(), offset + i + 1)
+ errors.append(event)
+ elif matcher(warning_matches, warning_exceptions, line, *timings[2:]):
+ event = BuildWarning(line.strip(), offset + i + 1)
+ warnings.append(event)
+ else:
+ continue
+
+ # get file/line number for each event, if possible
+ for flm in file_line_matches:
+ match = flm.search(line)
+ if match:
+ event.source_file, event.source_line_no = match.groups()
+
+ return errors, warnings, timings
+
+
+def _parse_unpack(args):
+ return _parse(*args)
- self.error_matches = compile(error_matches)
- self.error_exceptions = compile(error_exceptions)
- self.warning_matches = compile(warning_matches)
- self.warning_exceptions = compile(warning_exceptions)
- self.file_line_matches = compile(file_line_matches)
- def parse(self, stream, context=6):
+class CTestLogParser(object):
+ """Log file parser that extracts errors and warnings."""
+ def __init__(self, profile=False):
+ # whether to record timing information
+ self.timings = []
+ self.profile = profile
+
+ def print_timings(self):
+ """Print out profile of time spent in different regular expressions."""
+ def stringify(elt):
+ return elt if isinstance(elt, str) else elt.pattern
+
+ index = 0
+ for name, arr in [('error_matches', _error_matches),
+ ('error_exceptions', _error_exceptions),
+ ('warning_matches', _warning_matches),
+ ('warning_exceptions', _warning_exceptions)]:
+
+ print()
+ print(name)
+ for i, elt in enumerate(arr):
+ print("%16.2f %s" % (
+ self.timings[index][i] * 1e6, stringify(elt)))
+ index += 1
+
+
+ def parse(self, stream, context=6, jobs=None):
"""Parse a log file by searching each line for errors and warnings.
Args:
@@ -276,35 +419,50 @@ class CTestLogParser(object):
context (int): lines of context to extract around each log event
Returns:
- (tuple): two lists containig ``BuildError`` and
+ (tuple): two lists containing ``BuildError`` and
``BuildWarning`` objects.
"""
if isinstance(stream, string_types):
with open(stream) as f:
- return self.parse(f)
+ return self.parse(f, context, jobs)
lines = [line for line in stream]
- errors = []
- warnings = []
- for i, line in enumerate(lines):
- # use CTest's regular expressions to scrape the log for events
- if _match(self.error_matches, self.error_exceptions, line):
- event = BuildError(line.strip(), i + 1)
- errors.append(event)
- elif _match(self.warning_matches, self.warning_exceptions, line):
- event = BuildWarning(line.strip(), i + 1)
- warnings.append(event)
- else:
- continue
-
- # get file/line number for each event, if possible
- for flm in self.file_line_matches:
- match = flm.search(line)
- if match:
- event.source_file, source_line_no = match.groups()
-
- # add log context, as well
+ if jobs is None:
+ jobs = multiprocessing.cpu_count()
+
+ # single-thread small logs
+ if len(lines) < 10 * jobs:
+ errors, warnings, self.timings = _parse(lines, 0, self.profile)
+
+ else:
+ # Build arguments for parallel jobs
+ args = []
+ offset = 0
+ for chunk in chunks(lines, jobs):
+ args.append((chunk, offset, self.profile))
+ offset += len(chunk)
+
+ # create a pool and farm out the matching job
+ pool = multiprocessing.Pool(jobs)
+ try:
+ # workaround for a Python bug in Pool: get() needs a timeout to honor ctrl-C
+ results = pool.map_async(_parse_unpack, args, 1).get(9999999)
+ errors, warnings, timings = zip(*results)
+ finally:
+ pool.terminate()
+
+ # merge results
+ errors = sum(errors, [])
+ warnings = sum(warnings, [])
+
+ if self.profile:
+ self.timings = [
+ [sum(i) for i in zip(*t)] for t in zip(*timings)]
+
+ # add log context to all events
+ for event in (errors + warnings):
+ i = event.line_no - 1
event.pre_context = [
l.rstrip() for l in lines[i - context:i]]
event.post_context = [