1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
# Copyright 2013-2020 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import re
import shlex
import sys
import itertools
from six import string_types
import spack.error
class Token(object):
"""Represents tokens; generated from input by lexer and fed to parse()."""
def __init__(self, type, value='', start=0, end=0):
self.type = type
self.value = value
self.start = start
self.end = end
def __repr__(self):
return str(self)
def __str__(self):
return "<%d: '%s'>" % (self.type, self.value)
def is_a(self, type):
return self.type == type
def __eq__(self, other):
return (self.type == other.type) and (self.value == other.value)
class Lexer(object):
"""Base class for Lexers that keep track of line numbers."""
def __init__(self, lexicon0, mode_switches_01=[],
lexicon1=[], mode_switches_10=[]):
self.scanner0 = re.Scanner(lexicon0)
self.mode_switches_01 = mode_switches_01
self.scanner1 = re.Scanner(lexicon1)
self.mode_switches_10 = mode_switches_10
self.mode = 0
def token(self, type, value=''):
if self.mode == 0:
return Token(type, value,
self.scanner0.match.start(0),
self.scanner0.match.end(0))
else:
return Token(type, value,
self.scanner1.match.start(0),
self.scanner1.match.end(0))
def lex_word(self, word):
scanner = self.scanner0
mode_switches = self.mode_switches_01
if self.mode == 1:
scanner = self.scanner1
mode_switches = self.mode_switches_10
tokens, remainder = scanner.scan(word)
remainder_used = 0
for i, t in enumerate(tokens):
if t.type in mode_switches:
# Combine post-switch tokens with remainder and
# scan in other mode
self.mode = 1 - self.mode # swap 0/1
remainder_used = 1
tokens = tokens[:i + 1] + self.lex_word(
word[word.index(t.value) + len(t.value):])
break
if remainder and not remainder_used:
raise LexError("Invalid character", word, word.index(remainder))
return tokens
def lex(self, text):
lexed = []
for word in text:
tokens = self.lex_word(word)
lexed.extend(tokens)
return lexed
class Parser(object):
"""Base class for simple recursive descent parsers."""
def __init__(self, lexer):
self.tokens = iter([]) # iterators over tokens, handled in order.
self.token = Token(None) # last accepted token
self.next = None # next token
self.lexer = lexer
self.text = None
def gettok(self):
"""Puts the next token in the input stream into self.next."""
try:
self.next = next(self.tokens)
except StopIteration:
self.next = None
def push_tokens(self, iterable):
"""Adds all tokens in some iterable to the token stream."""
self.tokens = itertools.chain(
iter(iterable), iter([self.next]), self.tokens)
self.gettok()
def accept(self, id):
"""Put the next symbol in self.token if accepted, then call gettok()"""
if self.next and self.next.is_a(id):
self.token = self.next
self.gettok()
return True
return False
def next_token_error(self, message):
"""Raise an error about the next token in the stream."""
raise ParseError(message, self.text, self.token.end)
def last_token_error(self, message):
"""Raise an error about the previous token in the stream."""
raise ParseError(message, self.text, self.token.start)
def unexpected_token(self):
self.next_token_error("Unexpected token: '%s'" % self.next.value)
def expect(self, id):
"""Like accept(), but fails if we don't like the next token."""
if self.accept(id):
return True
else:
if self.next:
self.unexpected_token()
else:
self.next_token_error("Unexpected end of input")
sys.exit(1)
def setup(self, text):
if isinstance(text, string_types):
text = shlex.split(str(text))
self.text = text
self.push_tokens(self.lexer.lex(text))
def parse(self, text):
self.setup(text)
return self.do_parse()
class ParseError(spack.error.SpackError):
"""Raised when we don't hit an error while parsing."""
def __init__(self, message, string, pos):
super(ParseError, self).__init__(message)
self.string = string
self.pos = pos
class LexError(ParseError):
"""Raised when we don't know how to lex something."""
def __init__(self, message, string, pos):
super(LexError, self).__init__(message, string, pos)
|