1 """ 2 A CPython inspired RPython parser. 3 """ 4 5 6 class Grammar(object): 7 """ 8 Base Grammar object. 9 10 Pass this to ParserGenerator.build_grammar to fill it with useful values for 11 the Parser. 12 """ 13 14 def __init__(self): 15 self.symbol_ids = {} 16 self.symbol_names = {} 17 self.symbol_to_label = {} 18 self.keyword_ids = {} 19 self.dfas = [] 20 self.labels = [0] 21 self.token_ids = {} 22 self.start = -1 23 24 def shared_copy(self): 25 new = self.__class__() 26 new.symbol_ids = self.symbol_ids 27 new.symbols_names = self.symbol_names 28 new.keyword_ids = self.keyword_ids 29 new.dfas = self.dfas 30 new.labels = self.labels 31 new.token_ids = self.token_ids 32 return new 33 34 def _freeze_(self): 35 # Remove some attributes not used in parsing. 36 try: 37 del self.symbol_to_label 38 del self.symbol_names 39 del self.symbol_ids 40 except AttributeError: 41 pass 42 return True 43 44 45 class Node(object): 46 47 __slots__ = ("type", ) 48 49 def __init__(self, type): 50 self.type = type 51 52 def __eq__(self, other): 53 raise NotImplementedError("abstract base class") 54 55 def __ne__(self, other): 56 return not self == other 57 58 def get_value(self): 59 return None 60 61 def get_child(self, i): 62 raise NotImplementedError("abstract base class") 63 64 def num_children(self): 65 return 0 66 67 def append_child(self, child): 68 raise NotImplementedError("abstract base class") 69 70 def get_lineno(self): 71 raise NotImplementedError("abstract base class") 72 73 def get_column(self): 74 raise NotImplementedError("abstract base class") 75 76 77 class Terminal(Node): 78 __slots__ = ("value", "lineno", "column") 79 def __init__(self, type, value, lineno, column): 80 Node.__init__(self, type) 81 self.value = value 82 self.lineno = lineno 83 self.column = column 84 85 def __repr__(self): 86 return "Terminal(type=%s, value=%r)" % (self.type, self.value) 87 88 def __eq__(self, other): 89 # For tests. 90 return (type(self) == type(other) and 91 self.type == other.type and 92 self.value == other.value) 93 94 def get_value(self): 95 return self.value 96 97 def get_lineno(self): 98 return self.lineno 99 100 def get_column(self): 101 return self.column 102 103 104 class AbstractNonterminal(Node): 105 __slots__ = () 106 107 def get_lineno(self): 108 return self.get_child(0).get_lineno() 109 110 def get_column(self): 111 return self.get_child(0).get_column() 112 113 def __eq__(self, other): 114 # For tests. 115 # grumble, annoying 116 if not isinstance(other, AbstractNonterminal): 117 return False 118 if self.type != other.type: 119 return False 120 if self.num_children() != other.num_children(): 121 return False 122 for i in range(self.num_children()): 123 if self.get_child(i) != other.get_child(i): 124 return False 125 return True 126 127 128 class Nonterminal(AbstractNonterminal): 129 __slots__ = ("_children", ) 130 def __init__(self, type, children): 131 Node.__init__(self, type) 132 self._children = children 133 134 def __repr__(self): 135 return "Nonterminal(type=%s, children=%r)" % (self.type, self._children) 136 137 def get_child(self, i): 138 return self._children[i] 139 140 def num_children(self): 141 return len(self._children) 142 143 def append_child(self, child): 144 self._children.append(child) 145 146 147 class Nonterminal1(AbstractNonterminal): 148 __slots__ = ("_child", ) 149 def __init__(self, type, child): 150 Node.__init__(self, type) 151 self._child = child 152 153 def __repr__(self): 154 return "Nonterminal(type=%s, children=[%r])" % (self.type, self._child) 155 156 def get_child(self, i): 157 assert i == 0 or i == -1 158 return self._child 159 160 def num_children(self): 161 return 1 162 163 def append_child(self, child): 164 assert 0, "should be unreachable" 165 166 167 class NonterminalEnc(Nonterminal1): 168 def __init__(self, type, child, encoding): 169 Nonterminal1.__init__(self, type, child) 170 self.encoding = encoding 171 172 def __repr__(self): 173 return "NonterminalEnc(type=%s, child=%r, encoding=%r)" % (self.type, self._child, self.encoding) 174 175 176 class ParseError(Exception): 177 178 def __init__(self, msg, token_type, value, lineno, column, line, 179 expected=-1): 180 self.msg = msg 181 self.token_type = token_type 182 self.value = value 183 self.lineno = lineno 184 self.column = column 185 self.line = line 186 self.expected = expected 187 188 def __str__(self): 189 return "ParserError(%s, %r)" % (self.token_type, self.value) 190 191 192 class Parser(object): 193 194 def __init__(self, grammar): 195 self.grammar = grammar 196 self.root = None 197 self.stack = None 198 199 def prepare(self, start=-1): 200 """Setup the parser for parsing. 201 202 Takes the starting symbol as an argument. 203 """ 204 if start == -1: 205 start = self.grammar.start 206 self.root = None 207 current_node = Nonterminal(start, []) 208 self.stack = [] 209 self.stack.append((self.grammar.dfas[start - 256], 0, current_node)) 210 211 def add_token(self, token_type, value, lineno, column, line): 212 label_index = self.classify(token_type, value, lineno, column, line) 213 sym_id = 0 # for the annotator 214 while True: 215 dfa, state_index, node = self.stack[-1] 216 states, first = dfa 217 arcs, is_accepting = states[state_index] 218 for i, next_state in arcs: 219 sym_id = self.grammar.labels[i] 220 if label_index == i: 221 # We matched a non-terminal. 222 self.shift(next_state, token_type, value, lineno, column) 223 state = states[next_state] 224 # While the only possible action is to accept, pop nodes off 225 # the stack. 226 while state[1] and not state[0]: 227 self.pop() 228 if not self.stack: 229 # Parsing is done. 230 return True 231 dfa, state_index, node = self.stack[-1] 232 state = dfa[0][state_index] 233 return False 234 elif sym_id >= 256: 235 sub_node_dfa = self.grammar.dfas[sym_id - 256] 236 # Check if this token can start a child node. 237 if label_index in sub_node_dfa[1]: 238 self.push(sub_node_dfa, next_state, sym_id, lineno, 239 column) 240 break 241 else: 242 # We failed to find any arcs to another state, so unless this 243 # state is accepting, it's invalid input. 244 if is_accepting: 245 self.pop() 246 if not self.stack: 247 raise ParseError("too much input", token_type, value, 248 lineno, column, line) 249 else: 250 # If only one possible input would satisfy, attach it to the 251 # error. 252 if len(arcs) == 1: 253 expected = sym_id 254 else: 255 expected = -1 256 raise ParseError("bad input", token_type, value, lineno, 257 column, line, expected) 258 259 def classify(self, token_type, value, lineno, column, line): 260 """Find the label for a token.""" 261 if token_type == self.grammar.KEYWORD_TOKEN: 262 label_index = self.grammar.keyword_ids.get(value, -1) 263 if label_index != -1: 264 return label_index 265 label_index = self.grammar.token_ids.get(token_type, -1) 266 if label_index == -1: 267 raise ParseError("invalid token", token_type, value, lineno, column, 268 line) 269 return label_index 270 271 def shift(self, next_state, token_type, value, lineno, column): 272 """Shift a non-terminal and prepare for the next state.""" 273 dfa, state, node = self.stack[-1] 274 new_node = Terminal(token_type, value, lineno, column) 275 node.append_child(new_node) 276 self.stack[-1] = (dfa, next_state, node) 277 278 def push(self, next_dfa, next_state, node_type, lineno, column): 279 """Push a terminal and adjust the current state.""" 280 dfa, state, node = self.stack[-1] 281 new_node = Nonterminal(node_type, []) 282 self.stack[-1] = (dfa, next_state, node) 283 self.stack.append((next_dfa, 0, new_node)) 284 285 def pop(self): 286 """Pop an entry off the stack and make its node a child of the last.""" 287 dfa, state, node = self.stack.pop() 288 if self.stack: 289 # we are now done with node, so we can store it more efficiently if 290 # it has just one child 291 if node.num_children() == 1: 292 node = Nonterminal1(node.type, node.get_child(0)) 293 self.stack[-1][2].append_child(node) 294 else: 295 self.root = node