Lichen (annotate pyparser/parser.py in c8ba74a474eb)

Lichen

Annotated pyparser/parser.py

775:c8ba74a474eb

2017-03-25

Paul Boddie

Fixed method name.

paul@437	1	"""
paul@437	2	A CPython inspired RPython parser.
paul@437	3	"""
paul@437	4
paul@437	5
class Grammar(object):
    """
    Base Grammar object.

    Pass this to ParserGenerator.build_grammar to fill it with useful values for
    the Parser.
    """

    def __init__(self):
        """Create a grammar whose tables are all empty.

        ``labels`` starts with a single 0 entry and ``start`` is -1 until a
        start symbol is assigned.
        """
        self.symbol_ids = {}
        self.symbol_names = {}
        self.symbol_to_label = {}
        self.keyword_ids = {}
        self.dfas = []
        self.labels = [0]
        self.token_ids = {}
        self.start = -1

    def shared_copy(self):
        """Return a new instance of the same class sharing this grammar's tables.

        The symbol, keyword, DFA, label and token tables are shared by
        reference, not copied.  ``symbol_to_label`` and ``start`` are not
        carried over; the copy keeps the fresh values set by ``__init__``.
        """
        new = self.__class__()
        new.symbol_ids = self.symbol_ids
        # This used to assign to a misspelt "symbols_names" attribute, which
        # left the copy with the empty symbol_names table from __init__.
        new.symbol_names = self.symbol_names
        new.keyword_ids = self.keyword_ids
        new.dfas = self.dfas
        new.labels = self.labels
        new.token_ids = self.token_ids
        return new

    def _freeze_(self):
        """Delete the attributes that parsing does not need and return True.

        The deletions stop at the first attribute that is already missing, so
        calling this a second time is harmless.
        """
        # Remove some attributes not used in parsing.
        try:
            del self.symbol_to_label
            del self.symbol_names
            del self.symbol_ids
        except AttributeError:
            pass
        return True
paul@437	43
paul@437	44
class Node(object):
    """Abstract base class for parse tree nodes.

    Only the node ``type`` is stored here.  Subclasses supply equality, child
    access and position lookup; the defaults describe a node with no value
    and no children.
    """

    __slots__ = ("type", )

    def __init__(self, type):
        self.type = type

    # -- comparison ---------------------------------------------------------

    def __eq__(self, other):
        """Subclasses must define equality."""
        raise NotImplementedError("abstract base class")

    def __ne__(self, other):
        """Inequality is the negation of whatever ``==`` yields."""
        equal = (self == other)
        return not equal

    # -- defaults -----------------------------------------------------------

    def get_value(self):
        """Return the node's value; None unless a subclass overrides this."""
        return None

    def num_children(self):
        """Return the number of children; 0 unless a subclass overrides this."""
        return 0

    # -- abstract operations ------------------------------------------------

    def get_child(self, i):
        """Return child ``i``; must be provided by subclasses."""
        raise NotImplementedError("abstract base class")

    def append_child(self, child):
        """Add ``child`` to this node; must be provided by subclasses."""
        raise NotImplementedError("abstract base class")

    def get_lineno(self):
        """Return the node's line number; must be provided by subclasses."""
        raise NotImplementedError("abstract base class")

    def get_column(self):
        """Return the node's column; must be provided by subclasses."""
        raise NotImplementedError("abstract base class")
paul@437	75
paul@437	76
class Terminal(Node):
    """Leaf node for a single token: its type, value and source position."""

    __slots__ = ("value", "lineno", "column")

    def __init__(self, type, value, lineno, column):
        Node.__init__(self, type)
        self.value = value
        self.lineno = lineno
        self.column = column

    def __repr__(self):
        details = (self.type, self.value)
        return "Terminal(type=%s, value=%r)" % details

    def __eq__(self, other):
        """Compare by exact class, type and value; position is ignored."""
        # For tests.
        if type(self) != type(other):
            return False
        return self.type == other.type and self.value == other.value

    def get_value(self):
        """Return the token's value."""
        return self.value

    def get_lineno(self):
        """Return the line number recorded for the token."""
        return self.lineno

    def get_column(self):
        """Return the column recorded for the token."""
        return self.column
paul@437	102
paul@437	103
class AbstractNonterminal(Node):
    """Shared behaviour of interior nodes, expressed via the child accessors.

    Position is taken from the first child, and equality compares type,
    child count and each child in order.
    """

    __slots__ = ()

    def get_lineno(self):
        """Return the line number of the first child."""
        first = self.get_child(0)
        return first.get_lineno()

    def get_column(self):
        """Return the column of the first child."""
        first = self.get_child(0)
        return first.get_column()

    def __eq__(self, other):
        """Structural comparison against another non-terminal node."""
        # For tests.
        if not isinstance(other, AbstractNonterminal):
            return False
        if (self.type != other.type or
                self.num_children() != other.num_children()):
            return False
        # Equal only when no pair of corresponding children differs.
        return not any(self.get_child(index) != other.get_child(index)
                       for index in range(self.num_children()))
paul@437	126
paul@437	127
class Nonterminal(AbstractNonterminal):
    """Interior node that keeps its children in a list.

    The list passed to the constructor is stored as-is (not copied) and is
    the one ``append_child`` extends.
    """

    __slots__ = ("_children", )

    def __init__(self, type, children):
        Node.__init__(self, type)
        self._children = children

    def __repr__(self):
        details = (self.type, self._children)
        return "Nonterminal(type=%s, children=%r)" % details

    def get_child(self, i):
        """Return the child at index ``i`` of the child list."""
        children = self._children
        return children[i]

    def num_children(self):
        """Return how many children are stored."""
        children = self._children
        return len(children)

    def append_child(self, child):
        """Add ``child`` at the end of the child list."""
        children = self._children
        children.append(child)
paul@437	145
paul@437	146
class Nonterminal1(AbstractNonterminal):
    """Interior node holding exactly one child.

    The repr deliberately uses the same ``Nonterminal(...)`` spelling as a
    list-backed node with a single child.
    """

    __slots__ = ("_child", )

    def __init__(self, type, child):
        Node.__init__(self, type)
        self._child = child

    def __repr__(self):
        details = (self.type, self._child)
        return "Nonterminal(type=%s, children=[%r])" % details

    def get_child(self, i):
        """Return the only child; ``i`` must be 0 or -1."""
        assert i in (0, -1)
        return self._child

    def num_children(self):
        """Always 1."""
        return 1

    def append_child(self, child):
        """Not supported: a single-child node cannot grow."""
        assert False, "should be unreachable"
paul@437	165
paul@437	166
class NonterminalEnc(Nonterminal1):
    """Single-child node that additionally carries an ``encoding`` value."""

    def __init__(self, type, child, encoding):
        Nonterminal1.__init__(self, type, child)
        self.encoding = encoding

    def __repr__(self):
        details = (self.type, self._child, self.encoding)
        return "NonterminalEnc(type=%s, child=%r, encoding=%r)" % details
paul@438	174
paul@437	175
class ParseError(Exception):
    """Error raised when a token cannot be classified or accepted.

    Attributes mirror the constructor arguments: ``msg`` (short description),
    ``token_type`` and ``value`` of the offending token, its ``lineno`` and
    ``column``, the source ``line`` it came from, and ``expected`` (a symbol
    or token id when exactly one input would have been valid, otherwise -1).
    """

    def __init__(self, msg, token_type, value, lineno, column, line,
                 expected=-1):
        # Forward every argument to Exception so that ``args`` is populated.
        # Copying and unpickling rebuild an exception by calling its class
        # with ``args``; with empty ``args`` that call fails because this
        # constructor has required parameters.
        Exception.__init__(self, msg, token_type, value, lineno, column,
                           line, expected)
        self.msg = msg
        self.token_type = token_type
        self.value = value
        self.lineno = lineno
        self.column = column
        self.line = line
        self.expected = expected

    def __str__(self):
        # The "ParserError" spelling is kept unchanged so existing output
        # and any comparisons against it stay the same.
        return "ParserError(%s, %r)" % (self.token_type, self.value)
paul@437	190
paul@437	191
class Parser(object):
    """Stack-based parser driven by the DFAs of a grammar object.

    ``stack`` is a list of ``(dfa, state_index, node)`` entries, where a dfa
    is a ``(states, first)`` pair and each state is an
    ``(arcs, is_accepting)`` pair.  ``root`` receives the finished tree when
    the last stack entry is popped.
    """

    def __init__(self, grammar):
        self.grammar = grammar
        self.root = None
        self.stack = None

    def prepare(self, start=-1):
        """Setup the parser for parsing.

        Takes the starting symbol as an argument; -1 selects
        ``grammar.start``.
        """
        if start == -1:
            start = self.grammar.start
        self.root = None
        top_node = Nonterminal(start, [])
        self.stack = []
        # Symbol ids start at 256, so the DFA list is indexed by id - 256.
        start_dfa = self.grammar.dfas[start - 256]
        self.stack.append((start_dfa, 0, top_node))

    def add_token(self, token_type, value, lineno, column, line):
        """Feed one token to the parser.

        Returns True when this token completes the parse (the stack has been
        emptied) and False when more input is expected.  Raises ParseError
        for a token that cannot be classified ("invalid token"), that no arc
        accepts ("bad input"), or that arrives after the stack has been
        emptied by an accepting state ("too much input").
        """
        label_index = self.classify(token_type, value, lineno, column, line)
        sym_id = 0 # for the annotator
        while True:
            dfa, state_index, _node = self.stack[-1]
            states, _first = dfa
            arcs, is_accepting = states[state_index]
            for label, next_state in arcs:
                sym_id = self.grammar.labels[label]
                if label == label_index:
                    # The token itself matches this arc: consume it.
                    self.shift(next_state, token_type, value, lineno, column)
                    pending_arcs, accepting = states[next_state]
                    # While the only possible action is to accept, pop nodes
                    # off the stack.
                    while accepting and not pending_arcs:
                        self.pop()
                        if not self.stack:
                            # Parsing is done.
                            return True
                        top_dfa, top_index, _top_node = self.stack[-1]
                        pending_arcs, accepting = top_dfa[0][top_index]
                    return False
                if sym_id >= 256:
                    child_dfa = self.grammar.dfas[sym_id - 256]
                    # Check if this token can start a child node.
                    if label_index in child_dfa[1]:
                        self.push(child_dfa, next_state, sym_id, lineno,
                                  column)
                        # Retry the same token inside the child node.
                        break
            else:
                # No arc applied, so unless this state is accepting the
                # input is invalid.
                if not is_accepting:
                    # If only one possible input would satisfy, attach it to
                    # the error.
                    expected = sym_id if len(arcs) == 1 else -1
                    raise ParseError("bad input", token_type, value, lineno,
                                     column, line, expected)
                self.pop()
                if not self.stack:
                    raise ParseError("too much input", token_type, value,
                                     lineno, column, line)

    def classify(self, token_type, value, lineno, column, line):
        """Find the label for a token.

        Tokens of the grammar's KEYWORD_TOKEN type are looked up by value
        among the keywords first; otherwise, or when the value is not a
        keyword, the token type is looked up.  Raises ParseError
        ("invalid token") when the type is unknown.
        """
        grammar = self.grammar
        if token_type == grammar.KEYWORD_TOKEN:
            keyword_label = grammar.keyword_ids.get(value, -1)
            if keyword_label != -1:
                return keyword_label
        type_label = grammar.token_ids.get(token_type, -1)
        if type_label == -1:
            raise ParseError("invalid token", token_type, value, lineno, column,
                             line)
        return type_label

    def shift(self, next_state, token_type, value, lineno, column):
        """Append the token as a Terminal child and move to ``next_state``."""
        dfa, _state, node = self.stack[-1]
        leaf = Terminal(token_type, value, lineno, column)
        node.append_child(leaf)
        self.stack[-1] = (dfa, next_state, node)

    def push(self, next_dfa, next_state, node_type, lineno, column):
        """Start a new Nonterminal and adjust the current state.

        The current entry advances to ``next_state`` and a fresh entry for
        ``next_dfa`` is placed on top at state 0.  ``lineno`` and ``column``
        are accepted but not used.
        """
        dfa, _state, node = self.stack[-1]
        child = Nonterminal(node_type, [])
        self.stack[-1] = (dfa, next_state, node)
        self.stack.append((next_dfa, 0, child))

    def pop(self):
        """Pop an entry off the stack and make its node a child of the last."""
        _dfa, _state, node = self.stack.pop()
        if not self.stack:
            # Nothing left to attach to: this node is the finished tree.
            self.root = node
            return
        # we are now done with node, so we can store it more efficiently if
        # it has just one child
        if node.num_children() == 1:
            node = Nonterminal1(node.type, node.get_child(0))
        self.stack[-1][2].append_child(node)