# New parser tests.
import py
import tokenize
import StringIO
from pyparser import parser, metaparser, pygram
from pyparser.test.test_metaparser import MyGrammar
from pyparser.pygram import tokens as token


class SimpleParser(parser.Parser):
    """Parser subclass that is driven by the stdlib tokenizer."""

    def parse(self, input):
        """Tokenize *input* with ``tokenize`` and feed every token to the parser.

        A newline is appended to *input* before tokenizing.  Each token's type,
        value, start row, start column and source line are passed to
        ``add_token``.  Returns ``self.root``.
        """
        self.prepare()
        rl = StringIO.StringIO(input + "\n").readline
        gen = tokenize.generate_tokens(rl)
        for tp, value, begin, end, line in gen:
            if self.add_token(tp, value, begin[0], begin[1], line):
                # A true result presumably means the parse is complete
                # (confirm against parser.Parser.add_token); at that point the
                # tokenizer must have nothing left to produce.
                py.test.raises(StopIteration, gen.next)
        return self.root


def tree_from_string(expected, gram):
    """Build a parse tree from an indented textual description.

    Each non-blank line of *expected* describes one node:

    * a line whose first word is upper case is a terminal; the word is looked
      up in the ``token`` mapping and an optional second word (with any
      surrounding double quotes stripped) is its value.  Without a second
      word the value is ``"\\n"`` for NEWLINE and ``""`` otherwise.
    * any other line is a nonterminal whose first word is looked up in
      ``gram.symbol_ids``.

    Nesting is expressed with 4 spaces per level.  Leading blank lines are
    dropped and the indentation of the first remaining line is removed from
    every line.  Returns the root node.
    """
    def count_indent(s):
        # Number of leading space characters (tabs are not counted).
        return len(s) - len(s.lstrip(" "))

    # Find the last newline that precedes the first non-blank character.
    # -1 means "no leading newline"; 0 cannot be used as the sentinel because
    # the expected strings normally start with a newline at index 0.
    last_newline_index = -1
    for i, char in enumerate(expected):
        if char == "\n":
            last_newline_index = i
        elif char != " ":
            break
    expected = expected[last_newline_index + 1:]
    base_indent = count_indent(expected)
    assert not divmod(base_indent, 4)[1], "not using 4 space indentation"
    lines = [line[base_indent:] for line in expected.splitlines()]
    last_indent = 0
    # Path from the root to the most recently created node.
    node_stack = []
    for line in lines:
        if not line.strip():
            continue
        data = line.split()
        if data[0].isupper():
            tp = token.get(data[0])
            if len(data) == 2:
                value = data[1].strip("\"")
            elif tp == token["NEWLINE"]:
                value = "\n"
            else:
                value = ""
            n = parser.Terminal(tp, value, 0, 0)
        else:
            tp = gram.symbol_ids[data[0]]
            children = []
            n = parser.Nonterminal(tp, children)
        new_indent = count_indent(line)
        if new_indent >= last_indent:
            # Same level: the previous node is a sibling, so drop it to
            # expose the shared parent.  Deeper: the previous node is the
            # parent and stays on the stack.
            if new_indent == last_indent and node_stack:
                node_stack.pop()
            if node_stack:
                node_stack[-1].append_child(n)
            node_stack.append(n)
        else:
            # Dedent: drop the previous node plus one ancestor for every
            # 4 columns of dedent, leaving the new node's parent on top.
            diff = last_indent - new_indent
            pop_nodes = diff // 4 + 1
            del node_stack[-pop_nodes:]
            node_stack[-1].append_child(n)
            node_stack.append(n)
        last_indent = new_indent
    return node_stack[0]


class TestParser:
    """Checks trees produced by SimpleParser against hand-written trees."""

    def parser_for(self, gram, add_endmarker=True):
        """Return ``(SimpleParser, grammar)`` built from the grammar text.

        When *add_endmarker* is true, ``" NEWLINE ENDMARKER\\n"`` is appended
        to the grammar text before it is handed to the parser generator.
        """
        if add_endmarker:
            gram += " NEWLINE ENDMARKER\n"
        pgen = metaparser.ParserGenerator(gram)
        g = pgen.build_grammar(MyGrammar)
        return SimpleParser(g), g

    def test_multiple_rules(self):
        """A rule that references a second rule yields a nested node."""
        gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER
bar: NAME NUMBER\n"""
        p, gram = self.parser_for(gram, False)
        expected = """
        foo
            NAME "next_rule"
            bar
                NAME "a_name"
                NUMBER "42"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        input = "next_rule a_name 42 end"
        assert tree_from_string(expected, gram) == p.parse(input)

    def test_recursive_rule(self):
        """A rule that optionally contains itself nests recursively."""
        gram = """foo: NAME bar STRING NEWLINE ENDMARKER
bar: NAME [bar] NUMBER\n"""
        p, gram = self.parser_for(gram, False)
        expected = """
        foo
            NAME "hi"
            bar
                NAME "hello"
                bar
                    NAME "a_name"
                    NUMBER "32"
                NUMBER "42"
            STRING "'string'"
            NEWLINE
            ENDMARKER"""
        input = "hi hello a_name 32 42 'string'"
        assert tree_from_string(expected, gram) == p.parse(input)

    def test_symbol(self):
        """Several levels of symbol references each get their own node."""
        gram = """parent: first_child second_child NEWLINE ENDMARKER
first_child: NAME age
second_child: STRING
age: NUMBER\n"""
        p, gram = self.parser_for(gram, False)
        expected = """
        parent
            first_child
                NAME "harry"
                age
                    NUMBER "13"
            second_child
                STRING "'fred'"
            NEWLINE
            ENDMARKER"""
        input = "harry 13 'fred'"
        assert tree_from_string(expected, gram) == p.parse(input)

    def test_token(self):
        """Plain token types match tokens of that type only."""
        p, gram = self.parser_for("foo: NAME")
        expected = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi")
        py.test.raises(parser.ParseError, p.parse, "567")
        p, gram = self.parser_for("foo: NUMBER NAME STRING")
        expected = """
        foo
            NUMBER "42"
            NAME "hi"
            STRING "'bar'"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'")

    def test_optional(self):
        """A bracketed item may be present or absent."""
        p, gram = self.parser_for("foo: [NAME] 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi end")
        expected = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("end")

    def test_grouping(self):
        """Nested parenthesized alternatives accept exactly one branch."""
        p, gram = self.parser_for(
            "foo: ((NUMBER NAME | STRING) | 'second_option')")
        expected = """
        foo
            NUMBER "42"
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42 hi")
        expected = """
        foo
            STRING "'hi'"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("'hi'")
        expected = """
        foo
            NAME "second_option"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("second_option")
        py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'")
        py.test.raises(parser.ParseError, p.parse, "42 second_option")

    def test_alternative(self):
        """``|`` accepts either side but not both and nothing else."""
        p, gram = self.parser_for("foo: (NAME | NUMBER)")
        expected = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi")
        expected = """
        foo
            NUMBER "42"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42")
        py.test.raises(parser.ParseError, p.parse, "hi 23")
        py.test.raises(parser.ParseError, p.parse, "23 hi")
        py.test.raises(parser.ParseError, p.parse, "'some string'")

    def test_keyword(self):
        """A quoted word in the grammar matches that exact NAME token."""
        p, gram = self.parser_for("foo: 'key'")
        expected = """
        foo
            NAME "key"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("key")
        py.test.raises(parser.ParseError, p.parse, "")
        p, gram = self.parser_for("foo: NAME 'key'")
        expected = """
        foo
            NAME "some_name"
            NAME "key"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("some_name key")
        py.test.raises(parser.ParseError, p.parse, "some_name")

    def test_repeaters(self):
        """``+`` needs at least one item; ``*`` also accepts none."""
        p, gram = self.parser_for("foo: NAME+ 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "nothing"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi bye nothing end")
        py.test.raises(parser.ParseError, p.parse, "end")
        py.test.raises(parser.ParseError, p.parse, "hi bye")
        p, gram = self.parser_for("foo: NAME* 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi bye end")
        py.test.raises(parser.ParseError, p.parse, "hi bye")
        expected = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("end")

        p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'")
        expected = """
        foo
            NAME "a_name"
            NAME "name_two"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("a_name name_two end")
        expected = """
        foo
            NUMBER "42"
            NAME "name"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42 name end")
        py.test.raises(parser.ParseError, p.parse, "end")
        p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'")
        expected = """
        foo
            NAME "hi"
            NUMBER 42
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi 42 end")