paul@437 | 1 | # New parser tests. |
paul@437 | 2 | import py |
paul@437 | 3 | import tokenize |
paul@437 | 4 | import StringIO |
paul@437 | 5 | from pyparser import parser, metaparser, pygram |
paul@437 | 6 | from pyparser.test.test_metaparser import MyGrammar |
paul@439 | 7 | from pyparser.pygram import tokens as token |
paul@437 | 8 | |
paul@437 | 9 | |
class SimpleParser(parser.Parser):
    """Parser subclass for the tests that tokenizes a string with ``tokenize``."""

    def parse(self, input):
        """Tokenize ``input`` and feed every token to ``add_token``.

        A line break is appended to ``input`` before it is tokenized.  For
        each token, its type, text, start row, start column and source line
        are passed to ``add_token``.  Returns ``self.root``.
        """
        self.prepare()
        stream = StringIO.StringIO(input + "\n")
        token_gen = tokenize.generate_tokens(stream.readline)
        for tok_type, text, start, _end, source_line in token_gen:
            row, column = start
            finished = self.add_token(tok_type, text, row, column,
                                      source_line)
            if finished:
                # add_token returned a true value (presumably: the parse is
                # complete), so the token stream must be exhausted by now.
                py.test.raises(StopIteration, token_gen.next)
        return self.root
paul@437 | 20 | |
paul@437 | 21 | |
def tree_from_string(expected, gram):
    """Build a parse tree from an indented, one-node-per-line description.

    Every non-blank line of ``expected`` describes one node; nesting is
    expressed by indentation in steps of 4 spaces.  Leading whitespace-only
    lines are dropped and the indentation of the first node line is removed
    from every line.

    A line whose first word is upper case (for example ``NAME "hi"``)
    becomes a ``parser.Terminal`` whose type is looked up in ``token``.  Its
    value is the second word with surrounding double quotes stripped; when
    the line has no second word the value is a line break for NEWLINE and
    the empty string otherwise.  Lines are split on whitespace, so a value
    containing whitespace is not recognised as a value.

    Any other line becomes a ``parser.Nonterminal`` whose type is looked up
    in ``gram.symbol_ids``.

    Returns the first node described (the root).  Asserts that the
    indentation of the first node is a multiple of 4.  An unknown token or
    symbol name raises from the lookup (``KeyError`` if both tables are
    plain mappings), and an ``expected`` with no node lines raises
    ``IndexError``.
    """
    def count_indent(s):
        # Number of leading space characters in ``s``.
        return len(s) - len(s.lstrip(" "))

    # Locate the newline that ends the leading run of whitespace-only lines.
    # Start at -1 rather than 0: a newline at index 0 (the normal case for a
    # triple-quoted literal) must be distinguishable from "no newline seen",
    # otherwise the blank first line is kept, the base indent is measured as
    # 0 and the 4-space check below never runs.
    last_newline_index = -1
    for i, char in enumerate(expected):
        if char == "\n":
            last_newline_index = i
        elif char != " ":
            break
    expected = expected[last_newline_index + 1:]
    base_indent = count_indent(expected)
    assert not base_indent % 4, "not using 4 space indentation"
    lines = [line[base_indent:] for line in expected.splitlines()]
    last_indent = 0
    # Chain of nodes from the root down to the most recently created node.
    node_stack = []
    for line in lines:
        if not line.strip():
            continue
        data = line.split()
        if data[0].isupper():
            # Subscript instead of .get() so that a misspelled token name
            # fails here instead of silently producing a Terminal of type
            # None.
            tp = token[data[0]]
            if len(data) == 2:
                value = data[1].strip("\"")
            elif tp == token["NEWLINE"]:
                value = "\n"
            else:
                value = ""
            # The two trailing zeros are fixed position arguments --
            # presumably line and column; TODO confirm against Terminal.
            n = parser.Terminal(tp, value, 0, 0)
        else:
            tp = gram.symbol_ids[data[0]]
            children = []
            n = parser.Nonterminal(tp, children)
        new_indent = count_indent(line)
        if new_indent >= last_indent:
            if new_indent == last_indent and node_stack:
                # Sibling of the previous node: the previous node is done.
                node_stack.pop()
            if node_stack:
                node_stack[-1].append_child(n)
            node_stack.append(n)
        else:
            # Dedent: drop the previous node plus one ancestor per 4-space
            # step, which leaves the new node's parent on top of the stack.
            diff = last_indent - new_indent
            pop_nodes = diff // 4 + 1
            del node_stack[-pop_nodes:]
            node_stack[-1].append_child(n)
            node_stack.append(n)
        last_indent = new_indent
    return node_stack[0]
paul@437 | 75 | |
paul@437 | 76 | |
class TestParser:
    """Parse short inputs with parsers generated from small grammars.

    Each test compares the tree returned by ``SimpleParser.parse`` with a
    tree built by ``tree_from_string`` from an indented description.
    """

    def parser_for(self, gram, add_endmarker=True):
        """Return ``(SimpleParser, grammar)`` built from grammar text ``gram``.

        When ``add_endmarker`` is true, the tokens NEWLINE ENDMARKER and a
        line break are appended to ``gram`` before the grammar is generated.
        """
        if add_endmarker:
            source = gram + " NEWLINE ENDMARKER\n"
        else:
            source = gram
        generator = metaparser.ParserGenerator(source)
        grammar = generator.build_grammar(MyGrammar)
        return SimpleParser(grammar), grammar

    def test_multiple_rules(self):
        """A rule referenced from another rule appears as a nested node."""
        rules = ("foo: 'next_rule' bar 'end' NEWLINE ENDMARKER\n"
                 "bar: NAME NUMBER\n")
        simple_parser, grammar = self.parser_for(rules, False)
        expected = """
        foo
            NAME "next_rule"
            bar
                NAME "a_name"
                NUMBER "42"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        text = "next_rule a_name 42 end"
        assert tree_from_string(expected, grammar) == simple_parser.parse(text)

    def test_recursive_rule(self):
        """A rule that optionally contains itself nests one level per use."""
        rules = ("foo: NAME bar STRING NEWLINE ENDMARKER\n"
                 "bar: NAME [bar] NUMBER\n")
        simple_parser, grammar = self.parser_for(rules, False)
        expected = """
        foo
            NAME "hi"
            bar
                NAME "hello"
                bar
                    NAME "a_name"
                    NUMBER "32"
                NUMBER "42"
            STRING "'string'"
            NEWLINE
            ENDMARKER"""
        text = "hi hello a_name 32 42 'string'"
        assert tree_from_string(expected, grammar) == simple_parser.parse(text)

    def test_symbol(self):
        """Several rules chained through each other yield nested subtrees."""
        rules = ("parent: first_child second_child NEWLINE ENDMARKER\n"
                 "first_child: NAME age\n"
                 "second_child: STRING\n"
                 "age: NUMBER\n")
        simple_parser, grammar = self.parser_for(rules, False)
        expected = """
        parent
            first_child
                NAME "harry"
                age
                    NUMBER "13"
            second_child
                STRING "'fred'"
            NEWLINE
            ENDMARKER"""
        text = "harry 13 'fred'"
        assert tree_from_string(expected, grammar) == simple_parser.parse(text)

    def test_token(self):
        """Plain token rules accept matching tokens and reject others."""
        simple_parser, grammar = self.parser_for("foo: NAME")
        expected = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, grammar) == simple_parser.parse("hi")
        py.test.raises(parser.ParseError, simple_parser.parse, "567")

        simple_parser, grammar = self.parser_for("foo: NUMBER NAME STRING")
        expected = """
        foo
            NUMBER "42"
            NAME "hi"
            STRING "'bar'"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("42 hi 'bar'"))

    def test_optional(self):
        """A bracketed item may be present or absent."""
        simple_parser, grammar = self.parser_for("foo: [NAME] 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("hi end"))
        expected = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, grammar) == simple_parser.parse("end")

    def test_grouping(self):
        """Nested parenthesised alternatives each parse; mixtures fail."""
        simple_parser, grammar = self.parser_for(
            "foo: ((NUMBER NAME | STRING) | 'second_option')")
        expected = """
        foo
            NUMBER "42"
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("42 hi"))
        expected = """
        foo
            STRING "'hi'"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("'hi'"))
        expected = """
        foo
            NAME "second_option"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("second_option"))
        py.test.raises(parser.ParseError, simple_parser.parse,
                       "42 a_name 'hi'")
        py.test.raises(parser.ParseError, simple_parser.parse,
                       "42 second_option")

    def test_alternative(self):
        """Exactly one branch of ``(NAME | NUMBER)`` is accepted."""
        simple_parser, grammar = self.parser_for("foo: (NAME | NUMBER)")
        expected = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, grammar) == simple_parser.parse("hi")
        expected = """
        foo
            NUMBER "42"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, grammar) == simple_parser.parse("42")
        py.test.raises(parser.ParseError, simple_parser.parse, "hi 23")
        py.test.raises(parser.ParseError, simple_parser.parse, "23 hi")
        py.test.raises(parser.ParseError, simple_parser.parse,
                       "'some string'")

    def test_keyword(self):
        """A quoted keyword in the grammar matches only that exact name."""
        simple_parser, grammar = self.parser_for("foo: 'key'")
        expected = """
        foo
            NAME "key"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, grammar) == simple_parser.parse("key")
        py.test.raises(parser.ParseError, simple_parser.parse, "")

        simple_parser, grammar = self.parser_for("foo: NAME 'key'")
        expected = """
        foo
            NAME "some_name"
            NAME "key"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("some_name key"))
        py.test.raises(parser.ParseError, simple_parser.parse, "some_name")

    def test_repeaters(self):
        """``+`` and ``*`` repetition on single tokens and on groups."""
        # One or more NAME tokens followed by the keyword.
        simple_parser, grammar = self.parser_for("foo: NAME+ 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "nothing"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("hi bye nothing end"))
        py.test.raises(parser.ParseError, simple_parser.parse, "end")
        py.test.raises(parser.ParseError, simple_parser.parse, "hi bye")

        # Zero or more NAME tokens followed by the keyword.
        simple_parser, grammar = self.parser_for("foo: NAME* 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("hi bye end"))
        py.test.raises(parser.ParseError, simple_parser.parse, "hi bye")
        expected = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, grammar) == simple_parser.parse("end")

        # One or more items drawn from a group of alternatives.
        simple_parser, grammar = self.parser_for("foo: (NAME | NUMBER)+ 'end'")
        expected = """
        foo
            NAME "a_name"
            NAME "name_two"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("a_name name_two end"))
        expected = """
        foo
            NUMBER "42"
            NAME "name"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("42 name end"))
        py.test.raises(parser.ParseError, simple_parser.parse, "end")

        # Zero or more items drawn from a group of alternatives.
        simple_parser, grammar = self.parser_for("foo: (NAME | NUMBER)* 'end'")
        # The unquoted 42 below yields the same value as a quoted one:
        # tree_from_string only strips double quotes when they are present.
        expected = """
        foo
            NAME "hi"
            NUMBER 42
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert (tree_from_string(expected, grammar) ==
                simple_parser.parse("hi 42 end"))