Lichen

pyparser/test/test_parser.py

940:6ddce984649b
2021-10-30 Paul Boddie Fixed expected result in comment.
     1 # New parser tests.     2 import py     3 import tokenize     4 import StringIO     5 from pyparser import parser, metaparser, pygram     6 from pyparser.test.test_metaparser import MyGrammar     7 from pyparser.pygram import tokens as token     8      9     10 class SimpleParser(parser.Parser):    11     12     def parse(self, input):    13         self.prepare()    14         rl = StringIO.StringIO(input + "\n").readline    15         gen = tokenize.generate_tokens(rl)    16         for tp, value, begin, end, line in gen:    17             if self.add_token(tp, value, begin[0], begin[1], line):    18                 py.test.raises(StopIteration, gen.next)    19         return self.root    20     21     22 def tree_from_string(expected, gram):    23     def count_indent(s):    24         indent = 0    25         for char in s:    26             if char != " ":    27                 break    28             indent += 1    29         return indent    30     last_newline_index = 0    31     for i, char in enumerate(expected):    32         if char == "\n":    33             last_newline_index = i    34         elif char != " ":    35             break    36     if last_newline_index:    37         expected = expected[last_newline_index + 1:]    38     base_indent = count_indent(expected)    39     assert not divmod(base_indent, 4)[1], "not using 4 space indentation"    40     lines = [line[base_indent:] for line in expected.splitlines()]    41     last_indent = 0    42     node_stack = []    43     for line in lines:    44         if not line.strip():    45             continue    46         data = line.split()    47         if data[0].isupper():    48             tp = token.get(data[0])    49             if len(data) == 2:    50                 value = data[1].strip("\"")    51             elif tp == token["NEWLINE"]:    52                 value = "\n"    53             else:    54                 value = ""    55             n = parser.Terminal(tp, value, 0, 0)    56         else:    57             tp = gram.symbol_ids[data[0]]    58             children = []    59             n = parser.Nonterminal(tp, children)    60         new_indent = count_indent(line)    61         if new_indent >= last_indent:    62             if new_indent == last_indent and node_stack:    63                 node_stack.pop()    64             if node_stack:    65                 node_stack[-1].append_child(n)    66             node_stack.append(n)    67         else:    68             diff = last_indent - new_indent    69             pop_nodes = diff // 4 + 1    70             del node_stack[-pop_nodes:]    71             node_stack[-1].append_child(n)    72             node_stack.append(n)    73         last_indent = new_indent    74     return node_stack[0]    75     76     77 class TestParser:    78     79     def parser_for(self, gram, add_endmarker=True):    80         if add_endmarker:    81             gram += " NEWLINE ENDMARKER\n"    82         pgen = metaparser.ParserGenerator(gram)    83         g = pgen.build_grammar(MyGrammar)    84         return SimpleParser(g), g    85     86     def test_multiple_rules(self):    87         gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER    88 bar: NAME NUMBER\n"""    89         p, gram = self.parser_for(gram, False)    90         expected = """    91         foo    92             NAME "next_rule"    93             bar    94                 NAME "a_name"    95                 NUMBER "42"    96             NAME "end"    97             NEWLINE    98             ENDMARKER"""    99         input = "next_rule a_name 42 end"   100         assert tree_from_string(expected, gram) == p.parse(input)   101    102     def test_recursive_rule(self):   103         gram = """foo: NAME bar STRING NEWLINE ENDMARKER   104 bar: NAME [bar] NUMBER\n"""   105         p, gram = self.parser_for(gram, False)   106         expected = """   107         foo   108             NAME "hi"   109             bar   110                 NAME "hello"   111                 bar   112                     NAME "a_name"   113                     NUMBER "32"   114                 NUMBER "42"   115             STRING "'string'"   116             NEWLINE   117             ENDMARKER"""   118         input = "hi hello a_name 32 42 'string'"   119         assert tree_from_string(expected, gram) == p.parse(input)   120    121     def test_symbol(self):   122         gram = """parent: first_child second_child NEWLINE ENDMARKER   123 first_child: NAME age   124 second_child: STRING   125 age: NUMBER\n"""   126         p, gram = self.parser_for(gram, False)   127         expected = """   128         parent   129             first_child   130                 NAME "harry"   131                 age   132                      NUMBER "13"   133             second_child   134                 STRING "'fred'"   135             NEWLINE   136             ENDMARKER"""   137         input = "harry 13 'fred'"   138         assert tree_from_string(expected, gram) == p.parse(input)   139    140     def test_token(self):   141         p, gram = self.parser_for("foo: NAME")   142         expected = """   143         foo   144            NAME "hi"   145            NEWLINE   146            ENDMARKER"""   147         assert tree_from_string(expected, gram) == p.parse("hi")   148         py.test.raises(parser.ParseError, p.parse, "567")   149         p, gram = self.parser_for("foo: NUMBER NAME STRING")   150         expected = """   151         foo   152            NUMBER "42"   153            NAME "hi"   154            STRING "'bar'"   155            NEWLINE   156            ENDMARKER"""   157         assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'")   158    159     def test_optional(self):   160         p, gram = self.parser_for("foo: [NAME] 'end'")   161         expected = """   162         foo   163             NAME "hi"   164             NAME "end"   165             NEWLINE   166             ENDMARKER"""   167         assert tree_from_string(expected, gram) == p.parse("hi end")   168         expected = """   169         foo   170             NAME "end"   171             NEWLINE   172             ENDMARKER"""   173         assert tree_from_string(expected, gram) == p.parse("end")   174    175     def test_grouping(self):   176         p, gram = self.parser_for(   177             "foo: ((NUMBER NAME | STRING) | 'second_option')")   178         expected = """   179         foo   180             NUMBER "42"   181             NAME "hi"   182             NEWLINE   183             ENDMARKER"""   184         assert tree_from_string(expected, gram) == p.parse("42 hi")   185         expected = """   186         foo   187             STRING "'hi'"   188             NEWLINE   189             ENDMARKER"""   190         assert tree_from_string(expected, gram) == p.parse("'hi'")   191         expected = """   192         foo   193             NAME "second_option"   194             NEWLINE   195             ENDMARKER"""   196         assert tree_from_string(expected, gram) == p.parse("second_option")   197         py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'")   198         py.test.raises(parser.ParseError, p.parse, "42 second_option")   199    200     def test_alternative(self):   201         p, gram = self.parser_for("foo: (NAME | NUMBER)")   202         expected = """   203         foo   204             NAME "hi"   205             NEWLINE   206             ENDMARKER"""   207         assert tree_from_string(expected, gram) == p.parse("hi")   208         expected = """   209         foo   210             NUMBER "42"   211             NEWLINE   212             ENDMARKER"""   213         assert tree_from_string(expected, gram) == p.parse("42")   214         py.test.raises(parser.ParseError, p.parse, "hi 23")   215         py.test.raises(parser.ParseError, p.parse, "23 hi")   216         py.test.raises(parser.ParseError, p.parse, "'some string'")   217    218     def test_keyword(self):   219         p, gram = self.parser_for("foo: 'key'")   220         expected = """   221         foo   222             NAME "key"   223             NEWLINE   224             ENDMARKER"""   225         assert tree_from_string(expected, gram) == p.parse("key")   226         py.test.raises(parser.ParseError, p.parse, "")   227         p, gram = self.parser_for("foo: NAME 'key'")   228         expected = """   229         foo   230             NAME "some_name"   231             NAME "key"   232             NEWLINE   233             ENDMARKER"""   234         assert tree_from_string(expected, gram) == p.parse("some_name key")   235         py.test.raises(parser.ParseError, p.parse, "some_name")   236    237     def test_repeaters(self):   238         p, gram = self.parser_for("foo: NAME+ 'end'")   239         expected = """   240         foo   241             NAME "hi"   242             NAME "bye"   243             NAME "nothing"   244             NAME "end"   245             NEWLINE   246             ENDMARKER"""   247         assert tree_from_string(expected, gram) == p.parse("hi bye nothing end")   248         py.test.raises(parser.ParseError, p.parse, "end")   249         py.test.raises(parser.ParseError, p.parse, "hi bye")   250         p, gram = self.parser_for("foo: NAME* 'end'")   251         expected = """   252         foo   253             NAME "hi"   254             NAME "bye"   255             NAME "end"   256             NEWLINE   257             ENDMARKER"""   258         assert tree_from_string(expected, gram) == p.parse("hi bye end")   259         py.test.raises(parser.ParseError, p.parse, "hi bye")   260         expected = """   261         foo   262             NAME "end"   263             NEWLINE   264             ENDMARKER"""   265         assert tree_from_string(expected, gram) == p.parse("end")   266    267         p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'")   268         expected = """   269         foo   270             NAME "a_name"   271             NAME "name_two"   272             NAME "end"   273             NEWLINE   274             ENDMARKER"""   275         assert tree_from_string(expected, gram) == p.parse("a_name name_two end")   276         expected = """   277         foo   278             NUMBER "42"   279             NAME "name"   280             NAME "end"   281             NEWLINE   282             ENDMARKER"""   283         assert tree_from_string(expected, gram) == p.parse("42 name end")   284         py.test.raises(parser.ParseError, p.parse, "end")   285         p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'")   286         expected = """   287         foo   288             NAME "hi"   289             NUMBER 42   290             NAME "end"   291             NEWLINE   292             ENDMARKER"""   293         assert tree_from_string(expected, gram) == p.parse("hi 42 end")