Lichen

Annotated pyparser/test/test_parser.py

858:8f8361de472a
2018-07-21 Paul Boddie Invocations in parameter lists may require temporary storage for contexts and accessors, but the storage locations must be different. Otherwise, the code will generate a sequence-point warning. Here, distinct ranges for parameters in lists are introduced, and accessors are now also stored in arrays, permitting distinct storage.
paul@437 1
# New parser tests.
paul@437 2
import py
paul@437 3
import tokenize
paul@437 4
import StringIO
paul@437 5
from pyparser import parser, metaparser, pygram
paul@437 6
from pyparser.test.test_metaparser import MyGrammar
paul@439 7
from pyparser.pygram import tokens as token
paul@437 8
paul@437 9
paul@437 10
class SimpleParser(parser.Parser):
    """Parser subclass for tests that tokenizes a source string itself."""

    def parse(self, input):
        """Tokenize ``input``, feed every token to the parser, return the root.

        A newline is appended to ``input`` before tokenizing.  For each token
        the type, text, starting row, starting column and source line are
        handed to ``add_token``.  Whenever ``add_token`` returns a true value
        (presumably signalling that the parse is complete -- confirm against
        ``parser.Parser``) the token generator must already be exhausted.
        """
        self.prepare()
        source = StringIO.StringIO(input + "\n")
        token_stream = tokenize.generate_tokens(source.readline)
        for tok_type, text, start, _end, source_line in token_stream:
            row, column = start
            if not self.add_token(tok_type, text, row, column, source_line):
                continue
            # No further tokens may follow the one that was accepted last.
            py.test.raises(StopIteration, token_stream.next)
        return self.root
paul@437 20
paul@437 21
paul@437 22
def tree_from_string(expected, gram):
    """Build a tree of parser nodes from an indented textual description.

    Every non-blank line of ``expected`` describes one node:

    * a line whose first word is upper case is a terminal; the word is looked
      up in the module-level ``token`` mapping, and the rest of the line, with
      surrounding whitespace and double quotes removed, is its value.  With no
      value given, ``NEWLINE`` gets ``"\\n"`` and any other token gets ``""``.
    * any other line is a nonterminal whose type is ``gram.symbol_ids[word]``.

    A line indented more than the previous one becomes a child of the previous
    node, a line at the same indentation becomes its sibling, and a line
    indented less closes one additional level per four spaces of dedent.
    Leading blank lines are ignored and the indentation of the first remaining
    line is taken as the base indentation.

    Returns the root node.

    Raises ``AssertionError`` if the base indentation is not a multiple of
    four, and ``ValueError`` if the description is empty, names an unknown
    token, dedents past the root, or contains a second root-level node.
    """
    def count_indent(s):
        # Only spaces count as indentation.
        return len(s) - len(s.lstrip(" "))

    # Locate the last newline inside the leading run of spaces and newlines.
    # -1 means "none found"; a plain truth test on the index would wrongly
    # treat a newline at position 0 as "none found" as well.
    last_newline_index = -1
    for i, char in enumerate(expected):
        if char == "\n":
            last_newline_index = i
        elif char != " ":
            break
    expected = expected[last_newline_index + 1:]

    base_indent = count_indent(expected)
    assert not base_indent % 4, "not using 4 space indentation"

    last_indent = 0
    node_stack = []  # path from the root to the most recently created node
    for raw_line in expected.splitlines():
        if not raw_line.strip():
            continue
        new_indent = count_indent(raw_line) - base_indent
        if new_indent < 0:
            raise ValueError(
                "line is indented less than the root node: %r" % (raw_line,))

        # Split off the node name only, so values may contain whitespace.
        parts = raw_line.split(None, 1)
        name = parts[0]
        if name.isupper():
            tp = token.get(name)
            if tp is None:
                raise ValueError("unknown token name: %r" % (name,))
            if len(parts) == 2:
                value = parts[1].strip().strip("\"")
            elif tp == token["NEWLINE"]:
                value = "\n"
            else:
                value = ""
            n = parser.Terminal(tp, value, 0, 0)
        else:
            tp = gram.symbol_ids[name]
            n = parser.Nonterminal(tp, [])

        if node_stack:
            if new_indent == last_indent:
                # Sibling of the previous node: drop that node from the path.
                node_stack.pop()
            elif new_indent < last_indent:
                # Drop the previous node plus one ancestor per 4-space step.
                pop_nodes = (last_indent - new_indent) // 4 + 1
                del node_stack[-pop_nodes:]
            if not node_stack:
                raise ValueError(
                    "description has more than one root node: %r"
                    % (raw_line,))
            node_stack[-1].append_child(n)
        node_stack.append(n)
        last_indent = new_indent

    if not node_stack:
        raise ValueError("tree description contains no nodes")
    return node_stack[0]
paul@437 75
paul@437 76
paul@437 77
class TestParser:
    """Tests that parse small inputs with grammars generated on the fly.

    Each test builds a parser from a grammar string, parses an input and
    compares the result with a tree built by ``tree_from_string`` from an
    indented description (four spaces per level, values in double quotes).
    """

    def parser_for(self, gram, add_endmarker=True):
        """Return ``(parser, grammar)`` for the grammar source ``gram``.

        When ``add_endmarker`` is true, `` NEWLINE ENDMARKER`` and a newline
        are appended to ``gram`` first, so single-rule grammars can be passed
        without the terminators.
        """
        if add_endmarker:
            gram += " NEWLINE ENDMARKER\n"
        pgen = metaparser.ParserGenerator(gram)
        g = pgen.build_grammar(MyGrammar)
        return SimpleParser(g), g

    def test_multiple_rules(self):
        """A rule may refer to another rule defined after it."""
        gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER
bar: NAME NUMBER\n"""
        p, gram = self.parser_for(gram, False)
        expected = """
        foo
            NAME "next_rule"
            bar
                NAME "a_name"
                NUMBER "42"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        input = "next_rule a_name 42 end"
        assert tree_from_string(expected, gram) == p.parse(input)

    def test_recursive_rule(self):
        """A rule may optionally contain itself."""
        gram = """foo: NAME bar STRING NEWLINE ENDMARKER
bar: NAME [bar] NUMBER\n"""
        p, gram = self.parser_for(gram, False)
        expected = """
        foo
            NAME "hi"
            bar
                NAME "hello"
                bar
                    NAME "a_name"
                    NUMBER "32"
                NUMBER "42"
            STRING "'string'"
            NEWLINE
            ENDMARKER"""
        input = "hi hello a_name 32 42 'string'"
        assert tree_from_string(expected, gram) == p.parse(input)

    def test_symbol(self):
        """Nested symbols produce nested nonterminal nodes."""
        gram = """parent: first_child second_child NEWLINE ENDMARKER
first_child: NAME age
second_child: STRING
age: NUMBER\n"""
        p, gram = self.parser_for(gram, False)
        expected = """
        parent
            first_child
                NAME "harry"
                age
                    NUMBER "13"
            second_child
                STRING "'fred'"
            NEWLINE
            ENDMARKER"""
        input = "harry 13 'fred'"
        assert tree_from_string(expected, gram) == p.parse(input)

    def test_token(self):
        """Plain token names in a rule match tokens of that type only."""
        p, gram = self.parser_for("foo: NAME")
        expected = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi")
        py.test.raises(parser.ParseError, p.parse, "567")
        p, gram = self.parser_for("foo: NUMBER NAME STRING")
        expected = """
        foo
            NUMBER "42"
            NAME "hi"
            STRING "'bar'"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'")

    def test_optional(self):
        """A bracketed item may be present or absent."""
        p, gram = self.parser_for("foo: [NAME] 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi end")
        expected = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("end")

    def test_grouping(self):
        """Nested parenthesised alternatives select exactly one branch."""
        p, gram = self.parser_for(
            "foo: ((NUMBER NAME | STRING) | 'second_option')")
        expected = """
        foo
            NUMBER "42"
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42 hi")
        expected = """
        foo
            STRING "'hi'"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("'hi'")
        expected = """
        foo
            NAME "second_option"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("second_option")
        py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'")
        py.test.raises(parser.ParseError, p.parse, "42 second_option")

    def test_alternative(self):
        """``|`` accepts either alternative but not both or neither."""
        p, gram = self.parser_for("foo: (NAME | NUMBER)")
        expected = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi")
        expected = """
        foo
            NUMBER "42"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42")
        py.test.raises(parser.ParseError, p.parse, "hi 23")
        py.test.raises(parser.ParseError, p.parse, "23 hi")
        py.test.raises(parser.ParseError, p.parse, "'some string'")

    def test_keyword(self):
        """A quoted word in a rule matches only that exact name."""
        p, gram = self.parser_for("foo: 'key'")
        expected = """
        foo
            NAME "key"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("key")
        py.test.raises(parser.ParseError, p.parse, "")
        p, gram = self.parser_for("foo: NAME 'key'")
        expected = """
        foo
            NAME "some_name"
            NAME "key"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("some_name key")
        py.test.raises(parser.ParseError, p.parse, "some_name")

    def test_repeaters(self):
        """``+`` needs at least one match, ``*`` accepts zero or more."""
        p, gram = self.parser_for("foo: NAME+ 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "nothing"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi bye nothing end")
        py.test.raises(parser.ParseError, p.parse, "end")
        py.test.raises(parser.ParseError, p.parse, "hi bye")
        p, gram = self.parser_for("foo: NAME* 'end'")
        expected = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi bye end")
        py.test.raises(parser.ParseError, p.parse, "hi bye")
        expected = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("end")

        p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'")
        expected = """
        foo
            NAME "a_name"
            NAME "name_two"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("a_name name_two end")
        expected = """
        foo
            NUMBER "42"
            NAME "name"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("42 name end")
        py.test.raises(parser.ParseError, p.parse, "end")
        p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'")
        expected = """
        foo
            NAME "hi"
            NUMBER "42"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        assert tree_from_string(expected, gram) == p.parse("hi 42 end")