Lichen

Annotated pyparser/test/test_parser.py

828:fd110815f7f9
2018-06-24 Paul Boddie Changed target handling in the invocation code to eliminate superfluous temporary storage for targets and to treat context and target storage separately. Changed access instruction generation to expose the accessor test instructions more readily to the translator, with the error conversion being introduced by the translator. This allows the presence of accessor tests to be considered by the translator and for such tests to be included in generated code where they might otherwise be optimised away. Fixed detection of context temporary variables. Simplified the tracking of target and context temporary usage.
paul@437 1
# New parser tests.
paul@437 2
import py
paul@437 3
import tokenize
paul@437 4
import StringIO
paul@437 5
from pyparser import parser, metaparser, pygram
paul@437 6
from pyparser.test.test_metaparser import MyGrammar
paul@439 7
from pyparser.pygram import tokens as token
paul@437 8
paul@437 9
paul@437 10
class SimpleParser(parser.Parser):
    """Parser subclass that can be driven directly from a source string."""

    def parse(self, input):
        """Tokenize ``input`` and feed every token to this parser.

        A trailing newline is appended to ``input`` before it is handed to
        ``tokenize.generate_tokens``.  Each token's type, value, starting row,
        starting column and source line are passed to ``self.add_token``.
        When ``add_token`` returns a true value (presumably once the parse is
        complete -- confirm against ``parser.Parser``), the token generator is
        required to be exhausted.

        Returns ``self.root``.
        """
        self.prepare()
        source = StringIO.StringIO(input + "\n")
        token_stream = tokenize.generate_tokens(source.readline)
        for tok_type, tok_value, start, _end, tok_line in token_stream:
            accepted = self.add_token(
                tok_type, tok_value, start[0], start[1], tok_line)
            if accepted:
                # No further tokens may follow once the parser is satisfied.
                py.test.raises(StopIteration, token_stream.next)
        return self.root
paul@437 20
paul@437 21
paul@437 22
def tree_from_string(expected, gram):
    """Build the node tree described by an indented textual outline.

    Every non-blank line of ``expected`` describes one node:

    * If the first word is upper case, the node is a ``parser.Terminal``.
      Its type is looked up with ``token.get`` and its value is the second
      word with surrounding double quotes stripped (when the line has exactly
      two words), ``"\\n"`` for a NEWLINE token, and ``""`` otherwise.
    * Otherwise the node is a ``parser.Nonterminal`` whose type is
      ``gram.symbol_ids[name]``.

    Nesting follows indentation: a line indented deeper than the previous
    one becomes its child, a line at the same depth becomes its sibling, and
    a shallower line closes one level per four spaces of dedent.

    Leading blank lines are dropped and the indentation of the first
    remaining line is removed from every line; that base indentation must be
    a multiple of four spaces.

    Returns the first node described (the root of the tree).
    """
    def count_indent(s):
        """Return the number of leading space characters in ``s``."""
        return len(s) - len(s.lstrip(" "))

    # Locate the last newline inside the leading run of spaces/newlines.
    # ``None`` (not 0) marks "no newline seen": triple-quoted outlines
    # normally start with a newline at index 0, which a 0 sentinel would
    # wrongly treat as "not found" and leave the base indent unstripped.
    last_newline_index = None
    for i, char in enumerate(expected):
        if char == "\n":
            last_newline_index = i
        elif char != " ":
            break
    if last_newline_index is not None:
        expected = expected[last_newline_index + 1:]
    base_indent = count_indent(expected)
    assert base_indent % 4 == 0, "not using 4 space indentation"
    lines = [line[base_indent:] for line in expected.splitlines()]
    last_indent = 0
    # Path from the root to the most recently created node.
    node_stack = []
    for line in lines:
        if not line.strip():
            continue
        data = line.split()
        if data[0].isupper():
            tp = token.get(data[0])
            if len(data) == 2:
                value = data[1].strip("\"")
            elif tp == token["NEWLINE"]:
                value = "\n"
            else:
                value = ""
            n = parser.Terminal(tp, value, 0, 0)
        else:
            tp = gram.symbol_ids[data[0]]
            children = []
            n = parser.Nonterminal(tp, children)
        new_indent = count_indent(line)
        if new_indent >= last_indent:
            # Same depth: the previous node is a sibling, so drop it to
            # expose the shared parent.  Deeper: the previous node is the
            # parent and stays on the stack.
            if new_indent == last_indent and node_stack:
                node_stack.pop()
            if node_stack:
                node_stack[-1].append_child(n)
            node_stack.append(n)
        else:
            # Dedent: discard the previous node plus one ancestor per four
            # spaces of dedent, then attach to whatever parent remains.
            diff = last_indent - new_indent
            pop_nodes = diff // 4 + 1
            del node_stack[-pop_nodes:]
            node_stack[-1].append_child(n)
            node_stack.append(n)
        last_indent = new_indent
    return node_stack[0]
paul@437 75
paul@437 76
paul@437 77
class TestParser:
paul@437 78
paul@437 79
    def parser_for(self, gram, add_endmarker=True):
        """Return ``(parser, grammar)`` built from the grammar text ``gram``.

        Unless ``add_endmarker`` is false, `` NEWLINE ENDMARKER`` and a
        newline are appended to ``gram`` before the grammar is generated.
        """
        grammar_source = gram + " NEWLINE ENDMARKER\n" if add_endmarker else gram
        generator = metaparser.ParserGenerator(grammar_source)
        grammar = generator.build_grammar(MyGrammar)
        return SimpleParser(grammar), grammar
paul@437 85
paul@437 86
    def test_multiple_rules(self):
paul@437 87
        gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER
paul@437 88
bar: NAME NUMBER\n"""
paul@437 89
        p, gram = self.parser_for(gram, False)
paul@437 90
        expected = """
paul@437 91
        foo
paul@437 92
            NAME "next_rule"
paul@437 93
            bar
paul@437 94
                NAME "a_name"
paul@437 95
                NUMBER "42"
paul@437 96
            NAME "end"
paul@437 97
            NEWLINE
paul@437 98
            ENDMARKER"""
paul@437 99
        input = "next_rule a_name 42 end"
paul@437 100
        assert tree_from_string(expected, gram) == p.parse(input)
paul@437 101
paul@437 102
    def test_recursive_rule(self):
paul@437 103
        gram = """foo: NAME bar STRING NEWLINE ENDMARKER
paul@437 104
bar: NAME [bar] NUMBER\n"""
paul@437 105
        p, gram = self.parser_for(gram, False)
paul@437 106
        expected = """
paul@437 107
        foo
paul@437 108
            NAME "hi"
paul@437 109
            bar
paul@437 110
                NAME "hello"
paul@437 111
                bar
paul@437 112
                    NAME "a_name"
paul@437 113
                    NUMBER "32"
paul@437 114
                NUMBER "42"
paul@437 115
            STRING "'string'"
paul@437 116
            NEWLINE
paul@437 117
            ENDMARKER"""
paul@437 118
        input = "hi hello a_name 32 42 'string'"
paul@437 119
        assert tree_from_string(expected, gram) == p.parse(input)
paul@437 120
paul@437 121
    def test_symbol(self):
paul@437 122
        gram = """parent: first_child second_child NEWLINE ENDMARKER
paul@437 123
first_child: NAME age
paul@437 124
second_child: STRING
paul@437 125
age: NUMBER\n"""
paul@437 126
        p, gram = self.parser_for(gram, False)
paul@437 127
        expected = """
paul@437 128
        parent
paul@437 129
            first_child
paul@437 130
                NAME "harry"
paul@437 131
                age
paul@437 132
                     NUMBER "13"
paul@437 133
            second_child
paul@437 134
                STRING "'fred'"
paul@437 135
            NEWLINE
paul@437 136
            ENDMARKER"""
paul@437 137
        input = "harry 13 'fred'"
paul@437 138
        assert tree_from_string(expected, gram) == p.parse(input)
paul@437 139
paul@437 140
    def test_token(self):
        """Plain token rules accept matching tokens and reject others."""
        # A single NAME token.
        simple_parser, grammar = self.parser_for("foo: NAME")
        outline = """
        foo
           NAME "hi"
           NEWLINE
           ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("hi")
        py.test.raises(parser.ParseError, simple_parser.parse, "567")

        # A fixed sequence of three different token types.
        simple_parser, grammar = self.parser_for("foo: NUMBER NAME STRING")
        outline = """
        foo
           NUMBER "42"
           NAME "hi"
           STRING "'bar'"
           NEWLINE
           ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("42 hi 'bar'")
paul@437 158
paul@437 159
    def test_optional(self):
        """An optional item may be present or absent in the input."""
        simple_parser, grammar = self.parser_for("foo: [NAME] 'end'")

        # Optional NAME supplied.
        outline = """
        foo
            NAME "hi"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("hi end")

        # Optional NAME omitted.
        outline = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("end")
paul@437 174
paul@437 175
    def test_grouping(self):
        """Nested parenthesised alternatives select exactly one branch."""
        simple_parser, grammar = self.parser_for(
            "foo: ((NUMBER NAME | STRING) | 'second_option')")

        # Innermost first alternative: NUMBER NAME.
        outline = """
        foo
            NUMBER "42"
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("42 hi")

        # Innermost second alternative: STRING.
        outline = """
        foo
            STRING "'hi'"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("'hi'")

        # Outer alternative: the keyword.
        outline = """
        foo
            NAME "second_option"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("second_option")

        # Inputs that mix branches must be rejected.
        for bad_source in ("42 a_name 'hi'", "42 second_option"):
            py.test.raises(parser.ParseError, simple_parser.parse, bad_source)
paul@437 199
paul@437 200
    def test_alternative(self):
        """A two-way alternative accepts either token, but only one of them."""
        simple_parser, grammar = self.parser_for("foo: (NAME | NUMBER)")

        outline = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("hi")

        outline = """
        foo
            NUMBER "42"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("42")

        # Two tokens, or a token of the wrong type, must be rejected.
        for bad_source in ("hi 23", "23 hi", "'some string'"):
            py.test.raises(parser.ParseError, simple_parser.parse, bad_source)
paul@437 217
paul@437 218
    def test_keyword(self):
        """Quoted keywords in a rule match a NAME token with that exact text."""
        # Keyword on its own.
        simple_parser, grammar = self.parser_for("foo: 'key'")
        outline = """
        foo
            NAME "key"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("key")
        py.test.raises(parser.ParseError, simple_parser.parse, "")

        # Keyword following an arbitrary NAME.
        simple_parser, grammar = self.parser_for("foo: NAME 'key'")
        outline = """
        foo
            NAME "some_name"
            NAME "key"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("some_name key")
        py.test.raises(parser.ParseError, simple_parser.parse, "some_name")
paul@437 236
paul@437 237
    def test_repeaters(self):
        """The ``+`` and ``*`` repeaters work on tokens and on groups."""
        # One or more NAME tokens before the keyword.
        simple_parser, grammar = self.parser_for("foo: NAME+ 'end'")
        outline = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "nothing"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("hi bye nothing end")
        for bad_source in ("end", "hi bye"):
            py.test.raises(parser.ParseError, simple_parser.parse, bad_source)

        # Zero or more NAME tokens before the keyword.
        simple_parser, grammar = self.parser_for("foo: NAME* 'end'")
        outline = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("hi bye end")
        py.test.raises(parser.ParseError, simple_parser.parse, "hi bye")
        outline = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("end")

        # One or more repetitions of a group of alternatives.
        simple_parser, grammar = self.parser_for("foo: (NAME | NUMBER)+ 'end'")
        outline = """
        foo
            NAME "a_name"
            NAME "name_two"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("a_name name_two end")
        outline = """
        foo
            NUMBER "42"
            NAME "name"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("42 name end")
        py.test.raises(parser.ParseError, simple_parser.parse, "end")

        # Zero or more repetitions of a group of alternatives.
        simple_parser, grammar = self.parser_for("foo: (NAME | NUMBER)* 'end'")
        outline = """
        foo
            NAME "hi"
            NUMBER 42
            NAME "end"
            NEWLINE
            ENDMARKER"""
        expected_tree = tree_from_string(outline, grammar)
        assert expected_tree == simple_parser.parse("hi 42 end")