Lichen

Annotated pyparser/automata.py

580:e703b981b9b1
2017-02-13 Paul Boddie Eliminated redundant struct usage. method-wrapper-for-context
paul@437 1
# ______________________________________________________________________
paul@437 2
"""Module automata
paul@437 3
paul@437 4
THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
paul@437 5
TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists
paul@437 6
instead of a unique nested one)
paul@437 7
paul@437 8
$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $
paul@437 9
"""
paul@437 10
# ______________________________________________________________________
paul@437 11
# Module level definitions
paul@437 12
paul@437 13
# PYPY Modification: removed the EMPTY class as it's not needed here
paul@437 14
paul@437 15
paul@437 16
# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython
paul@437 17
# dicts (see pytokenize.py).  Then DFA.__init__() turns these dicts into
paul@437 18
# more compact strings.
paul@437 19
DEFAULT = object()
paul@437 20
paul@437 21
# PYPY Modification : removed all automata functions (any, maybe,
paul@437 22
#                     newArcPair, etc.)
paul@437 23
paul@437 24
ERROR_STATE = chr(255)
paul@437 25
paul@437 26
class DFA:
paul@437 27
    # ____________________________________________________________
paul@437 28
    def __init__(self, states, accepts, start = 0):
paul@437 29
        """ NOT_RPYTHON """
paul@437 30
        assert len(states) < 255 # no support for huge amounts of states
paul@437 31
        # construct string for looking up state transitions
paul@437 32
        string_states = [] * len(states)
paul@437 33
        # compute maximum
paul@437 34
        maximum = 0
paul@437 35
        for state in states:
paul@437 36
            for key in state:
paul@437 37
                if key == DEFAULT:
paul@437 38
                    continue
paul@437 39
                maximum = max(ord(key), maximum)
paul@437 40
        self.max_char = maximum + 1
paul@437 41
paul@437 42
        defaults = []
paul@437 43
        for i, state in enumerate(states):
paul@437 44
            default = ERROR_STATE
paul@437 45
            if DEFAULT in state:
paul@437 46
                default = chr(state[DEFAULT])
paul@437 47
            defaults.append(default)
paul@437 48
            string_state = [default] * self.max_char
paul@437 49
            for key, value in state.iteritems():
paul@437 50
                if key == DEFAULT:
paul@437 51
                    continue
paul@437 52
                assert len(key) == 1
paul@437 53
                assert ord(key) < self.max_char
paul@437 54
                string_state[ord(key)] = chr(value)
paul@437 55
            string_states.extend(string_state)
paul@437 56
        self.states = "".join(string_states)
paul@437 57
        self.defaults = "".join(defaults)
paul@437 58
        self.accepts = accepts
paul@437 59
        self.start = start
paul@437 60
paul@437 61
    # ____________________________________________________________
paul@437 62
paul@437 63
    def _next_state(self, item, crntState):
paul@437 64
        if ord(item) >= self.max_char:
paul@437 65
            return self.defaults[crntState]
paul@437 66
        else:
paul@437 67
            return self.states[crntState * self.max_char + ord(item)]
paul@437 68
paul@437 69
    def recognize(self, inVec, pos = 0):
paul@437 70
        crntState = self.start
paul@437 71
        lastAccept = False
paul@437 72
        i = pos
paul@437 73
        for i in range(pos, len(inVec)):
paul@437 74
            item = inVec[i]
paul@437 75
            accept = self.accepts[crntState]
paul@437 76
            crntState = self._next_state(item, crntState)
paul@437 77
            if crntState != ERROR_STATE:
paul@437 78
                pass
paul@437 79
            elif accept:
paul@437 80
                return i
paul@437 81
            elif lastAccept:
paul@437 82
                # This is now needed b/c of exception cases where there are
paul@437 83
                # transitions to dead states
paul@437 84
                return i - 1
paul@437 85
            else:
paul@437 86
                return -1
paul@437 87
            crntState = ord(crntState)
paul@437 88
            lastAccept = accept
paul@437 89
        # if self.states[crntState][1]:
paul@437 90
        if self.accepts[crntState]:
paul@437 91
            return i + 1
paul@437 92
        elif lastAccept:
paul@437 93
            return i
paul@437 94
        else:
paul@437 95
            return -1
paul@437 96
paul@437 97
# ______________________________________________________________________
paul@437 98
paul@437 99
class NonGreedyDFA (DFA):
paul@437 100
paul@437 101
    def recognize(self, inVec, pos = 0):
paul@437 102
        crntState = self.start
paul@437 103
        i = pos
paul@437 104
        for i in range(pos, len(inVec)):
paul@437 105
            item = inVec[i]
paul@437 106
            accept = self.accepts[crntState]
paul@437 107
            if accept:
paul@437 108
                return i
paul@437 109
            crntState = self._next_state(item, crntState)
paul@437 110
            if crntState == ERROR_STATE:
paul@437 111
                return -1
paul@437 112
            crntState = ord(crntState)
paul@437 113
            i += 1
paul@437 114
        if self.accepts[crntState]:
paul@437 115
            return i
paul@437 116
        else:
paul@437 117
            return -1
paul@437 118
paul@437 119
# ______________________________________________________________________
paul@437 120
# End of automata.py