Lichen

pyparser/automata.py

659:4f77c6b2fc68
2017-03-05 Paul Boddie Fixed concatenated list size initialisation.
     1 # ______________________________________________________________________     2 """Module automata     3      4 THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED     5 TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists     6 instead of a unique nested one)     7      8 $Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $     9 """    10 # ______________________________________________________________________    11 # Module level definitions    12     13 # PYPY Modification: removed the EMPTY class as it's not needed here    14     15     16 # PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython    17 # dicts (see pytokenize.py).  Then DFA.__init__() turns these dicts into    18 # more compact strings.    19 DEFAULT = object()    20     21 # PYPY Modification : removed all automata functions (any, maybe,    22 #                     newArcPair, etc.)    23     24 ERROR_STATE = chr(255)    25     26 class DFA:    27     # ____________________________________________________________    28     def __init__(self, states, accepts, start = 0):    29         """ NOT_RPYTHON """    30         assert len(states) < 255 # no support for huge amounts of states    31         # construct string for looking up state transitions    32         string_states = [] * len(states)    33         # compute maximum    34         maximum = 0    35         for state in states:    36             for key in state:    37                 if key == DEFAULT:    38                     continue    39                 maximum = max(ord(key), maximum)    40         self.max_char = maximum + 1    41     42         defaults = []    43         for i, state in enumerate(states):    44             default = ERROR_STATE    45             if DEFAULT in state:    46                 default = chr(state[DEFAULT])    47             defaults.append(default)    48             string_state = [default] * self.max_char    49             for key, value in state.iteritems():    50                 if key == DEFAULT:    51                     continue    52                 assert len(key) == 1    53                 assert ord(key) < self.max_char    54                 string_state[ord(key)] = chr(value)    55             string_states.extend(string_state)    56         self.states = "".join(string_states)    57         self.defaults = "".join(defaults)    58         self.accepts = accepts    59         self.start = start    60     61     # ____________________________________________________________    62     63     def _next_state(self, item, crntState):    64         if ord(item) >= self.max_char:    65             return self.defaults[crntState]    66         else:    67             return self.states[crntState * self.max_char + ord(item)]    68     69     def recognize(self, inVec, pos = 0):    70         crntState = self.start    71         lastAccept = False    72         i = pos    73         for i in range(pos, len(inVec)):    74             item = inVec[i]    75             accept = self.accepts[crntState]    76             crntState = self._next_state(item, crntState)    77             if crntState != ERROR_STATE:    78                 pass    79             elif accept:    80                 return i    81             elif lastAccept:    82                 # This is now needed b/c of exception cases where there are    83                 # transitions to dead states    84                 return i - 1    85             else:    86                 return -1    87             crntState = ord(crntState)    88             lastAccept = accept    89         # if self.states[crntState][1]:    90         if self.accepts[crntState]:    91             return i + 1    92         elif lastAccept:    93             return i    94         else:    95             return -1    96     97 # ______________________________________________________________________    98     99 class NonGreedyDFA (DFA):   100    101     def recognize(self, inVec, pos = 0):   102         crntState = self.start   103         i = pos   104         for i in range(pos, len(inVec)):   105             item = inVec[i]   106             accept = self.accepts[crntState]   107             if accept:   108                 return i   109             crntState = self._next_state(item, crntState)   110             if crntState == ERROR_STATE:   111                 return -1   112             crntState = ord(crntState)   113             i += 1   114         if self.accepts[crntState]:   115             return i   116         else:   117             return -1   118    119 # ______________________________________________________________________   120 # End of automata.py