1 # ______________________________________________________________________ 2 """Module automata 3 4 THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED 5 TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists 6 instead of a unique nested one) 7 8 $Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $ 9 """ 10 # ______________________________________________________________________ 11 # Module level definitions 12 13 # PYPY Modification: removed the EMPTY class as it's not needed here 14 15 16 # PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython 17 # dicts (see pytokenize.py). Then DFA.__init__() turns these dicts into 18 # more compact strings. 19 DEFAULT = object() 20 21 # PYPY Modification : removed all automata functions (any, maybe, 22 # newArcPair, etc.) 23 24 ERROR_STATE = chr(255) 25 26 class DFA: 27 # ____________________________________________________________ 28 def __init__(self, states, accepts, start = 0): 29 """ NOT_RPYTHON """ 30 assert len(states) < 255 # no support for huge amounts of states 31 # construct string for looking up state transitions 32 string_states = [] * len(states) 33 # compute maximum 34 maximum = 0 35 for state in states: 36 for key in state: 37 if key == DEFAULT: 38 continue 39 maximum = max(ord(key), maximum) 40 self.max_char = maximum + 1 41 42 defaults = [] 43 for i, state in enumerate(states): 44 default = ERROR_STATE 45 if DEFAULT in state: 46 default = chr(state[DEFAULT]) 47 defaults.append(default) 48 string_state = [default] * self.max_char 49 for key, value in state.iteritems(): 50 if key == DEFAULT: 51 continue 52 assert len(key) == 1 53 assert ord(key) < self.max_char 54 string_state[ord(key)] = chr(value) 55 string_states.extend(string_state) 56 self.states = "".join(string_states) 57 self.defaults = "".join(defaults) 58 self.accepts = accepts 59 self.start = start 60 61 # ____________________________________________________________ 62 63 def _next_state(self, item, crntState): 64 if ord(item) >= self.max_char: 65 return self.defaults[crntState] 66 else: 67 return self.states[crntState * self.max_char + ord(item)] 68 69 def recognize(self, inVec, pos = 0): 70 crntState = self.start 71 lastAccept = False 72 i = pos 73 for i in range(pos, len(inVec)): 74 item = inVec[i] 75 accept = self.accepts[crntState] 76 crntState = self._next_state(item, crntState) 77 if crntState != ERROR_STATE: 78 pass 79 elif accept: 80 return i 81 elif lastAccept: 82 # This is now needed b/c of exception cases where there are 83 # transitions to dead states 84 return i - 1 85 else: 86 return -1 87 crntState = ord(crntState) 88 lastAccept = accept 89 # if self.states[crntState][1]: 90 if self.accepts[crntState]: 91 return i + 1 92 elif lastAccept: 93 return i 94 else: 95 return -1 96 97 # ______________________________________________________________________ 98 99 class NonGreedyDFA (DFA): 100 101 def recognize(self, inVec, pos = 0): 102 crntState = self.start 103 i = pos 104 for i in range(pos, len(inVec)): 105 item = inVec[i] 106 accept = self.accepts[crntState] 107 if accept: 108 return i 109 crntState = self._next_state(item, crntState) 110 if crntState == ERROR_STATE: 111 return -1 112 crntState = ord(crntState) 113 i += 1 114 if self.accepts[crntState]: 115 return i 116 else: 117 return -1 118 119 # ______________________________________________________________________ 120 # End of automata.py