paul@437 | 1 | # ______________________________________________________________________ |
paul@437 | 2 | """Module automata |
paul@437 | 3 | |
paul@437 | 4 | THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED |
paul@437 | 5 | TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists |
paul@437 | 6 | instead of a unique nested one) |
paul@437 | 7 | |
paul@437 | 8 | $Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $ |
paul@437 | 9 | """ |
paul@437 | 10 | # ______________________________________________________________________ |
paul@437 | 11 | # Module level definitions |
paul@437 | 12 | |
paul@437 | 13 | # PYPY Modification: removed the EMPTY class as it's not needed here |
paul@437 | 14 | |
paul@437 | 15 | |
paul@437 | 16 | # PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython |
paul@437 | 17 | # dicts (see pytokenize.py). Then DFA.__init__() turns these dicts into |
paul@437 | 18 | # more compact strings. |
paul@437 | 19 | DEFAULT = object() |
paul@437 | 20 | |
paul@437 | 21 | # PYPY Modification : removed all automata functions (any, maybe, |
paul@437 | 22 | # newArcPair, etc.) |
paul@437 | 23 | |
paul@437 | 24 | ERROR_STATE = chr(255) |
paul@437 | 25 | |
paul@437 | 26 | class DFA: |
paul@437 | 27 | # ____________________________________________________________ |
paul@437 | 28 | def __init__(self, states, accepts, start = 0): |
paul@437 | 29 | """ NOT_RPYTHON """ |
paul@437 | 30 | assert len(states) < 255 # no support for huge amounts of states |
paul@437 | 31 | # construct string for looking up state transitions |
paul@437 | 32 | string_states = [] * len(states) |
paul@437 | 33 | # compute maximum |
paul@437 | 34 | maximum = 0 |
paul@437 | 35 | for state in states: |
paul@437 | 36 | for key in state: |
paul@437 | 37 | if key == DEFAULT: |
paul@437 | 38 | continue |
paul@437 | 39 | maximum = max(ord(key), maximum) |
paul@437 | 40 | self.max_char = maximum + 1 |
paul@437 | 41 | |
paul@437 | 42 | defaults = [] |
paul@437 | 43 | for i, state in enumerate(states): |
paul@437 | 44 | default = ERROR_STATE |
paul@437 | 45 | if DEFAULT in state: |
paul@437 | 46 | default = chr(state[DEFAULT]) |
paul@437 | 47 | defaults.append(default) |
paul@437 | 48 | string_state = [default] * self.max_char |
paul@437 | 49 | for key, value in state.iteritems(): |
paul@437 | 50 | if key == DEFAULT: |
paul@437 | 51 | continue |
paul@437 | 52 | assert len(key) == 1 |
paul@437 | 53 | assert ord(key) < self.max_char |
paul@437 | 54 | string_state[ord(key)] = chr(value) |
paul@437 | 55 | string_states.extend(string_state) |
paul@437 | 56 | self.states = "".join(string_states) |
paul@437 | 57 | self.defaults = "".join(defaults) |
paul@437 | 58 | self.accepts = accepts |
paul@437 | 59 | self.start = start |
paul@437 | 60 | |
paul@437 | 61 | # ____________________________________________________________ |
paul@437 | 62 | |
paul@437 | 63 | def _next_state(self, item, crntState): |
paul@437 | 64 | if ord(item) >= self.max_char: |
paul@437 | 65 | return self.defaults[crntState] |
paul@437 | 66 | else: |
paul@437 | 67 | return self.states[crntState * self.max_char + ord(item)] |
paul@437 | 68 | |
paul@437 | 69 | def recognize(self, inVec, pos = 0): |
paul@437 | 70 | crntState = self.start |
paul@437 | 71 | lastAccept = False |
paul@437 | 72 | i = pos |
paul@437 | 73 | for i in range(pos, len(inVec)): |
paul@437 | 74 | item = inVec[i] |
paul@437 | 75 | accept = self.accepts[crntState] |
paul@437 | 76 | crntState = self._next_state(item, crntState) |
paul@437 | 77 | if crntState != ERROR_STATE: |
paul@437 | 78 | pass |
paul@437 | 79 | elif accept: |
paul@437 | 80 | return i |
paul@437 | 81 | elif lastAccept: |
paul@437 | 82 | # This is now needed b/c of exception cases where there are |
paul@437 | 83 | # transitions to dead states |
paul@437 | 84 | return i - 1 |
paul@437 | 85 | else: |
paul@437 | 86 | return -1 |
paul@437 | 87 | crntState = ord(crntState) |
paul@437 | 88 | lastAccept = accept |
paul@437 | 89 | # if self.states[crntState][1]: |
paul@437 | 90 | if self.accepts[crntState]: |
paul@437 | 91 | return i + 1 |
paul@437 | 92 | elif lastAccept: |
paul@437 | 93 | return i |
paul@437 | 94 | else: |
paul@437 | 95 | return -1 |
paul@437 | 96 | |
paul@437 | 97 | # ______________________________________________________________________ |
paul@437 | 98 | |
paul@437 | 99 | class NonGreedyDFA (DFA): |
paul@437 | 100 | |
paul@437 | 101 | def recognize(self, inVec, pos = 0): |
paul@437 | 102 | crntState = self.start |
paul@437 | 103 | i = pos |
paul@437 | 104 | for i in range(pos, len(inVec)): |
paul@437 | 105 | item = inVec[i] |
paul@437 | 106 | accept = self.accepts[crntState] |
paul@437 | 107 | if accept: |
paul@437 | 108 | return i |
paul@437 | 109 | crntState = self._next_state(item, crntState) |
paul@437 | 110 | if crntState == ERROR_STATE: |
paul@437 | 111 | return -1 |
paul@437 | 112 | crntState = ord(crntState) |
paul@437 | 113 | i += 1 |
paul@437 | 114 | if self.accepts[crntState]: |
paul@437 | 115 | return i |
paul@437 | 116 | else: |
paul@437 | 117 | return -1 |
paul@437 | 118 | |
paul@437 | 119 | # ______________________________________________________________________ |
paul@437 | 120 | # End of automata.py |