1 # Used by genpytokenize.py to generate the parser in pytokenize.py 2 from pyparser.automata import DFA, DEFAULT 3 4 class EMPTY: pass 5 6 def newArcPair (states, transitionLabel): 7 s1Index = len(states) 8 s2Index = s1Index + 1 9 states.append([(transitionLabel, s2Index)]) 10 states.append([]) 11 return s1Index, s2Index 12 13 # ______________________________________________________________________ 14 15 def chain (states, *stateIndexPairs): 16 if len(stateIndexPairs) > 1: 17 start, lastFinish = stateIndexPairs[0] 18 for nStart, nFinish in stateIndexPairs[1:]: 19 states[lastFinish].append((EMPTY, nStart)) 20 lastFinish = nFinish 21 return start, nFinish 22 else: 23 return stateIndexPairs[0] 24 25 26 # ______________________________________________________________________ 27 28 def chainStr (states, str): 29 return chain(states, *map(lambda x : newArcPair(states, x), str)) 30 31 # ______________________________________________________________________ 32 33 def notChainStr (states, str): 34 """XXX I'm not sure this is how it should be done, but I'm going to 35 try it anyway. Note that for this case, I require only single character 36 arcs, since I would have to basically invert all accepting states and 37 non-accepting states of any sub-NFA's. 38 """ 39 assert len(str) > 0 40 arcs = map(lambda x : newArcPair(states, x), str) 41 finish = len(states) 42 states.append([]) 43 start, lastFinish = arcs[0] 44 states[start].append((EMPTY, finish)) 45 for crntStart, crntFinish in arcs[1:]: 46 states[lastFinish].append((EMPTY, crntStart)) 47 states[crntStart].append((EMPTY, finish)) 48 return start, finish 49 50 # ______________________________________________________________________ 51 52 def group (states, *stateIndexPairs): 53 if len(stateIndexPairs) > 1: 54 start = len(states) 55 finish = start + 1 56 startList = [] 57 states.append(startList) 58 states.append([]) 59 for eStart, eFinish in stateIndexPairs: 60 startList.append((EMPTY, eStart)) 61 states[eFinish].append((EMPTY, finish)) 62 return start, finish 63 else: 64 return stateIndexPairs[0] 65 66 # ______________________________________________________________________ 67 68 def groupStr (states, str): 69 return group(states, *map(lambda x : newArcPair(states, x), str)) 70 71 # ______________________________________________________________________ 72 73 def notGroup (states, *stateIndexPairs): 74 """Like group, but will add a DEFAULT transition to a new end state, 75 causing anything in the group to not match by going to a dead state. 76 XXX I think this is right... 77 """ 78 start, dead = group(states, *stateIndexPairs) 79 finish = len(states) 80 states.append([]) 81 states[start].append((DEFAULT, finish)) 82 return start, finish 83 84 # ______________________________________________________________________ 85 86 def notGroupStr (states, str): 87 return notGroup(states, *map(lambda x : newArcPair(states, x), str)) 88 # ______________________________________________________________________ 89 90 def any (states, *stateIndexPairs): 91 start, finish = group(states, *stateIndexPairs) 92 states[finish].append((EMPTY, start)) 93 return start, start 94 95 # ______________________________________________________________________ 96 97 def maybe (states, *stateIndexPairs): 98 start, finish = group(states, *stateIndexPairs) 99 states[start].append((EMPTY, finish)) 100 return start, finish 101 102 # ______________________________________________________________________ 103 104 def atleastonce (states, *stateIndexPairs): 105 start, finish = group(states, *stateIndexPairs) 106 states[finish].append((EMPTY, start)) 107 return start, finish 108 109 # ______________________________________________________________________ 110 111 def closure (states, start, result = 0L): 112 if None == result: 113 result = 0L 114 if 0 == (result & (1L << start)): 115 result |= (1L << start) 116 for label, arrow in states[start]: 117 if label == EMPTY: 118 result |= closure(states, arrow, result) 119 return result 120 121 # ______________________________________________________________________ 122 123 def nfaToDfa (states, start, finish): 124 tempStates = [] 125 startClosure = closure(states, start) 126 crntTempState = [startClosure, [], 0 != (startClosure & (1L << finish))] 127 tempStates.append(crntTempState) 128 index = 0 129 while index < len(tempStates): 130 crntTempState = tempStates[index] 131 crntClosure, crntArcs, crntAccept = crntTempState 132 for index2 in range(0, len(states)): 133 if 0 != (crntClosure & (1L << index2)): 134 for label, nfaArrow in states[index2]: 135 if label == EMPTY: 136 continue 137 foundTempArc = False 138 for tempArc in crntArcs: 139 if tempArc[0] == label: 140 foundTempArc = True 141 break 142 if not foundTempArc: 143 tempArc = [label, -1, 0L] 144 crntArcs.append(tempArc) 145 tempArc[2] = closure(states, nfaArrow, tempArc[2]) 146 for arcIndex in range(0, len(crntArcs)): 147 label, arrow, targetStates = crntArcs[arcIndex] 148 targetFound = False 149 arrow = 0 150 for destTempState in tempStates: 151 if destTempState[0] == targetStates: 152 targetFound = True 153 break 154 arrow += 1 155 if not targetFound: 156 assert arrow == len(tempStates) 157 newState = [targetStates, [], 0 != (targetStates & 158 (1L << finish))] 159 tempStates.append(newState) 160 crntArcs[arcIndex][1] = arrow 161 index += 1 162 tempStates = simplifyTempDfa(tempStates) 163 states = finalizeTempDfa(tempStates) 164 return states 165 166 # ______________________________________________________________________ 167 168 def sameState (s1, s2): 169 """sameState(s1, s2) 170 Note: 171 state := [ nfaclosure : Long, [ arc ], accept : Boolean ] 172 arc := [ label, arrow : Int, nfaClosure : Long ] 173 """ 174 if (len(s1[1]) != len(s2[1])) or (s1[2] != s2[2]): 175 return False 176 for arcIndex in range(0, len(s1[1])): 177 arc1 = s1[1][arcIndex] 178 arc2 = s2[1][arcIndex] 179 if arc1[:-1] != arc2[:-1]: 180 return False 181 return True 182 183 # ______________________________________________________________________ 184 185 def simplifyTempDfa (tempStates): 186 """simplifyTempDfa (tempStates) 187 """ 188 changes = True 189 deletedStates = [] 190 while changes: 191 changes = False 192 for i in range(1, len(tempStates)): 193 if i in deletedStates: 194 continue 195 for j in range(0, i): 196 if j in deletedStates: 197 continue 198 if sameState(tempStates[i], tempStates[j]): 199 deletedStates.append(i) 200 for k in range(0, len(tempStates)): 201 if k in deletedStates: 202 continue 203 for arc in tempStates[k][1]: 204 if arc[1] == i: 205 arc[1] = j 206 changes = True 207 break 208 for stateIndex in deletedStates: 209 tempStates[stateIndex] = None 210 return tempStates 211 # ______________________________________________________________________ 212 213 def finalizeTempDfa (tempStates): 214 """finalizeTempDfa (tempStates) 215 216 Input domain: 217 tempState := [ nfaClosure : Long, [ tempArc ], accept : Boolean ] 218 tempArc := [ label, arrow, nfaClosure ] 219 220 Output domain: 221 state := [ arcMap, accept : Boolean ] 222 """ 223 states = [] 224 accepts = [] 225 stateMap = {} 226 tempIndex = 0 227 for tempIndex in range(0, len(tempStates)): 228 tempState = tempStates[tempIndex] 229 if None != tempState: 230 stateMap[tempIndex] = len(states) 231 states.append({}) 232 accepts.append(tempState[2]) 233 for tempIndex in stateMap.keys(): 234 stateBitset, tempArcs, accepting = tempStates[tempIndex] 235 newIndex = stateMap[tempIndex] 236 arcMap = states[newIndex] 237 for tempArc in tempArcs: 238 arcMap[tempArc[0]] = stateMap[tempArc[1]] 239 return states, accepts 240