Lichen (file pyparser/pylexer.py at b8733c35d308)

     1 # Used by genpytokenize.py to generate the parser in pytokenize.py     2 from pyparser.automata import DFA, DEFAULT     3      4 class EMPTY: pass     5      6 def newArcPair (states, transitionLabel):     7     s1Index = len(states)     8     s2Index = s1Index + 1     9     states.append([(transitionLabel, s2Index)])    10     states.append([])    11     return s1Index, s2Index    12     13 # ______________________________________________________________________    14     15 def chain (states, *stateIndexPairs):    16     if len(stateIndexPairs) > 1:    17         start, lastFinish = stateIndexPairs[0]    18         for nStart, nFinish in stateIndexPairs[1:]:    19             states[lastFinish].append((EMPTY, nStart))    20             lastFinish = nFinish    21         return start, nFinish    22     else:    23         return stateIndexPairs[0]    24     25     26 # ______________________________________________________________________    27     28 def chainStr (states, str):    29     return chain(states, *map(lambda x : newArcPair(states, x), str))    30     31 # ______________________________________________________________________    32     33 def notChainStr (states, str):    34     """XXX I'm not sure this is how it should be done, but I'm going to    35     try it anyway.  Note that for this case, I require only single character    36     arcs, since I would have to basically invert all accepting states and    37     non-accepting states of any sub-NFA's.    38     """    39     assert len(str) > 0    40     arcs = map(lambda x : newArcPair(states, x), str)    41     finish = len(states)    42     states.append([])    43     start, lastFinish = arcs[0]    44     states[start].append((EMPTY, finish))    45     for crntStart, crntFinish in arcs[1:]:    46         states[lastFinish].append((EMPTY, crntStart))    47         states[crntStart].append((EMPTY, finish))    48     return start, finish    49     50 # ______________________________________________________________________    51     52 def group (states, *stateIndexPairs):    53     if len(stateIndexPairs) > 1:    54         start = len(states)    55         finish = start + 1    56         startList = []    57         states.append(startList)    58         states.append([])    59         for eStart, eFinish in stateIndexPairs:    60             startList.append((EMPTY, eStart))    61             states[eFinish].append((EMPTY, finish))    62         return start, finish    63     else:    64         return stateIndexPairs[0]    65     66 # ______________________________________________________________________    67     68 def groupStr (states, str):    69     return group(states, *map(lambda x : newArcPair(states, x), str))    70     71 # ______________________________________________________________________    72     73 def notGroup (states, *stateIndexPairs):    74     """Like group, but will add a DEFAULT transition to a new end state,    75     causing anything in the group to not match by going to a dead state.    76     XXX I think this is right...    77     """    78     start, dead = group(states, *stateIndexPairs)    79     finish = len(states)    80     states.append([])    81     states[start].append((DEFAULT, finish))    82     return start, finish    83     84 # ______________________________________________________________________    85     86 def notGroupStr (states, str):    87     return notGroup(states, *map(lambda x : newArcPair(states, x), str))    88 # ______________________________________________________________________    89     90 def any (states, *stateIndexPairs):    91     start, finish = group(states, *stateIndexPairs)    92     states[finish].append((EMPTY, start))    93     return start, start    94     95 # ______________________________________________________________________    96     97 def maybe (states, *stateIndexPairs):    98     start, finish = group(states, *stateIndexPairs)    99     states[start].append((EMPTY, finish))   100     return start, finish   101    102 # ______________________________________________________________________   103    104 def atleastonce (states, *stateIndexPairs):   105     start, finish = group(states, *stateIndexPairs)   106     states[finish].append((EMPTY, start))   107     return start, finish   108    109 # ______________________________________________________________________   110    111 def closure (states, start, result = 0L):   112     if None == result:   113         result = 0L   114     if 0 == (result & (1L << start)):   115         result |= (1L << start)   116         for label, arrow in states[start]:   117             if label == EMPTY:   118                 result |= closure(states, arrow, result)   119     return result   120    121 # ______________________________________________________________________   122    123 def nfaToDfa (states, start, finish):   124     tempStates = []   125     startClosure = closure(states, start)   126     crntTempState = [startClosure, [], 0 != (startClosure & (1L << finish))]   127     tempStates.append(crntTempState)   128     index = 0   129     while index < len(tempStates):   130         crntTempState = tempStates[index]   131         crntClosure, crntArcs, crntAccept = crntTempState   132         for index2 in range(0, len(states)):   133             if 0 != (crntClosure & (1L << index2)):   134                 for label, nfaArrow in states[index2]:   135                     if label == EMPTY:   136                         continue   137                     foundTempArc = False   138                     for tempArc in crntArcs:   139                         if tempArc[0] == label:   140                             foundTempArc = True   141                             break   142                     if not foundTempArc:   143                         tempArc = [label, -1, 0L]   144                         crntArcs.append(tempArc)   145                     tempArc[2] = closure(states, nfaArrow, tempArc[2])   146         for arcIndex in range(0, len(crntArcs)):   147             label, arrow, targetStates = crntArcs[arcIndex]   148             targetFound = False   149             arrow = 0   150             for destTempState in tempStates:   151                 if destTempState[0] == targetStates:   152                     targetFound = True   153                     break   154                 arrow += 1   155             if not targetFound:   156                 assert arrow == len(tempStates)   157                 newState = [targetStates, [], 0 != (targetStates &   158                                                     (1L << finish))]   159                 tempStates.append(newState)   160             crntArcs[arcIndex][1] = arrow   161         index += 1   162     tempStates = simplifyTempDfa(tempStates)   163     states = finalizeTempDfa(tempStates)   164     return states   165    166 # ______________________________________________________________________   167    168 def sameState (s1, s2):   169     """sameState(s1, s2)   170     Note:   171     state := [ nfaclosure : Long, [ arc ], accept : Boolean ]   172     arc := [ label, arrow : Int, nfaClosure : Long ]   173     """   174     if (len(s1[1]) != len(s2[1])) or (s1[2] != s2[2]):   175         return False   176     for arcIndex in range(0, len(s1[1])):   177         arc1 = s1[1][arcIndex]   178         arc2 = s2[1][arcIndex]   179         if arc1[:-1] != arc2[:-1]:   180             return False   181     return True   182    183 # ______________________________________________________________________   184    185 def simplifyTempDfa (tempStates):   186     """simplifyTempDfa (tempStates)   187     """   188     changes = True   189     deletedStates = []   190     while changes:   191         changes = False   192         for i in range(1, len(tempStates)):   193             if i in deletedStates:   194                 continue   195             for j in range(0, i):   196                 if j in deletedStates:   197                     continue   198                 if sameState(tempStates[i], tempStates[j]):   199                     deletedStates.append(i)   200                     for k in range(0, len(tempStates)):   201                         if k in deletedStates:   202                             continue   203                         for arc in tempStates[k][1]:   204                             if arc[1] == i:   205                                 arc[1] = j   206                     changes = True   207                     break   208     for stateIndex in deletedStates:   209         tempStates[stateIndex] = None   210     return tempStates   211 # ______________________________________________________________________   212    213 def finalizeTempDfa (tempStates):   214     """finalizeTempDfa (tempStates)   215        216     Input domain:   217     tempState := [ nfaClosure : Long, [ tempArc ], accept : Boolean ]   218     tempArc := [ label, arrow, nfaClosure ]   219    220     Output domain:   221     state := [ arcMap, accept : Boolean ]   222     """   223     states = []   224     accepts = []   225     stateMap = {}   226     tempIndex = 0   227     for tempIndex in range(0, len(tempStates)):   228         tempState = tempStates[tempIndex]   229         if None != tempState:   230             stateMap[tempIndex] = len(states)   231             states.append({})   232             accepts.append(tempState[2])   233     for tempIndex in stateMap.keys():   234         stateBitset, tempArcs, accepting = tempStates[tempIndex]   235         newIndex = stateMap[tempIndex]   236         arcMap = states[newIndex]   237         for tempArc in tempArcs:   238             arcMap[tempArc[0]] = stateMap[tempArc[1]]   239     return states, accepts   240