Lichen

Annotated pyparser/pylexer.py

503:644952535f7a
2017-01-23 Paul Boddie Added another, shorter test of get_using, commenting the existing test slightly.
paul@437 1
# Used by genpytokenize.py to generate the parser in pytokenize.py
paul@437 2
from pyparser.automata import DFA, DEFAULT
paul@437 3
paul@437 4
class EMPTY: pass
paul@437 5
paul@437 6
def newArcPair (states, transitionLabel):
paul@437 7
    s1Index = len(states)
paul@437 8
    s2Index = s1Index + 1
paul@437 9
    states.append([(transitionLabel, s2Index)])
paul@437 10
    states.append([])
paul@437 11
    return s1Index, s2Index
paul@437 12
paul@437 13
# ______________________________________________________________________
paul@437 14
paul@437 15
def chain (states, *stateIndexPairs):
paul@437 16
    if len(stateIndexPairs) > 1:
paul@437 17
        start, lastFinish = stateIndexPairs[0]
paul@437 18
        for nStart, nFinish in stateIndexPairs[1:]:
paul@437 19
            states[lastFinish].append((EMPTY, nStart))
paul@437 20
            lastFinish = nFinish
paul@437 21
        return start, nFinish
paul@437 22
    else:
paul@437 23
        return stateIndexPairs[0]
paul@437 24
paul@437 25
paul@437 26
# ______________________________________________________________________
paul@437 27
paul@437 28
def chainStr (states, str):
paul@437 29
    return chain(states, *map(lambda x : newArcPair(states, x), str))
paul@437 30
paul@437 31
# ______________________________________________________________________
paul@437 32
paul@437 33
def notChainStr (states, str):
paul@437 34
    """XXX I'm not sure this is how it should be done, but I'm going to
paul@437 35
    try it anyway.  Note that for this case, I require only single character
paul@437 36
    arcs, since I would have to basically invert all accepting states and
paul@437 37
    non-accepting states of any sub-NFA's.
paul@437 38
    """
paul@437 39
    assert len(str) > 0
paul@437 40
    arcs = map(lambda x : newArcPair(states, x), str)
paul@437 41
    finish = len(states)
paul@437 42
    states.append([])
paul@437 43
    start, lastFinish = arcs[0]
paul@437 44
    states[start].append((EMPTY, finish))
paul@437 45
    for crntStart, crntFinish in arcs[1:]:
paul@437 46
        states[lastFinish].append((EMPTY, crntStart))
paul@437 47
        states[crntStart].append((EMPTY, finish))
paul@437 48
    return start, finish
paul@437 49
paul@437 50
# ______________________________________________________________________
paul@437 51
paul@437 52
def group (states, *stateIndexPairs):
paul@437 53
    if len(stateIndexPairs) > 1:
paul@437 54
        start = len(states)
paul@437 55
        finish = start + 1
paul@437 56
        startList = []
paul@437 57
        states.append(startList)
paul@437 58
        states.append([])
paul@437 59
        for eStart, eFinish in stateIndexPairs:
paul@437 60
            startList.append((EMPTY, eStart))
paul@437 61
            states[eFinish].append((EMPTY, finish))
paul@437 62
        return start, finish
paul@437 63
    else:
paul@437 64
        return stateIndexPairs[0]
paul@437 65
paul@437 66
# ______________________________________________________________________
paul@437 67
paul@437 68
def groupStr (states, str):
paul@437 69
    return group(states, *map(lambda x : newArcPair(states, x), str))
paul@437 70
paul@437 71
# ______________________________________________________________________
paul@437 72
paul@437 73
def notGroup (states, *stateIndexPairs):
paul@437 74
    """Like group, but will add a DEFAULT transition to a new end state,
paul@437 75
    causing anything in the group to not match by going to a dead state.
paul@437 76
    XXX I think this is right...
paul@437 77
    """
paul@437 78
    start, dead = group(states, *stateIndexPairs)
paul@437 79
    finish = len(states)
paul@437 80
    states.append([])
paul@437 81
    states[start].append((DEFAULT, finish))
paul@437 82
    return start, finish
paul@437 83
paul@437 84
# ______________________________________________________________________
paul@437 85
paul@437 86
def notGroupStr (states, str):
paul@437 87
    return notGroup(states, *map(lambda x : newArcPair(states, x), str))
paul@437 88
# ______________________________________________________________________
paul@437 89
paul@437 90
def any (states, *stateIndexPairs):
paul@437 91
    start, finish = group(states, *stateIndexPairs)
paul@437 92
    states[finish].append((EMPTY, start))
paul@437 93
    return start, start
paul@437 94
paul@437 95
# ______________________________________________________________________
paul@437 96
paul@437 97
def maybe (states, *stateIndexPairs):
paul@437 98
    start, finish = group(states, *stateIndexPairs)
paul@437 99
    states[start].append((EMPTY, finish))
paul@437 100
    return start, finish
paul@437 101
paul@437 102
# ______________________________________________________________________
paul@437 103
paul@437 104
def atleastonce (states, *stateIndexPairs):
paul@437 105
    start, finish = group(states, *stateIndexPairs)
paul@437 106
    states[finish].append((EMPTY, start))
paul@437 107
    return start, finish
paul@437 108
paul@437 109
# ______________________________________________________________________
paul@437 110
paul@437 111
def closure (states, start, result = 0L):
paul@437 112
    if None == result:
paul@437 113
        result = 0L
paul@437 114
    if 0 == (result & (1L << start)):
paul@437 115
        result |= (1L << start)
paul@437 116
        for label, arrow in states[start]:
paul@437 117
            if label == EMPTY:
paul@437 118
                result |= closure(states, arrow, result)
paul@437 119
    return result
paul@437 120
paul@437 121
# ______________________________________________________________________
paul@437 122
paul@437 123
def nfaToDfa (states, start, finish):
paul@437 124
    tempStates = []
paul@437 125
    startClosure = closure(states, start)
paul@437 126
    crntTempState = [startClosure, [], 0 != (startClosure & (1L << finish))]
paul@437 127
    tempStates.append(crntTempState)
paul@437 128
    index = 0
paul@437 129
    while index < len(tempStates):
paul@437 130
        crntTempState = tempStates[index]
paul@437 131
        crntClosure, crntArcs, crntAccept = crntTempState
paul@437 132
        for index2 in range(0, len(states)):
paul@437 133
            if 0 != (crntClosure & (1L << index2)):
paul@437 134
                for label, nfaArrow in states[index2]:
paul@437 135
                    if label == EMPTY:
paul@437 136
                        continue
paul@437 137
                    foundTempArc = False
paul@437 138
                    for tempArc in crntArcs:
paul@437 139
                        if tempArc[0] == label:
paul@437 140
                            foundTempArc = True
paul@437 141
                            break
paul@437 142
                    if not foundTempArc:
paul@437 143
                        tempArc = [label, -1, 0L]
paul@437 144
                        crntArcs.append(tempArc)
paul@437 145
                    tempArc[2] = closure(states, nfaArrow, tempArc[2])
paul@437 146
        for arcIndex in range(0, len(crntArcs)):
paul@437 147
            label, arrow, targetStates = crntArcs[arcIndex]
paul@437 148
            targetFound = False
paul@437 149
            arrow = 0
paul@437 150
            for destTempState in tempStates:
paul@437 151
                if destTempState[0] == targetStates:
paul@437 152
                    targetFound = True
paul@437 153
                    break
paul@437 154
                arrow += 1
paul@437 155
            if not targetFound:
paul@437 156
                assert arrow == len(tempStates)
paul@437 157
                newState = [targetStates, [], 0 != (targetStates &
paul@437 158
                                                    (1L << finish))]
paul@437 159
                tempStates.append(newState)
paul@437 160
            crntArcs[arcIndex][1] = arrow
paul@437 161
        index += 1
paul@437 162
    tempStates = simplifyTempDfa(tempStates)
paul@437 163
    states = finalizeTempDfa(tempStates)
paul@437 164
    return states
paul@437 165
paul@437 166
# ______________________________________________________________________
paul@437 167
paul@437 168
def sameState (s1, s2):
paul@437 169
    """sameState(s1, s2)
paul@437 170
    Note:
paul@437 171
    state := [ nfaclosure : Long, [ arc ], accept : Boolean ]
paul@437 172
    arc := [ label, arrow : Int, nfaClosure : Long ]
paul@437 173
    """
paul@437 174
    if (len(s1[1]) != len(s2[1])) or (s1[2] != s2[2]):
paul@437 175
        return False
paul@437 176
    for arcIndex in range(0, len(s1[1])):
paul@437 177
        arc1 = s1[1][arcIndex]
paul@437 178
        arc2 = s2[1][arcIndex]
paul@437 179
        if arc1[:-1] != arc2[:-1]:
paul@437 180
            return False
paul@437 181
    return True
paul@437 182
paul@437 183
# ______________________________________________________________________
paul@437 184
paul@437 185
def simplifyTempDfa (tempStates):
paul@437 186
    """simplifyTempDfa (tempStates)
paul@437 187
    """
paul@437 188
    changes = True
paul@437 189
    deletedStates = []
paul@437 190
    while changes:
paul@437 191
        changes = False
paul@437 192
        for i in range(1, len(tempStates)):
paul@437 193
            if i in deletedStates:
paul@437 194
                continue
paul@437 195
            for j in range(0, i):
paul@437 196
                if j in deletedStates:
paul@437 197
                    continue
paul@437 198
                if sameState(tempStates[i], tempStates[j]):
paul@437 199
                    deletedStates.append(i)
paul@437 200
                    for k in range(0, len(tempStates)):
paul@437 201
                        if k in deletedStates:
paul@437 202
                            continue
paul@437 203
                        for arc in tempStates[k][1]:
paul@437 204
                            if arc[1] == i:
paul@437 205
                                arc[1] = j
paul@437 206
                    changes = True
paul@437 207
                    break
paul@437 208
    for stateIndex in deletedStates:
paul@437 209
        tempStates[stateIndex] = None
paul@437 210
    return tempStates
paul@437 211
# ______________________________________________________________________
paul@437 212
paul@437 213
def finalizeTempDfa (tempStates):
paul@437 214
    """finalizeTempDfa (tempStates)
paul@437 215
    
paul@437 216
    Input domain:
paul@437 217
    tempState := [ nfaClosure : Long, [ tempArc ], accept : Boolean ]
paul@437 218
    tempArc := [ label, arrow, nfaClosure ]
paul@437 219
paul@437 220
    Output domain:
paul@437 221
    state := [ arcMap, accept : Boolean ]
paul@437 222
    """
paul@437 223
    states = []
paul@437 224
    accepts = []
paul@437 225
    stateMap = {}
paul@437 226
    tempIndex = 0
paul@437 227
    for tempIndex in range(0, len(tempStates)):
paul@437 228
        tempState = tempStates[tempIndex]
paul@437 229
        if None != tempState:
paul@437 230
            stateMap[tempIndex] = len(states)
paul@437 231
            states.append({})
paul@437 232
            accepts.append(tempState[2])
paul@437 233
    for tempIndex in stateMap.keys():
paul@437 234
        stateBitset, tempArcs, accepting = tempStates[tempIndex]
paul@437 235
        newIndex = stateMap[tempIndex]
paul@437 236
        arcMap = states[newIndex]
paul@437 237
        for tempArc in tempArcs:
paul@437 238
            arcMap[tempArc[0]] = stateMap[tempArc[1]]
paul@437 239
    return states, accepts
paul@437 240