Lichen (annotate pyparser/automata.py in d760a72fbd2e)

# ______________________________________________________________________

paul@437

2

"""Module automata

THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED

paul@437

5

TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists

paul@437

6

instead of a unique nested one)

$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $

paul@437

9

"""

paul@437

10

# ______________________________________________________________________

paul@437

11

# Module level definitions

# PYPY Modification: removed the EMPTY class as it's not needed here

# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython

paul@437

17

# dicts (see pytokenize.py).  Then DFA.__init__() turns these dicts into

paul@437

18

# more compact strings.

paul@437

19

DEFAULT = object()

# PYPY Modification : removed all automata functions (any, maybe,

paul@437

22

#                     newArcPair, etc.)

ERROR_STATE = chr(255)

class DFA:

paul@437

27

    # ____________________________________________________________

paul@437

28

    def __init__(self, states, accepts, start = 0):

paul@437

29

        """ NOT_RPYTHON """

paul@437

30

        assert len(states) < 255 # no support for huge amounts of states

paul@437

31

        # construct string for looking up state transitions

paul@437

32

        string_states = [] * len(states)

paul@437

33

        # compute maximum

paul@437

34

        maximum = 0

paul@437

35

        for state in states:

paul@437

36

            for key in state:

paul@437

37

                if key == DEFAULT:

paul@437

38

                    continue

paul@437

39

                maximum = max(ord(key), maximum)

paul@437

40

        self.max_char = maximum + 1

        defaults = []

paul@437

43

        for i, state in enumerate(states):

paul@437

44

            default = ERROR_STATE

paul@437

45

            if DEFAULT in state:

paul@437

46

                default = chr(state[DEFAULT])

paul@437

47

            defaults.append(default)

paul@437

48

            string_state = [default] * self.max_char

paul@437

49

            for key, value in state.iteritems():

paul@437

50

                if key == DEFAULT:

paul@437

51

                    continue

paul@437

52

                assert len(key) == 1

paul@437

53

                assert ord(key) < self.max_char

paul@437

54

                string_state[ord(key)] = chr(value)

paul@437

55

            string_states.extend(string_state)

paul@437

56

        self.states = "".join(string_states)

paul@437

57

        self.defaults = "".join(defaults)

paul@437

58

        self.accepts = accepts

paul@437

59

        self.start = start

    # ____________________________________________________________

    def _next_state(self, item, crntState):

paul@437

64

        if ord(item) >= self.max_char:

paul@437

65

            return self.defaults[crntState]

paul@437

66

        else:

paul@437

67

            return self.states[crntState * self.max_char + ord(item)]

    def recognize(self, inVec, pos = 0):

paul@437

70

        crntState = self.start

paul@437

71

        lastAccept = False

paul@437

72

        i = pos

paul@437

73

        for i in range(pos, len(inVec)):

paul@437

74

            item = inVec[i]

paul@437

75

            accept = self.accepts[crntState]

paul@437

76

            crntState = self._next_state(item, crntState)

paul@437

77

            if crntState != ERROR_STATE:

paul@437

78

                pass

paul@437

79

            elif accept:

paul@437

80

                return i

paul@437

81

            elif lastAccept:

paul@437

82

                # This is now needed b/c of exception cases where there are

paul@437

83

                # transitions to dead states

paul@437

84

                return i - 1

paul@437

85

            else:

paul@437

86

                return -1

paul@437

87

            crntState = ord(crntState)

paul@437

88

            lastAccept = accept

paul@437

89

        # if self.states[crntState][1]:

paul@437

90

        if self.accepts[crntState]:

paul@437

91

            return i + 1

paul@437

92

        elif lastAccept:

paul@437

93

            return i

paul@437

94

        else:

paul@437

95

            return -1

# ______________________________________________________________________

class NonGreedyDFA (DFA):

    def recognize(self, inVec, pos = 0):

paul@437

102

        crntState = self.start

paul@437

103

        i = pos

paul@437

104

        for i in range(pos, len(inVec)):

paul@437

105

            item = inVec[i]

paul@437

106

            accept = self.accepts[crntState]

paul@437

107

            if accept:

paul@437

108

                return i

paul@437

109

            crntState = self._next_state(item, crntState)

paul@437

110

            if crntState == ERROR_STATE:

paul@437

111

                return -1

paul@437

112

            crntState = ord(crntState)

paul@437

113

            i += 1

paul@437

114

        if self.accepts[crntState]:

paul@437

115

            return i

paul@437

116

        else:

paul@437

117

            return -1

# ______________________________________________________________________

paul@437

120

# End of automata.py

paul@437	1	# ______________________________________________________________________
paul@437	2	"""Module automata
paul@437	3
paul@437	4	THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
paul@437	5	TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists
paul@437	6	instead of a unique nested one)
paul@437	7
paul@437	8	$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $
paul@437	9	"""
paul@437	10	# ______________________________________________________________________
paul@437	11	# Module level definitions
paul@437	12
paul@437	13	# PYPY Modification: removed the EMPTY class as it's not needed here
paul@437	14
paul@437	15
paul@437	16	# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython
paul@437	17	# dicts (see pytokenize.py). Then DFA.__init__() turns these dicts into
paul@437	18	# more compact strings.
paul@437	19	DEFAULT = object()
paul@437	20
paul@437	21	# PYPY Modification : removed all automata functions (any, maybe,
paul@437	22	# newArcPair, etc.)
paul@437	23
paul@437	24	ERROR_STATE = chr(255)
paul@437	25
paul@437	26	class DFA:
paul@437	27	# ____________________________________________________________
paul@437	28	def __init__(self, states, accepts, start = 0):
paul@437	29	""" NOT_RPYTHON """
paul@437	30	assert len(states) < 255 # no support for huge amounts of states
paul@437	31	# construct string for looking up state transitions
paul@437	32	string_states = [] * len(states)
paul@437	33	# compute maximum
paul@437	34	maximum = 0
paul@437	35	for state in states:
paul@437	36	for key in state:
paul@437	37	if key == DEFAULT:
paul@437	38	continue
paul@437	39	maximum = max(ord(key), maximum)
paul@437	40	self.max_char = maximum + 1
paul@437	41
paul@437	42	defaults = []
paul@437	43	for i, state in enumerate(states):
paul@437	44	default = ERROR_STATE
paul@437	45	if DEFAULT in state:
paul@437	46	default = chr(state[DEFAULT])
paul@437	47	defaults.append(default)
paul@437	48	string_state = [default] * self.max_char
paul@437	49	for key, value in state.iteritems():
paul@437	50	if key == DEFAULT:
paul@437	51	continue
paul@437	52	assert len(key) == 1
paul@437	53	assert ord(key) < self.max_char
paul@437	54	string_state[ord(key)] = chr(value)
paul@437	55	string_states.extend(string_state)
paul@437	56	self.states = "".join(string_states)
paul@437	57	self.defaults = "".join(defaults)
paul@437	58	self.accepts = accepts
paul@437	59	self.start = start
paul@437	60
paul@437	61	# ____________________________________________________________
paul@437	62
paul@437	63	def _next_state(self, item, crntState):
paul@437	64	if ord(item) >= self.max_char:
paul@437	65	return self.defaults[crntState]
paul@437	66	else:
paul@437	67	return self.states[crntState * self.max_char + ord(item)]
paul@437	68
paul@437	69	def recognize(self, inVec, pos = 0):
paul@437	70	crntState = self.start
paul@437	71	lastAccept = False
paul@437	72	i = pos
paul@437	73	for i in range(pos, len(inVec)):
paul@437	74	item = inVec[i]
paul@437	75	accept = self.accepts[crntState]
paul@437	76	crntState = self._next_state(item, crntState)
paul@437	77	if crntState != ERROR_STATE:
paul@437	78	pass
paul@437	79	elif accept:
paul@437	80	return i
paul@437	81	elif lastAccept:
paul@437	82	# This is now needed b/c of exception cases where there are
paul@437	83	# transitions to dead states
paul@437	84	return i - 1
paul@437	85	else:
paul@437	86	return -1
paul@437	87	crntState = ord(crntState)
paul@437	88	lastAccept = accept
paul@437	89	# if self.states[crntState][1]:
paul@437	90	if self.accepts[crntState]:
paul@437	91	return i + 1
paul@437	92	elif lastAccept:
paul@437	93	return i
paul@437	94	else:
paul@437	95	return -1
paul@437	96
paul@437	97	# ______________________________________________________________________
paul@437	98
paul@437	99	class NonGreedyDFA (DFA):
paul@437	100
paul@437	101	def recognize(self, inVec, pos = 0):
paul@437	102	crntState = self.start
paul@437	103	i = pos
paul@437	104	for i in range(pos, len(inVec)):
paul@437	105	item = inVec[i]
paul@437	106	accept = self.accepts[crntState]
paul@437	107	if accept:
paul@437	108	return i
paul@437	109	crntState = self._next_state(item, crntState)
paul@437	110	if crntState == ERROR_STATE:
paul@437	111	return -1
paul@437	112	crntState = ord(crntState)
paul@437	113	i += 1
paul@437	114	if self.accepts[crntState]:
paul@437	115	return i
paul@437	116	else:
paul@437	117	return -1
paul@437	118
paul@437	119	# ______________________________________________________________________
paul@437	120	# End of automata.py

Lichen

Annotated pyparser/automata.py