from pyparser import automata
from pyparser.pygram import tokens
from pyparser.pytoken import python_opmap
from pyparser.error import TokenError, TokenIndentationError
from pyparser.pytokenize import tabsize, whiteSpaceDFA, \
    triple_quoted, endDFAs, single_quoted, pseudoDFA
from pyparser import consts

NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
NUMCHARS = '0123456789'
ALNUMCHARS = NAMECHARS + NUMCHARS
EXTENDED_ALNUMCHARS = ALNUMCHARS + '-.'
WHITESPACES = ' \t\n\r\v\f'
def match_encoding_declaration(comment):
    r"""returns the declared encoding or None

    This function is a replacement for:
    >>> py_encoding = re.compile(r"coding[:=]\s*([-\w.]+)")
    >>> py_encoding.search(comment)
    """
    index = comment.find('coding')
    if index < 0:
        return None
    if index + 6 >= len(comment):
        # 'coding' sits at the very end of the comment, so no ':' or '='
        # can follow and there is no declaration to extract
        return None
    next_char = comment[index + 6]
    if next_char not in ':=':
        return None
    end_of_decl = comment[index + 7:]
    index = 0
    for char in end_of_decl:
        if char not in WHITESPACES:
            break
        index += 1
    else:
        return None
    encoding = ''
    for char in end_of_decl[index:]:
        if char in EXTENDED_ALNUMCHARS:
            encoding += char
        else:
            break
    if encoding != '':
        return encoding
    return None
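
# A quick sketch of the expected behaviour (the sample comment strings are
# made up for illustration, they do not come from this code base):
#
#     match_encoding_declaration("# -*- coding: utf-8 -*-")    -> 'utf-8'
#     match_encoding_declaration("# vim:fileencoding=latin-1") -> 'latin-1'
#     match_encoding_declaration("# no declaration here")      -> None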


DUMMY_DFA = automata.DFA([], [])

def generate_tokens(lines, flags):
    """
    This is a rewrite of pypy.module.parser.pytokenize.generate_tokens since
    the original function is not RPython (it uses yield).
    It was also slightly modified to build the whole token list at once
    instead of yielding -- each entry is a 5-tuple of

    * the token type
    * the token string
    * the line number (the real one, counting continuation lines)
    * the position on the line of the start of the token
    * the whole line as a string

    Original docstring::

        The generate_tokens() generator requires one argument, readline, which
        must be a callable object which provides the same interface as the
        readline() method of built-in file objects. Each call to the function
        should return one line of input as a string.

        The generator produces 5-tuples with these members: the token type; the
        token string; a 2-tuple (srow, scol) of ints specifying the row and
        column where the token begins in the source; a 2-tuple (erow, ecol) of
        ints specifying the row and column where the token ends in the source;
        and the line on which the token was found. The line passed is the
        logical line; continuation lines are included.
    """
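    # Tokenizer state kept across physical lines: lnum counts physical
    # lines, parenlev tracks the nesting depth of (), [] and {}, continued
    # is set after a backslash continuation, contstr/contline accumulate an
    # unfinished multi-line string (strstart remembers where it began), and
    # indents is the stack of active indentation columns.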
    token_list = []
    lnum = parenlev = continued = 0
    namechars = NAMECHARS
    numchars = NUMCHARS
    contstr, needcont = '', 0
    contline = None
    indents = [0]
    last_comment = ''
    parenlevstart = (0, 0, "")

    # make the annotator happy
    endDFA = DUMMY_DFA
    # make the annotator happy
    line = ''
    pos = 0
    lines.append("")
    strstart = (0, 0, "")
    for line in lines:
        lnum = lnum + 1
        line = universal_newline(line)
        pos, max = 0, len(line)

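        # a non-empty contstr means an earlier line opened a string that is
        # still unterminated; keep accumulating lines until endDFA (set when
        # the string was opened) recognizes the closing quotes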
        if contstr:
            if not line:
                raise TokenError(
                    "EOF while scanning triple-quoted string literal",
                    strstart[2], strstart[0], strstart[1] + 1,
                    token_list, lnum - 1)
            endmatch = endDFA.recognize(line)
            if endmatch >= 0:
                pos = end = endmatch
                tok = (tokens["STRING"], contstr + line[:end], strstart[0],
                       strstart[1], line)
                token_list.append(tok)
                last_comment = ''
                contstr, needcont = '', 0
                contline = None
            elif (needcont and not line.endswith('\\\n') and
                  not line.endswith('\\\r\n')):
                tok = (tokens["ERRORTOKEN"], contstr + line, strstart[0],
                       strstart[1], line)
                token_list.append(tok)
                last_comment = ''
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
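            # a tab advances the column to the next multiple of tabsize,
            # e.g. with tabsize == 8 a line starting ' \t' is at column 8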
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ': column = column + 1
                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f': column = 0
                else: break
                pos = pos + 1
            if pos == max: break

            if line[pos] in '#\r\n':
                # skip comments or blank lines
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                token_list.append((tokens["INDENT"], line[:pos], lnum, 0, line))
                last_comment = ''
            while column < indents[-1]:
                indents = indents[:-1]
                token_list.append((tokens["DEDENT"], '', lnum, pos, line))
                last_comment = ''
            if column != indents[-1]:
                err = "unindent does not match any outer indentation level"
                raise TokenIndentationError(err, line, lnum, 0, token_list)

        else:                                  # continued statement
            if not line:
                if parenlev > 0:
                    lnum1, start1, line1 = parenlevstart
                    raise TokenError("parenthesis is never closed", line1,
                                     lnum1, start1 + 1, token_list, lnum)
                raise TokenError("EOF in multi-line statement", line,
                                 lnum, 0, token_list)
            continued = 0

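        # main scanning loop: pseudoDFA matches the end of the next
        # pseudo-token starting at line[pos] (including any leading
        # whitespace), and whiteSpaceDFA then finds where the token's
        # significant characters actually begin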
        while pos < max:
            pseudomatch = pseudoDFA.recognize(line, pos)
            if pseudomatch >= 0:                           # scan for tokens
                # JDR: Modified
                start = whiteSpaceDFA.recognize(line, pos)
                if start < 0:
                    start = pos
                end = pseudomatch

                if start == end:
                    raise TokenError("Unknown character", line,
                                     lnum, start + 1, token_list)

                pos = end
                token, initial = line[start:end], line[start]
                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    token_list.append((tokens["NUMBER"], token, lnum, start, line))
                    last_comment = ''
                elif initial in '\r\n':
                    if parenlev <= 0:
                        tok = (tokens["NEWLINE"], last_comment, lnum, start, line)
                        token_list.append(tok)
                    last_comment = ''
                elif initial == '#':
                    # skip comment
                    last_comment = token
                elif token in triple_quoted:
                    endDFA = endDFAs[token]
                    endmatch = endDFA.recognize(line, pos)
                    if endmatch >= 0:                      # all on one line
                        pos = endmatch
                        token = line[start:pos]
                        tok = (tokens["STRING"], token, lnum, start, line)
                        token_list.append(tok)
                        last_comment = ''
                    else:
                        strstart = (lnum, start, line)
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start, line)
                        endDFA = (endDFAs[initial] or endDFAs[token[1]] or
                                  endDFAs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        tok = (tokens["STRING"], token, lnum, start, line)
                        token_list.append(tok)
                        last_comment = ''
                elif initial in namechars:                 # ordinary name
                    token_list.append((tokens["NAME"], token, lnum, start, line))
                    last_comment = ''
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        if parenlev == 0:
                            parenlevstart = (lnum, start, line)
                        parenlev = parenlev + 1
                    elif initial in ')]}':
                        parenlev = parenlev - 1
                        if parenlev < 0:
                            raise TokenError("unmatched '%s'" % initial, line,
                                             lnum, start + 1, token_list)
                    if token in python_opmap:
                        punct = python_opmap[token]
                    else:
                        punct = tokens["OP"]
                    token_list.append((punct, token, lnum, start, line))
                    last_comment = ''
            else:
                start = whiteSpaceDFA.recognize(line, pos)
                if start < 0:
                    start = pos
                if start < max and line[start] in single_quoted:
                    raise TokenError("EOL while scanning string literal",
                                     line, lnum, start + 1, token_list)
                tok = (tokens["ERRORTOKEN"], line[pos], lnum, pos, line)
                token_list.append(tok)
                last_comment = ''
                pos = pos + 1
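
    # end of input: unless PyCF_DONT_IMPLY_DEDENT is set, imply a trailing
    # NEWLINE and pop any indentation levels still open; the final
    # ENDMARKER is always appended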
    lnum -= 1
    if not (flags & consts.PyCF_DONT_IMPLY_DEDENT):
        if token_list and token_list[-1][0] != tokens["NEWLINE"]:
            tok = (tokens["NEWLINE"], '', lnum, 0, '\n')
            token_list.append(tok)
        for indent in indents[1:]:             # pop remaining indent levels
            token_list.append((tokens["DEDENT"], '', lnum, pos, line))
        tok = (tokens["NEWLINE"], '', lnum, 0, '\n')
        token_list.append(tok)

    token_list.append((tokens["ENDMARKER"], '', lnum, pos, line))
    return token_list
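
# A minimal usage sketch (hypothetical input; flags=0 means no compiler
# flags such as consts.PyCF_DONT_IMPLY_DEDENT are in effect):
#
#     lines = ["if x:\n", "    y = 1\n"]
#     for tok_type, value, lnum, col, line in generate_tokens(lines, 0):
#         print(tok_type, repr(value), lnum, col)
#
# Note that generate_tokens appends a sentinel "" to the lines list it is
# given, so pass a copy if the original list must stay unchanged.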


def universal_newline(line):
    # show annotator that indexes below are non-negative
    line_len_m2 = len(line) - 2
    if line_len_m2 >= 0 and line[-2] == '\r' and line[-1] == '\n':
        return line[:line_len_m2] + '\n'
    line_len_m1 = len(line) - 1
    if line_len_m1 >= 0 and line[-1] == '\r':
        return line[:line_len_m1] + '\n'
    return line
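
# universal_newline normalizes Mac and Windows line endings to '\n', e.g.:
#
#     universal_newline("pass\r\n") -> "pass\n"
#     universal_newline("pass\r")   -> "pass\n"
#     universal_newline("pass\n")   -> "pass\n"   (unchanged)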