Lichen

pyparser/genpytokenize.py

932:c07b0dd14f85
6 months ago Paul Boddie Moved integer instantiation support to library functions.
     1 #! /usr/bin/env python     2 """Module genPytokenize     3      4 Generates finite state automata for recognizing Python tokens.  These are hand     5 coded versions of the regular expressions originally appearing in Ping's     6 tokenize module in the Python standard library.     7      8 When run from the command line, this should pretty print the DFA machinery.     9     10 $Id: genPytokenize.py,v 1.1 2003/10/02 17:37:17 jriehl Exp $    11 """    12     13 from pyparser.pylexer import *    14 from pyparser.automata import NonGreedyDFA, DFA, DEFAULT    15     16 def makePyPseudoDFA ():    17     import string    18     states = []    19     def makeEOL():    20         return group(states,    21                      newArcPair(states, "\n"),    22                      chain(states,    23                            newArcPair(states, "\r"),    24                            maybe(states, newArcPair(states, "\n"))))    25     # ____________________________________________________________    26     def makeLineCont ():    27         return chain(states,    28                      newArcPair(states, "\\"),    29                      makeEOL())    30     # ____________________________________________________________    31     # Ignore stuff    32     def makeWhitespace ():    33         return any(states, groupStr(states, " \f\t"))    34     # ____________________________________________________________    35     def makeComment ():    36         return chain(states,    37                      newArcPair(states, "#"),    38                      any(states, notGroupStr(states, "\r\n")))    39     # ____________________________________________________________    40     #ignore = chain(states,    41     #               makeWhitespace(),    42     #               any(states, chain(states,    43     #                                 makeLineCont(),    44     #                                 makeWhitespace())),    45     #               maybe(states, makeComment()))    46     # ____________________________________________________________    47     # Names    48     name = chain(states,    49                  groupStr(states, string.letters + "_"),    50                  any(states, groupStr(states,    51                                       string.letters + string.digits + "_")))    52     # ____________________________________________________________    53     # Digits    54     def makeDigits ():    55         return groupStr(states, "0123456789")    56     # ____________________________________________________________    57     # Integer numbers    58     hexNumber = chain(states,    59                       newArcPair(states, "0"),    60                       groupStr(states, "xX"),    61                       atleastonce(states,    62                                   groupStr(states, "0123456789abcdefABCDEF")),    63                       maybe(states, groupStr(states, "lL")))    64     octNumber = chain(states,    65                       newArcPair(states, "0"),    66                       maybe(states,    67                             chain(states,    68                                   groupStr(states, "oO"),    69                                   groupStr(states, "01234567"))),    70                       any(states, groupStr(states, "01234567")),    71                       maybe(states, groupStr(states, "lL")))    72     binNumber = chain(states,    73                       newArcPair(states, "0"),    74                       groupStr(states, "bB"),    75                       atleastonce(states, groupStr(states, "01")),    76                       maybe(states, groupStr(states, "lL")))    77     decNumber = chain(states,    78                       groupStr(states, "123456789"),    79                       any(states, makeDigits()),    80                       maybe(states, groupStr(states, "lL")))    81     intNumber = group(states, hexNumber, octNumber, binNumber, decNumber)    82     # ____________________________________________________________    83     # Exponents    84     def makeExp ():    85         return chain(states,    86                      groupStr(states, "eE"),    87                      maybe(states, groupStr(states, "+-")),    88                      atleastonce(states, makeDigits()))    89     # ____________________________________________________________    90     # Floating point numbers    91     def makeFloat ():    92         pointFloat = chain(states,    93                            group(states,    94                                  chain(states,    95                                        atleastonce(states, makeDigits()),    96                                        newArcPair(states, "."),    97                                        any(states, makeDigits())),    98                                  chain(states,    99                                        newArcPair(states, "."),   100                                        atleastonce(states, makeDigits()))),   101                            maybe(states, makeExp()))   102         expFloat = chain(states,   103                          atleastonce(states, makeDigits()),   104                          makeExp())   105         return group(states, pointFloat, expFloat)   106     # ____________________________________________________________   107     # Imaginary numbers   108     imagNumber = group(states,   109                        chain(states,   110                              atleastonce(states, makeDigits()),   111                              groupStr(states, "jJ")),   112                        chain(states,   113                              makeFloat(),   114                              groupStr(states, "jJ")))   115     # ____________________________________________________________   116     # Any old number.   117     number = group(states, imagNumber, makeFloat(), intNumber)   118     # ____________________________________________________________   119     # Funny   120     operator = group(states,   121                      chain(states,   122                            chainStr(states, "**"),   123                            maybe(states, newArcPair(states, "="))),   124                      chain(states,   125                            chainStr(states, ">>"),   126                            maybe(states, newArcPair(states, "="))),   127                      chain(states,   128                            chainStr(states, "<<"),   129                            maybe(states, newArcPair(states, "="))),   130                      chainStr(states, "<>"),   131                      chainStr(states, "!="),   132                      chain(states,   133                            chainStr(states, "//"),   134                            maybe(states, newArcPair(states, "="))),   135                      chain(states,   136                            groupStr(states, "+-*/%&|^=<>"),   137                            maybe(states, newArcPair(states, "="))),   138                      newArcPair(states, "~"))   139     bracket = groupStr(states, "[](){}")   140     special = group(states,   141                     makeEOL(),   142                     groupStr(states, "@:;.,`"))   143     funny = group(states, operator, bracket, special)   144     # ____________________________________________________________   145     def makeStrPrefix ():   146         return chain(states,   147                      maybe(states, groupStr(states, "uUbB")),   148                      maybe(states, groupStr(states, "rR")))   149     # ____________________________________________________________   150     contStr = group(states,   151                     chain(states,   152                           makeStrPrefix(),   153                           newArcPair(states, "'"),   154                           any(states,   155                               notGroupStr(states, "\r\n'\\")),   156                           any(states,   157                               chain(states,   158                                     newArcPair(states, "\\"),   159                                     newArcPair(states, DEFAULT),   160                                     any(states,   161                                         notGroupStr(states, "\r\n'\\")))),   162                           group(states,   163                                 newArcPair(states, "'"),   164                                 makeLineCont())),   165                     chain(states,   166                           makeStrPrefix(),   167                           newArcPair(states, '"'),   168                           any(states,   169                               notGroupStr(states, '\r\n"\\')),   170                           any(states,   171                               chain(states,   172                                     newArcPair(states, "\\"),   173                                     newArcPair(states, DEFAULT),   174                                     any(states,   175                                         notGroupStr(states, '\r\n"\\')))),   176                           group(states,   177                                 newArcPair(states, '"'),   178                                 makeLineCont())))   179     triple = chain(states,   180                    makeStrPrefix(),   181                    group(states,   182                          chainStr(states, "'''"),   183                          chainStr(states, '"""')))   184     pseudoExtras = group(states,   185                          makeLineCont(),   186                          makeComment(),   187                          triple)   188     pseudoToken = chain(states,   189                         makeWhitespace(),   190                         group(states,   191                               newArcPair(states, EMPTY),   192                               pseudoExtras, number, funny, contStr, name))   193     dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken)   194     return DFA(dfaStates, dfaAccepts), dfaStates   195    196 # ______________________________________________________________________   197    198 def makePyEndDFAMap ():   199     states = []   200     single = chain(states,   201                    any(states, notGroupStr(states, "'\\")),   202                    any(states,   203                        chain(states,   204                              newArcPair(states, "\\"),   205                              newArcPair(states, DEFAULT),   206                              any(states, notGroupStr(states, "'\\")))),   207                    newArcPair(states, "'"))   208     states, accepts = nfaToDfa(states, *single)   209     singleDFA = DFA(states, accepts)   210     states_singleDFA = states   211     states = []   212     double = chain(states,   213                    any(states, notGroupStr(states, '"\\')),   214                    any(states,   215                        chain(states,   216                              newArcPair(states, "\\"),   217                              newArcPair(states, DEFAULT),   218                              any(states, notGroupStr(states, '"\\')))),   219                    newArcPair(states, '"'))   220     states, accepts = nfaToDfa(states, *double)   221     doubleDFA = DFA(states, accepts)   222     states_doubleDFA = states   223     states = []   224     single3 = chain(states,   225                     any(states, notGroupStr(states, "'\\")),   226                     any(states,   227                         chain(states,   228                               group(states,   229                                     chain(states,   230                                           newArcPair(states, "\\"),   231                                           newArcPair(states, DEFAULT)),   232                                     chain(states,   233                                           newArcPair(states, "'"),   234                                           notChainStr(states, "''"))),   235                               any(states, notGroupStr(states, "'\\")))),   236                     chainStr(states, "'''"))   237     states, accepts = nfaToDfa(states, *single3)   238     single3DFA = NonGreedyDFA(states, accepts)   239     states_single3DFA = states   240     states = []   241     double3 = chain(states,   242                     any(states, notGroupStr(states, '"\\')),   243                     any(states,   244                         chain(states,   245                               group(states,   246                                     chain(states,   247                                           newArcPair(states, "\\"),   248                                           newArcPair(states, DEFAULT)),   249                                     chain(states,   250                                           newArcPair(states, '"'),   251                                           notChainStr(states, '""'))),   252                               any(states, notGroupStr(states, '"\\')))),   253                     chainStr(states, '"""'))   254     states, accepts = nfaToDfa(states, *double3)   255     double3DFA = NonGreedyDFA(states, accepts)   256     states_double3DFA = states   257     map = {"'" : (singleDFA, states_singleDFA),   258            '"' : (doubleDFA, states_doubleDFA),   259            "r" : None,   260            "R" : None,   261            "u" : None,   262            "U" : None,   263            "b" : None,   264            "B" : None}   265     for uniPrefix in ("", "u", "U", "b", "B", ):   266         for rawPrefix in ("", "r", "R"):   267             prefix = uniPrefix + rawPrefix   268             map[prefix + "'''"] = (single3DFA, states_single3DFA)   269             map[prefix + '"""'] = (double3DFA, states_double3DFA)   270     return map   271    272 # ______________________________________________________________________   273    274 def output(name, dfa_class, dfa, states):   275     import textwrap   276     lines = []   277     i = 0   278     for line in textwrap.wrap(repr(dfa.accepts), width = 50):   279         if i == 0:   280             lines.append("accepts = ")   281         else:   282             lines.append("           ")   283         lines.append(line)   284         lines.append("\n")   285         i += 1   286     import StringIO   287     lines.append("states = [\n")   288     for numstate, state in enumerate(states):   289         lines.append("    # ")   290         lines.append(str(numstate))   291         lines.append('\n')   292         s = StringIO.StringIO()   293         i = 0   294         for k, v in sorted(state.items()):   295             i += 1   296             if k == DEFAULT:   297                 k = "automata.DEFAULT"   298             else:   299                 k = repr(k)   300             s.write(k)   301             s.write('::')   302             s.write(repr(v))   303             if i < len(state):   304                 s.write(', ')   305         s.write('},')   306         i = 0   307         if len(state) <= 4:   308             text = [s.getvalue()]   309         else:   310             text = textwrap.wrap(s.getvalue(), width=36)   311         for line in text:   312             line = line.replace('::', ': ')   313             if i == 0:   314                 lines.append('    {')   315             else:   316                 lines.append('     ')   317             lines.append(line)   318             lines.append('\n')   319             i += 1   320     lines.append("    ]\n")   321     lines.append("%s = automata.%s(states, accepts)\n" % (name, dfa_class))   322     return ''.join(lines)   323    324 def main ():   325     pseudoDFA, states_pseudoDFA = makePyPseudoDFA()   326     print output("pseudoDFA", "DFA", pseudoDFA, states_pseudoDFA)   327     endDFAMap = makePyEndDFAMap()   328     dfa, states = endDFAMap['"""']   329     print output("double3DFA", "NonGreedyDFA", dfa, states)   330     dfa, states = endDFAMap["'''"]   331     print output("single3DFA", "NonGreedyDFA", dfa, states)   332     dfa, states = endDFAMap["'"]   333     print output("singleDFA", "DFA", dfa, states)   334     dfa, states = endDFAMap["\""]   335     print output("doubleDFA", "DFA", dfa, states)   336    337 # ______________________________________________________________________   338    339 if __name__ == "__main__":   340     main()