Lichen

lib/sre_constants.py

301:8e28983c45a7
2016-12-02 Paul Boddie Changed the hashtable/mapping representation to have a configurable number of buckets and for the number to be set initially depending on the number of items provided.
     #
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
#
# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# update when constants are added or removed

MAGIC = 20031017

# max code word in this release

MAXREPEAT = 65535

# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    """SRE standard exception (accessed by users as sre.error)."""
    pass

# operators
#
# Each operator is identified by a string; the numeric code of an operator
# is its position in the OPCODES list further down.

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# The order of the entries below is significant: makedict() turns each list
# into a name -> index mapping, so reordering changes the numeric codes.
#
# NOTE(review): MAX_REPEAT and MIN_REPEAT are defined above but are not
# listed here, so they have no numeric code. Presumably they are only used
# before code generation -- confirm against the compiler.

OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]

def makedict(list):
    """Return a dictionary mapping each item of 'list' to its position.

    Positions are counted from zero in iteration order. If an item occurs
    more than once, the position of its last occurrence is kept. An empty
    sequence yields an empty dictionary.
    """
    # The parameter name shadows the built-in 'list'; it is kept unchanged
    # so that existing keyword callers continue to work.
    mapping = {}
    index = 0
    for item in list:
        mapping[item] = index
        index += 1
    return mapping

# From here on the three names refer to name -> numeric code dictionaries
# rather than to the ordered lists defined above.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# replacement position codes, keyed by the plain position code
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# replacement category codes, keyed by the plain category code; in the
# locale table only the word categories have a distinct replacement, all
# other entries map to themselves
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags (each value is a distinct bit, so flags can be combined)
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set