#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
#
# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# update when constants are added or removed

MAGIC = 20031017

# max code word in this release

MAXREPEAT = 65535

# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    """SRE standard exception (access as sre.error)."""

# operators
#
# Each operator is a symbolic name (a string).  The numeric code of an
# operator is assigned further down, from its position in OPCODES.

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# NOTE: the order of the three lists below is significant.  makedict()
# turns each list into a {name: position} table, so inserting, removing
# or reordering an entry renumbers every entry after it (see the note
# on MAGIC above).
#
# MAX_REPEAT and MIN_REPEAT are defined above but are not listed in
# OPCODES, so they get no numeric code here.
# NOTE(review): presumably intentional -- confirm before adding them.

OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]

def makedict(list):
    """Return a dictionary mapping each item of *list* to its index.

    Items are numbered from 0 in iteration order.  If an item occurs
    more than once, the index of its last occurrence is kept.  An empty
    input gives an empty dictionary.

    The parameter name shadows the builtin ``list``; it is kept so the
    signature stays exactly as callers know it.
    """
    d = {}
    for i, item in enumerate(list):
        d[item] = i
    return d

# From here on the three names refer to {symbol: numeric code} tables
# rather than to the ordered lists defined above.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# replacement position codes: each table maps a default AT_* code to
# the variant named by the table
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# replacement category codes.  In CH_LOCALE only the WORD / NOT_WORD
# categories change; the digit, space and linebreak categories map to
# themselves.  CH_UNICODE remaps every category to its UNI variant.
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags (each value is a distinct power of two)
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set