#!/usr/bin/env python

"""
Search Python abstract syntax trees for nodes of a particular type having a
particular textual value.

Copyright (C) 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import compiler
import os
import linecache
import types

__version__ = "0.1.1"

# Excluded AST nodes and their names.

excluded_term_types = ["Module", "Stmt"]
excluded_term_cls = tuple([getattr(compiler.ast, name) for name in excluded_term_types])

# Search functions.

def search_recursive(directory, term_type, op=None):

    """
    Search files within the filesystem below 'directory' for terms having the
    given 'term_type', using 'op' (if specified) to match a search term.

    Only files whose extension is "py" are examined. Return the concatenated
    results of 'search_file' for each such file.
    """

    python_ext = os.path.extsep + "py"
    results = []

    for path, directories, filenames in os.walk(directory):
        for filename in filenames:
            if os.path.splitext(filename)[-1] == python_ext:
                results.extend(search_file(os.path.join(path, filename), term_type, op))

    return results

def search_file(filename, term_type, op=None):

    """
    Search the file with the given 'filename' for terms having the given
    'term_type', using 'op' (if specified) to match a search term. If
    'term_type' is given as "*", attempt to match any term type.

    Files which cannot be parsed due to a SyntaxError yield no results. Other
    errors (such as an unknown 'term_type', which causes an AttributeError in
    the lookup on compiler.ast) are not handled here.

    Return a list of results of the form (node, value, filename).
    """

    try:
        node = compiler.parseFile(filename)
    except SyntaxError:
        return []

    # A class of None tells search_tree to consider every node type.

    if term_type != "*":
        cls = getattr(compiler.ast, term_type)
    else:
        cls = None

    return search_tree(node, cls, op, filename)

def search_tree(node, cls, op=None, filename=None):

    """
    Search the tree rooted at the given 'node' for nodes of the given class
    'cls', using 'op' (if specified) to match a search term. If 'cls' is None,
    all node types will be considered for matches.

    Where 'op' is None, each permitted node is itself a result with a value of
    None; otherwise, 'op' is called with the textual form of each candidate
    child value and a true result indicates a match.

    Return a list of results of the form (node, value, filename).
    """

    results = []

    # Ignore excluded nodes.

    if isinstance(node, excluded_term_cls):
        pass

    # Test permitted nodes.

    elif cls is None or isinstance(node, cls):
        if op is None:
            results.append((node, None, filename))
        else:
            found_str = False

            for child in node.getChildren():

                # Test literals.

                if isinstance(child, (int, float, long, bool)):
                    if op(str(child)):
                        results.append((node, child, filename))

                # Only check a single string child value since subsequent
                # values are typically docstrings.

                elif not found_str and isinstance(child, str):
                    found_str = True
                    if op(child):
                        results.append((node, child, filename))

                # Argument lists, globals and imports.

                elif isinstance(child, list):
                    results += search_list(child, node, op, filename)

    # Search within nodes, even if matches have already been found.

    for child in node.getChildNodes():
        results += search_tree(child, cls, op, filename)

    return results

def search_list(values, node, op=None, filename=None):

    """
    Search the given 'values' from the given 'node', using 'op' to match a
    search term.

    Each string in 'values' is tested directly. Each tuple in 'values' (as
    found in import name lists) has its string members tested individually;
    non-string members are skipped.

    Although 'op' defaults to None for consistency with the other search
    functions, it is called unconditionally and must therefore be a callable.

    Return a list of results of the form (node, value, filename).
    """

    results = []

    for value in values:

        # Test strings.

        if isinstance(value, str):
            if op(value):
                results.append((node, value, filename))

        # Test import tuples. Each member must be tested in its own right:
        # testing the enclosing tuple's type would reject every member.

        elif isinstance(value, tuple):
            for subvalue in value:
                if isinstance(subvalue, str) and op(subvalue):
                    results.append((node, subvalue, filename))

    return results

def expand_results(results):

    """
    Expand the given 'results', making a list containing tuples of the form
    (node, filename, line number, line, value).

    The line is fetched using linecache and stripped of trailing whitespace.
    It is None where either the filename or the node's line number is None.
    """

    expanded = []

    for node, value, filename in results:
        lineno = node.lineno

        if filename is not None and lineno is not None:
            line = linecache.getline(filename, lineno).rstrip()
        else:
            line = None

        expanded.append((node, filename, lineno, line, value))

    return expanded

def get_term_types():

    """
    Return the term types supported by the module.

    These are the names in compiler.ast which start with an upper case letter,
    refer to classes derived from compiler.ast.Node, and are not listed in
    'excluded_term_types'.
    """

    term_types = []

    for name in dir(compiler.ast):
        if name in excluded_term_types:
            continue

        obj = getattr(compiler.ast, name)

        if not isinstance(obj, types.ClassType):
            continue

        if issubclass(obj, compiler.ast.Node) and name[0].isupper():
            term_types.append(name)

    return term_types

# Command syntax.

syntax_description = """
    [ -n | --line-number ]
    [ -p | --print-token ]
    [ ( -t TERM_TYPE ) | ( --type=TERM_TYPE ) ]
    [ ( -e PATTERN ) | ( --regexp=PATTERN ) ]
    [ -r | -R | --recursive ] ( FILENAME ... )
    """

# Main program.

def run_command():

    """
    The functionality of the main program.

    Parse the command arguments according to 'syntax_description', search the
    named files (and directories, if recursion was requested), and print one
    line per result. Where the arguments do not match the syntax, print the
    syntax and the supported term types, then exit with status 1.
    """

    import sys
    import cmdsyntax
    import re
    import textwrap

    # Match command arguments.

    syntax = cmdsyntax.Syntax(syntax_description)
    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print("Syntax:")
        print(syntax_description)
        print("Term types:")
        print("\n".join(textwrap.wrap(", ".join(get_term_types()))))
        sys.exit(1)

    # Get the search details.

    term_type = args.get("TERM_TYPE", "*")
    term = args.get("PATTERN")
    recursive = args.has_key("r") or args.has_key("R") or args.has_key("recursive")
    show_lineno = args.has_key("n") or args.has_key("line-number")

    # Accept the long form of the option as well as the short form, as is
    # done for the line number option.

    show_token = args.has_key("p") or args.has_key("print-token")

    if term is None:
        op = None
    else:
        op = re.compile(term).search

    # Perform the search in files and directory hierarchies.

    results = []

    for filename in args["FILENAME"]:
        if os.path.isfile(filename):
            results += search_file(filename, term_type, op)
        elif recursive and os.path.isdir(filename):
            results += search_recursive(filename, term_type, op)

    # Present the results.

    for node, filename, lineno, line, value in expand_results(results):
        fmt = "%s:"
        output = [filename]

        # Handle line numbers and missing details.

        if show_lineno and lineno is not None:
            fmt += "%d:"
            output.append(lineno)

        # Show matching tokens, if requested. Results without a value (as
        # produced when no pattern is given) show the term type instead.

        if show_token:
            if value is not None:
                fmt += "%r:"
                output.append(value)
            else:
                fmt += "%s:"
                output.append("<%s>" % (term_type or "*"))

        # Show lines, if defined.

        if line is not None:
            fmt += " %s"
            output.append(line)

        print(fmt % tuple(output))

if __name__ == "__main__":
    run_command()

# vim: tabstop=4 expandtab shiftwidth=4