3.1 --- a/astgrep.py Mon Oct 27 22:49:46 2008 +0100
3.2 +++ b/astgrep.py Wed Oct 29 02:17:19 2008 +0100
3.3 @@ -23,8 +23,16 @@
3.4 import compiler
3.5 import os
3.6 import linecache
3.7 +import types
3.8
3.9 -__version__ = "0.1"
3.10 +__version__ = "0.1.1"
3.11 +
3.12 +# Excluded AST nodes and their names.
3.13 +
3.14 +excluded_term_types = ["Module", "Stmt"]
3.15 +excluded_term_cls = tuple([getattr(compiler.ast, name) for name in excluded_term_types])
3.16 +
3.17 +# Search functions.
3.18
3.19 def search_recursive(directory, term_type, term, op=None):
3.20
3.21 @@ -44,32 +52,58 @@
3.22
3.23 """
3.24 Search the file with the given 'filename' for terms having the given
3.25 - 'term_type' whose value matches the specified 'term'.
3.26 + 'term_type' whose value matches the specified 'term'. If 'term_type' is
3.27 + given as "*", attempt to match any term type.
3.28 """
3.29
3.30 - node = compiler.parseFile(filename)
3.31 - cls = getattr(compiler.ast, term_type)
3.32 + try:
3.33 + node = compiler.parseFile(filename)
3.34 + except SyntaxError:
3.35 + return []
3.36 +
3.37 + if term_type != "*":
3.38 + cls = getattr(compiler.ast, term_type)
3.39 + else:
3.40 + cls = None
3.41 +
3.42 return search_tree(node, cls, term, op, filename)
3.43
3.44 def search_tree(node, cls, term, op=None, filename=None):
3.45
3.46 """
3.47 Search the tree rooted at the given 'node' for nodes of the given class
3.48 - 'cls' for content matching the specified 'term'.
3.49 + 'cls' for content matching the specified 'term'. If 'cls' is None, all node
3.50 + types will be considered for matches.
3.51
3.52 Return a list of results of the form (node, value, filename).
3.53 """
3.54
3.55 results = []
3.56
3.57 - if isinstance(node, cls):
3.58 + # Ignore excluded nodes.
3.59 +
3.60 + if isinstance(node, excluded_term_cls):
3.61 + pass
3.62 +
3.63 + # Test permitted nodes.
3.64 +
3.65 + elif cls is None or isinstance(node, cls):
3.66 if op is None:
3.67 results.append((node, None, filename))
3.68 else:
3.69 for child in node.getChildren():
3.70 - if isinstance(child, (str, unicode, int, float, long, bool)) and op(unicode(child)):
3.71 - results.append((node, child, filename))
3.72 - break
3.73 +
3.74 + # Test literals.
3.75 +
3.76 + if isinstance(child, (str, int, float, long, bool)):
3.77 + if op(str(child)):
3.78 + results.append((node, child, filename))
3.79 +
3.80 + # Only check a single string child value since subsequent
3.81 + # values are typically docstrings.
3.82 +
3.83 + if isinstance(child, str):
3.84 + break
3.85
3.86 # Search within nodes, even if matches have already been found.
3.87
3.88 @@ -82,29 +116,51 @@
3.89
3.90 """
3.91 Expand the given 'results', making a list containing tuples of the form
3.92 - (filename, line number, line, value).
3.93 + (node, filename, line number, line, value).
3.94 """
3.95
3.96 expanded = []
3.97
3.98 for node, value, filename in results:
3.99 - if filename is not None:
3.100 - line = linecache.getline(filename, node.lineno).rstrip()
3.101 + lineno = node.lineno
3.102 +
3.103 + if filename is not None and lineno is not None:
3.104 + line = linecache.getline(filename, lineno).rstrip()
3.105 else:
3.106 line = None
3.107
3.108 - expanded.append((filename, node.lineno, line, value))
3.109 + expanded.append((node, filename, lineno, line, value))
3.110
3.111 return expanded
3.112
3.113 +def get_term_types():
3.114 +
3.115 + "Return a list of the names of compiler.ast node classes usable as term types, omitting excluded types."
3.116 +
3.117 + term_types = []
3.118 +
3.119 + for name in dir(compiler.ast):
3.120 + if name in excluded_term_types:
3.121 + continue
3.122 +
3.123 + obj = getattr(compiler.ast, name)
3.124 +
3.125 + if isinstance(obj, types.ClassType) and \
3.126 + issubclass(obj, compiler.ast.Node) and \
3.127 + name[0].isupper():
3.128 +
3.129 + term_types.append(name)
3.130 +
3.131 + return term_types
3.132 +
3.133 # Command syntax.
3.134
3.135 syntax_description = """
3.136 [ -n | --line-number ]
3.137 [ -p | --print-token ]
3.138 - ( ( -t TERM_TYPE ) | ( --type=TERM_TYPE ) )
3.139 + [ ( -t TERM_TYPE ) | ( --type=TERM_TYPE ) ]
3.140 [ ( -e PATTERN ) | ( --regexp=PATTERN ) ]
3.141 - ( ( ( -r | -R | --recursive ) DIRECTORY ) | FILENAME )
3.142 + [ -r | -R | --recursive ] ( FILENAME ... )
3.143 """
3.144
3.145 # Main program.
3.146 @@ -116,45 +172,61 @@
3.147 import sys
3.148 import cmdsyntax
3.149 import re
3.150 + import textwrap
3.151
3.152 # Match command arguments.
3.153
3.154 syntax = cmdsyntax.Syntax(syntax_description)
3.155 syntax_matches = syntax.get_args(sys.argv[1:])
3.156 + show_syntax = 0
3.157
3.158 try:
3.159 args = syntax_matches[0]
3.160 except IndexError:
3.161 + show_syntax = 1
3.162 +
3.163 + if show_syntax:
3.164 print "Syntax:"
3.165 print syntax_description
3.166 + print "Term types:"
3.167 + print "\n".join(textwrap.wrap(", ".join(get_term_types())))
3.168 sys.exit(1)
3.169
3.170 # Get the search details.
3.171
3.172 - term_type = args["TERM_TYPE"]
3.173 + term_type = args.get("TERM_TYPE", "*")
3.174 term = args.get("PATTERN")
3.175 + recursive = args.has_key("r") or args.has_key("R") or args.has_key("recursive")
3.176
3.177 if term is None:
3.178 op = None
3.179 else:
3.180 op = re.compile(term).search
3.181
3.182 - # Perform the search either in a single file or in a directory hierarchy.
3.183 + # Perform the search in files and directory hierarchies.
3.184 +
3.185 + results = []
3.186
3.187 - if args.has_key("FILENAME"):
3.188 - results = search_file(args["FILENAME"], term_type, term, op)
3.189 - else:
3.190 - results = search_recursive(args["DIRECTORY"], term_type, term, op)
3.191 + for filename in args["FILENAME"]:
3.192 + if os.path.isfile(filename):
3.193 + results += search_file(filename, term_type, term, op)
3.194 + elif recursive and os.path.isdir(filename):
3.195 + results += search_recursive(filename, term_type, term, op)
3.196
3.197 # Present the results.
3.198
3.199 - for filename, lineno, line, value in expand_results(results):
3.200 + for node, filename, lineno, line, value in expand_results(results):
3.201 format = "%s:"
3.202 output = [filename]
3.203
3.204 + # Handle line numbers and missing details.
3.205 +
3.206 if args.has_key("n") or args.has_key("line-number"):
3.207 - format += "%d:"
3.208 - output.append(lineno)
3.209 + if lineno is not None:
3.210 + format += "%d:"
3.211 + output.append(lineno)
3.212 +
3.213 + # Show matching tokens, if requested.
3.214
3.215 if args.has_key("p"):
3.216 if value is not None:
3.217 @@ -162,10 +234,13 @@
3.218 output.append(value)
3.219 else:
3.220 format += "%s:"
3.221 - output.append("<%s>" % term_type)
3.222 + output.append("<%s>" % (term_type or "*"))
3.223 +
3.224 + # Show lines, if defined.
3.225
3.226 - format += " %s"
3.227 - output.append(line)
3.228 + if line is not None:
3.229 + format += " %s"
3.230 + output.append(line)
3.231
3.232 print format % tuple(output)
3.233
4.1 --- a/packages/ubuntu-gutsy/python-astgrep/debian/astgrep.1 Mon Oct 27 22:49:46 2008 +0100
4.2 +++ b/packages/ubuntu-gutsy/python-astgrep/debian/astgrep.1 Wed Oct 29 02:17:19 2008 +0100
4.3 @@ -11,7 +11,7 @@
4.4 astgrep \- grep/search through Python abstract syntax trees
4.5 .SH SYNOPSIS
4.6 .B astgrep
4.7 -[options] \-t TERM_TYPE [ \-e PATTERN ] ( \-r DIRECTORY | FILE )
4.8 +[options] [ \-t TERM_TYPE ] [ \-e PATTERN ] [ \-r ] FILE...
4.9 .SH DESCRIPTION
4.10 \fBastgrep\fR is a program which searches through Python source files for
4.11 textual information of a specific type. Instead of matching a search term or
4.12 @@ -19,9 +19,10 @@
4.13 \fBastgrep\fR matches only tokens in the program having a particular type,
4.14 specified using \fITERM_TYPE\fR, such as names or constants.
4.15
4.16 -Like \fBgrep\fR, a single \fIFILE\fR or a number of files within a directory
4.17 -hierarchy, \fIDIRECTORY\fR, can be searched, with the occurrences listed from
4.18 -each file.
4.19 +Like \fBgrep\fR, a collection of \fIFILE\fRs can be searched, and if the
4.20 +\fB\-r\fR option is specified, directory hierarchies can also be searched
4.21 +recursively, with the occurrences listed from each file successfully found
4.22 +and parsed.
4.23 .SH COMMAND LINE OPTIONS
4.24 .TP
4.25 .BR \-n , " \-\-line-number"
4.26 @@ -31,13 +32,14 @@
4.27 Show the matching token for each match.
4.28 .TP
4.29 \fB\-t\fR, \fB\-\-type\fR=\fITERM_TYPE\fR
4.30 -Indicate the type of token to be matched.
4.31 +Indicate the type of token to be matched. If \fB*\fR is given or the option
4.32 +is omitted, all term types are tested.
4.33 .TP
4.34 \fB\-e\fR, \fB\-\-regexp\fR=\fIPATTERN\fR
4.35 Use \fIPATTERN\fR as the term to search for.
4.36 .TP
4.37 -\fB\-r\fR, \fB\-R\fR, \fB\-\-recursive\fR \fIDIRECTORY\fR
4.38 -Search Python files within \fIDIRECTORY\fR, recursively.
4.39 +\fB\-r\fR, \fB\-R\fR, \fB\-\-recursive\fR
4.40 +Recursively search Python files within any directories given as \fIFILE\fR arguments.
4.41 .SH TERM TYPES
4.42 Details of term types can be found in the "AST Nodes" section of the Python
4.43 Library Reference or by using \fBpydoc\fR to inspect the node classes in the
4.44 @@ -76,8 +78,18 @@
4.45 .B astgrep -n -t Getattr -e '^_node$' -r libxml2dom
4.46 .RE
4.47 .PD
4.48 +.SH LIMITATIONS
4.49 +\fBastgrep\fR does not attempt to search docstrings (since line number
4.50 +information is inaccurate for docstrings in abstract syntax trees) or comments
4.51 +(since the \fBcompiler\fR package only considers significant syntax when parsing
4.52 +programs).
4.53 +.PP
4.54 +\fBastgrep\fR cannot search syntactically incorrect programs (since the
4.55 +\fBcompiler\fR package will only return an abstract syntax tree for valid
4.56 +programs).
4.57 .SH SEE ALSO
4.58 .BR python (1),
4.59 +.BR pydoc (1),
4.60 .BR grep (1)
4.61 .PP
4.62 astgrep