1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/compiler/symbols.py Fri May 18 23:26:30 2012 +0200
1.3 @@ -0,0 +1,462 @@
1.4 +"""Module symbol-table generator"""
1.5 +
1.6 +from compiler import ast
1.7 +from compiler.consts import SC_LOCAL, SC_GLOBAL_IMPLICIT, SC_GLOBAL_EXPLICT, \
1.8 + SC_FREE, SC_CELL, SC_UNKNOWN
1.9 +from compiler.misc import mangle
1.10 +import types
1.11 +
1.12 +
1.13 +import sys
1.14 +
# Not read by any code visible in this module.
# NOTE(review): presumably the maximum length of a mangled private name,
# mirroring the limit used by compiler.misc.mangle -- confirm before use.
MANGLE_LEN = 256
1.16 +
class Scope:
    """Name tables collected for a single lexical scope.

    The tables ``defs``, ``uses``, ``globals``, ``params``, ``frees`` and
    ``cells`` are dictionaries used as sets: a name is a member when it is
    a key, and the stored value is always 1.  ``children`` lists the scopes
    that were registered through add_child().
    """

    # XXX how much information do I need about each name?
    def __init__(self, name, module, klass=None):
        """Create an empty scope.

        name   -- label for the scope (used by repr and error messages)
        module -- the scope that receives names declared global here
        klass  -- name of the enclosing class, or None; leading
                  underscores are dropped and the remainder is used as
                  the key for private-name mangling
        """
        self.name = name
        self.module = module
        self.defs = {}
        self.uses = {}
        self.globals = {}
        self.params = {}
        self.frees = {}
        self.cells = {}
        self.children = []
        # nested is true if the scope could contain free variables,
        # i.e. if it is nested within another function.
        self.nested = None
        self.generator = None
        self.klass = None
        if klass is not None:
            stripped = klass.lstrip('_')
            # A class name made only of underscores (or an empty one)
            # leaves klass as None, which disables mangling.
            if stripped:
                self.klass = stripped

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.name)

    def mangle(self, name):
        """Return name mangled for the enclosing class, if there is one."""
        if self.klass is None:
            return name
        return mangle(name, self.klass)

    def add_def(self, name):
        """Record that name is bound in this scope."""
        self.defs[self.mangle(name)] = 1

    def add_use(self, name):
        """Record that name is referenced in this scope."""
        self.uses[self.mangle(name)] = 1

    def add_global(self, name):
        """Record a ``global`` declaration for name.

        The name is also added to the definitions of the module scope.
        Raises SyntaxError when the name is a parameter of this scope.
        """
        name = self.mangle(name)
        # XXX warn about global following def/use
        if name in self.params:
            raise SyntaxError("%s in %s is global and parameter" %
                              (name, self.name))
        self.globals[name] = 1
        self.module.add_def(name)

    def add_param(self, name):
        """Record name as a parameter; parameters also count as defs."""
        name = self.mangle(name)
        self.defs[name] = 1
        self.params[name] = 1

    def get_names(self):
        """Return a list of every name defined, used or declared global."""
        names = {}
        for table in (self.defs, self.uses, self.globals):
            names.update(table)
        return list(names)

    def add_child(self, child):
        """Register child as a scope directly inside this one."""
        self.children.append(child)

    def get_children(self):
        """Return the list of registered child scopes (not a copy)."""
        return self.children

    def DEBUG(self):
        """Write the scope's name and its name tables to sys.stderr."""
        if self.nested:
            label = "nested"
        else:
            label = ""
        write = sys.stderr.write
        write("%s %s\n" % (self.name, label))
        for title, table in (("\tglobals: ", self.globals),
                             ("\tcells: ", self.cells),
                             ("\tdefs: ", self.defs),
                             ("\tuses: ", self.uses),
                             ("\tfrees:", self.frees)):
            write("%s %s\n" % (title, table))

    def check_name(self, name):
        """Return scope of name.

        The result is one of the SC_* constants.  The tables are consulted
        in a fixed order: explicit globals, cells, then local definitions.
        A name found in none of them is an implicit global in a scope that
        is not nested; in a nested scope it is free when it is used or
        already recorded as free, and unknown otherwise.
        """
        if name in self.globals:
            return SC_GLOBAL_EXPLICT
        if name in self.cells:
            return SC_CELL
        if name in self.defs:
            return SC_LOCAL
        if not self.nested:
            return SC_GLOBAL_IMPLICIT
        if name in self.frees or name in self.uses:
            return SC_FREE
        return SC_UNKNOWN

    def get_free_vars(self):
        """Return the names this scope needs from an enclosing scope.

        A scope that is not nested has none and returns an empty tuple.
        Otherwise the result is a list holding the recorded free names
        plus every used name that is neither defined nor global here.
        """
        if not self.nested:
            return ()
        free = {}
        free.update(self.frees)
        for name in self.uses:
            if name not in self.defs and name not in self.globals:
                free[name] = 1
        return list(free)

    def handle_children(self):
        """Resolve the free variables reported by each child scope.

        Names that add_frees() reports back as globals are forced to be
        global in the child that asked for them.
        """
        for child in self.children:
            child_globals = self.add_frees(child.get_free_vars())
            for name in child_globals:
                child.force_global(name)

    def force_global(self, name):
        """Force name to be global in scope.

        Some child of the current node had a free reference to name.
        When the child was processed, it was labelled a free
        variable.  Now that all its enclosing scope have been
        processed, the name is known to be a global or builtin.  So
        walk back down the child chain and set the name to be global
        rather than free.

        Be careful to stop if a child does not think the name is
        free.
        """
        self.globals[name] = 1
        self.frees.pop(name, None)
        for child in self.children:
            if child.check_name(name) == SC_FREE:
                child.force_global(name)

    def add_frees(self, names):
        """Process list of free vars from nested scope.

        Returns a list of names that are either 1) declared global in the
        parent or 2) undefined in a top-level parent.  In either case,
        the nested scope should treat them as globals.
        """
        child_globals = []
        for name in names:
            sc = self.check_name(name)
            if self.nested:
                if sc in (SC_UNKNOWN, SC_FREE) \
                   or isinstance(self, ClassScope):
                    # Pass the request on to this scope's own parent.
                    self.frees[name] = 1
                elif sc == SC_GLOBAL_IMPLICIT:
                    child_globals.append(name)
                elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
                    # A local of this function is captured by the child.
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
            else:
                if sc == SC_LOCAL:
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
        return child_globals

    def get_cell_vars(self):
        """Return a list of the names recorded as cell variables."""
        return list(self.cells)
1.174 +
class ModuleScope(Scope):
    """The top-level scope; it is named "global" and is its own module."""

    def __init__(self):
        # The scope passes itself as the module argument, so a global
        # declaration made directly in it records the name in its own defs.
        Scope.__init__(self, "global", self)
1.180 +
class FunctionScope(Scope):
    """Scope of a function body.

    Adds nothing to Scope; the class exists so that isinstance() checks
    can tell function scopes apart from other kinds of scope.
    """
1.183 +
class GenExprScope(Scope):
    """Scope of a generator expression.

    Each instance is named "generator expression<N>", where N comes from
    a counter shared by the whole class, and starts out with the
    parameter '.0' already recorded.
    """

    # Sequence number given to the next scope.  It has to be updated on
    # the class: an augmented assignment through ``self`` would only bind
    # an instance attribute and leave this value at 1 forever.
    __counter = 1

    def __init__(self, module, klass=None):
        """Create the scope; see Scope.__init__ for module and klass."""
        i = GenExprScope.__counter
        GenExprScope.__counter += 1
        Scope.__init__(self, "generator expression<%d>" % i, module, klass)
        self.add_param('.0')

    def get_names(self):
        """Return the same list of names as Scope.get_names()."""
        return Scope.get_names(self)
1.198 +
class LambdaScope(FunctionScope):
    """Scope of a lambda expression.

    Each instance is named "lambda.N", where N comes from a counter
    shared by the whole class.
    """

    # Sequence number given to the next scope.  It has to be updated on
    # the class: an augmented assignment through ``self`` would only bind
    # an instance attribute and leave this value at 1 forever.
    __counter = 1

    def __init__(self, module, klass=None):
        """Create the scope; see Scope.__init__ for module and klass."""
        i = LambdaScope.__counter
        LambdaScope.__counter += 1
        Scope.__init__(self, "lambda.%d" % i, module, klass)
1.208 +
class ClassScope(Scope):
    """Scope of a class body.

    The class name is handed to Scope both as the scope's name and as the
    class used for mangling names added to this scope.
    """

    def __init__(self, name, module):
        Scope.__init__(self, name, module, klass=name)
1.214 +
class SymbolVisitor:
    """AST visitor that records a Scope for every scope-defining node.

    ``scopes`` maps each visited Module, Expression, Function, Lambda,
    GenExpr and Class node to the scope object created for it.  ``klass``
    holds the name of the class whose body is currently being visited, or
    None outside any class.

    ``self.visit`` is not defined in this class.
    NOTE(review): presumably it is installed by compiler.walk(), which is
    how the self-test at the bottom of this file drives the visitor --
    confirm against compiler.visitor.
    """

    def __init__(self):
        self.scopes = {}
        self.klass = None

    # node that define new scopes

    def visitModule(self, node):
        """Create the module scope and visit the module's body in it."""
        scope = ModuleScope()
        self.module = scope
        self.scopes[node] = scope
        self.visit(node.node, scope)

    visitExpression = visitModule

    def visitFunction(self, node, parent):
        """Create a scope for a function definition.

        Decorators and default values are visited in the parent scope,
        where the function name is also recorded as a definition.
        """
        if node.decorators:
            self.visit(node.decorators, parent)
        parent.add_def(node.name)
        for default in node.defaults:
            self.visit(default, parent)
        scope = FunctionScope(node.name, self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def visitGenExpr(self, node, parent):
        """Create a scope for a generator expression."""
        scope = GenExprScope(self.module, self.klass)
        if parent.nested \
           or isinstance(parent, (FunctionScope, GenExprScope)):
            scope.nested = 1

        self.scopes[node] = scope
        self.visit(node.code, scope)

        self.handle_free_vars(scope, parent)

    def visitGenExprInner(self, node, scope):
        """Visit the qualifiers first, then the result expression."""
        for genfor in node.quals:
            self.visit(genfor, scope)

        self.visit(node.expr, scope)

    def visitGenExprFor(self, node, scope):
        """Visit one ``for`` clause: target, iterable, then conditions."""
        self.visit(node.assign, scope, 1)
        self.visit(node.iter, scope)
        for if_ in node.ifs:
            self.visit(if_, scope)

    def visitGenExprIf(self, node, scope):
        self.visit(node.test, scope)

    def visitLambda(self, node, parent, assign=0):
        """Create a scope for a lambda expression."""
        # Lambda is an expression, so it could appear in an expression
        # context where assign is passed.  The transformer should catch
        # any code that has a lambda on the left-hand side.
        assert not assign

        for default in node.defaults:
            self.visit(default, parent)
        scope = LambdaScope(self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def _do_args(self, scope, args):
        """Record every argument name, descending into nested tuples."""
        for name in args:
            if isinstance(name, tuple):
                self._do_args(scope, name)
            else:
                scope.add_param(name)

    def handle_free_vars(self, scope, parent):
        """Attach scope to parent, then resolve scope's own children."""
        parent.add_child(scope)
        scope.handle_children()

    def visitClass(self, node, parent):
        """Create a scope for a class definition.

        Base classes are visited in the parent scope.  While the body is
        visited, ``self.klass`` is set to the class name; the previous
        value is restored afterwards.
        """
        parent.add_def(node.name)
        for base in node.bases:
            self.visit(base, parent)
        scope = ClassScope(node.name, self.module)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        if node.doc is not None:
            scope.add_def('__doc__')
        scope.add_def('__module__')
        self.scopes[node] = scope
        prev = self.klass
        self.klass = node.name
        self.visit(node.code, scope)
        self.klass = prev
        self.handle_free_vars(scope, parent)

    # name can be a def or a use

    # XXX a few calls and nodes expect a third "assign" arg that is
    # true if the name is being used as an assignment.  only
    # expressions contained within statements may have the assign arg.

    def visitName(self, node, scope, assign=0):
        """Record the name as a definition if assign is true, else a use."""
        if assign:
            scope.add_def(node.name)
        else:
            scope.add_use(node.name)

    # operations that bind new names

    def visitFor(self, node, scope):
        """Visit a for loop; the loop target is visited as an assignment."""
        self.visit(node.assign, scope, 1)
        self.visit(node.list, scope)
        self.visit(node.body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    def visitFrom(self, node, scope):
        """Record the names bound by ``from ... import``; '*' binds none."""
        for name, asname in node.names:
            if name == "*":
                continue
            scope.add_def(asname or name)

    def visitImport(self, node, scope):
        """Record the names bound by ``import``.

        Without an ``as`` clause a dotted name binds only its first
        component.
        """
        for name, asname in node.names:
            dot = name.find(".")
            if dot > -1:
                name = name[:dot]
            scope.add_def(asname or name)

    def visitGlobal(self, node, scope):
        for name in node.names:
            scope.add_global(name)

    def visitAssign(self, node, scope):
        """Propagate assignment flag down to child nodes.

        The Assign node doesn't itself contains the variables being
        assigned to.  Instead, the children in node.nodes are visited
        with the assign flag set to true.  When the names occur in
        those nodes, they are marked as defs.

        Some names that occur in an assignment target are not bound by
        the assignment, e.g. a name occurring inside a slice.  The
        visitor handles these nodes specially; they do not propagate
        the assign flag to their children.
        """
        for target in node.nodes:
            self.visit(target, scope, 1)
        self.visit(node.expr, scope)

    def visitAssName(self, node, scope, assign=1):
        scope.add_def(node.name)

    def visitAssAttr(self, node, scope, assign=0):
        # Assigning to an attribute only uses the object expression.
        self.visit(node.expr, scope, 0)

    def visitSubscript(self, node, scope, assign=0):
        # The assign flag is deliberately not passed on: see visitAssign.
        self.visit(node.expr, scope, 0)
        for sub in node.subs:
            self.visit(sub, scope, 0)

    def visitSlice(self, node, scope, assign=0):
        # The assign flag is deliberately not passed on: see visitAssign.
        self.visit(node.expr, scope, 0)
        if node.lower:
            self.visit(node.lower, scope, 0)
        if node.upper:
            self.visit(node.upper, scope, 0)

    def visitAugAssign(self, node, scope):
        # If the LHS is a name, then this counts as assignment.
        # Otherwise, it's just use.
        self.visit(node.node, scope)
        if isinstance(node.node, ast.Name):
            self.visit(node.node, scope, 1) # XXX worry about this
        self.visit(node.expr, scope)

    # prune if statements if tests are false

    # Exact types of constant tests that visitIf() is willing to evaluate.
    _const_types = (str, int, float)

    def visitIf(self, node, scope):
        """Visit an if statement, skipping branches that can never run.

        A branch whose test is a false constant of one of _const_types is
        skipped entirely: neither its test nor its body is visited.  The
        remaining branches and the else clause are visited as usual.
        """
        for test, body in node.tests:
            if isinstance(test, ast.Const):
                if type(test.value) in self._const_types:
                    if not test.value:
                        continue
            self.visit(test, scope)
            self.visit(body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    # a yield statement signals a generator

    def visitYield(self, node, scope):
        scope.generator = 1
        self.visit(node.value, scope)
1.413 +
def list_eq(l1, l2):
    """Return true if l1 and l2 hold equal items, regardless of order."""
    first = list(l1)
    first.sort()
    second = list(l2)
    second.sort()
    return first == second
1.416 +
if __name__ == "__main__":
    # Self-test: for every file named on the command line, compare the
    # names collected by SymbolVisitor with those reported by the
    # symtable module, and exit with status -1 on the first mismatch.
    import sys
    from compiler import parseFile, walk
    import symtable

    def get_names(syms):
        """Return the symbol names in syms, minus '_[...' and '.' names."""
        names = [sym.get_name() for sym in syms.get_symbols()]
        return [name for name in names
                if not (name.startswith('_[') or name.startswith('.'))]

    def say(*parts):
        """Write parts to stdout separated by spaces, then a newline."""
        sys.stdout.write(" ".join([str(part) for part in parts]) + "\n")

    for path in sys.argv[1:]:
        say(path)
        f = open(path)
        try:
            buf = f.read()
        finally:
            # Close the file even when reading fails.
            f.close()
        syms = symtable.symtable(buf, path, "exec")
        mod_names = get_names(syms)
        tree = parseFile(path)
        visitor = SymbolVisitor()
        walk(tree, visitor)

        # compare module-level symbols
        names2 = visitor.scopes[tree].get_names()

        if not list_eq(mod_names, names2):
            say()
            say("oops", path)
            say(sorted(mod_names))
            say(sorted(names2))
            sys.exit(-1)

        # Every collected scope except the module's own.
        scopes = [scope for node, scope in visitor.scopes.items()
                  if node is not tree]

        for sym in syms.get_symbols():
            if not sym.is_namespace():
                continue
            matches = [sc for sc in scopes
                       if sc.name == sym.get_name()]
            if len(matches) != 1:
                # Several scopes share the name, or none has it; either
                # way there is no single scope to compare against.
                say("skipping", sym.get_name())
                continue
            expected = get_names(sym.get_namespace())
            actual = matches[0].get_names()
            if not list_eq(expected, actual):
                say(sym.get_name())
                say(sorted(expected))
                say(sorted(actual))
                sys.exit(-1)