1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/compiler/pyassem.py Tue May 01 22:04:53 2012 +0200
1.3 @@ -0,0 +1,818 @@
1.4 +"""A flow graph representation for Python bytecode"""
1.5 +
1.6 +import dis
1.7 +import new
1.8 +import sys
1.9 +
1.10 +from compiler import misc
1.11 +from compiler.consts \
1.12 + import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
1.13 +
1.14 +class FlowGraph:
1.15 + def __init__(self):
1.16 + self.current = self.entry = Block()
1.17 + self.exit = Block("exit")
1.18 + self.blocks = misc.Set()
1.19 + self.blocks.add(self.entry)
1.20 + self.blocks.add(self.exit)
1.21 +
1.22 + def startBlock(self, block):
1.23 + if self._debug:
1.24 + if self.current:
1.25 + print "end", repr(self.current)
1.26 + print " next", self.current.next
1.27 + print " ", self.current.get_children()
1.28 + print repr(block)
1.29 + self.current = block
1.30 +
1.31 + def nextBlock(self, block=None):
1.32 + # XXX think we need to specify when there is implicit transfer
1.33 + # from one block to the next. might be better to represent this
1.34 + # with explicit JUMP_ABSOLUTE instructions that are optimized
1.35 + # out when they are unnecessary.
1.36 + #
1.37 + # I think this strategy works: each block has a child
1.38 + # designated as "next" which is returned as the last of the
1.39 + # children. because the nodes in a graph are emitted in
1.40 + # reverse post order, the "next" block will always be emitted
1.41 + # immediately after its parent.
1.42 + # Worry: maintaining this invariant could be tricky
1.43 + if block is None:
1.44 + block = self.newBlock()
1.45 +
1.46 + # Note: If the current block ends with an unconditional
1.47 + # control transfer, then it is incorrect to add an implicit
1.48 + # transfer to the block graph. The current code requires
1.49 + # these edges to get the blocks emitted in the right order,
1.50 + # however. :-( If a client needs to remove these edges, call
1.51 + # pruneEdges().
1.52 +
1.53 + self.current.addNext(block)
1.54 + self.startBlock(block)
1.55 +
1.56 + def newBlock(self):
1.57 + b = Block()
1.58 + self.blocks.add(b)
1.59 + return b
1.60 +
1.61 + def startExitBlock(self):
1.62 + self.startBlock(self.exit)
1.63 +
1.64 + _debug = 0
1.65 +
1.66 + def _enable_debug(self):
1.67 + self._debug = 1
1.68 +
1.69 + def _disable_debug(self):
1.70 + self._debug = 0
1.71 +
1.72 + def emit(self, *inst):
1.73 + if self._debug:
1.74 + print "\t", inst
1.75 + if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']:
1.76 + self.current.addOutEdge(self.exit)
1.77 + if len(inst) == 2 and isinstance(inst[1], Block):
1.78 + self.current.addOutEdge(inst[1])
1.79 + self.current.emit(inst)
1.80 +
1.81 + def getBlocksInOrder(self):
1.82 + """Return the blocks in reverse postorder
1.83 +
1.84 + i.e. each node appears before all of its successors
1.85 + """
1.86 + # XXX make sure every node that doesn't have an explicit next
1.87 + # is set so that next points to exit
1.88 + for b in self.blocks.elements():
1.89 + if b is self.exit:
1.90 + continue
1.91 + if not b.next:
1.92 + b.addNext(self.exit)
1.93 + order = dfs_postorder(self.entry, {})
1.94 + order.reverse()
1.95 + self.fixupOrder(order, self.exit)
1.96 + # hack alert
1.97 + if not self.exit in order:
1.98 + order.append(self.exit)
1.99 +
1.100 + return order
1.101 +
1.102 + def fixupOrder(self, blocks, default_next):
1.103 + """Fixup bad order introduced by DFS."""
1.104 +
1.105 + # XXX This is a total mess. There must be a better way to get
1.106 + # the code blocks in the right order.
1.107 +
1.108 + self.fixupOrderHonorNext(blocks, default_next)
1.109 + self.fixupOrderForward(blocks, default_next)
1.110 +
1.111 + def fixupOrderHonorNext(self, blocks, default_next):
1.112 + """Fix one problem with DFS.
1.113 +
1.114 + The DFS uses child block, but doesn't know about the special
1.115 + "next" block. As a result, the DFS can order blocks so that a
1.116 + block isn't next to the right block for implicit control
1.117 + transfers.
1.118 + """
1.119 + index = {}
1.120 + for i in range(len(blocks)):
1.121 + index[blocks[i]] = i
1.122 +
1.123 + for i in range(0, len(blocks) - 1):
1.124 + b = blocks[i]
1.125 + n = blocks[i + 1]
1.126 + if not b.next or b.next[0] == default_next or b.next[0] == n:
1.127 + continue
1.128 + # The blocks are in the wrong order. Find the chain of
1.129 + # blocks to insert where they belong.
1.130 + cur = b
1.131 + chain = []
1.132 + elt = cur
1.133 + while elt.next and elt.next[0] != default_next:
1.134 + chain.append(elt.next[0])
1.135 + elt = elt.next[0]
1.136 + # Now remove the blocks in the chain from the current
1.137 + # block list, so that they can be re-inserted.
1.138 + l = []
1.139 + for b in chain:
1.140 + assert index[b] > i
1.141 + l.append((index[b], b))
1.142 + l.sort()
1.143 + l.reverse()
1.144 + for j, b in l:
1.145 + del blocks[index[b]]
1.146 + # Insert the chain in the proper location
1.147 + blocks[i:i + 1] = [cur] + chain
1.148 + # Finally, re-compute the block indexes
1.149 + for i in range(len(blocks)):
1.150 + index[blocks[i]] = i
1.151 +
1.152 + def fixupOrderForward(self, blocks, default_next):
1.153 + """Make sure all JUMP_FORWARDs jump forward"""
1.154 + index = {}
1.155 + chains = []
1.156 + cur = []
1.157 + for b in blocks:
1.158 + index[b] = len(chains)
1.159 + cur.append(b)
1.160 + if b.next and b.next[0] == default_next:
1.161 + chains.append(cur)
1.162 + cur = []
1.163 + chains.append(cur)
1.164 +
1.165 + while 1:
1.166 + constraints = []
1.167 +
1.168 + for i in range(len(chains)):
1.169 + l = chains[i]
1.170 + for b in l:
1.171 + for c in b.get_children():
1.172 + if index[c] < i:
1.173 + forward_p = 0
1.174 + for inst in b.insts:
1.175 + if inst[0] == 'JUMP_FORWARD':
1.176 + if inst[1] == c:
1.177 + forward_p = 1
1.178 + if not forward_p:
1.179 + continue
1.180 + constraints.append((index[c], i))
1.181 +
1.182 + if not constraints:
1.183 + break
1.184 +
1.185 + # XXX just do one for now
1.186 + # do swaps to get things in the right order
1.187 + goes_before, a_chain = constraints[0]
1.188 + assert a_chain > goes_before
1.189 + c = chains[a_chain]
1.190 + chains.remove(c)
1.191 + chains.insert(goes_before, c)
1.192 +
1.193 + del blocks[:]
1.194 + for c in chains:
1.195 + for b in c:
1.196 + blocks.append(b)
1.197 +
1.198 + def getBlocks(self):
1.199 + return self.blocks.elements()
1.200 +
1.201 + def getRoot(self):
1.202 + """Return nodes appropriate for use with dominator"""
1.203 + return self.entry
1.204 +
1.205 + def getContainedGraphs(self):
1.206 + l = []
1.207 + for b in self.getBlocks():
1.208 + l.extend(b.getContainedGraphs())
1.209 + return l
1.210 +
1.211 +def dfs_postorder(b, seen):
1.212 + """Depth-first search of tree rooted at b, return in postorder"""
1.213 + order = []
1.214 + seen[b] = b
1.215 + for c in b.get_children():
1.216 + if seen.has_key(c):
1.217 + continue
1.218 + order = order + dfs_postorder(c, seen)
1.219 + order.append(b)
1.220 + return order
1.221 +
1.222 +class Block:
1.223 + _count = 0
1.224 +
1.225 + def __init__(self, label=''):
1.226 + self.insts = []
1.227 + self.inEdges = misc.Set()
1.228 + self.outEdges = misc.Set()
1.229 + self.label = label
1.230 + self.bid = Block._count
1.231 + self.next = []
1.232 + Block._count = Block._count + 1
1.233 +
1.234 + def __repr__(self):
1.235 + if self.label:
1.236 + return "<block %s id=%d>" % (self.label, self.bid)
1.237 + else:
1.238 + return "<block id=%d>" % (self.bid)
1.239 +
1.240 + def __str__(self):
1.241 + insts = map(str, self.insts)
1.242 + return "<block %s %d:\n%s>" % (self.label, self.bid,
1.243 + '\n'.join(insts))
1.244 +
1.245 + def emit(self, inst):
1.246 + op = inst[0]
1.247 + if op[:4] == 'JUMP':
1.248 + self.outEdges.add(inst[1])
1.249 + self.insts.append(inst)
1.250 +
1.251 + def getInstructions(self):
1.252 + return self.insts
1.253 +
1.254 + def addInEdge(self, block):
1.255 + self.inEdges.add(block)
1.256 +
1.257 + def addOutEdge(self, block):
1.258 + self.outEdges.add(block)
1.259 +
1.260 + def addNext(self, block):
1.261 + self.next.append(block)
1.262 + assert len(self.next) == 1, map(str, self.next)
1.263 +
1.264 + _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE',
1.265 + 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
1.266 +
1.267 + def pruneNext(self):
1.268 + """Remove bogus edge for unconditional transfers
1.269 +
1.270 + Each block has a next edge that accounts for implicit control
1.271 + transfers, e.g. from a JUMP_IF_FALSE to the block that will be
1.272 + executed if the test is true.
1.273 +
1.274 + These edges must remain for the current assembler code to
1.275 + work. If they are removed, the dfs_postorder gets things in
1.276 + weird orders. However, they shouldn't be there for other
1.277 + purposes, e.g. conversion to SSA form. This method will
1.278 + remove the next edge when it follows an unconditional control
1.279 + transfer.
1.280 + """
1.281 + try:
1.282 + op, arg = self.insts[-1]
1.283 + except (IndexError, ValueError):
1.284 + return
1.285 + if op in self._uncond_transfer:
1.286 + self.next = []
1.287 +
1.288 + def get_children(self):
1.289 + if self.next and self.next[0] in self.outEdges:
1.290 + self.outEdges.remove(self.next[0])
1.291 + return self.outEdges.elements() + self.next
1.292 +
1.293 + def getContainedGraphs(self):
1.294 + """Return all graphs contained within this block.
1.295 +
1.296 + For example, a MAKE_FUNCTION block will contain a reference to
1.297 + the graph for the function body.
1.298 + """
1.299 + contained = []
1.300 + for inst in self.insts:
1.301 + if len(inst) == 1:
1.302 + continue
1.303 + op = inst[1]
1.304 + if hasattr(op, 'graph'):
1.305 + contained.append(op.graph)
1.306 + return contained
1.307 +
1.308 +# flags for code objects
1.309 +
1.310 +# the FlowGraph is transformed in place; it exists in one of these states
1.311 +RAW = "RAW"
1.312 +FLAT = "FLAT"
1.313 +CONV = "CONV"
1.314 +DONE = "DONE"
1.315 +
1.316 +class PyFlowGraph(FlowGraph):
1.317 + super_init = FlowGraph.__init__
1.318 +
1.319 + def __init__(self, name, filename, args=(), optimized=0, klass=None):
1.320 + self.super_init()
1.321 + self.name = name
1.322 + self.filename = filename
1.323 + self.docstring = None
1.324 + self.args = args # XXX
1.325 + self.argcount = getArgCount(args)
1.326 + self.klass = klass
1.327 + if optimized:
1.328 + self.flags = CO_OPTIMIZED | CO_NEWLOCALS
1.329 + else:
1.330 + self.flags = 0
1.331 + self.consts = []
1.332 + self.names = []
1.333 + # Free variables found by the symbol table scan, including
1.334 + # variables used only in nested scopes, are included here.
1.335 + self.freevars = []
1.336 + self.cellvars = []
1.337 + # The closure list is used to track the order of cell
1.338 + # variables and free variables in the resulting code object.
1.339 + # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
1.340 + # kinds of variables.
1.341 + self.closure = []
1.342 + self.varnames = list(args) or []
1.343 + for i in range(len(self.varnames)):
1.344 + var = self.varnames[i]
1.345 + if isinstance(var, TupleArg):
1.346 + self.varnames[i] = var.getName()
1.347 + self.stage = RAW
1.348 +
1.349 + def setDocstring(self, doc):
1.350 + self.docstring = doc
1.351 +
1.352 + def setFlag(self, flag):
1.353 + self.flags = self.flags | flag
1.354 + if flag == CO_VARARGS:
1.355 + self.argcount = self.argcount - 1
1.356 +
1.357 + def checkFlag(self, flag):
1.358 + if self.flags & flag:
1.359 + return 1
1.360 +
1.361 + def setFreeVars(self, names):
1.362 + self.freevars = list(names)
1.363 +
1.364 + def setCellVars(self, names):
1.365 + self.cellvars = names
1.366 +
1.367 + def getCode(self):
1.368 + """Get a Python code object"""
1.369 + assert self.stage == RAW
1.370 + self.computeStackDepth()
1.371 + self.flattenGraph()
1.372 + assert self.stage == FLAT
1.373 + self.convertArgs()
1.374 + assert self.stage == CONV
1.375 + self.makeByteCode()
1.376 + assert self.stage == DONE
1.377 + return self.newCodeObject()
1.378 +
1.379 + def dump(self, io=None):
1.380 + if io:
1.381 + save = sys.stdout
1.382 + sys.stdout = io
1.383 + pc = 0
1.384 + for t in self.insts:
1.385 + opname = t[0]
1.386 + if opname == "SET_LINENO":
1.387 + print
1.388 + if len(t) == 1:
1.389 + print "\t", "%3d" % pc, opname
1.390 + pc = pc + 1
1.391 + else:
1.392 + print "\t", "%3d" % pc, opname, t[1]
1.393 + pc = pc + 3
1.394 + if io:
1.395 + sys.stdout = save
1.396 +
1.397 + def computeStackDepth(self):
1.398 + """Compute the max stack depth.
1.399 +
1.400 + Approach is to compute the stack effect of each basic block.
1.401 + Then find the path through the code with the largest total
1.402 + effect.
1.403 + """
1.404 + depth = {}
1.405 + exit = None
1.406 + for b in self.getBlocks():
1.407 + depth[b] = findDepth(b.getInstructions())
1.408 +
1.409 + seen = {}
1.410 +
1.411 + def max_depth(b, d):
1.412 + if seen.has_key(b):
1.413 + return d
1.414 + seen[b] = 1
1.415 + d = d + depth[b]
1.416 + children = b.get_children()
1.417 + if children:
1.418 + return max([max_depth(c, d) for c in children])
1.419 + else:
1.420 + if not b.label == "exit":
1.421 + return max_depth(self.exit, d)
1.422 + else:
1.423 + return d
1.424 +
1.425 + self.stacksize = max_depth(self.entry, 0)
1.426 +
1.427 + def flattenGraph(self):
1.428 + """Arrange the blocks in order and resolve jumps"""
1.429 + assert self.stage == RAW
1.430 + self.insts = insts = []
1.431 + pc = 0
1.432 + begin = {}
1.433 + end = {}
1.434 + for b in self.getBlocksInOrder():
1.435 + begin[b] = pc
1.436 + for inst in b.getInstructions():
1.437 + insts.append(inst)
1.438 + if len(inst) == 1:
1.439 + pc = pc + 1
1.440 + elif inst[0] != "SET_LINENO":
1.441 + # arg takes 2 bytes
1.442 + pc = pc + 3
1.443 + end[b] = pc
1.444 + pc = 0
1.445 + for i in range(len(insts)):
1.446 + inst = insts[i]
1.447 + if len(inst) == 1:
1.448 + pc = pc + 1
1.449 + elif inst[0] != "SET_LINENO":
1.450 + pc = pc + 3
1.451 + opname = inst[0]
1.452 + if self.hasjrel.has_elt(opname):
1.453 + oparg = inst[1]
1.454 + offset = begin[oparg] - pc
1.455 + insts[i] = opname, offset
1.456 + elif self.hasjabs.has_elt(opname):
1.457 + insts[i] = opname, begin[inst[1]]
1.458 + self.stage = FLAT
1.459 +
1.460 + hasjrel = misc.Set()
1.461 + for i in dis.hasjrel:
1.462 + hasjrel.add(dis.opname[i])
1.463 + hasjabs = misc.Set()
1.464 + for i in dis.hasjabs:
1.465 + hasjabs.add(dis.opname[i])
1.466 +
1.467 + def convertArgs(self):
1.468 + """Convert arguments from symbolic to concrete form"""
1.469 + assert self.stage == FLAT
1.470 + self.consts.insert(0, self.docstring)
1.471 + self.sort_cellvars()
1.472 + for i in range(len(self.insts)):
1.473 + t = self.insts[i]
1.474 + if len(t) == 2:
1.475 + opname, oparg = t
1.476 + conv = self._converters.get(opname, None)
1.477 + if conv:
1.478 + self.insts[i] = opname, conv(self, oparg)
1.479 + self.stage = CONV
1.480 +
1.481 + def sort_cellvars(self):
1.482 + """Sort cellvars in the order of varnames and prune from freevars.
1.483 + """
1.484 + cells = {}
1.485 + for name in self.cellvars:
1.486 + cells[name] = 1
1.487 + self.cellvars = [name for name in self.varnames
1.488 + if cells.has_key(name)]
1.489 + for name in self.cellvars:
1.490 + del cells[name]
1.491 + self.cellvars = self.cellvars + cells.keys()
1.492 + self.closure = self.cellvars + self.freevars
1.493 +
1.494 + def _lookupName(self, name, list):
1.495 + """Return index of name in list, appending if necessary
1.496 +
1.497 + This routine uses a list instead of a dictionary, because a
1.498 + dictionary can't store two different keys if the keys have the
1.499 + same value but different types, e.g. 2 and 2L. The compiler
1.500 + must treat these two separately, so it does an explicit type
1.501 + comparison before comparing the values.
1.502 + """
1.503 + t = type(name)
1.504 + for i in range(len(list)):
1.505 + if t == type(list[i]) and list[i] == name:
1.506 + return i
1.507 + end = len(list)
1.508 + list.append(name)
1.509 + return end
1.510 +
1.511 + _converters = {}
1.512 + def _convert_LOAD_CONST(self, arg):
1.513 + if hasattr(arg, 'getCode'):
1.514 + arg = arg.getCode()
1.515 + return self._lookupName(arg, self.consts)
1.516 +
1.517 + def _convert_LOAD_FAST(self, arg):
1.518 + self._lookupName(arg, self.names)
1.519 + return self._lookupName(arg, self.varnames)
1.520 + _convert_STORE_FAST = _convert_LOAD_FAST
1.521 + _convert_DELETE_FAST = _convert_LOAD_FAST
1.522 +
1.523 + def _convert_LOAD_NAME(self, arg):
1.524 + if self.klass is None:
1.525 + self._lookupName(arg, self.varnames)
1.526 + return self._lookupName(arg, self.names)
1.527 +
1.528 + def _convert_NAME(self, arg):
1.529 + if self.klass is None:
1.530 + self._lookupName(arg, self.varnames)
1.531 + return self._lookupName(arg, self.names)
1.532 + _convert_STORE_NAME = _convert_NAME
1.533 + _convert_DELETE_NAME = _convert_NAME
1.534 + _convert_IMPORT_NAME = _convert_NAME
1.535 + _convert_IMPORT_FROM = _convert_NAME
1.536 + _convert_STORE_ATTR = _convert_NAME
1.537 + _convert_LOAD_ATTR = _convert_NAME
1.538 + _convert_DELETE_ATTR = _convert_NAME
1.539 + _convert_LOAD_GLOBAL = _convert_NAME
1.540 + _convert_STORE_GLOBAL = _convert_NAME
1.541 + _convert_DELETE_GLOBAL = _convert_NAME
1.542 +
1.543 + def _convert_DEREF(self, arg):
1.544 + self._lookupName(arg, self.names)
1.545 + self._lookupName(arg, self.varnames)
1.546 + return self._lookupName(arg, self.closure)
1.547 + _convert_LOAD_DEREF = _convert_DEREF
1.548 + _convert_STORE_DEREF = _convert_DEREF
1.549 +
1.550 + def _convert_LOAD_CLOSURE(self, arg):
1.551 + self._lookupName(arg, self.varnames)
1.552 + return self._lookupName(arg, self.closure)
1.553 +
1.554 + _cmp = list(dis.cmp_op)
1.555 + def _convert_COMPARE_OP(self, arg):
1.556 + return self._cmp.index(arg)
1.557 +
1.558 + # similarly for other opcodes...
1.559 +
1.560 + for name, obj in locals().items():
1.561 + if name[:9] == "_convert_":
1.562 + opname = name[9:]
1.563 + _converters[opname] = obj
1.564 + del name, obj, opname
1.565 +
1.566 + def makeByteCode(self):
1.567 + assert self.stage == CONV
1.568 + self.lnotab = lnotab = LineAddrTable()
1.569 + for t in self.insts:
1.570 + opname = t[0]
1.571 + if len(t) == 1:
1.572 + lnotab.addCode(self.opnum[opname])
1.573 + else:
1.574 + oparg = t[1]
1.575 + if opname == "SET_LINENO":
1.576 + lnotab.nextLine(oparg)
1.577 + continue
1.578 + hi, lo = twobyte(oparg)
1.579 + try:
1.580 + lnotab.addCode(self.opnum[opname], lo, hi)
1.581 + except ValueError:
1.582 + print opname, oparg
1.583 + print self.opnum[opname], lo, hi
1.584 + raise
1.585 + self.stage = DONE
1.586 +
1.587 + opnum = {}
1.588 + for num in range(len(dis.opname)):
1.589 + opnum[dis.opname[num]] = num
1.590 + del num
1.591 +
1.592 + def newCodeObject(self):
1.593 + assert self.stage == DONE
1.594 + if (self.flags & CO_NEWLOCALS) == 0:
1.595 + nlocals = 0
1.596 + else:
1.597 + nlocals = len(self.varnames)
1.598 + argcount = self.argcount
1.599 + if self.flags & CO_VARKEYWORDS:
1.600 + argcount = argcount - 1
1.601 + return new.code(argcount, nlocals, self.stacksize, self.flags,
1.602 + self.lnotab.getCode(), self.getConsts(),
1.603 + tuple(self.names), tuple(self.varnames),
1.604 + self.filename, self.name, self.lnotab.firstline,
1.605 + self.lnotab.getTable(), tuple(self.freevars),
1.606 + tuple(self.cellvars))
1.607 +
1.608 + def getConsts(self):
1.609 + """Return a tuple for the const slot of the code object
1.610 +
1.611 + Must convert references to code (MAKE_FUNCTION) to code
1.612 + objects recursively.
1.613 + """
1.614 + l = []
1.615 + for elt in self.consts:
1.616 + if isinstance(elt, PyFlowGraph):
1.617 + elt = elt.getCode()
1.618 + l.append(elt)
1.619 + return tuple(l)
1.620 +
1.621 +def isJump(opname):
1.622 + if opname[:4] == 'JUMP':
1.623 + return 1
1.624 +
1.625 +class TupleArg:
1.626 + """Helper for marking func defs with nested tuples in arglist"""
1.627 + def __init__(self, count, names):
1.628 + self.count = count
1.629 + self.names = names
1.630 + def __repr__(self):
1.631 + return "TupleArg(%s, %s)" % (self.count, self.names)
1.632 + def getName(self):
1.633 + return ".%d" % self.count
1.634 +
1.635 +def getArgCount(args):
1.636 + argcount = len(args)
1.637 + if args:
1.638 + for arg in args:
1.639 + if isinstance(arg, TupleArg):
1.640 + numNames = len(misc.flatten(arg.names))
1.641 + argcount = argcount - numNames
1.642 + return argcount
1.643 +
1.644 +def twobyte(val):
1.645 + """Convert an int argument into high and low bytes"""
1.646 + assert isinstance(val, int)
1.647 + return divmod(val, 256)
1.648 +
1.649 +class LineAddrTable:
1.650 + """lnotab
1.651 +
1.652 + This class builds the lnotab, which is documented in compile.c.
1.653 + Here's a brief recap:
1.654 +
1.655 + For each SET_LINENO instruction after the first one, two bytes are
1.656 + added to lnotab. (In some cases, multiple two-byte entries are
1.657 + added.) The first byte is the distance in bytes between the
1.658 + instruction for the last SET_LINENO and the current SET_LINENO.
1.659 + The second byte is offset in line numbers. If either offset is
1.660 + greater than 255, multiple two-byte entries are added -- see
1.661 + compile.c for the delicate details.
1.662 + """
1.663 +
1.664 + def __init__(self):
1.665 + self.code = []
1.666 + self.codeOffset = 0
1.667 + self.firstline = 0
1.668 + self.lastline = 0
1.669 + self.lastoff = 0
1.670 + self.lnotab = []
1.671 +
1.672 + def addCode(self, *args):
1.673 + for arg in args:
1.674 + self.code.append(chr(arg))
1.675 + self.codeOffset = self.codeOffset + len(args)
1.676 +
1.677 + def nextLine(self, lineno):
1.678 + if self.firstline == 0:
1.679 + self.firstline = lineno
1.680 + self.lastline = lineno
1.681 + else:
1.682 + # compute deltas
1.683 + addr = self.codeOffset - self.lastoff
1.684 + line = lineno - self.lastline
1.685 + # Python assumes that lineno always increases with
1.686 + # increasing bytecode address (lnotab is unsigned char).
1.687 + # Depending on when SET_LINENO instructions are emitted
1.688 + # this is not always true. Consider the code:
1.689 + # a = (1,
1.690 + # b)
1.691 + # In the bytecode stream, the assignment to "a" occurs
1.692 + # after the loading of "b". This works with the C Python
1.693 + # compiler because it only generates a SET_LINENO instruction
1.694 + # for the assignment.
1.695 + if line >= 0:
1.696 + push = self.lnotab.append
1.697 + while addr > 255:
1.698 + push(255); push(0)
1.699 + addr -= 255
1.700 + while line > 255:
1.701 + push(addr); push(255)
1.702 + line -= 255
1.703 + addr = 0
1.704 + if addr > 0 or line > 0:
1.705 + push(addr); push(line)
1.706 + self.lastline = lineno
1.707 + self.lastoff = self.codeOffset
1.708 +
1.709 + def getCode(self):
1.710 + return ''.join(self.code)
1.711 +
1.712 + def getTable(self):
1.713 + return ''.join(map(chr, self.lnotab))
1.714 +
1.715 +class StackDepthTracker:
1.716 + # XXX 1. need to keep track of stack depth on jumps
1.717 + # XXX 2. at least partly as a result, this code is broken
1.718 +
1.719 + def findDepth(self, insts, debug=0):
1.720 + depth = 0
1.721 + maxDepth = 0
1.722 + for i in insts:
1.723 + opname = i[0]
1.724 + if debug:
1.725 + print i,
1.726 + delta = self.effect.get(opname, None)
1.727 + if delta is not None:
1.728 + depth = depth + delta
1.729 + else:
1.730 + # now check patterns
1.731 + for pat, pat_delta in self.patterns:
1.732 + if opname[:len(pat)] == pat:
1.733 + delta = pat_delta
1.734 + depth = depth + delta
1.735 + break
1.736 + # if we still haven't found a match
1.737 + if delta is None:
1.738 + meth = getattr(self, opname, None)
1.739 + if meth is not None:
1.740 + depth = depth + meth(i[1])
1.741 + if depth > maxDepth:
1.742 + maxDepth = depth
1.743 + if debug:
1.744 + print depth, maxDepth
1.745 + return maxDepth
1.746 +
1.747 + effect = {
1.748 + 'POP_TOP': -1,
1.749 + 'DUP_TOP': 1,
1.750 + 'LIST_APPEND': -2,
1.751 + 'SLICE+1': -1,
1.752 + 'SLICE+2': -1,
1.753 + 'SLICE+3': -2,
1.754 + 'STORE_SLICE+0': -1,
1.755 + 'STORE_SLICE+1': -2,
1.756 + 'STORE_SLICE+2': -2,
1.757 + 'STORE_SLICE+3': -3,
1.758 + 'DELETE_SLICE+0': -1,
1.759 + 'DELETE_SLICE+1': -2,
1.760 + 'DELETE_SLICE+2': -2,
1.761 + 'DELETE_SLICE+3': -3,
1.762 + 'STORE_SUBSCR': -3,
1.763 + 'DELETE_SUBSCR': -2,
1.764 + # PRINT_EXPR?
1.765 + 'PRINT_ITEM': -1,
1.766 + 'RETURN_VALUE': -1,
1.767 + 'YIELD_VALUE': -1,
1.768 + 'EXEC_STMT': -3,
1.769 + 'BUILD_CLASS': -2,
1.770 + 'STORE_NAME': -1,
1.771 + 'STORE_ATTR': -2,
1.772 + 'DELETE_ATTR': -1,
1.773 + 'STORE_GLOBAL': -1,
1.774 + 'BUILD_MAP': 1,
1.775 + 'COMPARE_OP': -1,
1.776 + 'STORE_FAST': -1,
1.777 + 'IMPORT_STAR': -1,
1.778 + 'IMPORT_NAME': -1,
1.779 + 'IMPORT_FROM': 1,
1.780 + 'LOAD_ATTR': 0, # unlike other loads
1.781 + # close enough...
1.782 + 'SETUP_EXCEPT': 3,
1.783 + 'SETUP_FINALLY': 3,
1.784 + 'FOR_ITER': 1,
1.785 + 'WITH_CLEANUP': -1,
1.786 + }
1.787 + # use pattern match
1.788 + patterns = [
1.789 + ('BINARY_', -1),
1.790 + ('LOAD_', 1),
1.791 + ]
1.792 +
1.793 + def UNPACK_SEQUENCE(self, count):
1.794 + return count-1
1.795 + def BUILD_TUPLE(self, count):
1.796 + return -count+1
1.797 + def BUILD_LIST(self, count):
1.798 + return -count+1
1.799 + def CALL_FUNCTION(self, argc):
1.800 + hi, lo = divmod(argc, 256)
1.801 + return -(lo + hi * 2)
1.802 + def CALL_FUNCTION_VAR(self, argc):
1.803 + return self.CALL_FUNCTION(argc)-1
1.804 + def CALL_FUNCTION_KW(self, argc):
1.805 + return self.CALL_FUNCTION(argc)-1
1.806 + def CALL_FUNCTION_VAR_KW(self, argc):
1.807 + return self.CALL_FUNCTION(argc)-2
1.808 + def MAKE_FUNCTION(self, argc):
1.809 + return -argc
1.810 + def MAKE_CLOSURE(self, argc):
1.811 + # XXX need to account for free variables too!
1.812 + return -argc
1.813 + def BUILD_SLICE(self, argc):
1.814 + if argc == 2:
1.815 + return -1
1.816 + elif argc == 3:
1.817 + return -2
1.818 + def DUP_TOPX(self, argc):
1.819 + return argc
1.820 +
1.821 +findDepth = StackDepthTracker().findDepth