python2.5-compiler-package (annotate compiler/pyassem.py in 594cde91c7f1)

python2.5-compiler-package

Annotated compiler/pyassem.py

5:594cde91c7f1

2012-05-21

Paul Boddie

Added copyright and modification details for the visitor changes.

paul@0	1	"""A flow graph representation for Python bytecode"""
paul@0	2
paul@0	3	import dis
paul@0	4	import new
paul@0	5	import sys
paul@0	6
paul@0	7	from compiler import misc
paul@0	8	from compiler.consts \
paul@0	9	import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
paul@0	10
paul@0	11	class FlowGraph:
paul@0	12	def __init__(self):
paul@0	13	self.current = self.entry = Block()
paul@0	14	self.exit = Block("exit")
paul@0	15	self.blocks = misc.Set()
paul@0	16	self.blocks.add(self.entry)
paul@0	17	self.blocks.add(self.exit)
paul@0	18
paul@0	19	def startBlock(self, block):
paul@0	20	if self._debug:
paul@0	21	if self.current:
paul@0	22	print "end", repr(self.current)
paul@0	23	print " next", self.current.next
paul@0	24	print " ", self.current.get_children()
paul@0	25	print repr(block)
paul@0	26	self.current = block
paul@0	27
paul@0	28	def nextBlock(self, block=None):
paul@0	29	# XXX think we need to specify when there is implicit transfer
paul@0	30	# from one block to the next. might be better to represent this
paul@0	31	# with explicit JUMP_ABSOLUTE instructions that are optimized
paul@0	32	# out when they are unnecessary.
paul@0	33	#
paul@0	34	# I think this strategy works: each block has a child
paul@0	35	# designated as "next" which is returned as the last of the
paul@0	36	# children. because the nodes in a graph are emitted in
paul@0	37	# reverse post order, the "next" block will always be emitted
paul@0	38	# immediately after its parent.
paul@0	39	# Worry: maintaining this invariant could be tricky
paul@0	40	if block is None:
paul@0	41	block = self.newBlock()
paul@0	42
paul@0	43	# Note: If the current block ends with an unconditional
paul@0	44	# control transfer, then it is incorrect to add an implicit
paul@0	45	# transfer to the block graph. The current code requires
paul@0	46	# these edges to get the blocks emitted in the right order,
paul@0	47	# however. :-( If a client needs to remove these edges, call
paul@0	48	# pruneEdges().
paul@0	49
paul@0	50	self.current.addNext(block)
paul@0	51	self.startBlock(block)
paul@0	52
paul@0	53	def newBlock(self):
paul@0	54	b = Block()
paul@0	55	self.blocks.add(b)
paul@0	56	return b
paul@0	57
paul@0	58	def startExitBlock(self):
paul@0	59	self.startBlock(self.exit)
paul@0	60
paul@0	61	_debug = 0
paul@0	62
paul@0	63	def _enable_debug(self):
paul@0	64	self._debug = 1
paul@0	65
paul@0	66	def _disable_debug(self):
paul@0	67	self._debug = 0
paul@0	68
paul@0	69	def emit(self, *inst):
paul@0	70	if self._debug:
paul@0	71	print "\t", inst
paul@0	72	if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']:
paul@0	73	self.current.addOutEdge(self.exit)
paul@0	74	if len(inst) == 2 and isinstance(inst[1], Block):
paul@0	75	self.current.addOutEdge(inst[1])
paul@0	76	self.current.emit(inst)
paul@0	77
paul@0	78	def getBlocksInOrder(self):
paul@0	79	"""Return the blocks in reverse postorder
paul@0	80
paul@0	81	i.e. each node appears before all of its successors
paul@0	82	"""
paul@0	83	# XXX make sure every node that doesn't have an explicit next
paul@0	84	# is set so that next points to exit
paul@0	85	for b in self.blocks.elements():
paul@0	86	if b is self.exit:
paul@0	87	continue
paul@0	88	if not b.next:
paul@0	89	b.addNext(self.exit)
paul@0	90	order = dfs_postorder(self.entry, {})
paul@0	91	order.reverse()
paul@0	92	self.fixupOrder(order, self.exit)
paul@0	93	# hack alert
paul@0	94	if not self.exit in order:
paul@0	95	order.append(self.exit)
paul@0	96
paul@0	97	return order
paul@0	98
paul@0	99	def fixupOrder(self, blocks, default_next):
paul@0	100	"""Fixup bad order introduced by DFS."""
paul@0	101
paul@0	102	# XXX This is a total mess. There must be a better way to get
paul@0	103	# the code blocks in the right order.
paul@0	104
paul@0	105	self.fixupOrderHonorNext(blocks, default_next)
paul@0	106	self.fixupOrderForward(blocks, default_next)
paul@0	107
paul@0	108	def fixupOrderHonorNext(self, blocks, default_next):
paul@0	109	"""Fix one problem with DFS.
paul@0	110
paul@0	111	The DFS uses child block, but doesn't know about the special
paul@0	112	"next" block. As a result, the DFS can order blocks so that a
paul@0	113	block isn't next to the right block for implicit control
paul@0	114	transfers.
paul@0	115	"""
paul@0	116	index = {}
paul@0	117	for i in range(len(blocks)):
paul@0	118	index[blocks[i]] = i
paul@0	119
paul@0	120	for i in range(0, len(blocks) - 1):
paul@0	121	b = blocks[i]
paul@0	122	n = blocks[i + 1]
paul@0	123	if not b.next or b.next[0] == default_next or b.next[0] == n:
paul@0	124	continue
paul@0	125	# The blocks are in the wrong order. Find the chain of
paul@0	126	# blocks to insert where they belong.
paul@0	127	cur = b
paul@0	128	chain = []
paul@0	129	elt = cur
paul@0	130	while elt.next and elt.next[0] != default_next:
paul@0	131	chain.append(elt.next[0])
paul@0	132	elt = elt.next[0]
paul@0	133	# Now remove the blocks in the chain from the current
paul@0	134	# block list, so that they can be re-inserted.
paul@0	135	l = []
paul@0	136	for b in chain:
paul@0	137	assert index[b] > i
paul@0	138	l.append((index[b], b))
paul@0	139	l.sort()
paul@0	140	l.reverse()
paul@0	141	for j, b in l:
paul@0	142	del blocks[index[b]]
paul@0	143	# Insert the chain in the proper location
paul@0	144	blocks[i:i + 1] = [cur] + chain
paul@0	145	# Finally, re-compute the block indexes
paul@0	146	for i in range(len(blocks)):
paul@0	147	index[blocks[i]] = i
paul@0	148
paul@0	149	def fixupOrderForward(self, blocks, default_next):
paul@0	150	"""Make sure all JUMP_FORWARDs jump forward"""
paul@0	151	index = {}
paul@0	152	chains = []
paul@0	153	cur = []
paul@0	154	for b in blocks:
paul@0	155	index[b] = len(chains)
paul@0	156	cur.append(b)
paul@0	157	if b.next and b.next[0] == default_next:
paul@0	158	chains.append(cur)
paul@0	159	cur = []
paul@0	160	chains.append(cur)
paul@0	161
paul@0	162	while 1:
paul@0	163	constraints = []
paul@0	164
paul@0	165	for i in range(len(chains)):
paul@0	166	l = chains[i]
paul@0	167	for b in l:
paul@0	168	for c in b.get_children():
paul@0	169	if index[c] < i:
paul@0	170	forward_p = 0
paul@0	171	for inst in b.insts:
paul@0	172	if inst[0] == 'JUMP_FORWARD':
paul@0	173	if inst[1] == c:
paul@0	174	forward_p = 1
paul@0	175	if not forward_p:
paul@0	176	continue
paul@0	177	constraints.append((index[c], i))
paul@0	178
paul@0	179	if not constraints:
paul@0	180	break
paul@0	181
paul@0	182	# XXX just do one for now
paul@0	183	# do swaps to get things in the right order
paul@0	184	goes_before, a_chain = constraints[0]
paul@0	185	assert a_chain > goes_before
paul@0	186	c = chains[a_chain]
paul@0	187	chains.remove(c)
paul@0	188	chains.insert(goes_before, c)
paul@0	189
paul@0	190	del blocks[:]
paul@0	191	for c in chains:
paul@0	192	for b in c:
paul@0	193	blocks.append(b)
paul@0	194
paul@0	195	def getBlocks(self):
paul@0	196	return self.blocks.elements()
paul@0	197
paul@0	198	def getRoot(self):
paul@0	199	"""Return nodes appropriate for use with dominator"""
paul@0	200	return self.entry
paul@0	201
paul@0	202	def getContainedGraphs(self):
paul@0	203	l = []
paul@0	204	for b in self.getBlocks():
paul@0	205	l.extend(b.getContainedGraphs())
paul@0	206	return l
paul@0	207
def dfs_postorder(b, seen):
    """Depth-first search of tree rooted at b, return in postorder

    b must provide get_children().  seen is a dictionary of the blocks
    visited so far; it is updated in place (each visited block is stored
    as both key and value) and blocks already present are skipped.

    Returns a list in which b comes last, after every block first
    reached through it.
    """
    order = []
    seen[b] = b
    for c in b.get_children():
        if c in seen:
            continue
        # extend() appends in place; the former "order = order + ..."
        # copied the whole list at every level of the recursion.
        order.extend(dfs_postorder(c, seen))
    order.append(b)
    return order
paul@0	218
class Block:
    """A node of the flow graph: a list of instructions plus its edges.

    Every block gets a sequential id (bid) from the class-wide counter.
    next holds at most one block, the implicit successor.
    """

    # Number of blocks created so far; the source of the bid values.
    _count = 0

    # Opcodes after which the "next" edge is bogus; see pruneNext().
    _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE',
                        'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')

    def __init__(self, label=''):
        self.insts = []
        self.inEdges = misc.Set()
        self.outEdges = misc.Set()
        self.label = label
        self.bid = Block._count
        self.next = []
        Block._count = Block._count + 1

    def __repr__(self):
        if not self.label:
            return "<block id=%d>" % (self.bid)
        return "<block %s id=%d>" % (self.label, self.bid)

    def __str__(self):
        lines = [str(inst) for inst in self.insts]
        return "<block %s %d:\n%s>" % (self.label, self.bid,
                                       '\n'.join(lines))

    def emit(self, inst):
        """Append inst; the argument of a JUMP* opcode becomes an out edge."""
        if inst[0][:4] == 'JUMP':
            self.outEdges.add(inst[1])
        self.insts.append(inst)

    def getInstructions(self):
        """Return the list of instructions emitted into this block."""
        return self.insts

    def addInEdge(self, block):
        """Record block in the set of incoming edges."""
        self.inEdges.add(block)

    def addOutEdge(self, block):
        """Record block in the set of outgoing edges."""
        self.outEdges.add(block)

    def addNext(self, block):
        """Set block as the implicit successor; only one is allowed."""
        self.next.append(block)
        assert len(self.next) == 1, map(str, self.next)

    def pruneNext(self):
        """Remove bogus edge for unconditional transfers

        Each block has a next edge that accounts for implicit control
        transfers, e.g. from a JUMP_IF_FALSE to the block that will be
        executed if the test is true.

        These edges must remain for the current assembler code to
        work.  If they are removed, the dfs_postorder gets things in
        weird orders.  However, they shouldn't be there for other
        purposes, e.g. conversion to SSA form.  This method will
        remove the next edge when it follows an unconditional control
        transfer.

        Nothing happens when the block is empty or when its last
        instruction does not unpack into exactly two items.
        """
        try:
            last_op, _ = self.insts[-1]
        except (IndexError, ValueError):
            return
        if last_op in self._uncond_transfer:
            self.next = []

    def get_children(self):
        """Return the out-edge blocks followed by the "next" block.

        As a side effect the "next" block is removed from outEdges, so
        it appears only once, at the end of the result.
        """
        successors = self.next
        if successors and successors[0] in self.outEdges:
            self.outEdges.remove(successors[0])
        return self.outEdges.elements() + successors

    def getContainedGraphs(self):
        """Return all graphs contained within this block.

        For example, a MAKE_FUNCTION block will contain a reference to
        the graph for the function body.
        """
        return [inst[1].graph
                for inst in self.insts
                if len(inst) != 1 and hasattr(inst[1], 'graph')]
paul@0	304
paul@0	305	# flags for code objects
paul@0	306
# The FlowGraph is transformed in place; it exists in exactly one of these
# states at a time.  PyFlowGraph.getCode() asserts each transition.
RAW = "RAW"    # initial stage, set by PyFlowGraph.__init__
FLAT = "FLAT"  # set by flattenGraph()
CONV = "CONV"  # set by convertArgs()
DONE = "DONE"  # set by makeByteCode()
paul@0	312
paul@0	313	class PyFlowGraph(FlowGraph):
paul@0	314	super_init = FlowGraph.__init__
paul@0	315
paul@0	316	def __init__(self, name, filename, args=(), optimized=0, klass=None):
paul@0	317	self.super_init()
paul@0	318	self.name = name
paul@0	319	self.filename = filename
paul@0	320	self.docstring = None
paul@0	321	self.args = args # XXX
paul@0	322	self.argcount = getArgCount(args)
paul@0	323	self.klass = klass
paul@0	324	if optimized:
paul@0	325	self.flags = CO_OPTIMIZED \| CO_NEWLOCALS
paul@0	326	else:
paul@0	327	self.flags = 0
paul@0	328	self.consts = []
paul@0	329	self.names = []
paul@0	330	# Free variables found by the symbol table scan, including
paul@0	331	# variables used only in nested scopes, are included here.
paul@0	332	self.freevars = []
paul@0	333	self.cellvars = []
paul@0	334	# The closure list is used to track the order of cell
paul@0	335	# variables and free variables in the resulting code object.
paul@0	336	# The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
paul@0	337	# kinds of variables.
paul@0	338	self.closure = []
paul@0	339	self.varnames = list(args) or []
paul@0	340	for i in range(len(self.varnames)):
paul@0	341	var = self.varnames[i]
paul@0	342	if isinstance(var, TupleArg):
paul@0	343	self.varnames[i] = var.getName()
paul@0	344	self.stage = RAW
paul@0	345
paul@0	346	def setDocstring(self, doc):
paul@0	347	self.docstring = doc
paul@0	348
paul@0	349	def setFlag(self, flag):
paul@0	350	self.flags = self.flags \| flag
paul@0	351	if flag == CO_VARARGS:
paul@0	352	self.argcount = self.argcount - 1
paul@0	353
paul@0	354	def checkFlag(self, flag):
paul@0	355	if self.flags & flag:
paul@0	356	return 1
paul@0	357
paul@0	358	def setFreeVars(self, names):
paul@0	359	self.freevars = list(names)
paul@0	360
paul@0	361	def setCellVars(self, names):
paul@0	362	self.cellvars = names
paul@0	363
paul@0	364	def getCode(self):
paul@0	365	"""Get a Python code object"""
paul@0	366	assert self.stage == RAW
paul@0	367	self.computeStackDepth()
paul@0	368	self.flattenGraph()
paul@0	369	assert self.stage == FLAT
paul@0	370	self.convertArgs()
paul@0	371	assert self.stage == CONV
paul@0	372	self.makeByteCode()
paul@0	373	assert self.stage == DONE
paul@0	374	return self.newCodeObject()
paul@0	375
paul@0	376	def dump(self, io=None):
paul@0	377	if io:
paul@0	378	save = sys.stdout
paul@0	379	sys.stdout = io
paul@0	380	pc = 0
paul@0	381	for t in self.insts:
paul@0	382	opname = t[0]
paul@0	383	if opname == "SET_LINENO":
paul@0	384	print
paul@0	385	if len(t) == 1:
paul@0	386	print "\t", "%3d" % pc, opname
paul@0	387	pc = pc + 1
paul@0	388	else:
paul@0	389	print "\t", "%3d" % pc, opname, t[1]
paul@0	390	pc = pc + 3
paul@0	391	if io:
paul@0	392	sys.stdout = save
paul@0	393
paul@0	394	def computeStackDepth(self):
paul@0	395	"""Compute the max stack depth.
paul@0	396
paul@0	397	Approach is to compute the stack effect of each basic block.
paul@0	398	Then find the path through the code with the largest total
paul@0	399	effect.
paul@0	400	"""
paul@0	401	depth = {}
paul@0	402	exit = None
paul@0	403	for b in self.getBlocks():
paul@0	404	depth[b] = findDepth(b.getInstructions())
paul@0	405
paul@0	406	seen = {}
paul@0	407
paul@0	408	def max_depth(b, d):
paul@0	409	if seen.has_key(b):
paul@0	410	return d
paul@0	411	seen[b] = 1
paul@0	412	d = d + depth[b]
paul@0	413	children = b.get_children()
paul@0	414	if children:
paul@0	415	return max([max_depth(c, d) for c in children])
paul@0	416	else:
paul@0	417	if not b.label == "exit":
paul@0	418	return max_depth(self.exit, d)
paul@0	419	else:
paul@0	420	return d
paul@0	421
paul@0	422	self.stacksize = max_depth(self.entry, 0)
paul@0	423
paul@0	424	def flattenGraph(self):
paul@0	425	"""Arrange the blocks in order and resolve jumps"""
paul@0	426	assert self.stage == RAW
paul@0	427	self.insts = insts = []
paul@0	428	pc = 0
paul@0	429	begin = {}
paul@0	430	end = {}
paul@0	431	for b in self.getBlocksInOrder():
paul@0	432	begin[b] = pc
paul@0	433	for inst in b.getInstructions():
paul@0	434	insts.append(inst)
paul@0	435	if len(inst) == 1:
paul@0	436	pc = pc + 1
paul@0	437	elif inst[0] != "SET_LINENO":
paul@0	438	# arg takes 2 bytes
paul@0	439	pc = pc + 3
paul@0	440	end[b] = pc
paul@0	441	pc = 0
paul@0	442	for i in range(len(insts)):
paul@0	443	inst = insts[i]
paul@0	444	if len(inst) == 1:
paul@0	445	pc = pc + 1
paul@0	446	elif inst[0] != "SET_LINENO":
paul@0	447	pc = pc + 3
paul@0	448	opname = inst[0]
paul@0	449	if self.hasjrel.has_elt(opname):
paul@0	450	oparg = inst[1]
paul@0	451	offset = begin[oparg] - pc
paul@0	452	insts[i] = opname, offset
paul@0	453	elif self.hasjabs.has_elt(opname):
paul@0	454	insts[i] = opname, begin[inst[1]]
paul@0	455	self.stage = FLAT
paul@0	456
paul@0	457	hasjrel = misc.Set()
paul@0	458	for i in dis.hasjrel:
paul@0	459	hasjrel.add(dis.opname[i])
paul@0	460	hasjabs = misc.Set()
paul@0	461	for i in dis.hasjabs:
paul@0	462	hasjabs.add(dis.opname[i])
paul@0	463
paul@0	464	def convertArgs(self):
paul@0	465	"""Convert arguments from symbolic to concrete form"""
paul@0	466	assert self.stage == FLAT
paul@0	467	self.consts.insert(0, self.docstring)
paul@0	468	self.sort_cellvars()
paul@0	469	for i in range(len(self.insts)):
paul@0	470	t = self.insts[i]
paul@0	471	if len(t) == 2:
paul@0	472	opname, oparg = t
paul@0	473	conv = self._converters.get(opname, None)
paul@0	474	if conv:
paul@0	475	self.insts[i] = opname, conv(self, oparg)
paul@0	476	self.stage = CONV
paul@0	477
paul@0	478	def sort_cellvars(self):
paul@0	479	"""Sort cellvars in the order of varnames and prune from freevars.
paul@0	480	"""
paul@0	481	cells = {}
paul@0	482	for name in self.cellvars:
paul@0	483	cells[name] = 1
paul@0	484	self.cellvars = [name for name in self.varnames
paul@0	485	if cells.has_key(name)]
paul@0	486	for name in self.cellvars:
paul@0	487	del cells[name]
paul@0	488	self.cellvars = self.cellvars + cells.keys()
paul@0	489	self.closure = self.cellvars + self.freevars
paul@0	490
paul@0	491	def _lookupName(self, name, list):
paul@0	492	"""Return index of name in list, appending if necessary
paul@0	493
paul@0	494	This routine uses a list instead of a dictionary, because a
paul@0	495	dictionary can't store two different keys if the keys have the
paul@0	496	same value but different types, e.g. 2 and 2L. The compiler
paul@0	497	must treat these two separately, so it does an explicit type
paul@0	498	comparison before comparing the values.
paul@0	499	"""
paul@0	500	t = type(name)
paul@0	501	for i in range(len(list)):
paul@0	502	if t == type(list[i]) and list[i] == name:
paul@0	503	return i
paul@0	504	end = len(list)
paul@0	505	list.append(name)
paul@0	506	return end
paul@0	507
paul@0	508	_converters = {}
paul@0	509	def _convert_LOAD_CONST(self, arg):
paul@0	510	if hasattr(arg, 'getCode'):
paul@0	511	arg = arg.getCode()
paul@0	512	return self._lookupName(arg, self.consts)
paul@0	513
paul@0	514	def _convert_LOAD_FAST(self, arg):
paul@0	515	self._lookupName(arg, self.names)
paul@0	516	return self._lookupName(arg, self.varnames)
paul@0	517	_convert_STORE_FAST = _convert_LOAD_FAST
paul@0	518	_convert_DELETE_FAST = _convert_LOAD_FAST
paul@0	519
paul@0	520	def _convert_LOAD_NAME(self, arg):
paul@0	521	if self.klass is None:
paul@0	522	self._lookupName(arg, self.varnames)
paul@0	523	return self._lookupName(arg, self.names)
paul@0	524
paul@0	525	def _convert_NAME(self, arg):
paul@0	526	if self.klass is None:
paul@0	527	self._lookupName(arg, self.varnames)
paul@0	528	return self._lookupName(arg, self.names)
paul@0	529	_convert_STORE_NAME = _convert_NAME
paul@0	530	_convert_DELETE_NAME = _convert_NAME
paul@0	531	_convert_IMPORT_NAME = _convert_NAME
paul@0	532	_convert_IMPORT_FROM = _convert_NAME
paul@0	533	_convert_STORE_ATTR = _convert_NAME
paul@0	534	_convert_LOAD_ATTR = _convert_NAME
paul@0	535	_convert_DELETE_ATTR = _convert_NAME
paul@0	536	_convert_LOAD_GLOBAL = _convert_NAME
paul@0	537	_convert_STORE_GLOBAL = _convert_NAME
paul@0	538	_convert_DELETE_GLOBAL = _convert_NAME
paul@0	539
paul@0	540	def _convert_DEREF(self, arg):
paul@0	541	self._lookupName(arg, self.names)
paul@0	542	self._lookupName(arg, self.varnames)
paul@0	543	return self._lookupName(arg, self.closure)
paul@0	544	_convert_LOAD_DEREF = _convert_DEREF
paul@0	545	_convert_STORE_DEREF = _convert_DEREF
paul@0	546
paul@0	547	def _convert_LOAD_CLOSURE(self, arg):
paul@0	548	self._lookupName(arg, self.varnames)
paul@0	549	return self._lookupName(arg, self.closure)
paul@0	550
paul@0	551	_cmp = list(dis.cmp_op)
paul@0	552	def _convert_COMPARE_OP(self, arg):
paul@0	553	return self._cmp.index(arg)
paul@0	554
paul@0	555	# similarly for other opcodes...
paul@0	556
paul@0	557	for name, obj in locals().items():
paul@0	558	if name[:9] == "_convert_":
paul@0	559	opname = name[9:]
paul@0	560	_converters[opname] = obj
paul@0	561	del name, obj, opname
paul@0	562
paul@0	563	def makeByteCode(self):
paul@0	564	assert self.stage == CONV
paul@0	565	self.lnotab = lnotab = LineAddrTable()
paul@0	566	for t in self.insts:
paul@0	567	opname = t[0]
paul@0	568	if len(t) == 1:
paul@0	569	lnotab.addCode(self.opnum[opname])
paul@0	570	else:
paul@0	571	oparg = t[1]
paul@0	572	if opname == "SET_LINENO":
paul@0	573	lnotab.nextLine(oparg)
paul@0	574	continue
paul@0	575	hi, lo = twobyte(oparg)
paul@0	576	try:
paul@0	577	lnotab.addCode(self.opnum[opname], lo, hi)
paul@0	578	except ValueError:
paul@0	579	print opname, oparg
paul@0	580	print self.opnum[opname], lo, hi
paul@0	581	raise
paul@0	582	self.stage = DONE
paul@0	583
paul@0	584	opnum = {}
paul@0	585	for num in range(len(dis.opname)):
paul@0	586	opnum[dis.opname[num]] = num
paul@0	587	del num
paul@0	588
paul@0	589	def newCodeObject(self):
paul@0	590	assert self.stage == DONE
paul@0	591	if (self.flags & CO_NEWLOCALS) == 0:
paul@0	592	nlocals = 0
paul@0	593	else:
paul@0	594	nlocals = len(self.varnames)
paul@0	595	argcount = self.argcount
paul@0	596	if self.flags & CO_VARKEYWORDS:
paul@0	597	argcount = argcount - 1
paul@0	598	return new.code(argcount, nlocals, self.stacksize, self.flags,
paul@0	599	self.lnotab.getCode(), self.getConsts(),
paul@0	600	tuple(self.names), tuple(self.varnames),
paul@0	601	self.filename, self.name, self.lnotab.firstline,
paul@0	602	self.lnotab.getTable(), tuple(self.freevars),
paul@0	603	tuple(self.cellvars))
paul@0	604
paul@0	605	def getConsts(self):
paul@0	606	"""Return a tuple for the const slot of the code object
paul@0	607
paul@0	608	Must convert references to code (MAKE_FUNCTION) to code
paul@0	609	objects recursively.
paul@0	610	"""
paul@0	611	l = []
paul@0	612	for elt in self.consts:
paul@0	613	if isinstance(elt, PyFlowGraph):
paul@0	614	elt = elt.getCode()
paul@0	615	l.append(elt)
paul@0	616	return tuple(l)
paul@0	617
def isJump(opname):
    """Return 1 if opname starts with 'JUMP', otherwise None."""
    if opname.startswith('JUMP'):
        return 1
    return None
paul@0	621
class TupleArg:
    """Helper for marking func defs with nested tuples in arglist"""

    def __init__(self, count, names):
        # count is used to build the generated argument name; names
        # holds the names inside the tuple.
        self.names = names
        self.count = count

    def __repr__(self):
        fields = (self.count, self.names)
        return "TupleArg(%s, %s)" % fields

    def getName(self):
        """Return the generated name, a dot followed by count."""
        number = self.count
        return ".%d" % number
paul@0	631
def getArgCount(args):
    """Return len(args) minus the flattened name count of each TupleArg."""
    total = len(args)
    for entry in args:
        if not isinstance(entry, TupleArg):
            continue
        total = total - len(misc.flatten(entry.names))
    return total
paul@0	640
def twobyte(val):
    """Convert an int argument into high and low bytes"""
    assert isinstance(val, int)
    # Same result as divmod(val, 256), negative values included.
    high = val >> 8
    low = val & 0xFF
    return (high, low)
paul@0	645
class LineAddrTable:
    """lnotab

    This class builds the lnotab, which is documented in compile.c.
    Here's a brief recap:

    For each SET_LINENO instruction after the first one, two bytes are
    added to lnotab.  (In some cases, multiple two-byte entries are
    added.)  The first byte is the distance in bytes between the
    instruction for the last SET_LINENO and the current SET_LINENO.
    The second byte is offset in line numbers.  If either offset is
    greater than 255, multiple two-byte entries are added -- see
    compile.c for the delicate details.
    """

    def __init__(self):
        self.code = []        # one-character strings, one per code byte
        self.codeOffset = 0   # number of code bytes added so far
        self.firstline = 0    # first line number seen; 0 means none yet
        self.lastline = 0     # line number of the last recorded entry
        self.lastoff = 0      # code offset of the last recorded entry
        self.lnotab = []      # table entries as integers

    def addCode(self, *args):
        """Append each integer in args as one byte of code."""
        for byte in args:
            self.code.append(chr(byte))
        self.codeOffset += len(args)

    def nextLine(self, lineno):
        """Record that the code added from now on belongs to lineno."""
        if self.firstline == 0:
            self.firstline = lineno
            self.lastline = lineno
            return
        # compute deltas
        addr = self.codeOffset - self.lastoff
        line = lineno - self.lastline
        # Python assumes that lineno always increases with
        # increasing bytecode address (lnotab is unsigned char).
        # Depending on when SET_LINENO instructions are emitted
        # this is not always true.  Consider the code:
        #     a = (1,
        #          b)
        # In the bytecode stream, the assignment to "a" occurs
        # after the loading of "b".  This works with the C Python
        # compiler because it only generates a SET_LINENO instruction
        # for the assignment.
        if line < 0:
            return
        table = self.lnotab
        while addr > 255:
            table.extend((255, 0))
            addr -= 255
        while line > 255:
            table.extend((addr, 255))
            line -= 255
            addr = 0
        if addr > 0 or line > 0:
            table.extend((addr, line))
        self.lastline = lineno
        self.lastoff = self.codeOffset

    def getCode(self):
        """Return the code bytes joined into one string."""
        return ''.join(self.code)

    def getTable(self):
        """Return the table entries as a string, one character each."""
        return ''.join([chr(entry) for entry in self.lnotab])
paul@0	711
paul@0	712	class StackDepthTracker:
paul@0	713	# XXX 1. need to keep track of stack depth on jumps
paul@0	714	# XXX 2. at least partly as a result, this code is broken
paul@0	715
paul@0	716	def findDepth(self, insts, debug=0):
paul@0	717	depth = 0
paul@0	718	maxDepth = 0
paul@0	719	for i in insts:
paul@0	720	opname = i[0]
paul@0	721	if debug:
paul@0	722	print i,
paul@0	723	delta = self.effect.get(opname, None)
paul@0	724	if delta is not None:
paul@0	725	depth = depth + delta
paul@0	726	else:
paul@0	727	# now check patterns
paul@0	728	for pat, pat_delta in self.patterns:
paul@0	729	if opname[:len(pat)] == pat:
paul@0	730	delta = pat_delta
paul@0	731	depth = depth + delta
paul@0	732	break
paul@0	733	# if we still haven't found a match
paul@0	734	if delta is None:
paul@0	735	meth = getattr(self, opname, None)
paul@0	736	if meth is not None:
paul@0	737	depth = depth + meth(i[1])
paul@0	738	if depth > maxDepth:
paul@0	739	maxDepth = depth
paul@0	740	if debug:
paul@0	741	print depth, maxDepth
paul@0	742	return maxDepth
paul@0	743
paul@0	744	effect = {
paul@0	745	'POP_TOP': -1,
paul@0	746	'DUP_TOP': 1,
paul@0	747	'LIST_APPEND': -2,
paul@0	748	'SLICE+1': -1,
paul@0	749	'SLICE+2': -1,
paul@0	750	'SLICE+3': -2,
paul@0	751	'STORE_SLICE+0': -1,
paul@0	752	'STORE_SLICE+1': -2,
paul@0	753	'STORE_SLICE+2': -2,
paul@0	754	'STORE_SLICE+3': -3,
paul@0	755	'DELETE_SLICE+0': -1,
paul@0	756	'DELETE_SLICE+1': -2,
paul@0	757	'DELETE_SLICE+2': -2,
paul@0	758	'DELETE_SLICE+3': -3,
paul@0	759	'STORE_SUBSCR': -3,
paul@0	760	'DELETE_SUBSCR': -2,
paul@0	761	# PRINT_EXPR?
paul@0	762	'PRINT_ITEM': -1,
paul@0	763	'RETURN_VALUE': -1,
paul@0	764	'YIELD_VALUE': -1,
paul@0	765	'EXEC_STMT': -3,
paul@0	766	'BUILD_CLASS': -2,
paul@0	767	'STORE_NAME': -1,
paul@0	768	'STORE_ATTR': -2,
paul@0	769	'DELETE_ATTR': -1,
paul@0	770	'STORE_GLOBAL': -1,
paul@0	771	'BUILD_MAP': 1,
paul@0	772	'COMPARE_OP': -1,
paul@0	773	'STORE_FAST': -1,
paul@0	774	'IMPORT_STAR': -1,
paul@0	775	'IMPORT_NAME': -1,
paul@0	776	'IMPORT_FROM': 1,
paul@0	777	'LOAD_ATTR': 0, # unlike other loads
paul@0	778	# close enough...
paul@0	779	'SETUP_EXCEPT': 3,
paul@0	780	'SETUP_FINALLY': 3,
paul@0	781	'FOR_ITER': 1,
paul@0	782	'WITH_CLEANUP': -1,
paul@0	783	}
paul@0	784	# use pattern match
paul@0	785	patterns = [
paul@0	786	('BINARY_', -1),
paul@0	787	('LOAD_', 1),
paul@0	788	]
paul@0	789
paul@0	790	def UNPACK_SEQUENCE(self, count):
paul@0	791	return count-1
paul@0	792	def BUILD_TUPLE(self, count):
paul@0	793	return -count+1
paul@0	794	def BUILD_LIST(self, count):
paul@0	795	return -count+1
paul@0	796	def CALL_FUNCTION(self, argc):
paul@0	797	hi, lo = divmod(argc, 256)
paul@0	798	return -(lo + hi * 2)
paul@0	799	def CALL_FUNCTION_VAR(self, argc):
paul@0	800	return self.CALL_FUNCTION(argc)-1
paul@0	801	def CALL_FUNCTION_KW(self, argc):
paul@0	802	return self.CALL_FUNCTION(argc)-1
paul@0	803	def CALL_FUNCTION_VAR_KW(self, argc):
paul@0	804	return self.CALL_FUNCTION(argc)-2
paul@0	805	def MAKE_FUNCTION(self, argc):
paul@0	806	return -argc
paul@0	807	def MAKE_CLOSURE(self, argc):
paul@0	808	# XXX need to account for free variables too!
paul@0	809	return -argc
paul@0	810	def BUILD_SLICE(self, argc):
paul@0	811	if argc == 2:
paul@0	812	return -1
paul@0	813	elif argc == 3:
paul@0	814	return -2
paul@0	815	def DUP_TOPX(self, argc):
paul@0	816	return argc
paul@0	817
paul@0	818	findDepth = StackDepthTracker().findDepth