# HG changeset patch # User Paul Boddie # Date 1099857183 -3600 # Node ID 91cceb1bd58098ea5aa7f81143eb5dcdc5e98da0 # Parent a714f8355910657122b65766aeb59da7dd6ba5a8 Fixed shifting operations, observing operator precedence. Added names support in BytecodeWriter. Reorganised the BytecodeReader class hierarchy to provide disassembling support separate from translation support. Added convenience functions for disassembly and translation. Introduced some additional bytecode writing methods. Improved the classfile module to support easier access to names and descriptors through RefInfo objects. diff -r a714f8355910 -r 91cceb1bd580 bytecode.py --- a/bytecode.py Sun Nov 07 17:42:07 2004 +0100 +++ b/bytecode.py Sun Nov 07 20:53:03 2004 +0100 @@ -7,12 +7,15 @@ NOTE: Synchronized constructs are not actually supported. """ -import dis # for access to Python bytecode values +from dis import opmap # for access to Python bytecode values from UserDict import UserDict # Bytecode production classes. class BytecodeWriter: + + "A Python bytecode writer." + def __init__(self): self.loops = [] self.jumps = {} @@ -26,6 +29,9 @@ # NOTE: This may be acquired from elsewhere. self.globals = {} + # Mapping from names to indexes. + self.names = {} + def get_output(self): output = [] for element in self.output: @@ -67,16 +73,26 @@ if not self.constants.has_key(value): self.constants[value] = len(self.constants.keys()) self.output.append(self.constants[value]) + self.position += 2 def load_global(self, name): self.output.append(opmap["LOAD_GLOBAL"]) - if not self.globals.has_key(value): + if not self.globals.has_key(name): self.globals[name] = len(self.globals.keys()) self.output.append(self.globals[name]) + self.position += 2 + + def load_attr(self, name): + self.output.append(opmap["LOAD_ATTR"]) + if not self.names.has_key(name): + self.names[name] = len(self.names.keys()) + self.output.append(self.names[name]) + self.position += 2 def load_fast(self, index): self.output.append(opmap["LOAD_FAST"]) self.output.append(index) + self.position += 2 # Normal bytecode generators. @@ -101,6 +117,28 @@ self.output.append(offset) # May be filled in later self.position += 2 + def build_tuple(self, count): + self.output.append(opmap["BUILD_TUPLE"]) + self.output.append(count) + self.position += 2 + + def rot_two(self): + self.output.append(opmap["ROT_TWO"]) + self.position += 1 + + def rot_three(self): + self.output.append(opmap["ROT_THREE"]) + self.position += 1 + + def rot_four(self): + self.output.append(opmap["ROT_FOUR"]) + self.position += 1 + + def call_function(self, count): + self.output.append(opmap["CALL_FUNCTION"]) + self.output.append(count) + self.position += 2 + # Utility classes and functions. class LazyDict(UserDict): @@ -144,6 +182,9 @@ # Bytecode conversion. class BytecodeReader: + + "A generic Java bytecode reader." + def __init__(self, class_file): self.class_file = class_file self.position_mapping = LazyDict() @@ -154,9 +195,9 @@ self.position_mapping[self.java_position] = program.position bytecode = ord(code[self.java_position]) mnemonic, number_of_arguments = self.java_bytecodes[bytecode] - self.process_bytecode(mnemonic, number_of_arguments) + self.process_bytecode(mnemonic, number_of_arguments, code, program) - def process_bytecode(self, mnemonic, number_of_arguments): + def process_bytecode(self, mnemonic, number_of_arguments, code, program): if number_of_arguments is not None: arguments = [] for j in range(0, number_of_arguments): @@ -170,636 +211,6 @@ self.java_position = self.java_position + 1 + number_of_arguments - def nop(self, arguments, program): - pass - - def aaload(self, arguments, program): - # NOTE: No type checking performed. - program.binary_subscr() - - def aastore(self, arguments, program): - # NOTE: No type checking performed. - # Stack: arrayref, index, value - program.rot_three() # Stack: value, arrayref, index - program.store_subscr() - - def aconst_null(self, arguments, program): - program.load_global(None) - - def aload(self, arguments, program): - program.load_fast(arguments[0]) - - def aload_0(self, arguments, program): - program.load_fast(0) - - def aload_1(self, arguments, program): - program.load_fast(1) - - def aload_2(self, arguments, program): - program.load_fast(2) - - def aload_3(self, arguments, program): - program.load_fast(3) - - def anewarray(self, arguments, program): - # NOTE: Does not raise NegativeArraySizeException. - # NOTE: Not using the index to type the list/array. - index = arguments[0] << 8 + arguments[1] - - program.build_list() # Stack: count, list - program.rot_two() # Stack: list, count - program.setup_loop() - program.load_global("range") - program.load_const(0) # Stack: list, count, range, 0 - program.rot_three() # Stack: list, 0, count, range - program.rot_three() # Stack: list, range, 0, count - program.call_function(2) # Stack: list, range_list - program.get_iter() # Stack: list, iter - program.for_iter() # Stack: list, iter, value - program.pop_top() # Stack: list, iter - program.rot_two() # Stack: iter, list - program.dup_top() # Stack: iter, list, list - program.load_attr("append") # Stack: iter, list, append - program.load_global(None) # Stack: iter, list, append, None - program.call_function(1) # Stack: iter, list, None - program.pop_top() # Stack: iter, list - program.rot_two() # Stack: list, iter - program.end_loop() # Back to for_iter above - - def areturn(self, arguments, program): - program.return_value() - - def arraylength(self, arguments, program): - program.load_global("len") # Stack: arrayref, len - program.rot_two() # Stack: len, arrayref - program.call_function(1) - - def astore(self, arguments, program): - program.store_fast(arguments[0]) - - def astore_0(self, arguments, program): - program.store_fast(0) - - def astore_1(self, arguments, program): - program.store_fast(1) - - def astore_2(self, arguments, program): - program.store_fast(2) - - def astore_3(self, arguments, program): - program.store_fast(3) - - def athrow(self, arguments, program): - # NOTE: NullPointerException not raised where null/None is found on the stack. - program.raise_varargs(1) - - baload = aaload - bastore = aastore - - def bipush(self, arguments, program): - program.load_const(arguments[0]) - - caload = aaload - castore = aastore - - def checkcast(self, arguments, program): - index = arguments[0] << 8 + arguments[1] - target_name = self.class_file.constants[index - 1].get_name() - target_components = target_name.split("/") - - program.dup_top() # Stack: objectref, objectref - program.load_global("isinstance") # Stack: objectref, objectref, isinstance - program.rot_two() # Stack: objectref, isinstance, objectref - program.load_global(target_components[0]) - for target_component in target_components[1:]: - program.load_attr(target_component) - program.call_function(2) # Stack: objectref - - def d2f(self, arguments, program): - pass - - def d2i(self, arguments, program): - program.load_global("int") # Stack: value, int - program.rot_two() # Stack: int, value - program.call_function(1) # Stack: result - - d2l = d2i # Preserving Java semantics - - def dadd(self, arguments, program): - # NOTE: No type checking performed. - program.binary_add() - - daload = aaload - dastore = aastore - - def dcmpg(self, arguments, program): - # NOTE: No type checking performed. - program.compare_op(">") - - def dcmpl(self, arguments, program): - # NOTE: No type checking performed. - program.compare_op("<") - - def dconst_0(self, arguments, program): - program.load_const(0.0) - - def dconst_1(self, arguments, program): - program.load_const(1.0) - - def ddiv(self, arguments, program): - # NOTE: No type checking performed. - program.binary_divide() - - dload = aload - dload_0 = aload_0 - dload_1 = aload_1 - dload_2 = aload_2 - dload_3 = aload_3 - - def dmul(self, arguments, program): - # NOTE: No type checking performed. - program.binary_multiply() - - def dneg(self, arguments, program): - # NOTE: No type checking performed. - program.unary_negative() - - def drem(self, arguments, program): - # NOTE: No type checking performed. - program.binary_modulo() - - dreturn = areturn - dstore = astore - dstore_0 = astore_0 - dstore_1 = astore_1 - dstore_2 = astore_2 - dstore_3 = astore_3 - - def dsub(self, arguments, program): - # NOTE: No type checking performed. - program.binary_subtract() - - def dup(self, arguments, program): - program.dup_top() - - def dup_x1(self, arguments, program): - # Ignoring computational type categories. - program.dup_top() - program.rot_three() - - def dup_x2(self, arguments, program): - # Ignoring computational type categories. - program.dup_top() - program.rot_four() - - dup2 = dup # Ignoring computational type categories - dup2_x1 = dup_x1 # Ignoring computational type categories - dup2_x2 = dup_x2 # Ignoring computational type categories - - def f2d(self, arguments, program): - pass # Preserving Java semantics - - def f2i(self, arguments, program): - program.load_global("int") # Stack: value, int - program.rot_two() # Stack: int, value - program.call_function(1) # Stack: result - - f2l = f2i # Preserving Java semantics - fadd = dadd - faload = daload - fastore = dastore - fcmpg = dcmpg - fcmpl = dcmpl - fconst_0 = dconst_0 - fconst_1 = dconst_1 - - def fconst_2(self, arguments, program): - program.load_const(2.0) - - fdiv = ddiv - fload = dload - fload_0 = dload_0 - fload_1 = dload_1 - fload_2 = dload_2 - fload_3 = dload_3 - fmul = dmul - fneg = dneg - frem = drem - freturn = dreturn - fstore = dstore - fstore_0 = dstore_0 - fstore_1 = dstore_1 - fstore_2 = dstore_2 - fstore_3 = dstore_3 - fsub = dsub - - def getfield(self, arguments, program): - index = arguments[0] << 8 + arguments[1] - target_name = self.class_file.constants[index - 1].get_name() - # NOTE: Using the string version of the name which may contain incompatible characters. - program.load_attr(str(target_name)) - - getstatic = getfield # Ignoring Java restrictions - - def goto(self, arguments, program): - offset = signed2(arguments[0] << 8 + arguments[1]) - java_absolute = self.java_position + offset - program.jump_absolute(self.position_mapping[java_absolute]) - - def goto_w(self, arguments, program): - offset = signed4(arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3]) - java_absolute = self.java_position + offset - program.jump_absolute(self.position_mapping[java_absolute]) - - def i2b(self, arguments, program): - pass - - def i2c(self, arguments, program): - program.load_global("chr") # Stack: value, chr - program.rot_two() # Stack: chr, value - program.call_function(1) # Stack: result - - def i2d(self, arguments, program): - program.load_global("float") # Stack: value, float - program.rot_two() # Stack: float, value - program.call_function(1) # Stack: result - - i2f = i2d # Not distinguishing between float and double - - def i2l(self, arguments, program): - pass # Preserving Java semantics - - def i2s(self, arguments, program): - pass # Not distinguishing between int and short - - iadd = fadd - iaload = faload - - def iand(self, arguments, program): - # NOTE: No type checking performed. - program.binary_and() - - iastore = fastore - - def iconst_m1(self, arguments, program): - program.load_const(-1) - - def iconst_0(self, arguments, program): - program.load_const(0) - - def iconst_1(self, arguments, program): - program.load_const(1) - - def iconst_2(self, arguments, program): - program.load_const(2) - - def iconst_3(self, arguments, program): - program.load_const(3) - - def iconst_4(self, arguments, program): - program.load_const(4) - - def iconst_5(self, arguments, program): - program.load_const(5) - - idiv = fdiv - - def _if_xcmpx(self, arguments, program, op): - offset = signed2(arguments[0] << 8 + arguments[1]) - java_absolute = self.java_position + offset - program.compare_op(op) - program.jump_to_label(0, "next") # skip if false - program.goto(offset) - program.start_label("next") - - def if_acmpeq(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "is") - - def if_acmpne(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "is not") - - def if_icmpeq(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "==") - - def if_icmpne(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "!=") - - def if_icmplt(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "<") - - def if_icmpge(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, ">=") - - def if_icmpgt(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, ">") - - def if_icmple(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "<=") - - def ifeq(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, "==") - - def ifne(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, "!=") - - def iflt(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, "<") - - def ifge(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, ">=") - - def ifgt(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, ">") - - def ifle(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, "<=") - - def ifnonnull(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(None) - self._if_xcmpx(arguments, program, "is not") - - def ifnull(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(None) - self._if_xcmpx(arguments, program, "is") - - def iinc(self, arguments, program): - # NOTE: No type checking performed. - program.load_fast(arguments[0]) - program.load_const(arguments[1]) - program.binary_add() - - iload = fload - iload_0 = fload_0 - iload_1 = fload_1 - iload_2 = fload_2 - iload_3 = fload_3 - imul = fmul - ineg = fneg - - def instanceof(self, arguments, program): - index = arguments[0] << 8 + arguments[1] - target_name = self.class_file.constants[index - 1].get_name() - target_components = target_name.split("/") - - program.load_global("isinstance") # Stack: objectref, isinstance - program.rot_two() # Stack: isinstance, objectref - program.load_global(target_components[0]) - for target_component in target_components[1:]: - program.load_attr(target_component) - program.call_function(2) # Stack: result - - def _invoke(self, target_name, program): - program.rot_two() # Stack: tuple, objectref - # NOTE: Using the string version of the name which may contain incompatible characters. - program.load_attr(str(target_name)) # Stack: tuple, method - program.rot_two() # Stack: method, tuple - program.load_global("apply") # Stack: method, tuple, apply - program.rot_three() # Stack: apply, method, tuple - program.call_function(2) - - def invokeinterface(self, arguments, program): - # NOTE: This implementation does not perform the necessary checks for - # NOTE: signature-based polymorphism. - # NOTE: Java rules not specifically obeyed. - index = arguments[0] << 8 + arguments[1] - count = arguments[2] - target_name = self.class_file.constants[index - 1].get_name() - # Stack: objectref, arg1, arg2, ... - program.build_tuple(count) # Stack: objectref, tuple - self._invoke(target_name, program) - - def invokespecial(self, arguments, program): - # NOTE: This implementation does not perform the necessary checks for - # NOTE: signature-based polymorphism. - # NOTE: Java rules not specifically obeyed. - index = arguments[0] << 8 + arguments[1] - target = self.class_file.constants[index - 1] - target_name = target.get_name() - # Get the number of parameters from the descriptor. - count = len(target.get_descriptor()[0]) - # Stack: objectref, arg1, arg2, ... - program.build_tuple(count) # Stack: objectref, tuple - self._invoke(target_name, program) - - def invokestatic(self, arguments, program): - # NOTE: This implementation does not perform the necessary checks for - # NOTE: signature-based polymorphism. - # NOTE: Java rules not specifically obeyed. - index = arguments[0] << 8 + arguments[1] - target = self.class_file.constants[index - 1] - target_name = target.get_name() - # Get the number of parameters from the descriptor. - count = len(target.get_descriptor()[0]) - # Stack: arg1, arg2, ... - program.build_tuple(count) # Stack: tuple - # NOTE: Should probably use Python static methods. - program.load_name("self") # Stack: tuple, self - self._invoke(target_name, program) - - invokevirtual = invokeinterface # Ignoring Java rules - - def ior(self, arguments, program): - # NOTE: No type checking performed. - program.binary_or() - - irem = frem - ireturn = freturn - - def ishl(self, arguments, program): - # NOTE: No type checking performed. - # NOTE: Not verified. - program.binary_lshift() - - def ishr(self, arguments, program): - # NOTE: No type checking performed. - # NOTE: Not verified. - program.binary_rshift() - - istore = fstore - istore_0 = fstore_0 - istore_1 = fstore_1 - istore_2 = fstore_2 - istore_3 = fstore_3 - isub = fsub - iushr = ishr # Ignoring distinctions between arithmetic and logical shifts - - def ixor(self, arguments, program): - # NOTE: No type checking performed. - program.binary_xor() - - def jsr(self, arguments, program): - offset = signed2(arguments[0] << 8 + arguments[1]) - java_absolute = self.java_position + offset - # Store the address of the next instruction. - program.load_const(self.position_mapping[self.java_position + 3]) - program.jump_absolute(self.position_mapping[java_absolute]) - - def jsr_w(self, arguments, program): - offset = signed4(arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3]) - java_absolute = self.java_position + offset - # Store the address of the next instruction. - program.load_const(self.position_mapping[self.java_position + 5]) - program.jump_absolute(self.position_mapping[java_absolute]) - - l2d = i2d - l2f = i2f - - def l2i(self, arguments, program): - pass # Preserving Java semantics - - ladd = iadd - laload = iaload - land = iand - lastore = iastore - - def lcmp(self, arguments, program): - # NOTE: No type checking performed. - program.dup_topx(2) # Stack: value1, value2, value1, value2 - program.compare_op(">") # Stack: value1, value2, result - program.jump_to_label(0, "equals") - # True - produce result and branch. - program.pop_top() # Stack: value1, value2 - program.pop_top() # Stack: value1 - program.pop_top() # Stack: - program.load_const(1) # Stack: 1 - program.jump_to_label(None, "next") - # False - test equality. - program.start_label("equals") - program.pop_top() # Stack: value1, value2 - program.dup_topx(2) # Stack: value1, value2, value1, value2 - program.compare_op("==") # Stack: value1, value2, result - program.jump_to_label(0, "less") - # True - produce result and branch. - program.pop_top() # Stack: value1, value2 - program.pop_top() # Stack: value1 - program.pop_top() # Stack: - program.load_const(0) # Stack: 0 - program.jump_to_label(None, "next") - # False - produce result. - program.start_label("less") - program.pop_top() # Stack: value1, value2 - program.pop_top() # Stack: value1 - program.pop_top() # Stack: - program.load_const(-1) # Stack: -1 - program.start_label("next") - - lconst_0 = iconst_0 - lconst_1 = iconst_1 - - def ldc(self, arguments, program): - program.load_const(self.class_file.constants[arguments[0] - 1]) - - def ldc_w(self, arguments, program): - program.load_const(self.class_file.constants[arguments[0] << 8 + arguments[1] - 1]) - - ldc2_w = ldc_w - ldiv = idiv - lload = iload - lload_0 = iload_0 - lload_1 = iload_1 - lload_2 = iload_2 - lload_3 = iload_3 - lmul = imul - lneg = ineg - - def lookupswitch(self, arguments, program): - # Find the offset to the next 4 byte boundary in the code. - d, r = divmod(self.java_position, 4) - to_boundary = (4 - r) % 4 - # Get the pertinent arguments. - arguments = arguments[to_boundary:] - default = arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3] - npairs = arguments[4] << 24 + arguments[5] << 16 + arguments[6] << 8 + arguments[7] - # Process the pairs. - # NOTE: This is not the most optimal implementation. - pair_index = 8 - for pair in range(0, npairs): - match = (arguments[pair_index] << 24 + arguments[pair_index + 1] << 16 + - arguments[pair_index + 2] << 8 + arguments[pair_index + 3]) - offset = signed4(arguments[pair_index + 4] << 24 + arguments[pair_index + 5] << 16 + - arguments[pair_index + 6] << 8 + arguments[pair_index + 7]) - # Calculate the branch target. - java_absolute = self.java_position + offset - # Generate branching code. - program.dup_top() # Stack: key, key - program.load_const(match) # Stack: key, key, match - program.compare_op("==") # Stack: key, result - program.jump_to_label(0, "end" + str(pair)) - program.pop_top() # Stack: key - program.pop_top() # Stack: - program.jump_absolute(self.position_mapping[java_absolute]) - # Generate the label for the end of the branching code. - program.start_label("end" + str(pair)) - program.pop_top() # Stack: key - # Update the index. - pair_index += 8 - # Generate the default. - java_absolute = self.java_position + default - program.jump_absolute(self.position_mapping[java_absolute]) - - lor = ior - lrem = irem - lreturn = ireturn - lshl = ishl - lshr = ishr - lstore = istore - lstore_0 = istore_0 - lstore_1 = istore_1 - lstore_2 = istore_2 - lstore_3 = istore_3 - lsub = isub - lushr = iushr - lxor = ixor - - def monitorenter(self, arguments, program): - # NOTE: To be implemented. - pass - - def monitorexit(self, arguments, program): - # NOTE: To be implemented. - pass - - def multianewarray(self, arguments, program): - program.build_list() # Stack: count1, count2, ..., countN, list - program.rot_two() # Stack: count1, count2, ..., list, countN - program.setup_loop() - program.load_global("range") - program.load_const(0) # Stack: list, count, range, 0 - program.rot_three() # Stack: list, 0, count, range - program.rot_three() # Stack: list, range, 0, count - program.call_function(2) # Stack: list, range_list - program.get_iter() # Stack: list, iter - program.for_iter() # Stack: list, iter, value - for i in range(0, arguments[2]): - # Stack: - self.anewarray(arguments, program) # Stack: list, iter - - def wide(self, code, program): - # NOTE: To be implemented. - return number_of_arguments - java_bytecodes = { # code : (mnemonic, number of following bytes, change in stack) 0 : ("nop", 0), @@ -1005,6 +416,669 @@ 201 : ("jsr_w", 4), } +class BytecodeDisassembler(BytecodeReader): + + "A Java bytecode disassembler." + + bytecode_methods = [spec[0] for spec in BytecodeReader.java_bytecodes.values()] + + def __getattr__(self, name): + if name in self.bytecode_methods: + print name, + return self.generic + else: + raise AttributeError, name + + def generic(self, arguments, program): + print arguments + +class BytecodeDisassemblerProgram: + position = 0 + +class BytecodeTranslator(BytecodeReader): + + "A Java bytecode translator which uses a Python bytecode writer." + + def nop(self, arguments, program): + pass + + def aaload(self, arguments, program): + # NOTE: No type checking performed. + program.binary_subscr() + + def aastore(self, arguments, program): + # NOTE: No type checking performed. + # Stack: arrayref, index, value + program.rot_three() # Stack: value, arrayref, index + program.store_subscr() + + def aconst_null(self, arguments, program): + program.load_global(None) + + def aload(self, arguments, program): + program.load_fast(arguments[0]) + + def aload_0(self, arguments, program): + program.load_fast(0) + + def aload_1(self, arguments, program): + program.load_fast(1) + + def aload_2(self, arguments, program): + program.load_fast(2) + + def aload_3(self, arguments, program): + program.load_fast(3) + + def anewarray(self, arguments, program): + # NOTE: Does not raise NegativeArraySizeException. + # NOTE: Not using the index to type the list/array. + index = (arguments[0] << 8) + arguments[1] + + program.build_list() # Stack: count, list + program.rot_two() # Stack: list, count + program.setup_loop() + program.load_global("range") + program.load_const(0) # Stack: list, count, range, 0 + program.rot_three() # Stack: list, 0, count, range + program.rot_three() # Stack: list, range, 0, count + program.call_function(2) # Stack: list, range_list + program.get_iter() # Stack: list, iter + program.for_iter() # Stack: list, iter, value + program.pop_top() # Stack: list, iter + program.rot_two() # Stack: iter, list + program.dup_top() # Stack: iter, list, list + program.load_attr("append") # Stack: iter, list, append + program.load_global(None) # Stack: iter, list, append, None + program.call_function(1) # Stack: iter, list, None + program.pop_top() # Stack: iter, list + program.rot_two() # Stack: list, iter + program.end_loop() # Back to for_iter above + + def areturn(self, arguments, program): + program.return_value() + + def arraylength(self, arguments, program): + program.load_global("len") # Stack: arrayref, len + program.rot_two() # Stack: len, arrayref + program.call_function(1) + + def astore(self, arguments, program): + program.store_fast(arguments[0]) + + def astore_0(self, arguments, program): + program.store_fast(0) + + def astore_1(self, arguments, program): + program.store_fast(1) + + def astore_2(self, arguments, program): + program.store_fast(2) + + def astore_3(self, arguments, program): + program.store_fast(3) + + def athrow(self, arguments, program): + # NOTE: NullPointerException not raised where null/None is found on the stack. + program.raise_varargs(1) + + baload = aaload + bastore = aastore + + def bipush(self, arguments, program): + program.load_const(arguments[0]) + + caload = aaload + castore = aastore + + def checkcast(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_name() + target_components = target_name.split("/") + + program.dup_top() # Stack: objectref, objectref + program.load_global("isinstance") # Stack: objectref, objectref, isinstance + program.rot_two() # Stack: objectref, isinstance, objectref + program.load_global(target_components[0]) + for target_component in target_components[1:]: + program.load_attr(target_component) + program.call_function(2) # Stack: objectref + + def d2f(self, arguments, program): + pass + + def d2i(self, arguments, program): + program.load_global("int") # Stack: value, int + program.rot_two() # Stack: int, value + program.call_function(1) # Stack: result + + d2l = d2i # Preserving Java semantics + + def dadd(self, arguments, program): + # NOTE: No type checking performed. + program.binary_add() + + daload = aaload + dastore = aastore + + def dcmpg(self, arguments, program): + # NOTE: No type checking performed. + program.compare_op(">") + + def dcmpl(self, arguments, program): + # NOTE: No type checking performed. + program.compare_op("<") + + def dconst_0(self, arguments, program): + program.load_const(0.0) + + def dconst_1(self, arguments, program): + program.load_const(1.0) + + def ddiv(self, arguments, program): + # NOTE: No type checking performed. + program.binary_divide() + + dload = aload + dload_0 = aload_0 + dload_1 = aload_1 + dload_2 = aload_2 + dload_3 = aload_3 + + def dmul(self, arguments, program): + # NOTE: No type checking performed. + program.binary_multiply() + + def dneg(self, arguments, program): + # NOTE: No type checking performed. + program.unary_negative() + + def drem(self, arguments, program): + # NOTE: No type checking performed. + program.binary_modulo() + + dreturn = areturn + dstore = astore + dstore_0 = astore_0 + dstore_1 = astore_1 + dstore_2 = astore_2 + dstore_3 = astore_3 + + def dsub(self, arguments, program): + # NOTE: No type checking performed. + program.binary_subtract() + + def dup(self, arguments, program): + program.dup_top() + + def dup_x1(self, arguments, program): + # Ignoring computational type categories. + program.dup_top() + program.rot_three() + + def dup_x2(self, arguments, program): + # Ignoring computational type categories. + program.dup_top() + program.rot_four() + + dup2 = dup # Ignoring computational type categories + dup2_x1 = dup_x1 # Ignoring computational type categories + dup2_x2 = dup_x2 # Ignoring computational type categories + + def f2d(self, arguments, program): + pass # Preserving Java semantics + + def f2i(self, arguments, program): + program.load_global("int") # Stack: value, int + program.rot_two() # Stack: int, value + program.call_function(1) # Stack: result + + f2l = f2i # Preserving Java semantics + fadd = dadd + faload = daload + fastore = dastore + fcmpg = dcmpg + fcmpl = dcmpl + fconst_0 = dconst_0 + fconst_1 = dconst_1 + + def fconst_2(self, arguments, program): + program.load_const(2.0) + + fdiv = ddiv + fload = dload + fload_0 = dload_0 + fload_1 = dload_1 + fload_2 = dload_2 + fload_3 = dload_3 + fmul = dmul + fneg = dneg + frem = drem + freturn = dreturn + fstore = dstore + fstore_0 = dstore_0 + fstore_1 = dstore_1 + fstore_2 = dstore_2 + fstore_3 = dstore_3 + fsub = dsub + + def getfield(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_name() + # NOTE: Using the string version of the name which may contain incompatible characters. + program.load_attr(str(target_name)) + + getstatic = getfield # Ignoring Java restrictions + + def goto(self, arguments, program): + offset = signed2((arguments[0] << 8) + arguments[1]) + java_absolute = self.java_position + offset + program.jump_absolute(self.position_mapping[java_absolute]) + + def goto_w(self, arguments, program): + offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) + java_absolute = self.java_position + offset + program.jump_absolute(self.position_mapping[java_absolute]) + + def i2b(self, arguments, program): + pass + + def i2c(self, arguments, program): + program.load_global("chr") # Stack: value, chr + program.rot_two() # Stack: chr, value + program.call_function(1) # Stack: result + + def i2d(self, arguments, program): + program.load_global("float") # Stack: value, float + program.rot_two() # Stack: float, value + program.call_function(1) # Stack: result + + i2f = i2d # Not distinguishing between float and double + + def i2l(self, arguments, program): + pass # Preserving Java semantics + + def i2s(self, arguments, program): + pass # Not distinguishing between int and short + + iadd = fadd + iaload = faload + + def iand(self, arguments, program): + # NOTE: No type checking performed. + program.binary_and() + + iastore = fastore + + def iconst_m1(self, arguments, program): + program.load_const(-1) + + def iconst_0(self, arguments, program): + program.load_const(0) + + def iconst_1(self, arguments, program): + program.load_const(1) + + def iconst_2(self, arguments, program): + program.load_const(2) + + def iconst_3(self, arguments, program): + program.load_const(3) + + def iconst_4(self, arguments, program): + program.load_const(4) + + def iconst_5(self, arguments, program): + program.load_const(5) + + idiv = fdiv + + def _if_xcmpx(self, arguments, program, op): + offset = signed2((arguments[0] << 8) + arguments[1]) + java_absolute = self.java_position + offset + program.compare_op(op) + program.jump_to_label(0, "next") # skip if false + program.goto(offset) + program.start_label("next") + + def if_acmpeq(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "is") + + def if_acmpne(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "is not") + + def if_icmpeq(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "==") + + def if_icmpne(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "!=") + + def if_icmplt(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "<") + + def if_icmpge(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, ">=") + + def if_icmpgt(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, ">") + + def if_icmple(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "<=") + + def ifeq(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, "==") + + def ifne(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, "!=") + + def iflt(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, "<") + + def ifge(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, ">=") + + def ifgt(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, ">") + + def ifle(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, "<=") + + def ifnonnull(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(None) + self._if_xcmpx(arguments, program, "is not") + + def ifnull(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(None) + self._if_xcmpx(arguments, program, "is") + + def iinc(self, arguments, program): + # NOTE: No type checking performed. + program.load_fast(arguments[0]) + program.load_const(arguments[1]) + program.binary_add() + + iload = fload + iload_0 = fload_0 + iload_1 = fload_1 + iload_2 = fload_2 + iload_3 = fload_3 + imul = fmul + ineg = fneg + + def instanceof(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_name() + target_components = target_name.split("/") + + program.load_global("isinstance") # Stack: objectref, isinstance + program.rot_two() # Stack: isinstance, objectref + program.load_global(target_components[0]) + for target_component in target_components[1:]: + program.load_attr(target_component) + program.call_function(2) # Stack: result + + def _invoke(self, target_name, program): + program.rot_two() # Stack: tuple, objectref + # NOTE: Using the string version of the name which may contain incompatible characters. + program.load_attr(str(target_name)) # Stack: tuple, method + program.rot_two() # Stack: method, tuple + program.load_global("apply") # Stack: method, tuple, apply + program.rot_three() # Stack: apply, method, tuple + program.call_function(2) + + def invokeinterface(self, arguments, program): + # NOTE: This implementation does not perform the necessary checks for + # NOTE: signature-based polymorphism. + # NOTE: Java rules not specifically obeyed. + index = (arguments[0] << 8) + arguments[1] + count = arguments[2] + target_name = self.class_file.constants[index - 1].get_name() + # Stack: objectref, arg1, arg2, ... + program.build_tuple(count) # Stack: objectref, tuple + self._invoke(target_name, program) + + def invokespecial(self, arguments, program): + # NOTE: This implementation does not perform the necessary checks for + # NOTE: signature-based polymorphism. + # NOTE: Java rules not specifically obeyed. + index = (arguments[0] << 8) + arguments[1] + target = self.class_file.constants[index - 1] + target_name = target.get_name() + # Get the number of parameters from the descriptor. + count = len(target.get_descriptor()[0]) + # Stack: objectref, arg1, arg2, ... + program.build_tuple(count) # Stack: objectref, tuple + self._invoke(target_name, program) + + def invokestatic(self, arguments, program): + # NOTE: This implementation does not perform the necessary checks for + # NOTE: signature-based polymorphism. + # NOTE: Java rules not specifically obeyed. + index = (arguments[0] << 8) + arguments[1] + target = self.class_file.constants[index - 1] + target_name = target.get_name() + # Get the number of parameters from the descriptor. + count = len(target.get_descriptor()[0]) + # Stack: arg1, arg2, ... + program.build_tuple(count) # Stack: tuple + # NOTE: Should probably use Python static methods. + program.load_name("self") # Stack: tuple, self + self._invoke(target_name, program) + + invokevirtual = invokeinterface # Ignoring Java rules + + def ior(self, arguments, program): + # NOTE: No type checking performed. + program.binary_or() + + irem = frem + ireturn = freturn + + def ishl(self, arguments, program): + # NOTE: No type checking performed. + # NOTE: Not verified. + program.binary_lshift() + + def ishr(self, arguments, program): + # NOTE: No type checking performed. + # NOTE: Not verified. + program.binary_rshift() + + istore = fstore + istore_0 = fstore_0 + istore_1 = fstore_1 + istore_2 = fstore_2 + istore_3 = fstore_3 + isub = fsub + iushr = ishr # Ignoring distinctions between arithmetic and logical shifts + + def ixor(self, arguments, program): + # NOTE: No type checking performed. + program.binary_xor() + + def jsr(self, arguments, program): + offset = signed2((arguments[0] << 8) + arguments[1]) + java_absolute = self.java_position + offset + # Store the address of the next instruction. + program.load_const(self.position_mapping[self.java_position + 3]) + program.jump_absolute(self.position_mapping[java_absolute]) + + def jsr_w(self, arguments, program): + offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) + java_absolute = self.java_position + offset + # Store the address of the next instruction. + program.load_const(self.position_mapping[self.java_position + 5]) + program.jump_absolute(self.position_mapping[java_absolute]) + + l2d = i2d + l2f = i2f + + def l2i(self, arguments, program): + pass # Preserving Java semantics + + ladd = iadd + laload = iaload + land = iand + lastore = iastore + + def lcmp(self, arguments, program): + # NOTE: No type checking performed. + program.dup_topx(2) # Stack: value1, value2, value1, value2 + program.compare_op(">") # Stack: value1, value2, result + program.jump_to_label(0, "equals") + # True - produce result and branch. + program.pop_top() # Stack: value1, value2 + program.pop_top() # Stack: value1 + program.pop_top() # Stack: + program.load_const(1) # Stack: 1 + program.jump_to_label(None, "next") + # False - test equality. + program.start_label("equals") + program.pop_top() # Stack: value1, value2 + program.dup_topx(2) # Stack: value1, value2, value1, value2 + program.compare_op("==") # Stack: value1, value2, result + program.jump_to_label(0, "less") + # True - produce result and branch. + program.pop_top() # Stack: value1, value2 + program.pop_top() # Stack: value1 + program.pop_top() # Stack: + program.load_const(0) # Stack: 0 + program.jump_to_label(None, "next") + # False - produce result. + program.start_label("less") + program.pop_top() # Stack: value1, value2 + program.pop_top() # Stack: value1 + program.pop_top() # Stack: + program.load_const(-1) # Stack: -1 + program.start_label("next") + + lconst_0 = iconst_0 + lconst_1 = iconst_1 + + def ldc(self, arguments, program): + program.load_const(self.class_file.constants[arguments[0] - 1]) + + def ldc_w(self, arguments, program): + program.load_const(self.class_file.constants[(arguments[0] << 8) + arguments[1] - 1]) + + ldc2_w = ldc_w + ldiv = idiv + lload = iload + lload_0 = iload_0 + lload_1 = iload_1 + lload_2 = iload_2 + lload_3 = iload_3 + lmul = imul + lneg = ineg + + def lookupswitch(self, arguments, program): + # Find the offset to the next 4 byte boundary in the code. + d, r = divmod(self.java_position, 4) + to_boundary = (4 - r) % 4 + # Get the pertinent arguments. + arguments = arguments[to_boundary:] + default = (arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3] + npairs = (arguments[4] << 24) + (arguments[5] << 16) + (arguments[6] << 8) + arguments[7] + # Process the pairs. + # NOTE: This is not the most optimal implementation. + pair_index = 8 + for pair in range(0, npairs): + match = ((arguments[pair_index] << 24) + (arguments[pair_index + 1] << 16) + + (arguments[pair_index + 2] << 8) + arguments[pair_index + 3]) + offset = signed4((arguments[pair_index + 4] << 24) + (arguments[pair_index + 5] << 16) + + (arguments[pair_index + 6] << 8) + arguments[pair_index + 7]) + # Calculate the branch target. + java_absolute = self.java_position + offset + # Generate branching code. + program.dup_top() # Stack: key, key + program.load_const(match) # Stack: key, key, match + program.compare_op("==") # Stack: key, result + program.jump_to_label(0, "end" + str(pair)) + program.pop_top() # Stack: key + program.pop_top() # Stack: + program.jump_absolute(self.position_mapping[java_absolute]) + # Generate the label for the end of the branching code. + program.start_label("end" + str(pair)) + program.pop_top() # Stack: key + # Update the index. + pair_index += 8 + # Generate the default. + java_absolute = self.java_position + default + program.jump_absolute(self.position_mapping[java_absolute]) + + lor = ior + lrem = irem + lreturn = ireturn + lshl = ishl + lshr = ishr + lstore = istore + lstore_0 = istore_0 + lstore_1 = istore_1 + lstore_2 = istore_2 + lstore_3 = istore_3 + lsub = isub + lushr = iushr + lxor = ixor + + def monitorenter(self, arguments, program): + # NOTE: To be implemented. + pass + + def monitorexit(self, arguments, program): + # NOTE: To be implemented. + pass + + def multianewarray(self, arguments, program): + program.build_list() # Stack: count1, count2, ..., countN, list + program.rot_two() # Stack: count1, count2, ..., list, countN + program.setup_loop() + program.load_global("range") + program.load_const(0) # Stack: list, count, range, 0 + program.rot_three() # Stack: list, 0, count, range + program.rot_three() # Stack: list, range, 0, count + program.call_function(2) # Stack: list, range_list + program.get_iter() # Stack: list, iter + program.for_iter() # Stack: list, iter, value + for i in range(0, arguments[2]): + # Stack: + self.anewarray(arguments, program) # Stack: list, iter + + def wide(self, code, program): + # NOTE: To be implemented. + return number_of_arguments + +def disassemble(class_file, code): + disassembler = BytecodeDisassembler(class_file) + disassembler.process(code, BytecodeDisassemblerProgram()) + +def translate(class_file, code): + translator = BytecodeTranslator(class_file) + writer = BytecodeWriter() + translator.process(code, writer) + return writer + if __name__ == "__main__": import sys from classfile import ClassFile diff -r a714f8355910 -r 91cceb1bd580 classfile.py --- a/classfile.py Sun Nov 07 17:42:07 2004 +0100 +++ b/classfile.py Sun Nov 07 20:53:03 2004 +0100 @@ -40,6 +40,84 @@ # Some name indexes are zero to indicate special conditions. return None +class NameAndTypeUtils: + def get_name(self): + if self.name_and_type_index != 0: + return self.class_file.constants[self.name_and_type_index - 1].get_name() + else: + # Some name indexes are zero to indicate special conditions. + return None + + def get_field_descriptor(self): + if self.name_and_type_index != 0: + return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor() + else: + # Some name indexes are zero to indicate special conditions. + return None + + def get_method_descriptor(self): + if self.name_and_type_index != 0: + return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor() + else: + # Some name indexes are zero to indicate special conditions. + return None + +class DescriptorUtils: + + "Symbol parsing." + + def _get_method_descriptor(self, s): + assert s[0] == "(" + params = [] + s = s[1:] + while s[0] != ")": + parameter_descriptor, s = self._get_parameter_descriptor(s) + params.append(parameter_descriptor) + if s[1] != "V": + return_type, s = self._get_field_type(s[1:]) + else: + return_type, s = None, s[1:] + return params, return_type + + def _get_parameter_descriptor(self, s): + return self._get_field_type(s) + + def _get_field_descriptor(self, s): + return self._get_field_type(s) + + def _get_component_type(self, s): + return self._get_field_type(s) + + def _get_field_type(self, s): + base_type, s = self._get_base_type(s) + object_type = None + array_type = None + if base_type == "L": + object_type, s = self._get_object_type(s) + elif base_type == "[": + array_type, s = self._get_array_type(s) + return (base_type, object_type, array_type), s + + def _get_base_type(self, s): + if len(s) > 0: + return s[0], s[1:] + else: + return None, s + + def _get_object_type(self, s): + if len(s) > 0: + s_end = s.find(";") + assert s_end != -1 + return s[:s_end], s[s_end+1:] + else: + return None, s + + def _get_array_type(self, s): + if len(s) > 0: + return self._get_component_type(s) + else: + return None, s + # Constant information. # Objects of these classes are not directly aware of the class they reside in. @@ -49,7 +127,7 @@ self.name_index = u2(data[0:2]) return data[2:] -class RefInfo: +class RefInfo(NameAndTypeUtils): def init(self, data, class_file): self.class_file = class_file self.class_index = u2(data[0:2]) @@ -57,21 +135,30 @@ return data[4:] class FieldRefInfo(RefInfo): - pass + def get_descriptor(self): + return RefInfo.get_field_descriptor(self) class MethodRefInfo(RefInfo): - pass + def get_descriptor(self): + return RefInfo.get_method_descriptor(self) class InterfaceMethodRefInfo(RefInfo): - pass + def get_descriptor(self): + return RefInfo.get_method_descriptor(self) -class NameAndTypeInfo(NameUtils): +class NameAndTypeInfo(NameUtils, DescriptorUtils): def init(self, data, class_file): self.class_file = class_file self.name_index = u2(data[0:2]) self.descriptor_index = u2(data[2:4]) return data[4:] + def get_field_descriptor(self): + return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) + + def get_method_descriptor(self): + return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) + class Utf8Info: def init(self, data, class_file): self.class_file = class_file @@ -123,7 +210,7 @@ # Other information. # Objects of these classes are generally aware of the class they reside in. -class ItemInfo(NameUtils): +class ItemInfo(NameUtils, DescriptorUtils): def init(self, data, class_file): self.class_file = class_file self.access_flags = u2(data[0:2]) @@ -132,60 +219,6 @@ self.attributes, data = self.class_file._get_attributes(data[6:]) return data - # Symbol parsing. - - def _get_method_descriptor(self, s): - assert s[0] == "(" - params = [] - s = s[1:] - while s[0] != ")": - parameter_descriptor, s = self._get_parameter_descriptor(s) - params.append(parameter_descriptor) - if s[1] != "V": - return_type, s = self._get_field_type(s[1:]) - else: - return_type, s = None, s[1:] - return params, return_type - - def _get_parameter_descriptor(self, s): - return self._get_field_type(s) - - def _get_field_descriptor(self, s): - return self._get_field_type(s) - - def _get_component_type(self, s): - return self._get_field_type(s) - - def _get_field_type(self, s): - base_type, s = self._get_base_type(s) - object_type = None - array_type = None - if base_type == "L": - object_type, s = self._get_object_type(s) - elif base_type == "[": - array_type, s = self._get_array_type(s) - return (base_type, object_type, array_type), s - - def _get_base_type(self, s): - if len(s) > 0: - return s[0], s[1:] - else: - return None, s - - def _get_object_type(self, s): - if len(s) > 0: - s_end = s.find(";") - assert s_end != -1 - return s[:s_end], s[s_end+1:] - else: - return None, s - - def _get_array_type(self, s): - if len(s) > 0: - return self._get_component_type(s) - else: - return None, s - class FieldInfo(ItemInfo): def get_descriptor(self): return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))