# HG changeset patch # User Paul Boddie # Date 1106323506 -3600 # Node ID 182cba61327c9f6d29c252a5eaaf107b67a9d89f # Parent 754d36821fc88da34b7bc55be425db5a8ed71918 Moved the modules into the javaclass package. diff -r 754d36821fc8 -r 182cba61327c javaclass/__init__.py diff -r 754d36821fc8 -r 182cba61327c javaclass/bytecode.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/javaclass/bytecode.py Fri Jan 21 17:05:06 2005 +0100 @@ -0,0 +1,2311 @@ +#!/usr/bin/env python + +""" +Java bytecode conversion. Specification found at the following URL: +http://java.sun.com/docs/books/vmspec/2nd-edition/html/Instructions2.doc.html + +NOTE: Synchronized constructs are not actually supported. +""" + +import classfile +from dis import cmp_op # for access to Python bytecode values and operators +try: + from dis import opmap +except ImportError: + from dis import opname + opmap = {} + for i in range(0, len(opname)): + opmap[opname[i]] = i +from UserDict import UserDict +import new + +# Bytecode production classes. + +class BytecodeWriter: + + "A Python bytecode writer." + + def __init__(self): + + "Initialise the writer." + + # A stack of loop start instructions corresponding to loop blocks. + self.loops = [] + + # A stack of loop block or exception block start positions. + self.blocks = [] + + # A stack of exception block handler pointers. + self.exception_handlers = [] + + # A dictionary mapping labels to jump instructions referencing such labels. + self.jumps = {} + + # The output values, including "lazy" subvalues which will need evaluating. + self.output = [] + + # The current Python bytecode instruction position. + self.position = 0 + + # Stack depth estimation. + self.stack_depth = 0 + self.max_stack_depth = 0 + + # Local variable estimation. + self.max_locals = 0 + + # Mapping from values to indexes. + self.constants = {} + + # Mapping from names to indexes. + # NOTE: This may be acquired from elsewhere. + #self.globals = {} + + # Mapping from names to indexes. 
+ self.names = {} + + # A list of constants used as exception handler return addresses. + self.constants_for_exceptions = [] + + # A list of external names. + self.external_names = [] + + def get_output(self): + + "Return the output of the writer as a string." + + output = [] + for element in self.output: + if isinstance(element, LazySubValue): + value = element.value + else: + value = element + # NOTE: ValueError gets raised for bad values here. + output.append(chr(value)) + return "".join(output) + + def get_constants(self): + + """ + Return a list of constants with ordering significant to the code + employing them. + """ + + l = self._get_list(self._invert(self.constants)) + result = [] + for i in l: + if isinstance(i, LazyValue): + result.append(i.get_value()) + else: + result.append(i) + return result + + #def get_globals(self): + # return self._get_list(self._invert(self.globals)) + + def get_names(self): + + """ + Return a list of names with ordering significant to the code employing + them. + """ + + return self._get_list(self._invert(self.names)) + + def _invert(self, d): + + """ + Return a new dictionary whose key-to-value mapping is in the inverse of + that found in 'd'. + """ + + inverted = {} + for k, v in d.items(): + inverted[v] = k + return inverted + + def _get_list(self, d): + + """ + Traverse the dictionary 'd' returning a list whose values appear at the + position denoted by each value's key in 'd'. + """ + + l = [] + for i in range(0, len(d.keys())): + l.append(d[i]) + return l + + # Administrative methods. + + def update_stack_depth(self, change): + + """ + Given the stated 'change' in stack depth, update the maximum stack depth + where appropriate. + """ + + self.stack_depth += change + if self.stack_depth > self.max_stack_depth: + self.max_stack_depth = self.stack_depth + + def update_locals(self, index): + + """ + Given the stated 'index' of a local variable, update the maximum local + variable index where appropriate. 
+ """ + + if index > self.max_locals: + self.max_locals = index + + # Special methods. + + def _write_value(self, value): + + """ + Write the given 'value' at the current output position. + """ + + if isinstance(value, LazyValue): + # NOTE: Assume a 16-bit value. + self.output.append(value.values[0]) + self.output.append(value.values[1]) + self.position += 2 + elif value <= 0xffff: + self.output.append(value & 0xff) + self.output.append((value & 0xff00) >> 8) + self.position += 2 + else: + # NOTE: EXTENDED_ARG not yet supported. + raise ValueError, value + + def _rewrite_value(self, position, value): + + """ + At the given output 'position', rewrite the given 'value'. + """ + + # NOTE: Assume a 16-bit value. + if value <= 0xffff: + self.output[position] = (value & 0xff) + self.output[position + 1] = ((value & 0xff00) >> 8) + else: + # NOTE: EXTENDED_ARG not yet supported. + raise ValueError, value + + # Higher level methods. + + def use_external_name(self, name): + # NOTE: Remove array and object indicators. + self.external_names.append(name) + + def setup_loop(self): + self.loops.append(self.position) + self.output.append(opmap["SETUP_LOOP"]) + self.position += 1 + self._write_value(0) # To be filled in later + + def end_loop(self): + current_loop_start = self.loops.pop() + current_loop_real_start = self.blocks.pop() + #print "<", self.blocks, current_loop_real_start + # Fix the iterator delta. + # NOTE: Using 3 as the assumed length of the FOR_ITER instruction. + self.jump_absolute(current_loop_real_start) + self._rewrite_value(current_loop_real_start + 1, self.position - current_loop_real_start - 3) + self.pop_block() + # Fix the loop delta. + # NOTE: Using 3 as the assumed length of the SETUP_LOOP instruction. + self._rewrite_value(current_loop_start + 1, self.position - current_loop_start - 3) + + def jump_to_label(self, status, name): + # Record the instruction using the jump. 
+ jump_instruction = self.position + if status is None: + self.jump_forward() + elif status: + self.jump_if_true() + else: + self.jump_if_false() + # Record the following instruction, too. + if not self.jumps.has_key(name): + self.jumps[name] = [] + self.jumps[name].append((jump_instruction, self.position)) + + def start_label(self, name): + # Fill in all jump instructions. + for jump_instruction, following_instruction in self.jumps[name]: + self._rewrite_value(jump_instruction + 1, self.position - following_instruction) + del self.jumps[name] + + def load_const_ret(self, value): + self.constants_for_exceptions.append(value) + self.load_const(value) + + def ret(self, index): + self.load_fast(index) + + # Previously, the constant stored on the stack by jsr/jsr_w was stored + # in a local variable. In the JVM, extracting the value from the local + # variable and jumping can be done at runtime. In the Python VM, any + # jump target must be known in advance and written into the bytecode. + + for constant in self.constants_for_exceptions: + self.dup_top() # Stack: actual-address, actual-address + self.load_const(constant) # Stack: actual-address, actual-address, suggested-address + self.compare_op("==") # Stack: actual-address, result + self.jump_to_label(0, "const") + self.pop_top() # Stack: actual-address + self.pop_top() # Stack: + self.jump_absolute(constant) + self.start_label("const") + self.pop_top() # Stack: actual-address + + # NOTE: If we get here, something is really wrong. 
+ + self.pop_top() # Stack: + + def setup_except(self, target): + self.blocks.append(self.position) + self.exception_handlers.append(target) + #print "-", self.position, target + self.output.append(opmap["SETUP_EXCEPT"]) + self.position += 1 + self._write_value(0) # To be filled in later + + def setup_finally(self, target): + self.blocks.append(self.position) + self.exception_handlers.append(target) + #print "-", self.position, target + self.output.append(opmap["SETUP_FINALLY"]) + self.position += 1 + self._write_value(0) # To be filled in later + + def end_exception(self): + current_exception_start = self.blocks.pop() + # Convert the "lazy" absolute value. + current_exception_target = self.exception_handlers.pop() + target = current_exception_target.get_value() + #print "*", current_exception_start, target + # NOTE: Using 3 as the assumed length of the SETUP_* instruction. + self._rewrite_value(current_exception_start + 1, target - current_exception_start - 3) + + def start_handler(self, exc_name, class_file): + + # Where handlers are begun, produce bytecode to test the type of + # the exception. + # NOTE: Since RAISE_VARARGS and END_FINALLY are not really documented, + # NOTE: we store the top of the stack and use it later to trigger the + # NOTE: magic processes when re-raising. + self.use_external_name(str(exc_name)) + + self.rot_two() # Stack: raised-exception, exception + self.dup_top() # Stack: raised-exception, exception, exception + # Handled exceptions are wrapped before being thrown. 
+ self.load_global("Exception") # Stack: raised-exception, exception, exception, Exception + self.compare_op("exception match") # Stack: raised-exception, exception, result + self.jump_to_label(0, "next") + self.pop_top() # Stack: raised-exception, exception + self.dup_top() # Stack: raised-exception, exception, exception + self.load_attr("args") # Stack: raised-exception, exception, args + self.load_const(0) # Stack: raised-exception, exception, args, 0 + self.binary_subscr() # Stack: raised-exception, exception, exception-object + load_class_name(class_file, str(exc_name), self) + # Stack: raised-exception, exception, exception-object, handled-exception + self.load_global("isinstance") # Stack: raised-exception, exception, exception-object, handled-exception, isinstance + self.rot_three() # Stack: raised-exception, exception, isinstance, exception-object, handled-exception + self.call_function(2) # Stack: raised-exception, exception, result + self.jump_to_label(1, "handler") + self.start_label("next") + self.pop_top() # Stack: raised-exception, exception + self.rot_two() # Stack: exception, raised-exception + self.end_finally() + self.start_label("handler") + self.pop_top() # Stack: raised-exception, exception + + # Complicated methods. 
# The definitions below are BytecodeWriter methods (each takes the writer as
# 'self'). They emit instructions whose operand is an index into the constant
# or name table or a local variable slot, or which involve the block stack.

def load_const(self, value):

    """
    Emit LOAD_CONST for 'value', adding it to the constants table on first use
    and recording one more item on the stack.

    NOTE(review): The table is keyed by the value itself, so values which
    NOTE(review): compare equal (such as 0 and 0.0) share a single index.
    """

    self.output.append(opmap["LOAD_CONST"])
    # New values take the next free index; known values keep theirs.
    index = self.constants.setdefault(value, len(self.constants))
    self.position += 1
    self._write_value(index)
    self.update_stack_depth(1)

def load_global(self, name):

    """
    Emit LOAD_GLOBAL for 'name', adding it to the names table on first use and
    recording one more item on the stack.
    """

    self.output.append(opmap["LOAD_GLOBAL"])
    index = self.names.setdefault(name, len(self.names))
    self.position += 1
    self._write_value(index)
    self.update_stack_depth(1)

def load_attr(self, name):

    """
    Emit LOAD_ATTR for 'name', adding it to the names table on first use. No
    change in stack depth is recorded.
    """

    self.output.append(opmap["LOAD_ATTR"])
    index = self.names.setdefault(name, len(self.names))
    self.position += 1
    self._write_value(index)

def load_name(self, name):

    """
    Emit LOAD_NAME for 'name', adding it to the names table on first use and
    recording one more item on the stack.
    """

    self.output.append(opmap["LOAD_NAME"])
    index = self.names.setdefault(name, len(self.names))
    self.position += 1
    self._write_value(index)
    self.update_stack_depth(1)

def load_fast(self, index):

    """
    Emit LOAD_FAST for the local variable at 'index', recording one more item
    on the stack and noting the use of that local variable.
    """

    self.output.append(opmap["LOAD_FAST"])
    self.position += 1
    self._write_value(index)
    self.update_stack_depth(1)
    self.update_locals(index)

def store_attr(self, name):

    """
    Emit STORE_ATTR for 'name', adding it to the names table on first use and
    recording one fewer item on the stack.

    NOTE(review): CPython's STORE_ATTR consumes two stack items (the object
    NOTE(review): and the value), so -1 looks like an over-estimate. Confirm
    NOTE(review): before tightening, since other estimates may rely on it.
    """

    self.output.append(opmap["STORE_ATTR"])
    index = self.names.setdefault(name, len(self.names))
    self.position += 1
    self._write_value(index)
    self.update_stack_depth(-1)

def store_fast(self, index):

    """
    Emit STORE_FAST for the local variable at 'index', recording one fewer
    item on the stack and noting the use of that local variable.
    """

    self.output.append(opmap["STORE_FAST"])
    self.position += 1
    self._write_value(index)
    self.update_stack_depth(-1)
    self.update_locals(index)

def for_iter(self):

    """
    Emit FOR_ITER with a placeholder operand, pushing the position of the
    instruction onto the block stack and recording one more item on the stack.
    """

    # Remember where the instruction starts before anything is written.
    self.blocks.append(self.position)
    self.output.append(opmap["FOR_ITER"])
    self.position += 1
    self._write_value(0) # To be filled in later
    self.update_stack_depth(1)

def break_loop(self):

    """
    Emit BREAK_LOOP followed by an absolute jump to the position held at the
    top of the block stack.
    """

    self.output.append(opmap["BREAK_LOOP"])
    self.position += 1
    self.jump_absolute(self.blocks[-1])

# Normal bytecode generators.
+ + def get_iter(self): + self.output.append(opmap["GET_ITER"]) + self.position += 1 + + def jump_if_false(self, offset=0): + self.output.append(opmap["JUMP_IF_FALSE"]) + self.position += 1 + self._write_value(offset) # May be filled in later + + def jump_if_true(self, offset=0): + self.output.append(opmap["JUMP_IF_TRUE"]) + self.position += 1 + self._write_value(offset) # May be filled in later + + def jump_forward(self, offset=0): + self.output.append(opmap["JUMP_FORWARD"]) + self.position += 1 + self._write_value(offset) # May be filled in later + + def jump_absolute(self, address=0): + self.output.append(opmap["JUMP_ABSOLUTE"]) + self.position += 1 + self._write_value(address) # May be filled in later + + def build_tuple(self, count): + self.output.append(opmap["BUILD_TUPLE"]) + self.position += 1 + self._write_value(count) + self.update_stack_depth(-(count - 1)) + + def build_list(self, count): + self.output.append(opmap["BUILD_LIST"]) + self.position += 1 + self._write_value(count) + self.update_stack_depth(-(count - 1)) + + def pop_top(self): + self.output.append(opmap["POP_TOP"]) + self.position += 1 + self.update_stack_depth(-1) + + def dup_top(self): + self.output.append(opmap["DUP_TOP"]) + self.position += 1 + self.update_stack_depth(1) + + def dup_topx(self, count): + self.output.append(opmap["DUP_TOPX"]) + self.position += 1 + self._write_value(count) + self.update_stack_depth(count) + + def rot_two(self): + self.output.append(opmap["ROT_TWO"]) + self.position += 1 + + def rot_three(self): + self.output.append(opmap["ROT_THREE"]) + self.position += 1 + + def rot_four(self): + self.output.append(opmap["ROT_FOUR"]) + self.position += 1 + + def call_function(self, count): + self.output.append(opmap["CALL_FUNCTION"]) + self.position += 1 + self._write_value(count) + self.update_stack_depth(-count) + + def call_function_var(self, count): + self.output.append(opmap["CALL_FUNCTION_VAR"]) + self.position += 1 + self._write_value(count) + 
self.update_stack_depth(-count-1) + + def binary_subscr(self): + self.output.append(opmap["BINARY_SUBSCR"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_add(self): + self.output.append(opmap["BINARY_ADD"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_divide(self): + self.output.append(opmap["BINARY_DIVIDE"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_multiply(self): + self.output.append(opmap["BINARY_MULTIPLY"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_modulo(self): + self.output.append(opmap["BINARY_MODULO"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_subtract(self): + self.output.append(opmap["BINARY_SUBTRACT"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_and(self): + self.output.append(opmap["BINARY_AND"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_or(self): + self.output.append(opmap["BINARY_XOR"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_lshift(self): + self.output.append(opmap["BINARY_LSHIFT"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_rshift(self): + self.output.append(opmap["BINARY_RSHIFT"]) + self.position += 1 + self.update_stack_depth(-1) + + def binary_xor(self): + self.output.append(opmap["BINARY_XOR"]) + self.position += 1 + self.update_stack_depth(-1) + + def store_subscr(self): + self.output.append(opmap["STORE_SUBSCR"]) + self.position += 1 + self.update_stack_depth(-3) + + def unary_negative(self): + self.output.append(opmap["UNARY_NEGATIVE"]) + self.position += 1 + + def slice_0(self): + self.output.append(opmap["SLICE+0"]) + self.position += 1 + + def slice_1(self): + self.output.append(opmap["SLICE+1"]) + self.position += 1 + + def compare_op(self, op): + self.output.append(opmap["COMPARE_OP"]) + self.position += 1 + self._write_value(list(cmp_op).index(op)) + self.update_stack_depth(-1) + + def return_value(self): + 
self.output.append(opmap["RETURN_VALUE"]) + self.position += 1 + self.update_stack_depth(-1) + + def raise_varargs(self, count): + self.output.append(opmap["RAISE_VARARGS"]) + self.position += 1 + self._write_value(count) + + def pop_block(self): + self.output.append(opmap["POP_BLOCK"]) + self.position += 1 + + def end_finally(self): + self.output.append(opmap["END_FINALLY"]) + self.position += 1 + + def unpack_sequence(self, count): + self.output.append(opmap["UNPACK_SEQUENCE"]) + self.position += 1 + self._write_value(count) + + # Debugging. + + def print_item(self): + self.output.append(opmap["PRINT_ITEM"]) + self.position += 1 + +# Utility classes and functions. + +class LazyDict(UserDict): + def __getitem__(self, key): + if not self.data.has_key(key): + # NOTE: Assume 16-bit value. + self.data[key] = LazyValue(2) + return self.data[key] + def __setitem__(self, key, value): + if self.data.has_key(key): + existing_value = self.data[key] + if isinstance(existing_value, LazyValue): + existing_value.set_value(value) + return + self.data[key] = value + +class LazyValue: + def __init__(self, nvalues): + self.values = [] + for i in range(0, nvalues): + self.values.append(LazySubValue()) + def set_value(self, value): + # NOTE: Assume at least 16-bit value. No "filling" performed. + if value <= 0xffff: + self.values[0].set_value(value & 0xff) + self.values[1].set_value((value & 0xff00) >> 8) + else: + # NOTE: EXTENDED_ARG not yet supported. + raise ValueError, value + def get_value(self): + value = 0 + values = self.values[:] + for i in range(0, len(values)): + value = (value << 8) + values.pop().value + return value + +class LazySubValue: + def __init__(self): + self.value = 0 + def set_value(self, value): + self.value = value + +def signed(value, limit): + + """ + Return the signed integer from the unsigned 'value', where 'limit' (a value + one greater than the highest possible positive integer) is used to determine + whether a negative or positive result is produced. 
+ """ + + d, r = divmod(value, limit) + if d == 1: + mask = limit * 2 - 1 + return -1 - (value ^ mask) + else: + return value + +def signed1(value): + return signed(value, 0x80) + +def signed2(value): + return signed(value, 0x8000) + +def signed4(value): + return signed(value, 0x80000000) + +def load_class_name(class_file, full_class_name, program): + this_class_name = str(class_file.this_class.get_python_name()) + this_class_parts = this_class_name.split(".") + class_parts = full_class_name.split(".") + + # Only use the full path if different from this class's path. + + if class_parts[:-1] != this_class_parts[:-1]: + program.use_external_name(full_class_name) + program.load_global(class_parts[0]) + for class_part in class_parts[1:]: + program.load_attr(class_part) # Stack: classref + else: + program.load_global(class_parts[-1]) + +# Bytecode conversion. + +class BytecodeReader: + + "A generic Java bytecode reader." + + def __init__(self, class_file): + + """ + Initialise the reader with a 'class_file' containing essential + information for any bytecode inspection activity. + """ + + self.class_file = class_file + self.position_mapping = LazyDict() + + def process(self, method, program): + + """ + Process the given 'method' (obtained from the class file), using the + given 'program' to write translated Python bytecode instructions. + """ + + self.java_position = 0 + self.in_finally = 0 + self.method = method + + # NOTE: Potentially unreliable way of getting necessary information. + + code, exception_table = None, None + for attribute in method.attributes: + if isinstance(attribute, classfile.CodeAttributeInfo): + code, exception_table = attribute.code, attribute.exception_table + break + + # Where no code was found, write a very simple placeholder routine. + # This is useful for interfaces and abstract classes. + # NOTE: Assess the correctness of doing this. An exception should really + # NOTE: be raised instead. 
+ + if code is None: + program.load_const(None) + program.return_value() + return + + # Produce a structure which permits fast access to exception details. + + exception_block_start = {} + exception_block_end = {} + exception_block_handler = {} + reversed_exception_table = exception_table[:] + reversed_exception_table.reverse() + + # Later entries have wider coverage than earlier entries. + + for exception in reversed_exception_table: + + # Index start positions. + + if not exception_block_start.has_key(exception.start_pc): + exception_block_start[exception.start_pc] = [] + exception_block_start[exception.start_pc].append(exception) + + # Index end positions. + + if not exception_block_end.has_key(exception.end_pc): + exception_block_end[exception.end_pc] = [] + exception_block_end[exception.end_pc].append(exception) + + # Index handler positions. + + if not exception_block_handler.has_key(exception.handler_pc): + exception_block_handler[exception.handler_pc] = [] + exception_block_handler[exception.handler_pc].append(exception) + + # Process each instruction in the code. + + while self.java_position < len(code): + self.position_mapping[self.java_position] = program.position + + # Insert exception handling constructs. + + block_starts = exception_block_start.get(self.java_position, []) + for exception in block_starts: + + # Note that the absolute position is used. + + if exception.catch_type == 0: + program.setup_finally(self.position_mapping[exception.handler_pc]) + else: + program.setup_except(self.position_mapping[exception.handler_pc]) + + if block_starts: + self.in_finally = 0 + + # Insert exception handler details. + # NOTE: Ensure that pop_block is reachable by possibly inserting it at the start of finally handlers. + # NOTE: Insert a check for the correct exception at the start of each handler. 
+ + for exception in exception_block_handler.get(self.java_position, []): + program.end_exception() + if exception.catch_type == 0: + self.in_finally = 1 + else: + program.start_handler(self.class_file.constants[exception.catch_type - 1].get_python_name(), self.class_file) + + # Process the bytecode at the current position. + + bytecode = ord(code[self.java_position]) + mnemonic, number_of_arguments = self.java_bytecodes[bytecode] + number_of_arguments = self.process_bytecode(mnemonic, number_of_arguments, code, program) + next_java_position = self.java_position + 1 + number_of_arguments + + # Insert exception block end details. + + for exception in exception_block_end.get(next_java_position, []): + + # NOTE: Insert jump beyond handlers. + # NOTE: program.jump_forward/absolute(...) + # NOTE: Insert end finally at end of handlers as well as where "ret" occurs. + + if exception.catch_type != 0: + program.pop_block() + + # Only advance the JVM position after sneaking in extra Python + # instructions. + + self.java_position = next_java_position + + def process_bytecode(self, mnemonic, number_of_arguments, code, program): + + """ + Process a bytecode instruction with the given 'mnemonic' and + 'number_of_arguments'. The 'code' parameter contains the full method + code so that argument data can be inspected. The 'program' parameter is + used to produce a Python translation of the instruction. + """ + + if number_of_arguments is not None: + arguments = [] + for j in range(0, number_of_arguments): + arguments.append(ord(code[self.java_position + 1 + j])) + + # Call the handler. + + getattr(self, mnemonic)(arguments, program) + return number_of_arguments + else: + # Call the handler. 
+ + return getattr(self, mnemonic)(code[self.java_position+1:], program) + + java_bytecodes = { + # code : (mnemonic, number of following bytes, change in stack) + 0 : ("nop", 0), + 1 : ("aconst_null", 0), + 2 : ("iconst_m1", 0), + 3 : ("iconst_0", 0), + 4 : ("iconst_1", 0), + 5 : ("iconst_2", 0), + 6 : ("iconst_3", 0), + 7 : ("iconst_4", 0), + 8 : ("iconst_5", 0), + 9 : ("lconst_0", 0), + 10 : ("lconst_1", 0), + 11 : ("fconst_0", 0), + 12 : ("fconst_1", 0), + 13 : ("fconst_2", 0), + 14 : ("dconst_0", 0), + 15 : ("dconst_1", 0), + 16 : ("bipush", 1), + 17 : ("sipush", 2), + 18 : ("ldc", 1), + 19 : ("ldc_w", 2), + 20 : ("ldc2_w", 2), + 21 : ("iload", 1), + 22 : ("lload", 1), + 23 : ("fload", 1), + 24 : ("dload", 1), + 25 : ("aload", 1), + 26 : ("iload_0", 0), + 27 : ("iload_1", 0), + 28 : ("iload_2", 0), + 29 : ("iload_3", 0), + 30 : ("lload_0", 0), + 31 : ("lload_1", 0), + 32 : ("lload_2", 0), + 33 : ("lload_3", 0), + 34 : ("fload_0", 0), + 35 : ("fload_1", 0), + 36 : ("fload_2", 0), + 37 : ("fload_3", 0), + 38 : ("dload_0", 0), + 39 : ("dload_1", 0), + 40 : ("dload_2", 0), + 41 : ("dload_3", 0), + 42 : ("aload_0", 0), + 43 : ("aload_1", 0), + 44 : ("aload_2", 0), + 45 : ("aload_3", 0), + 46 : ("iaload", 0), + 47 : ("laload", 0), + 48 : ("faload", 0), + 49 : ("daload", 0), + 50 : ("aaload", 0), + 51 : ("baload", 0), + 52 : ("caload", 0), + 53 : ("saload", 0), + 54 : ("istore", 1), + 55 : ("lstore", 1), + 56 : ("fstore", 1), + 57 : ("dstore", 1), + 58 : ("astore", 1), + 59 : ("istore_0", 0), + 60 : ("istore_1", 0), + 61 : ("istore_2", 0), + 62 : ("istore_3", 0), + 63 : ("lstore_0", 0), + 64 : ("lstore_1", 0), + 65 : ("lstore_2", 0), + 66 : ("lstore_3", 0), + 67 : ("fstore_0", 0), + 68 : ("fstore_1", 0), + 69 : ("fstore_2", 0), + 70 : ("fstore_3", 0), + 71 : ("dstore_0", 0), + 72 : ("dstore_1", 0), + 73 : ("dstore_2", 0), + 74 : ("dstore_3", 0), + 75 : ("astore_0", 0), + 76 : ("astore_1", 0), + 77 : ("astore_2", 0), + 78 : ("astore_3", 0), + 79 : ("iastore", 0), + 80 
: ("lastore", 0), + 81 : ("fastore", 0), + 82 : ("dastore", 0), + 83 : ("aastore", 0), + 84 : ("bastore", 0), + 85 : ("castore", 0), + 86 : ("sastore", 0), + 87 : ("pop", 0), + 88 : ("pop2", 0), + 89 : ("dup", 0), + 90 : ("dup_x1", 0), + 91 : ("dup_x2", 0), + 92 : ("dup2", 0), + 93 : ("dup2_x1", 0), + 94 : ("dup2_x2", 0), + 95 : ("swap", 0), + 96 : ("iadd", 0), + 97 : ("ladd", 0), + 98 : ("fadd", 0), + 99 : ("dadd", 0), + 100 : ("isub", 0), + 101 : ("lsub", 0), + 102 : ("fsub", 0), + 103 : ("dsub", 0), + 104 : ("imul", 0), + 105 : ("lmul", 0), + 106 : ("fmul", 0), + 107 : ("dmul", 0), + 108 : ("idiv", 0), + 109 : ("ldiv", 0), + 110 : ("fdiv", 0), + 111 : ("ddiv", 0), + 112 : ("irem", 0), + 113 : ("lrem", 0), + 114 : ("frem", 0), + 115 : ("drem", 0), + 116 : ("ineg", 0), + 117 : ("lneg", 0), + 118 : ("fneg", 0), + 119 : ("dneg", 0), + 120 : ("ishl", 0), + 121 : ("lshl", 0), + 122 : ("ishr", 0), + 123 : ("lshr", 0), + 124 : ("iushr", 0), + 125 : ("lushr", 0), + 126 : ("iand", 0), + 127 : ("land", 0), + 128 : ("ior", 0), + 129 : ("lor", 0), + 130 : ("ixor", 0), + 131 : ("lxor", 0), + 132 : ("iinc", 2), + 133 : ("i2l", 0), + 134 : ("i2f", 0), + 135 : ("i2d", 0), + 136 : ("l2i", 0), + 137 : ("l2f", 0), + 138 : ("l2d", 0), + 139 : ("f2i", 0), + 140 : ("f2l", 0), + 141 : ("f2d", 0), + 142 : ("d2i", 0), + 143 : ("d2l", 0), + 144 : ("d2f", 0), + 145 : ("i2b", 0), + 146 : ("i2c", 0), + 147 : ("i2s", 0), + 148 : ("lcmp", 0), + 149 : ("fcmpl", 0), + 150 : ("fcmpg", 0), + 151 : ("dcmpl", 0), + 152 : ("dcmpg", 0), + 153 : ("ifeq", 2), + 154 : ("ifne", 2), + 155 : ("iflt", 2), + 156 : ("ifge", 2), + 157 : ("ifgt", 2), + 158 : ("ifle", 2), + 159 : ("if_icmpeq", 2), + 160 : ("if_icmpne", 2), + 161 : ("if_icmplt", 2), + 162 : ("if_icmpge", 2), + 163 : ("if_icmpgt", 2), + 164 : ("if_icmple", 2), + 165 : ("if_acmpeq", 2), + 166 : ("if_acmpne", 2), + 167 : ("goto", 2), + 168 : ("jsr", 2), + 169 : ("ret", 1), + 170 : ("tableswitch", None), # variable number of arguments + 171 : 
("lookupswitch", None), # variable number of arguments + 172 : ("ireturn", 0), + 173 : ("lreturn", 0), + 174 : ("freturn", 0), + 175 : ("dreturn", 0), + 176 : ("areturn", 0), + 177 : ("return_", 0), + 178 : ("getstatic", 2), + 179 : ("putstatic", 2), + 180 : ("getfield", 2), + 181 : ("putfield", 2), + 182 : ("invokevirtual", 2), + 183 : ("invokespecial", 2), + 184 : ("invokestatic", 2), + 185 : ("invokeinterface", 4), + 187 : ("new", 2), + 188 : ("newarray", 1), + 189 : ("anewarray", 2), + 190 : ("arraylength", 0), + 191 : ("athrow", 0), + 192 : ("checkcast", 2), + 193 : ("instanceof", 2), + 194 : ("monitorenter", 0), + 195 : ("monitorexit", 0), + 196 : ("wide", None), # 3 or 5 arguments, stack changes according to modified element + 197 : ("multianewarray", 3), + 198 : ("ifnull", 2), + 199 : ("ifnonnull", 2), + 200 : ("goto_w", 4), + 201 : ("jsr_w", 4), + } + +class BytecodeDisassembler(BytecodeReader): + + "A Java bytecode disassembler." + + bytecode_methods = [spec[0] for spec in BytecodeReader.java_bytecodes.values()] + + def __getattr__(self, name): + if name in self.bytecode_methods: + print "%5s %s" % (self.java_position, name), + return self.generic + else: + raise AttributeError, name + + def generic(self, arguments, program): + print arguments + + def lookupswitch(self, code, program): + print "%5s lookupswitch" % (self.java_position,), + d, r = divmod(self.java_position + 1, 4) + to_boundary = (4 - r) % 4 + code = code[to_boundary:] + default = classfile.u4(code[0:4]) + npairs = classfile.u4(code[4:8]) + print default, npairs + return to_boundary + 8 + npairs * 8 + + def tableswitch(self, code, program): + print "%5s tableswitch" % (self.java_position,), + d, r = divmod(self.java_position + 1, 4) + to_boundary = (4 - r) % 4 + code = code[to_boundary:] + default = classfile.u4(code[0:4]) + low = classfile.u4(code[4:8]) + high = classfile.u4(code[8:12]) + print default, low, high + return to_boundary + 12 + (high - low + 1) * 4 + +class 
BytecodeDisassemblerProgram: + position = 0 + def setup_except(self, target): + print "(setup_except %s)" % target + def setup_finally(self, target): + print "(setup_finally %s)" % target + def end_exception(self): + print "(end_exception)" + def start_handler(self, exc_name, class_file): + print "(start_handler %s)" % exc_name + def pop_block(self): + print "(pop_block)" + +class BytecodeTranslator(BytecodeReader): + + "A Java bytecode translator which uses a Python bytecode writer." + + def aaload(self, arguments, program): + # NOTE: No type checking performed. + program.binary_subscr() + + def aastore(self, arguments, program): + # NOTE: No type checking performed. + # Stack: arrayref, index, value + program.rot_three() # Stack: value, arrayref, index + program.store_subscr() + + def aconst_null(self, arguments, program): + program.load_const(None) + + def aload(self, arguments, program): + program.load_fast(arguments[0]) + + def aload_0(self, arguments, program): + program.load_fast(0) + + def aload_1(self, arguments, program): + program.load_fast(1) + + def aload_2(self, arguments, program): + program.load_fast(2) + + def aload_3(self, arguments, program): + program.load_fast(3) + + def anewarray(self, arguments, program): + # NOTE: Does not raise NegativeArraySizeException. + # NOTE: Not using the index to type the list/array. 
+ index = (arguments[0] << 8) + arguments[1] + self._newarray(program) + + def _newarray(self, program): + program.build_list(0) # Stack: count, list + program.rot_two() # Stack: list, count + program.setup_loop() + program.load_global("range") + program.load_const(0) # Stack: list, count, range, 0 + program.rot_three() # Stack: list, 0, count, range + program.rot_three() # Stack: list, range, 0, count + program.call_function(2) # Stack: list, range_list + program.get_iter() # Stack: list, iter + program.for_iter() # Stack: list, iter, value + program.pop_top() # Stack: list, iter + program.rot_two() # Stack: iter, list + program.dup_top() # Stack: iter, list, list + program.load_attr("append") # Stack: iter, list, append + program.load_const(None) # Stack: iter, list, append, None + program.call_function(1) # Stack: iter, list, None + program.pop_top() # Stack: iter, list + program.rot_two() # Stack: list, iter + program.end_loop() # Back to for_iter above + + def areturn(self, arguments, program): + program.return_value() + + def arraylength(self, arguments, program): + program.load_global("len") # Stack: arrayref, len + program.rot_two() # Stack: len, arrayref + program.call_function(1) + + def astore(self, arguments, program): + program.store_fast(arguments[0]) + + def astore_0(self, arguments, program): + program.store_fast(0) + + def astore_1(self, arguments, program): + program.store_fast(1) + + def astore_2(self, arguments, program): + program.store_fast(2) + + def astore_3(self, arguments, program): + program.store_fast(3) + + def athrow(self, arguments, program): + # NOTE: NullPointerException not raised where null/None is found on the stack. + # If this instruction appears in a finally handler, use end_finally instead. + if self.in_finally: + program.end_finally() + else: + # Wrap the exception in a Python exception. 
+ program.load_global("Exception") # Stack: objectref, Exception + program.rot_two() # Stack: Exception, objectref + program.call_function(1) # Stack: exception + program.raise_varargs(1) + # NOTE: This seems to put another object on the stack. + + baload = aaload + bastore = aastore + + def bipush(self, arguments, program): + program.load_const(signed1(arguments[0])) + + caload = aaload + castore = aastore + + def checkcast(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_python_name() + program.use_external_name(target_name) + program.dup_top() # Stack: objectref, objectref + program.load_const(None) # Stack: objectref, objectref, None + program.compare_op("is") # Stack: objectref, result + program.jump_to_label(1, "next") + program.pop_top() # Stack: objectref + program.dup_top() # Stack: objectref, objectref + program.load_global("isinstance") # Stack: objectref, objectref, isinstance + program.rot_two() # Stack: objectref, isinstance, objectref + load_class_name(self.class_file, target_name, program) + program.call_function(2) # Stack: objectref, result + program.jump_to_label(1, "next") + program.pop_top() # Stack: objectref + program.pop_top() # Stack: + program.use_external_name("java.lang.ClassCastException") + load_class_name(self.class_file, "java.lang.ClassCastException", program) + program.call_function(0) # Stack: exception + # Wrap the exception in a Python exception. + program.load_global("Exception") # Stack: exception, Exception + program.rot_two() # Stack: Exception, exception + program.call_function(1) # Stack: exception + program.raise_varargs(1) + # NOTE: This seems to put another object on the stack. 
+ program.start_label("next") + program.pop_top() # Stack: objectref + + def d2f(self, arguments, program): + pass + + def d2i(self, arguments, program): + program.load_global("int") # Stack: value, int + program.rot_two() # Stack: int, value + program.call_function(1) # Stack: result + + d2l = d2i # Preserving Java semantics + + def dadd(self, arguments, program): + # NOTE: No type checking performed. + program.binary_add() + + daload = aaload + dastore = aastore + + def dcmpg(self, arguments, program): + # NOTE: No type checking performed. + program.compare_op(">") + + def dcmpl(self, arguments, program): + # NOTE: No type checking performed. + program.compare_op("<") + + def dconst_0(self, arguments, program): + program.load_const(0.0) + + def dconst_1(self, arguments, program): + program.load_const(1.0) + + def ddiv(self, arguments, program): + # NOTE: No type checking performed. + program.binary_divide() + + dload = aload + dload_0 = aload_0 + dload_1 = aload_1 + dload_2 = aload_2 + dload_3 = aload_3 + + def dmul(self, arguments, program): + # NOTE: No type checking performed. + program.binary_multiply() + + def dneg(self, arguments, program): + # NOTE: No type checking performed. + program.unary_negative() + + def drem(self, arguments, program): + # NOTE: No type checking performed. + program.binary_modulo() + + dreturn = areturn + dstore = astore + dstore_0 = astore_0 + dstore_1 = astore_1 + dstore_2 = astore_2 + dstore_3 = astore_3 + + def dsub(self, arguments, program): + # NOTE: No type checking performed. + program.binary_subtract() + + def dup(self, arguments, program): + program.dup_top() + + def dup_x1(self, arguments, program): + # Ignoring computational type categories. + program.dup_top() + program.rot_three() + + def dup_x2(self, arguments, program): + # Ignoring computational type categories. 
+ program.dup_top() + program.rot_four() + + dup2 = dup # Ignoring computational type categories + dup2_x1 = dup_x1 # Ignoring computational type categories + dup2_x2 = dup_x2 # Ignoring computational type categories + + def f2d(self, arguments, program): + pass # Preserving Java semantics + + def f2i(self, arguments, program): + program.load_global("int") # Stack: value, int + program.rot_two() # Stack: int, value + program.call_function(1) # Stack: result + + f2l = f2i # Preserving Java semantics + fadd = dadd + faload = daload + fastore = dastore + fcmpg = dcmpg + fcmpl = dcmpl + fconst_0 = dconst_0 + fconst_1 = dconst_1 + + def fconst_2(self, arguments, program): + program.load_const(2.0) + + fdiv = ddiv + fload = dload + fload_0 = dload_0 + fload_1 = dload_1 + fload_2 = dload_2 + fload_3 = dload_3 + fmul = dmul + fneg = dneg + frem = drem + freturn = dreturn + fstore = dstore + fstore_0 = dstore_0 + fstore_1 = dstore_1 + fstore_2 = dstore_2 + fstore_3 = dstore_3 + fsub = dsub + + def getfield(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_python_name() + # NOTE: Using the string version of the name which may contain incompatible characters. + program.load_attr(str(target_name)) + + def getstatic(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target = self.class_file.constants[index - 1] + target_name = target.get_python_name() + + # Get the class name instead of the fully qualified name. + + full_class_name = target.get_class().get_python_name() + program.use_external_name(full_class_name) + load_class_name(self.class_file, full_class_name, program) + # NOTE: Using the string version of the name which may contain incompatible characters. 
+ program.load_attr(str(target_name)) + + def goto(self, arguments, program): + offset = signed2((arguments[0] << 8) + arguments[1]) + java_absolute = self.java_position + offset + program.jump_absolute(self.position_mapping[java_absolute]) + + def goto_w(self, arguments, program): + offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) + java_absolute = self.java_position + offset + program.jump_absolute(self.position_mapping[java_absolute]) + + def i2b(self, arguments, program): + pass + + def i2c(self, arguments, program): + pass + + def i2d(self, arguments, program): + program.load_global("float") # Stack: value, float + program.rot_two() # Stack: float, value + program.call_function(1) # Stack: result + + i2f = i2d # Not distinguishing between float and double + + def i2l(self, arguments, program): + pass # Preserving Java semantics + + def i2s(self, arguments, program): + pass # Not distinguishing between int and short + + iadd = fadd + iaload = faload + + def iand(self, arguments, program): + # NOTE: No type checking performed. 
+ program.binary_and() + + iastore = fastore + + def iconst_m1(self, arguments, program): + program.load_const(-1) + + def iconst_0(self, arguments, program): + program.load_const(0) + + def iconst_1(self, arguments, program): + program.load_const(1) + + def iconst_2(self, arguments, program): + program.load_const(2) + + def iconst_3(self, arguments, program): + program.load_const(3) + + def iconst_4(self, arguments, program): + program.load_const(4) + + def iconst_5(self, arguments, program): + program.load_const(5) + + idiv = fdiv + + def _if_xcmpx(self, arguments, program, op): + offset = signed2((arguments[0] << 8) + arguments[1]) + java_absolute = self.java_position + offset + program.compare_op(op) + program.jump_to_label(0, "next") # skip if false + program.pop_top() + program.jump_absolute(self.position_mapping[java_absolute]) + program.start_label("next") + program.pop_top() + + def if_acmpeq(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "is") + + def if_acmpne(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "is not") + + def if_icmpeq(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "==") + + def if_icmpne(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "!=") + + def if_icmplt(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "<") + + def if_icmpge(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, ">=") + + def if_icmpgt(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, ">") + + def if_icmple(self, arguments, program): + # NOTE: No type checking performed. + self._if_xcmpx(arguments, program, "<=") + + def ifeq(self, arguments, program): + # NOTE: No type checking performed. 
+ program.load_const(0) + self._if_xcmpx(arguments, program, "==") + + def ifne(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, "!=") + + def iflt(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, "<") + + def ifge(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, ">=") + + def ifgt(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, ">") + + def ifle(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(0) + self._if_xcmpx(arguments, program, "<=") + + def ifnonnull(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(None) + self._if_xcmpx(arguments, program, "is not") + + def ifnull(self, arguments, program): + # NOTE: No type checking performed. + program.load_const(None) + self._if_xcmpx(arguments, program, "is") + + def iinc(self, arguments, program): + # NOTE: No type checking performed. + program.load_fast(arguments[0]) + program.load_const(arguments[1]) + program.binary_add() + program.store_fast(arguments[0]) + + iload = fload + iload_0 = fload_0 + iload_1 = fload_1 + iload_2 = fload_2 + iload_3 = fload_3 + imul = fmul + ineg = fneg + + def instanceof(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_python_name() + program.use_external_name(target_name) + program.load_global("isinstance") # Stack: objectref, isinstance + program.rot_two() # Stack: isinstance, objectref + load_class_name(self.class_file, target_name, program) + program.call_function(2) # Stack: result + + def _invoke(self, target_name, program): + # NOTE: Using the string version of the name which may contain incompatible characters. 
+ program.load_attr(str(target_name)) # Stack: tuple, method + program.rot_two() # Stack: method, tuple + program.call_function_var(0) # Stack: result + + def invokeinterface(self, arguments, program): + # NOTE: This implementation does not perform the necessary checks for + # NOTE: signature-based polymorphism. + # NOTE: Java rules not specifically obeyed. + index = (arguments[0] << 8) + arguments[1] + # NOTE: "count" == nargs + 1, apparently. + count = arguments[2] - 1 + target_name = self.class_file.constants[index - 1].get_python_name() + # Stack: objectref, arg1, arg2, ... + program.build_tuple(count) # Stack: objectref, tuple + program.rot_two() # Stack: tuple, objectref + # NOTE: The interface information is not used to discover the correct + # NOTE: method. + self._invoke(target_name, program) + + def invokespecial(self, arguments, program): + # NOTE: This implementation does not perform the necessary checks for + # NOTE: signature-based polymorphism. + # NOTE: Java rules not specifically obeyed. + index = (arguments[0] << 8) + arguments[1] + target = self.class_file.constants[index - 1] + original_name = target.get_name() + target_name = target.get_python_name() + + # Get the number of parameters from the descriptor. + + count = len(target.get_descriptor()[0]) + + # First, we build a tuple of the reference and arguments. + + program.build_tuple(count + 1) # Stack: tuple + + # Get the class name instead of the fully qualified name. + # NOTE: Not bothering with Object initialisation. + + full_class_name = target.get_class().get_python_name() + if full_class_name not in ("java.lang.Object", "java.lang.Exception"): + program.use_external_name(full_class_name) + load_class_name(self.class_file, full_class_name, program) + self._invoke(target_name, program) + + # Remove Python None return value. 
+ + if str(original_name) == "": + program.pop_top() + + def invokestatic(self, arguments, program): + # NOTE: This implementation does not perform the necessary checks for + # NOTE: signature-based polymorphism. + # NOTE: Java rules not specifically obeyed. + index = (arguments[0] << 8) + arguments[1] + target = self.class_file.constants[index - 1] + target_name = target.get_python_name() + + # Get the number of parameters from the descriptor. + + count = len(target.get_descriptor()[0]) + + # Stack: arg1, arg2, ... + + program.build_tuple(count) # Stack: tuple + + # Use the class to provide access to static methods. + # Get the class name instead of the fully qualified name. + + full_class_name = target.get_class().get_python_name() + if full_class_name not in ("java.lang.Object", "java.lang.Exception"): + program.use_external_name(full_class_name) + load_class_name(self.class_file, full_class_name, program) + self._invoke(target_name, program) + + def invokevirtual (self, arguments, program): + # NOTE: This implementation does not perform the necessary checks for + # NOTE: signature-based polymorphism. + # NOTE: Java rules not specifically obeyed. + index = (arguments[0] << 8) + arguments[1] + target = self.class_file.constants[index - 1] + target_name = target.get_python_name() + # Get the number of parameters from the descriptor. + count = len(target.get_descriptor()[0]) + # Stack: objectref, arg1, arg2, ... + program.build_tuple(count) # Stack: objectref, tuple + program.rot_two() # Stack: tuple, objectref + self._invoke(target_name, program) + + def ior(self, arguments, program): + # NOTE: No type checking performed. + program.binary_or() + + irem = frem + ireturn = freturn + + def ishl(self, arguments, program): + # NOTE: No type checking performed. + # NOTE: Not verified. + program.binary_lshift() + + def ishr(self, arguments, program): + # NOTE: No type checking performed. + # NOTE: Not verified. 
+ program.binary_rshift() + + istore = fstore + istore_0 = fstore_0 + istore_1 = fstore_1 + istore_2 = fstore_2 + istore_3 = fstore_3 + isub = fsub + iushr = ishr # Ignoring distinctions between arithmetic and logical shifts + + def ixor(self, arguments, program): + # NOTE: No type checking performed. + program.binary_xor() + + def jsr(self, arguments, program): + offset = signed2((arguments[0] << 8) + arguments[1]) + java_absolute = self.java_position + offset + # Store the address of the next instruction. + program.load_const_ret(self.position_mapping[self.java_position + 3]) + program.jump_absolute(self.position_mapping[java_absolute]) + + def jsr_w(self, arguments, program): + offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) + java_absolute = self.java_position + offset + # Store the address of the next instruction. + program.load_const_ret(self.position_mapping[self.java_position + 5]) + program.jump_absolute(self.position_mapping[java_absolute]) + + l2d = i2d + l2f = i2f + + def l2i(self, arguments, program): + pass # Preserving Java semantics + + ladd = iadd + laload = iaload + land = iand + lastore = iastore + + def lcmp(self, arguments, program): + # NOTE: No type checking performed. + program.dup_topx(2) # Stack: value1, value2, value1, value2 + program.compare_op(">") # Stack: value1, value2, result + program.jump_to_label(0, "equals") + # True - produce result and branch. + program.pop_top() # Stack: value1, value2 + program.pop_top() # Stack: value1 + program.pop_top() # Stack: + program.load_const(1) # Stack: 1 + program.jump_to_label(None, "next") + # False - test equality. + program.start_label("equals") + program.pop_top() # Stack: value1, value2 + program.dup_topx(2) # Stack: value1, value2, value1, value2 + program.compare_op("==") # Stack: value1, value2, result + program.jump_to_label(0, "less") + # True - produce result and branch. 
+ program.pop_top() # Stack: value1, value2 + program.pop_top() # Stack: value1 + program.pop_top() # Stack: + program.load_const(0) # Stack: 0 + program.jump_to_label(None, "next") + # False - produce result. + program.start_label("less") + program.pop_top() # Stack: value1, value2 + program.pop_top() # Stack: value1 + program.pop_top() # Stack: + program.load_const(-1) # Stack: -1 + program.start_label("next") + + lconst_0 = iconst_0 + lconst_1 = iconst_1 + + def ldc(self, arguments, program): + const = self.class_file.constants[arguments[0] - 1] + if isinstance(const, classfile.StringInfo): + program.use_external_name("java.lang.String") + program.load_global("java") + program.load_attr("lang") + program.load_attr("String") + program.load_const(const.get_value()) + program.call_function(1) + else: + program.load_const(const.get_value()) + + def ldc_w(self, arguments, program): + const = self.class_file.constants[(arguments[0] << 8) + arguments[1] - 1] + if isinstance(const, classfile.StringInfo): + program.use_external_name("java.lang.String") + program.load_global("java") + program.load_attr("lang") + program.load_attr("String") + program.load_const(const.get_value()) + program.call_function(1) + else: + program.load_const(const.get_value()) + + ldc2_w = ldc_w + ldiv = idiv + lload = iload + lload_0 = iload_0 + lload_1 = iload_1 + lload_2 = iload_2 + lload_3 = iload_3 + lmul = imul + lneg = ineg + + def lookupswitch(self, code, program): + + # Find the offset to the next 4 byte boundary in the code. + + d, r = divmod(self.java_position + 1, 4) + to_boundary = (4 - r) % 4 + + # Get the pertinent arguments. + + code = code[to_boundary:] + default = classfile.u4(code[0:4]) + npairs = classfile.u4(code[4:8]) + + # Process the pairs. + # NOTE: This is not the most optimal implementation. 
+ + pair_index = 8 + for pair in range(0, npairs): + match = classfile.u4(code[pair_index:pair_index+4]) + offset = classfile.s4(code[pair_index+4:pair_index+8]) + # Calculate the branch target. + java_absolute = self.java_position + offset + # Generate branching code. + program.dup_top() # Stack: key, key + program.load_const(match) # Stack: key, key, match + program.compare_op("==") # Stack: key, result + program.jump_to_label(0, "end") + program.pop_top() # Stack: key + program.pop_top() # Stack: + program.jump_absolute(self.position_mapping[java_absolute]) + # Generate the label for the end of the branching code. + program.start_label("end") + program.pop_top() # Stack: key + # Update the index. + pair_index += 4 + + # Generate the default. + + java_absolute = self.java_position + default + program.jump_absolute(self.position_mapping[java_absolute]) + return pair_index + to_boundary + + lor = ior + lrem = irem + lreturn = ireturn + lshl = ishl + lshr = ishr + lstore = istore + lstore_0 = istore_0 + lstore_1 = istore_1 + lstore_2 = istore_2 + lstore_3 = istore_3 + lsub = isub + lushr = iushr + lxor = ixor + + def monitorenter(self, arguments, program): + # NOTE: To be implemented. + pass + + def monitorexit(self, arguments, program): + # NOTE: To be implemented. 
+ pass + + def multianewarray(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + dimensions = arguments[2] + # Stack: count1, ..., countN-1, countN + self._newarray(program) # Stack: count1, ..., countN-1, list + for dimension in range(1, dimensions): + program.rot_two() # Stack: count1, ..., list, countN-1 + program.build_list(0) # Stack: count1, ..., list, countN-1, new-list + program.rot_three() # Stack: count1, ..., new-list, list, countN-1 + program.setup_loop() + program.load_const(0) # Stack: count1, ..., new-list, list, countN-1, 0 + program.rot_two() # Stack: count1, ..., new-list, list, 0, countN-1 + program.load_global("range") # Stack: count1, ..., new-list, list, 0, countN-1, range + program.rot_three() # Stack: count1, ..., new-list, list, range, 0, countN-1 + program.call_function(2) # Stack: count1, ..., new-list, list, range-list + program.get_iter() # Stack: count1, ..., new-list, list, iter + program.for_iter() # Stack: count1, ..., new-list, list, iter, value + program.pop_top() # Stack: count1, ..., new-list, list, iter + program.rot_three() # Stack: count1, ..., iter, new-list, list + program.slice_0() # Stack: count1, ..., iter, new-list, list[:] + program.dup_top() # Stack: count1, ..., iter, new-list, list[:], list[:] + program.rot_three() # Stack: count1, ..., iter, list[:], new-list, list[:] + program.rot_two() # Stack: count1, ..., iter, list[:], list[:], new-list + program.dup_top() # Stack: count1, ..., iter, list[:], list[:], new-list, new-list + program.load_attr("append") # Stack: count1, ..., iter, list[:], list[:], new-list, append + program.rot_three() # Stack: count1, ..., iter, list[:], append, list[:], new-list + program.rot_three() # Stack: count1, ..., iter, list[:], new-list, append, list[:] + program.call_function(1) # Stack: count1, ..., iter, list[:], new-list, None + program.pop_top() # Stack: count1, ..., iter, list[:], new-list + program.rot_two() # Stack: count1, ..., iter, new-list, list[:] + 
program.rot_three() # Stack: count1, ..., list[:], iter, new-list + program.rot_three() # Stack: count1, ..., new-list, list[:], iter + program.end_loop() # Stack: count1, ..., new-list, list[:], iter + program.pop_top() # Stack: count1, ..., new-list + + def new(self, arguments, program): + # This operation is considered to be the same as the calling of the + # initialisation method of the given class with no arguments. + + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_python_name() + program.use_external_name(target_name) + + # NOTE: Using the string version of the name which may contain incompatible characters. + program.load_global("object") + program.load_attr("__new__") + load_class_name(self.class_file, target_name, program) + program.call_function(1) + + def newarray(self, arguments, program): + # NOTE: Does not raise NegativeArraySizeException. + # NOTE: Not using the arguments to type the list/array. + self._newarray(program) + + def nop(self, arguments, program): + pass + + def pop(self, arguments, program): + program.pop_top() + + pop2 = pop # ignoring Java stack value distinctions + + def putfield(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target_name = self.class_file.constants[index - 1].get_python_name() + program.rot_two() + # NOTE: Using the string version of the name which may contain incompatible characters. + program.store_attr(str(target_name)) + + def putstatic(self, arguments, program): + index = (arguments[0] << 8) + arguments[1] + target = self.class_file.constants[index - 1] + target_name = target.get_python_name() + + # Get the class name instead of the fully qualified name. + + full_class_name = target.get_class().get_python_name() + program.use_external_name(full_class_name) + load_class_name(self.class_file, full_class_name, program) + # NOTE: Using the string version of the name which may contain incompatible characters. 
+ program.store_attr(str(target_name)) + + def ret(self, arguments, program): + program.ret(arguments[0]) + # Indicate that the finally handler is probably over. + # NOTE: This is seemingly not guaranteed. + self.in_finally = 0 + + def return_(self, arguments, program): + program.load_const(None) + program.return_value() + + saload = laload + sastore = lastore + + def sipush(self, arguments, program): + program.load_const(signed2((arguments[0] << 8) + arguments[1])) + + def swap(self, arguments, program): + program.rot_two() + + def tableswitch(self, code, program): + + # Find the offset to the next 4 byte boundary in the code. + + d, r = divmod(self.java_position + 1, 4) + to_boundary = (4 - r) % 4 + + # Get the pertinent arguments. + + code = code[to_boundary:] + default = classfile.u4(code[0:4]) + low = classfile.u4(code[4:8]) + high = classfile.u4(code[8:12]) + + # Process the jump entries. + # NOTE: This is not the most optimal implementation. + + jump_index = 12 + for jump in range(low, high + 1): + offset = classfile.s4(code[jump_index:jump_index + 4]) + + # Calculate the branch target. + + java_absolute = self.java_position + offset + + # Generate branching code. + + program.dup_top() # Stack: key, key + program.load_const(jump) # Stack: key, key, jump + program.compare_op("==") # Stack: key, result + program.jump_to_label(0, "end") + program.pop_top() # Stack: key + program.pop_top() # Stack: + program.jump_absolute(self.position_mapping[java_absolute]) + + # Generate the label for the end of the branching code. + + program.start_label("end") + program.pop_top() # Stack: key + + # Update the index. + + jump_index += 4 + + # Generate the default. + + java_absolute = self.java_position + default + program.jump_absolute(self.position_mapping[java_absolute]) + return jump_index + to_boundary + + def wide(self, code, program): + # NOTE: To be implemented. 
+ return number_of_arguments + +def disassemble(class_file, method): + disassembler = BytecodeDisassembler(class_file) + disassembler.process(method, BytecodeDisassemblerProgram()) + +class ClassTranslator: + + """ + A class which provides a wrapper around a class file and the means to + translate the represented class into a Python class. + """ + + def __init__(self, class_file): + + "Initialise the object with the given 'class_file'." + + self.class_file = class_file + self.filename = "" + + for attribute in self.class_file.attributes: + if isinstance(attribute, classfile.SourceFileAttributeInfo): + self.filename = str(attribute.get_name()) + + def translate_method(self, method): + + "Translate the given 'method' - an object obtained from the class file." + + translator = BytecodeTranslator(self.class_file) + writer = BytecodeWriter() + translator.process(method, writer) + return translator, writer + + def make_method(self, real_method_name, methods, global_names, namespace): + + """ + Make a dispatcher method with the given 'real_method_name', providing + dispatch to the supplied type-sensitive 'methods', accessing the given + 'global_names' where necessary, and storing the new method in the + 'namespace' provided. + """ + + if real_method_name == "": + method_name = "__init__" + else: + method_name = real_method_name + + # Where only one method exists, just make an alias. + + if len(methods) == 1: + method, fn = methods[0] + namespace[method_name] = fn + return + + # Write a simple bytecode dispatching mechanism. + + program = BytecodeWriter() + + # Remember whether any of the methods are static. + # NOTE: This should be an all or nothing situation. + + method_is_static = 0 + + # NOTE: The code below should use dictionary-based dispatch for better performance. 
+ + for method, fn in methods: + method_is_static = real_method_name != "" and method_is_static or \ + classfile.has_flags(method.access_flags, [classfile.STATIC]) + + if method_is_static: + program.load_fast(0) # Stack: arguments + else: + program.load_fast(1) # Stack: arguments + + program.setup_loop() + program.load_const(1) # Stack: arguments, 1 + + if method_is_static: + program.store_fast(1) # Stack: arguments (found = 1) + else: + program.store_fast(2) # Stack: arguments (found = 1) + + # Emit a list of parameter types. + + descriptor_types = method.get_descriptor()[0] + for descriptor_type in descriptor_types: + base_type, object_type, array_type = descriptor_type + python_type = classfile.descriptor_base_type_mapping[base_type] + if python_type == "instance": + # NOTE: This will need extending. + python_type = object_type + program.load_global(python_type) # Stack: arguments, type, ... + program.build_list(len(descriptor_types)) + # Stack: arguments, types + # Make a map of arguments and types. + program.load_const(None) # Stack: arguments, types, None + program.rot_three() # Stack: None, arguments, types + program.build_tuple(3) # Stack: tuple + program.load_global("map") # Stack: tuple, map + program.rot_two() # Stack: map, tuple + program.call_function_var(0) # Stack: list (mapping arguments to types) + # Loop over each pair. + program.get_iter() # Stack: iter + program.for_iter() # Stack: iter, (argument, type) + program.unpack_sequence(2) # Stack: iter, type, argument + program.dup_top() # Stack: iter, type, argument, argument + program.load_const(None) # Stack: iter, type, argument, argument, None + program.compare_op("is") # Stack: iter, type, argument, result + # Missing argument? 
+ program.jump_to_label(0, "present") + program.pop_top() # Stack: iter, type, argument + program.pop_top() # Stack: iter, type + program.pop_top() # Stack: iter + program.load_const(0) # Stack: iter, 0 + + if method_is_static: + program.store_fast(1) # Stack: iter (found = 0) + else: + program.store_fast(2) # Stack: iter (found = 0) + + program.break_loop() + # Argument was present. + program.start_label("present") + program.pop_top() # Stack: iter, type, argument + program.rot_two() # Stack: iter, argument, type + program.dup_top() # Stack: iter, argument, type, type + program.load_const(None) # Stack: iter, argument, type, type, None + program.compare_op("is") # Stack: iter, argument, type, result + # Missing parameter type? + program.jump_to_label(0, "present") + program.pop_top() # Stack: iter, argument, type + program.pop_top() # Stack: iter, argument + program.pop_top() # Stack: iter + program.load_const(0) # Stack: iter, 0 + + if method_is_static: + program.store_fast(1) # Stack: iter (found = 0) + else: + program.store_fast(2) # Stack: iter (found = 0) + + program.break_loop() + # Parameter was present. + program.start_label("present") + program.pop_top() # Stack: iter, argument, type + program.build_tuple(2) # Stack: iter, (argument, type) + program.load_global("isinstance") # Stack: iter, (argument, type), isinstance + program.rot_two() # Stack: iter, isinstance, (argument, type) + program.call_function_var(0) # Stack: iter, result + program.jump_to_label(1, "match") + program.pop_top() # Stack: iter + program.load_const(0) # Stack: iter, 0 + + if method_is_static: + program.store_fast(1) # Stack: iter (found = 0) + else: + program.store_fast(2) # Stack: iter (found = 0) + + program.break_loop() + # Argument type and parameter type matched. + program.start_label("match") + program.pop_top() # Stack: iter + program.end_loop() # Stack: + # If all the parameters matched, call the method. 
+ + if method_is_static: + program.load_fast(1) # Stack: match + else: + program.load_fast(2) # Stack: match + + program.jump_to_label(0, "failed") + # All the parameters matched. + program.pop_top() # Stack: + + if method_is_static: + program.load_fast(0) # Stack: arguments + program.load_global(str(self.class_file.this_class.get_python_name())) + # Stack: arguments, class + else: + program.load_fast(1) # Stack: arguments + program.load_fast(0) # Stack: arguments, self + + program.load_attr(str(method.get_python_name())) + # Stack: arguments, method + program.rot_two() # Stack: method, arguments + program.call_function_var(0) # Stack: result + program.return_value() + # Try the next method if arguments or parameters were missing or incorrect. + program.start_label("failed") + program.pop_top() # Stack: + + # Raise an exception if nothing matched. + # NOTE: Improve this. + + program.load_const("No matching method") + program.raise_varargs(1) + program.load_const(None) + program.return_value() + + # Add the code as a method in the namespace. + # NOTE: One actual parameter, flags as 71 apparently means that a list + # NOTE: parameter is used in a method. + + if method_is_static: + nargs = 0 + else: + nargs = 1 + nlocals = program.max_locals + 1 + + code = new.code(nargs, nlocals, program.max_stack_depth, 71, program.get_output(), + tuple(program.get_constants()), tuple(program.get_names()), tuple(self.make_varnames(nlocals, method_is_static)), + self.filename, method_name, 0, "") + fn = new.function(code, global_names) + + if method_is_static: + fn = staticmethod(fn) + + namespace[method_name] = fn + + def process(self, global_names): + + """ + Process the class, storing it in the 'global_names' dictionary provided. + Return a tuple containing the class and a list of external names + referenced by the class's methods. + """ + + namespace = {} + + # Make the fields. 
+ + for field in self.class_file.fields: + if classfile.has_flags(field.access_flags, [classfile.STATIC]): + field_name = str(field.get_python_name()) + namespace[field_name] = None + + # Make the methods. + + real_methods = {} + external_names = [] + + for method in self.class_file.methods: + real_method_name = str(method.get_name()) + method_name = str(method.get_python_name()) + + translator, writer = self.translate_method(method) + + # Add external names to the master list. + + for external_name in writer.external_names: + if external_name not in external_names: + external_names.append(external_name) + + # Fix up special class initialisation methods and static methods. + + method_is_static = real_method_name != "" and classfile.has_flags(method.access_flags, [classfile.STATIC]) + if method_is_static: + nargs = len(method.get_descriptor()[0]) + else: + nargs = len(method.get_descriptor()[0]) + 1 + nlocals = writer.max_locals + 1 + flags = 67 + + # NOTE: Add line number table later. + + code = new.code(nargs, nlocals, writer.max_stack_depth, flags, writer.get_output(), + tuple(writer.get_constants()), tuple(writer.get_names()), + tuple(self.make_varnames(nlocals, method_is_static)), self.filename, method_name, 0, "") + + # NOTE: May need more globals. + + fn = new.function(code, global_names) + + # Fix up special class initialisation methods and static methods. + + if method_is_static: + fn = staticmethod(fn) + + # Remember the real method name and the corresponding methods produced. + + if not real_methods.has_key(real_method_name): + real_methods[real_method_name] = [] + real_methods[real_method_name].append((method, fn)) + + # Add the method to the class's namespace. + + namespace[method_name] = fn + + # Define superclasses. + + bases = self.get_base_classes(global_names) + + # Define method dispatchers. 
+ + for real_method_name, methods in real_methods.items(): + if real_method_name != "": + self.make_method(real_method_name, methods, global_names, namespace) + + # Use only the last part of the fully qualified name. + + full_class_name = str(self.class_file.this_class.get_python_name()) + class_name = full_class_name.split(".")[-1] + cls = new.classobj(class_name, bases, namespace) + global_names[cls.__name__] = cls + + return cls, external_names + + def get_base_classes(self, global_names): + + """ + Identify the superclass, then either load it from the given + 'global_names' if available, or import the class from its parent module. + Return a tuple containing all base classes (typically a single element + tuple). + """ + + original_name = str(self.class_file.super_class.get_name()) + full_this_class_name = str(self.class_file.this_class.get_python_name()) + this_class_name_parts = full_this_class_name.split(".") + this_class_module_name = ".".join(this_class_name_parts[:-1]) + full_super_class_name = str(self.class_file.super_class.get_python_name()) + super_class_name_parts = full_super_class_name.split(".") + super_class_name = super_class_name_parts[-1] + super_class_module_name = ".".join(super_class_name_parts[:-1]) + if super_class_module_name == "": + obj = global_names[super_class_name] + elif super_class_module_name == this_class_module_name: + obj = global_names[super_class_name] + else: + #print "Importing", super_class_module_name, super_class_name + obj = __import__(super_class_module_name, global_names, {}, []) + for super_class_name_part in super_class_name_parts[1:] or [super_class_name]: + #print "*", obj, super_class_name_part + obj = getattr(obj, super_class_name_part) + return (obj,) + + def make_varnames(self, nlocals, method_is_static=0): + + """ + A utility method which invents variable names for the given number - + 'nlocals' - of local variables in a method. Returns a list of such + variable names. 
+ + If the optional 'method_is_static' is set to true, do not use "self" as + the first argument name. + """ + + if method_is_static: + l = ["cls"] + else: + l = ["self"] + for i in range(1, nlocals): + l.append("_l%s" % i) + return l[:nlocals] + +# Test functions, useful for tracing generated bytecode operations. + +def _map(*args): + print args + return apply(__builtins__.map, args) + +def _isinstance(*args): + print args + return apply(__builtins__.isinstance, args) + +if __name__ == "__main__": + import sys + import dis + global_names = globals() + #global_names["isinstance"] = _isinstance + #global_names["map"] = _map + for filename in sys.argv[1:]: + f = open(filename, "rb") + c = classfile.ClassFile(f.read()) + translator = ClassTranslator(c) + cls, external_names = translator.process(global_names) + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 754d36821fc8 -r 182cba61327c javaclass/classfile.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/javaclass/classfile.py Fri Jan 21 17:05:06 2005 +0100 @@ -0,0 +1,633 @@ +#!/usr/bin/env python + +""" +Java class file decoder. Specification found at the following URL: +http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html +""" + +import struct # for general decoding of class files + +# Utility functions. + +def u1(data): + return struct.unpack(">B", data[0:1])[0] + +def u2(data): + return struct.unpack(">H", data[0:2])[0] + +def s2(data): + return struct.unpack(">h", data[0:2])[0] + +def u4(data): + return struct.unpack(">L", data[0:4])[0] + +def s4(data): + return struct.unpack(">l", data[0:4])[0] + +def s8(data): + return struct.unpack(">q", data[0:8])[0] + +def f4(data): + return struct.unpack(">f", data[0:4])[0] + +def f8(data): + return struct.unpack(">d", data[0:8])[0] + +# Useful tables and constants. 
# Maps the leading character of a field descriptor to the name of the Python
# type used to stand in for it.

descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share the value
# 0x0020: in the class file format the same bit is used for classes (SUPER)
# and for methods (SYNCHRONIZED).

PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL, SUPER, SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \
0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the sequence 'desired' is set in the bit
    field 'flags'. An empty 'desired' sequence is always satisfied.
    """

    # Combine the desired flags with an explicit loop rather than the
    # 'reduce' built-in, which is not available everywhere.

    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.

class PythonMethodUtils:

    """
    A mix-in producing Python-compatible names for methods. Classes using it
    must provide 'get_name' and 'get_descriptor', the latter returning a tuple
    whose first element is the list of parameter descriptor types.
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):

        """
        Return the method name without any parameter type information,
        mapping the special Java names "<init>" and "<clinit>" to "__init__"
        and "__clinit__" respectively.
        """

        # NOTE: The special names had been reduced to empty strings, making
        # NOTE: the "__clinit__" case unreachable; they are restored here.

        name = str(self.get_name())
        if name == "<init>":
            return "__init__"
        elif name == "<clinit>":
            return "__clinit__"
        else:
            return name

    def get_python_name(self):

        """
        Return the method name qualified with an encoding of the parameter
        types. The "__clinit__" name is returned without qualification.
        """

        name = self.get_unqualified_python_name()
        if name == "__clinit__":
            return name
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the parameter types encoded as a single name fragment."

        parameter_types = self.get_descriptor()[0]
        return self.symbol_sep.join(
            [self._get_type_as_name(descriptor_type) for descriptor_type in parameter_types]
            )

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name fragment for the given 'descriptor_type' - a tuple of
        the form (base type, object type, array type). The optional 's' is a
        suffix accumulated for each enclosing array dimension.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    """
    A mix-in producing dotted Python names from "/"-separated names. Classes
    using it must provide 'get_name'.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Where the name starts with
        "[", it is parsed as a field descriptor and the name of the innermost
        element type is used instead.
        """

        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if not name.startswith("["):
            return name.replace("/", ".")
        else:
            return self._get_type_name(get_field_descriptor(name)).replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the type name for the given 'descriptor_type' tuple, looking
        through any array dimensions to the element type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    """
    A mix-in for objects having 'class_file' and 'name_index' attributes.
    """

    def get_name(self):

        """
        Return the constant found at the one-based 'name_index' in the class
        file's constants, or None if the index is zero.
        """

        if self.name_index != 0:
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    A mix-in for objects having 'class_file', 'class_index' and
    'name_and_type_index' attributes. Each accessor delegates to the constant
    found at the one-based 'name_and_type_index', returning None where that
    index is zero.
    """

    def get_name(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_class(self):

        "Return the constant found at the one-based 'class_index'."

        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.
def get_method_descriptor(s):

    """
    Parse the method descriptor string 's', which must start with "(".
    Return a tuple containing the list of parameter types and the return
    type, where each type is a tuple of the form (base type, object type,
    array type) and where the return type is None for "V" (void).
    """

    assert s[0] == "("
    remaining = s[1:]

    # Consume parameter types until the closing parenthesis is reached.

    params = []
    while remaining[0] != ")":
        parameter, remaining = _get_parameter_descriptor(remaining)
        params.append(parameter)

    # The return descriptor follows the closing parenthesis.

    if remaining[1] == "V":
        return params, None
    return_type, remaining = _get_field_type(remaining[1:])
    return params, return_type

def get_field_descriptor(s):

    "Parse the field descriptor string 's', returning a type tuple."

    descriptor_type, remaining = _get_field_type(s)
    return descriptor_type

def _get_parameter_descriptor(s):

    "Parse a parameter type from 's', returning it with the unparsed text."

    return _get_field_type(s)

def _get_component_type(s):

    "Parse an array component type from 's', returning it with the unparsed text."

    return _get_field_type(s)

def _get_field_type(s):

    """
    Parse a single type from the start of 's'. Return a tuple containing
    the type - itself a tuple of the form (base type, object type, array
    type) - and the unparsed remainder of 's'.
    """

    base_type, remaining = _get_base_type(s)
    object_type = array_type = None
    if base_type == "L":
        object_type, remaining = _get_object_type(remaining)
    elif base_type == "[":
        array_type, remaining = _get_array_type(remaining)
    return (base_type, object_type, array_type), remaining

def _get_base_type(s):

    """
    Return the first character of 's' together with the rest of 's', or
    None and 's' itself where 's' is empty.
    """

    if not s:
        return None, s
    return s[0], s[1:]

def _get_object_type(s):

    """
    Return the text of 's' preceding the first ";" together with the text
    following it, or None and 's' itself where 's' is empty.
    """

    if not s:
        return None, s
    terminator = s.find(";")
    assert terminator != -1
    return s[:terminator], s[terminator + 1:]

def _get_array_type(s):

    """
    Return the component type parsed from 's' together with the unparsed
    text, or None and 's' itself where 's' is empty.
    """

    if not s:
        return None, s
    return _get_component_type(s)

# Constant information.
+ +class ClassInfo(NameUtils, PythonNameUtils): + def init(self, data, class_file): + self.class_file = class_file + self.name_index = u2(data[0:2]) + return data[2:] + +class RefInfo(NameAndTypeUtils): + def init(self, data, class_file): + self.class_file = class_file + self.class_index = u2(data[0:2]) + self.name_and_type_index = u2(data[2:4]) + return data[4:] + +class FieldRefInfo(RefInfo, PythonNameUtils): + def get_descriptor(self): + return RefInfo.get_field_descriptor(self) + +class MethodRefInfo(RefInfo, PythonMethodUtils): + def get_descriptor(self): + return RefInfo.get_method_descriptor(self) + +class InterfaceMethodRefInfo(MethodRefInfo): + pass + +class NameAndTypeInfo(NameUtils, PythonNameUtils): + def init(self, data, class_file): + self.class_file = class_file + self.name_index = u2(data[0:2]) + self.descriptor_index = u2(data[2:4]) + return data[4:] + + def get_field_descriptor(self): + return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) + + def get_method_descriptor(self): + return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) + +class Utf8Info: + def init(self, data, class_file): + self.class_file = class_file + self.length = u2(data[0:2]) + self.bytes = data[2:2+self.length] + return data[2+self.length:] + + def __str__(self): + return self.bytes + + def __unicode__(self): + return unicode(self.bytes, "utf-8") + + def get_value(self): + return str(self) + +class StringInfo: + def init(self, data, class_file): + self.class_file = class_file + self.string_index = u2(data[0:2]) + return data[2:] + + def __str__(self): + return str(self.class_file.constants[self.string_index - 1]) + + def __unicode__(self): + return unicode(self.class_file.constants[self.string_index - 1]) + + def get_value(self): + return str(self) + +class SmallNumInfo: + def init(self, data, class_file): + self.class_file = class_file + self.bytes = data[0:4] + return data[4:] + +class 
IntegerInfo(SmallNumInfo): + def get_value(self): + return s4(self.bytes) + +class FloatInfo(SmallNumInfo): + def get_value(self): + return f4(self.bytes) + +class LargeNumInfo: + def init(self, data, class_file): + self.class_file = class_file + self.high_bytes = data[0:4] + self.low_bytes = data[4:8] + return data[8:] + +class LongInfo(LargeNumInfo): + def get_value(self): + return s8(self.high_bytes + self.low_bytes) + +class DoubleInfo(LargeNumInfo): + def get_value(self): + return f8(self.high_bytes + self.low_bytes) + +# Other information. +# Objects of these classes are generally aware of the class they reside in. + +class ItemInfo(NameUtils): + def init(self, data, class_file): + self.class_file = class_file + self.access_flags = u2(data[0:2]) + self.name_index = u2(data[2:4]) + self.descriptor_index = u2(data[4:6]) + self.attributes, data = self.class_file._get_attributes(data[6:]) + return data + +class FieldInfo(ItemInfo, PythonNameUtils): + def get_descriptor(self): + return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) + +class MethodInfo(ItemInfo, PythonMethodUtils): + def get_descriptor(self): + return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) + +class AttributeInfo: + def init(self, data, class_file): + self.attribute_length = u4(data[0:4]) + self.info = data[4:4+self.attribute_length] + return data[4+self.attribute_length:] + +# NOTE: Decode the different attribute formats. + +class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils): + def init(self, data, class_file): + self.class_file = class_file + self.attribute_length = u4(data[0:4]) + # Permit the NameUtils mix-in. 
+ self.name_index = self.sourcefile_index = u2(data[4:6]) + return data[6:] + +class ConstantValueAttributeInfo(AttributeInfo): + def init(self, data, class_file): + self.class_file = class_file + self.attribute_length = u4(data[0:4]) + self.constant_value_index = u2(data[4:6]) + assert 4+self.attribute_length == 6 + return data[4+self.attribute_length:] + + def get_value(self): + return self.class_file.constants[self.constant_value_index - 1].get_value() + +class CodeAttributeInfo(AttributeInfo): + def init(self, data, class_file): + self.class_file = class_file + self.attribute_length = u4(data[0:4]) + self.max_stack = u2(data[4:6]) + self.max_locals = u2(data[6:8]) + self.code_length = u4(data[8:12]) + end_of_code = 12+self.code_length + self.code = data[12:end_of_code] + self.exception_table_length = u2(data[end_of_code:end_of_code+2]) + self.exception_table = [] + data = data[end_of_code + 2:] + for i in range(0, self.exception_table_length): + exception = ExceptionInfo() + data = exception.init(data) + self.exception_table.append(exception) + self.attributes, data = self.class_file._get_attributes(data) + return data + +class ExceptionsAttributeInfo(AttributeInfo): + def init(self, data, class_file): + self.class_file = class_file + self.attribute_length = u4(data[0:4]) + self.number_of_exceptions = u2(data[4:6]) + self.exception_index_table = [] + index = 6 + for i in range(0, self.number_of_exceptions): + self.exception_index_table.append(u2(data[index:index+2])) + index += 2 + return data[index:] + + def get_exception(self, i): + exception_index = self.exception_index_table[i] + return self.class_file.constants[exception_index - 1] + +class InnerClassesAttributeInfo(AttributeInfo): + def init(self, data, class_file): + self.class_file = class_file + self.attribute_length = u4(data[0:4]) + self.number_of_classes = u2(data[4:6]) + self.classes = [] + data = data[6:] + for i in range(0, self.number_of_classes): + inner_class = InnerClassInfo() + data = 
inner_class.init(data, self.class_file) + self.classes.append(inner_class) + return data + +class SyntheticAttributeInfo(AttributeInfo): + pass + +class LineNumberAttributeInfo(AttributeInfo): + def init(self, data, class_file): + self.class_file = class_file + self.attribute_length = u4(data[0:4]) + self.line_number_table_length = u2(data[4:6]) + self.line_number_table = [] + data = data[6:] + for i in range(0, self.line_number_table_length): + line_number = LineNumberInfo() + data = line_number.init(data) + self.line_number_table.append(line_number) + return data + +class LocalVariableAttributeInfo(AttributeInfo): + def init(self, data, class_file): + self.class_file = class_file + self.attribute_length = u4(data[0:4]) + self.local_variable_table_length = u2(data[4:6]) + self.local_variable_table = [] + data = data[6:] + for i in range(0, self.local_variable_table_length): + local_variable = LocalVariableInfo() + data = local_variable.init(data, self.class_file) + self.local_variable_table.append(local_variable) + return data + +class DeprecatedAttributeInfo(AttributeInfo): + pass + +# Child classes of the attribute information classes. + +class ExceptionInfo: + def init(self, data): + self.start_pc = u2(data[0:2]) + self.end_pc = u2(data[2:4]) + self.handler_pc = u2(data[4:6]) + self.catch_type = u2(data[6:8]) + return data[8:] + +class InnerClassInfo(NameUtils): + def init(self, data, class_file): + self.class_file = class_file + self.inner_class_info_index = u2(data[0:2]) + self.outer_class_info_index = u2(data[2:4]) + # Permit the NameUtils mix-in. 
+ self.name_index = self.inner_name_index = u2(data[4:6]) + self.inner_class_access_flags = u2(data[6:8]) + return data[8:] + +class LineNumberInfo: + def init(self, data): + self.start_pc = u2(data[0:2]) + self.line_number = u2(data[2:4]) + return data[4:] + +class LocalVariableInfo(NameUtils, PythonNameUtils): + def init(self, data, class_file): + self.class_file = class_file + self.start_pc = u2(data[0:2]) + self.length = u2(data[2:4]) + self.name_index = u2(data[4:6]) + self.descriptor_index = u2(data[6:8]) + self.index = u2(data[8:10]) + return data[10:] + + def get_descriptor(self): + return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) + +# Exceptions. + +class UnknownTag(Exception): + pass + +class UnknownAttribute(Exception): + pass + +# Abstractions for the main structures. + +class ClassFile: + + "A class representing a Java class file." + + def __init__(self, s): + + """ + Process the given string 's', populating the object with the class + file's details. + """ + + self.constants, s = self._get_constants(s[8:]) + self.access_flags, s = self._get_access_flags(s) + self.this_class, s = self._get_this_class(s) + self.super_class, s = self._get_super_class(s) + self.interfaces, s = self._get_interfaces(s) + self.fields, s = self._get_fields(s) + self.methods, s = self._get_methods(s) + self.attributes, s = self._get_attributes(s) + + def _decode_const(self, s): + tag = u1(s[0:1]) + if tag == 1: + const = Utf8Info() + elif tag == 3: + const = IntegerInfo() + elif tag == 4: + const = FloatInfo() + elif tag == 5: + const = LongInfo() + elif tag == 6: + const = DoubleInfo() + elif tag == 7: + const = ClassInfo() + elif tag == 8: + const = StringInfo() + elif tag == 9: + const = FieldRefInfo() + elif tag == 10: + const = MethodRefInfo() + elif tag == 11: + const = InterfaceMethodRefInfo() + elif tag == 12: + const = NameAndTypeInfo() + else: + raise UnknownTag, tag + + # Initialise the constant object. 
+ + s = const.init(s[1:], self) + return const, s + + def _get_constants_from_table(self, count, s): + l = [] + # Have to skip certain entries specially. + i = 1 + while i < count: + c, s = self._decode_const(s) + l.append(c) + # Add a blank entry after "large" entries. + if isinstance(c, LargeNumInfo): + l.append(None) + i += 1 + i += 1 + return l, s + + def _get_items_from_table(self, cls, number, s): + l = [] + for i in range(0, number): + f = cls() + s = f.init(s, self) + l.append(f) + return l, s + + def _get_methods_from_table(self, number, s): + return self._get_items_from_table(MethodInfo, number, s) + + def _get_fields_from_table(self, number, s): + return self._get_items_from_table(FieldInfo, number, s) + + def _get_attribute_from_table(self, s): + attribute_name_index = u2(s[0:2]) + constant_name = self.constants[attribute_name_index - 1].bytes + if constant_name == "SourceFile": + attribute = SourceFileAttributeInfo() + elif constant_name == "ConstantValue": + attribute = ConstantValueAttributeInfo() + elif constant_name == "Code": + attribute = CodeAttributeInfo() + elif constant_name == "Exceptions": + attribute = ExceptionsAttributeInfo() + elif constant_name == "InnerClasses": + attribute = InnerClassesAttributeInfo() + elif constant_name == "Synthetic": + attribute = SyntheticAttributeInfo() + elif constant_name == "LineNumberTable": + attribute = LineNumberAttributeInfo() + elif constant_name == "LocalVariableTable": + attribute = LocalVariableAttributeInfo() + elif constant_name == "Deprecated": + attribute = DeprecatedAttributeInfo() + else: + raise UnknownAttribute, constant_name + s = attribute.init(s[2:], self) + return attribute, s + + def _get_attributes_from_table(self, number, s): + attributes = [] + for i in range(0, number): + attribute, s = self._get_attribute_from_table(s) + attributes.append(attribute) + return attributes, s + + def _get_constants(self, s): + count = u2(s[0:2]) + return self._get_constants_from_table(count, s[2:]) + 
+ def _get_access_flags(self, s): + return u2(s[0:2]), s[2:] + + def _get_this_class(self, s): + index = u2(s[0:2]) + return self.constants[index - 1], s[2:] + + _get_super_class = _get_this_class + + def _get_interfaces(self, s): + interfaces = [] + number = u2(s[0:2]) + s = s[2:] + for i in range(0, number): + index = u2(s[0:2]) + interfaces.append(self.constants[index - 1]) + s = s[2:] + return interfaces, s + + def _get_fields(self, s): + number = u2(s[0:2]) + return self._get_fields_from_table(number, s[2:]) + + def _get_attributes(self, s): + number = u2(s[0:2]) + return self._get_attributes_from_table(number, s[2:]) + + def _get_methods(self, s): + number = u2(s[0:2]) + return self._get_methods_from_table(number, s[2:]) + +if __name__ == "__main__": + import sys + f = open(sys.argv[1], "rb") + c = ClassFile(f.read()) + f.close() + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 754d36821fc8 -r 182cba61327c javaclass/classhook.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/javaclass/classhook.py Fri Jan 21 17:05:06 2005 +0100 @@ -0,0 +1,384 @@ +#!/usr/bin/env python + +import ihooks # for the import machinery +import os, glob # for getting suitably-named files +from imp import PY_SOURCE, PKG_DIRECTORY, C_BUILTIN # import machinery magic +import classfile, bytecode # Java class support +import zipfile # for Java archive inspection + +# NOTE: Arbitrary constants pulled from thin air. + +JAVA_PACKAGE = 20041113 +JAVA_CLASS = 20041114 +JAVA_ARCHIVE = 20041115 + +class ClassHooks(ihooks.Hooks): + + "A filesystem hooks class providing information about supported files." + + def get_suffixes(self): + + "Return the recognised suffixes." + + return [("", "", JAVA_PACKAGE), (os.extsep + "jar", "r", JAVA_ARCHIVE)] + ihooks.Hooks.get_suffixes(self) + + def path_isdir(self, x, archive=None): + + "Return whether 'x' is a directory in the given 'archive'." 
+ + if archive is None: + return ihooks.Hooks.path_isdir(self, x) + + return self._get_dirname(x) in archive.namelist() + + def _get_dirname(self, x): + + """ + Return the directory name for 'x'. + In zip files, the presence of "/" seems to indicate a directory. + """ + + if x.endswith("/"): + return x + else: + return x + "/" + + def listdir(self, x, archive=None): + + "Return the contents of the directory 'x' in the given 'archive'." + + if archive is None: + return ihooks.Hooks.listdir(self, x) + + x = self._get_dirname(x) + l = [] + for path in archive.namelist(): + + # Find out if the path is within the given directory. + + if path != x and path.startswith(x): + + # Get the path below the given directory. + + subpath = path[len(x):] + + # Find out whether the path is an object in the current directory. + + if subpath.count("/") == 0 or subpath.count("/") == 1 and subpath.endswith("/"): + l.append(subpath) + + return l + + def matching(self, dir, extension, archive=None): + + """ + Return the matching files in the given directory 'dir' having the given + 'extension' within the given 'archive'. Produce a list containing full + paths as opposed to simple filenames. + """ + + if archive is None: + return glob.glob(self.path_join(dir, "*" + extension)) + + dir = self._get_dirname(dir) + l = [] + for path in self.listdir(dir, archive): + if path.endswith(extension): + l.append(self.path_join(dir, path)) + return l + + def read(self, filename, archive=None): + + """ + Return the contents of the file with the given 'filename' in the given + 'archive'. + """ + + if archive is None: + f = open(filename, "rb") + s = f.read() + f.close() + return s + return archive.read(filename) + +class ClassLoader(ihooks.ModuleLoader): + + "A class providing support for searching directories for supported files." + + def find_module(self, name, path=None): + + """ + Find the module with the given 'name', using the given 'path' to locate + it. 
Note that ModuleLoader.find_module is almost sufficient, but does + not provide enough support for "package unions" where the root of a + package hierarchy may appear in several places. + + Return a list of locations (each being the "stuff" data structure used + by load_module); this replaces the single "stuff" value or None returned + by ModuleLoader.find_module. + """ + + if path is None: + path = [None] + self.default_path() + + found_locations = [] + + for dir in path: + stuff = self.find_module_in_dir(name, dir) + if stuff: + found_locations.append(stuff) + + return found_locations + + def find_module_in_dir(self, name, dir, allow_packages=1): + + """ + Find the module with the given 'name' in the given directory 'dir'. + Since Java packages/modules are directories containing class files, + return the required information tuple only when the path constructed + from 'dir' and 'name' refers to a directory containing class files. + """ + + result = ihooks.ModuleLoader.find_module_in_dir(self, name, dir, allow_packages) + if result is not None: + return result + + # An archive may be opened. + + archive = None + + # Provide a special name for the current directory. + + if name == "__this__": + if dir == None: + return (None, ".", ("", "", JAVA_PACKAGE)) + else: + return None + + # Where no directory is given, return failure immediately. + + elif dir is None: + return None + + # Detect archives. 
+ + else: + archive, archive_path, path = self._get_archive_and_path(dir, name) + + #print "Processing name", name, "in", dir, "producing", path, "within archive", archive + + if self._find_module_at_path(path, archive): + if archive is not None: + return (archive, archive_path + ":" + path, (os.extsep + "jar", "r", JAVA_ARCHIVE)) + else: + return (None, path, ("", "", JAVA_PACKAGE)) + else: + return None + + def _get_archive_and_path(self, dir, name): + parts = dir.split(":") + archive_path = parts[0] + + # Archives may include an internal path, but will in any case have + # a primary part ending in .jar. + + if archive_path.endswith(os.extsep + "jar"): + archive = zipfile.ZipFile(archive_path, "r") + path = self.hooks.path_join(":".join(parts[1:]), name) + + # Otherwise, produce a filesystem-based path. + + else: + archive = None + path = self.hooks.path_join(dir, name) + + return archive, archive_path, path + + def _get_path_in_archive(self, path): + parts = path.split(":") + if len(parts) == 1: + return parts[0] + else: + return ":".join(parts[1:]) + + def _find_module_at_path(self, path, archive): + if self.hooks.path_isdir(path, archive): + #print "Looking in", path, "using archive", archive + + # Look for classes in the directory. + + if len(self.hooks.matching(path, os.extsep + "class", archive)) != 0: + return 1 + + # Otherwise permit importing where directories containing classes exist. + + #print "Filenames are", self.hooks.listdir(path, archive) + for filename in self.hooks.listdir(path, archive): + pathname = self.hooks.path_join(path, filename) + result = self._find_module_at_path(pathname, archive) + if result is not None: + return result + + return 0 + + def load_module(self, name, stuff): + + """ + Load the module with the given 'name', with a list of 'stuff' items, + each of which describes the location of the module and is a tuple of the + form (file, filename, (suffix, mode, data type)). 
+ + Return a module object or raise an ImportError if a problem occurred in + the import operation. + + Note that the 'stuff' parameter is a list and not a single item as in + ModuleLoader.load_module. This should still work, however, since the + find_module method produces such a list. + """ + + # Set up the module. + # A union of all locations is placed in the module's path. + + module = self.hooks.add_module(name) + module.__path__ = [item_filename for (item_archive, item_filename, item_info) in stuff] + + # Just go into each package and find the class files. + + for stuff_item in stuff: + + # Extract the details, delegating loading responsibility to the + # default loader where appropriate. + # NOTE: Should we not be using some saved loader remembered upon + # NOTE: installation? + + archive, filename, info = stuff_item + suffix, mode, datatype = info + if datatype not in (JAVA_PACKAGE, JAVA_ARCHIVE): + return ihooks.ModuleLoader.load_module(self, name, stuff_item) + + #print "Loading", archive, filename, info + + # Prepare a dictionary of globals. + + global_names = module.__dict__ + global_names["__builtins__"] = __builtins__ + + # Get the real filename. + + filename = self._get_path_in_archive(filename) + #print "Real filename", filename + + # Load the class files. + + class_files = {} + for class_filename in self.hooks.matching(filename, os.extsep + "class", archive): + #print "Loading class", class_filename + s = self.hooks.read(class_filename, archive) + class_file = classfile.ClassFile(s) + class_files[str(class_file.this_class.get_name())] = class_file + + # Get an index of the class files. + + class_file_index = class_files.keys() + + # NOTE: Unnecessary sorting for test purposes. + + class_file_index.sort() + + # Now go through the classes arranging them in a safe loading order. 
+ + position = 0 + while position < len(class_file_index): + class_name = class_file_index[position] + super_class_name = str(class_files[class_name].super_class.get_name()) + + # Discover whether the superclass appears later. + + try: + super_class_position = class_file_index.index(super_class_name) + if super_class_position > position: + + # If the superclass appears later, swap this class and the + # superclass, then process the superclass. + + class_file_index[position] = super_class_name + class_file_index[super_class_position] = class_name + continue + + except ValueError: + pass + + position += 1 + + # Process each class file, producing a genuine Python class. + # Create the classes, but establish a proper initialisation order. + + class_file_init_index = [] + class_file_init = {} + + for class_name in class_file_index: + #print "* Class", class_name + class_file = class_files[class_name] + translator = bytecode.ClassTranslator(class_file) + cls, external_names = translator.process(global_names) + module.__dict__[cls.__name__] = cls + + # Process external names. + + this_class_name_parts = class_file.this_class.get_python_name().split(".") + this_class_module, this_class_name = this_class_name_parts[:-1], this_class_name_parts[-1] + + for external_name in external_names: + #print "* Name", external_name + external_name_parts = external_name.split(".") + external_class_module, external_class_name = external_name_parts[:-1], external_name_parts[-1] + + # Names not local to this package need importing. + + if len(external_name_parts) > 1 and this_class_module != external_class_module: + + external_module_name = ".".join(external_class_module) + #print "* Importing", external_module_name + obj = __import__(external_module_name, global_names, {}, []) + global_names[external_name_parts[0]] = obj + + # Names local to this package may affect initialisation order. 
+ + elif external_class_name not in class_file_init_index: + try: + this_class_name_index = class_file_init_index.index(this_class_name) + + # Either insert this name before the current class's + # name. + + #print "* Inserting", external_class_name + class_file_init_index.insert(this_class_name_index, external_class_name) + + except ValueError: + + # Or add this name in anticipation of the current + # class's name appearing. + + #print "* Including", external_class_name + class_file_init_index.append(external_class_name) + + # Add this class name to the initialisation index. + + if class_name not in class_file_init_index: + class_file_init_index.append(this_class_name) + class_file_init[this_class_name] = (cls, class_file) + + # Finally, call __clinit__ methods for all relevant classes. + + #print "** Initialisation order", class_file_init_index + for class_name in class_file_init_index: + cls, class_file = class_file_init[class_name] + #print "**", cls, class_file + if hasattr(cls, "__clinit__"): + eval(cls.__clinit__.func_code, global_names) + + return module + +ihooks.ModuleImporter(loader=ClassLoader(hooks=ClassHooks())).install() + +# vim: tabstop=4 expandtab shiftwidth=4