# HG changeset patch # User Paul Boddie # Date 1106323503 -3600 # Node ID 754d36821fc88da34b7bc55be425db5a8ed71918 # Parent 901001c30474705532495d70a4f51267d5cbb67a Moved the modules into the javaclass package. Added a setup script. diff -r 901001c30474 -r 754d36821fc8 bytecode.py --- a/bytecode.py Fri Jan 21 17:04:41 2005 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2311 +0,0 @@ -#!/usr/bin/env python - -""" -Java bytecode conversion. Specification found at the following URL: -http://java.sun.com/docs/books/vmspec/2nd-edition/html/Instructions2.doc.html - -NOTE: Synchronized constructs are not actually supported. -""" - -import classfile -from dis import cmp_op # for access to Python bytecode values and operators -try: - from dis import opmap -except ImportError: - from dis import opname - opmap = {} - for i in range(0, len(opname)): - opmap[opname[i]] = i -from UserDict import UserDict -import new - -# Bytecode production classes. - -class BytecodeWriter: - - "A Python bytecode writer." - - def __init__(self): - - "Initialise the writer." - - # A stack of loop start instructions corresponding to loop blocks. - self.loops = [] - - # A stack of loop block or exception block start positions. - self.blocks = [] - - # A stack of exception block handler pointers. - self.exception_handlers = [] - - # A dictionary mapping labels to jump instructions referencing such labels. - self.jumps = {} - - # The output values, including "lazy" subvalues which will need evaluating. - self.output = [] - - # The current Python bytecode instruction position. - self.position = 0 - - # Stack depth estimation. - self.stack_depth = 0 - self.max_stack_depth = 0 - - # Local variable estimation. - self.max_locals = 0 - - # Mapping from values to indexes. - self.constants = {} - - # Mapping from names to indexes. - # NOTE: This may be acquired from elsewhere. - #self.globals = {} - - # Mapping from names to indexes. 
- self.names = {} - - # A list of constants used as exception handler return addresses. - self.constants_for_exceptions = [] - - # A list of external names. - self.external_names = [] - - def get_output(self): - - "Return the output of the writer as a string." - - output = [] - for element in self.output: - if isinstance(element, LazySubValue): - value = element.value - else: - value = element - # NOTE: ValueError gets raised for bad values here. - output.append(chr(value)) - return "".join(output) - - def get_constants(self): - - """ - Return a list of constants with ordering significant to the code - employing them. - """ - - l = self._get_list(self._invert(self.constants)) - result = [] - for i in l: - if isinstance(i, LazyValue): - result.append(i.get_value()) - else: - result.append(i) - return result - - #def get_globals(self): - # return self._get_list(self._invert(self.globals)) - - def get_names(self): - - """ - Return a list of names with ordering significant to the code employing - them. - """ - - return self._get_list(self._invert(self.names)) - - def _invert(self, d): - - """ - Return a new dictionary whose key-to-value mapping is in the inverse of - that found in 'd'. - """ - - inverted = {} - for k, v in d.items(): - inverted[v] = k - return inverted - - def _get_list(self, d): - - """ - Traverse the dictionary 'd' returning a list whose values appear at the - position denoted by each value's key in 'd'. - """ - - l = [] - for i in range(0, len(d.keys())): - l.append(d[i]) - return l - - # Administrative methods. - - def update_stack_depth(self, change): - - """ - Given the stated 'change' in stack depth, update the maximum stack depth - where appropriate. - """ - - self.stack_depth += change - if self.stack_depth > self.max_stack_depth: - self.max_stack_depth = self.stack_depth - - def update_locals(self, index): - - """ - Given the stated 'index' of a local variable, update the maximum local - variable index where appropriate. 
- """ - - if index > self.max_locals: - self.max_locals = index - - # Special methods. - - def _write_value(self, value): - - """ - Write the given 'value' at the current output position. - """ - - if isinstance(value, LazyValue): - # NOTE: Assume a 16-bit value. - self.output.append(value.values[0]) - self.output.append(value.values[1]) - self.position += 2 - elif value <= 0xffff: - self.output.append(value & 0xff) - self.output.append((value & 0xff00) >> 8) - self.position += 2 - else: - # NOTE: EXTENDED_ARG not yet supported. - raise ValueError, value - - def _rewrite_value(self, position, value): - - """ - At the given output 'position', rewrite the given 'value'. - """ - - # NOTE: Assume a 16-bit value. - if value <= 0xffff: - self.output[position] = (value & 0xff) - self.output[position + 1] = ((value & 0xff00) >> 8) - else: - # NOTE: EXTENDED_ARG not yet supported. - raise ValueError, value - - # Higher level methods. - - def use_external_name(self, name): - # NOTE: Remove array and object indicators. - self.external_names.append(name) - - def setup_loop(self): - self.loops.append(self.position) - self.output.append(opmap["SETUP_LOOP"]) - self.position += 1 - self._write_value(0) # To be filled in later - - def end_loop(self): - current_loop_start = self.loops.pop() - current_loop_real_start = self.blocks.pop() - #print "<", self.blocks, current_loop_real_start - # Fix the iterator delta. - # NOTE: Using 3 as the assumed length of the FOR_ITER instruction. - self.jump_absolute(current_loop_real_start) - self._rewrite_value(current_loop_real_start + 1, self.position - current_loop_real_start - 3) - self.pop_block() - # Fix the loop delta. - # NOTE: Using 3 as the assumed length of the SETUP_LOOP instruction. - self._rewrite_value(current_loop_start + 1, self.position - current_loop_start - 3) - - def jump_to_label(self, status, name): - # Record the instruction using the jump. 
- jump_instruction = self.position - if status is None: - self.jump_forward() - elif status: - self.jump_if_true() - else: - self.jump_if_false() - # Record the following instruction, too. - if not self.jumps.has_key(name): - self.jumps[name] = [] - self.jumps[name].append((jump_instruction, self.position)) - - def start_label(self, name): - # Fill in all jump instructions. - for jump_instruction, following_instruction in self.jumps[name]: - self._rewrite_value(jump_instruction + 1, self.position - following_instruction) - del self.jumps[name] - - def load_const_ret(self, value): - self.constants_for_exceptions.append(value) - self.load_const(value) - - def ret(self, index): - self.load_fast(index) - - # Previously, the constant stored on the stack by jsr/jsr_w was stored - # in a local variable. In the JVM, extracting the value from the local - # variable and jumping can be done at runtime. In the Python VM, any - # jump target must be known in advance and written into the bytecode. - - for constant in self.constants_for_exceptions: - self.dup_top() # Stack: actual-address, actual-address - self.load_const(constant) # Stack: actual-address, actual-address, suggested-address - self.compare_op("==") # Stack: actual-address, result - self.jump_to_label(0, "const") - self.pop_top() # Stack: actual-address - self.pop_top() # Stack: - self.jump_absolute(constant) - self.start_label("const") - self.pop_top() # Stack: actual-address - - # NOTE: If we get here, something is really wrong. 
- - self.pop_top() # Stack: - - def setup_except(self, target): - self.blocks.append(self.position) - self.exception_handlers.append(target) - #print "-", self.position, target - self.output.append(opmap["SETUP_EXCEPT"]) - self.position += 1 - self._write_value(0) # To be filled in later - - def setup_finally(self, target): - self.blocks.append(self.position) - self.exception_handlers.append(target) - #print "-", self.position, target - self.output.append(opmap["SETUP_FINALLY"]) - self.position += 1 - self._write_value(0) # To be filled in later - - def end_exception(self): - current_exception_start = self.blocks.pop() - # Convert the "lazy" absolute value. - current_exception_target = self.exception_handlers.pop() - target = current_exception_target.get_value() - #print "*", current_exception_start, target - # NOTE: Using 3 as the assumed length of the SETUP_* instruction. - self._rewrite_value(current_exception_start + 1, target - current_exception_start - 3) - - def start_handler(self, exc_name, class_file): - - # Where handlers are begun, produce bytecode to test the type of - # the exception. - # NOTE: Since RAISE_VARARGS and END_FINALLY are not really documented, - # NOTE: we store the top of the stack and use it later to trigger the - # NOTE: magic processes when re-raising. - self.use_external_name(str(exc_name)) - - self.rot_two() # Stack: raised-exception, exception - self.dup_top() # Stack: raised-exception, exception, exception - # Handled exceptions are wrapped before being thrown. 
- self.load_global("Exception") # Stack: raised-exception, exception, exception, Exception - self.compare_op("exception match") # Stack: raised-exception, exception, result - self.jump_to_label(0, "next") - self.pop_top() # Stack: raised-exception, exception - self.dup_top() # Stack: raised-exception, exception, exception - self.load_attr("args") # Stack: raised-exception, exception, args - self.load_const(0) # Stack: raised-exception, exception, args, 0 - self.binary_subscr() # Stack: raised-exception, exception, exception-object - load_class_name(class_file, str(exc_name), self) - # Stack: raised-exception, exception, exception-object, handled-exception - self.load_global("isinstance") # Stack: raised-exception, exception, exception-object, handled-exception, isinstance - self.rot_three() # Stack: raised-exception, exception, isinstance, exception-object, handled-exception - self.call_function(2) # Stack: raised-exception, exception, result - self.jump_to_label(1, "handler") - self.start_label("next") - self.pop_top() # Stack: raised-exception, exception - self.rot_two() # Stack: exception, raised-exception - self.end_finally() - self.start_label("handler") - self.pop_top() # Stack: raised-exception, exception - - # Complicated methods. 
- - def load_const(self, value): - self.output.append(opmap["LOAD_CONST"]) - if not self.constants.has_key(value): - self.constants[value] = len(self.constants.keys()) - self.position += 1 - self._write_value(self.constants[value]) - self.update_stack_depth(1) - - def load_global(self, name): - self.output.append(opmap["LOAD_GLOBAL"]) - if not self.names.has_key(name): - self.names[name] = len(self.names.keys()) - self.position += 1 - self._write_value(self.names[name]) - self.update_stack_depth(1) - - def load_attr(self, name): - self.output.append(opmap["LOAD_ATTR"]) - if not self.names.has_key(name): - self.names[name] = len(self.names.keys()) - self.position += 1 - self._write_value(self.names[name]) - - def load_name(self, name): - self.output.append(opmap["LOAD_NAME"]) - if not self.names.has_key(name): - self.names[name] = len(self.names.keys()) - self.position += 1 - self._write_value(self.names[name]) - self.update_stack_depth(1) - - def load_fast(self, index): - self.output.append(opmap["LOAD_FAST"]) - self.position += 1 - self._write_value(index) - self.update_stack_depth(1) - self.update_locals(index) - - def store_attr(self, name): - self.output.append(opmap["STORE_ATTR"]) - if not self.names.has_key(name): - self.names[name] = len(self.names.keys()) - self.position += 1 - self._write_value(self.names[name]) - self.update_stack_depth(-1) - - def store_fast(self, index): - self.output.append(opmap["STORE_FAST"]) - self.position += 1 - self._write_value(index) - self.update_stack_depth(-1) - self.update_locals(index) - - def for_iter(self): - self.blocks.append(self.position) - #print ">", self.blocks - self.output.append(opmap["FOR_ITER"]) - self.position += 1 - self._write_value(0) # To be filled in later - self.update_stack_depth(1) - - def break_loop(self): - self.output.append(opmap["BREAK_LOOP"]) - self.position += 1 - self.jump_absolute(self.blocks[-1]) - - # Normal bytecode generators. 
- - def get_iter(self): - self.output.append(opmap["GET_ITER"]) - self.position += 1 - - def jump_if_false(self, offset=0): - self.output.append(opmap["JUMP_IF_FALSE"]) - self.position += 1 - self._write_value(offset) # May be filled in later - - def jump_if_true(self, offset=0): - self.output.append(opmap["JUMP_IF_TRUE"]) - self.position += 1 - self._write_value(offset) # May be filled in later - - def jump_forward(self, offset=0): - self.output.append(opmap["JUMP_FORWARD"]) - self.position += 1 - self._write_value(offset) # May be filled in later - - def jump_absolute(self, address=0): - self.output.append(opmap["JUMP_ABSOLUTE"]) - self.position += 1 - self._write_value(address) # May be filled in later - - def build_tuple(self, count): - self.output.append(opmap["BUILD_TUPLE"]) - self.position += 1 - self._write_value(count) - self.update_stack_depth(-(count - 1)) - - def build_list(self, count): - self.output.append(opmap["BUILD_LIST"]) - self.position += 1 - self._write_value(count) - self.update_stack_depth(-(count - 1)) - - def pop_top(self): - self.output.append(opmap["POP_TOP"]) - self.position += 1 - self.update_stack_depth(-1) - - def dup_top(self): - self.output.append(opmap["DUP_TOP"]) - self.position += 1 - self.update_stack_depth(1) - - def dup_topx(self, count): - self.output.append(opmap["DUP_TOPX"]) - self.position += 1 - self._write_value(count) - self.update_stack_depth(count) - - def rot_two(self): - self.output.append(opmap["ROT_TWO"]) - self.position += 1 - - def rot_three(self): - self.output.append(opmap["ROT_THREE"]) - self.position += 1 - - def rot_four(self): - self.output.append(opmap["ROT_FOUR"]) - self.position += 1 - - def call_function(self, count): - self.output.append(opmap["CALL_FUNCTION"]) - self.position += 1 - self._write_value(count) - self.update_stack_depth(-count) - - def call_function_var(self, count): - self.output.append(opmap["CALL_FUNCTION_VAR"]) - self.position += 1 - self._write_value(count) - 
self.update_stack_depth(-count-1) - - def binary_subscr(self): - self.output.append(opmap["BINARY_SUBSCR"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_add(self): - self.output.append(opmap["BINARY_ADD"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_divide(self): - self.output.append(opmap["BINARY_DIVIDE"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_multiply(self): - self.output.append(opmap["BINARY_MULTIPLY"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_modulo(self): - self.output.append(opmap["BINARY_MODULO"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_subtract(self): - self.output.append(opmap["BINARY_SUBTRACT"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_and(self): - self.output.append(opmap["BINARY_AND"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_or(self): - self.output.append(opmap["BINARY_XOR"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_lshift(self): - self.output.append(opmap["BINARY_LSHIFT"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_rshift(self): - self.output.append(opmap["BINARY_RSHIFT"]) - self.position += 1 - self.update_stack_depth(-1) - - def binary_xor(self): - self.output.append(opmap["BINARY_XOR"]) - self.position += 1 - self.update_stack_depth(-1) - - def store_subscr(self): - self.output.append(opmap["STORE_SUBSCR"]) - self.position += 1 - self.update_stack_depth(-3) - - def unary_negative(self): - self.output.append(opmap["UNARY_NEGATIVE"]) - self.position += 1 - - def slice_0(self): - self.output.append(opmap["SLICE+0"]) - self.position += 1 - - def slice_1(self): - self.output.append(opmap["SLICE+1"]) - self.position += 1 - - def compare_op(self, op): - self.output.append(opmap["COMPARE_OP"]) - self.position += 1 - self._write_value(list(cmp_op).index(op)) - self.update_stack_depth(-1) - - def return_value(self): - 
self.output.append(opmap["RETURN_VALUE"]) - self.position += 1 - self.update_stack_depth(-1) - - def raise_varargs(self, count): - self.output.append(opmap["RAISE_VARARGS"]) - self.position += 1 - self._write_value(count) - - def pop_block(self): - self.output.append(opmap["POP_BLOCK"]) - self.position += 1 - - def end_finally(self): - self.output.append(opmap["END_FINALLY"]) - self.position += 1 - - def unpack_sequence(self, count): - self.output.append(opmap["UNPACK_SEQUENCE"]) - self.position += 1 - self._write_value(count) - - # Debugging. - - def print_item(self): - self.output.append(opmap["PRINT_ITEM"]) - self.position += 1 - -# Utility classes and functions. - -class LazyDict(UserDict): - def __getitem__(self, key): - if not self.data.has_key(key): - # NOTE: Assume 16-bit value. - self.data[key] = LazyValue(2) - return self.data[key] - def __setitem__(self, key, value): - if self.data.has_key(key): - existing_value = self.data[key] - if isinstance(existing_value, LazyValue): - existing_value.set_value(value) - return - self.data[key] = value - -class LazyValue: - def __init__(self, nvalues): - self.values = [] - for i in range(0, nvalues): - self.values.append(LazySubValue()) - def set_value(self, value): - # NOTE: Assume at least 16-bit value. No "filling" performed. - if value <= 0xffff: - self.values[0].set_value(value & 0xff) - self.values[1].set_value((value & 0xff00) >> 8) - else: - # NOTE: EXTENDED_ARG not yet supported. - raise ValueError, value - def get_value(self): - value = 0 - values = self.values[:] - for i in range(0, len(values)): - value = (value << 8) + values.pop().value - return value - -class LazySubValue: - def __init__(self): - self.value = 0 - def set_value(self, value): - self.value = value - -def signed(value, limit): - - """ - Return the signed integer from the unsigned 'value', where 'limit' (a value - one greater than the highest possible positive integer) is used to determine - whether a negative or positive result is produced. 
- """ - - d, r = divmod(value, limit) - if d == 1: - mask = limit * 2 - 1 - return -1 - (value ^ mask) - else: - return value - -def signed1(value): - return signed(value, 0x80) - -def signed2(value): - return signed(value, 0x8000) - -def signed4(value): - return signed(value, 0x80000000) - -def load_class_name(class_file, full_class_name, program): - this_class_name = str(class_file.this_class.get_python_name()) - this_class_parts = this_class_name.split(".") - class_parts = full_class_name.split(".") - - # Only use the full path if different from this class's path. - - if class_parts[:-1] != this_class_parts[:-1]: - program.use_external_name(full_class_name) - program.load_global(class_parts[0]) - for class_part in class_parts[1:]: - program.load_attr(class_part) # Stack: classref - else: - program.load_global(class_parts[-1]) - -# Bytecode conversion. - -class BytecodeReader: - - "A generic Java bytecode reader." - - def __init__(self, class_file): - - """ - Initialise the reader with a 'class_file' containing essential - information for any bytecode inspection activity. - """ - - self.class_file = class_file - self.position_mapping = LazyDict() - - def process(self, method, program): - - """ - Process the given 'method' (obtained from the class file), using the - given 'program' to write translated Python bytecode instructions. - """ - - self.java_position = 0 - self.in_finally = 0 - self.method = method - - # NOTE: Potentially unreliable way of getting necessary information. - - code, exception_table = None, None - for attribute in method.attributes: - if isinstance(attribute, classfile.CodeAttributeInfo): - code, exception_table = attribute.code, attribute.exception_table - break - - # Where no code was found, write a very simple placeholder routine. - # This is useful for interfaces and abstract classes. - # NOTE: Assess the correctness of doing this. An exception should really - # NOTE: be raised instead. 
- - if code is None: - program.load_const(None) - program.return_value() - return - - # Produce a structure which permits fast access to exception details. - - exception_block_start = {} - exception_block_end = {} - exception_block_handler = {} - reversed_exception_table = exception_table[:] - reversed_exception_table.reverse() - - # Later entries have wider coverage than earlier entries. - - for exception in reversed_exception_table: - - # Index start positions. - - if not exception_block_start.has_key(exception.start_pc): - exception_block_start[exception.start_pc] = [] - exception_block_start[exception.start_pc].append(exception) - - # Index end positions. - - if not exception_block_end.has_key(exception.end_pc): - exception_block_end[exception.end_pc] = [] - exception_block_end[exception.end_pc].append(exception) - - # Index handler positions. - - if not exception_block_handler.has_key(exception.handler_pc): - exception_block_handler[exception.handler_pc] = [] - exception_block_handler[exception.handler_pc].append(exception) - - # Process each instruction in the code. - - while self.java_position < len(code): - self.position_mapping[self.java_position] = program.position - - # Insert exception handling constructs. - - block_starts = exception_block_start.get(self.java_position, []) - for exception in block_starts: - - # Note that the absolute position is used. - - if exception.catch_type == 0: - program.setup_finally(self.position_mapping[exception.handler_pc]) - else: - program.setup_except(self.position_mapping[exception.handler_pc]) - - if block_starts: - self.in_finally = 0 - - # Insert exception handler details. - # NOTE: Ensure that pop_block is reachable by possibly inserting it at the start of finally handlers. - # NOTE: Insert a check for the correct exception at the start of each handler. 
- - for exception in exception_block_handler.get(self.java_position, []): - program.end_exception() - if exception.catch_type == 0: - self.in_finally = 1 - else: - program.start_handler(self.class_file.constants[exception.catch_type - 1].get_python_name(), self.class_file) - - # Process the bytecode at the current position. - - bytecode = ord(code[self.java_position]) - mnemonic, number_of_arguments = self.java_bytecodes[bytecode] - number_of_arguments = self.process_bytecode(mnemonic, number_of_arguments, code, program) - next_java_position = self.java_position + 1 + number_of_arguments - - # Insert exception block end details. - - for exception in exception_block_end.get(next_java_position, []): - - # NOTE: Insert jump beyond handlers. - # NOTE: program.jump_forward/absolute(...) - # NOTE: Insert end finally at end of handlers as well as where "ret" occurs. - - if exception.catch_type != 0: - program.pop_block() - - # Only advance the JVM position after sneaking in extra Python - # instructions. - - self.java_position = next_java_position - - def process_bytecode(self, mnemonic, number_of_arguments, code, program): - - """ - Process a bytecode instruction with the given 'mnemonic' and - 'number_of_arguments'. The 'code' parameter contains the full method - code so that argument data can be inspected. The 'program' parameter is - used to produce a Python translation of the instruction. - """ - - if number_of_arguments is not None: - arguments = [] - for j in range(0, number_of_arguments): - arguments.append(ord(code[self.java_position + 1 + j])) - - # Call the handler. - - getattr(self, mnemonic)(arguments, program) - return number_of_arguments - else: - # Call the handler. 
- - return getattr(self, mnemonic)(code[self.java_position+1:], program) - - java_bytecodes = { - # code : (mnemonic, number of following bytes, change in stack) - 0 : ("nop", 0), - 1 : ("aconst_null", 0), - 2 : ("iconst_m1", 0), - 3 : ("iconst_0", 0), - 4 : ("iconst_1", 0), - 5 : ("iconst_2", 0), - 6 : ("iconst_3", 0), - 7 : ("iconst_4", 0), - 8 : ("iconst_5", 0), - 9 : ("lconst_0", 0), - 10 : ("lconst_1", 0), - 11 : ("fconst_0", 0), - 12 : ("fconst_1", 0), - 13 : ("fconst_2", 0), - 14 : ("dconst_0", 0), - 15 : ("dconst_1", 0), - 16 : ("bipush", 1), - 17 : ("sipush", 2), - 18 : ("ldc", 1), - 19 : ("ldc_w", 2), - 20 : ("ldc2_w", 2), - 21 : ("iload", 1), - 22 : ("lload", 1), - 23 : ("fload", 1), - 24 : ("dload", 1), - 25 : ("aload", 1), - 26 : ("iload_0", 0), - 27 : ("iload_1", 0), - 28 : ("iload_2", 0), - 29 : ("iload_3", 0), - 30 : ("lload_0", 0), - 31 : ("lload_1", 0), - 32 : ("lload_2", 0), - 33 : ("lload_3", 0), - 34 : ("fload_0", 0), - 35 : ("fload_1", 0), - 36 : ("fload_2", 0), - 37 : ("fload_3", 0), - 38 : ("dload_0", 0), - 39 : ("dload_1", 0), - 40 : ("dload_2", 0), - 41 : ("dload_3", 0), - 42 : ("aload_0", 0), - 43 : ("aload_1", 0), - 44 : ("aload_2", 0), - 45 : ("aload_3", 0), - 46 : ("iaload", 0), - 47 : ("laload", 0), - 48 : ("faload", 0), - 49 : ("daload", 0), - 50 : ("aaload", 0), - 51 : ("baload", 0), - 52 : ("caload", 0), - 53 : ("saload", 0), - 54 : ("istore", 1), - 55 : ("lstore", 1), - 56 : ("fstore", 1), - 57 : ("dstore", 1), - 58 : ("astore", 1), - 59 : ("istore_0", 0), - 60 : ("istore_1", 0), - 61 : ("istore_2", 0), - 62 : ("istore_3", 0), - 63 : ("lstore_0", 0), - 64 : ("lstore_1", 0), - 65 : ("lstore_2", 0), - 66 : ("lstore_3", 0), - 67 : ("fstore_0", 0), - 68 : ("fstore_1", 0), - 69 : ("fstore_2", 0), - 70 : ("fstore_3", 0), - 71 : ("dstore_0", 0), - 72 : ("dstore_1", 0), - 73 : ("dstore_2", 0), - 74 : ("dstore_3", 0), - 75 : ("astore_0", 0), - 76 : ("astore_1", 0), - 77 : ("astore_2", 0), - 78 : ("astore_3", 0), - 79 : ("iastore", 0), - 80 
: ("lastore", 0), - 81 : ("fastore", 0), - 82 : ("dastore", 0), - 83 : ("aastore", 0), - 84 : ("bastore", 0), - 85 : ("castore", 0), - 86 : ("sastore", 0), - 87 : ("pop", 0), - 88 : ("pop2", 0), - 89 : ("dup", 0), - 90 : ("dup_x1", 0), - 91 : ("dup_x2", 0), - 92 : ("dup2", 0), - 93 : ("dup2_x1", 0), - 94 : ("dup2_x2", 0), - 95 : ("swap", 0), - 96 : ("iadd", 0), - 97 : ("ladd", 0), - 98 : ("fadd", 0), - 99 : ("dadd", 0), - 100 : ("isub", 0), - 101 : ("lsub", 0), - 102 : ("fsub", 0), - 103 : ("dsub", 0), - 104 : ("imul", 0), - 105 : ("lmul", 0), - 106 : ("fmul", 0), - 107 : ("dmul", 0), - 108 : ("idiv", 0), - 109 : ("ldiv", 0), - 110 : ("fdiv", 0), - 111 : ("ddiv", 0), - 112 : ("irem", 0), - 113 : ("lrem", 0), - 114 : ("frem", 0), - 115 : ("drem", 0), - 116 : ("ineg", 0), - 117 : ("lneg", 0), - 118 : ("fneg", 0), - 119 : ("dneg", 0), - 120 : ("ishl", 0), - 121 : ("lshl", 0), - 122 : ("ishr", 0), - 123 : ("lshr", 0), - 124 : ("iushr", 0), - 125 : ("lushr", 0), - 126 : ("iand", 0), - 127 : ("land", 0), - 128 : ("ior", 0), - 129 : ("lor", 0), - 130 : ("ixor", 0), - 131 : ("lxor", 0), - 132 : ("iinc", 2), - 133 : ("i2l", 0), - 134 : ("i2f", 0), - 135 : ("i2d", 0), - 136 : ("l2i", 0), - 137 : ("l2f", 0), - 138 : ("l2d", 0), - 139 : ("f2i", 0), - 140 : ("f2l", 0), - 141 : ("f2d", 0), - 142 : ("d2i", 0), - 143 : ("d2l", 0), - 144 : ("d2f", 0), - 145 : ("i2b", 0), - 146 : ("i2c", 0), - 147 : ("i2s", 0), - 148 : ("lcmp", 0), - 149 : ("fcmpl", 0), - 150 : ("fcmpg", 0), - 151 : ("dcmpl", 0), - 152 : ("dcmpg", 0), - 153 : ("ifeq", 2), - 154 : ("ifne", 2), - 155 : ("iflt", 2), - 156 : ("ifge", 2), - 157 : ("ifgt", 2), - 158 : ("ifle", 2), - 159 : ("if_icmpeq", 2), - 160 : ("if_icmpne", 2), - 161 : ("if_icmplt", 2), - 162 : ("if_icmpge", 2), - 163 : ("if_icmpgt", 2), - 164 : ("if_icmple", 2), - 165 : ("if_acmpeq", 2), - 166 : ("if_acmpne", 2), - 167 : ("goto", 2), - 168 : ("jsr", 2), - 169 : ("ret", 1), - 170 : ("tableswitch", None), # variable number of arguments - 171 : 
("lookupswitch", None), # variable number of arguments - 172 : ("ireturn", 0), - 173 : ("lreturn", 0), - 174 : ("freturn", 0), - 175 : ("dreturn", 0), - 176 : ("areturn", 0), - 177 : ("return_", 0), - 178 : ("getstatic", 2), - 179 : ("putstatic", 2), - 180 : ("getfield", 2), - 181 : ("putfield", 2), - 182 : ("invokevirtual", 2), - 183 : ("invokespecial", 2), - 184 : ("invokestatic", 2), - 185 : ("invokeinterface", 4), - 187 : ("new", 2), - 188 : ("newarray", 1), - 189 : ("anewarray", 2), - 190 : ("arraylength", 0), - 191 : ("athrow", 0), - 192 : ("checkcast", 2), - 193 : ("instanceof", 2), - 194 : ("monitorenter", 0), - 195 : ("monitorexit", 0), - 196 : ("wide", None), # 3 or 5 arguments, stack changes according to modified element - 197 : ("multianewarray", 3), - 198 : ("ifnull", 2), - 199 : ("ifnonnull", 2), - 200 : ("goto_w", 4), - 201 : ("jsr_w", 4), - } - -class BytecodeDisassembler(BytecodeReader): - - "A Java bytecode disassembler." - - bytecode_methods = [spec[0] for spec in BytecodeReader.java_bytecodes.values()] - - def __getattr__(self, name): - if name in self.bytecode_methods: - print "%5s %s" % (self.java_position, name), - return self.generic - else: - raise AttributeError, name - - def generic(self, arguments, program): - print arguments - - def lookupswitch(self, code, program): - print "%5s lookupswitch" % (self.java_position,), - d, r = divmod(self.java_position + 1, 4) - to_boundary = (4 - r) % 4 - code = code[to_boundary:] - default = classfile.u4(code[0:4]) - npairs = classfile.u4(code[4:8]) - print default, npairs - return to_boundary + 8 + npairs * 8 - - def tableswitch(self, code, program): - print "%5s tableswitch" % (self.java_position,), - d, r = divmod(self.java_position + 1, 4) - to_boundary = (4 - r) % 4 - code = code[to_boundary:] - default = classfile.u4(code[0:4]) - low = classfile.u4(code[4:8]) - high = classfile.u4(code[8:12]) - print default, low, high - return to_boundary + 12 + (high - low + 1) * 4 - -class 
BytecodeDisassemblerProgram: - position = 0 - def setup_except(self, target): - print "(setup_except %s)" % target - def setup_finally(self, target): - print "(setup_finally %s)" % target - def end_exception(self): - print "(end_exception)" - def start_handler(self, exc_name, class_file): - print "(start_handler %s)" % exc_name - def pop_block(self): - print "(pop_block)" - -class BytecodeTranslator(BytecodeReader): - - "A Java bytecode translator which uses a Python bytecode writer." - - def aaload(self, arguments, program): - # NOTE: No type checking performed. - program.binary_subscr() - - def aastore(self, arguments, program): - # NOTE: No type checking performed. - # Stack: arrayref, index, value - program.rot_three() # Stack: value, arrayref, index - program.store_subscr() - - def aconst_null(self, arguments, program): - program.load_const(None) - - def aload(self, arguments, program): - program.load_fast(arguments[0]) - - def aload_0(self, arguments, program): - program.load_fast(0) - - def aload_1(self, arguments, program): - program.load_fast(1) - - def aload_2(self, arguments, program): - program.load_fast(2) - - def aload_3(self, arguments, program): - program.load_fast(3) - - def anewarray(self, arguments, program): - # NOTE: Does not raise NegativeArraySizeException. - # NOTE: Not using the index to type the list/array. 
- index = (arguments[0] << 8) + arguments[1] - self._newarray(program) - - def _newarray(self, program): - program.build_list(0) # Stack: count, list - program.rot_two() # Stack: list, count - program.setup_loop() - program.load_global("range") - program.load_const(0) # Stack: list, count, range, 0 - program.rot_three() # Stack: list, 0, count, range - program.rot_three() # Stack: list, range, 0, count - program.call_function(2) # Stack: list, range_list - program.get_iter() # Stack: list, iter - program.for_iter() # Stack: list, iter, value - program.pop_top() # Stack: list, iter - program.rot_two() # Stack: iter, list - program.dup_top() # Stack: iter, list, list - program.load_attr("append") # Stack: iter, list, append - program.load_const(None) # Stack: iter, list, append, None - program.call_function(1) # Stack: iter, list, None - program.pop_top() # Stack: iter, list - program.rot_two() # Stack: list, iter - program.end_loop() # Back to for_iter above - - def areturn(self, arguments, program): - program.return_value() - - def arraylength(self, arguments, program): - program.load_global("len") # Stack: arrayref, len - program.rot_two() # Stack: len, arrayref - program.call_function(1) - - def astore(self, arguments, program): - program.store_fast(arguments[0]) - - def astore_0(self, arguments, program): - program.store_fast(0) - - def astore_1(self, arguments, program): - program.store_fast(1) - - def astore_2(self, arguments, program): - program.store_fast(2) - - def astore_3(self, arguments, program): - program.store_fast(3) - - def athrow(self, arguments, program): - # NOTE: NullPointerException not raised where null/None is found on the stack. - # If this instruction appears in a finally handler, use end_finally instead. - if self.in_finally: - program.end_finally() - else: - # Wrap the exception in a Python exception. 
- program.load_global("Exception") # Stack: objectref, Exception - program.rot_two() # Stack: Exception, objectref - program.call_function(1) # Stack: exception - program.raise_varargs(1) - # NOTE: This seems to put another object on the stack. - - baload = aaload - bastore = aastore - - def bipush(self, arguments, program): - program.load_const(signed1(arguments[0])) - - caload = aaload - castore = aastore - - def checkcast(self, arguments, program): - index = (arguments[0] << 8) + arguments[1] - target_name = self.class_file.constants[index - 1].get_python_name() - program.use_external_name(target_name) - program.dup_top() # Stack: objectref, objectref - program.load_const(None) # Stack: objectref, objectref, None - program.compare_op("is") # Stack: objectref, result - program.jump_to_label(1, "next") - program.pop_top() # Stack: objectref - program.dup_top() # Stack: objectref, objectref - program.load_global("isinstance") # Stack: objectref, objectref, isinstance - program.rot_two() # Stack: objectref, isinstance, objectref - load_class_name(self.class_file, target_name, program) - program.call_function(2) # Stack: objectref, result - program.jump_to_label(1, "next") - program.pop_top() # Stack: objectref - program.pop_top() # Stack: - program.use_external_name("java.lang.ClassCastException") - load_class_name(self.class_file, "java.lang.ClassCastException", program) - program.call_function(0) # Stack: exception - # Wrap the exception in a Python exception. - program.load_global("Exception") # Stack: exception, Exception - program.rot_two() # Stack: Exception, exception - program.call_function(1) # Stack: exception - program.raise_varargs(1) - # NOTE: This seems to put another object on the stack. 
- program.start_label("next") - program.pop_top() # Stack: objectref - - def d2f(self, arguments, program): - pass - - def d2i(self, arguments, program): - program.load_global("int") # Stack: value, int - program.rot_two() # Stack: int, value - program.call_function(1) # Stack: result - - d2l = d2i # Preserving Java semantics - - def dadd(self, arguments, program): - # NOTE: No type checking performed. - program.binary_add() - - daload = aaload - dastore = aastore - - def dcmpg(self, arguments, program): - # NOTE: No type checking performed. - program.compare_op(">") - - def dcmpl(self, arguments, program): - # NOTE: No type checking performed. - program.compare_op("<") - - def dconst_0(self, arguments, program): - program.load_const(0.0) - - def dconst_1(self, arguments, program): - program.load_const(1.0) - - def ddiv(self, arguments, program): - # NOTE: No type checking performed. - program.binary_divide() - - dload = aload - dload_0 = aload_0 - dload_1 = aload_1 - dload_2 = aload_2 - dload_3 = aload_3 - - def dmul(self, arguments, program): - # NOTE: No type checking performed. - program.binary_multiply() - - def dneg(self, arguments, program): - # NOTE: No type checking performed. - program.unary_negative() - - def drem(self, arguments, program): - # NOTE: No type checking performed. - program.binary_modulo() - - dreturn = areturn - dstore = astore - dstore_0 = astore_0 - dstore_1 = astore_1 - dstore_2 = astore_2 - dstore_3 = astore_3 - - def dsub(self, arguments, program): - # NOTE: No type checking performed. - program.binary_subtract() - - def dup(self, arguments, program): - program.dup_top() - - def dup_x1(self, arguments, program): - # Ignoring computational type categories. - program.dup_top() - program.rot_three() - - def dup_x2(self, arguments, program): - # Ignoring computational type categories. 
- program.dup_top() - program.rot_four() - - dup2 = dup # Ignoring computational type categories - dup2_x1 = dup_x1 # Ignoring computational type categories - dup2_x2 = dup_x2 # Ignoring computational type categories - - def f2d(self, arguments, program): - pass # Preserving Java semantics - - def f2i(self, arguments, program): - program.load_global("int") # Stack: value, int - program.rot_two() # Stack: int, value - program.call_function(1) # Stack: result - - f2l = f2i # Preserving Java semantics - fadd = dadd - faload = daload - fastore = dastore - fcmpg = dcmpg - fcmpl = dcmpl - fconst_0 = dconst_0 - fconst_1 = dconst_1 - - def fconst_2(self, arguments, program): - program.load_const(2.0) - - fdiv = ddiv - fload = dload - fload_0 = dload_0 - fload_1 = dload_1 - fload_2 = dload_2 - fload_3 = dload_3 - fmul = dmul - fneg = dneg - frem = drem - freturn = dreturn - fstore = dstore - fstore_0 = dstore_0 - fstore_1 = dstore_1 - fstore_2 = dstore_2 - fstore_3 = dstore_3 - fsub = dsub - - def getfield(self, arguments, program): - index = (arguments[0] << 8) + arguments[1] - target_name = self.class_file.constants[index - 1].get_python_name() - # NOTE: Using the string version of the name which may contain incompatible characters. - program.load_attr(str(target_name)) - - def getstatic(self, arguments, program): - index = (arguments[0] << 8) + arguments[1] - target = self.class_file.constants[index - 1] - target_name = target.get_python_name() - - # Get the class name instead of the fully qualified name. - - full_class_name = target.get_class().get_python_name() - program.use_external_name(full_class_name) - load_class_name(self.class_file, full_class_name, program) - # NOTE: Using the string version of the name which may contain incompatible characters. 
- program.load_attr(str(target_name)) - - def goto(self, arguments, program): - offset = signed2((arguments[0] << 8) + arguments[1]) - java_absolute = self.java_position + offset - program.jump_absolute(self.position_mapping[java_absolute]) - - def goto_w(self, arguments, program): - offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) - java_absolute = self.java_position + offset - program.jump_absolute(self.position_mapping[java_absolute]) - - def i2b(self, arguments, program): - pass - - def i2c(self, arguments, program): - pass - - def i2d(self, arguments, program): - program.load_global("float") # Stack: value, float - program.rot_two() # Stack: float, value - program.call_function(1) # Stack: result - - i2f = i2d # Not distinguishing between float and double - - def i2l(self, arguments, program): - pass # Preserving Java semantics - - def i2s(self, arguments, program): - pass # Not distinguishing between int and short - - iadd = fadd - iaload = faload - - def iand(self, arguments, program): - # NOTE: No type checking performed. 
- program.binary_and() - - iastore = fastore - - def iconst_m1(self, arguments, program): - program.load_const(-1) - - def iconst_0(self, arguments, program): - program.load_const(0) - - def iconst_1(self, arguments, program): - program.load_const(1) - - def iconst_2(self, arguments, program): - program.load_const(2) - - def iconst_3(self, arguments, program): - program.load_const(3) - - def iconst_4(self, arguments, program): - program.load_const(4) - - def iconst_5(self, arguments, program): - program.load_const(5) - - idiv = fdiv - - def _if_xcmpx(self, arguments, program, op): - offset = signed2((arguments[0] << 8) + arguments[1]) - java_absolute = self.java_position + offset - program.compare_op(op) - program.jump_to_label(0, "next") # skip if false - program.pop_top() - program.jump_absolute(self.position_mapping[java_absolute]) - program.start_label("next") - program.pop_top() - - def if_acmpeq(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "is") - - def if_acmpne(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "is not") - - def if_icmpeq(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "==") - - def if_icmpne(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "!=") - - def if_icmplt(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "<") - - def if_icmpge(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, ">=") - - def if_icmpgt(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, ">") - - def if_icmple(self, arguments, program): - # NOTE: No type checking performed. - self._if_xcmpx(arguments, program, "<=") - - def ifeq(self, arguments, program): - # NOTE: No type checking performed. 
- program.load_const(0) - self._if_xcmpx(arguments, program, "==") - - def ifne(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, "!=") - - def iflt(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, "<") - - def ifge(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, ">=") - - def ifgt(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, ">") - - def ifle(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(0) - self._if_xcmpx(arguments, program, "<=") - - def ifnonnull(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(None) - self._if_xcmpx(arguments, program, "is not") - - def ifnull(self, arguments, program): - # NOTE: No type checking performed. - program.load_const(None) - self._if_xcmpx(arguments, program, "is") - - def iinc(self, arguments, program): - # NOTE: No type checking performed. - program.load_fast(arguments[0]) - program.load_const(arguments[1]) - program.binary_add() - program.store_fast(arguments[0]) - - iload = fload - iload_0 = fload_0 - iload_1 = fload_1 - iload_2 = fload_2 - iload_3 = fload_3 - imul = fmul - ineg = fneg - - def instanceof(self, arguments, program): - index = (arguments[0] << 8) + arguments[1] - target_name = self.class_file.constants[index - 1].get_python_name() - program.use_external_name(target_name) - program.load_global("isinstance") # Stack: objectref, isinstance - program.rot_two() # Stack: isinstance, objectref - load_class_name(self.class_file, target_name, program) - program.call_function(2) # Stack: result - - def _invoke(self, target_name, program): - # NOTE: Using the string version of the name which may contain incompatible characters. 
- program.load_attr(str(target_name)) # Stack: tuple, method - program.rot_two() # Stack: method, tuple - program.call_function_var(0) # Stack: result - - def invokeinterface(self, arguments, program): - # NOTE: This implementation does not perform the necessary checks for - # NOTE: signature-based polymorphism. - # NOTE: Java rules not specifically obeyed. - index = (arguments[0] << 8) + arguments[1] - # NOTE: "count" == nargs + 1, apparently. - count = arguments[2] - 1 - target_name = self.class_file.constants[index - 1].get_python_name() - # Stack: objectref, arg1, arg2, ... - program.build_tuple(count) # Stack: objectref, tuple - program.rot_two() # Stack: tuple, objectref - # NOTE: The interface information is not used to discover the correct - # NOTE: method. - self._invoke(target_name, program) - - def invokespecial(self, arguments, program): - # NOTE: This implementation does not perform the necessary checks for - # NOTE: signature-based polymorphism. - # NOTE: Java rules not specifically obeyed. - index = (arguments[0] << 8) + arguments[1] - target = self.class_file.constants[index - 1] - original_name = target.get_name() - target_name = target.get_python_name() - - # Get the number of parameters from the descriptor. - - count = len(target.get_descriptor()[0]) - - # First, we build a tuple of the reference and arguments. - - program.build_tuple(count + 1) # Stack: tuple - - # Get the class name instead of the fully qualified name. - # NOTE: Not bothering with Object initialisation. - - full_class_name = target.get_class().get_python_name() - if full_class_name not in ("java.lang.Object", "java.lang.Exception"): - program.use_external_name(full_class_name) - load_class_name(self.class_file, full_class_name, program) - self._invoke(target_name, program) - - # Remove Python None return value. 
- - if str(original_name) == "": - program.pop_top() - - def invokestatic(self, arguments, program): - # NOTE: This implementation does not perform the necessary checks for - # NOTE: signature-based polymorphism. - # NOTE: Java rules not specifically obeyed. - index = (arguments[0] << 8) + arguments[1] - target = self.class_file.constants[index - 1] - target_name = target.get_python_name() - - # Get the number of parameters from the descriptor. - - count = len(target.get_descriptor()[0]) - - # Stack: arg1, arg2, ... - - program.build_tuple(count) # Stack: tuple - - # Use the class to provide access to static methods. - # Get the class name instead of the fully qualified name. - - full_class_name = target.get_class().get_python_name() - if full_class_name not in ("java.lang.Object", "java.lang.Exception"): - program.use_external_name(full_class_name) - load_class_name(self.class_file, full_class_name, program) - self._invoke(target_name, program) - - def invokevirtual (self, arguments, program): - # NOTE: This implementation does not perform the necessary checks for - # NOTE: signature-based polymorphism. - # NOTE: Java rules not specifically obeyed. - index = (arguments[0] << 8) + arguments[1] - target = self.class_file.constants[index - 1] - target_name = target.get_python_name() - # Get the number of parameters from the descriptor. - count = len(target.get_descriptor()[0]) - # Stack: objectref, arg1, arg2, ... - program.build_tuple(count) # Stack: objectref, tuple - program.rot_two() # Stack: tuple, objectref - self._invoke(target_name, program) - - def ior(self, arguments, program): - # NOTE: No type checking performed. - program.binary_or() - - irem = frem - ireturn = freturn - - def ishl(self, arguments, program): - # NOTE: No type checking performed. - # NOTE: Not verified. - program.binary_lshift() - - def ishr(self, arguments, program): - # NOTE: No type checking performed. - # NOTE: Not verified. 
- program.binary_rshift() - - istore = fstore - istore_0 = fstore_0 - istore_1 = fstore_1 - istore_2 = fstore_2 - istore_3 = fstore_3 - isub = fsub - iushr = ishr # Ignoring distinctions between arithmetic and logical shifts - - def ixor(self, arguments, program): - # NOTE: No type checking performed. - program.binary_xor() - - def jsr(self, arguments, program): - offset = signed2((arguments[0] << 8) + arguments[1]) - java_absolute = self.java_position + offset - # Store the address of the next instruction. - program.load_const_ret(self.position_mapping[self.java_position + 3]) - program.jump_absolute(self.position_mapping[java_absolute]) - - def jsr_w(self, arguments, program): - offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) - java_absolute = self.java_position + offset - # Store the address of the next instruction. - program.load_const_ret(self.position_mapping[self.java_position + 5]) - program.jump_absolute(self.position_mapping[java_absolute]) - - l2d = i2d - l2f = i2f - - def l2i(self, arguments, program): - pass # Preserving Java semantics - - ladd = iadd - laload = iaload - land = iand - lastore = iastore - - def lcmp(self, arguments, program): - # NOTE: No type checking performed. - program.dup_topx(2) # Stack: value1, value2, value1, value2 - program.compare_op(">") # Stack: value1, value2, result - program.jump_to_label(0, "equals") - # True - produce result and branch. - program.pop_top() # Stack: value1, value2 - program.pop_top() # Stack: value1 - program.pop_top() # Stack: - program.load_const(1) # Stack: 1 - program.jump_to_label(None, "next") - # False - test equality. - program.start_label("equals") - program.pop_top() # Stack: value1, value2 - program.dup_topx(2) # Stack: value1, value2, value1, value2 - program.compare_op("==") # Stack: value1, value2, result - program.jump_to_label(0, "less") - # True - produce result and branch. 
- program.pop_top() # Stack: value1, value2 - program.pop_top() # Stack: value1 - program.pop_top() # Stack: - program.load_const(0) # Stack: 0 - program.jump_to_label(None, "next") - # False - produce result. - program.start_label("less") - program.pop_top() # Stack: value1, value2 - program.pop_top() # Stack: value1 - program.pop_top() # Stack: - program.load_const(-1) # Stack: -1 - program.start_label("next") - - lconst_0 = iconst_0 - lconst_1 = iconst_1 - - def ldc(self, arguments, program): - const = self.class_file.constants[arguments[0] - 1] - if isinstance(const, classfile.StringInfo): - program.use_external_name("java.lang.String") - program.load_global("java") - program.load_attr("lang") - program.load_attr("String") - program.load_const(const.get_value()) - program.call_function(1) - else: - program.load_const(const.get_value()) - - def ldc_w(self, arguments, program): - const = self.class_file.constants[(arguments[0] << 8) + arguments[1] - 1] - if isinstance(const, classfile.StringInfo): - program.use_external_name("java.lang.String") - program.load_global("java") - program.load_attr("lang") - program.load_attr("String") - program.load_const(const.get_value()) - program.call_function(1) - else: - program.load_const(const.get_value()) - - ldc2_w = ldc_w - ldiv = idiv - lload = iload - lload_0 = iload_0 - lload_1 = iload_1 - lload_2 = iload_2 - lload_3 = iload_3 - lmul = imul - lneg = ineg - - def lookupswitch(self, code, program): - - # Find the offset to the next 4 byte boundary in the code. - - d, r = divmod(self.java_position + 1, 4) - to_boundary = (4 - r) % 4 - - # Get the pertinent arguments. - - code = code[to_boundary:] - default = classfile.u4(code[0:4]) - npairs = classfile.u4(code[4:8]) - - # Process the pairs. - # NOTE: This is not the most optimal implementation. 
- - pair_index = 8 - for pair in range(0, npairs): - match = classfile.u4(code[pair_index:pair_index+4]) - offset = classfile.s4(code[pair_index+4:pair_index+8]) - # Calculate the branch target. - java_absolute = self.java_position + offset - # Generate branching code. - program.dup_top() # Stack: key, key - program.load_const(match) # Stack: key, key, match - program.compare_op("==") # Stack: key, result - program.jump_to_label(0, "end") - program.pop_top() # Stack: key - program.pop_top() # Stack: - program.jump_absolute(self.position_mapping[java_absolute]) - # Generate the label for the end of the branching code. - program.start_label("end") - program.pop_top() # Stack: key - # Update the index. - pair_index += 4 - - # Generate the default. - - java_absolute = self.java_position + default - program.jump_absolute(self.position_mapping[java_absolute]) - return pair_index + to_boundary - - lor = ior - lrem = irem - lreturn = ireturn - lshl = ishl - lshr = ishr - lstore = istore - lstore_0 = istore_0 - lstore_1 = istore_1 - lstore_2 = istore_2 - lstore_3 = istore_3 - lsub = isub - lushr = iushr - lxor = ixor - - def monitorenter(self, arguments, program): - # NOTE: To be implemented. - pass - - def monitorexit(self, arguments, program): - # NOTE: To be implemented. 
- pass - - def multianewarray(self, arguments, program): - index = (arguments[0] << 8) + arguments[1] - dimensions = arguments[2] - # Stack: count1, ..., countN-1, countN - self._newarray(program) # Stack: count1, ..., countN-1, list - for dimension in range(1, dimensions): - program.rot_two() # Stack: count1, ..., list, countN-1 - program.build_list(0) # Stack: count1, ..., list, countN-1, new-list - program.rot_three() # Stack: count1, ..., new-list, list, countN-1 - program.setup_loop() - program.load_const(0) # Stack: count1, ..., new-list, list, countN-1, 0 - program.rot_two() # Stack: count1, ..., new-list, list, 0, countN-1 - program.load_global("range") # Stack: count1, ..., new-list, list, 0, countN-1, range - program.rot_three() # Stack: count1, ..., new-list, list, range, 0, countN-1 - program.call_function(2) # Stack: count1, ..., new-list, list, range-list - program.get_iter() # Stack: count1, ..., new-list, list, iter - program.for_iter() # Stack: count1, ..., new-list, list, iter, value - program.pop_top() # Stack: count1, ..., new-list, list, iter - program.rot_three() # Stack: count1, ..., iter, new-list, list - program.slice_0() # Stack: count1, ..., iter, new-list, list[:] - program.dup_top() # Stack: count1, ..., iter, new-list, list[:], list[:] - program.rot_three() # Stack: count1, ..., iter, list[:], new-list, list[:] - program.rot_two() # Stack: count1, ..., iter, list[:], list[:], new-list - program.dup_top() # Stack: count1, ..., iter, list[:], list[:], new-list, new-list - program.load_attr("append") # Stack: count1, ..., iter, list[:], list[:], new-list, append - program.rot_three() # Stack: count1, ..., iter, list[:], append, list[:], new-list - program.rot_three() # Stack: count1, ..., iter, list[:], new-list, append, list[:] - program.call_function(1) # Stack: count1, ..., iter, list[:], new-list, None - program.pop_top() # Stack: count1, ..., iter, list[:], new-list - program.rot_two() # Stack: count1, ..., iter, new-list, list[:] - 
program.rot_three() # Stack: count1, ..., list[:], iter, new-list - program.rot_three() # Stack: count1, ..., new-list, list[:], iter - program.end_loop() # Stack: count1, ..., new-list, list[:], iter - program.pop_top() # Stack: count1, ..., new-list - - def new(self, arguments, program): - # This operation is considered to be the same as the calling of the - # initialisation method of the given class with no arguments. - - index = (arguments[0] << 8) + arguments[1] - target_name = self.class_file.constants[index - 1].get_python_name() - program.use_external_name(target_name) - - # NOTE: Using the string version of the name which may contain incompatible characters. - program.load_global("object") - program.load_attr("__new__") - load_class_name(self.class_file, target_name, program) - program.call_function(1) - - def newarray(self, arguments, program): - # NOTE: Does not raise NegativeArraySizeException. - # NOTE: Not using the arguments to type the list/array. - self._newarray(program) - - def nop(self, arguments, program): - pass - - def pop(self, arguments, program): - program.pop_top() - - pop2 = pop # ignoring Java stack value distinctions - - def putfield(self, arguments, program): - index = (arguments[0] << 8) + arguments[1] - target_name = self.class_file.constants[index - 1].get_python_name() - program.rot_two() - # NOTE: Using the string version of the name which may contain incompatible characters. - program.store_attr(str(target_name)) - - def putstatic(self, arguments, program): - index = (arguments[0] << 8) + arguments[1] - target = self.class_file.constants[index - 1] - target_name = target.get_python_name() - - # Get the class name instead of the fully qualified name. - - full_class_name = target.get_class().get_python_name() - program.use_external_name(full_class_name) - load_class_name(self.class_file, full_class_name, program) - # NOTE: Using the string version of the name which may contain incompatible characters. 
- program.store_attr(str(target_name)) - - def ret(self, arguments, program): - program.ret(arguments[0]) - # Indicate that the finally handler is probably over. - # NOTE: This is seemingly not guaranteed. - self.in_finally = 0 - - def return_(self, arguments, program): - program.load_const(None) - program.return_value() - - saload = laload - sastore = lastore - - def sipush(self, arguments, program): - program.load_const(signed2((arguments[0] << 8) + arguments[1])) - - def swap(self, arguments, program): - program.rot_two() - - def tableswitch(self, code, program): - - # Find the offset to the next 4 byte boundary in the code. - - d, r = divmod(self.java_position + 1, 4) - to_boundary = (4 - r) % 4 - - # Get the pertinent arguments. - - code = code[to_boundary:] - default = classfile.u4(code[0:4]) - low = classfile.u4(code[4:8]) - high = classfile.u4(code[8:12]) - - # Process the jump entries. - # NOTE: This is not the most optimal implementation. - - jump_index = 12 - for jump in range(low, high + 1): - offset = classfile.s4(code[jump_index:jump_index + 4]) - - # Calculate the branch target. - - java_absolute = self.java_position + offset - - # Generate branching code. - - program.dup_top() # Stack: key, key - program.load_const(jump) # Stack: key, key, jump - program.compare_op("==") # Stack: key, result - program.jump_to_label(0, "end") - program.pop_top() # Stack: key - program.pop_top() # Stack: - program.jump_absolute(self.position_mapping[java_absolute]) - - # Generate the label for the end of the branching code. - - program.start_label("end") - program.pop_top() # Stack: key - - # Update the index. - - jump_index += 4 - - # Generate the default. - - java_absolute = self.java_position + default - program.jump_absolute(self.position_mapping[java_absolute]) - return jump_index + to_boundary - - def wide(self, code, program): - # NOTE: To be implemented. 
- return number_of_arguments - -def disassemble(class_file, method): - disassembler = BytecodeDisassembler(class_file) - disassembler.process(method, BytecodeDisassemblerProgram()) - -class ClassTranslator: - - """ - A class which provides a wrapper around a class file and the means to - translate the represented class into a Python class. - """ - - def __init__(self, class_file): - - "Initialise the object with the given 'class_file'." - - self.class_file = class_file - self.filename = "" - - for attribute in self.class_file.attributes: - if isinstance(attribute, classfile.SourceFileAttributeInfo): - self.filename = str(attribute.get_name()) - - def translate_method(self, method): - - "Translate the given 'method' - an object obtained from the class file." - - translator = BytecodeTranslator(self.class_file) - writer = BytecodeWriter() - translator.process(method, writer) - return translator, writer - - def make_method(self, real_method_name, methods, global_names, namespace): - - """ - Make a dispatcher method with the given 'real_method_name', providing - dispatch to the supplied type-sensitive 'methods', accessing the given - 'global_names' where necessary, and storing the new method in the - 'namespace' provided. - """ - - if real_method_name == "": - method_name = "__init__" - else: - method_name = real_method_name - - # Where only one method exists, just make an alias. - - if len(methods) == 1: - method, fn = methods[0] - namespace[method_name] = fn - return - - # Write a simple bytecode dispatching mechanism. - - program = BytecodeWriter() - - # Remember whether any of the methods are static. - # NOTE: This should be an all or nothing situation. - - method_is_static = 0 - - # NOTE: The code below should use dictionary-based dispatch for better performance. 
- - for method, fn in methods: - method_is_static = real_method_name != "" and method_is_static or \ - classfile.has_flags(method.access_flags, [classfile.STATIC]) - - if method_is_static: - program.load_fast(0) # Stack: arguments - else: - program.load_fast(1) # Stack: arguments - - program.setup_loop() - program.load_const(1) # Stack: arguments, 1 - - if method_is_static: - program.store_fast(1) # Stack: arguments (found = 1) - else: - program.store_fast(2) # Stack: arguments (found = 1) - - # Emit a list of parameter types. - - descriptor_types = method.get_descriptor()[0] - for descriptor_type in descriptor_types: - base_type, object_type, array_type = descriptor_type - python_type = classfile.descriptor_base_type_mapping[base_type] - if python_type == "instance": - # NOTE: This will need extending. - python_type = object_type - program.load_global(python_type) # Stack: arguments, type, ... - program.build_list(len(descriptor_types)) - # Stack: arguments, types - # Make a map of arguments and types. - program.load_const(None) # Stack: arguments, types, None - program.rot_three() # Stack: None, arguments, types - program.build_tuple(3) # Stack: tuple - program.load_global("map") # Stack: tuple, map - program.rot_two() # Stack: map, tuple - program.call_function_var(0) # Stack: list (mapping arguments to types) - # Loop over each pair. - program.get_iter() # Stack: iter - program.for_iter() # Stack: iter, (argument, type) - program.unpack_sequence(2) # Stack: iter, type, argument - program.dup_top() # Stack: iter, type, argument, argument - program.load_const(None) # Stack: iter, type, argument, argument, None - program.compare_op("is") # Stack: iter, type, argument, result - # Missing argument? 
- program.jump_to_label(0, "present") - program.pop_top() # Stack: iter, type, argument - program.pop_top() # Stack: iter, type - program.pop_top() # Stack: iter - program.load_const(0) # Stack: iter, 0 - - if method_is_static: - program.store_fast(1) # Stack: iter (found = 0) - else: - program.store_fast(2) # Stack: iter (found = 0) - - program.break_loop() - # Argument was present. - program.start_label("present") - program.pop_top() # Stack: iter, type, argument - program.rot_two() # Stack: iter, argument, type - program.dup_top() # Stack: iter, argument, type, type - program.load_const(None) # Stack: iter, argument, type, type, None - program.compare_op("is") # Stack: iter, argument, type, result - # Missing parameter type? - program.jump_to_label(0, "present") - program.pop_top() # Stack: iter, argument, type - program.pop_top() # Stack: iter, argument - program.pop_top() # Stack: iter - program.load_const(0) # Stack: iter, 0 - - if method_is_static: - program.store_fast(1) # Stack: iter (found = 0) - else: - program.store_fast(2) # Stack: iter (found = 0) - - program.break_loop() - # Parameter was present. - program.start_label("present") - program.pop_top() # Stack: iter, argument, type - program.build_tuple(2) # Stack: iter, (argument, type) - program.load_global("isinstance") # Stack: iter, (argument, type), isinstance - program.rot_two() # Stack: iter, isinstance, (argument, type) - program.call_function_var(0) # Stack: iter, result - program.jump_to_label(1, "match") - program.pop_top() # Stack: iter - program.load_const(0) # Stack: iter, 0 - - if method_is_static: - program.store_fast(1) # Stack: iter (found = 0) - else: - program.store_fast(2) # Stack: iter (found = 0) - - program.break_loop() - # Argument type and parameter type matched. - program.start_label("match") - program.pop_top() # Stack: iter - program.end_loop() # Stack: - # If all the parameters matched, call the method. 
- - if method_is_static: - program.load_fast(1) # Stack: match - else: - program.load_fast(2) # Stack: match - - program.jump_to_label(0, "failed") - # All the parameters matched. - program.pop_top() # Stack: - - if method_is_static: - program.load_fast(0) # Stack: arguments - program.load_global(str(self.class_file.this_class.get_python_name())) - # Stack: arguments, class - else: - program.load_fast(1) # Stack: arguments - program.load_fast(0) # Stack: arguments, self - - program.load_attr(str(method.get_python_name())) - # Stack: arguments, method - program.rot_two() # Stack: method, arguments - program.call_function_var(0) # Stack: result - program.return_value() - # Try the next method if arguments or parameters were missing or incorrect. - program.start_label("failed") - program.pop_top() # Stack: - - # Raise an exception if nothing matched. - # NOTE: Improve this. - - program.load_const("No matching method") - program.raise_varargs(1) - program.load_const(None) - program.return_value() - - # Add the code as a method in the namespace. - # NOTE: One actual parameter, flags as 71 apparently means that a list - # NOTE: parameter is used in a method. - - if method_is_static: - nargs = 0 - else: - nargs = 1 - nlocals = program.max_locals + 1 - - code = new.code(nargs, nlocals, program.max_stack_depth, 71, program.get_output(), - tuple(program.get_constants()), tuple(program.get_names()), tuple(self.make_varnames(nlocals, method_is_static)), - self.filename, method_name, 0, "") - fn = new.function(code, global_names) - - if method_is_static: - fn = staticmethod(fn) - - namespace[method_name] = fn - - def process(self, global_names): - - """ - Process the class, storing it in the 'global_names' dictionary provided. - Return a tuple containing the class and a list of external names - referenced by the class's methods. - """ - - namespace = {} - - # Make the fields. 
- - for field in self.class_file.fields: - if classfile.has_flags(field.access_flags, [classfile.STATIC]): - field_name = str(field.get_python_name()) - namespace[field_name] = None - - # Make the methods. - - real_methods = {} - external_names = [] - - for method in self.class_file.methods: - real_method_name = str(method.get_name()) - method_name = str(method.get_python_name()) - - translator, writer = self.translate_method(method) - - # Add external names to the master list. - - for external_name in writer.external_names: - if external_name not in external_names: - external_names.append(external_name) - - # Fix up special class initialisation methods and static methods. - - method_is_static = real_method_name != "" and classfile.has_flags(method.access_flags, [classfile.STATIC]) - if method_is_static: - nargs = len(method.get_descriptor()[0]) - else: - nargs = len(method.get_descriptor()[0]) + 1 - nlocals = writer.max_locals + 1 - flags = 67 - - # NOTE: Add line number table later. - - code = new.code(nargs, nlocals, writer.max_stack_depth, flags, writer.get_output(), - tuple(writer.get_constants()), tuple(writer.get_names()), - tuple(self.make_varnames(nlocals, method_is_static)), self.filename, method_name, 0, "") - - # NOTE: May need more globals. - - fn = new.function(code, global_names) - - # Fix up special class initialisation methods and static methods. - - if method_is_static: - fn = staticmethod(fn) - - # Remember the real method name and the corresponding methods produced. - - if not real_methods.has_key(real_method_name): - real_methods[real_method_name] = [] - real_methods[real_method_name].append((method, fn)) - - # Add the method to the class's namespace. - - namespace[method_name] = fn - - # Define superclasses. - - bases = self.get_base_classes(global_names) - - # Define method dispatchers. 
- - for real_method_name, methods in real_methods.items(): - if real_method_name != "": - self.make_method(real_method_name, methods, global_names, namespace) - - # Use only the last part of the fully qualified name. - - full_class_name = str(self.class_file.this_class.get_python_name()) - class_name = full_class_name.split(".")[-1] - cls = new.classobj(class_name, bases, namespace) - global_names[cls.__name__] = cls - - return cls, external_names - - def get_base_classes(self, global_names): - - """ - Identify the superclass, then either load it from the given - 'global_names' if available, or import the class from its parent module. - Return a tuple containing all base classes (typically a single element - tuple). - """ - - original_name = str(self.class_file.super_class.get_name()) - full_this_class_name = str(self.class_file.this_class.get_python_name()) - this_class_name_parts = full_this_class_name.split(".") - this_class_module_name = ".".join(this_class_name_parts[:-1]) - full_super_class_name = str(self.class_file.super_class.get_python_name()) - super_class_name_parts = full_super_class_name.split(".") - super_class_name = super_class_name_parts[-1] - super_class_module_name = ".".join(super_class_name_parts[:-1]) - if super_class_module_name == "": - obj = global_names[super_class_name] - elif super_class_module_name == this_class_module_name: - obj = global_names[super_class_name] - else: - #print "Importing", super_class_module_name, super_class_name - obj = __import__(super_class_module_name, global_names, {}, []) - for super_class_name_part in super_class_name_parts[1:] or [super_class_name]: - #print "*", obj, super_class_name_part - obj = getattr(obj, super_class_name_part) - return (obj,) - - def make_varnames(self, nlocals, method_is_static=0): - - """ - A utility method which invents variable names for the given number - - 'nlocals' - of local variables in a method. Returns a list of such - variable names. 
- - If the optional 'method_is_static' is set to true, do not use "self" as - the first argument name. - """ - - if method_is_static: - l = ["cls"] - else: - l = ["self"] - for i in range(1, nlocals): - l.append("_l%s" % i) - return l[:nlocals] - -# Test functions, useful for tracing generated bytecode operations. - -def _map(*args): - print args - return apply(__builtins__.map, args) - -def _isinstance(*args): - print args - return apply(__builtins__.isinstance, args) - -if __name__ == "__main__": - import sys - import dis - global_names = globals() - #global_names["isinstance"] = _isinstance - #global_names["map"] = _map - for filename in sys.argv[1:]: - f = open(filename, "rb") - c = classfile.ClassFile(f.read()) - translator = ClassTranslator(c) - cls, external_names = translator.process(global_names) - -# vim: tabstop=4 expandtab shiftwidth=4 diff -r 901001c30474 -r 754d36821fc8 classfile.py --- a/classfile.py Fri Jan 21 17:04:41 2005 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,633 +0,0 @@ -#!/usr/bin/env python - -""" -Java class file decoder. Specification found at the following URL: -http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html -""" - -import struct # for general decoding of class files - -# Utility functions. - -def u1(data): - return struct.unpack(">B", data[0:1])[0] - -def u2(data): - return struct.unpack(">H", data[0:2])[0] - -def s2(data): - return struct.unpack(">h", data[0:2])[0] - -def u4(data): - return struct.unpack(">L", data[0:4])[0] - -def s4(data): - return struct.unpack(">l", data[0:4])[0] - -def s8(data): - return struct.unpack(">q", data[0:8])[0] - -def f4(data): - return struct.unpack(">f", data[0:4])[0] - -def f8(data): - return struct.unpack(">d", data[0:8])[0] - -# Useful tables and constants. 
# Maps each descriptor base type character to the name of the Python type
# used to stand in for it.

descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks.
# NOTE: SUPER and SYNCHRONIZED share the value 0x0020; the class file format
# NOTE: reuses that bit (ACC_SUPER on classes, ACC_SYNCHRONIZED on methods).

PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL, SUPER, SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \
0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the 'desired' sequence is set in 'flags'.
    An empty 'desired' sequence is always satisfied.
    """

    # Combine the desired flags with a plain loop rather than relying on the
    # 'reduce' built-in, which is not available in every Python version.

    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.

class PythonMethodUtils:

    """
    A mix-in producing Python-compatible method names. Users of this class
    must provide 'get_name' and 'get_descriptor' methods, the latter returning
    a (parameter types, return type) tuple.
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):

        """
        Return the method name without any parameter information, mapping the
        special Java method names "<init>" and "<clinit>" to "__init__" and
        "__clinit__" respectively.
        """

        name = str(self.get_name())
        if name == "<init>":
            return "__init__"
        elif name == "<clinit>":
            return "__clinit__"
        else:
            return name

    def get_python_name(self):

        """
        Return the method name qualified by its parameter types, so that
        overloaded methods receive distinct names. The "__clinit__" name is
        returned unqualified.
        """

        name = self.get_unqualified_python_name()
        if name == "__clinit__":
            return name
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the parameter types encoded as a single name fragment."

        l = []
        for descriptor_type in self.get_descriptor()[0]:
            l.append(self._get_type_as_name(descriptor_type))
        return self.symbol_sep.join(l)

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name fragment for the given 'descriptor_type' - a tuple of
        the form (base type, object type, array type). The optional 's' is a
        suffix accumulated while descending through array types.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            # Each array dimension contributes one array separator.
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    """
    A mix-in producing Python-compatible dotted names. Users of this class
    must provide a 'get_name' method.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Where the name is an array
        descriptor (starting with "["), the name of the innermost element type
        is used instead.
        """

        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if not name.startswith("["):
            return name.replace("/", ".")
        else:
            return self._get_type_name(
                get_field_descriptor(name)
                ).replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the type name for the given 'descriptor_type' - a tuple of the
        form (base type, object type, array type) - descending through array
        types to the element type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    """
    A mix-in for objects having 'class_file' and 'name_index' attributes.
    """

    def get_name(self):

        """
        Return the constant referenced by the name index, or None if the index
        is zero. Indexes are one-based with respect to the constants list.
        """

        if self.name_index != 0:
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    A mix-in for objects having 'class_file', 'class_index' and
    'name_and_type_index' attributes. The name and descriptor methods delegate
    to the referenced name-and-type constant, returning None where the index
    is zero.
    """

    def _get_name_and_type(self):

        "Return the referenced name-and-type constant, or None if absent."

        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_name(self):

        "Return the name from the name-and-type constant, or None."

        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_name()

    def get_field_descriptor(self):

        "Return the field descriptor from the name-and-type constant, or None."

        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_field_descriptor()

    def get_method_descriptor(self):

        "Return the method descriptor from the name-and-type constant, or None."

        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_method_descriptor()

    def get_class(self):

        "Return the constant referenced by the (one-based) class index."

        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.
- -def get_method_descriptor(s): - assert s[0] == "(" - params = [] - s = s[1:] - while s[0] != ")": - parameter_descriptor, s = _get_parameter_descriptor(s) - params.append(parameter_descriptor) - if s[1] != "V": - return_type, s = _get_field_type(s[1:]) - else: - return_type, s = None, s[1:] - return params, return_type - -def get_field_descriptor(s): - return _get_field_type(s)[0] - -def _get_parameter_descriptor(s): - return _get_field_type(s) - -def _get_component_type(s): - return _get_field_type(s) - -def _get_field_type(s): - base_type, s = _get_base_type(s) - object_type = None - array_type = None - if base_type == "L": - object_type, s = _get_object_type(s) - elif base_type == "[": - array_type, s = _get_array_type(s) - return (base_type, object_type, array_type), s - -def _get_base_type(s): - if len(s) > 0: - return s[0], s[1:] - else: - return None, s - -def _get_object_type(s): - if len(s) > 0: - s_end = s.find(";") - assert s_end != -1 - return s[:s_end], s[s_end+1:] - else: - return None, s - -def _get_array_type(s): - if len(s) > 0: - return _get_component_type(s) - else: - return None, s - -# Constant information. 
- -class ClassInfo(NameUtils, PythonNameUtils): - def init(self, data, class_file): - self.class_file = class_file - self.name_index = u2(data[0:2]) - return data[2:] - -class RefInfo(NameAndTypeUtils): - def init(self, data, class_file): - self.class_file = class_file - self.class_index = u2(data[0:2]) - self.name_and_type_index = u2(data[2:4]) - return data[4:] - -class FieldRefInfo(RefInfo, PythonNameUtils): - def get_descriptor(self): - return RefInfo.get_field_descriptor(self) - -class MethodRefInfo(RefInfo, PythonMethodUtils): - def get_descriptor(self): - return RefInfo.get_method_descriptor(self) - -class InterfaceMethodRefInfo(MethodRefInfo): - pass - -class NameAndTypeInfo(NameUtils, PythonNameUtils): - def init(self, data, class_file): - self.class_file = class_file - self.name_index = u2(data[0:2]) - self.descriptor_index = u2(data[2:4]) - return data[4:] - - def get_field_descriptor(self): - return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) - - def get_method_descriptor(self): - return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) - -class Utf8Info: - def init(self, data, class_file): - self.class_file = class_file - self.length = u2(data[0:2]) - self.bytes = data[2:2+self.length] - return data[2+self.length:] - - def __str__(self): - return self.bytes - - def __unicode__(self): - return unicode(self.bytes, "utf-8") - - def get_value(self): - return str(self) - -class StringInfo: - def init(self, data, class_file): - self.class_file = class_file - self.string_index = u2(data[0:2]) - return data[2:] - - def __str__(self): - return str(self.class_file.constants[self.string_index - 1]) - - def __unicode__(self): - return unicode(self.class_file.constants[self.string_index - 1]) - - def get_value(self): - return str(self) - -class SmallNumInfo: - def init(self, data, class_file): - self.class_file = class_file - self.bytes = data[0:4] - return data[4:] - -class 
IntegerInfo(SmallNumInfo): - def get_value(self): - return s4(self.bytes) - -class FloatInfo(SmallNumInfo): - def get_value(self): - return f4(self.bytes) - -class LargeNumInfo: - def init(self, data, class_file): - self.class_file = class_file - self.high_bytes = data[0:4] - self.low_bytes = data[4:8] - return data[8:] - -class LongInfo(LargeNumInfo): - def get_value(self): - return s8(self.high_bytes + self.low_bytes) - -class DoubleInfo(LargeNumInfo): - def get_value(self): - return f8(self.high_bytes + self.low_bytes) - -# Other information. -# Objects of these classes are generally aware of the class they reside in. - -class ItemInfo(NameUtils): - def init(self, data, class_file): - self.class_file = class_file - self.access_flags = u2(data[0:2]) - self.name_index = u2(data[2:4]) - self.descriptor_index = u2(data[4:6]) - self.attributes, data = self.class_file._get_attributes(data[6:]) - return data - -class FieldInfo(ItemInfo, PythonNameUtils): - def get_descriptor(self): - return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) - -class MethodInfo(ItemInfo, PythonMethodUtils): - def get_descriptor(self): - return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) - -class AttributeInfo: - def init(self, data, class_file): - self.attribute_length = u4(data[0:4]) - self.info = data[4:4+self.attribute_length] - return data[4+self.attribute_length:] - -# NOTE: Decode the different attribute formats. - -class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils): - def init(self, data, class_file): - self.class_file = class_file - self.attribute_length = u4(data[0:4]) - # Permit the NameUtils mix-in. 
- self.name_index = self.sourcefile_index = u2(data[4:6]) - return data[6:] - -class ConstantValueAttributeInfo(AttributeInfo): - def init(self, data, class_file): - self.class_file = class_file - self.attribute_length = u4(data[0:4]) - self.constant_value_index = u2(data[4:6]) - assert 4+self.attribute_length == 6 - return data[4+self.attribute_length:] - - def get_value(self): - return self.class_file.constants[self.constant_value_index - 1].get_value() - -class CodeAttributeInfo(AttributeInfo): - def init(self, data, class_file): - self.class_file = class_file - self.attribute_length = u4(data[0:4]) - self.max_stack = u2(data[4:6]) - self.max_locals = u2(data[6:8]) - self.code_length = u4(data[8:12]) - end_of_code = 12+self.code_length - self.code = data[12:end_of_code] - self.exception_table_length = u2(data[end_of_code:end_of_code+2]) - self.exception_table = [] - data = data[end_of_code + 2:] - for i in range(0, self.exception_table_length): - exception = ExceptionInfo() - data = exception.init(data) - self.exception_table.append(exception) - self.attributes, data = self.class_file._get_attributes(data) - return data - -class ExceptionsAttributeInfo(AttributeInfo): - def init(self, data, class_file): - self.class_file = class_file - self.attribute_length = u4(data[0:4]) - self.number_of_exceptions = u2(data[4:6]) - self.exception_index_table = [] - index = 6 - for i in range(0, self.number_of_exceptions): - self.exception_index_table.append(u2(data[index:index+2])) - index += 2 - return data[index:] - - def get_exception(self, i): - exception_index = self.exception_index_table[i] - return self.class_file.constants[exception_index - 1] - -class InnerClassesAttributeInfo(AttributeInfo): - def init(self, data, class_file): - self.class_file = class_file - self.attribute_length = u4(data[0:4]) - self.number_of_classes = u2(data[4:6]) - self.classes = [] - data = data[6:] - for i in range(0, self.number_of_classes): - inner_class = InnerClassInfo() - data = 
inner_class.init(data, self.class_file) - self.classes.append(inner_class) - return data - -class SyntheticAttributeInfo(AttributeInfo): - pass - -class LineNumberAttributeInfo(AttributeInfo): - def init(self, data, class_file): - self.class_file = class_file - self.attribute_length = u4(data[0:4]) - self.line_number_table_length = u2(data[4:6]) - self.line_number_table = [] - data = data[6:] - for i in range(0, self.line_number_table_length): - line_number = LineNumberInfo() - data = line_number.init(data) - self.line_number_table.append(line_number) - return data - -class LocalVariableAttributeInfo(AttributeInfo): - def init(self, data, class_file): - self.class_file = class_file - self.attribute_length = u4(data[0:4]) - self.local_variable_table_length = u2(data[4:6]) - self.local_variable_table = [] - data = data[6:] - for i in range(0, self.local_variable_table_length): - local_variable = LocalVariableInfo() - data = local_variable.init(data, self.class_file) - self.local_variable_table.append(local_variable) - return data - -class DeprecatedAttributeInfo(AttributeInfo): - pass - -# Child classes of the attribute information classes. - -class ExceptionInfo: - def init(self, data): - self.start_pc = u2(data[0:2]) - self.end_pc = u2(data[2:4]) - self.handler_pc = u2(data[4:6]) - self.catch_type = u2(data[6:8]) - return data[8:] - -class InnerClassInfo(NameUtils): - def init(self, data, class_file): - self.class_file = class_file - self.inner_class_info_index = u2(data[0:2]) - self.outer_class_info_index = u2(data[2:4]) - # Permit the NameUtils mix-in. 
- self.name_index = self.inner_name_index = u2(data[4:6]) - self.inner_class_access_flags = u2(data[6:8]) - return data[8:] - -class LineNumberInfo: - def init(self, data): - self.start_pc = u2(data[0:2]) - self.line_number = u2(data[2:4]) - return data[4:] - -class LocalVariableInfo(NameUtils, PythonNameUtils): - def init(self, data, class_file): - self.class_file = class_file - self.start_pc = u2(data[0:2]) - self.length = u2(data[2:4]) - self.name_index = u2(data[4:6]) - self.descriptor_index = u2(data[6:8]) - self.index = u2(data[8:10]) - return data[10:] - - def get_descriptor(self): - return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) - -# Exceptions. - -class UnknownTag(Exception): - pass - -class UnknownAttribute(Exception): - pass - -# Abstractions for the main structures. - -class ClassFile: - - "A class representing a Java class file." - - def __init__(self, s): - - """ - Process the given string 's', populating the object with the class - file's details. - """ - - self.constants, s = self._get_constants(s[8:]) - self.access_flags, s = self._get_access_flags(s) - self.this_class, s = self._get_this_class(s) - self.super_class, s = self._get_super_class(s) - self.interfaces, s = self._get_interfaces(s) - self.fields, s = self._get_fields(s) - self.methods, s = self._get_methods(s) - self.attributes, s = self._get_attributes(s) - - def _decode_const(self, s): - tag = u1(s[0:1]) - if tag == 1: - const = Utf8Info() - elif tag == 3: - const = IntegerInfo() - elif tag == 4: - const = FloatInfo() - elif tag == 5: - const = LongInfo() - elif tag == 6: - const = DoubleInfo() - elif tag == 7: - const = ClassInfo() - elif tag == 8: - const = StringInfo() - elif tag == 9: - const = FieldRefInfo() - elif tag == 10: - const = MethodRefInfo() - elif tag == 11: - const = InterfaceMethodRefInfo() - elif tag == 12: - const = NameAndTypeInfo() - else: - raise UnknownTag, tag - - # Initialise the constant object. 
- - s = const.init(s[1:], self) - return const, s - - def _get_constants_from_table(self, count, s): - l = [] - # Have to skip certain entries specially. - i = 1 - while i < count: - c, s = self._decode_const(s) - l.append(c) - # Add a blank entry after "large" entries. - if isinstance(c, LargeNumInfo): - l.append(None) - i += 1 - i += 1 - return l, s - - def _get_items_from_table(self, cls, number, s): - l = [] - for i in range(0, number): - f = cls() - s = f.init(s, self) - l.append(f) - return l, s - - def _get_methods_from_table(self, number, s): - return self._get_items_from_table(MethodInfo, number, s) - - def _get_fields_from_table(self, number, s): - return self._get_items_from_table(FieldInfo, number, s) - - def _get_attribute_from_table(self, s): - attribute_name_index = u2(s[0:2]) - constant_name = self.constants[attribute_name_index - 1].bytes - if constant_name == "SourceFile": - attribute = SourceFileAttributeInfo() - elif constant_name == "ConstantValue": - attribute = ConstantValueAttributeInfo() - elif constant_name == "Code": - attribute = CodeAttributeInfo() - elif constant_name == "Exceptions": - attribute = ExceptionsAttributeInfo() - elif constant_name == "InnerClasses": - attribute = InnerClassesAttributeInfo() - elif constant_name == "Synthetic": - attribute = SyntheticAttributeInfo() - elif constant_name == "LineNumberTable": - attribute = LineNumberAttributeInfo() - elif constant_name == "LocalVariableTable": - attribute = LocalVariableAttributeInfo() - elif constant_name == "Deprecated": - attribute = DeprecatedAttributeInfo() - else: - raise UnknownAttribute, constant_name - s = attribute.init(s[2:], self) - return attribute, s - - def _get_attributes_from_table(self, number, s): - attributes = [] - for i in range(0, number): - attribute, s = self._get_attribute_from_table(s) - attributes.append(attribute) - return attributes, s - - def _get_constants(self, s): - count = u2(s[0:2]) - return self._get_constants_from_table(count, s[2:]) - 
- def _get_access_flags(self, s): - return u2(s[0:2]), s[2:] - - def _get_this_class(self, s): - index = u2(s[0:2]) - return self.constants[index - 1], s[2:] - - _get_super_class = _get_this_class - - def _get_interfaces(self, s): - interfaces = [] - number = u2(s[0:2]) - s = s[2:] - for i in range(0, number): - index = u2(s[0:2]) - interfaces.append(self.constants[index - 1]) - s = s[2:] - return interfaces, s - - def _get_fields(self, s): - number = u2(s[0:2]) - return self._get_fields_from_table(number, s[2:]) - - def _get_attributes(self, s): - number = u2(s[0:2]) - return self._get_attributes_from_table(number, s[2:]) - - def _get_methods(self, s): - number = u2(s[0:2]) - return self._get_methods_from_table(number, s[2:]) - -if __name__ == "__main__": - import sys - f = open(sys.argv[1], "rb") - c = ClassFile(f.read()) - f.close() - -# vim: tabstop=4 expandtab shiftwidth=4 diff -r 901001c30474 -r 754d36821fc8 classhook.py --- a/classhook.py Fri Jan 21 17:04:41 2005 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,384 +0,0 @@ -#!/usr/bin/env python - -import ihooks # for the import machinery -import os, glob # for getting suitably-named files -from imp import PY_SOURCE, PKG_DIRECTORY, C_BUILTIN # import machinery magic -import classfile, bytecode # Java class support -import zipfile # for Java archive inspection - -# NOTE: Arbitrary constants pulled from thin air. - -JAVA_PACKAGE = 20041113 -JAVA_CLASS = 20041114 -JAVA_ARCHIVE = 20041115 - -class ClassHooks(ihooks.Hooks): - - "A filesystem hooks class providing information about supported files." - - def get_suffixes(self): - - "Return the recognised suffixes." - - return [("", "", JAVA_PACKAGE), (os.extsep + "jar", "r", JAVA_ARCHIVE)] + ihooks.Hooks.get_suffixes(self) - - def path_isdir(self, x, archive=None): - - "Return whether 'x' is a directory in the given 'archive'." 
- - if archive is None: - return ihooks.Hooks.path_isdir(self, x) - - return self._get_dirname(x) in archive.namelist() - - def _get_dirname(self, x): - - """ - Return the directory name for 'x'. - In zip files, the presence of "/" seems to indicate a directory. - """ - - if x.endswith("/"): - return x - else: - return x + "/" - - def listdir(self, x, archive=None): - - "Return the contents of the directory 'x' in the given 'archive'." - - if archive is None: - return ihooks.Hooks.listdir(self, x) - - x = self._get_dirname(x) - l = [] - for path in archive.namelist(): - - # Find out if the path is within the given directory. - - if path != x and path.startswith(x): - - # Get the path below the given directory. - - subpath = path[len(x):] - - # Find out whether the path is an object in the current directory. - - if subpath.count("/") == 0 or subpath.count("/") == 1 and subpath.endswith("/"): - l.append(subpath) - - return l - - def matching(self, dir, extension, archive=None): - - """ - Return the matching files in the given directory 'dir' having the given - 'extension' within the given 'archive'. Produce a list containing full - paths as opposed to simple filenames. - """ - - if archive is None: - return glob.glob(self.path_join(dir, "*" + extension)) - - dir = self._get_dirname(dir) - l = [] - for path in self.listdir(dir, archive): - if path.endswith(extension): - l.append(self.path_join(dir, path)) - return l - - def read(self, filename, archive=None): - - """ - Return the contents of the file with the given 'filename' in the given - 'archive'. - """ - - if archive is None: - f = open(filename, "rb") - s = f.read() - f.close() - return s - return archive.read(filename) - -class ClassLoader(ihooks.ModuleLoader): - - "A class providing support for searching directories for supported files." - - def find_module(self, name, path=None): - - """ - Find the module with the given 'name', using the given 'path' to locate - it. 
Note that ModuleLoader.find_module is almost sufficient, but does - not provide enough support for "package unions" where the root of a - package hierarchy may appear in several places. - - Return a list of locations (each being the "stuff" data structure used - by load_module); this replaces the single "stuff" value or None returned - by ModuleLoader.find_module. - """ - - if path is None: - path = [None] + self.default_path() - - found_locations = [] - - for dir in path: - stuff = self.find_module_in_dir(name, dir) - if stuff: - found_locations.append(stuff) - - return found_locations - - def find_module_in_dir(self, name, dir, allow_packages=1): - - """ - Find the module with the given 'name' in the given directory 'dir'. - Since Java packages/modules are directories containing class files, - return the required information tuple only when the path constructed - from 'dir' and 'name' refers to a directory containing class files. - """ - - result = ihooks.ModuleLoader.find_module_in_dir(self, name, dir, allow_packages) - if result is not None: - return result - - # An archive may be opened. - - archive = None - - # Provide a special name for the current directory. - - if name == "__this__": - if dir == None: - return (None, ".", ("", "", JAVA_PACKAGE)) - else: - return None - - # Where no directory is given, return failure immediately. - - elif dir is None: - return None - - # Detect archives. 
- - else: - archive, archive_path, path = self._get_archive_and_path(dir, name) - - #print "Processing name", name, "in", dir, "producing", path, "within archive", archive - - if self._find_module_at_path(path, archive): - if archive is not None: - return (archive, archive_path + ":" + path, (os.extsep + "jar", "r", JAVA_ARCHIVE)) - else: - return (None, path, ("", "", JAVA_PACKAGE)) - else: - return None - - def _get_archive_and_path(self, dir, name): - parts = dir.split(":") - archive_path = parts[0] - - # Archives may include an internal path, but will in any case have - # a primary part ending in .jar. - - if archive_path.endswith(os.extsep + "jar"): - archive = zipfile.ZipFile(archive_path, "r") - path = self.hooks.path_join(":".join(parts[1:]), name) - - # Otherwise, produce a filesystem-based path. - - else: - archive = None - path = self.hooks.path_join(dir, name) - - return archive, archive_path, path - - def _get_path_in_archive(self, path): - parts = path.split(":") - if len(parts) == 1: - return parts[0] - else: - return ":".join(parts[1:]) - - def _find_module_at_path(self, path, archive): - if self.hooks.path_isdir(path, archive): - #print "Looking in", path, "using archive", archive - - # Look for classes in the directory. - - if len(self.hooks.matching(path, os.extsep + "class", archive)) != 0: - return 1 - - # Otherwise permit importing where directories containing classes exist. - - #print "Filenames are", self.hooks.listdir(path, archive) - for filename in self.hooks.listdir(path, archive): - pathname = self.hooks.path_join(path, filename) - result = self._find_module_at_path(pathname, archive) - if result is not None: - return result - - return 0 - - def load_module(self, name, stuff): - - """ - Load the module with the given 'name', with a list of 'stuff' items, - each of which describes the location of the module and is a tuple of the - form (file, filename, (suffix, mode, data type)). 
- - Return a module object or raise an ImportError if a problem occurred in - the import operation. - - Note that the 'stuff' parameter is a list and not a single item as in - ModuleLoader.load_module. This should still work, however, since the - find_module method produces such a list. - """ - - # Set up the module. - # A union of all locations is placed in the module's path. - - module = self.hooks.add_module(name) - module.__path__ = [item_filename for (item_archive, item_filename, item_info) in stuff] - - # Just go into each package and find the class files. - - for stuff_item in stuff: - - # Extract the details, delegating loading responsibility to the - # default loader where appropriate. - # NOTE: Should we not be using some saved loader remembered upon - # NOTE: installation? - - archive, filename, info = stuff_item - suffix, mode, datatype = info - if datatype not in (JAVA_PACKAGE, JAVA_ARCHIVE): - return ihooks.ModuleLoader.load_module(self, name, stuff_item) - - #print "Loading", archive, filename, info - - # Prepare a dictionary of globals. - - global_names = module.__dict__ - global_names["__builtins__"] = __builtins__ - - # Get the real filename. - - filename = self._get_path_in_archive(filename) - #print "Real filename", filename - - # Load the class files. - - class_files = {} - for class_filename in self.hooks.matching(filename, os.extsep + "class", archive): - #print "Loading class", class_filename - s = self.hooks.read(class_filename, archive) - class_file = classfile.ClassFile(s) - class_files[str(class_file.this_class.get_name())] = class_file - - # Get an index of the class files. - - class_file_index = class_files.keys() - - # NOTE: Unnecessary sorting for test purposes. - - class_file_index.sort() - - # Now go through the classes arranging them in a safe loading order. 
- - position = 0 - while position < len(class_file_index): - class_name = class_file_index[position] - super_class_name = str(class_files[class_name].super_class.get_name()) - - # Discover whether the superclass appears later. - - try: - super_class_position = class_file_index.index(super_class_name) - if super_class_position > position: - - # If the superclass appears later, swap this class and the - # superclass, then process the superclass. - - class_file_index[position] = super_class_name - class_file_index[super_class_position] = class_name - continue - - except ValueError: - pass - - position += 1 - - # Process each class file, producing a genuine Python class. - # Create the classes, but establish a proper initialisation order. - - class_file_init_index = [] - class_file_init = {} - - for class_name in class_file_index: - #print "* Class", class_name - class_file = class_files[class_name] - translator = bytecode.ClassTranslator(class_file) - cls, external_names = translator.process(global_names) - module.__dict__[cls.__name__] = cls - - # Process external names. - - this_class_name_parts = class_file.this_class.get_python_name().split(".") - this_class_module, this_class_name = this_class_name_parts[:-1], this_class_name_parts[-1] - - for external_name in external_names: - #print "* Name", external_name - external_name_parts = external_name.split(".") - external_class_module, external_class_name = external_name_parts[:-1], external_name_parts[-1] - - # Names not local to this package need importing. - - if len(external_name_parts) > 1 and this_class_module != external_class_module: - - external_module_name = ".".join(external_class_module) - #print "* Importing", external_module_name - obj = __import__(external_module_name, global_names, {}, []) - global_names[external_name_parts[0]] = obj - - # Names local to this package may affect initialisation order. 
- - elif external_class_name not in class_file_init_index: - try: - this_class_name_index = class_file_init_index.index(this_class_name) - - # Either insert this name before the current class's - # name. - - #print "* Inserting", external_class_name - class_file_init_index.insert(this_class_name_index, external_class_name) - - except ValueError: - - # Or add this name in anticipation of the current - # class's name appearing. - - #print "* Including", external_class_name - class_file_init_index.append(external_class_name) - - # Add this class name to the initialisation index. - - if class_name not in class_file_init_index: - class_file_init_index.append(this_class_name) - class_file_init[this_class_name] = (cls, class_file) - - # Finally, call __clinit__ methods for all relevant classes. - - #print "** Initialisation order", class_file_init_index - for class_name in class_file_init_index: - cls, class_file = class_file_init[class_name] - #print "**", cls, class_file - if hasattr(cls, "__clinit__"): - eval(cls.__clinit__.func_code, global_names) - - return module - -ihooks.ModuleImporter(loader=ClassLoader(hooks=ClassHooks())).install() - -# vim: tabstop=4 expandtab shiftwidth=4 diff -r 901001c30474 -r 754d36821fc8 runclass.py --- a/runclass.py Fri Jan 21 17:04:41 2005 +0100 +++ b/runclass.py Fri Jan 21 17:05:03 2005 +0100 @@ -2,7 +2,7 @@ "A program to run Java class files." -import classhook +import javaclass.classhook import java.lang def load_class(class_name): diff -r 901001c30474 -r 754d36821fc8 setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/setup.py Fri Jan 21 17:05:03 2005 +0100 @@ -0,0 +1,14 @@ +#! 
/usr/bin/env python
+
+from distutils.core import setup
+
+setup(
+    name = "ClassFile",
+    description = "A Java class and package importer and utilities.",
+    author = "Paul Boddie",
+    author_email = "paul@boddie.org.uk",
+    url = "http://www.boddie.org.uk/python/ClassFile.html",
+    version = "0.1",
+    packages = ["java", "javaclass"],
+    scripts = ["runclass.py"]  # must name the real file: this changeset patches the launcher as runclass.py
+    )