javaclass

Changeset

11:4184dc282002
2004-11-09 Paul Boddie raw files shortlog changelog graph Added supposedly improved exception handling in order to use Python VM features. The problem is that the JVM athrow instruction is dynamic and best translates to the Python VM RAISE_VARARGS instruction. However, in order to support RAISE_VARARGS, the SETUP_EXCEPT, SETUP_FINALLY, POP_BLOCK and END_FINALLY instructions are also required, yet the JVM supports try...catch...finally (whereas the Python VM only supports try...except and try...finally), and anticipates finally handling using very low-level subroutine calling and arcane usage of local variables. Changed the result of the translate convenience function and the parameter specification of the translate and disassemble functions. Fixed the get_value method in LazySubValue and the LazyValue portion of the _write_value method in BytecodeWriter.
bytecode.py (file)
     1.1 --- a/bytecode.py	Tue Nov 09 00:51:39 2004 +0100
     1.2 +++ b/bytecode.py	Tue Nov 09 00:59:16 2004 +0100
     1.3 @@ -17,9 +17,19 @@
     1.4      "A Python bytecode writer."
     1.5  
     1.6      def __init__(self):
     1.7 -        self.loops = []
     1.8 +        # A stack of loop block or exception block start positions.
     1.9 +        self.blocks = []
    1.10 +
    1.11 +        # A stack of exception block handler pointers.
    1.12 +        self.exception_handlers = []
    1.13 +
    1.14 +        # A dictionary mapping labels to jump instructions referencing such labels.
    1.15          self.jumps = {}
    1.16 +
    1.17 +        # The output values, including "lazy" subvalues which will need evaluating.
    1.18          self.output = []
    1.19 +
    1.20 +        # The current Python bytecode instruction position.
    1.21          self.position = 0
    1.22  
    1.23          # Stack depth estimation.
    1.24 @@ -91,6 +101,7 @@
    1.25              # NOTE: Assume a 16-bit value.
    1.26              self.output.append(value.values[0])
    1.27              self.output.append(value.values[1])
    1.28 +            self.position += 2
    1.29          elif value <= 0xffff:
    1.30              self.output.append(value & 0xff)
    1.31              self.output.append((value & 0xff00) >> 8)
    1.32 @@ -100,15 +111,18 @@
    1.33              raise ValueError, value
    1.34  
    1.35      def setup_loop(self):
    1.36 -        self.loops.push(self.position)
    1.37 +        self.blocks.append(self.position)
    1.38          self.output.append(opmap["SETUP_LOOP"])
    1.39          self.position += 1
    1.40          self._write_value(0) # To be filled in later
    1.41  
    1.42      def end_loop(self):
    1.43 -        current_loop_start = self.loops.pop()
    1.44 +        current_loop_start = self.blocks.pop()
    1.45          self.jump_absolute(current_loop_start)
    1.46 -        self.output[current_loop_start + 1] = self.position
    1.47 +        # NOTE: Using 3 as the assumed length of the SETUP_LOOP instruction.
    1.48 +        # NOTE: 8-bit limit.
    1.49 +        self.output[current_loop_start + 1] = self.position - current_loop_start - 3
    1.50 +        self.output[current_loop_start + 2] = 0
    1.51          self.pop_block()
    1.52  
    1.53      def jump_to_label(self, status, name):
    1.54 @@ -128,7 +142,9 @@
    1.55      def start_label(self, name):
    1.56          # Fill in all jump instructions.
    1.57          for jump_instruction, following_instruction in self.jumps[name]:
    1.58 +            # NOTE: 8-bit limit.
    1.59              self.output[jump_instruction + 1] = self.position - following_instruction
    1.60 +            self.output[jump_instruction + 2] = 0
    1.61          del self.jumps[name]
    1.62  
    1.63      def load_const_ret(self, value):
    1.64 @@ -136,19 +152,32 @@
    1.65          self.load_const(value)
    1.66  
    1.67      def ret(self, index):
    1.68 -        # Previously, the constant stored on the stack by jsr/jsr_w was stored
    1.69 -        # in a local variable. In the JVM, extracting the value from the local
    1.70 -        # variable and jumping can be done at runtime. In the Python VM, any
    1.71 -        # jump target must be known in advance and written into the bytecode.
    1.72 -        self.load_fast(index)
    1.73 -        for constant in self.constants_for_exceptions:
    1.74 -            self.dup_top()              # Stack: actual-address, actual-address
    1.75 -            self.load_const(constant)   # Stack: actual-address, actual-address, suggested-address
    1.76 -            self.compare_op("==")       # Stack: actual-address, result
    1.77 -            self.jump_to_label(0, "const")
    1.78 -            self.jump_absolute(constant)
    1.79 -            self.start_label("const")
    1.80 -            self.pop_top()              # Stack: actual-address
    1.81 +        self.end_finally()
    1.82 +
    1.83 +    def setup_except(self, target):
    1.84 +        self.blocks.append(self.position)
    1.85 +        self.exception_handlers.append(target)
    1.86 +        self.output.append(opmap["SETUP_EXCEPT"])
    1.87 +        self.position += 1
    1.88 +        self._write_value(0) # To be filled in later
    1.89 +
    1.90 +    def setup_finally(self, target):
    1.91 +        self.blocks.append(self.position)
    1.92 +        self.exception_handlers.append(target)
    1.93 +        self.output.append(opmap["SETUP_FINALLY"])
    1.94 +        self.position += 1
    1.95 +        self._write_value(0) # To be filled in later
    1.96 +
    1.97 +    def end_exception(self):
    1.98 +        current_exception_start = self.blocks.pop()
    1.99 +        # Convert the "lazy" absolute value.
   1.100 +        current_exception_target = self.exception_handlers.pop()
   1.101 +        target = current_exception_target.get_value()
   1.102 +        # NOTE: Using 3 as the assumed length of the SETUP_* instruction.
   1.103 +        # NOTE: 8-bit limit.
   1.104 +        self.output[current_exception_start + 1] = target - current_exception_start - 3
   1.105 +        self.output[current_exception_start + 2] = 0
   1.106 +        # NOTE: The POP_BLOCK instruction gets slipped in before this method is called.
   1.107  
   1.108      # Complicated methods.
   1.109  
   1.110 @@ -206,7 +235,7 @@
   1.111      # Normal bytecode generators.
   1.112  
   1.113      def for_iter(self):
   1.114 -        self.loops.push(self.position)
   1.115 +        self.blocks.append(self.position)
   1.116          self.output.append(opmap["FOR_ITER"])
   1.117          self.position += 1
   1.118          self._write_value(0) # To be filled in later
   1.119 @@ -347,6 +376,10 @@
   1.120          self.output.append(opmap["POP_BLOCK"])
   1.121          self.position += 1
   1.122  
   1.123 +    def end_finally(self):
   1.124 +        self.output.append(opmap["END_FINALLY"])
   1.125 +        self.position += 1
   1.126 +
   1.127  # Utility classes and functions.
   1.128  
   1.129  class LazyDict(UserDict):
   1.130 @@ -378,8 +411,9 @@
   1.131              raise ValueError, value
   1.132      def get_value(self):
   1.133          value = 0
   1.134 -        for i in range(0, len(self.values)):
   1.135 -            value = (value << 8) + self.values.pop().value
   1.136 +        values = self.values[:]
   1.137 +        for i in range(0, len(values)):
   1.138 +            value = (value << 8) + values.pop().value
   1.139          return value
   1.140  
   1.141  class LazySubValue:
   1.142 @@ -419,13 +453,65 @@
   1.143          self.class_file = class_file
   1.144          self.position_mapping = LazyDict()
   1.145  
   1.146 -    def process(self, code, program):
   1.147 +    def process(self, code, exception_table, program):
   1.148          self.java_position = 0
   1.149 +
   1.150 +        # Produce a structure which permits fast access to exception details.
   1.151 +        exception_block_start = {}
   1.152 +        exception_block_end = {}
   1.153 +        exception_block_handler = {}
   1.154 +        reversed_exception_table = exception_table[:]
   1.155 +        reversed_exception_table.reverse()
   1.156 +
   1.157 +        # Later entries have wider coverage than earlier entries.
   1.158 +        for exception in reversed_exception_table:
   1.159 +            # Index start positions.
   1.160 +            if not exception_block_start.has_key(exception.start_pc):
   1.161 +                exception_block_start[exception.start_pc] = []
   1.162 +            exception_block_start[exception.start_pc].append(exception)
   1.163 +            # Index end positions.
   1.164 +            if not exception_block_end.has_key(exception.end_pc):
   1.165 +                exception_block_end[exception.end_pc] = []
   1.166 +            exception_block_end[exception.end_pc].append(exception)
   1.167 +            # Index handler positions.
   1.168 +            if not exception_block_handler.has_key(exception.handler_pc):
   1.169 +                exception_block_handler[exception.handler_pc] = []
   1.170 +            exception_block_handler[exception.handler_pc].append(exception)
   1.171 +
   1.172 +        # Process each instruction in the code.
   1.173          while self.java_position < len(code):
   1.174              self.position_mapping[self.java_position] = program.position
   1.175 +
   1.176 +            # Insert exception handling constructs.
   1.177 +            for exception in exception_block_start.get(self.java_position, []):
   1.178 +                # Note that the absolute position is used.
   1.179 +                if exception.catch_type == 0:
   1.180 +                    program.setup_finally(self.position_mapping[exception.handler_pc])
   1.181 +                else:
   1.182 +                    program.setup_except(self.position_mapping[exception.handler_pc])
   1.183 +
   1.184 +            # Insert exception handler end details.
   1.185 +            for exception in exception_block_end.get(self.java_position, []):
   1.186 +                program.end_exception()
   1.187 +
   1.188 +            # Where handlers are begun, do not produce equivalent bytecode since
   1.189 +            # the first handler instruction typically involves saving a local
   1.190 +            # variable that is not applicable to the Python VM.
   1.191 +            #if not exception_block_handler.get(self.java_position, []):
   1.192 +
   1.193 +            # Process the bytecode at the current position.
   1.194              bytecode = ord(code[self.java_position])
   1.195              mnemonic, number_of_arguments = self.java_bytecodes[bytecode]
   1.196 -            self.process_bytecode(mnemonic, number_of_arguments, code, program)
   1.197 +            number_of_arguments = self.process_bytecode(mnemonic, number_of_arguments, code, program)
   1.198 +            next_java_position = self.java_position + 1 + number_of_arguments
   1.199 +
   1.200 +            # Insert exception handler end instructions.
   1.201 +            for exception in exception_block_end.get(next_java_position, []):
   1.202 +                program.pop_block()
   1.203 +
   1.204 +            # Only advance the JVM position after sneaking in extra Python
   1.205 +            # instructions.
   1.206 +            self.java_position = next_java_position
   1.207  
   1.208      def process_bytecode(self, mnemonic, number_of_arguments, code, program):
   1.209          if number_of_arguments is not None:
   1.210 @@ -435,11 +521,10 @@
   1.211  
   1.212              # Call the handler.
   1.213              getattr(self, mnemonic)(arguments, program)
   1.214 +            return number_of_arguments
   1.215          else:
   1.216              # Call the handler.
   1.217 -            number_of_arguments = getattr(self, mnemonic)(code[self.java_position+1:], program)
   1.218 -
   1.219 -        self.java_position = self.java_position + 1 + number_of_arguments
   1.220 +            return getattr(self, mnemonic)(code[self.java_position+1:], program)
   1.221  
   1.222      java_bytecodes = {
   1.223          # code : (mnemonic, number of following bytes, change in stack)
   1.224 @@ -664,6 +749,14 @@
   1.225  
   1.226  class BytecodeDisassemblerProgram:
   1.227      position = 0
   1.228 +    def setup_except(self, target):
   1.229 +        print "(setup_except %s)" % target
   1.230 +    def setup_finally(self, target):
   1.231 +        print "(setup_finally %s)" % target
   1.232 +    def end_exception(self):
   1.233 +        print "(end_exception)"
   1.234 +    def pop_block(self):
   1.235 +        print "(pop_block)"
   1.236  
   1.237  class BytecodeTranslator(BytecodeReader):
   1.238  
   1.239 @@ -763,7 +856,8 @@
   1.240      def checkcast(self, arguments, program):
   1.241          index = (arguments[0] << 8) + arguments[1]
   1.242          target_name = self.class_file.constants[index - 1].get_name()
   1.243 -        target_components = target_name.split("/")
   1.244 +        # NOTE: Using the string version of the name which may contain incompatible characters.
   1.245 +        target_components = str(target_name).split("/")
   1.246  
   1.247          program.dup_top()                   # Stack: objectref, objectref
   1.248          program.load_global("isinstance")   # Stack: objectref, objectref, isinstance
   1.249 @@ -1065,7 +1159,8 @@
   1.250      def instanceof(self, arguments, program):
   1.251          index = (arguments[0] << 8) + arguments[1]
   1.252          target_name = self.class_file.constants[index - 1].get_name()
   1.253 -        target_components = target_name.split("/")
   1.254 +        # NOTE: Using the string version of the name which may contain incompatible characters.
   1.255 +        target_components = str(target_name).split("/")
   1.256  
   1.257          program.load_global("isinstance")   # Stack: objectref, isinstance
   1.258          program.rot_two()                   # Stack: isinstance, objectref
   1.259 @@ -1401,15 +1496,15 @@
   1.260          # NOTE: To be implemented.
   1.261          return number_of_arguments
   1.262  
   1.263 -def disassemble(class_file, code):
   1.264 +def disassemble(class_file, code, exception_table):
   1.265      disassembler = BytecodeDisassembler(class_file)
   1.266 -    disassembler.process(code, BytecodeDisassemblerProgram())
   1.267 +    disassembler.process(code, exception_table, BytecodeDisassemblerProgram())
   1.268  
   1.269 -def translate(class_file, code):
   1.270 +def translate(class_file, code, exception_table):
   1.271      translator = BytecodeTranslator(class_file)
   1.272      writer = BytecodeWriter()
   1.273 -    translator.process(code, writer)
   1.274 -    return writer
   1.275 +    translator.process(code, exception_table, writer)
   1.276 +    return translator, writer
   1.277  
   1.278  if __name__ == "__main__":
   1.279      import sys