1.1 --- a/bytecode.py Tue Nov 09 00:51:39 2004 +0100
1.2 +++ b/bytecode.py Tue Nov 09 00:59:16 2004 +0100
1.3 @@ -17,9 +17,19 @@
1.4 "A Python bytecode writer."
1.5
1.6 def __init__(self):
1.7 - self.loops = []
1.8 + # A stack of loop block or exception block start positions.
1.9 + self.blocks = []
1.10 +
1.11 + # A stack of exception block handler pointers.
1.12 + self.exception_handlers = []
1.13 +
1.14 + # A dictionary mapping each label name to the (jump instruction position, following instruction position) pairs of the jumps referencing that label.
1.15 self.jumps = {}
1.16 +
1.17 + # The output values, including "lazy" subvalues which will need evaluating.
1.18 self.output = []
1.19 +
1.20 + # The current Python bytecode instruction position.
1.21 self.position = 0
1.22
1.23 # Stack depth estimation.
1.24 @@ -91,6 +101,7 @@
1.25 # NOTE: Assume a 16-bit value.
1.26 self.output.append(value.values[0])
1.27 self.output.append(value.values[1])
1.28 + self.position += 2
1.29 elif value <= 0xffff:
1.30 self.output.append(value & 0xff)
1.31 self.output.append((value & 0xff00) >> 8)
1.32 @@ -100,15 +111,18 @@
1.33 raise ValueError, value
1.34
1.35 def setup_loop(self):
1.36 - self.loops.push(self.position)
1.37 + self.blocks.append(self.position)
1.38 self.output.append(opmap["SETUP_LOOP"])
1.39 self.position += 1
1.40 self._write_value(0) # To be filled in later
1.41
1.42 def end_loop(self):
1.43 - current_loop_start = self.loops.pop()
1.44 + current_loop_start = self.blocks.pop()
1.45 self.jump_absolute(current_loop_start)
1.46 - self.output[current_loop_start + 1] = self.position
1.47 + # NOTE: Using 3 as the assumed length of the SETUP_LOOP instruction.
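1.48 + # NOTE: 8-bit limit: the whole offset is written to the low byte and the high byte is zeroed, so offsets above 255 are not handled.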
1.49 + self.output[current_loop_start + 1] = self.position - current_loop_start - 3
1.50 + self.output[current_loop_start + 2] = 0
1.51 self.pop_block()
1.52
1.53 def jump_to_label(self, status, name):
1.54 @@ -128,7 +142,9 @@
1.55 def start_label(self, name):
1.56 # Fill in all jump instructions.
1.57 for jump_instruction, following_instruction in self.jumps[name]:
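1.58 + # NOTE: 8-bit limit: the whole offset is written to the low byte and the high byte is zeroed, so jumps of more than 255 bytes are not handled.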
1.59 self.output[jump_instruction + 1] = self.position - following_instruction
1.60 + self.output[jump_instruction + 2] = 0
1.61 del self.jumps[name]
1.62
1.63 def load_const_ret(self, value):
1.64 @@ -136,19 +152,32 @@
1.65 self.load_const(value)
1.66
1.67 def ret(self, index):
1.68 - # Previously, the constant stored on the stack by jsr/jsr_w was stored
1.69 - # in a local variable. In the JVM, extracting the value from the local
1.70 - # variable and jumping can be done at runtime. In the Python VM, any
1.71 - # jump target must be known in advance and written into the bytecode.
1.72 - self.load_fast(index)
1.73 - for constant in self.constants_for_exceptions:
1.74 - self.dup_top() # Stack: actual-address, actual-address
1.75 - self.load_const(constant) # Stack: actual-address, actual-address, suggested-address
1.76 - self.compare_op("==") # Stack: actual-address, result
1.77 - self.jump_to_label(0, "const")
1.78 - self.jump_absolute(constant)
1.79 - self.start_label("const")
1.80 - self.pop_top() # Stack: actual-address
1.81 + self.end_finally()
1.82 +
1.83 + def setup_except(self, target):
1.84 + self.blocks.append(self.position)
1.85 + self.exception_handlers.append(target)
1.86 + self.output.append(opmap["SETUP_EXCEPT"])
1.87 + self.position += 1
1.88 + self._write_value(0) # To be filled in later
1.89 +
1.90 + def setup_finally(self, target):
1.91 + self.blocks.append(self.position)
1.92 + self.exception_handlers.append(target)
1.93 + self.output.append(opmap["SETUP_FINALLY"])
1.94 + self.position += 1
1.95 + self._write_value(0) # To be filled in later
1.96 +
1.97 + def end_exception(self):
1.98 + current_exception_start = self.blocks.pop()
1.99 + # Convert the "lazy" absolute value.
1.100 + current_exception_target = self.exception_handlers.pop()
1.101 + target = current_exception_target.get_value()
1.102 + # NOTE: Using 3 as the assumed length of the SETUP_* instruction.
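1.103 + # NOTE: 8-bit limit: the whole offset is written to the low byte and the high byte is zeroed, so handler offsets above 255 are not handled.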
1.104 + self.output[current_exception_start + 1] = target - current_exception_start - 3
1.105 + self.output[current_exception_start + 2] = 0
1.106 + # NOTE: The POP_BLOCK instruction gets slipped in before this method is called.
1.107
1.108 # Complicated methods.
1.109
1.110 @@ -206,7 +235,7 @@
1.111 # Normal bytecode generators.
1.112
1.113 def for_iter(self):
1.114 - self.loops.push(self.position)
1.115 + self.blocks.append(self.position)
1.116 self.output.append(opmap["FOR_ITER"])
1.117 self.position += 1
1.118 self._write_value(0) # To be filled in later
1.119 @@ -347,6 +376,10 @@
1.120 self.output.append(opmap["POP_BLOCK"])
1.121 self.position += 1
1.122
1.123 + def end_finally(self):
1.124 + self.output.append(opmap["END_FINALLY"])
1.125 + self.position += 1
1.126 +
1.127 # Utility classes and functions.
1.128
1.129 class LazyDict(UserDict):
1.130 @@ -378,8 +411,9 @@
1.131 raise ValueError, value
1.132 def get_value(self):
1.133 value = 0
1.134 - for i in range(0, len(self.values)):
1.135 - value = (value << 8) + self.values.pop().value
1.136 + values = self.values[:]
1.137 + for i in range(0, len(values)):
1.138 + value = (value << 8) + values.pop().value
1.139 return value
1.140
1.141 class LazySubValue:
1.142 @@ -419,13 +453,65 @@
1.143 self.class_file = class_file
1.144 self.position_mapping = LazyDict()
1.145
1.146 - def process(self, code, program):
1.147 + def process(self, code, exception_table, program):
1.148 self.java_position = 0
1.149 +
1.150 + # Produce a structure which permits fast access to exception details.
1.151 + exception_block_start = {}
1.152 + exception_block_end = {}
1.153 + exception_block_handler = {}
1.154 + reversed_exception_table = exception_table[:]
1.155 + reversed_exception_table.reverse()
1.156 +
1.157 + # Later entries have wider coverage than earlier entries.
1.158 + for exception in reversed_exception_table:
1.159 + # Index start positions.
1.160 + if not exception_block_start.has_key(exception.start_pc):
1.161 + exception_block_start[exception.start_pc] = []
1.162 + exception_block_start[exception.start_pc].append(exception)
1.163 + # Index end positions.
1.164 + if not exception_block_end.has_key(exception.end_pc):
1.165 + exception_block_end[exception.end_pc] = []
1.166 + exception_block_end[exception.end_pc].append(exception)
1.167 + # Index handler positions.
1.168 + if not exception_block_handler.has_key(exception.handler_pc):
1.169 + exception_block_handler[exception.handler_pc] = []
1.170 + exception_block_handler[exception.handler_pc].append(exception)
1.171 +
1.172 + # Process each instruction in the code.
1.173 while self.java_position < len(code):
1.174 self.position_mapping[self.java_position] = program.position
1.175 +
1.176 + # Insert exception handling constructs.
1.177 + for exception in exception_block_start.get(self.java_position, []):
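1.178 + # Note that the handler's absolute position is passed here (as an entry from the position mapping); the writer's end_exception converts it into a relative offset.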
1.179 + if exception.catch_type == 0:
1.180 + program.setup_finally(self.position_mapping[exception.handler_pc])
1.181 + else:
1.182 + program.setup_except(self.position_mapping[exception.handler_pc])
1.183 +
1.184 + # Insert exception handler end details.
1.185 + for exception in exception_block_end.get(self.java_position, []):
1.186 + program.end_exception()
1.187 +
1.188 + # Where handlers are begun, do not produce equivalent bytecode since
1.189 + # the first handler instruction typically involves saving a local
1.190 + # variable that is not applicable to the Python VM.
1.191 + #if not exception_block_handler.get(self.java_position, []):
1.192 +
1.193 + # Process the bytecode at the current position.
1.194 bytecode = ord(code[self.java_position])
1.195 mnemonic, number_of_arguments = self.java_bytecodes[bytecode]
1.196 - self.process_bytecode(mnemonic, number_of_arguments, code, program)
1.197 + number_of_arguments = self.process_bytecode(mnemonic, number_of_arguments, code, program)
1.198 + next_java_position = self.java_position + 1 + number_of_arguments
1.199 +
1.200 + # Insert exception handler end instructions.
1.201 + for exception in exception_block_end.get(next_java_position, []):
1.202 + program.pop_block()
1.203 +
1.204 + # Only advance the JVM position after sneaking in extra Python
1.205 + # instructions.
1.206 + self.java_position = next_java_position
1.207
1.208 def process_bytecode(self, mnemonic, number_of_arguments, code, program):
1.209 if number_of_arguments is not None:
1.210 @@ -435,11 +521,10 @@
1.211
1.212 # Call the handler.
1.213 getattr(self, mnemonic)(arguments, program)
1.214 + return number_of_arguments
1.215 else:
1.216 # Call the handler.
1.217 - number_of_arguments = getattr(self, mnemonic)(code[self.java_position+1:], program)
1.218 -
1.219 - self.java_position = self.java_position + 1 + number_of_arguments
1.220 + return getattr(self, mnemonic)(code[self.java_position+1:], program)
1.221
1.222 java_bytecodes = {
1.223 # code : (mnemonic, number of following bytes, change in stack)
1.224 @@ -664,6 +749,14 @@
1.225
1.226 class BytecodeDisassemblerProgram:
1.227 position = 0
1.228 + def setup_except(self, target):
1.229 + print "(setup_except %s)" % target
1.230 + def setup_finally(self, target):
1.231 + print "(setup_finally %s)" % target
1.232 + def end_exception(self):
1.233 + print "(end_exception)"
1.234 + def pop_block(self):
1.235 + print "(pop_block)"
1.236
1.237 class BytecodeTranslator(BytecodeReader):
1.238
1.239 @@ -763,7 +856,8 @@
1.240 def checkcast(self, arguments, program):
1.241 index = (arguments[0] << 8) + arguments[1]
1.242 target_name = self.class_file.constants[index - 1].get_name()
1.243 - target_components = target_name.split("/")
1.244 + # NOTE: Using the string version of the name which may contain incompatible characters.
1.245 + target_components = str(target_name).split("/")
1.246
1.247 program.dup_top() # Stack: objectref, objectref
1.248 program.load_global("isinstance") # Stack: objectref, objectref, isinstance
1.249 @@ -1065,7 +1159,8 @@
1.250 def instanceof(self, arguments, program):
1.251 index = (arguments[0] << 8) + arguments[1]
1.252 target_name = self.class_file.constants[index - 1].get_name()
1.253 - target_components = target_name.split("/")
1.254 + # NOTE: Using the string version of the name which may contain incompatible characters.
1.255 + target_components = str(target_name).split("/")
1.256
1.257 program.load_global("isinstance") # Stack: objectref, isinstance
1.258 program.rot_two() # Stack: isinstance, objectref
1.259 @@ -1401,15 +1496,15 @@
1.260 # NOTE: To be implemented.
1.261 return number_of_arguments
1.262
1.263 -def disassemble(class_file, code):
1.264 +def disassemble(class_file, code, exception_table):
1.265 disassembler = BytecodeDisassembler(class_file)
1.266 - disassembler.process(code, BytecodeDisassemblerProgram())
1.267 + disassembler.process(code, exception_table, BytecodeDisassemblerProgram())
1.268
1.269 -def translate(class_file, code):
1.270 +def translate(class_file, code, exception_table):
1.271 translator = BytecodeTranslator(class_file)
1.272 writer = BytecodeWriter()
1.273 - translator.process(code, writer)
1.274 - return writer
1.275 + translator.process(code, exception_table, writer)
1.276 + return translator, writer
1.277
1.278 if __name__ == "__main__":
1.279 import sys