1.1 --- a/bytecode.py Fri Oct 29 18:48:35 2004 +0200
1.2 +++ b/bytecode.py Sun Nov 07 17:42:07 2004 +0100
1.3 @@ -8,16 +8,33 @@
1.4 """
1.5
1.6 import dis # for access to Python bytecode values
1.7 +from UserDict import UserDict
1.8
1.9 # Bytecode production classes.
1.10
1.11 class BytecodeWriter:
1.12 def __init__(self):
1.13 self.loops = []
1.14 - self.jumps = []
1.15 + self.jumps = {}
1.16 self.output = []
1.17 self.position = 0
1.18
1.19 + # Mapping from values to indexes.
1.20 + self.constants = {}
1.21 +
1.22 + # Mapping from names to indexes.
1.23 + # NOTE: This may be acquired from elsewhere.
1.24 + self.globals = {}
1.25 +
1.26 + def get_output(self):
1.27 + output = []
1.28 + for element in self.output:
1.29 + if isinstance(element, LazyValue):
1.30 + output.append(chr(element.value))
1.31 + else:
1.32 + output.append(chr(element))
1.33 + return "".join(output)
1.34 +
1.35 # Special methods.
1.36
1.37 def end_loop(self):
1.38 @@ -26,16 +43,40 @@
1.39 self.output[current_loop_start + 1] = self.position
1.40 self.pop_block()
1.41
1.42 - def jump_to_next(self, status):
1.43 - self.jumps.push(self.position)
1.44 - if status:
1.45 + def jump_to_label(self, status, name):
1.46 + # Record the instruction using the jump.
1.47 + if not self.jumps.has_key(name):
1.48 + self.jumps[name] = []
1.49 + self.jumps[name].append(self.position)
1.50 + if status is None:
1.51 + self.jump_forward()
1.52 + elif status:
1.53 self.jump_if_true()
1.54 else:
1.55 self.jump_if_false()
1.56
1.57 - def start_next(self):
1.58 - current_jump_start = self.jumps.pop()
1.59 - self.output[current_jump_start + 1] = self.position
1.60 + def start_label(self, name):
1.61 + # Fill in all jump instructions.
1.62 + for jump_instruction in self.jumps[name]:
1.63 + self.output[jump_instruction + 1] = self.position
1.64 +
1.65 + # Complicated methods.
1.66 +
1.67 + def load_const(self, value):
1.68 + self.output.append(opmap["LOAD_CONST"])
1.69 + if not self.constants.has_key(value):
1.70 + self.constants[value] = len(self.constants.keys())
1.71 + self.output.append(self.constants[value])
1.72 +
1.73 + def load_global(self, name):
1.74 + self.output.append(opmap["LOAD_GLOBAL"])
1.75 + if not self.globals.has_key(value):
1.76 + self.globals[name] = len(self.globals.keys())
1.77 + self.output.append(self.globals[name])
1.78 +
1.79 + def load_fast(self, index):
1.80 + self.output.append(opmap["LOAD_FAST"])
1.81 + self.output.append(index)
1.82
1.83 # Normal bytecode generators.
1.84
1.85 @@ -55,12 +96,57 @@
1.86 self.output.append(offset) # May be filled in later
1.87 self.position += 2
1.88
1.89 + def jump_forward(self, offset=None):
1.90 + self.output.append(opmap["JUMP_FORWARD"])
1.91 + self.output.append(offset) # May be filled in later
1.92 + self.position += 2
1.93 +
1.94 +# Utility classes and functions.
1.95 +
1.96 +class LazyDict(UserDict):
1.97 + def __getitem__(self, key):
1.98 + if not self.data.has_key(key):
1.99 + self.data[key] = LazyValue()
1.100 + return self.data[key]
1.101 + def __setitem__(self, key, value):
1.102 + if self.data.has_key(key):
1.103 + existing_value = self.data[key]
1.104 + if isinstance(existing_value, LazyValue):
1.105 + existing_value.value = value
1.106 + return
1.107 + self.data[key] = value
1.108 +
1.109 +class LazyValue:
1.110 + def __init__(self, value=None):
1.111 + self.value = value
1.112 +
1.113 +def signed(value, limit):
1.114 +
1.115 + """
1.116 + Return the signed integer from the unsigned 'value', where 'limit' (a value
1.117 + one greater than the highest possible positive integer) is used to determine
1.118 + whether a negative or positive result is produced.
1.119 + """
1.120 +
1.121 + d, r = divmod(value, limit)
1.122 + if d == 1:
1.123 + mask = limit * 2 - 1
1.124 + return -1 - (value ^ mask)
1.125 + else:
1.126 + return value
1.127 +
1.128 +def signed2(value):
1.129 + return signed(value, 0x8000)
1.130 +
1.131 +def signed4(value):
1.132 + return signed(value, 0x80000000)
1.133 +
1.134 # Bytecode conversion.
1.135
1.136 class BytecodeReader:
1.137 def __init__(self, class_file):
1.138 self.class_file = class_file
1.139 - self.position_mapping = {}
1.140 + self.position_mapping = LazyDict()
1.141
1.142 def process(self, code, program):
1.143 self.java_position = 0
1.144 @@ -120,7 +206,8 @@
1.145 # NOTE: Not using the index to type the list/array.
1.146 index = arguments[0] << 8 + arguments[1]
1.147
1.148 - program.build_list()
1.149 + program.build_list() # Stack: count, list
1.150 + program.rot_two() # Stack: list, count
1.151 program.setup_loop()
1.152 program.load_global("range")
1.153 program.load_const(0) # Stack: list, count, range, 0
1.154 @@ -315,12 +402,12 @@
1.155 getstatic = getfield # Ignoring Java restrictions
1.156
1.157 def goto(self, arguments, program):
1.158 - offset = arguments[0] << 8 + arguments[1]
1.159 + offset = signed2(arguments[0] << 8 + arguments[1])
1.160 java_absolute = self.java_position + offset
1.161 program.jump_absolute(self.position_mapping[java_absolute])
1.162
1.163 def goto_w(self, arguments, program):
1.164 - offset = arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3]
1.165 + offset = signed4(arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3])
1.166 java_absolute = self.java_position + offset
1.167 program.jump_absolute(self.position_mapping[java_absolute])
1.168
1.169 @@ -378,12 +465,12 @@
1.170 idiv = fdiv
1.171
1.172 def _if_xcmpx(self, arguments, program, op):
1.173 - offset = arguments[0] << 8 + arguments[1]
1.174 + offset = signed2(arguments[0] << 8 + arguments[1])
1.175 java_absolute = self.java_position + offset
1.176 program.compare_op(op)
1.177 - program.jump_to_next(0) # skip if false
1.178 + program.jump_to_label(0, "next") # skip if false
1.179 program.goto(offset)
1.180 - program.start_next()
1.181 + program.start_label("next")
1.182
1.183 def if_acmpeq(self, arguments, program):
1.184 # NOTE: No type checking performed.
1.185 @@ -558,14 +645,156 @@
1.186 isub = fsub
1.187 iushr = ishr # Ignoring distinctions between arithmetic and logical shifts
1.188
1.189 - def ishr(self, arguments, program):
1.190 + def ixor(self, arguments, program):
1.191 # NOTE: No type checking performed.
1.192 program.binary_xor()
1.193
1.194 def jsr(self, arguments, program):
1.195 - offset = arguments[0] << 8 + arguments[1]
1.196 + offset = signed2(arguments[0] << 8 + arguments[1])
1.197 + java_absolute = self.java_position + offset
1.198 + # Store the address of the next instruction.
1.199 + program.load_const(self.position_mapping[self.java_position + 3])
1.200 + program.jump_absolute(self.position_mapping[java_absolute])
1.201 +
1.202 + def jsr_w(self, arguments, program):
1.203 + offset = signed4(arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3])
1.204 java_absolute = self.java_position + offset
1.205 + # Store the address of the next instruction.
1.206 + program.load_const(self.position_mapping[self.java_position + 5])
1.207 + program.jump_absolute(self.position_mapping[java_absolute])
1.208 +
1.209 + l2d = i2d
1.210 + l2f = i2f
1.211 +
1.212 + def l2i(self, arguments, program):
1.213 + pass # Preserving Java semantics
1.214 +
1.215 + ladd = iadd
1.216 + laload = iaload
1.217 + land = iand
1.218 + lastore = iastore
1.219 +
1.220 + def lcmp(self, arguments, program):
1.221 + # NOTE: No type checking performed.
1.222 + program.dup_topx(2) # Stack: value1, value2, value1, value2
1.223 + program.compare_op(">") # Stack: value1, value2, result
1.224 + program.jump_to_label(0, "equals")
1.225 + # True - produce result and branch.
1.226 + program.pop_top() # Stack: value1, value2
1.227 + program.pop_top() # Stack: value1
1.228 + program.pop_top() # Stack:
1.229 + program.load_const(1) # Stack: 1
1.230 + program.jump_to_label(None, "next")
1.231 + # False - test equality.
1.232 + program.start_label("equals")
1.233 + program.pop_top() # Stack: value1, value2
1.234 + program.dup_topx(2) # Stack: value1, value2, value1, value2
1.235 + program.compare_op("==") # Stack: value1, value2, result
1.236 + program.jump_to_label(0, "less")
1.237 + # True - produce result and branch.
1.238 + program.pop_top() # Stack: value1, value2
1.239 + program.pop_top() # Stack: value1
1.240 + program.pop_top() # Stack:
1.241 + program.load_const(0) # Stack: 0
1.242 + program.jump_to_label(None, "next")
1.243 + # False - produce result.
1.244 + program.start_label("less")
1.245 + program.pop_top() # Stack: value1, value2
1.246 + program.pop_top() # Stack: value1
1.247 + program.pop_top() # Stack:
1.248 + program.load_const(-1) # Stack: -1
1.249 + program.start_label("next")
1.250 +
1.251 + lconst_0 = iconst_0
1.252 + lconst_1 = iconst_1
1.253 +
1.254 + def ldc(self, arguments, program):
1.255 + program.load_const(self.class_file.constants[arguments[0] - 1])
1.256 +
1.257 + def ldc_w(self, arguments, program):
1.258 + program.load_const(self.class_file.constants[arguments[0] << 8 + arguments[1] - 1])
1.259 +
1.260 + ldc2_w = ldc_w
1.261 + ldiv = idiv
1.262 + lload = iload
1.263 + lload_0 = iload_0
1.264 + lload_1 = iload_1
1.265 + lload_2 = iload_2
1.266 + lload_3 = iload_3
1.267 + lmul = imul
1.268 + lneg = ineg
1.269 +
1.270 + def lookupswitch(self, arguments, program):
1.271 + # Find the offset to the next 4 byte boundary in the code.
1.272 + d, r = divmod(self.java_position, 4)
1.273 + to_boundary = (4 - r) % 4
1.274 + # Get the pertinent arguments.
1.275 + arguments = arguments[to_boundary:]
1.276 + default = arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3]
1.277 + npairs = arguments[4] << 24 + arguments[5] << 16 + arguments[6] << 8 + arguments[7]
1.278 + # Process the pairs.
1.279 + # NOTE: This is not the most optimal implementation.
1.280 + pair_index = 8
1.281 + for pair in range(0, npairs):
1.282 + match = (arguments[pair_index] << 24 + arguments[pair_index + 1] << 16 +
1.283 + arguments[pair_index + 2] << 8 + arguments[pair_index + 3])
1.284 + offset = signed4(arguments[pair_index + 4] << 24 + arguments[pair_index + 5] << 16 +
1.285 + arguments[pair_index + 6] << 8 + arguments[pair_index + 7])
1.286 + # Calculate the branch target.
1.287 + java_absolute = self.java_position + offset
1.288 + # Generate branching code.
1.289 + program.dup_top() # Stack: key, key
1.290 + program.load_const(match) # Stack: key, key, match
1.291 + program.compare_op("==") # Stack: key, result
1.292 + program.jump_to_label(0, "end" + str(pair))
1.293 + program.pop_top() # Stack: key
1.294 + program.pop_top() # Stack:
1.295 + program.jump_absolute(self.position_mapping[java_absolute])
1.296 + # Generate the label for the end of the branching code.
1.297 + program.start_label("end" + str(pair))
1.298 + program.pop_top() # Stack: key
1.299 + # Update the index.
1.300 + pair_index += 8
1.301 + # Generate the default.
1.302 + java_absolute = self.java_position + default
1.303 + program.jump_absolute(self.position_mapping[java_absolute])
1.304 +
1.305 + lor = ior
1.306 + lrem = irem
1.307 + lreturn = ireturn
1.308 + lshl = ishl
1.309 + lshr = ishr
1.310 + lstore = istore
1.311 + lstore_0 = istore_0
1.312 + lstore_1 = istore_1
1.313 + lstore_2 = istore_2
1.314 + lstore_3 = istore_3
1.315 + lsub = isub
1.316 + lushr = iushr
1.317 + lxor = ixor
1.318 +
1.319 + def monitorenter(self, arguments, program):
1.320 # NOTE: To be implemented.
1.321 + pass
1.322 +
1.323 + def monitorexit(self, arguments, program):
1.324 + # NOTE: To be implemented.
1.325 + pass
1.326 +
1.327 + def multianewarray(self, arguments, program):
1.328 + program.build_list() # Stack: count1, count2, ..., countN, list
1.329 + program.rot_two() # Stack: count1, count2, ..., list, countN
1.330 + program.setup_loop()
1.331 + program.load_global("range")
1.332 + program.load_const(0) # Stack: list, count, range, 0
1.333 + program.rot_three() # Stack: list, 0, count, range
1.334 + program.rot_three() # Stack: list, range, 0, count
1.335 + program.call_function(2) # Stack: list, range_list
1.336 + program.get_iter() # Stack: list, iter
1.337 + program.for_iter() # Stack: list, iter, value
1.338 + for i in range(0, arguments[2]):
1.339 + # Stack:
1.340 + self.anewarray(arguments, program) # Stack: list, iter
1.341
1.342 def wide(self, code, program):
1.343 # NOTE: To be implemented.