1 #!/usr/bin/env python 2 3 """ 4 Java bytecode conversion. Specification found at the following URL: 5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/Instructions2.doc.html 6 7 NOTE: Synchronized constructs are not actually supported. 8 """ 9 10 import classfile 11 from dis import opmap, cmp_op # for access to Python bytecode values and operators 12 from UserDict import UserDict 13 import new 14 15 # Bytecode production classes. 16 17 class BytecodeWriter: 18 19 "A Python bytecode writer." 20 21 def __init__(self): 22 # A stack of loop start instructions corresponding to loop blocks. 23 self.loops = [] 24 25 # A stack of loop block or exception block start positions. 26 self.blocks = [] 27 28 # A stack of exception block handler pointers. 29 self.exception_handlers = [] 30 31 # A dictionary mapping labels to jump instructions referencing such labels. 32 self.jumps = {} 33 34 # The output values, including "lazy" subvalues which will need evaluating. 35 self.output = [] 36 37 # The current Python bytecode instruction position. 38 self.position = 0 39 40 # Stack depth estimation. 41 self.stack_depth = 0 42 self.max_stack_depth = 0 43 44 # Local variable estimation. 45 self.max_locals = 0 46 47 # Mapping from values to indexes. 48 self.constants = {} 49 50 # Mapping from names to indexes. 51 # NOTE: This may be acquired from elsewhere. 52 #self.globals = {} 53 54 # Mapping from names to indexes. 55 self.names = {} 56 57 # A list of constants used as exception handler return addresses. 58 self.constants_for_exceptions = [] 59 60 def get_output(self): 61 output = [] 62 for element in self.output: 63 if isinstance(element, LazySubValue): 64 value = element.value 65 else: 66 value = element 67 # NOTE: ValueError gets raised for bad values here. 68 output.append(chr(value)) 69 return "".join(output) 70 71 def get_constants(self): 72 l = self._get_list(self._invert(self.constants)) 73 result = [] 74 for i in l: 75 if isinstance(i, LazyValue): 76 result.append(i.get_value()) 77 else: 78 result.append(i) 79 return result 80 81 #def get_globals(self): 82 # return self._get_list(self._invert(self.globals)) 83 84 def get_names(self): 85 return self._get_list(self._invert(self.names)) 86 87 def _invert(self, d): 88 inverted = {} 89 for k, v in d.items(): 90 inverted[v] = k 91 return inverted 92 93 def _get_list(self, d): 94 l = [] 95 for i in range(0, len(d.keys())): 96 l.append(d[i]) 97 return l 98 99 # Administrative methods. 100 101 def update_stack_depth(self, change): 102 self.stack_depth += change 103 if self.stack_depth > self.max_stack_depth: 104 self.max_stack_depth = self.stack_depth 105 106 def update_locals(self, index): 107 if index > self.max_locals: 108 self.max_locals = index 109 110 # Special methods. 111 112 def _write_value(self, value): 113 if isinstance(value, LazyValue): 114 # NOTE: Assume a 16-bit value. 115 self.output.append(value.values[0]) 116 self.output.append(value.values[1]) 117 self.position += 2 118 elif value <= 0xffff: 119 self.output.append(value & 0xff) 120 self.output.append((value & 0xff00) >> 8) 121 self.position += 2 122 else: 123 # NOTE: EXTENDED_ARG not yet supported. 124 raise ValueError, value 125 126 def _rewrite_value(self, position, value): 127 # NOTE: Assume a 16-bit value. 128 if value <= 0xffff: 129 self.output[position] = (value & 0xff) 130 self.output[position + 1] = ((value & 0xff00) >> 8) 131 else: 132 # NOTE: EXTENDED_ARG not yet supported. 133 raise ValueError, value 134 135 def setup_loop(self): 136 self.loops.append(self.position) 137 self.output.append(opmap["SETUP_LOOP"]) 138 self.position += 1 139 self._write_value(0) # To be filled in later 140 141 def end_loop(self): 142 current_loop_start = self.loops.pop() 143 current_loop_real_start = self.blocks.pop() 144 #print "<", self.blocks, current_loop_real_start 145 # Fix the iterator delta. 146 # NOTE: Using 3 as the assumed length of the FOR_ITER instruction. 147 self.jump_absolute(current_loop_real_start) 148 self._rewrite_value(current_loop_real_start + 1, self.position - current_loop_real_start - 3) 149 self.pop_block() 150 # Fix the loop delta. 151 # NOTE: Using 3 as the assumed length of the SETUP_LOOP instruction. 152 self._rewrite_value(current_loop_start + 1, self.position - current_loop_start - 3) 153 154 def jump_to_label(self, status, name): 155 # Record the instruction using the jump. 156 jump_instruction = self.position 157 if status is None: 158 self.jump_forward() 159 elif status: 160 self.jump_if_true() 161 else: 162 self.jump_if_false() 163 # Record the following instruction, too. 164 if not self.jumps.has_key(name): 165 self.jumps[name] = [] 166 self.jumps[name].append((jump_instruction, self.position)) 167 168 def start_label(self, name): 169 # Fill in all jump instructions. 170 for jump_instruction, following_instruction in self.jumps[name]: 171 self._rewrite_value(jump_instruction + 1, self.position - following_instruction) 172 del self.jumps[name] 173 174 def load_const_ret(self, value): 175 self.constants_for_exceptions.append(value) 176 self.load_const(value) 177 178 def ret(self, index): 179 self.load_fast(index) 180 # Previously, the constant stored on the stack by jsr/jsr_w was stored 181 # in a local variable. In the JVM, extracting the value from the local 182 # variable and jumping can be done at runtime. In the Python VM, any 183 # jump target must be known in advance and written into the bytecode. 184 for constant in self.constants_for_exceptions: 185 self.dup_top() # Stack: actual-address, actual-address 186 self.load_const(constant) # Stack: actual-address, actual-address, suggested-address 187 self.compare_op("==") # Stack: actual-address, result 188 self.jump_to_label(0, "const") 189 self.pop_top() # Stack: actual-address 190 self.pop_top() # Stack: 191 self.jump_absolute(constant) 192 self.start_label("const") 193 self.pop_top() # Stack: actual-address 194 # NOTE: If we get here, something is really wrong. 195 self.pop_top() # Stack: 196 197 def setup_except(self, target): 198 self.blocks.append(self.position) 199 self.exception_handlers.append(target) 200 #print "-", self.position, target 201 self.output.append(opmap["SETUP_EXCEPT"]) 202 self.position += 1 203 self._write_value(0) # To be filled in later 204 205 def setup_finally(self, target): 206 self.blocks.append(self.position) 207 self.exception_handlers.append(target) 208 #print "-", self.position, target 209 self.output.append(opmap["SETUP_FINALLY"]) 210 self.position += 1 211 self._write_value(0) # To be filled in later 212 213 def end_exception(self): 214 current_exception_start = self.blocks.pop() 215 # Convert the "lazy" absolute value. 216 current_exception_target = self.exception_handlers.pop() 217 target = current_exception_target.get_value() 218 #print "*", current_exception_start, target 219 # NOTE: Using 3 as the assumed length of the SETUP_* instruction. 220 self._rewrite_value(current_exception_start + 1, target - current_exception_start - 3) 221 222 def start_handler(self, exc_name): 223 # Where handlers are begun, produce bytecode to test the type of 224 # the exception. 225 self.dup_top() # Stack: exception, exception 226 self.load_global(str(exc_name)) # Stack: exception, exception, handled-exception 227 self.compare_op("exception match") # Stack: exception, result 228 self.jump_to_label(1, "handler") 229 self.pop_top() 230 self.end_finally() 231 self.start_label("handler") 232 self.pop_top() 233 234 # Complicated methods. 235 236 def load_const(self, value): 237 self.output.append(opmap["LOAD_CONST"]) 238 if not self.constants.has_key(value): 239 self.constants[value] = len(self.constants.keys()) 240 self.position += 1 241 self._write_value(self.constants[value]) 242 self.update_stack_depth(1) 243 244 def load_global(self, name): 245 self.output.append(opmap["LOAD_GLOBAL"]) 246 if not self.names.has_key(name): 247 self.names[name] = len(self.names.keys()) 248 self.position += 1 249 self._write_value(self.names[name]) 250 self.update_stack_depth(1) 251 252 def load_attr(self, name): 253 self.output.append(opmap["LOAD_ATTR"]) 254 if not self.names.has_key(name): 255 self.names[name] = len(self.names.keys()) 256 self.position += 1 257 self._write_value(self.names[name]) 258 259 def load_name(self, name): 260 self.output.append(opmap["LOAD_NAME"]) 261 if not self.names.has_key(name): 262 self.names[name] = len(self.names.keys()) 263 self.position += 1 264 self._write_value(self.names[name]) 265 self.update_stack_depth(1) 266 267 def load_fast(self, index): 268 self.output.append(opmap["LOAD_FAST"]) 269 self.position += 1 270 self._write_value(index) 271 self.update_stack_depth(1) 272 self.update_locals(index) 273 274 def store_attr(self, name): 275 self.output.append(opmap["STORE_ATTR"]) 276 if not self.names.has_key(name): 277 self.names[name] = len(self.names.keys()) 278 self.position += 1 279 self._write_value(self.names[name]) 280 self.update_stack_depth(-1) 281 282 def store_fast(self, index): 283 self.output.append(opmap["STORE_FAST"]) 284 self.position += 1 285 self._write_value(index) 286 self.update_stack_depth(-1) 287 self.update_locals(index) 288 289 def for_iter(self): 290 self.blocks.append(self.position) 291 #print ">", self.blocks 292 self.output.append(opmap["FOR_ITER"]) 293 self.position += 1 294 self._write_value(0) # To be filled in later 295 self.update_stack_depth(1) 296 297 def break_loop(self): 298 self.output.append(opmap["BREAK_LOOP"]) 299 self.position += 1 300 self.jump_absolute(self.blocks[-1]) 301 302 # Normal bytecode generators. 303 304 def get_iter(self): 305 self.output.append(opmap["GET_ITER"]) 306 self.position += 1 307 308 def jump_if_false(self, offset=0): 309 self.output.append(opmap["JUMP_IF_FALSE"]) 310 self.position += 1 311 self._write_value(offset) # May be filled in later 312 313 def jump_if_true(self, offset=0): 314 self.output.append(opmap["JUMP_IF_TRUE"]) 315 self.position += 1 316 self._write_value(offset) # May be filled in later 317 318 def jump_forward(self, offset=0): 319 self.output.append(opmap["JUMP_FORWARD"]) 320 self.position += 1 321 self._write_value(offset) # May be filled in later 322 323 def jump_absolute(self, address=0): 324 self.output.append(opmap["JUMP_ABSOLUTE"]) 325 self.position += 1 326 self._write_value(address) # May be filled in later 327 328 def build_tuple(self, count): 329 self.output.append(opmap["BUILD_TUPLE"]) 330 self.position += 1 331 self._write_value(count) 332 self.update_stack_depth(-(count - 1)) 333 334 def build_list(self, count): 335 self.output.append(opmap["BUILD_LIST"]) 336 self.position += 1 337 self._write_value(count) 338 self.update_stack_depth(-(count - 1)) 339 340 def pop_top(self): 341 self.output.append(opmap["POP_TOP"]) 342 self.position += 1 343 self.update_stack_depth(-1) 344 345 def dup_top(self): 346 self.output.append(opmap["DUP_TOP"]) 347 self.position += 1 348 self.update_stack_depth(1) 349 350 def rot_two(self): 351 self.output.append(opmap["ROT_TWO"]) 352 self.position += 1 353 354 def rot_three(self): 355 self.output.append(opmap["ROT_THREE"]) 356 self.position += 1 357 358 def rot_four(self): 359 self.output.append(opmap["ROT_FOUR"]) 360 self.position += 1 361 362 def call_function(self, count): 363 self.output.append(opmap["CALL_FUNCTION"]) 364 self.position += 1 365 self._write_value(count) 366 self.update_stack_depth(-count) 367 368 def binary_subscr(self): 369 self.output.append(opmap["BINARY_SUBSCR"]) 370 self.position += 1 371 self.update_stack_depth(-1) 372 373 def binary_add(self): 374 self.output.append(opmap["BINARY_ADD"]) 375 self.position += 1 376 self.update_stack_depth(-1) 377 378 def binary_divide(self): 379 self.output.append(opmap["BINARY_DIVIDE"]) 380 self.position += 1 381 self.update_stack_depth(-1) 382 383 def binary_multiply(self): 384 self.output.append(opmap["BINARY_MULTIPLY"]) 385 self.position += 1 386 self.update_stack_depth(-1) 387 388 def binary_modulo(self): 389 self.output.append(opmap["BINARY_MODULO"]) 390 self.position += 1 391 self.update_stack_depth(-1) 392 393 def binary_subtract(self): 394 self.output.append(opmap["BINARY_SUBTRACT"]) 395 self.position += 1 396 self.update_stack_depth(-1) 397 398 def binary_and(self): 399 self.output.append(opmap["BINARY_AND"]) 400 self.position += 1 401 self.update_stack_depth(-1) 402 403 def binary_or(self): 404 self.output.append(opmap["BINARY_XOR"]) 405 self.position += 1 406 self.update_stack_depth(-1) 407 408 def binary_lshift(self): 409 self.output.append(opmap["BINARY_LSHIFT"]) 410 self.position += 1 411 self.update_stack_depth(-1) 412 413 def binary_rshift(self): 414 self.output.append(opmap["BINARY_RSHIFT"]) 415 self.position += 1 416 self.update_stack_depth(-1) 417 418 def binary_xor(self): 419 self.output.append(opmap["BINARY_XOR"]) 420 self.position += 1 421 self.update_stack_depth(-1) 422 423 def unary_negative(self): 424 self.output.append(opmap["UNARY_NEGATIVE"]) 425 self.position += 1 426 427 def slice_1(self): 428 self.output.append(opmap["SLICE+1"]) 429 self.position += 1 430 431 def compare_op(self, op): 432 self.output.append(opmap["COMPARE_OP"]) 433 self.position += 1 434 self._write_value(list(cmp_op).index(op)) 435 self.update_stack_depth(-1) 436 437 def return_value(self): 438 self.output.append(opmap["RETURN_VALUE"]) 439 self.position += 1 440 self.update_stack_depth(-1) 441 442 def raise_varargs(self, count): 443 self.output.append(opmap["RAISE_VARARGS"]) 444 self.position += 1 445 self._write_value(count) 446 447 def pop_block(self): 448 self.output.append(opmap["POP_BLOCK"]) 449 self.position += 1 450 451 def end_finally(self): 452 self.output.append(opmap["END_FINALLY"]) 453 self.position += 1 454 455 def unpack_sequence(self, count): 456 self.output.append(opmap["UNPACK_SEQUENCE"]) 457 self.position += 1 458 self._write_value(count) 459 460 # Utility classes and functions. 461 462 class LazyDict(UserDict): 463 def __getitem__(self, key): 464 if not self.data.has_key(key): 465 # NOTE: Assume 16-bit value. 466 self.data[key] = LazyValue(2) 467 return self.data[key] 468 def __setitem__(self, key, value): 469 if self.data.has_key(key): 470 existing_value = self.data[key] 471 if isinstance(existing_value, LazyValue): 472 existing_value.set_value(value) 473 return 474 self.data[key] = value 475 476 class LazyValue: 477 def __init__(self, nvalues): 478 self.values = [] 479 for i in range(0, nvalues): 480 self.values.append(LazySubValue()) 481 def set_value(self, value): 482 # NOTE: Assume at least 16-bit value. No "filling" performed. 483 if value <= 0xffff: 484 self.values[0].set_value(value & 0xff) 485 self.values[1].set_value((value & 0xff00) >> 8) 486 else: 487 # NOTE: EXTENDED_ARG not yet supported. 488 raise ValueError, value 489 def get_value(self): 490 value = 0 491 values = self.values[:] 492 for i in range(0, len(values)): 493 value = (value << 8) + values.pop().value 494 return value 495 496 class LazySubValue: 497 def __init__(self): 498 self.value = 0 499 def set_value(self, value): 500 self.value = value 501 502 def signed(value, limit): 503 504 """ 505 Return the signed integer from the unsigned 'value', where 'limit' (a value 506 one greater than the highest possible positive integer) is used to determine 507 whether a negative or positive result is produced. 508 """ 509 510 d, r = divmod(value, limit) 511 if d == 1: 512 mask = limit * 2 - 1 513 return -1 - (value ^ mask) 514 else: 515 return value 516 517 def signed2(value): 518 return signed(value, 0x8000) 519 520 def signed4(value): 521 return signed(value, 0x80000000) 522 523 # Bytecode conversion. 524 525 class BytecodeReader: 526 527 "A generic Java bytecode reader." 528 529 def __init__(self, class_file): 530 self.class_file = class_file 531 self.position_mapping = LazyDict() 532 533 def process(self, method, program): 534 self.java_position = 0 535 self.in_finally = 0 536 self.method = method 537 538 # NOTE: Not guaranteed. 539 if len(method.attributes) == 0: 540 return 541 attribute = method.attributes[0] 542 code, exception_table = attribute.code, attribute.exception_table 543 544 # Produce a structure which permits fast access to exception details. 545 exception_block_start = {} 546 exception_block_end = {} 547 exception_block_handler = {} 548 reversed_exception_table = exception_table[:] 549 reversed_exception_table.reverse() 550 551 # Later entries have wider coverage than earlier entries. 552 for exception in reversed_exception_table: 553 # Index start positions. 554 if not exception_block_start.has_key(exception.start_pc): 555 exception_block_start[exception.start_pc] = [] 556 exception_block_start[exception.start_pc].append(exception) 557 # Index end positions. 558 if not exception_block_end.has_key(exception.end_pc): 559 exception_block_end[exception.end_pc] = [] 560 exception_block_end[exception.end_pc].append(exception) 561 # Index handler positions. 562 if not exception_block_handler.has_key(exception.handler_pc): 563 exception_block_handler[exception.handler_pc] = [] 564 exception_block_handler[exception.handler_pc].append(exception) 565 566 # Process each instruction in the code. 567 while self.java_position < len(code): 568 self.position_mapping[self.java_position] = program.position 569 570 # Insert exception handling constructs. 571 block_starts = exception_block_start.get(self.java_position, []) 572 for exception in block_starts: 573 # Note that the absolute position is used. 574 if exception.catch_type == 0: 575 program.setup_finally(self.position_mapping[exception.handler_pc]) 576 else: 577 program.setup_except(self.position_mapping[exception.handler_pc]) 578 if block_starts: 579 self.in_finally = 0 580 581 # Insert exception handler details. 582 # NOTE: Ensure that pop_block is reachable by possibly inserting it at the start of finally handlers. 583 # NOTE: Insert a check for the correct exception at the start of each handler. 584 for exception in exception_block_handler.get(self.java_position, []): 585 program.end_exception() 586 if exception.catch_type == 0: 587 self.in_finally = 1 588 else: 589 program.start_handler(self.class_file.constants[exception.catch_type - 1].get_python_name()) 590 591 # Process the bytecode at the current position. 592 bytecode = ord(code[self.java_position]) 593 mnemonic, number_of_arguments = self.java_bytecodes[bytecode] 594 number_of_arguments = self.process_bytecode(mnemonic, number_of_arguments, code, program) 595 next_java_position = self.java_position + 1 + number_of_arguments 596 597 # Insert exception block end details. 598 for exception in exception_block_end.get(next_java_position, []): 599 # NOTE: Insert jump beyond handlers. 600 # NOTE: program.jump_forward/absolute(...) 601 # NOTE: Insert end finally at end of handlers as well as where "ret" occurs. 602 if exception.catch_type != 0: 603 program.pop_block() 604 605 # Only advance the JVM position after sneaking in extra Python 606 # instructions. 607 self.java_position = next_java_position 608 609 def process_bytecode(self, mnemonic, number_of_arguments, code, program): 610 if number_of_arguments is not None: 611 arguments = [] 612 for j in range(0, number_of_arguments): 613 arguments.append(ord(code[self.java_position + 1 + j])) 614 615 # Call the handler. 616 getattr(self, mnemonic)(arguments, program) 617 return number_of_arguments 618 else: 619 # Call the handler. 620 return getattr(self, mnemonic)(code[self.java_position+1:], program) 621 622 java_bytecodes = { 623 # code : (mnemonic, number of following bytes, change in stack) 624 0 : ("nop", 0), 625 1 : ("aconst_null", 0), 626 2 : ("iconst_m1", 0), 627 3 : ("iconst_0", 0), 628 4 : ("iconst_1", 0), 629 5 : ("iconst_2", 0), 630 6 : ("iconst_3", 0), 631 7 : ("iconst_4", 0), 632 8 : ("iconst_5", 0), 633 9 : ("lconst_0", 0), 634 10 : ("lconst_1", 0), 635 11 : ("fconst_0", 0), 636 12 : ("fconst_1", 0), 637 13 : ("fconst_2", 0), 638 14 : ("dconst_0", 0), 639 15 : ("dconst_1", 0), 640 16 : ("bipush", 1), 641 17 : ("sipush", 2), 642 18 : ("ldc", 1), 643 19 : ("ldc_w", 2), 644 20 : ("ldc2_w", 2), 645 21 : ("iload", 1), 646 22 : ("lload", 1), 647 23 : ("fload", 1), 648 24 : ("dload", 1), 649 25 : ("aload", 1), 650 26 : ("iload_0", 0), 651 27 : ("iload_1", 0), 652 28 : ("iload_2", 0), 653 29 : ("iload_3", 0), 654 30 : ("lload_0", 0), 655 31 : ("lload_1", 0), 656 32 : ("lload_2", 0), 657 33 : ("lload_3", 0), 658 34 : ("fload_0", 0), 659 35 : ("fload_1", 0), 660 36 : ("fload_2", 0), 661 37 : ("fload_3", 0), 662 38 : ("dload_0", 0), 663 39 : ("dload_1", 0), 664 40 : ("dload_2", 0), 665 41 : ("dload_3", 0), 666 42 : ("aload_0", 0), 667 43 : ("aload_1", 0), 668 44 : ("aload_2", 0), 669 45 : ("aload_3", 0), 670 46 : ("iaload", 0), 671 47 : ("laload", 0), 672 48 : ("faload", 0), 673 49 : ("daload", 0), 674 50 : ("aaload", 0), 675 51 : ("baload", 0), 676 52 : ("caload", 0), 677 53 : ("saload", 0), 678 54 : ("istore", 1), 679 55 : ("lstore", 1), 680 56 : ("fstore", 1), 681 57 : ("dstore", 1), 682 58 : ("astore", 1), 683 59 : ("istore_0", 0), 684 60 : ("istore_1", 0), 685 61 : ("istore_2", 0), 686 62 : ("istore_3", 0), 687 63 : ("lstore_0", 0), 688 64 : ("lstore_1", 0), 689 65 : ("lstore_2", 0), 690 66 : ("lstore_3", 0), 691 67 : ("fstore_0", 0), 692 68 : ("fstore_1", 0), 693 69 : ("fstore_2", 0), 694 70 : ("fstore_3", 0), 695 71 : ("dstore_0", 0), 696 72 : ("dstore_1", 0), 697 73 : ("dstore_2", 0), 698 74 : ("dstore_3", 0), 699 75 : ("astore_0", 0), 700 76 : ("astore_1", 0), 701 77 : ("astore_2", 0), 702 78 : ("astore_3", 0), 703 79 : ("iastore", 0), 704 80 : ("lastore", 0), 705 81 : ("fastore", 0), 706 82 : ("dastore", 0), 707 83 : ("aastore", 0), 708 84 : ("bastore", 0), 709 85 : ("castore", 0), 710 86 : ("sastore", 0), 711 87 : ("pop", 0), 712 88 : ("pop2", 0), 713 89 : ("dup", 0), 714 90 : ("dup_x1", 0), 715 91 : ("dup_x2", 0), 716 92 : ("dup2", 0), 717 93 : ("dup2_x1", 0), 718 94 : ("dup2_x2", 0), 719 95 : ("swap", 0), 720 96 : ("iadd", 0), 721 97 : ("ladd", 0), 722 98 : ("fadd", 0), 723 99 : ("dadd", 0), 724 100 : ("isub", 0), 725 101 : ("lsub", 0), 726 102 : ("fsub", 0), 727 103 : ("dsub", 0), 728 104 : ("imul", 0), 729 105 : ("lmul", 0), 730 106 : ("fmul", 0), 731 107 : ("dmul", 0), 732 108 : ("idiv", 0), 733 109 : ("ldiv", 0), 734 110 : ("fdiv", 0), 735 111 : ("ddiv", 0), 736 112 : ("irem", 0), 737 113 : ("lrem", 0), 738 114 : ("frem", 0), 739 115 : ("drem", 0), 740 116 : ("ineg", 0), 741 117 : ("lneg", 0), 742 118 : ("fneg", 0), 743 119 : ("dneg", 0), 744 120 : ("ishl", 0), 745 121 : ("lshl", 0), 746 122 : ("ishr", 0), 747 123 : ("lshr", 0), 748 124 : ("iushr", 0), 749 125 : ("lushr", 0), 750 126 : ("iand", 0), 751 127 : ("land", 0), 752 128 : ("ior", 0), 753 129 : ("lor", 0), 754 130 : ("ixor", 0), 755 131 : ("lxor", 0), 756 132 : ("iinc", 2), 757 133 : ("i2l", 0), 758 134 : ("i2f", 0), 759 135 : ("i2d", 0), 760 136 : ("l2i", 0), 761 137 : ("l2f", 0), 762 138 : ("l2d", 0), 763 139 : ("f2i", 0), 764 140 : ("f2l", 0), 765 141 : ("f2d", 0), 766 142 : ("d2i", 0), 767 143 : ("d2l", 0), 768 144 : ("d2f", 0), 769 145 : ("i2b", 0), 770 146 : ("i2c", 0), 771 147 : ("i2s", 0), 772 148 : ("lcmp", 0), 773 149 : ("fcmpl", 0), 774 150 : ("fcmpg", 0), 775 151 : ("dcmpl", 0), 776 152 : ("dcmpg", 0), 777 153 : ("ifeq", 2), 778 154 : ("ifne", 2), 779 155 : ("iflt", 2), 780 156 : ("ifge", 2), 781 157 : ("ifgt", 2), 782 158 : ("ifle", 2), 783 159 : ("if_icmpeq", 2), 784 160 : ("if_icmpne", 2), 785 161 : ("if_icmplt", 2), 786 162 : ("if_icmpge", 2), 787 163 : ("if_icmpgt", 2), 788 164 : ("if_icmple", 2), 789 165 : ("if_acmpeq", 2), 790 166 : ("if_acmpne", 2), 791 167 : ("goto", 2), 792 168 : ("jsr", 2), 793 169 : ("ret", 1), 794 170 : ("tableswitch", None), # variable number of arguments 795 171 : ("lookupswitch", None), # variable number of arguments 796 172 : ("ireturn", 0), 797 173 : ("lreturn", 0), 798 174 : ("freturn", 0), 799 175 : ("dreturn", 0), 800 176 : ("areturn", 0), 801 177 : ("return_", 0), 802 178 : ("getstatic", 2), 803 179 : ("putstatic", 2), 804 180 : ("getfield", 2), 805 181 : ("putfield", 2), 806 182 : ("invokevirtual", 2), 807 183 : ("invokespecial", 2), 808 184 : ("invokestatic", 2), 809 185 : ("invokeinterface", 4), 810 187 : ("new", 2), 811 188 : ("newarray", 1), 812 189 : ("anewarray", 2), 813 190 : ("arraylength", 0), 814 191 : ("athrow", 0), 815 192 : ("checkcast", 2), 816 193 : ("instanceof", 2), 817 194 : ("monitorenter", 0), 818 195 : ("monitorexit", 0), 819 196 : ("wide", None), # 3 or 5 arguments, stack changes according to modified element 820 197 : ("multianewarray", 3), 821 198 : ("ifnull", 2), 822 199 : ("ifnonnull", 2), 823 200 : ("goto_w", 4), 824 201 : ("jsr_w", 4), 825 } 826 827 class BytecodeDisassembler(BytecodeReader): 828 829 "A Java bytecode disassembler." 830 831 bytecode_methods = [spec[0] for spec in BytecodeReader.java_bytecodes.values()] 832 833 def __getattr__(self, name): 834 if name in self.bytecode_methods: 835 print "%5s %s" % (self.java_position, name), 836 return self.generic 837 else: 838 raise AttributeError, name 839 840 def generic(self, arguments, program): 841 print arguments 842 843 class BytecodeDisassemblerProgram: 844 position = 0 845 def setup_except(self, target): 846 print "(setup_except %s)" % target 847 def setup_finally(self, target): 848 print "(setup_finally %s)" % target 849 def end_exception(self): 850 print "(end_exception)" 851 def start_handler(self, exc_name): 852 print "(start_handler %s)" % exc_name 853 def pop_block(self): 854 print "(pop_block)" 855 856 class BytecodeTranslator(BytecodeReader): 857 858 "A Java bytecode translator which uses a Python bytecode writer." 859 860 def aaload(self, arguments, program): 861 # NOTE: No type checking performed. 862 program.binary_subscr() 863 864 def aastore(self, arguments, program): 865 # NOTE: No type checking performed. 866 # Stack: arrayref, index, value 867 program.rot_three() # Stack: value, arrayref, index 868 program.store_subscr() 869 870 def aconst_null(self, arguments, program): 871 program.load_const(None) 872 873 def aload(self, arguments, program): 874 program.load_fast(arguments[0]) 875 876 def aload_0(self, arguments, program): 877 program.load_fast(0) 878 879 def aload_1(self, arguments, program): 880 program.load_fast(1) 881 882 def aload_2(self, arguments, program): 883 program.load_fast(2) 884 885 def aload_3(self, arguments, program): 886 program.load_fast(3) 887 888 def anewarray(self, arguments, program): 889 # NOTE: Does not raise NegativeArraySizeException. 890 # NOTE: Not using the index to type the list/array. 891 index = (arguments[0] << 8) + arguments[1] 892 self._newarray(program) 893 894 def _newarray(self, program): 895 program.build_list() # Stack: count, list 896 program.rot_two() # Stack: list, count 897 program.setup_loop() 898 program.load_global("range") 899 program.load_const(0) # Stack: list, count, range, 0 900 program.rot_three() # Stack: list, 0, count, range 901 program.rot_three() # Stack: list, range, 0, count 902 program.call_function(2) # Stack: list, range_list 903 program.get_iter() # Stack: list, iter 904 program.for_iter() # Stack: list, iter, value 905 program.pop_top() # Stack: list, iter 906 program.rot_two() # Stack: iter, list 907 program.dup_top() # Stack: iter, list, list 908 program.load_attr("append") # Stack: iter, list, append 909 program.load_const(None) # Stack: iter, list, append, None 910 program.call_function(1) # Stack: iter, list, None 911 program.pop_top() # Stack: iter, list 912 program.rot_two() # Stack: list, iter 913 program.end_loop() # Back to for_iter above 914 915 def areturn(self, arguments, program): 916 program.return_value() 917 918 def arraylength(self, arguments, program): 919 program.load_global("len") # Stack: arrayref, len 920 program.rot_two() # Stack: len, arrayref 921 program.call_function(1) 922 923 def astore(self, arguments, program): 924 program.store_fast(arguments[0]) 925 926 def astore_0(self, arguments, program): 927 program.store_fast(0) 928 929 def astore_1(self, arguments, program): 930 program.store_fast(1) 931 932 def astore_2(self, arguments, program): 933 program.store_fast(2) 934 935 def astore_3(self, arguments, program): 936 program.store_fast(3) 937 938 def athrow(self, arguments, program): 939 # NOTE: NullPointerException not raised where null/None is found on the stack. 940 # If this instruction appears in a finally handler, use end_finally instead. 941 if self.in_finally: 942 program.end_finally() 943 else: 944 program.dup_top() 945 program.raise_varargs(1) 946 947 baload = aaload 948 bastore = aastore 949 950 def bipush(self, arguments, program): 951 program.load_const(arguments[0]) 952 953 caload = aaload 954 castore = aastore 955 956 def checkcast(self, arguments, program): 957 index = (arguments[0] << 8) + arguments[1] 958 target_name = self.class_file.constants[index - 1].get_python_name() 959 # NOTE: Using the string version of the name which may contain incompatible characters. 960 target_components = str(target_name).split("/") 961 962 program.dup_top() # Stack: objectref, objectref 963 program.load_global("isinstance") # Stack: objectref, objectref, isinstance 964 program.rot_two() # Stack: objectref, isinstance, objectref 965 program.load_global(target_components[0]) 966 for target_component in target_components[1:]: 967 program.load_attr(target_component) 968 program.call_function(2) # Stack: objectref 969 970 def d2f(self, arguments, program): 971 pass 972 973 def d2i(self, arguments, program): 974 program.load_global("int") # Stack: value, int 975 program.rot_two() # Stack: int, value 976 program.call_function(1) # Stack: result 977 978 d2l = d2i # Preserving Java semantics 979 980 def dadd(self, arguments, program): 981 # NOTE: No type checking performed. 982 program.binary_add() 983 984 daload = aaload 985 dastore = aastore 986 987 def dcmpg(self, arguments, program): 988 # NOTE: No type checking performed. 989 program.compare_op(">") 990 991 def dcmpl(self, arguments, program): 992 # NOTE: No type checking performed. 993 program.compare_op("<") 994 995 def dconst_0(self, arguments, program): 996 program.load_const(0.0) 997 998 def dconst_1(self, arguments, program): 999 program.load_const(1.0) 1000 1001 def ddiv(self, arguments, program): 1002 # NOTE: No type checking performed. 1003 program.binary_divide() 1004 1005 dload = aload 1006 dload_0 = aload_0 1007 dload_1 = aload_1 1008 dload_2 = aload_2 1009 dload_3 = aload_3 1010 1011 def dmul(self, arguments, program): 1012 # NOTE: No type checking performed. 1013 program.binary_multiply() 1014 1015 def dneg(self, arguments, program): 1016 # NOTE: No type checking performed. 1017 program.unary_negative() 1018 1019 def drem(self, arguments, program): 1020 # NOTE: No type checking performed. 1021 program.binary_modulo() 1022 1023 dreturn = areturn 1024 dstore = astore 1025 dstore_0 = astore_0 1026 dstore_1 = astore_1 1027 dstore_2 = astore_2 1028 dstore_3 = astore_3 1029 1030 def dsub(self, arguments, program): 1031 # NOTE: No type checking performed. 1032 program.binary_subtract() 1033 1034 def dup(self, arguments, program): 1035 program.dup_top() 1036 1037 def dup_x1(self, arguments, program): 1038 # Ignoring computational type categories. 1039 program.dup_top() 1040 program.rot_three() 1041 1042 def dup_x2(self, arguments, program): 1043 # Ignoring computational type categories. 1044 program.dup_top() 1045 program.rot_four() 1046 1047 dup2 = dup # Ignoring computational type categories 1048 dup2_x1 = dup_x1 # Ignoring computational type categories 1049 dup2_x2 = dup_x2 # Ignoring computational type categories 1050 1051 def f2d(self, arguments, program): 1052 pass # Preserving Java semantics 1053 1054 def f2i(self, arguments, program): 1055 program.load_global("int") # Stack: value, int 1056 program.rot_two() # Stack: int, value 1057 program.call_function(1) # Stack: result 1058 1059 f2l = f2i # Preserving Java semantics 1060 fadd = dadd 1061 faload = daload 1062 fastore = dastore 1063 fcmpg = dcmpg 1064 fcmpl = dcmpl 1065 fconst_0 = dconst_0 1066 fconst_1 = dconst_1 1067 1068 def fconst_2(self, arguments, program): 1069 program.load_const(2.0) 1070 1071 fdiv = ddiv 1072 fload = dload 1073 fload_0 = dload_0 1074 fload_1 = dload_1 1075 fload_2 = dload_2 1076 fload_3 = dload_3 1077 fmul = dmul 1078 fneg = dneg 1079 frem = drem 1080 freturn = dreturn 1081 fstore = dstore 1082 fstore_0 = dstore_0 1083 fstore_1 = dstore_1 1084 fstore_2 = dstore_2 1085 fstore_3 = dstore_3 1086 fsub = dsub 1087 1088 def getfield(self, arguments, program): 1089 index = (arguments[0] << 8) + arguments[1] 1090 target_name = self.class_file.constants[index - 1].get_python_name() 1091 # NOTE: Using the string version of the name which may contain incompatible characters. 1092 program.load_attr(str(target_name)) 1093 1094 def getstatic(self, arguments, program): 1095 index = (arguments[0] << 8) + arguments[1] 1096 target_name = self.class_file.constants[index - 1].get_python_name() 1097 program.load_name("self") 1098 program.load_attr("__class__") 1099 # NOTE: Using the string version of the name which may contain incompatible characters. 1100 program.load_attr(str(target_name)) 1101 1102 def goto(self, arguments, program): 1103 offset = signed2((arguments[0] << 8) + arguments[1]) 1104 java_absolute = self.java_position + offset 1105 program.jump_absolute(self.position_mapping[java_absolute]) 1106 1107 def goto_w(self, arguments, program): 1108 offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) 1109 java_absolute = self.java_position + offset 1110 program.jump_absolute(self.position_mapping[java_absolute]) 1111 1112 def i2b(self, arguments, program): 1113 pass 1114 1115 def i2c(self, arguments, program): 1116 program.load_global("chr") # Stack: value, chr 1117 program.rot_two() # Stack: chr, value 1118 program.call_function(1) # Stack: result 1119 1120 def i2d(self, arguments, program): 1121 program.load_global("float") # Stack: value, float 1122 program.rot_two() # Stack: float, value 1123 program.call_function(1) # Stack: result 1124 1125 i2f = i2d # Not distinguishing between float and double 1126 1127 def i2l(self, arguments, program): 1128 pass # Preserving Java semantics 1129 1130 def i2s(self, arguments, program): 1131 pass # Not distinguishing between int and short 1132 1133 iadd = fadd 1134 iaload = faload 1135 1136 def iand(self, arguments, program): 1137 # NOTE: No type checking performed. 1138 program.binary_and() 1139 1140 iastore = fastore 1141 1142 def iconst_m1(self, arguments, program): 1143 program.load_const(-1) 1144 1145 def iconst_0(self, arguments, program): 1146 program.load_const(0) 1147 1148 def iconst_1(self, arguments, program): 1149 program.load_const(1) 1150 1151 def iconst_2(self, arguments, program): 1152 program.load_const(2) 1153 1154 def iconst_3(self, arguments, program): 1155 program.load_const(3) 1156 1157 def iconst_4(self, arguments, program): 1158 program.load_const(4) 1159 1160 def iconst_5(self, arguments, program): 1161 program.load_const(5) 1162 1163 idiv = fdiv 1164 1165 def _if_xcmpx(self, arguments, program, op): 1166 offset = signed2((arguments[0] << 8) + arguments[1]) 1167 java_absolute = self.java_position + offset 1168 program.compare_op(op) 1169 program.jump_to_label(0, "next") # skip if false 1170 program.pop_top() 1171 program.jump_absolute(self.position_mapping[java_absolute]) 1172 program.start_label("next") 1173 program.pop_top() 1174 1175 def if_acmpeq(self, arguments, program): 1176 # NOTE: No type checking performed. 1177 self._if_xcmpx(arguments, program, "is") 1178 1179 def if_acmpne(self, arguments, program): 1180 # NOTE: No type checking performed. 1181 self._if_xcmpx(arguments, program, "is not") 1182 1183 def if_icmpeq(self, arguments, program): 1184 # NOTE: No type checking performed. 1185 self._if_xcmpx(arguments, program, "==") 1186 1187 def if_icmpne(self, arguments, program): 1188 # NOTE: No type checking performed. 1189 self._if_xcmpx(arguments, program, "!=") 1190 1191 def if_icmplt(self, arguments, program): 1192 # NOTE: No type checking performed. 1193 self._if_xcmpx(arguments, program, "<") 1194 1195 def if_icmpge(self, arguments, program): 1196 # NOTE: No type checking performed. 1197 self._if_xcmpx(arguments, program, ">=") 1198 1199 def if_icmpgt(self, arguments, program): 1200 # NOTE: No type checking performed. 1201 self._if_xcmpx(arguments, program, ">") 1202 1203 def if_icmple(self, arguments, program): 1204 # NOTE: No type checking performed. 1205 self._if_xcmpx(arguments, program, "<=") 1206 1207 def ifeq(self, arguments, program): 1208 # NOTE: No type checking performed. 1209 program.load_const(0) 1210 self._if_xcmpx(arguments, program, "==") 1211 1212 def ifne(self, arguments, program): 1213 # NOTE: No type checking performed. 1214 program.load_const(0) 1215 self._if_xcmpx(arguments, program, "!=") 1216 1217 def iflt(self, arguments, program): 1218 # NOTE: No type checking performed. 1219 program.load_const(0) 1220 self._if_xcmpx(arguments, program, "<") 1221 1222 def ifge(self, arguments, program): 1223 # NOTE: No type checking performed. 1224 program.load_const(0) 1225 self._if_xcmpx(arguments, program, ">=") 1226 1227 def ifgt(self, arguments, program): 1228 # NOTE: No type checking performed. 1229 program.load_const(0) 1230 self._if_xcmpx(arguments, program, ">") 1231 1232 def ifle(self, arguments, program): 1233 # NOTE: No type checking performed. 1234 program.load_const(0) 1235 self._if_xcmpx(arguments, program, "<=") 1236 1237 def ifnonnull(self, arguments, program): 1238 # NOTE: No type checking performed. 1239 program.load_const(None) 1240 self._if_xcmpx(arguments, program, "is not") 1241 1242 def ifnull(self, arguments, program): 1243 # NOTE: No type checking performed. 1244 program.load_const(None) 1245 self._if_xcmpx(arguments, program, "is") 1246 1247 def iinc(self, arguments, program): 1248 # NOTE: No type checking performed. 1249 program.load_fast(arguments[0]) 1250 program.load_const(arguments[1]) 1251 program.binary_add() 1252 program.store_fast(arguments[0]) 1253 1254 iload = fload 1255 iload_0 = fload_0 1256 iload_1 = fload_1 1257 iload_2 = fload_2 1258 iload_3 = fload_3 1259 imul = fmul 1260 ineg = fneg 1261 1262 def instanceof(self, arguments, program): 1263 index = (arguments[0] << 8) + arguments[1] 1264 target_name = self.class_file.constants[index - 1].get_python_name() 1265 # NOTE: Using the string version of the name which may contain incompatible characters. 1266 target_components = str(target_name).split("/") 1267 1268 program.load_global("isinstance") # Stack: objectref, isinstance 1269 program.rot_two() # Stack: isinstance, objectref 1270 program.load_global(target_components[0]) 1271 for target_component in target_components[1:]: 1272 program.load_attr(target_component) 1273 program.call_function(2) # Stack: result 1274 1275 def _invoke(self, target_name, program): 1276 # NOTE: Using the string version of the name which may contain incompatible characters. 1277 program.load_attr(str(target_name)) # Stack: tuple, method 1278 program.rot_two() # Stack: method, tuple 1279 program.load_global("apply") # Stack: method, tuple, apply 1280 program.rot_three() # Stack: apply, method, tuple 1281 program.call_function(2) 1282 1283 def invokeinterface(self, arguments, program): 1284 # NOTE: This implementation does not perform the necessary checks for 1285 # NOTE: signature-based polymorphism. 1286 # NOTE: Java rules not specifically obeyed. 1287 index = (arguments[0] << 8) + arguments[1] 1288 # NOTE: "count" == nargs + 1, apparently. 1289 count = arguments[2] - 1 1290 target_name = self.class_file.constants[index - 1].get_python_name() 1291 # Stack: objectref, arg1, arg2, ... 1292 program.build_tuple(count) # Stack: objectref, tuple 1293 program.rot_two() # Stack: tuple, objectref 1294 self._invoke(target_name, program) 1295 1296 def invokespecial(self, arguments, program): 1297 # NOTE: This implementation does not perform the necessary checks for 1298 # NOTE: signature-based polymorphism. 1299 # NOTE: Java rules not specifically obeyed. 1300 index = (arguments[0] << 8) + arguments[1] 1301 target = self.class_file.constants[index - 1] 1302 original_name = target.get_name() 1303 target_name = target.get_python_name() 1304 # Get the number of parameters from the descriptor. 1305 count = len(target.get_descriptor()[0]) 1306 1307 # The stack may contain one of the following patterns: 1308 # Stack: classref, arg1, arg2, ... 1309 # Stack: objectref, arg1, arg2, ... 1310 # method == __init__, classref -> classref(arg1, arg2, ...) 1311 # method == __init__, objectref == self -> cls.bases[0].__init__(objectref, arg1, arg2, ...) 1312 # method == __init__, objectref != self -> should not occur 1313 # method != __init__, classref -> classref.method(classref, arg1, arg2, ...) 1314 # method != __init__, objectref == self -> cls.bases[0].method(objectref, arg1, arg2, ...) 1315 # method != __init__, objectref != self -> should not occur 1316 1317 # First, we build a tuple of the reference and arguments. 1318 program.build_tuple(count + 1) # Stack: tuple 1319 1320 # Then, we test the nature of the reference. 1321 program.dup_top() # Stack: tuple, tuple 1322 program.load_const(0) # Stack: tuple, tuple, 0 1323 program.binary_subscr() # Stack: tuple, reference 1324 program.dup_top() # Stack: tuple, reference, reference 1325 1326 # Is it self? 1327 program.load_fast(0) # Stack: tuple, reference, reference, self 1328 program.compare_op("is") # Stack: tuple, reference, result 1329 program.jump_to_label(1, "is-self") 1330 program.pop_top() # Stack: tuple, reference 1331 1332 # Is another class or reference. 1333 # NOTE: Reference case not covered! 1334 if str(original_name) == "<init>": 1335 program.rot_two() # Stack: reference, tuple 1336 program.load_const(1) # Stack: reference, tuple, 1 1337 program.slice_1() # Stack: reference, tuple[1:] 1338 program.load_global("apply") # Stack: reference, tuple, apply 1339 program.rot_three() # Stack: apply, reference, tuple 1340 program.call_function(2) 1341 # NOTE: Combinations of new, dup tend to produce interfering extra 1342 # NOTE: class references. 1343 program.rot_two() # Stack: objectref, classref 1344 program.pop_top() 1345 program.jump_to_label(None, "done") 1346 else: 1347 self._invoke(target_name, program) 1348 program.jump_to_label(None, "done") 1349 1350 # Is self. 1351 program.start_label("is-self") 1352 program.pop_top() # Stack: tuple, reference 1353 program.pop_top() # Stack: tuple 1354 # Get the class name instead of the fully qualified name. 1355 full_class_name = str(self.class_file.this_class.get_python_name()) 1356 class_name = full_class_name.split(".")[-1] 1357 program.load_global(class_name) # Stack: tuple, classref 1358 program.load_attr("__bases__") # Stack: tuple, bases 1359 program.dup_top() # Stack: tuple, bases, bases 1360 program.load_global("len") # Stack: tuple, bases, bases, len 1361 program.rot_two() # Stack: tuple, bases, len, bases 1362 program.call_function(1) # Stack: tuple, bases, #bases 1363 program.load_const(0) # Stack: tuple, bases, #bases, 0 1364 program.compare_op("==") # Stack: tuple, bases, result 1365 program.jump_to_label(1, "no-bases") 1366 program.pop_top() # Stack: tuple, bases 1367 program.load_const(0) # Stack: tuple, bases, 0 1368 program.binary_subscr() # Stack: tuple, bases[0] 1369 self._invoke(target_name, program) 1370 program.jump_to_label(None, "done") 1371 1372 # No bases found, do no invocation. 1373 program.start_label("no-bases") 1374 program.pop_top() # Stack: tuple, bases 1375 program.pop_top() # Stack: tuple 1376 program.pop_top() # Stack: 1377 program.start_label("done") 1378 1379 def invokestatic(self, arguments, program): 1380 # NOTE: This implementation does not perform the necessary checks for 1381 # NOTE: signature-based polymorphism. 1382 # NOTE: Java rules not specifically obeyed. 1383 index = (arguments[0] << 8) + arguments[1] 1384 target = self.class_file.constants[index - 1] 1385 target_name = target.get_python_name() 1386 # Get the number of parameters from the descriptor. 1387 count = len(target.get_descriptor()[0]) 1388 # Stack: arg1, arg2, ... 1389 program.build_tuple(count) # Stack: tuple 1390 # Use the class to provide access to static methods. 1391 program.load_name("self") # Stack: tuple, self 1392 program.load_attr("__class__") # Stack: tuple, class 1393 self._invoke(target_name, program) 1394 1395 def invokevirtual (self, arguments, program): 1396 # NOTE: This implementation does not perform the necessary checks for 1397 # NOTE: signature-based polymorphism. 1398 # NOTE: Java rules not specifically obeyed. 1399 index = (arguments[0] << 8) + arguments[1] 1400 target = self.class_file.constants[index - 1] 1401 target_name = target.get_python_name() 1402 # Get the number of parameters from the descriptor. 1403 count = len(target.get_descriptor()[0]) 1404 # Stack: objectref, arg1, arg2, ... 1405 program.build_tuple(count) # Stack: objectref, tuple 1406 program.rot_two() # Stack: tuple, objectref 1407 self._invoke(target_name, program) 1408 1409 def ior(self, arguments, program): 1410 # NOTE: No type checking performed. 1411 program.binary_or() 1412 1413 irem = frem 1414 ireturn = freturn 1415 1416 def ishl(self, arguments, program): 1417 # NOTE: No type checking performed. 1418 # NOTE: Not verified. 1419 program.binary_lshift() 1420 1421 def ishr(self, arguments, program): 1422 # NOTE: No type checking performed. 1423 # NOTE: Not verified. 1424 program.binary_rshift() 1425 1426 istore = fstore 1427 istore_0 = fstore_0 1428 istore_1 = fstore_1 1429 istore_2 = fstore_2 1430 istore_3 = fstore_3 1431 isub = fsub 1432 iushr = ishr # Ignoring distinctions between arithmetic and logical shifts 1433 1434 def ixor(self, arguments, program): 1435 # NOTE: No type checking performed. 1436 program.binary_xor() 1437 1438 def jsr(self, arguments, program): 1439 offset = signed2((arguments[0] << 8) + arguments[1]) 1440 java_absolute = self.java_position + offset 1441 # Store the address of the next instruction. 1442 program.load_const_ret(self.position_mapping[self.java_position + 3]) 1443 program.jump_absolute(self.position_mapping[java_absolute]) 1444 1445 def jsr_w(self, arguments, program): 1446 offset = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]) 1447 java_absolute = self.java_position + offset 1448 # Store the address of the next instruction. 1449 program.load_const_ret(self.position_mapping[self.java_position + 5]) 1450 program.jump_absolute(self.position_mapping[java_absolute]) 1451 1452 l2d = i2d 1453 l2f = i2f 1454 1455 def l2i(self, arguments, program): 1456 pass # Preserving Java semantics 1457 1458 ladd = iadd 1459 laload = iaload 1460 land = iand 1461 lastore = iastore 1462 1463 def lcmp(self, arguments, program): 1464 # NOTE: No type checking performed. 1465 program.dup_topx(2) # Stack: value1, value2, value1, value2 1466 program.compare_op(">") # Stack: value1, value2, result 1467 program.jump_to_label(0, "equals") 1468 # True - produce result and branch. 1469 program.pop_top() # Stack: value1, value2 1470 program.pop_top() # Stack: value1 1471 program.pop_top() # Stack: 1472 program.load_const(1) # Stack: 1 1473 program.jump_to_label(None, "next") 1474 # False - test equality. 1475 program.start_label("equals") 1476 program.pop_top() # Stack: value1, value2 1477 program.dup_topx(2) # Stack: value1, value2, value1, value2 1478 program.compare_op("==") # Stack: value1, value2, result 1479 program.jump_to_label(0, "less") 1480 # True - produce result and branch. 1481 program.pop_top() # Stack: value1, value2 1482 program.pop_top() # Stack: value1 1483 program.pop_top() # Stack: 1484 program.load_const(0) # Stack: 0 1485 program.jump_to_label(None, "next") 1486 # False - produce result. 1487 program.start_label("less") 1488 program.pop_top() # Stack: value1, value2 1489 program.pop_top() # Stack: value1 1490 program.pop_top() # Stack: 1491 program.load_const(-1) # Stack: -1 1492 program.start_label("next") 1493 1494 lconst_0 = iconst_0 1495 lconst_1 = iconst_1 1496 1497 def ldc(self, arguments, program): 1498 program.load_const(self.class_file.constants[arguments[0] - 1]) 1499 1500 def ldc_w(self, arguments, program): 1501 program.load_const(self.class_file.constants[(arguments[0] << 8) + arguments[1] - 1]) 1502 1503 ldc2_w = ldc_w 1504 ldiv = idiv 1505 lload = iload 1506 lload_0 = iload_0 1507 lload_1 = iload_1 1508 lload_2 = iload_2 1509 lload_3 = iload_3 1510 lmul = imul 1511 lneg = ineg 1512 1513 def lookupswitch(self, arguments, program): 1514 # Find the offset to the next 4 byte boundary in the code. 1515 d, r = divmod(self.java_position, 4) 1516 to_boundary = (4 - r) % 4 1517 # Get the pertinent arguments. 1518 arguments = arguments[to_boundary:] 1519 default = (arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3] 1520 npairs = (arguments[4] << 24) + (arguments[5] << 16) + (arguments[6] << 8) + arguments[7] 1521 # Process the pairs. 1522 # NOTE: This is not the most optimal implementation. 1523 pair_index = 8 1524 for pair in range(0, npairs): 1525 match = ((arguments[pair_index] << 24) + (arguments[pair_index + 1] << 16) + 1526 (arguments[pair_index + 2] << 8) + arguments[pair_index + 3]) 1527 offset = signed4((arguments[pair_index + 4] << 24) + (arguments[pair_index + 5] << 16) + 1528 (arguments[pair_index + 6] << 8) + arguments[pair_index + 7]) 1529 # Calculate the branch target. 1530 java_absolute = self.java_position + offset 1531 # Generate branching code. 1532 program.dup_top() # Stack: key, key 1533 program.load_const(match) # Stack: key, key, match 1534 program.compare_op("==") # Stack: key, result 1535 program.jump_to_label(0, "end") 1536 program.pop_top() # Stack: key 1537 program.pop_top() # Stack: 1538 program.jump_absolute(self.position_mapping[java_absolute]) 1539 # Generate the label for the end of the branching code. 1540 program.start_label("end") 1541 program.pop_top() # Stack: key 1542 # Update the index. 1543 pair_index += 8 1544 # Generate the default. 1545 java_absolute = self.java_position + default 1546 program.jump_absolute(self.position_mapping[java_absolute]) 1547 1548 lor = ior 1549 lrem = irem 1550 lreturn = ireturn 1551 lshl = ishl 1552 lshr = ishr 1553 lstore = istore 1554 lstore_0 = istore_0 1555 lstore_1 = istore_1 1556 lstore_2 = istore_2 1557 lstore_3 = istore_3 1558 lsub = isub 1559 lushr = iushr 1560 lxor = ixor 1561 1562 def monitorenter(self, arguments, program): 1563 # NOTE: To be implemented. 1564 pass 1565 1566 def monitorexit(self, arguments, program): 1567 # NOTE: To be implemented. 1568 pass 1569 1570 def multianewarray(self, arguments, program): 1571 # NOTE: To be implemented. 1572 pass 1573 1574 def new(self, arguments, program): 1575 # This operation is considered to be the same as the calling of the 1576 # initialisation method of the given class with no arguments. 1577 index = (arguments[0] << 8) + arguments[1] 1578 target_name = self.class_file.constants[index - 1].get_python_name() 1579 # NOTE: Using the string version of the name which may contain incompatible characters. 1580 program.load_global(str(target_name)) 1581 # NOTE: Unlike Java, we do not provide an object reference. Instead, a 1582 # NOTE: class reference is provided, and the invokespecial method's 1583 # NOTE: behaviour is changed. 1584 #program.call_function(0) 1585 1586 def newarray(self, arguments, program): 1587 # NOTE: Does not raise NegativeArraySizeException. 1588 # NOTE: Not using the arguments to type the list/array. 1589 self._newarray(program) 1590 1591 def nop(self, arguments, program): 1592 pass 1593 1594 def pop(self, arguments, program): 1595 program.pop_top() 1596 1597 pop2 = pop # ignoring Java stack value distinctions 1598 1599 def putfield(self, arguments, program): 1600 index = (arguments[0] << 8) + arguments[1] 1601 target_name = self.class_file.constants[index - 1].get_python_name() 1602 program.rot_two() 1603 # NOTE: Using the string version of the name which may contain incompatible characters. 1604 program.store_attr(str(target_name)) 1605 1606 def putstatic(self, arguments, program): 1607 index = (arguments[0] << 8) + arguments[1] 1608 target_name = self.class_file.constants[index - 1].get_python_name() 1609 program.load_name("self") 1610 program.load_attr("__class__") 1611 # NOTE: Using the string version of the name which may contain incompatible characters. 1612 program.store_attr(str(target_name)) 1613 1614 def ret(self, arguments, program): 1615 program.ret(arguments[0]) 1616 # Indicate that the finally handler is probably over. 1617 # NOTE: This is seemingly not guaranteed. 1618 self.in_finally = 0 1619 1620 def return_(self, arguments, program): 1621 program.load_const(None) 1622 program.return_value() 1623 1624 saload = laload 1625 sastore = lastore 1626 1627 def sipush(self, arguments, program): 1628 program.load_const((arguments[0] << 8) + arguments[1]) 1629 1630 def swap(self, arguments, program): 1631 program.rot_two() 1632 1633 def tableswitch(self, arguments, program): 1634 # Find the offset to the next 4 byte boundary in the code. 1635 d, r = divmod(self.java_position, 4) 1636 to_boundary = (4 - r) % 4 1637 # Get the pertinent arguments. 1638 arguments = arguments[to_boundary:] 1639 default = (arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3] 1640 low = (arguments[4] << 24) + (arguments[5] << 16) + (arguments[6] << 8) + arguments[7] 1641 high = (arguments[8] << 24) + (arguments[9] << 16) + (arguments[10] << 8) + arguments[11] 1642 # Process the jump entries. 1643 # NOTE: This is not the most optimal implementation. 1644 jump_index = 8 1645 for jump in range(low, high + 1): 1646 offset = signed4((arguments[jump_index] << 24) + (arguments[jump_index + 1] << 16) + 1647 (arguments[jump_index + 2] << 8) + arguments[jump_index + 3]) 1648 # Calculate the branch target. 1649 java_absolute = self.java_position + offset 1650 # Generate branching code. 1651 program.dup_top() # Stack: key, key 1652 program.load_const(jump) # Stack: key, key, jump 1653 program.compare_op("==") # Stack: key, result 1654 program.jump_to_label(0, "end") 1655 program.pop_top() # Stack: key 1656 program.pop_top() # Stack: 1657 program.jump_absolute(self.position_mapping[java_absolute]) 1658 # Generate the label for the end of the branching code. 1659 program.start_label("end") 1660 program.pop_top() # Stack: key 1661 # Update the index. 1662 jump_index += 8 1663 # Generate the default. 1664 java_absolute = self.java_position + default 1665 program.jump_absolute(self.position_mapping[java_absolute]) 1666 1667 def wide(self, code, program): 1668 # NOTE: To be implemented. 1669 return number_of_arguments 1670 1671 def disassemble(class_file, method): 1672 disassembler = BytecodeDisassembler(class_file) 1673 disassembler.process(method, BytecodeDisassemblerProgram()) 1674 1675 class ClassTranslator: 1676 1677 """ 1678 A class which provides a wrapper around a class file and the means to 1679 translate the represented class into a Python class. 1680 """ 1681 1682 def __init__(self, class_file): 1683 1684 "Initialise the object with the given 'class_file'." 1685 1686 self.class_file = class_file 1687 self.filename = str(self.class_file.attributes[0].get_name()) 1688 1689 def translate_method(self, method): 1690 1691 "Translate the given 'method' - an object obtained from the class file." 1692 1693 translator = BytecodeTranslator(self.class_file) 1694 writer = BytecodeWriter() 1695 translator.process(method, writer) 1696 return translator, writer 1697 1698 def make_method(self, method_name, methods, global_names, namespace): 1699 1700 """ 1701 Make a dispatcher method with the given 'method_name', providing 1702 dispatch to the supplied type-sensitive 'methods', accessing the given 1703 'global_names' where necessary, and storing the new method in the 1704 'namespace' provided. 1705 """ 1706 1707 if method_name == "<init>": 1708 method_name = "__init__" 1709 # Where only one method exists, just make an alias. 1710 if len(methods) == 1: 1711 method, fn = methods[0] 1712 namespace[method_name] = fn 1713 return 1714 # Find the maximum number of parameters involved. 1715 #maximum = max([len(method.get_descriptor()[0]) for method in methods]) 1716 program = BytecodeWriter() 1717 # NOTE: The code below should use dictionary-based dispatch for better performance. 1718 program.load_fast(1) # Stack: arguments 1719 for method, fn in methods: 1720 program.dup_top() # Stack: arguments, arguments 1721 program.load_const(1) 1722 program.store_fast(2) # found = 1 1723 program.setup_loop() 1724 # Emit a list of parameter types. 1725 descriptor_types = method.get_descriptor()[0] 1726 for descriptor_type in descriptor_types: 1727 base_type, object_type, array_type = descriptor_type 1728 python_type = classfile.descriptor_base_type_mapping[base_type] 1729 if python_type == "instance": 1730 # NOTE: This will need extending. 1731 python_type = object_type 1732 program.load_global(python_type) # Stack: arguments, type, ... 1733 program.build_list(len(descriptor_types)) 1734 # Stack: arguments, types 1735 # Make a map of arguments and types. 1736 program.load_const(None) # Stack: arguments, types, None 1737 program.rot_three() # Stack: None, arguments, types 1738 program.build_tuple(3) # Stack: tuple 1739 program.load_global("map") # Stack: tuple, map 1740 program.rot_two() # Stack: map, tuple 1741 program.load_global("apply") # Stack: map, tuple, apply 1742 program.rot_three() # Stack: apply, map, tuple 1743 program.call_function(2) # Stack: tuple (mapping arguments to types) 1744 # Loop over each pair. 1745 program.get_iter() # Stack: iter 1746 program.for_iter() # Stack: iter, (argument, type) 1747 program.unpack_sequence(2) # Stack: iter, type, argument 1748 program.dup_top() # Stack: iter, type, argument, argument 1749 program.load_const(None) # Stack: iter, type, argument, argument, None 1750 program.compare_op("is") # Stack: iter, type, argument, result 1751 # Missing argument? 1752 program.jump_to_label(0, "present") 1753 program.pop_top() # Stack: iter, type, argument 1754 program.pop_top() # Stack: iter, type 1755 program.pop_top() # Stack: iter 1756 program.load_const(0) 1757 program.store_fast(2) # found = 0 1758 program.break_loop() 1759 # Argument was present. 1760 program.start_label("present") 1761 program.pop_top() # Stack: iter, type, argument 1762 program.rot_two() # Stack: iter, argument, type 1763 program.dup_top() # Stack: iter, argument, type, type 1764 program.load_const(None) # Stack: iter, argument, type, type, None 1765 program.compare_op("is") # Stack: iter, argument, type, result 1766 # Missing parameter type? 1767 program.jump_to_label(0, "present") 1768 program.pop_top() # Stack: iter, argument, type 1769 program.pop_top() # Stack: iter, argument 1770 program.pop_top() # Stack: iter 1771 program.load_const(0) 1772 program.store_fast(2) # found = 0 1773 program.break_loop() 1774 # Parameter was present. 1775 program.start_label("present") 1776 program.pop_top() # Stack: iter, argument, type 1777 program.build_tuple(2) # Stack: iter, (argument, type) 1778 program.load_global("isinstance") # Stack: iter, (argument, type), isinstance 1779 program.rot_two() # Stack: iter, isinstance, (argument, type) 1780 program.load_global("apply") # Stack: iter, isinstance, (argument, type), apply 1781 program.rot_three() # Stack: iter, apply, isinstance, (argument, type) 1782 program.call_function(2) # Stack: iter, result 1783 program.jump_to_label(1, "match") 1784 program.pop_top() # Stack: iter 1785 program.load_const(0) 1786 program.store_fast(2) # found = 0 1787 program.break_loop() 1788 # Argument type and parameter type matched. 1789 program.start_label("match") 1790 program.pop_top() # Stack: iter 1791 program.end_loop() # Stack: iter 1792 # If all the parameters matched, call the method. 1793 program.load_fast(2) # Stack: iter, match 1794 program.jump_to_label(0, "failed") 1795 # All the parameters matched. 1796 program.pop_top() # Stack: iter 1797 program.load_fast(1) # Stack: arguments 1798 program.load_fast(0) # Stack: arguments, self 1799 program.load_attr(str(method.get_python_name())) 1800 # Stack: arguments, method 1801 program.rot_two() # Stack: method, arguments 1802 program.load_global("apply") # Stack: method, arguments, apply 1803 program.rot_three() # Stack: apply, method, arguments 1804 program.call_function(2) # Stack: result 1805 program.return_value() 1806 # Try the next method if arguments or parameters were missing or incorrect. 1807 program.start_label("failed") 1808 program.pop_top() # Stack: iter 1809 program.pop_top() # Stack: 1810 # Raise an exception if nothing matched. 1811 # NOTE: Improve this. 1812 program.load_const("No matching method") 1813 program.raise_varargs(1) 1814 program.load_const(None) 1815 program.return_value() 1816 1817 # Add the code as a method in the namespace. 1818 # NOTE: One actual parameter, flags as 71 apparently means that a list 1819 # NOTE: parameter is used in a method. 1820 nlocals = program.max_locals + 1 1821 code = new.code(1, nlocals, program.max_stack_depth, 71, program.get_output(), 1822 tuple(program.get_constants()), tuple(program.get_names()), tuple(self.make_varnames(nlocals)), 1823 self.filename, method_name, 0, "") 1824 fn = new.function(code, global_names) 1825 namespace[method_name] = fn 1826 1827 def process(self, global_names): 1828 1829 """ 1830 Process the class, storing it in the 'global_names' dictionary provided. 1831 """ 1832 1833 namespace = {} 1834 real_methods = {} 1835 for method in self.class_file.methods: 1836 t, w = self.translate_method(method) 1837 nlocals = w.max_locals + 1 1838 nargs = len(method.get_descriptor()[0]) + 1 1839 method_name = str(method.get_python_name()) 1840 # NOTE: Add line number table later. 1841 code = new.code(nargs, nlocals, w.max_stack_depth, 67, w.get_output(), tuple(w.get_constants()), tuple(w.get_names()), 1842 tuple(self.make_varnames(nlocals)), self.filename, method_name, 0, "") 1843 # NOTE: May need more globals. 1844 fn = new.function(code, global_names) 1845 namespace[method_name] = fn 1846 real_method_name = str(method.get_name()) 1847 if not real_methods.has_key(real_method_name): 1848 real_methods[real_method_name] = [] 1849 real_methods[real_method_name].append((method, fn)) 1850 # NOTE: Define superclasses properly. 1851 if str(self.class_file.super_class.get_name()) not in ("java/lang/Object", "java/lang/Exception"): 1852 bases = (global_names[str(self.class_file.super_class.get_python_name())],) 1853 else: 1854 bases = () 1855 # Define method dispatchers. 1856 for real_method_name, methods in real_methods.items(): 1857 self.make_method(real_method_name, methods, global_names, namespace) 1858 # Use only the last part of the fully qualified name. 1859 full_class_name = str(self.class_file.this_class.get_python_name()) 1860 class_name = full_class_name.split(".")[-1] 1861 cls = new.classobj(class_name, bases, namespace) 1862 global_names[cls.__name__] = cls 1863 return cls 1864 1865 def make_varnames(self, nlocals): 1866 1867 """ 1868 A utility method which invents variable names for the given number - 1869 'nlocals' - of local variables in a method. Returns a list of such 1870 variable names. 1871 """ 1872 1873 l = ["self"] 1874 for i in range(1, nlocals): 1875 l.append("_l%s" % i) 1876 return l[:nlocals] 1877 1878 def _map(*args): 1879 print args 1880 return apply(__builtins__.map, args) 1881 1882 def _isinstance(*args): 1883 print args 1884 return apply(__builtins__.isinstance, args) 1885 1886 if __name__ == "__main__": 1887 import sys 1888 import dis 1889 global_names = {} 1890 global_names.update(__builtins__.__dict__) 1891 #global_names["isinstance"] = _isinstance 1892 #global_names["map"] = _map 1893 for filename in sys.argv[1:]: 1894 f = open(filename, "rb") 1895 c = classfile.ClassFile(f.read()) 1896 translator = ClassTranslator(c) 1897 cls = translator.process(global_names) 1898 1899 # vim: tabstop=4 expandtab shiftwidth=4