1 #!/usr/bin/env python 2 3 """ 4 Java bytecode conversion. Specification found at the following URL: 5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/Instructions2.doc.html 6 7 NOTE: Synchronized constructs are not actually supported. 8 """ 9 10 import classfile 11 from dis import opmap, cmp_op # for access to Python bytecode values and operators 12 from UserDict import UserDict 13 import new 14 15 # Bytecode production classes. 16 17 class BytecodeWriter: 18 19 "A Python bytecode writer." 20 21 def __init__(self): 22 # A stack of loop start instructions corresponding to loop blocks. 23 self.loops = [] 24 25 # A stack of loop block or exception block start positions. 26 self.blocks = [] 27 28 # A stack of exception block handler pointers. 29 self.exception_handlers = [] 30 31 # A dictionary mapping labels to jump instructions referencing such labels. 32 self.jumps = {} 33 34 # The output values, including "lazy" subvalues which will need evaluating. 35 self.output = [] 36 37 # The current Python bytecode instruction position. 38 self.position = 0 39 40 # Stack depth estimation. 41 self.stack_depth = 0 42 self.max_stack_depth = 0 43 44 # Local variable estimation. 45 self.max_locals = 0 46 47 # Mapping from values to indexes. 48 self.constants = {} 49 50 # Mapping from names to indexes. 51 # NOTE: This may be acquired from elsewhere. 52 #self.globals = {} 53 54 # Mapping from names to indexes. 55 self.names = {} 56 57 # A list of constants used as exception handler return addresses. 58 self.constants_for_exceptions = [] 59 60 def get_output(self): 61 output = [] 62 for element in self.output: 63 if isinstance(element, LazySubValue): 64 value = element.value 65 else: 66 value = element 67 # NOTE: ValueError gets raised for bad values here. 
68 output.append(chr(value)) 69 return "".join(output) 70 71 def get_constants(self): 72 l = self._get_list(self._invert(self.constants)) 73 result = [] 74 for i in l: 75 if isinstance(i, LazyValue): 76 result.append(i.get_value()) 77 else: 78 result.append(i) 79 return result 80 81 #def get_globals(self): 82 # return self._get_list(self._invert(self.globals)) 83 84 def get_names(self): 85 return self._get_list(self._invert(self.names)) 86 87 def _invert(self, d): 88 inverted = {} 89 for k, v in d.items(): 90 inverted[v] = k 91 return inverted 92 93 def _get_list(self, d): 94 l = [] 95 for i in range(0, len(d.keys())): 96 l.append(d[i]) 97 return l 98 99 # Administrative methods. 100 101 def update_stack_depth(self, change): 102 self.stack_depth += change 103 if self.stack_depth > self.max_stack_depth: 104 self.max_stack_depth = self.stack_depth 105 106 def update_locals(self, index): 107 if index > self.max_locals: 108 self.max_locals = index 109 110 # Special methods. 111 112 def _write_value(self, value): 113 if isinstance(value, LazyValue): 114 # NOTE: Assume a 16-bit value. 115 self.output.append(value.values[0]) 116 self.output.append(value.values[1]) 117 self.position += 2 118 elif value <= 0xffff: 119 self.output.append(value & 0xff) 120 self.output.append((value & 0xff00) >> 8) 121 self.position += 2 122 else: 123 # NOTE: EXTENDED_ARG not yet supported. 124 raise ValueError, value 125 126 def _rewrite_value(self, position, value): 127 # NOTE: Assume a 16-bit value. 128 if value <= 0xffff: 129 self.output[position] = (value & 0xff) 130 self.output[position + 1] = ((value & 0xff00) >> 8) 131 else: 132 # NOTE: EXTENDED_ARG not yet supported. 
133 raise ValueError, value 134 135 def setup_loop(self): 136 self.loops.append(self.position) 137 self.output.append(opmap["SETUP_LOOP"]) 138 self.position += 1 139 self._write_value(0) # To be filled in later 140 141 def end_loop(self): 142 current_loop_start = self.loops.pop() 143 current_loop_real_start = self.blocks.pop() 144 #print "<", self.blocks, current_loop_real_start 145 # Fix the iterator delta. 146 # NOTE: Using 3 as the assumed length of the FOR_ITER instruction. 147 self.jump_absolute(current_loop_real_start) 148 self._rewrite_value(current_loop_real_start + 1, self.position - current_loop_real_start - 3) 149 self.pop_block() 150 # Fix the loop delta. 151 # NOTE: Using 3 as the assumed length of the SETUP_LOOP instruction. 152 self._rewrite_value(current_loop_start + 1, self.position - current_loop_start - 3) 153 154 def jump_to_label(self, status, name): 155 # Record the instruction using the jump. 156 jump_instruction = self.position 157 if status is None: 158 self.jump_forward() 159 elif status: 160 self.jump_if_true() 161 else: 162 self.jump_if_false() 163 # Record the following instruction, too. 164 if not self.jumps.has_key(name): 165 self.jumps[name] = [] 166 self.jumps[name].append((jump_instruction, self.position)) 167 168 def start_label(self, name): 169 # Fill in all jump instructions. 170 for jump_instruction, following_instruction in self.jumps[name]: 171 self._rewrite_value(jump_instruction + 1, self.position - following_instruction) 172 del self.jumps[name] 173 174 def load_const_ret(self, value): 175 self.constants_for_exceptions.append(value) 176 self.load_const(value) 177 178 def ret(self, index): 179 self.load_fast(index) 180 # Previously, the constant stored on the stack by jsr/jsr_w was stored 181 # in a local variable. In the JVM, extracting the value from the local 182 # variable and jumping can be done at runtime. In the Python VM, any 183 # jump target must be known in advance and written into the bytecode. 
184 for constant in self.constants_for_exceptions: 185 self.dup_top() # Stack: actual-address, actual-address 186 self.load_const(constant) # Stack: actual-address, actual-address, suggested-address 187 self.compare_op("==") # Stack: actual-address, result 188 self.jump_to_label(0, "const") 189 self.pop_top() # Stack: actual-address 190 self.pop_top() # Stack: 191 self.jump_absolute(constant) 192 self.start_label("const") 193 self.pop_top() # Stack: actual-address 194 # NOTE: If we get here, something is really wrong. 195 self.pop_top() # Stack: 196 197 def setup_except(self, target): 198 self.blocks.append(self.position) 199 self.exception_handlers.append(target) 200 #print "-", self.position, target 201 self.output.append(opmap["SETUP_EXCEPT"]) 202 self.position += 1 203 self._write_value(0) # To be filled in later 204 205 def setup_finally(self, target): 206 self.blocks.append(self.position) 207 self.exception_handlers.append(target) 208 #print "-", self.position, target 209 self.output.append(opmap["SETUP_FINALLY"]) 210 self.position += 1 211 self._write_value(0) # To be filled in later 212 213 def end_exception(self): 214 current_exception_start = self.blocks.pop() 215 # Convert the "lazy" absolute value. 216 current_exception_target = self.exception_handlers.pop() 217 target = current_exception_target.get_value() 218 #print "*", current_exception_start, target 219 # NOTE: Using 3 as the assumed length of the SETUP_* instruction. 220 self._rewrite_value(current_exception_start + 1, target - current_exception_start - 3) 221 222 def start_handler(self, exc_name): 223 # Where handlers are begun, produce bytecode to test the type of 224 # the exception. 
225 self.dup_top() # Stack: exception, exception 226 self.load_global(str(exc_name)) # Stack: exception, exception, handled-exception 227 self.compare_op("exception match") # Stack: exception, result 228 self.jump_to_label(1, "handler") 229 self.pop_top() 230 self.end_finally() 231 self.start_label("handler") 232 self.pop_top() 233 234 # Complicated methods. 235 236 def load_const(self, value): 237 self.output.append(opmap["LOAD_CONST"]) 238 if not self.constants.has_key(value): 239 self.constants[value] = len(self.constants.keys()) 240 self.position += 1 241 self._write_value(self.constants[value]) 242 self.update_stack_depth(1) 243 244 def load_global(self, name): 245 self.output.append(opmap["LOAD_GLOBAL"]) 246 if not self.names.has_key(name): 247 self.names[name] = len(self.names.keys()) 248 self.position += 1 249 self._write_value(self.names[name]) 250 self.update_stack_depth(1) 251 252 def load_attr(self, name): 253 self.output.append(opmap["LOAD_ATTR"]) 254 if not self.names.has_key(name): 255 self.names[name] = len(self.names.keys()) 256 self.position += 1 257 self._write_value(self.names[name]) 258 259 def load_name(self, name): 260 self.output.append(opmap["LOAD_NAME"]) 261 if not self.names.has_key(name): 262 self.names[name] = len(self.names.keys()) 263 self.position += 1 264 self._write_value(self.names[name]) 265 self.update_stack_depth(1) 266 267 def load_fast(self, index): 268 self.output.append(opmap["LOAD_FAST"]) 269 self.position += 1 270 self._write_value(index) 271 self.update_stack_depth(1) 272 self.update_locals(index) 273 274 def store_attr(self, name): 275 self.output.append(opmap["STORE_ATTR"]) 276 if not self.names.has_key(name): 277 self.names[name] = len(self.names.keys()) 278 self.position += 1 279 self._write_value(self.names[name]) 280 self.update_stack_depth(-1) 281 282 def store_fast(self, index): 283 self.output.append(opmap["STORE_FAST"]) 284 self.position += 1 285 self._write_value(index) 286 self.update_stack_depth(-1) 287 
self.update_locals(index) 288 289 def for_iter(self): 290 self.blocks.append(self.position) 291 #print ">", self.blocks 292 self.output.append(opmap["FOR_ITER"]) 293 self.position += 1 294 self._write_value(0) # To be filled in later 295 self.update_stack_depth(1) 296 297 def break_loop(self): 298 self.output.append(opmap["BREAK_LOOP"]) 299 self.position += 1 300 self.jump_absolute(self.blocks[-1]) 301 302 # Normal bytecode generators. 303 304 def get_iter(self): 305 self.output.append(opmap["GET_ITER"]) 306 self.position += 1 307 308 def jump_if_false(self, offset=0): 309 self.output.append(opmap["JUMP_IF_FALSE"]) 310 self.position += 1 311 self._write_value(offset) # May be filled in later 312 313 def jump_if_true(self, offset=0): 314 self.output.append(opmap["JUMP_IF_TRUE"]) 315 self.position += 1 316 self._write_value(offset) # May be filled in later 317 318 def jump_forward(self, offset=0): 319 self.output.append(opmap["JUMP_FORWARD"]) 320 self.position += 1 321 self._write_value(offset) # May be filled in later 322 323 def jump_absolute(self, address=0): 324 self.output.append(opmap["JUMP_ABSOLUTE"]) 325 self.position += 1 326 self._write_value(address) # May be filled in later 327 328 def build_tuple(self, count): 329 self.output.append(opmap["BUILD_TUPLE"]) 330 self.position += 1 331 self._write_value(count) 332 self.update_stack_depth(-(count - 1)) 333 334 def build_list(self, count): 335 self.output.append(opmap["BUILD_LIST"]) 336 self.position += 1 337 self._write_value(count) 338 self.update_stack_depth(-(count - 1)) 339 340 def pop_top(self): 341 self.output.append(opmap["POP_TOP"]) 342 self.position += 1 343 self.update_stack_depth(-1) 344 345 def dup_top(self): 346 self.output.append(opmap["DUP_TOP"]) 347 self.position += 1 348 self.update_stack_depth(1) 349 350 def rot_two(self): 351 self.output.append(opmap["ROT_TWO"]) 352 self.position += 1 353 354 def rot_three(self): 355 self.output.append(opmap["ROT_THREE"]) 356 self.position += 1 357 358 def 
rot_four(self): 359 self.output.append(opmap["ROT_FOUR"]) 360 self.position += 1 361 362 def call_function(self, count): 363 self.output.append(opmap["CALL_FUNCTION"]) 364 self.position += 1 365 self._write_value(count) 366 self.update_stack_depth(-count) 367 368 def binary_subscr(self): 369 self.output.append(opmap["BINARY_SUBSCR"]) 370 self.position += 1 371 self.update_stack_depth(-1) 372 373 def binary_add(self): 374 self.output.append(opmap["BINARY_ADD"]) 375 self.position += 1 376 self.update_stack_depth(-1) 377 378 def binary_divide(self): 379 self.output.append(opmap["BINARY_DIVIDE"]) 380 self.position += 1 381 self.update_stack_depth(-1) 382 383 def binary_multiply(self): 384 self.output.append(opmap["BINARY_MULTIPLY"]) 385 self.position += 1 386 self.update_stack_depth(-1) 387 388 def binary_modulo(self): 389 self.output.append(opmap["BINARY_MODULO"]) 390 self.position += 1 391 self.update_stack_depth(-1) 392 393 def binary_subtract(self): 394 self.output.append(opmap["BINARY_SUBTRACT"]) 395 self.position += 1 396 self.update_stack_depth(-1) 397 398 def binary_and(self): 399 self.output.append(opmap["BINARY_AND"]) 400 self.position += 1 401 self.update_stack_depth(-1) 402 403 def binary_or(self): 404 self.output.append(opmap["BINARY_XOR"]) 405 self.position += 1 406 self.update_stack_depth(-1) 407 408 def binary_lshift(self): 409 self.output.append(opmap["BINARY_LSHIFT"]) 410 self.position += 1 411 self.update_stack_depth(-1) 412 413 def binary_rshift(self): 414 self.output.append(opmap["BINARY_RSHIFT"]) 415 self.position += 1 416 self.update_stack_depth(-1) 417 418 def binary_xor(self): 419 self.output.append(opmap["BINARY_XOR"]) 420 self.position += 1 421 self.update_stack_depth(-1) 422 423 def store_subscr(self): 424 self.output.append(opmap["STORE_SUBSCR"]) 425 self.position += 1 426 self.update_stack_depth(-3) 427 428 def unary_negative(self): 429 self.output.append(opmap["UNARY_NEGATIVE"]) 430 self.position += 1 431 432 def slice_1(self): 433 
self.output.append(opmap["SLICE+1"]) 434 self.position += 1 435 436 def compare_op(self, op): 437 self.output.append(opmap["COMPARE_OP"]) 438 self.position += 1 439 self._write_value(list(cmp_op).index(op)) 440 self.update_stack_depth(-1) 441 442 def return_value(self): 443 self.output.append(opmap["RETURN_VALUE"]) 444 self.position += 1 445 self.update_stack_depth(-1) 446 447 def raise_varargs(self, count): 448 self.output.append(opmap["RAISE_VARARGS"]) 449 self.position += 1 450 self._write_value(count) 451 452 def pop_block(self): 453 self.output.append(opmap["POP_BLOCK"]) 454 self.position += 1 455 456 def end_finally(self): 457 self.output.append(opmap["END_FINALLY"]) 458 self.position += 1 459 460 def unpack_sequence(self, count): 461 self.output.append(opmap["UNPACK_SEQUENCE"]) 462 self.position += 1 463 self._write_value(count) 464 465 # Utility classes and functions. 466 467 class LazyDict(UserDict): 468 def __getitem__(self, key): 469 if not self.data.has_key(key): 470 # NOTE: Assume 16-bit value. 471 self.data[key] = LazyValue(2) 472 return self.data[key] 473 def __setitem__(self, key, value): 474 if self.data.has_key(key): 475 existing_value = self.data[key] 476 if isinstance(existing_value, LazyValue): 477 existing_value.set_value(value) 478 return 479 self.data[key] = value 480 481 class LazyValue: 482 def __init__(self, nvalues): 483 self.values = [] 484 for i in range(0, nvalues): 485 self.values.append(LazySubValue()) 486 def set_value(self, value): 487 # NOTE: Assume at least 16-bit value. No "filling" performed. 488 if value <= 0xffff: 489 self.values[0].set_value(value & 0xff) 490 self.values[1].set_value((value & 0xff00) >> 8) 491 else: 492 # NOTE: EXTENDED_ARG not yet supported. 
# Utility classes and functions.

class LazyDict(UserDict):

    """
    A dictionary whose missing entries are created on demand as 16-bit
    LazyValue placeholders, and whose placeholders are filled in, rather than
    replaced, when a value is later stored under the same key.
    """

    def __getitem__(self, key):
        if key not in self.data:
            # NOTE: Assume 16-bit value.
            self.data[key] = LazyValue(2)
        return self.data[key]

    def __setitem__(self, key, value):
        existing_value = self.data.get(key)
        if isinstance(existing_value, LazyValue):
            # Keep the placeholder object so that anything already holding it
            # sees the value.
            existing_value.set_value(value)
        else:
            self.data[key] = value

class LazyValue:

    "A value made of byte-sized subvalues which may be set after being handed out."

    def __init__(self, nvalues):
        self.values = [LazySubValue() for i in range(nvalues)]

    def set_value(self, value):

        """
        Store the low byte of 'value' in the first subvalue and the next byte
        in the second. Raise ValueError if 'value' does not fit in 16 bits.
        """

        # NOTE: Assume at least 16-bit value. No "filling" performed.
        if value <= 0xffff:
            self.values[0].set_value(value & 0xff)
            self.values[1].set_value((value & 0xff00) >> 8)
        else:
            # NOTE: EXTENDED_ARG not yet supported.
            raise ValueError(value)

    def get_value(self):

        "Return the integer formed by the subvalues, the last being most significant."

        value = 0
        subvalues = self.values[:]
        subvalues.reverse()
        for subvalue in subvalues:
            value = (value << 8) + subvalue.value
        return value

class LazySubValue:

    "A single byte of a LazyValue, initially zero."

    def __init__(self):
        self.value = 0

    def set_value(self, value):
        self.value = value

def signed(value, limit):

    """
    Return the signed integer from the unsigned 'value', where 'limit' (a value
    one greater than the highest possible positive integer) is used to determine
    whether a negative or positive result is produced.
    """

    if value // limit == 1:
        mask = limit * 2 - 1
        return -1 - (value ^ mask)
    else:
        return value

def signed2(value):

    "Return the signed interpretation of the unsigned 16-bit 'value'."

    return signed(value, 0x8000)

def signed4(value):

    "Return the signed interpretation of the unsigned 32-bit 'value'."

    return signed(value, 0x80000000)

# Bytecode conversion.

class BytecodeReader:

    """
    A generic Java bytecode reader.

    Each instruction is dispatched to the method of this object named after the
    instruction's mnemonic, so subclasses provide the actual handlers.
    """

    def __init__(self, class_file):
        self.class_file = class_file
        self.position_mapping = LazyDict()

    def process(self, method, program):

        """
        Process the code of the given 'method', calling the handler of each
        instruction in turn with 'program' as the recipient of any output, and
        notifying 'program' where exception blocks and handlers start and end.
        Nothing is done if 'method' has no code attribute.
        """

        self.java_position = 0
        self.in_finally = 0
        self.method = method

        # NOTE: Potentially unreliable way of getting necessary information.
        code, exception_table = None, None
        for attribute in method.attributes:
            if isinstance(attribute, classfile.CodeAttributeInfo):
                code, exception_table = attribute.code, attribute.exception_table
                break
        if code is None:
            return

        # Produce a structure which permits fast access to exception details.
        exception_block_start = {}
        exception_block_end = {}
        exception_block_handler = {}
        reversed_exception_table = exception_table[:]
        reversed_exception_table.reverse()

        # Later entries have wider coverage than earlier entries.
        for exception in reversed_exception_table:
            # Index start, end and handler positions.
            exception_block_start.setdefault(exception.start_pc, []).append(exception)
            exception_block_end.setdefault(exception.end_pc, []).append(exception)
            exception_block_handler.setdefault(exception.handler_pc, []).append(exception)

        # Process each instruction in the code.
        while self.java_position < len(code):
            self.position_mapping[self.java_position] = program.position

            # Insert exception handling constructs.
            block_starts = exception_block_start.get(self.java_position, [])
            for exception in block_starts:
                # Note that the absolute position is used.
                if exception.catch_type == 0:
                    program.setup_finally(self.position_mapping[exception.handler_pc])
                else:
                    program.setup_except(self.position_mapping[exception.handler_pc])
            if block_starts:
                self.in_finally = 0

            # Insert exception handler details.
            # NOTE: Ensure that pop_block is reachable by possibly inserting it at the start of finally handlers.
            # NOTE: Insert a check for the correct exception at the start of each handler.
            for exception in exception_block_handler.get(self.java_position, []):
                program.end_exception()
                if exception.catch_type == 0:
                    self.in_finally = 1
                else:
                    program.start_handler(self.class_file.constants[exception.catch_type - 1].get_python_name())

            # Process the bytecode at the current position.
            bytecode = ord(code[self.java_position])
            mnemonic, number_of_arguments = self.java_bytecodes[bytecode]
            number_of_arguments = self.process_bytecode(mnemonic, number_of_arguments, code, program)
            next_java_position = self.java_position + 1 + number_of_arguments

            # Insert exception block end details.
            for exception in exception_block_end.get(next_java_position, []):
                # NOTE: Insert jump beyond handlers.
                # NOTE: program.jump_forward/absolute(...)
                # NOTE: Insert end finally at end of handlers as well as where "ret" occurs.
                if exception.catch_type != 0:
                    program.pop_block()

            # Only advance the JVM position after sneaking in extra Python
            # instructions.
            self.java_position = next_java_position

    def process_bytecode(self, mnemonic, number_of_arguments, code, program):

        """
        Call the handler named 'mnemonic' for the instruction at the current
        position in 'code' and return the number of argument bytes it occupies.

        Where 'number_of_arguments' is None, the handler is given the remaining
        code after the instruction byte and its return value is returned;
        otherwise the handler is given a list of that many argument byte values.
        """

        handler = getattr(self, mnemonic)
        if number_of_arguments is None:
            return handler(code[self.java_position+1:], program)

        first_argument = self.java_position + 1
        arguments = [ord(code[first_argument + j]) for j in range(number_of_arguments)]
        handler(arguments, program)
        return number_of_arguments

    java_bytecodes = {
        # code : (mnemonic, number of following bytes or None where variable)
        0 : ("nop", 0),
        1 : ("aconst_null", 0),
        2 : ("iconst_m1", 0),
        3 : ("iconst_0", 0),
        4 : ("iconst_1", 0),
        5 : ("iconst_2", 0),
        6 : ("iconst_3", 0),
        7 : ("iconst_4", 0),
        8 : ("iconst_5", 0),
        9 : ("lconst_0", 0),
        10 : ("lconst_1", 0),
        11 : ("fconst_0", 0),
        12 : ("fconst_1", 0),
        13 : ("fconst_2", 0),
        14 : ("dconst_0", 0),
        15 : ("dconst_1", 0),
        16 : ("bipush", 1),
        17 : ("sipush", 2),
        18 : ("ldc", 1),
        19 : ("ldc_w", 2),
        20 : ("ldc2_w", 2),
        21 : ("iload", 1),
        22 : ("lload", 1),
        23 : ("fload", 1),
        24 : ("dload", 1),
        25 : ("aload", 1),
        26 : ("iload_0", 0),
        27 : ("iload_1", 0),
        28 : ("iload_2", 0),
        29 : ("iload_3", 0),
        30 : ("lload_0", 0),
        31 : ("lload_1", 0),
        32 : ("lload_2", 0),
        33 : ("lload_3", 0),
        34 : ("fload_0", 0),
        35 : ("fload_1", 0),
        36 : ("fload_2", 0),
        37 : ("fload_3", 0),
        38 : ("dload_0", 0),
        39 : ("dload_1", 0),
        40 : ("dload_2", 0),
        41 : ("dload_3", 0),
        42 : ("aload_0", 0),
        43 : ("aload_1", 0),
        44 : ("aload_2", 0),
        45 : ("aload_3", 0),
        46 : ("iaload", 0),
        47 : ("laload", 0),
        48 : ("faload", 0),
        49 : ("daload", 0),
        50 : ("aaload", 0),
        51 : ("baload", 0),
        52 : ("caload", 0),
        53 : ("saload", 0),
        54 : ("istore", 1),
        55 : ("lstore", 1),
        56 : ("fstore", 1),
        57 : ("dstore", 1),
        58 : ("astore", 1),
        59 : ("istore_0", 0),
        60 : ("istore_1", 0),
        61 : ("istore_2", 0),
        62 : ("istore_3", 0),
        63 : ("lstore_0", 0),
        64 : ("lstore_1", 0),
        65 : ("lstore_2", 0),
        66 : ("lstore_3", 0),
        67 : ("fstore_0", 0),
        68 : ("fstore_1", 0),
        69 : ("fstore_2", 0),
        70 : ("fstore_3", 0),
        71 : ("dstore_0", 0),
        72 : ("dstore_1", 0),
        73 : ("dstore_2", 0),
        74 : ("dstore_3", 0),
        75 : ("astore_0", 0),
        76 : ("astore_1", 0),
        77 : ("astore_2", 0),
        78 : ("astore_3", 0),
        79 : ("iastore", 0),
        80 : ("lastore", 0),
        81 : ("fastore", 0),
        82 : ("dastore", 0),
        83 : ("aastore", 0),
        84 : ("bastore", 0),
        85 : ("castore", 0),
        86 : ("sastore", 0),
        87 : ("pop", 0),
        88 : ("pop2", 0),
        89 : ("dup", 0),
        90 : ("dup_x1", 0),
        91 : ("dup_x2", 0),
        92 : ("dup2", 0),
        93 : ("dup2_x1", 0),
        94 : ("dup2_x2", 0),
        95 : ("swap", 0),
        96 : ("iadd", 0),
        97 : ("ladd", 0),
        98 : ("fadd", 0),
        99 : ("dadd", 0),
        100 : ("isub", 0),
        101 : ("lsub", 0),
        102 : ("fsub", 0),
        103 : ("dsub", 0),
        104 : ("imul", 0),
        105 : ("lmul", 0),
        106 : ("fmul", 0),
        107 : ("dmul", 0),
        108 : ("idiv", 0),
        109 : ("ldiv", 0),
        110 : ("fdiv", 0),
        111 : ("ddiv", 0),
        112 : ("irem", 0),
        113 : ("lrem", 0),
        114 : ("frem", 0),
        115 : ("drem", 0),
        116 : ("ineg", 0),
        117 : ("lneg", 0),
        118 : ("fneg", 0),
        119 : ("dneg", 0),
        120 : ("ishl", 0),
        121 : ("lshl", 0),
        122 : ("ishr", 0),
        123 : ("lshr", 0),
        124 : ("iushr", 0),
        125 : ("lushr", 0),
        126 : ("iand", 0),
        127 : ("land", 0),
        128 : ("ior", 0),
        129 : ("lor", 0),
        130 : ("ixor", 0),
        131 : ("lxor", 0),
        132 : ("iinc", 2),
        133 : ("i2l", 0),
        134 : ("i2f", 0),
        135 : ("i2d", 0),
        136 : ("l2i", 0),
        137 : ("l2f", 0),
        138 : ("l2d", 0),
        139 : ("f2i", 0),
        140 : ("f2l", 0),
        141 : ("f2d", 0),
        142 : ("d2i", 0),
        143 : ("d2l", 0),
        144 : ("d2f", 0),
        145 : ("i2b", 0),
        146 : ("i2c", 0),
        147 : ("i2s", 0),
        148 : ("lcmp", 0),
        149 : ("fcmpl", 0),
        150 : ("fcmpg", 0),
        151 : ("dcmpl", 0),
        152 : ("dcmpg", 0),
        153 : ("ifeq", 2),
        154 : ("ifne", 2),
        155 : ("iflt", 2),
        156 : ("ifge", 2),
        157 : ("ifgt", 2),
        158 : ("ifle", 2),
        159 : ("if_icmpeq", 2),
        160 : ("if_icmpne", 2),
        161 : ("if_icmplt", 2),
        162 : ("if_icmpge", 2),
        163 : ("if_icmpgt", 2),
        164 : ("if_icmple", 2),
        165 : ("if_acmpeq", 2),
        166 : ("if_acmpne", 2),
        167 : ("goto", 2),
        168 : ("jsr", 2),
        169 : ("ret", 1),
        170 : ("tableswitch", None), # variable number of arguments
        171 : ("lookupswitch", None), # variable number of arguments
        172 : ("ireturn", 0),
        173 : ("lreturn", 0),
        174 : ("freturn", 0),
        175 : ("dreturn", 0),
        176 : ("areturn", 0),
        177 : ("return_", 0),
        178 : ("getstatic", 2),
        179 : ("putstatic", 2),
        180 : ("getfield", 2),
        181 : ("putfield", 2),
        182 : ("invokevirtual", 2),
        183 : ("invokespecial", 2),
        184 : ("invokestatic", 2),
        185 : ("invokeinterface", 4),
        187 : ("new", 2),
        188 : ("newarray", 1),
        189 : ("anewarray", 2),
        190 : ("arraylength", 0),
        191 : ("athrow", 0),
        192 : ("checkcast", 2),
        193 : ("instanceof", 2),
        194 : ("monitorenter", 0),
        195 : ("monitorexit", 0),
        196 : ("wide", None), # 3 or 5 arguments, stack changes according to modified element
        197 : ("multianewarray", 3),
        198 : ("ifnull", 2),
        199 : ("ifnonnull", 2),
        200 : ("goto_w", 4),
        201 : ("jsr_w", 4),
        }
838 839 bytecode_methods = [spec[0] for spec in BytecodeReader.java_bytecodes.values()] 840 841 def __getattr__(self, name): 842 if name in self.bytecode_methods: 843 print "%5s %s" % (self.java_position, name), 844 return self.generic 845 else: 846 raise AttributeError, name 847 848 def generic(self, arguments, program): 849 print arguments 850 851 class BytecodeDisassemblerProgram: 852 position = 0 853 def setup_except(self, target): 854 print "(setup_except %s)" % target 855 def setup_finally(self, target): 856 print "(setup_finally %s)" % target 857 def end_exception(self): 858 print "(end_exception)" 859 def start_handler(self, exc_name): 860 print "(start_handler %s)" % exc_name 861 def pop_block(self): 862 print "(pop_block)" 863 864 class BytecodeTranslator(BytecodeReader): 865 866 "A Java bytecode translator which uses a Python bytecode writer." 867 868 def aaload(self, arguments, program): 869 # NOTE: No type checking performed. 870 program.binary_subscr() 871 872 def aastore(self, arguments, program): 873 # NOTE: No type checking performed. 874 # Stack: arrayref, index, value 875 program.rot_three() # Stack: value, arrayref, index 876 program.store_subscr() 877 878 def aconst_null(self, arguments, program): 879 program.load_const(None) 880 881 def aload(self, arguments, program): 882 program.load_fast(arguments[0]) 883 884 def aload_0(self, arguments, program): 885 program.load_fast(0) 886 887 def aload_1(self, arguments, program): 888 program.load_fast(1) 889 890 def aload_2(self, arguments, program): 891 program.load_fast(2) 892 893 def aload_3(self, arguments, program): 894 program.load_fast(3) 895 896 def anewarray(self, arguments, program): 897 # NOTE: Does not raise NegativeArraySizeException. 898 # NOTE: Not using the index to type the list/array. 
899 index = (arguments[0] << 8) + arguments[1] 900 self._newarray(program) 901 902 def _newarray(self, program): 903 program.build_list() # Stack: count, list 904 program.rot_two() # Stack: list, count 905 program.setup_loop() 906 program.load_global("range") 907 program.load_const(0) # Stack: list, count, range, 0 908 program.rot_three() # Stack: list, 0, count, range 909 program.rot_three() # Stack: list, range, 0, count 910 program.call_function(2) # Stack: list, range_list 911 program.get_iter() # Stack: list, iter 912 program.for_iter() # Stack: list, iter, value 913 program.pop_top() # Stack: list, iter 914 program.rot_two() # Stack: iter, list 915 program.dup_top() # Stack: iter, list, list 916 program.load_attr("append") # Stack: iter, list, append 917 program.load_const(None) # Stack: iter, list, append, None 918 program.call_function(1) # Stack: iter, list, None 919 program.pop_top() # Stack: iter, list 920 program.rot_two() # Stack: list, iter 921 program.end_loop() # Back to for_iter above 922 923 def areturn(self, arguments, program): 924 program.return_value() 925 926 def arraylength(self, arguments, program): 927 program.load_global("len") # Stack: arrayref, len 928 program.rot_two() # Stack: len, arrayref 929 program.call_function(1) 930 931 def astore(self, arguments, program): 932 program.store_fast(arguments[0]) 933 934 def astore_0(self, arguments, program): 935 program.store_fast(0) 936 937 def astore_1(self, arguments, program): 938 program.store_fast(1) 939 940 def astore_2(self, arguments, program): 941 program.store_fast(2) 942 943 def astore_3(self, arguments, program): 944 program.store_fast(3) 945 946 def athrow(self, arguments, program): 947 # NOTE: NullPointerException not raised where null/None is found on the stack. 948 # If this instruction appears in a finally handler, use end_finally instead. 
949 if self.in_finally: 950 program.end_finally() 951 else: 952 program.dup_top() 953 program.raise_varargs(1) 954 955 baload = aaload 956 bastore = aastore 957 958 def bipush(self, arguments, program): 959 program.load_const(arguments[0]) 960 961 caload = aaload 962 castore = aastore 963 964 def checkcast(self, arguments, program): 965 index = (arguments[0] << 8) + arguments[1] 966 target_name = self.class_file.constants[index - 1].get_python_name() 967 # NOTE: Using the string version of the name which may contain incompatible characters. 968 target_components = str(target_name).split("/") 969 970 program.dup_top() # Stack: objectref, objectref 971 program.load_global("isinstance") # Stack: objectref, objectref, isinstance 972 program.rot_two() # Stack: objectref, isinstance, objectref 973 program.load_global(target_components[0]) 974 for target_component in target_components[1:]: 975 program.load_attr(target_component) 976 program.call_function(2) # Stack: objectref 977 978 def d2f(self, arguments, program): 979 pass 980 981 def d2i(self, arguments, program): 982 program.load_global("int") # Stack: value, int 983 program.rot_two() # Stack: int, value 984 program.call_function(1) # Stack: result 985 986 d2l = d2i # Preserving Java semantics 987 988 def dadd(self, arguments, program): 989 # NOTE: No type checking performed. 990 program.binary_add() 991 992 daload = aaload 993 dastore = aastore 994 995 def dcmpg(self, arguments, program): 996 # NOTE: No type checking performed. 997 program.compare_op(">") 998 999 def dcmpl(self, arguments, program): 1000 # NOTE: No type checking performed. 1001 program.compare_op("<") 1002 1003 def dconst_0(self, arguments, program): 1004 program.load_const(0.0) 1005 1006 def dconst_1(self, arguments, program): 1007 program.load_const(1.0) 1008 1009 def ddiv(self, arguments, program): 1010 # NOTE: No type checking performed. 
1011 program.binary_divide() 1012 1013 dload = aload 1014 dload_0 = aload_0 1015 dload_1 = aload_1 1016 dload_2 = aload_2 1017 dload_3 = aload_3 1018 1019 def dmul(self, arguments, program): 1020 # NOTE: No type checking performed. 1021 program.binary_multiply() 1022 1023 def dneg(self, arguments, program): 1024 # NOTE: No type checking performed. 1025 program.unary_negative() 1026 1027 def drem(self, arguments, program): 1028 # NOTE: No type checking performed. 1029 program.binary_modulo() 1030 1031 dreturn = areturn 1032 dstore = astore 1033 dstore_0 = astore_0 1034 dstore_1 = astore_1 1035 dstore_2 = astore_2 1036 dstore_3 = astore_3 1037 1038 def dsub(self, arguments, program): 1039 # NOTE: No type checking performed. 1040 program.binary_subtract() 1041 1042 def dup(self, arguments, program): 1043 program.dup_top() 1044 1045 def dup_x1(self, arguments, program): 1046 # Ignoring computational type categories. 1047 program.dup_top() 1048 program.rot_three() 1049 1050 def dup_x2(self, arguments, program): 1051 # Ignoring computational type categories. 
1052 program.dup_top() 1053 program.rot_four() 1054 1055 dup2 = dup # Ignoring computational type categories 1056 dup2_x1 = dup_x1 # Ignoring computational type categories 1057 dup2_x2 = dup_x2 # Ignoring computational type categories 1058 1059 def f2d(self, arguments, program): 1060 pass # Preserving Java semantics 1061 1062 def f2i(self, arguments, program): 1063 program.load_global("int") # Stack: value, int 1064 program.rot_two() # Stack: int, value 1065 program.call_function(1) # Stack: result 1066 1067 f2l = f2i # Preserving Java semantics 1068 fadd = dadd 1069 faload = daload 1070 fastore = dastore 1071 fcmpg = dcmpg 1072 fcmpl = dcmpl 1073 fconst_0 = dconst_0 1074 fconst_1 = dconst_1 1075 1076 def fconst_2(self, arguments, program): 1077 program.load_const(2.0) 1078 1079 fdiv = ddiv 1080 fload = dload 1081 fload_0 = dload_0 1082 fload_1 = dload_1 1083 fload_2 = dload_2 1084 fload_3 = dload_3 1085 fmul = dmul 1086 fneg = dneg 1087 frem = drem 1088 freturn = dreturn 1089 fstore = dstore 1090 fstore_0 = dstore_0 1091 fstore_1 = dstore_1 1092 fstore_2 = dstore_2 1093 fstore_3 = dstore_3 1094 fsub = dsub 1095 1096 def getfield(self, arguments, program): 1097 index = (arguments[0] << 8) + arguments[1] 1098 target_name = self.class_file.constants[index - 1].get_python_name() 1099 # NOTE: Using the string version of the name which may contain incompatible characters. 1100 program.load_attr(str(target_name)) 1101 1102 def getstatic(self, arguments, program): 1103 index = (arguments[0] << 8) + arguments[1] 1104 target_name = self.class_file.constants[index - 1].get_python_name() 1105 program.load_name("self") 1106 program.load_attr("__class__") 1107 # NOTE: Using the string version of the name which may contain incompatible characters. 
def goto(self, arguments, program):
    """
    Translate 'goto': decode the signed 16-bit branch offset in 'arguments',
    resolve it against the current Java position and emit an absolute jump to
    the mapped Python position.
    """
    relative = signed2((arguments[0] << 8) + arguments[1])
    target = self.java_position + relative
    program.jump_absolute(self.position_mapping[target])

def goto_w(self, arguments, program):
    """
    Translate 'goto_w': as 'goto', but with a signed 32-bit branch offset
    spread over four argument bytes.
    """
    raw = (arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]
    target = self.java_position + signed4(raw)
    program.jump_absolute(self.position_mapping[target])

def i2b(self, arguments, program):
    "Translate 'i2b': nothing is emitted."
    pass

def i2c(self, arguments, program):
    "Translate 'i2c' by calling the 'chr' global on the value at the top of the stack."
    program.load_global("chr")  # Stack: value, chr
    program.rot_two()           # Stack: chr, value
    program.call_function(1)    # Stack: result

def i2d(self, arguments, program):
    "Translate 'i2d' by calling the 'float' global on the value at the top of the stack."
    program.load_global("float")    # Stack: value, float
    program.rot_two()               # Stack: float, value
    program.call_function(1)        # Stack: result

i2f = i2d # Not distinguishing between float and double

def i2l(self, arguments, program):
    "Translate 'i2l': nothing is emitted."
    pass # Preserving Java semantics

def i2s(self, arguments, program):
    "Translate 'i2s': nothing is emitted."
    pass # Not distinguishing between int and short

# Integer addition and array loads reuse the float handlers.
iadd = fadd
iaload = faload

def iand(self, arguments, program):
    "Translate 'iand' by emitting a binary and into 'program'."
    # NOTE: No type checking performed.
    program.binary_and()
iastore = fastore

def iconst_m1(self, arguments, program):
    "Translate 'iconst_m1' by loading the constant -1."
    program.load_const(-1)

def iconst_0(self, arguments, program):
    "Translate 'iconst_0' by loading the constant 0."
    program.load_const(0)

def iconst_1(self, arguments, program):
    "Translate 'iconst_1' by loading the constant 1."
    program.load_const(1)

def iconst_2(self, arguments, program):
    "Translate 'iconst_2' by loading the constant 2."
    program.load_const(2)

def iconst_3(self, arguments, program):
    "Translate 'iconst_3' by loading the constant 3."
    program.load_const(3)

def iconst_4(self, arguments, program):
    "Translate 'iconst_4' by loading the constant 4."
    program.load_const(4)

def iconst_5(self, arguments, program):
    "Translate 'iconst_5' by loading the constant 5."
    program.load_const(5)

idiv = fdiv

def _if_xcmpx(self, arguments, program, op):
    """
    Emit a conditional branch shared by the 'if...' instructions.

    The two values on the stack are compared using 'op'; when the comparison
    succeeds, control jumps to the Python position mapped from the signed
    16-bit Java offset in 'arguments', otherwise execution falls through.
    The comparison result is popped on both paths.
    """
    relative = signed2((arguments[0] << 8) + arguments[1])
    target = self.java_position + relative
    program.compare_op(op)
    program.jump_to_label(0, "next") # skip if false
    # Comparison succeeded: discard the result and branch.
    program.pop_top()
    program.jump_absolute(self.position_mapping[target])
    # Comparison failed: discard the result and carry on.
    program.start_label("next")
    program.pop_top()

def if_acmpeq(self, arguments, program):
    "Translate 'if_acmpeq' as a branch on the 'is' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, "is")

def if_acmpne(self, arguments, program):
    "Translate 'if_acmpne' as a branch on the 'is not' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, "is not")

def if_icmpeq(self, arguments, program):
    "Translate 'if_icmpeq' as a branch on the '==' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, "==")

def if_icmpne(self, arguments, program):
    "Translate 'if_icmpne' as a branch on the '!=' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, "!=")

def if_icmplt(self, arguments, program):
    "Translate 'if_icmplt' as a branch on the '<' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, "<")

def if_icmpge(self, arguments, program):
    "Translate 'if_icmpge' as a branch on the '>=' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, ">=")

def if_icmpgt(self, arguments, program):
    "Translate 'if_icmpgt' as a branch on the '>' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, ">")
def if_icmple(self, arguments, program):
    "Translate 'if_icmple' as a branch on the '<=' comparison."
    # NOTE: No type checking performed.
    self._if_xcmpx(arguments, program, "<=")

def ifeq(self, arguments, program):
    "Translate 'ifeq': compare the top of the stack with 0 using '==' and branch."
    # NOTE: No type checking performed.
    program.load_const(0)
    self._if_xcmpx(arguments, program, "==")

def ifne(self, arguments, program):
    "Translate 'ifne': compare the top of the stack with 0 using '!=' and branch."
    # NOTE: No type checking performed.
    program.load_const(0)
    self._if_xcmpx(arguments, program, "!=")

def iflt(self, arguments, program):
    "Translate 'iflt': compare the top of the stack with 0 using '<' and branch."
    # NOTE: No type checking performed.
    program.load_const(0)
    self._if_xcmpx(arguments, program, "<")

def ifge(self, arguments, program):
    "Translate 'ifge': compare the top of the stack with 0 using '>=' and branch."
    # NOTE: No type checking performed.
    program.load_const(0)
    self._if_xcmpx(arguments, program, ">=")

def ifgt(self, arguments, program):
    "Translate 'ifgt': compare the top of the stack with 0 using '>' and branch."
    # NOTE: No type checking performed.
    program.load_const(0)
    self._if_xcmpx(arguments, program, ">")

def ifle(self, arguments, program):
    "Translate 'ifle': compare the top of the stack with 0 using '<=' and branch."
    # NOTE: No type checking performed.
    program.load_const(0)
    self._if_xcmpx(arguments, program, "<=")

def ifnonnull(self, arguments, program):
    "Translate 'ifnonnull': compare the top of the stack with None using 'is not' and branch."
    # NOTE: No type checking performed.
    program.load_const(None)
    self._if_xcmpx(arguments, program, "is not")

def ifnull(self, arguments, program):
    "Translate 'ifnull': compare the top of the stack with None using 'is' and branch."
    # NOTE: No type checking performed.
    program.load_const(None)
    self._if_xcmpx(arguments, program, "is")

def iinc(self, arguments, program):
    """
    Translate 'iinc': add a constant to a local variable in place.

    'arguments' holds the local variable index followed by the increment.
    The JVM defines the increment as a signed byte, so a raw operand above
    127 is sign-extended (255 becomes -1) before being emitted as a constant.
    """
    # NOTE: No type checking performed.
    index = arguments[0]
    increment = arguments[1]
    # Operand bytes arrive as unsigned values (0..255); map them to -128..127.
    if increment > 127:
        increment -= 256
    program.load_fast(index)
    program.load_const(increment)
    program.binary_add()
    program.store_fast(index)
# Integer loads and arithmetic reuse the float handlers.
iload = fload
iload_0 = fload_0
iload_1 = fload_1
iload_2 = fload_2
iload_3 = fload_3
imul = fmul
ineg = fneg

def instanceof(self, arguments, program):
    """
    Translate 'instanceof': emit a call to the 'isinstance' global with the
    object reference on the stack and the class named by the two-byte constant
    index in 'arguments'. The class is reached by loading the first
    "/"-separated component of its name as a global and the rest as attributes.
    """
    constant_index = (arguments[0] << 8) + arguments[1]
    class_name = self.class_file.constants[constant_index - 1].get_python_name()
    # NOTE: Using the string version of the name which may contain incompatible characters.
    components = str(class_name).split("/")

    program.load_global("isinstance")   # Stack: objectref, isinstance
    program.rot_two()                   # Stack: isinstance, objectref
    program.load_global(components[0])
    for component in components[1:]:
        program.load_attr(component)
    program.call_function(2)            # Stack: result

def _invoke(self, target_name, program):
    """
    Emit a call of the attribute 'target_name' on the object at the top of the
    stack, using the 'apply' global and the argument tuple found beneath that
    object.
    """
    # NOTE: Using the string version of the name which may contain incompatible characters.
    program.load_attr(str(target_name)) # Stack: tuple, method
    program.rot_two()                   # Stack: method, tuple
    program.load_global("apply")        # Stack: method, tuple, apply
    program.rot_three()                 # Stack: apply, method, tuple
    program.call_function(2)

def invokeinterface(self, arguments, program):
    """
    Translate 'invokeinterface': gather the arguments on the stack into a
    tuple and invoke the method named by the two-byte constant index on the
    object reference beneath them. The argument count is taken from the third
    argument byte, less one.
    """
    # NOTE: This implementation does not perform the necessary checks for
    # NOTE: signature-based polymorphism.
    # NOTE: Java rules not specifically obeyed.
    constant_index = (arguments[0] << 8) + arguments[1]
    # NOTE: "count" == nargs + 1, apparently.
    nargs = arguments[2] - 1
    method_name = self.class_file.constants[constant_index - 1].get_python_name()
    # Stack: objectref, arg1, arg2, ...
    program.build_tuple(nargs)          # Stack: objectref, tuple
    program.rot_two()                   # Stack: tuple, objectref
    self._invoke(method_name, program)
def invokespecial(self, arguments, program):
    """
    Translate 'invokespecial'.

    The method is identified by the two-byte constant index in 'arguments';
    its parameter count is taken from its descriptor. The reference and the
    arguments on the stack are packed into one tuple, and the emitted code
    then decides at run time how to perform the call:

     * reference is not local 0 and the method is "<init>": the reference is
       called with the remaining arguments, and the entry beneath the result
       is discarded;
     * reference is not local 0 and the method is something else: the method
       is fetched from the reference and applied to the whole tuple;
     * reference is local 0: the method is fetched from the first entry of
       the current class's __bases__ and applied to the whole tuple, or
       nothing is called if __bases__ is empty.
    """
    # NOTE: This implementation does not perform the necessary checks for
    # NOTE: signature-based polymorphism.
    # NOTE: Java rules not specifically obeyed.
    index = (arguments[0] << 8) + arguments[1]
    target = self.class_file.constants[index - 1]
    original_name = target.get_name()
    target_name = target.get_python_name()
    # Get the number of parameters from the descriptor.
    count = len(target.get_descriptor()[0])

    # The stack may contain one of the following patterns:
    # Stack: classref, arg1, arg2, ...
    # Stack: objectref, arg1, arg2, ...
    # method == __init__, classref -> classref(arg1, arg2, ...)
    # method == __init__, objectref == self -> cls.bases[0].__init__(objectref, arg1, arg2, ...)
    # method == __init__, objectref != self -> should not occur
    # method != __init__, classref -> classref.method(classref, arg1, arg2, ...)
    # method != __init__, objectref == self -> cls.bases[0].method(objectref, arg1, arg2, ...)
    # method != __init__, objectref != self -> should not occur

    # First, we build a tuple of the reference and arguments.
    program.build_tuple(count + 1)  # Stack: tuple

    # Then, we test the nature of the reference.
    program.dup_top()               # Stack: tuple, tuple
    program.load_const(0)           # Stack: tuple, tuple, 0
    program.binary_subscr()         # Stack: tuple, reference
    program.dup_top()               # Stack: tuple, reference, reference

    # Is it self?
    program.load_fast(0)            # Stack: tuple, reference, reference, self
    program.compare_op("is")        # Stack: tuple, reference, result
    program.jump_to_label(1, "is-self")
    program.pop_top()               # Stack: tuple, reference

    # Is another class or reference.
    # NOTE: Reference case not covered!
    # The choice below is made at translation time from the Java method name.
    if str(original_name) == "<init>":
        program.rot_two()               # Stack: reference, tuple
        program.load_const(1)           # Stack: reference, tuple, 1
        program.slice_1()               # Stack: reference, tuple[1:]
        program.load_global("apply")    # Stack: reference, tuple, apply
        program.rot_three()             # Stack: apply, reference, tuple
        program.call_function(2)
        # NOTE: Combinations of new, dup tend to produce interfering extra
        # NOTE: class references.
        program.rot_two()               # Stack: objectref, classref
        program.pop_top()
        program.jump_to_label(None, "done")
    else:
        self._invoke(target_name, program)
        program.jump_to_label(None, "done")

    # Is self.
    program.start_label("is-self")
    program.pop_top()               # Stack: tuple, reference
    program.pop_top()               # Stack: tuple
    # Get the class name instead of the fully qualified name.
    full_class_name = str(self.class_file.this_class.get_python_name())
    class_name = full_class_name.split(".")[-1]
    program.load_global(class_name) # Stack: tuple, classref
    program.load_attr("__bases__")  # Stack: tuple, bases
    program.dup_top()               # Stack: tuple, bases, bases
    program.load_global("len")      # Stack: tuple, bases, bases, len
    program.rot_two()               # Stack: tuple, bases, len, bases
    program.call_function(1)        # Stack: tuple, bases, #bases
    program.load_const(0)           # Stack: tuple, bases, #bases, 0
    program.compare_op("==")        # Stack: tuple, bases, result
    program.jump_to_label(1, "no-bases")
    program.pop_top()               # Stack: tuple, bases
    program.load_const(0)           # Stack: tuple, bases, 0
    program.binary_subscr()         # Stack: tuple, bases[0]
    self._invoke(target_name, program)
    program.jump_to_label(None, "done")

    # No bases found, do no invocation.
    program.start_label("no-bases")
    program.pop_top()               # Stack: tuple, bases
    program.pop_top()               # Stack: tuple
    program.pop_top()               # Stack:
    program.start_label("done")
def invokestatic(self, arguments, program):
    """
    Translate 'invokestatic': gather the arguments on the stack into a tuple
    and invoke the method named by the two-byte constant index, fetching it
    from self.__class__. The argument count comes from the method descriptor.
    """
    # NOTE: This implementation does not perform the necessary checks for
    # NOTE: signature-based polymorphism.
    # NOTE: Java rules not specifically obeyed.
    constant_index = (arguments[0] << 8) + arguments[1]
    method = self.class_file.constants[constant_index - 1]
    method_name = method.get_python_name()
    # Get the number of parameters from the descriptor.
    nargs = len(method.get_descriptor()[0])
    # Stack: arg1, arg2, ...
    program.build_tuple(nargs)      # Stack: tuple
    # Use the class to provide access to static methods.
    program.load_name("self")       # Stack: tuple, self
    program.load_attr("__class__")  # Stack: tuple, class
    self._invoke(method_name, program)

def invokevirtual (self, arguments, program):
    """
    Translate 'invokevirtual': gather the arguments on the stack into a tuple
    and invoke the method named by the two-byte constant index on the object
    reference beneath them. The argument count comes from the method
    descriptor.
    """
    # NOTE: This implementation does not perform the necessary checks for
    # NOTE: signature-based polymorphism.
    # NOTE: Java rules not specifically obeyed.
    constant_index = (arguments[0] << 8) + arguments[1]
    method = self.class_file.constants[constant_index - 1]
    method_name = method.get_python_name()
    # Get the number of parameters from the descriptor.
    nargs = len(method.get_descriptor()[0])
    # Stack: objectref, arg1, arg2, ...
    program.build_tuple(nargs)      # Stack: objectref, tuple
    program.rot_two()               # Stack: tuple, objectref
    self._invoke(method_name, program)

def ior(self, arguments, program):
    "Translate 'ior' by emitting a binary or into 'program'."
    # NOTE: No type checking performed.
    program.binary_or()

irem = frem
ireturn = freturn

def ishl(self, arguments, program):
    "Translate 'ishl' by emitting a binary left shift into 'program'."
    # NOTE: No type checking performed.
    # NOTE: Not verified.
    program.binary_lshift()

def ishr(self, arguments, program):
    "Translate 'ishr' by emitting a binary right shift into 'program'."
    # NOTE: No type checking performed.
    # NOTE: Not verified.
    program.binary_rshift()
# Integer stores and subtraction reuse the float handlers.
istore = fstore
istore_0 = fstore_0
istore_1 = fstore_1
istore_2 = fstore_2
istore_3 = fstore_3
isub = fsub
iushr = ishr # Ignoring distinctions between arithmetic and logical shifts

def ixor(self, arguments, program):
    "Translate 'ixor' by emitting a binary xor into 'program'."
    # NOTE: No type checking performed.
    program.binary_xor()

def jsr(self, arguments, program):
    """
    Translate 'jsr': record the mapped position of the instruction three bytes
    on as the return address, then jump to the target given by the signed
    16-bit offset in 'arguments'.
    """
    relative = signed2((arguments[0] << 8) + arguments[1])
    target = self.java_position + relative
    # Store the address of the next instruction.
    program.load_const_ret(self.position_mapping[self.java_position + 3])
    program.jump_absolute(self.position_mapping[target])

def jsr_w(self, arguments, program):
    """
    Translate 'jsr_w': as 'jsr', but with a signed 32-bit offset and the
    return address five bytes on.
    """
    raw = (arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3]
    target = self.java_position + signed4(raw)
    # Store the address of the next instruction.
    program.load_const_ret(self.position_mapping[self.java_position + 5])
    program.jump_absolute(self.position_mapping[target])

l2d = i2d
l2f = i2f

def l2i(self, arguments, program):
    "Translate 'l2i': nothing is emitted."
    pass # Preserving Java semantics

# Long arithmetic and array access reuse the integer handlers.
ladd = iadd
laload = iaload
land = iand
lastore = iastore

def lcmp(self, arguments, program):
    """
    Translate 'lcmp': replace the two values on the stack with 1 if the first
    is greater than the second, 0 if they are equal and -1 otherwise.
    """
    # NOTE: No type checking performed.

    # Test for "greater than".
    program.dup_topx(2)                 # Stack: value1, value2, value1, value2
    program.compare_op(">")             # Stack: value1, value2, result
    program.jump_to_label(0, "equals")
    # True - produce result and branch.
    program.pop_top()                   # Stack: value1, value2
    program.pop_top()                   # Stack: value1
    program.pop_top()                   # Stack:
    program.load_const(1)               # Stack: 1
    program.jump_to_label(None, "next")

    # False - test equality.
    program.start_label("equals")
    program.pop_top()                   # Stack: value1, value2
    program.dup_topx(2)                 # Stack: value1, value2, value1, value2
    program.compare_op("==")            # Stack: value1, value2, result
    program.jump_to_label(0, "less")
    # True - produce result and branch.
    program.pop_top()                   # Stack: value1, value2
    program.pop_top()                   # Stack: value1
    program.pop_top()                   # Stack:
    program.load_const(0)               # Stack: 0
    program.jump_to_label(None, "next")

    # False - produce result.
    program.start_label("less")
    program.pop_top()                   # Stack: value1, value2
    program.pop_top()                   # Stack: value1
    program.pop_top()                   # Stack:
    program.load_const(-1)              # Stack: -1
    program.start_label("next")
lconst_0 = iconst_0
lconst_1 = iconst_1

def ldc(self, arguments, program):
    "Translate 'ldc' by loading the class file constant selected by the one-byte index."
    program.load_const(self.class_file.constants[arguments[0] - 1])

def ldc_w(self, arguments, program):
    "Translate 'ldc_w' by loading the class file constant selected by the two-byte index."
    program.load_const(self.class_file.constants[(arguments[0] << 8) + arguments[1] - 1])

# Long operations reuse the integer handlers.
ldc2_w = ldc_w
ldiv = idiv
lload = iload
lload_0 = iload_0
lload_1 = iload_1
lload_2 = iload_2
lload_3 = iload_3
lmul = imul
lneg = ineg

def lookupswitch(self, arguments, program):
    """
    Translate 'lookupswitch' into a chain of equality tests.

    'arguments' holds the bytes following the opcode: padding up to a four
    byte boundary, a signed 32-bit default offset, a 32-bit pair count and
    then that many (signed match, signed offset) pairs. For each pair the key
    on the stack is compared with the match value and, when equal, the key is
    discarded and control jumps to the mapped target. If nothing matches, the
    key is discarded and control jumps to the default target.
    """
    # Find the offset to the next 4 byte boundary in the code.
    # The operands start one byte after the opcode at 'java_position', so it
    # is the address of the first operand byte which must be aligned.
    d, r = divmod(self.java_position + 1, 4)
    to_boundary = (4 - r) % 4
    # Get the pertinent arguments.
    arguments = arguments[to_boundary:]
    # The default offset is signed: it may refer to an earlier instruction.
    default = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3])
    npairs = (arguments[4] << 24) + (arguments[5] << 16) + (arguments[6] << 8) + arguments[7]
    # Process the pairs.
    # NOTE: This is not the most optimal implementation.
    pair_index = 8
    for pair in range(0, npairs):
        # Match values are signed integers, as are the offsets.
        match = signed4((arguments[pair_index] << 24) + (arguments[pair_index + 1] << 16) +
            (arguments[pair_index + 2] << 8) + arguments[pair_index + 3])
        offset = signed4((arguments[pair_index + 4] << 24) + (arguments[pair_index + 5] << 16) +
            (arguments[pair_index + 6] << 8) + arguments[pair_index + 7])
        # Calculate the branch target.
        java_absolute = self.java_position + offset
        # Generate branching code.
        program.dup_top()                                           # Stack: key, key
        program.load_const(match)                                   # Stack: key, key, match
        program.compare_op("==")                                    # Stack: key, result
        program.jump_to_label(0, "end")
        program.pop_top()                                           # Stack: key
        program.pop_top()                                           # Stack:
        program.jump_absolute(self.position_mapping[java_absolute])
        # Generate the label for the end of the branching code.
        program.start_label("end")
        program.pop_top()                                           # Stack: key
        # Update the index.
        pair_index += 8
    # Generate the default.
    # The instruction consumes the key, so it must not be left on the stack
    # when no pair matched.
    program.pop_top()                                               # Stack:
    java_absolute = self.java_position + default
    program.jump_absolute(self.position_mapping[java_absolute])
# Long operations reuse the integer handlers.
lor = ior
lrem = irem
lreturn = ireturn
lshl = ishl
lshr = ishr
lstore = istore
lstore_0 = istore_0
lstore_1 = istore_1
lstore_2 = istore_2
lstore_3 = istore_3
lsub = isub
lushr = iushr
lxor = ixor

def monitorenter(self, arguments, program):
    "Translate 'monitorenter': nothing is emitted."
    # NOTE: To be implemented.
    pass

def monitorexit(self, arguments, program):
    "Translate 'monitorexit': nothing is emitted."
    # NOTE: To be implemented.
    pass

def multianewarray(self, arguments, program):
    "Translate 'multianewarray': nothing is emitted."
    # NOTE: To be implemented.
    pass

def new(self, arguments, program):
    """
    Translate 'new': load, as a global, the class named by the two-byte
    constant index in 'arguments'. No instance is created here.
    """
    # This operation is considered to be the same as the calling of the
    # initialisation method of the given class with no arguments.
    constant_index = (arguments[0] << 8) + arguments[1]
    class_name = self.class_file.constants[constant_index - 1].get_python_name()
    # NOTE: Using the string version of the name which may contain incompatible characters.
    program.load_global(str(class_name))
    # NOTE: Unlike Java, we do not provide an object reference. Instead, a
    # NOTE: class reference is provided, and the invokespecial method's
    # NOTE: behaviour is changed.
    #program.call_function(0)
def newarray(self, arguments, program):
    "Translate 'newarray' by delegating to the _newarray method."
    # NOTE: Does not raise NegativeArraySizeException.
    # NOTE: Not using the arguments to type the list/array.
    self._newarray(program)

def nop(self, arguments, program):
    "Translate 'nop': nothing is emitted."
    pass

def pop(self, arguments, program):
    "Translate 'pop' by discarding the top of the stack."
    program.pop_top()

pop2 = pop # ignoring Java stack value distinctions

def putfield(self, arguments, program):
    """
    Translate 'putfield': swap the two topmost stack entries and emit an
    attribute store for the field named by the two-byte constant index in
    'arguments'.
    """
    constant_index = (arguments[0] << 8) + arguments[1]
    field = self.class_file.constants[constant_index - 1]
    field_name = field.get_python_name()
    program.rot_two()
    # NOTE: Using the string version of the name which may contain incompatible characters.
    program.store_attr(str(field_name))

def putstatic(self, arguments, program):
    """
    Translate 'putstatic': emit a store of the field named by the two-byte
    constant index in 'arguments' as an attribute of self.__class__.
    """
    constant_index = (arguments[0] << 8) + arguments[1]
    field = self.class_file.constants[constant_index - 1]
    field_name = field.get_python_name()
    program.load_name("self")
    program.load_attr("__class__")
    # NOTE: Using the string version of the name which may contain incompatible characters.
    program.store_attr(str(field_name))

def ret(self, arguments, program):
    """
    Translate 'ret' for the local variable index in 'arguments' and clear the
    'in_finally' flag on this translator.
    """
    program.ret(arguments[0])
    # Indicate that the finally handler is probably over.
    # NOTE: This is seemingly not guaranteed.
    self.in_finally = 0
def return_(self, arguments, program):
    "Translate 'return' by returning None from the generated code."
    program.load_const(None)
    program.return_value()

# Short array access reuses the long handlers.
saload = laload
sastore = lastore

def sipush(self, arguments, program):
    """
    Translate 'sipush' by loading the two-byte operand as a constant. The
    operand is a signed short, so it is converted with signed2 in the same way
    as the 16-bit branch offsets.
    """
    program.load_const(signed2((arguments[0] << 8) + arguments[1]))

def swap(self, arguments, program):
    "Translate 'swap' by exchanging the two topmost stack entries."
    program.rot_two()

def tableswitch(self, arguments, program):
    """
    Translate 'tableswitch' into a chain of equality tests.

    'arguments' holds the bytes following the opcode: padding up to a four
    byte boundary, then signed 32-bit 'default', 'low' and 'high' values,
    followed by one signed 32-bit jump offset for each key from 'low' to
    'high' inclusive. For each key the value on the stack is compared with
    it and, when equal, the value is discarded and control jumps to the
    mapped target. If nothing matches, the value is discarded and control
    jumps to the default target.
    """
    # Find the offset to the next 4 byte boundary in the code.
    # The operands start one byte after the opcode at 'java_position', so it
    # is the address of the first operand byte which must be aligned.
    d, r = divmod(self.java_position + 1, 4)
    to_boundary = (4 - r) % 4
    # Get the pertinent arguments.
    arguments = arguments[to_boundary:]
    # All three header words are signed: the default may branch backwards and
    # the key range may include negative numbers.
    default = signed4((arguments[0] << 24) + (arguments[1] << 16) + (arguments[2] << 8) + arguments[3])
    low = signed4((arguments[4] << 24) + (arguments[5] << 16) + (arguments[6] << 8) + arguments[7])
    high = signed4((arguments[8] << 24) + (arguments[9] << 16) + (arguments[10] << 8) + arguments[11])
    # Process the jump entries.
    # NOTE: This is not the most optimal implementation.
    # The jump table follows the three header words (12 bytes) and holds one
    # four byte offset per key.
    jump_index = 12
    for jump in range(low, high + 1):
        offset = signed4((arguments[jump_index] << 24) + (arguments[jump_index + 1] << 16) +
            (arguments[jump_index + 2] << 8) + arguments[jump_index + 3])
        # Calculate the branch target.
        java_absolute = self.java_position + offset
        # Generate branching code.
        program.dup_top()                                           # Stack: key, key
        program.load_const(jump)                                    # Stack: key, key, jump
        program.compare_op("==")                                    # Stack: key, result
        program.jump_to_label(0, "end")
        program.pop_top()                                           # Stack: key
        program.pop_top()                                           # Stack:
        program.jump_absolute(self.position_mapping[java_absolute])
        # Generate the label for the end of the branching code.
        program.start_label("end")
        program.pop_top()                                           # Stack: key
        # Update the index.
        jump_index += 4
    # Generate the default.
    # The instruction consumes the key, so it must not be left on the stack
    # when no entry matched.
    program.pop_top()                                               # Stack:
    java_absolute = self.java_position + default
    program.jump_absolute(self.position_mapping[java_absolute])
def wide(self, code, program):
    """
    Placeholder for the 'wide' instruction, which is not supported yet.

    Raises NotImplementedError so that the missing support is reported
    explicitly rather than through a NameError on an undefined variable.
    """
    # NOTE: To be implemented.
    raise NotImplementedError("wide")

def disassemble(class_file, method):

    "Disassemble the given 'method' of the given 'class_file'."

    disassembler = BytecodeDisassembler(class_file)
    disassembler.process(method, BytecodeDisassemblerProgram())

class ClassTranslator:

    """
    A class which provides a wrapper around a class file and the means to
    translate the represented class into a Python class.
    """

    def __init__(self, class_file):

        "Initialise the object with the given 'class_file'."

        self.class_file = class_file
        # The name of the first class file attribute is used as the filename
        # recorded in the generated code objects.
        self.filename = str(self.class_file.attributes[0].get_name())

    def translate_method(self, method):

        """
        Translate the given 'method' - an object obtained from the class file.
        Return the translator and the writer holding the produced bytecode.
        """

        translator = BytecodeTranslator(self.class_file)
        writer = BytecodeWriter()
        translator.process(method, writer)
        return translator, writer

    def make_method(self, method_name, methods, global_names, namespace):

        """
        Make a dispatcher method with the given 'method_name', providing
        dispatch to the supplied type-sensitive 'methods', accessing the given
        'global_names' where necessary, and storing the new method in the
        'namespace' provided.

        The 'methods' are (method, function) pairs. The name "<init>" is
        stored as "__init__". With a single pair, the function itself is
        stored and no dispatcher is generated.
        """

        if method_name == "<init>":
            method_name = "__init__"
        # Where only one method exists, just make an alias.
        if len(methods) == 1:
            method, fn = methods[0]
            namespace[method_name] = fn
            return
        # Find the maximum number of parameters involved.
        #maximum = max([len(method.get_descriptor()[0]) for method in methods])
        program = BytecodeWriter()
        # NOTE: The code below should use dictionary-based dispatch for better performance.
        program.load_fast(1)                        # Stack: arguments
        for method, fn in methods:
            program.dup_top()                       # Stack: arguments, arguments
            program.load_const(1)
            program.store_fast(2)                   # found = 1
            program.setup_loop()
            # Emit a list of parameter types.
            descriptor_types = method.get_descriptor()[0]
            for descriptor_type in descriptor_types:
                base_type, object_type, array_type = descriptor_type
                python_type = classfile.descriptor_base_type_mapping[base_type]
                if python_type == "instance":
                    # NOTE: This will need extending.
                    python_type = object_type
                program.load_global(python_type)    # Stack: arguments, type, ...
            program.build_list(len(descriptor_types))
                                                    # Stack: arguments, types
            # Make a map of arguments and types.
            program.load_const(None)                # Stack: arguments, types, None
            program.rot_three()                     # Stack: None, arguments, types
            program.build_tuple(3)                  # Stack: tuple
            program.load_global("map")              # Stack: tuple, map
            program.rot_two()                       # Stack: map, tuple
            program.load_global("apply")            # Stack: map, tuple, apply
            program.rot_three()                     # Stack: apply, map, tuple
            program.call_function(2)                # Stack: tuple (mapping arguments to types)
            # Loop over each pair.
            program.get_iter()                      # Stack: iter
            program.for_iter()                      # Stack: iter, (argument, type)
            program.unpack_sequence(2)              # Stack: iter, type, argument
            program.dup_top()                       # Stack: iter, type, argument, argument
            program.load_const(None)                # Stack: iter, type, argument, argument, None
            program.compare_op("is")                # Stack: iter, type, argument, result
            # Missing argument?
            program.jump_to_label(0, "present")
            program.pop_top()                       # Stack: iter, type, argument
            program.pop_top()                       # Stack: iter, type
            program.pop_top()                       # Stack: iter
            program.load_const(0)
            program.store_fast(2)                   # found = 0
            program.break_loop()
            # Argument was present.
            program.start_label("present")
            program.pop_top()                       # Stack: iter, type, argument
            program.rot_two()                       # Stack: iter, argument, type
            program.dup_top()                       # Stack: iter, argument, type, type
            program.load_const(None)                # Stack: iter, argument, type, type, None
            program.compare_op("is")                # Stack: iter, argument, type, result
            # Missing parameter type?
            program.jump_to_label(0, "present")
            program.pop_top()                       # Stack: iter, argument, type
            program.pop_top()                       # Stack: iter, argument
            program.pop_top()                       # Stack: iter
            program.load_const(0)
            program.store_fast(2)                   # found = 0
            program.break_loop()
            # Parameter was present.
            program.start_label("present")
            program.pop_top()                       # Stack: iter, argument, type
            program.build_tuple(2)                  # Stack: iter, (argument, type)
            program.load_global("isinstance")       # Stack: iter, (argument, type), isinstance
            program.rot_two()                       # Stack: iter, isinstance, (argument, type)
            program.load_global("apply")            # Stack: iter, isinstance, (argument, type), apply
            program.rot_three()                     # Stack: iter, apply, isinstance, (argument, type)
            program.call_function(2)                # Stack: iter, result
            program.jump_to_label(1, "match")
            program.pop_top()                       # Stack: iter
            program.load_const(0)
            program.store_fast(2)                   # found = 0
            program.break_loop()
            # Argument type and parameter type matched.
            program.start_label("match")
            program.pop_top()                       # Stack: iter
            program.end_loop()                      # Stack: iter
            # If all the parameters matched, call the method.
            program.load_fast(2)                    # Stack: iter, match
            program.jump_to_label(0, "failed")
            # All the parameters matched.
            program.pop_top()                       # Stack: iter
            program.load_fast(1)                    # Stack: arguments
            program.load_fast(0)                    # Stack: arguments, self
            program.load_attr(str(method.get_python_name()))
                                                    # Stack: arguments, method
            program.rot_two()                       # Stack: method, arguments
            program.load_global("apply")            # Stack: method, arguments, apply
            program.rot_three()                     # Stack: apply, method, arguments
            program.call_function(2)                # Stack: result
            program.return_value()
            # Try the next method if arguments or parameters were missing or incorrect.
            program.start_label("failed")
            program.pop_top()                       # Stack: iter
            program.pop_top()                       # Stack:
        # Raise an exception if nothing matched.
        # NOTE: Improve this.
        program.load_const("No matching method")
        program.raise_varargs(1)
        program.load_const(None)
        program.return_value()

        # Add the code as a method in the namespace.
        # NOTE: One actual parameter, flags as 71 apparently means that a list
        # NOTE: parameter is used in a method.
        nlocals = program.max_locals + 1
        code = new.code(1, nlocals, program.max_stack_depth, 71, program.get_output(),
            tuple(program.get_constants()), tuple(program.get_names()), tuple(self.make_varnames(nlocals)),
            self.filename, method_name, 0, "")
        fn = new.function(code, global_names)
        namespace[method_name] = fn

    def process(self, global_names):

        """
        Process the class, storing it in the 'global_names' dictionary provided.
        Every method in the class file is translated into a function, methods
        sharing a Java name are given a dispatcher through 'make_method', and
        the resulting class object is returned.
        """

        namespace = {}
        # Maps each Java method name to its list of (method, function) pairs.
        real_methods = {}
        for method in self.class_file.methods:
            t, w = self.translate_method(method)
            nlocals = w.max_locals + 1
            # One more than the descriptor's parameter count, for "self".
            nargs = len(method.get_descriptor()[0]) + 1
            method_name = str(method.get_python_name())
            # NOTE: Add line number table later.
            code = new.code(nargs, nlocals, w.max_stack_depth, 67, w.get_output(), tuple(w.get_constants()), tuple(w.get_names()),
                tuple(self.make_varnames(nlocals)), self.filename, method_name, 0, "")
            # NOTE: May need more globals.
            fn = new.function(code, global_names)
            namespace[method_name] = fn
            real_method_name = str(method.get_name())
            real_methods.setdefault(real_method_name, []).append((method, fn))
        # Define superclasses.
        bases = self.get_base_classes(global_names)
        # Define method dispatchers.
        for real_method_name, methods in real_methods.items():
            self.make_method(real_method_name, methods, global_names, namespace)
        # Use only the last part of the fully qualified name.
        full_class_name = str(self.class_file.this_class.get_python_name())
        class_name = full_class_name.split(".")[-1]
        cls = new.classobj(class_name, bases, namespace)
        global_names[cls.__name__] = cls
        return cls

    def get_base_classes(self, global_names):

        """
        Identify the superclass and import it from its parent module, passing
        the given 'global_names' to the import. A superclass without a module
        part is imported from "__this__". No base class is used where the
        superclass is java/lang/Object or java/lang/Exception.
        Return a tuple containing all base classes (typically a single element
        tuple).
        """

        original_name = str(self.class_file.super_class.get_name())
        if original_name in ("java/lang/Object", "java/lang/Exception"):
            return ()
        else:
            full_class_name = str(self.class_file.super_class.get_python_name())
            class_name_parts = full_class_name.split(".")
            class_module_name = ".".join(class_name_parts[:-1])
            if class_module_name == "":
                class_module_name = "__this__"
            class_name = class_name_parts[-1]
            # Trace output; written so that the text is the same whether print
            # is a statement or a function.
            print("* %s %s" % (class_module_name, class_name))
            class_module = __import__(class_module_name, global_names, {}, [class_name])
            base = getattr(class_module, class_name)
            return (base,)

    def make_varnames(self, nlocals):

        """
        A utility method which invents variable names for the given number -
        'nlocals' - of local variables in a method. Returns a list of such
        variable names: "self" followed by "_l1", "_l2" and so on.
        """

        names = ["self"] + ["_l%s" % i for i in range(1, nlocals)]
        # The slice yields an empty list when 'nlocals' is zero.
        return names[:nlocals]

def _map(*args):

    "A debugging replacement for 'map' which prints its arguments first."

    print(args)
    return apply(__builtins__.map, args)

def _isinstance(*args):

    "A debugging replacement for 'isinstance' which prints its arguments first."

    print(args)
    return apply(__builtins__.isinstance, args)

if __name__ == "__main__":
    import sys
    import dis
    global_names = {}
    global_names.update(__builtins__.__dict__)
    #global_names["isinstance"] = _isinstance
    #global_names["map"] = _map
    for filename in sys.argv[1:]:
        # Close each class file as soon as it has been read.
        f = open(filename, "rb")
        try:
            data = f.read()
        finally:
            f.close()
        c = classfile.ClassFile(data)
        translator = ClassTranslator(c)
        cls = translator.process(global_names)

# vim: tabstop=4 expandtab shiftwidth=4