1 #!/usr/bin/env python 2 3 """ 4 Java class file decoder. Specification found at the following URL: 5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html 6 7 Copyright (C) 2004, 2005, 2006, 2011 Paul Boddie <paul@boddie.org.uk> 8 Copyright (C) 2010 Braden Thomas <bradenthomas@me.com> 9 Copyright (C) 2011 David Drysdale <dmd@lurklurk.org> 10 11 This program is free software; you can redistribute it and/or modify it under 12 the terms of the GNU Lesser General Public License as published by the Free 13 Software Foundation; either version 3 of the License, or (at your option) any 14 later version. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 19 details. 20 21 You should have received a copy of the GNU Lesser General Public License along 22 with this program. If not, see <http://www.gnu.org/licenses/>. 23 """ 24 25 import struct # for general decoding of class files 26 27 # Utility functions. 
def u1(data):
    """Decode an unsigned 8-bit integer from the first byte of 'data'."""
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    """Decode an unsigned big-endian 16-bit integer from the start of 'data'."""
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    """Decode a signed big-endian 16-bit integer from the start of 'data'."""
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    """Decode an unsigned big-endian 32-bit integer from the start of 'data'."""
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    """Decode a signed big-endian 32-bit integer from the start of 'data'."""
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    """Decode a signed big-endian 64-bit integer from the start of 'data'."""
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    """Decode a big-endian 32-bit float from the start of 'data'."""
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    """Decode a big-endian 64-bit float from the start of 'data'."""
    return struct.unpack(">d", data[0:8])[0]

def su1(value):
    """Encode 'value' as an unsigned 8-bit integer."""
    return struct.pack(">B", value)

def su2(value):
    """Encode 'value' as an unsigned big-endian 16-bit integer."""
    return struct.pack(">H", value)

def ss2(value):
    """Encode 'value' as a signed big-endian 16-bit integer."""
    return struct.pack(">h", value)

def su4(value):
    """Encode 'value' as an unsigned big-endian 32-bit integer."""
    return struct.pack(">L", value)

def ss4(value):
    """Encode 'value' as a signed big-endian 32-bit integer."""
    return struct.pack(">l", value)

def ss8(value):
    """Encode 'value' as a signed big-endian 64-bit integer."""
    return struct.pack(">q", value)

def sf4(value):
    """Encode 'value' as a big-endian 32-bit float."""
    return struct.pack(">f", value)

def sf8(value):
    """Encode 'value' as a big-endian 64-bit float."""
    return struct.pack(">d", value)

# Useful tables and constants.

# Maps the leading character of a descriptor to a Python type name.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Maps the Python type names above to the default value for that type.
type_names_to_default_values = {
    "int" : 0,
    "str" : u"",
    "float" : 0.0,
    "object" : None,
    "bool" : 0, # NOTE: Should be False.
    "list" : []
    }

def get_default_for_type(type_name):
    """
    Return the default value registered for 'type_name', or None if the name
    is not in 'type_names_to_default_values'.

    List defaults are returned as a fresh copy so that a caller mutating the
    result cannot alter the default seen by every later caller.
    """
    default = type_names_to_default_values.get(type_name)
    if isinstance(default, list):
        return list(default)
    return default

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share a value.
PUBLIC = 0x0001
PRIVATE = 0x0002
PROTECTED = 0x0004
STATIC = 0x0008
FINAL = 0x0010
SUPER = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE = 0x0040
TRANSIENT = 0x0080
NATIVE = 0x0100
INTERFACE = 0x0200
ABSTRACT = 0x0400
STRICT = 0x0800

def has_flags(flags, desired):
    """
    Return whether every flag in the iterable 'desired' is set in 'flags'.

    An empty 'desired' yields a true result. The mask is accumulated with a
    plain loop so that no 'reduce' builtin is required.
    """
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
113 114 class PythonMethodUtils: 115 symbol_sep = "___" # was "$" 116 type_sep = "__" # replaces "/" 117 array_sep = "_array_" # was "[]" 118 base_seps = ("_", "_") # was "<" and ">" 119 120 def get_unqualified_python_name(self): 121 name = self.get_name() 122 if str(name) == "<init>": 123 return "__init__" 124 elif str(name) == "<clinit>": 125 return "__clinit__" 126 else: 127 return str(name) 128 129 def get_python_name(self): 130 name = self.get_unqualified_python_name() 131 if name == "__clinit__": 132 return name 133 return name + self.symbol_sep + self._get_descriptor_as_name() 134 135 def _get_descriptor_as_name(self): 136 l = [] 137 for descriptor_type in self.get_descriptor()[0]: 138 l.append(self._get_type_as_name(descriptor_type)) 139 return self.symbol_sep.join(l) 140 141 def _get_type_as_name(self, descriptor_type, s=""): 142 base_type, object_type, array_type = descriptor_type 143 if base_type == "L": 144 return object_type.replace("/", self.type_sep) + s 145 elif base_type == "[": 146 return self._get_type_as_name(array_type, s + self.array_sep) 147 else: 148 return self.base_seps[0] + base_type + self.base_seps[1] + s 149 150 class PythonNameUtils: 151 def get_python_name(self): 152 # NOTE: This may not be comprehensive. 153 if not str(self.get_name()).startswith("["): 154 return str(self.get_name()).replace("/", ".") 155 else: 156 return self._get_type_name( 157 get_field_descriptor( 158 str(self.get_name()) 159 ) 160 ).replace("/", ".") 161 162 def _get_type_name(self, descriptor_type): 163 base_type, object_type, array_type = descriptor_type 164 if base_type == "L": 165 return object_type 166 elif base_type == "[": 167 return self._get_type_name(array_type) 168 else: 169 return descriptor_base_type_mapping[base_type] 170 171 class NameUtils: 172 def get_name(self): 173 if self.name_index != 0: 174 return self.class_file.constants[self.name_index - 1] 175 else: 176 # Some name indexes are zero to indicate special conditions. 
177 return None 178 179 class NameAndTypeUtils: 180 def get_name(self): 181 if self.name_and_type_index != 0: 182 return self.class_file.constants[self.name_and_type_index - 1].get_name() 183 else: 184 # Some name indexes are zero to indicate special conditions. 185 return None 186 187 def get_field_descriptor(self): 188 if self.name_and_type_index != 0: 189 return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor() 190 else: 191 # Some name indexes are zero to indicate special conditions. 192 return None 193 194 def get_method_descriptor(self): 195 if self.name_and_type_index != 0: 196 return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor() 197 else: 198 # Some name indexes are zero to indicate special conditions. 199 return None 200 201 def get_class(self): 202 return self.class_file.constants[self.class_index - 1] 203 204 # Symbol parsing. 205 206 def get_method_descriptor(s): 207 assert s[0] == "(" 208 params = [] 209 s = s[1:] 210 while s[0] != ")": 211 parameter_descriptor, s = _get_parameter_descriptor(s) 212 params.append(parameter_descriptor) 213 if s[1] != "V": 214 return_type, s = _get_field_type(s[1:]) 215 else: 216 return_type, s = None, s[1:] 217 return params, return_type 218 219 def get_field_descriptor(s): 220 return _get_field_type(s)[0] 221 222 def _get_parameter_descriptor(s): 223 return _get_field_type(s) 224 225 def _get_component_type(s): 226 return _get_field_type(s) 227 228 def _get_field_type(s): 229 base_type, s = _get_base_type(s) 230 object_type = None 231 array_type = None 232 if base_type == "L": 233 object_type, s = _get_object_type(s) 234 elif base_type == "[": 235 array_type, s = _get_array_type(s) 236 return (base_type, object_type, array_type), s 237 238 def _get_base_type(s): 239 if len(s) > 0: 240 return s[0], s[1:] 241 else: 242 return None, s 243 244 def _get_object_type(s): 245 if len(s) > 0: 246 s_end = s.find(";") 247 assert s_end != -1 248 return s[:s_end], 
s[s_end+1:] 249 else: 250 return None, s 251 252 def _get_array_type(s): 253 if len(s) > 0: 254 return _get_component_type(s) 255 else: 256 return None, s 257 258 # Constant information. 259 260 class ClassInfo(NameUtils, PythonNameUtils): 261 def init(self, data, class_file): 262 self.class_file = class_file 263 self.name_index = u2(data[0:2]) 264 return data[2:] 265 def serialize(self): 266 return su2(self.name_index) 267 268 class RefInfo(NameAndTypeUtils): 269 def init(self, data, class_file): 270 self.class_file = class_file 271 self.class_index = u2(data[0:2]) 272 self.name_and_type_index = u2(data[2:4]) 273 return data[4:] 274 def serialize(self): 275 return su2(self.class_index)+su2(self.name_and_type_index) 276 277 class FieldRefInfo(RefInfo, PythonNameUtils): 278 def get_descriptor(self): 279 return RefInfo.get_field_descriptor(self) 280 281 class MethodRefInfo(RefInfo, PythonMethodUtils): 282 def get_descriptor(self): 283 return RefInfo.get_method_descriptor(self) 284 285 class InterfaceMethodRefInfo(MethodRefInfo): 286 pass 287 288 class NameAndTypeInfo(NameUtils, PythonNameUtils): 289 def init(self, data, class_file): 290 self.class_file = class_file 291 self.name_index = u2(data[0:2]) 292 self.descriptor_index = u2(data[2:4]) 293 return data[4:] 294 295 def serialize(self): 296 return su2(self.name_index)+su2(self.descriptor_index) 297 298 def get_field_descriptor(self): 299 return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) 300 301 def get_method_descriptor(self): 302 return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1])) 303 304 class Utf8Info: 305 def init(self, data, class_file): 306 self.class_file = class_file 307 self.length = u2(data[0:2]) 308 self.bytes = data[2:2+self.length] 309 return data[2+self.length:] 310 311 def serialize(self): 312 return su2(self.length)+self.bytes 313 314 def __str__(self): 315 return self.bytes 316 317 def __unicode__(self): 318 return 
unicode(self.bytes, "utf-8") 319 320 def get_value(self): 321 return str(self) 322 323 class StringInfo: 324 def init(self, data, class_file): 325 self.class_file = class_file 326 self.string_index = u2(data[0:2]) 327 return data[2:] 328 329 def serialize(self): 330 return su2(self.string_index) 331 332 def __str__(self): 333 return str(self.class_file.constants[self.string_index - 1]) 334 335 def __unicode__(self): 336 return unicode(self.class_file.constants[self.string_index - 1]) 337 338 def get_value(self): 339 return str(self) 340 341 class SmallNumInfo: 342 def init(self, data, class_file): 343 self.class_file = class_file 344 self.bytes = data[0:4] 345 return data[4:] 346 def serialize(self): 347 return self.bytes 348 349 class IntegerInfo(SmallNumInfo): 350 def get_value(self): 351 return s4(self.bytes) 352 353 class FloatInfo(SmallNumInfo): 354 def get_value(self): 355 return f4(self.bytes) 356 357 class LargeNumInfo: 358 def init(self, data, class_file): 359 self.class_file = class_file 360 self.high_bytes = data[0:4] 361 self.low_bytes = data[4:8] 362 return data[8:] 363 def serialize(self): 364 return self.high_bytes+self.low_bytes 365 366 367 class LongInfo(LargeNumInfo): 368 def get_value(self): 369 return s8(self.high_bytes + self.low_bytes) 370 371 class DoubleInfo(LargeNumInfo): 372 def get_value(self): 373 return f8(self.high_bytes + self.low_bytes) 374 375 # Other information. 376 # Objects of these classes are generally aware of the class they reside in. 
class ItemInfo(NameUtils):

    "Common structure of fields and methods: flags, name, descriptor, attributes."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    def serialize(self):
        "Return the encoded item including its attributes."
        od = su2(self.access_flags)+su2(self.name_index)+su2(self.descriptor_index)
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field of the class."

    def get_descriptor(self):
        "Return the parsed field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method of the class."

    def get_descriptor(self):
        "Return the parsed method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    "A generic attribute whose payload is kept as undecoded bytes."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        # Stored for consistency with every specialised attribute class.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

    def serialize(self):
        "Return the encoded length followed by the raw payload."
        return su4(self.attribute_length)+self.info

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding a single source file name index."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

    def serialize(self):
        "Return the encoded attribute body."
        return su4(self.attribute_length)+su2(self.name_index)

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute referring to a constant by index."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

    def serialize(self):
        "Return the encoded attribute body."
        return su4(self.attribute_length)+su2(self.constant_value_index)

class CodeAttributeInfo(AttributeInfo):

    "An attribute holding bytecode, an exception table and nested attributes."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

    def serialize(self):
        "Return the encoded attribute body."
        od = su4(self.attribute_length)+su2(self.max_stack)+su2(self.max_locals)+su4(self.code_length)+self.code
        od += su2(self.exception_table_length)
        for e in self.exception_table:
            od += e.serialize()
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of exception constant indexes."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by table entry 'i'."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

    def serialize(self):
        "Return the encoded attribute body."
        od = su4(self.attribute_length)+su2(self.number_of_exceptions)
        for ei in self.exception_index_table:
            od += su2(ei)
        return od

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo records."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

    def serialize(self):
        "Return the encoded attribute body."
        od = su4(self.attribute_length)+su2(self.number_of_classes)
        for c in self.classes:
            od += c.serialize()
        return od

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute handled as a generic AttributeInfo."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo records."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

    def serialize(self):
        "Return the encoded attribute body."
        od = su4(self.attribute_length)+su2(self.line_number_table_length)
        for ln in self.line_number_table:
            od += ln.serialize()
        return od

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo records."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

    def serialize(self):
        "Return the encoded attribute body."
        od = su4(self.attribute_length)+su2(self.local_variable_table_length)
        for lv in self.local_variable_table:
            od += lv.serialize()
        return od

class LocalVariableTypeAttributeInfo(AttributeInfo):

    "An attribute holding a type table decoded as LocalVariableInfo records."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        local_variable_type_table_length = u2(data[4:6])
        data = data[6:]
        self.local_variable_type_table = []
        for i in range(0, local_variable_type_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_type_table.append(local_variable)
        return data

    def serialize(self):
        "Return the encoded attribute body; the count is taken from the table."
        od = su4(self.attribute_length)+su2(len(self.local_variable_type_table))
        # Fragments are packed byte strings, so join with a bytes literal.
        od += b"".join([lv.serialize() for lv in self.local_variable_type_table])
        return od

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute handled as a generic AttributeInfo."

    pass

class VerificationTypeInfo(object):

    "Base verification type: a single tag byte."

    def __init__(self, tag):
        self.tag = tag

    def init(self, data, class_file):
        "Consume the tag byte from 'data', returning the remainder."
        self.class_file = class_file
        tag = u1(data[0:1])
        assert(tag == self.tag)
        return data[1:]

    def serialize(self):
        "Return the encoded tag byte."
        return su1(self.tag)

class TopVariableInfo(VerificationTypeInfo):
    TAG = 0

class IntegerVariableInfo(VerificationTypeInfo):
    TAG = 1

class FloatVariableInfo(VerificationTypeInfo):
    TAG = 2

class DoubleVariableInfo(VerificationTypeInfo):
    TAG = 3

class LongVariableInfo(VerificationTypeInfo):
    TAG = 4

class NullVariableInfo(VerificationTypeInfo):
    TAG = 5

class UninitializedThisVariableInfo(VerificationTypeInfo):
    TAG = 6

class ObjectVariableInfo(VerificationTypeInfo):

    "Verification type carrying a constant pool index after the tag."

    TAG = 7

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(ObjectVariableInfo, self).init(data, class_file)
        self.cpool_index = u2(data)
        return data[2:]

    def serialize(self):
        "Return the encoded tag and constant pool index."
        return super(ObjectVariableInfo, self).serialize() + su2(self.cpool_index)

class UninitializedVariableInfo(VerificationTypeInfo):

    "Verification type carrying an offset after the tag."

    TAG = 8

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(UninitializedVariableInfo, self).init(data, class_file)
        self.offset = u2(data)
        return data[2:]

    def serialize(self):
        "Return the encoded tag and offset."
        return super(UninitializedVariableInfo, self).serialize() + su2(self.offset)

VARIABLE_INFO_CLASSES = (TopVariableInfo, IntegerVariableInfo, FloatVariableInfo, DoubleVariableInfo,
                         LongVariableInfo, NullVariableInfo, UninitializedThisVariableInfo,
                         ObjectVariableInfo, UninitializedVariableInfo)
VARIABLE_INFO_TAG_MAP = dict([(cls.TAG, cls) for cls in VARIABLE_INFO_CLASSES])

# Exception
class UnknownVariableInfo(Exception):

    "Raised for a verification type tag with no registered class."

    def __init__(self, tag):
        self.tag = tag

    def __str__(self):
        return repr(self.tag)

def create_verification_type_info(data):
    """
    Return an undecoded verification type object chosen by the tag at the
    start of 'data'. Raise UnknownVariableInfo for an unregistered tag.
    """
    # Does not consume data, just does lookahead
    tag = u1(data[0:1])
    if tag in VARIABLE_INFO_TAG_MAP:
        return VARIABLE_INFO_TAG_MAP[tag](tag)
    else:
        raise UnknownVariableInfo(tag)


class StackMapFrame(object):

    "Base stack map frame: a single frame type byte."

    def __init__(self, frame_type):
        self.frame_type = frame_type

    def init(self, data, class_file):
        "Consume the frame type byte from 'data', returning the remainder."
        self.class_file = class_file
        frame_type = u1(data[0:1])
        assert(frame_type == self.frame_type)
        return data[1:]

    def serialize(self):
        "Return the encoded frame type byte."
        return su1(self.frame_type)

class SameFrame(StackMapFrame):
    TYPE_LOWER = 0
    TYPE_UPPER = 63

class SameLocals1StackItemFrame(StackMapFrame):

    "Frame with one stack item; the offset delta is implied by the frame type."

    TYPE_LOWER = 64
    TYPE_UPPER = 127

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(SameLocals1StackItemFrame, self).init(data, class_file)
        self.offset_delta = self.frame_type - 64
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)

    def serialize(self):
        "Return the encoded frame."
        return super(SameLocals1StackItemFrame, self).serialize()+self.stack[0].serialize()

class SameLocals1StackItemFrameExtended(StackMapFrame):

    "Frame with an explicit offset delta and one stack item."

    TYPE_LOWER = 247
    TYPE_UPPER = 247

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(SameLocals1StackItemFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)

    def serialize(self):
        "Return the encoded frame."
        return super(SameLocals1StackItemFrameExtended, self).serialize()+su2(self.offset_delta)+self.stack[0].serialize()

class ChopFrame(StackMapFrame):

    "Frame holding only an explicit offset delta."

    TYPE_LOWER = 248
    TYPE_UPPER = 250

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(ChopFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded frame."
        return super(ChopFrame, self).serialize()+su2(self.offset_delta)

class SameFrameExtended(StackMapFrame):

    "Frame holding only an explicit offset delta."

    TYPE_LOWER = 251
    TYPE_UPPER = 251

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(SameFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded frame."
        return super(SameFrameExtended, self).serialize()+su2(self.offset_delta)

class AppendFrame(StackMapFrame):

    "Frame with an offset delta and (frame type - 251) local entries."

    TYPE_LOWER = 252
    TYPE_UPPER = 254

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(AppendFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        num_locals = self.frame_type - 251
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        return data

    def serialize(self):
        "Return the encoded frame."
        od = super(AppendFrame, self).serialize()+su2(self.offset_delta)
        od += b"".join([local.serialize() for local in self.locals])
        return od

class FullFrame(StackMapFrame):

    "Frame with an offset delta and counted lists of locals and stack items."

    TYPE_LOWER = 255
    TYPE_UPPER = 255

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(FullFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        num_locals = u2(data[2:4])
        data = data[4:]
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        num_stack_items = u2(data[0:2])
        data = data[2:]
        self.stack = []
        for ii in range(num_stack_items):
            stack_item = create_verification_type_info(data)
            data = stack_item.init(data, class_file)
            self.stack.append(stack_item)
        return data

    def serialize(self):
        "Return the encoded frame; counts are taken from the lists."
        od = super(FullFrame, self).serialize()+su2(self.offset_delta)+su2(len(self.locals))
        od += b"".join([local.serialize() for local in self.locals])
        od += su2(len(self.stack))
        od += b"".join([item.serialize() for item in self.stack])
        return od

FRAME_CLASSES = (SameFrame, SameLocals1StackItemFrame, SameLocals1StackItemFrameExtended,
                 ChopFrame, SameFrameExtended, AppendFrame, FullFrame)

# Exception
class UnknownStackFrame(Exception):

    "Raised for a frame type outside every registered frame class range."

    def __init__(self, frame_type):
        self.frame_type = frame_type

    def __str__(self):
        return repr(self.frame_type)

def create_stack_frame(data):
    """
    Return an undecoded frame object chosen by the frame type at the start of
    'data'. Raise UnknownStackFrame where no class covers the frame type.
    """
    # Does not consume data, just does lookahead
    frame_type = u1(data[0:1])
    for cls in FRAME_CLASSES:
        if frame_type >= cls.TYPE_LOWER and frame_type <= cls.TYPE_UPPER:
            return cls(frame_type)
    raise UnknownStackFrame(frame_type)

class StackMapTableAttributeInfo(AttributeInfo):

    "An attribute holding a counted list of stack map frames."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_entries = u2(data[4:6])
        self.entries = []
        data = data[6:]
        for i in range(0, num_entries):
            frame = create_stack_frame(data)
            data = frame.init(data, class_file)
            self.entries.append(frame)
        return data

    def serialize(self):
        "Return the encoded attribute body; the count is taken from the list."
        od = su4(self.attribute_length)+su2(len(self.entries))
        od += b"".join([e.serialize() for e in self.entries])
        return od


class EnclosingMethodAttributeInfo(AttributeInfo):

    "An attribute holding a class index and a method index."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.class_index = u2(data[4:6])
        self.method_index = u2(data[6:8])
        return data[8:]

    def serialize(self):
        "Return the encoded attribute body."
        return su4(self.attribute_length)+su2(self.class_index)+su2(self.method_index)


class SignatureAttributeInfo(AttributeInfo):

    "An attribute holding a single signature index."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.signature_index = u2(data[4:6])
        return data[6:]

    def serialize(self):
        "Return the encoded attribute body."
        return su4(self.attribute_length)+su2(self.signature_index)


class SourceDebugExtensionAttributeInfo(AttributeInfo):

    "An attribute whose payload is kept as raw debug extension bytes."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.debug_extension = data[4:(4 + self.attribute_length)]
        return data[(4+ self.attribute_length):]

    def serialize(self):
        "Return the encoded attribute body."
        return su4(self.attribute_length)+self.debug_extension


class ElementValue(object):

    "Base annotation element value: a single-character tag."

    def __init__(self, tag):
        self.tag = tag

    def init(self, data, class_file):
        "Consume the tag byte from 'data', returning the remainder."
        self.class_file = class_file
        tag = chr(u1(data[0:1]))
        assert(tag == self.tag)
        return data[1:]

    def serialize(self):
        "Return the encoded tag byte."
        return su1(ord(self.tag))

class ConstValue(ElementValue):

    "Element value holding a constant value index."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(ConstValue, self).init(data, class_file)
        self.const_value_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded element value."
        return super(ConstValue, self).serialize()+su2(self.const_value_index)

class EnumConstValue(ElementValue):

    "Element value holding type name and constant name indexes."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(EnumConstValue, self).init(data, class_file)
        self.type_name_index = u2(data[0:2])
        self.const_name_index = u2(data[2:4])
        return data[4:]

    def serialize(self):
        "Return the encoded element value."
        return super(EnumConstValue, self).serialize()+su2(self.type_name_index)+su2(self.const_name_index)

class ClassInfoValue(ElementValue):

    "Element value holding a class info index."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(ClassInfoValue, self).init(data, class_file)
        self.class_info_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded element value."
        return super(ClassInfoValue, self).serialize()+su2(self.class_info_index)

class AnnotationValue(ElementValue):

    "Element value holding a nested Annotation."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(AnnotationValue, self).init(data, class_file)
        self.annotation_value = Annotation()
        return self.annotation_value.init(data, class_file)

    def serialize(self):
        "Return the encoded element value."
        return super(AnnotationValue, self).serialize()+self.annotation_value.serialize()

class ArrayValue(ElementValue):

    "Element value holding a counted list of element values."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        data = super(ArrayValue, self).init(data, class_file)
        num_values = u2(data[0:2])
        data = data[2:]
        self.values = []
        for ii in range(num_values):
            element_value = create_element_value(data)
            data = element_value.init(data, class_file)
            self.values.append(element_value)
        return data

    def serialize(self):
        "Return the encoded element value; the count is taken from the list."
        od = super(ArrayValue, self).serialize()+su2(len(self.values))
        od += b"".join([v.serialize() for v in self.values])
        return od

# Exception
class UnknownElementValue(Exception):

    "Raised for an element value tag that is not recognised."

    def __init__(self, tag):
        self.tag = tag

    def __str__(self):
        return repr(self.tag)

def create_element_value(data):
    """
    Return an undecoded element value object chosen by the tag character at
    the start of 'data', without consuming it. Raise UnknownElementValue for
    an unrecognised tag.
    """
    tag = chr(u1(data[0:1]))
    if tag in ('B', 'C', 'D', 'F', 'I', 'J', 'S', 'Z', 's'):
        return ConstValue(tag)
    elif tag == 'e':
        return EnumConstValue(tag)
    elif tag == 'c':
        return ClassInfoValue(tag)
    elif tag == '@':
        return AnnotationValue(tag)
    elif tag == '[':
        return ArrayValue(tag)
    else:
        raise UnknownElementValue(tag)


class Annotation(object):

    "A type index with a list of (element name index, element value) pairs."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.type_index = u2(data[0:2])
        num_element_value_pairs = u2(data[2:4])
        data = data[4:]
        self.element_value_pairs = []
        for ii in range(num_element_value_pairs):
            element_name_index = u2(data[0:2])
            data = data[2:]
            element_value = create_element_value(data)
            data = element_value.init(data, class_file)
            self.element_value_pairs.append((element_name_index, element_value))
        return data

    def serialize(self):
        "Return the encoded annotation; the count is taken from the list."
        od = su2(self.type_index)+su2(len(self.element_value_pairs))
        od += b"".join([su2(name_index)+value.serialize()
                        for name_index, value in self.element_value_pairs])
        return od


class RuntimeAnnotationsAttributeInfo(AttributeInfo):

    "An attribute holding a counted list of annotations."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_annotations = u2(data[4:6])
        data = data[6:]
        self.annotations = []
        for ii in range(num_annotations):
            annotation = Annotation()
            data = annotation.init(data, class_file)
            self.annotations.append(annotation)
        return data

    def serialize(self):
        "Return the encoded attribute body; the count is taken from the list."
        od = su4(self.attribute_length)+su2(len(self.annotations))
        od += b"".join([a.serialize() for a in self.annotations])
        return od

class RuntimeVisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):
    pass

class RuntimeInvisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):
    pass

class RuntimeParameterAnnotationsAttributeInfo(AttributeInfo):

    "An attribute holding one counted list of annotations per parameter."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # The parameter count is a single byte, unlike the annotation counts.
        num_parameters = u1(data[4:5])
        data = data[5:]
        self.parameter_annotations = []
        for ii in range(num_parameters):
            num_annotations = u2(data[0:2])
            data = data[2:]
            annotations = []
            for jj in range(num_annotations):
                annotation = Annotation()
                data = annotation.init(data, class_file)
                annotations.append(annotation)
            self.parameter_annotations.append(annotations)
        return data

    def serialize(self):
        "Return the encoded attribute body; counts are taken from the lists."
        od = su4(self.attribute_length)+su1(len(self.parameter_annotations))
        for pa in self.parameter_annotations:
            od += su2(len(pa))
            od += b"".join([a.serialize() for a in pa])
        return od

class RuntimeVisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):
    pass

class RuntimeInvisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):
    pass

class AnnotationDefaultAttributeInfo(AttributeInfo):

    "An attribute holding a single element value."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        data = data[4:]
        self.default_value = create_element_value(data)
        return self.default_value.init(data, class_file)

    def serialize(self):
        "Return the encoded attribute body."
        return su4(self.attribute_length)+self.default_value.serialize()


# Child classes of the attribute information classes.

class ExceptionInfo:

    "An exception table entry: start, end and handler positions plus catch type."

    def init(self, data):
        "Decode from 'data', returning the unconsumed remainder."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

    def serialize(self):
        "Return the encoded entry."
        return su2(self.start_pc)+su2(self.end_pc)+su2(self.handler_pc)+su2(self.catch_type)

class InnerClassInfo(NameUtils):

    "An inner class table entry."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

    def serialize(self):
        "Return the encoded entry."
        return su2(self.inner_class_info_index)+su2(self.outer_class_info_index)+su2(self.name_index)+su2(self.inner_class_access_flags)

class LineNumberInfo:

    "A line number table entry: a start position and a line number."

    def init(self, data):
        "Decode from 'data', returning the unconsumed remainder."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

    def serialize(self):
        "Return the encoded entry."
        return su2(self.start_pc)+su2(self.line_number)

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A local variable table entry."

    def init(self, data, class_file):
        "Decode from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of the variable."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def serialize(self):
        "Return the encoded entry."
        return su2(self.start_pc)+su2(self.length)+su2(self.name_index)+su2(self.descriptor_index)+su2(self.index)

# Exceptions.
class UnknownTag(Exception):

    "Raised when a constant pool entry carries a tag that cannot be decoded."

    def __init__(self, tag):
        # Populate Exception.args as well, so that repr() and pickling of the
        # exception carry the offending tag.
        Exception.__init__(self, tag)
        self.tag = tag

    def __str__(self):
        return repr(self.tag)

class UnknownAttribute(Exception):

    """
    Raised when an attribute name has no entry in ATTR_NAMES_TO_CLASS. Also
    raised by ClassFile when the magic number is wrong, in which case 'name'
    holds the offending magic value.
    """

    def __init__(self, name):
        Exception.__init__(self, name)
        self.name = name

    def __str__(self):
        # Mirror UnknownTag so that an uncaught exception shows what was
        # rejected instead of an empty message.
        return repr(self.name)

# Mapping from attribute names, as stored in the constant pool, to the classes
# used to decode and serialize attributes of that name.

ATTR_NAMES_TO_CLASS = {
    "SourceFile": SourceFileAttributeInfo,
    "ConstantValue": ConstantValueAttributeInfo,
    "Code": CodeAttributeInfo,
    "Exceptions": ExceptionsAttributeInfo,
    "InnerClasses": InnerClassesAttributeInfo,
    "Synthetic": SyntheticAttributeInfo,
    "LineNumberTable": LineNumberAttributeInfo,
    "LocalVariableTable": LocalVariableAttributeInfo,
    "Deprecated": DeprecatedAttributeInfo,
    # Java SE 1.6, class file >= 50.0, VMSpec v3 s4.7.4
    "StackMapTable": StackMapTableAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.7
    "EnclosingMethod": EnclosingMethodAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.9
    "Signature": SignatureAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.11
    "SourceDebugExtension": SourceDebugExtensionAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.14
    "LocalVariableTypeTable": LocalVariableTypeAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.16
    "RuntimeVisibleAnnotations": RuntimeVisibleAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.17
    "RuntimeInvisibleAnnotations": RuntimeInvisibleAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.18
    "RuntimeVisibleParameterAnnotations": RuntimeVisibleParameterAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.19
    "RuntimeInvisibleParameterAnnotations": RuntimeInvisibleParameterAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.20
    "AnnotationDefault": AnnotationDefaultAttributeInfo,
}

# Abstractions for the main structures.
class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        Raises UnknownAttribute, carrying the value found, if 's' does not
        begin with the 0xCAFEBABE magic number.
        """

        # Built lazily by _serialize_attributes: maps an attribute class to
        # the 1-based constant pool index of the attribute's name.
        self.attribute_class_to_index = None
        magic = u4(s[0:])
        if magic != 0xCAFEBABE:
            raise UnknownAttribute(magic)
        self.minorv, self.majorv = u2(s[4:]), u2(s[6:])
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def serialize(self):

        "Return the class file as a string, in the order it was decoded."

        od = su4(0xCAFEBABE)+su2(self.minorv)+su2(self.majorv)
        od += self._serialize_constants()
        od += self._serialize_access_flags()
        od += self._serialize_this_class()
        od += self._serialize_super_class()
        od += self._serialize_interfaces()
        od += self._serialize_fields()
        od += self._serialize_methods()
        od += self._serialize_attributes(self.attributes)
        return od

    def _encode_const(self, c):

        """
        Return the tag byte followed by the serialized form of constant 'c'.
        An empty string is returned for anything unrecognised, which includes
        the None placeholders stored after "large" entries.
        """

        od = ''
        if isinstance(c, Utf8Info):
            od += su1(1)
        elif isinstance(c, IntegerInfo):
            od += su1(3)
        elif isinstance(c, FloatInfo):
            od += su1(4)
        elif isinstance(c, LongInfo):
            od += su1(5)
        elif isinstance(c, DoubleInfo):
            od += su1(6)
        elif isinstance(c, ClassInfo):
            od += su1(7)
        elif isinstance(c, StringInfo):
            od += su1(8)
        elif isinstance(c, FieldRefInfo):
            od += su1(9)
        elif isinstance(c, InterfaceMethodRefInfo): # check subclass first
            od += su1(11)
        elif isinstance(c, MethodRefInfo):
            od += su1(10)
        elif isinstance(c, NameAndTypeInfo):
            od += su1(12)
        else:
            return od
        od += c.serialize()
        return od

    def _decode_const(self, s):

        """
        Decode one constant from the start of 's', returning the constant and
        the remaining data. Raises UnknownTag for an unrecognised tag byte.
        """

        tag = u1(s[0:1])
        if tag == 1:
            const = Utf8Info()
        elif tag == 3:
            const = IntegerInfo()
        elif tag == 4:
            const = FloatInfo()
        elif tag == 5:
            const = LongInfo()
        elif tag == 6:
            const = DoubleInfo()
        elif tag == 7:
            const = ClassInfo()
        elif tag == 8:
            const = StringInfo()
        elif tag == 9:
            const = FieldRefInfo()
        elif tag == 10:
            const = MethodRefInfo()
        elif tag == 11:
            const = InterfaceMethodRefInfo()
        elif tag == 12:
            const = NameAndTypeInfo()
        else:
            raise UnknownTag(tag)

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constant pool whose declared size is 'count', returning the
        list of constants and the remaining data. The list is offset by one
        from class file indexes: index n in the file is element n - 1.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Create 'number' instances of 'cls', initialising each in turn from 's'
        via its init method, and return the instances and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode one attribute from the start of 's', returning the attribute
        and the remaining data. Raises UnknownAttribute if the attribute's
        name is not in ATTR_NAMES_TO_CLASS.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        if constant_name in ATTR_NAMES_TO_CLASS:
            attribute = ATTR_NAMES_TO_CLASS[constant_name]()
        else:
            raise UnknownAttribute(constant_name)
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _serialize_constants(self):
        # The stored count is one more than the number of slots; the None
        # placeholders after "large" entries count as slots but emit nothing.
        return su2(len(self.constants)+1)+"".join([self._encode_const(c) for c in self.constants])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _serialize_access_flags(self):
        return su2(self.access_flags)

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _serialize_this_class(self):
        return su2(self.constants.index(self.this_class)+1)

    def _serialize_super_class(self):

        "Return the two-byte super class index, or zero if there is none."

        # _get_super_class stores None for index 0. Searching the constants
        # for None would either fail or find the padding slot that follows a
        # "large" entry, so that case is written out directly.
        if self.super_class is None:
            return su2(0)
        return su2(self.constants.index(self.super_class)+1)

    def _get_super_class(self, s):
        index = u2(s[0:2])
        if index != 0:
            return self.constants[index - 1], s[2:]
        else:
            return None, s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _serialize_interfaces(self):
        return su2(len(self.interfaces))+"".join([su2(self.constants.index(interf)+1) for interf in self.interfaces])

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _serialize_fields(self):
        od = su2(len(self.fields))
        od += "".join([f.serialize() for f in self.fields])
        return od

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _attribute_name_index(self, attribute):

        """
        Return the constant pool index recorded for the name of 'attribute'.
        Raises UnknownAttribute, carrying the attribute's class name, if no
        index is recorded for its class.
        """

        mapping = self.attribute_class_to_index
        cls = attribute.__class__
        # Prefer the exact class, so that an attribute class which subclasses
        # another registered class is never given its parent's name.
        if cls in mapping:
            return mapping[cls]
        for classtype, name_index in mapping.items():
            if isinstance(attribute, classtype):
                return name_index
        # Writing the attribute without its name index would silently corrupt
        # the output, so fail instead.
        raise UnknownAttribute(cls.__name__)

    def _serialize_attributes(self, attrs):

        """
        Return the attribute count followed by each attribute in 'attrs',
        each one preceded by the constant pool index of its name.
        """

        od = su2(len(attrs))
        if not attrs:
            return od
        if self.attribute_class_to_index is None:
            # Scan the constants once for the names of known attributes; the
            # result is kept for later calls.
            self.attribute_class_to_index = {}
            index = 0
            for c in self.constants:
                index += 1
                if isinstance(c, Utf8Info) and str(c) in ATTR_NAMES_TO_CLASS:
                    self.attribute_class_to_index[ATTR_NAMES_TO_CLASS[str(c)]] = index
        for attribute in attrs:
            od += su2(self._attribute_name_index(attribute))
            od += attribute.serialize()
        return od

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

    def _serialize_methods(self):
        od = su2(len(self.methods))
        od += "".join([m.serialize() for m in self.methods])
        return od


if __name__ == "__main__":
    # Round-trip check: decode the named class file and make sure that
    # serializing it again reproduces the input exactly.
    import sys
    f = open(sys.argv[1], "rb")
    try:
        in_data = f.read()
    finally:
        # Close the file even if reading fails.
        f.close()
    c = ClassFile(in_data)
    out_data = c.serialize()
    assert in_data == out_data, "serialized output differs from input"

# vim: tabstop=4 expandtab shiftwidth=4