#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each decoder reads a big-endian value from the start of 'data' and ignores
# any bytes beyond those it needs.

def u1(data):
    "Return the first byte of 'data' as an unsigned 8-bit integer."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the first two bytes of 'data' as an unsigned 16-bit integer."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Return the first two bytes of 'data' as a signed 16-bit integer."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Return the first four bytes of 'data' as an unsigned 32-bit integer."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the first four bytes of 'data' as a signed 32-bit integer."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the first eight bytes of 'data' as a signed 64-bit integer."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the first four bytes of 'data' as a 32-bit float."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the first eight bytes of 'data' as a 64-bit float."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# Maps the leading character of a descriptor to a Python type name.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Maps the Python type names used above to a default value for that type.
type_names_to_default_values = {
    "int" : 0,
    "str" : u"",
    "float" : 0.0,
    "object" : None,
    "bool" : 0, # NOTE: Should be False.
    "list" : []
    }

def get_default_for_type(type_name):

    """
    Return the default value registered for 'type_name' in
    'type_names_to_default_values', or None if the name is not known.

    List defaults are returned as a fresh copy, so that a caller mutating the
    result cannot alter the default seen by every later caller.
    """

    default = type_names_to_default_values.get(type_name)
    if isinstance(default, list):
        return list(default)
    return default

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share a value.

PUBLIC = 0x0001
PRIVATE = 0x0002
PROTECTED = 0x0004
STATIC = 0x0008
FINAL = 0x0010
SUPER = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE = 0x0040
TRANSIENT = 0x0080
NATIVE = 0x0100
INTERFACE = 0x0200
ABSTRACT = 0x0400
STRICT = 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the iterable 'desired' is set in the bit
    field 'flags'. An empty 'desired' is always satisfied.
    """

    # Combine the desired flags without relying on the 'reduce' built-in,
    # which only exists in Python 2.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.

class PythonMethodUtils:

    """
    A mix-in producing Python-friendly method names. The host class must
    provide 'get_name' and 'get_descriptor' (returning a method descriptor
    as produced by the 'get_method_descriptor' function).
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):

        """
        Return the method name, with "<init>" and "<clinit>" translated to
        "__init__" and "__clinit__" respectively.
        """

        name = str(self.get_name())
        if name == "<init>":
            return "__init__"
        elif name == "<clinit>":
            return "__clinit__"
        else:
            return name

    def get_python_name(self):

        """
        Return the unqualified name followed by an encoding of the parameter
        types. "__clinit__" is returned without any such suffix.
        """

        name = self.get_unqualified_python_name()
        if name == "__clinit__":
            return name
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the encoded parameter types joined using 'symbol_sep'."
        parameter_types = self.get_descriptor()[0]
        return self.symbol_sep.join(
            [self._get_type_as_name(parameter_type) for parameter_type in parameter_types]
            )

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base type, object type, array type) tuple
        'descriptor_type'. The suffix 's' accumulates one 'array_sep' for
        each array dimension unwrapped on the way to the element type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    "A mix-in producing dotted Python names. The host must provide 'get_name'."

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Names starting with "[" are
        parsed as field descriptors and the name of the innermost element
        type is used instead.
        """

        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if not name.startswith("["):
            return name.replace("/", ".")
        else:
            return self._get_type_name(get_field_descriptor(name)).replace("/", ".")

    def _get_type_name(self, descriptor_type):
        "Return the object type, or the mapped name of the base type, found inside any arrays."
        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    "A mix-in for objects having 'class_file' and 'name_index' attributes."

    def get_name(self):
        "Return the constant found at 'name_index' (1-based), or None if the index is zero."
        if self.name_index != 0:
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    A mix-in for objects having 'class_file', 'class_index' and
    'name_and_type_index' attributes. Indexes into the constants are 1-based.
    """

    def get_name(self):
        "Return the name held by the referenced name-and-type constant, or None."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        "Return the field descriptor of the referenced name-and-type constant, or None."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        "Return the method descriptor of the referenced name-and-type constant, or None."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_class(self):
        "Return the constant found at 'class_index'."
        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.

class DescriptorError(ValueError, AssertionError):

    """
    Raised for a malformed descriptor. This is also an AssertionError because
    these conditions were previously checked using assert statements, which
    are silently removed when Python runs with optimisation enabled.
    """

def get_method_descriptor(s):

    """
    Parse the method descriptor 's', such as "(I[Ljava/lang/String;)V".

    Return a tuple (parameters, return type) where 'parameters' is a list of
    (base type, object type, array type) tuples and the return type is such a
    tuple, or None where the descriptor ends with "V".

    Raise DescriptorError if 's' does not start with "(" or if an object type
    lacks its terminating ";". A descriptor which ends prematurely still
    causes an IndexError.
    """

    if s[0] != "(":
        raise DescriptorError("method descriptor does not start with '(': %r" % (s,))
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # Here, s[0] is the closing bracket and s[1] starts the return type.
    if s[1] != "V":
        return_type, s = _get_field_type(s[1:])
    else:
        return_type, s = None, s[1:]
    return params, return_type

def get_field_descriptor(s):

    """
    Parse the field descriptor 's' and return its
    (base type, object type, array type) tuple. Any text following the first
    complete type is ignored.
    """

    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):
    "Parse one parameter type from the start of 's'; see '_get_field_type'."
    return _get_field_type(s)

def _get_component_type(s):
    "Parse the component type of an array from the start of 's'; see '_get_field_type'."
    return _get_field_type(s)

def _get_field_type(s):

    """
    Parse one type from the start of 's' and return a tuple
    ((base type, object type, array type), remaining text). The object type
    is only set for base type "L" and the array type only for base type "[".
    """

    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):
    "Return (first character, remaining text), or (None, s) if 's' is empty."
    if len(s) > 0:
        return s[0], s[1:]
    else:
        return None, s

def _get_object_type(s):

    """
    Return (class name, remaining text) where the class name is the text
    before the first ";" in 's', or (None, s) if 's' is empty. Raise
    DescriptorError if 's' is not empty but contains no ";".
    """

    if len(s) > 0:
        s_end = s.find(";")
        if s_end == -1:
            raise DescriptorError("object type is not terminated by ';': %r" % (s,))
        return s[:s_end], s[s_end+1:]
    else:
        return None, s

def _get_array_type(s):
    "Return (component type, remaining text), or (None, s) if 's' is empty."
    if len(s) > 0:
        return _get_component_type(s)
    else:
        return None, s

# Constant information.
# Each 'init' method decodes the object from the start of 'data', remembers
# the 'class_file' and returns the data remaining after the decoded bytes.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant referring to a class by the index of its name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name-and-type index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference to a field."

    def get_descriptor(self):
        "Return the parsed field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference to a method."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A reference to an interface method, decoded exactly like MethodRefInfo."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant pairing a name index with a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding a two-byte length followed by that many bytes of text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        "Return the undecoded bytes."
        return self.bytes

    def __unicode__(self):
        # NOTE(review): The class file format is believed to use a modified
        # NOTE(review): form of UTF-8; confirm that plain "utf-8" decoding is
        # NOTE(review): sufficient for the inputs of interest.
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        "Return the undecoded bytes as the value of this constant."
        return str(self)

class StringInfo:

    "A string constant referring to another constant holding its text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        "Return the text of the referenced constant, converted using str."
        return str(self)

class SmallNumInfo:

    "The base of the numeric constants occupying four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A signed 32-bit integer constant."

    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A 32-bit floating point constant."

    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    "The base of the numeric constants occupying eight bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):

    "A signed 64-bit integer constant."

    def get_value(self):
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):

    "A 64-bit floating point constant."

    def get_value(self):
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):

    """
    The common structure of fields and methods: access flags, a name index, a
    descriptor index and a table of attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field defined by the class."

    def get_descriptor(self):
        "Return the parsed field descriptor of this field."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method defined by the class."

    def get_descriptor(self):
        "Return the parsed method descriptor of this method."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute: a four-byte length followed by that many bytes of
    undecoded information, kept in 'info'.
    """

    def init(self, data, class_file):
        # Remember the class file, as every other 'init' method does, so that
        # attributes relying on this generic method also provide it.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

# Each 'init' method below receives the data following the attribute name
# index, remembers the 'class_file' and returns the data remaining after the
# attribute.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a constant naming the source file."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

class AttributeFormatError(ValueError, AssertionError):

    """
    Raised for an attribute whose declared length is not acceptable. This is
    also an AssertionError because the condition was previously checked using
    an assert statement, which is silently removed when Python runs with
    optimisation enabled.
    """

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of the constant providing a value."

    def init(self, data, class_file):

        """
        Decode the attribute, raising AttributeFormatError unless the
        declared length is exactly the two bytes of the constant index.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        if self.attribute_length != 2:
            raise AttributeFormatError(
                "ConstantValue attribute has length %d instead of 2" % self.attribute_length
                )
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the constant found at 'constant_value_index' (1-based)."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding stack and local variable limits, the code itself
    (kept undecoded in 'code'), a table of ExceptionInfo objects and a nested
    table of attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        # Each entry consumes its own bytes and hands back the remainder.
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of two-byte indexes of exception constants."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        # The table starts after the length (4 bytes) and count (2 bytes).
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referred to by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

# The table-based attributes below share one layout: a four-byte attribute
# length, a two-byte entry count and then the entries themselves, each of
# which decodes itself and returns the data left over.

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo objects in 'classes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        remaining = data[6:]
        for _ in range(self.number_of_classes):
            entry = InnerClassInfo()
            remaining = entry.init(remaining, self.class_file)
            self.classes.append(entry)
        return remaining

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded generically by AttributeInfo."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo objects in 'line_number_table'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        remaining = data[6:]
        for _ in range(self.line_number_table_length):
            entry = LineNumberInfo()
            remaining = entry.init(remaining)
            self.line_number_table.append(entry)
        return remaining

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo objects in 'local_variable_table'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        remaining = data[6:]
        for _ in range(self.local_variable_table_length):
            entry = LocalVariableInfo()
            remaining = entry.init(remaining, self.class_file)
            self.local_variable_table.append(entry)
        return remaining

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded generically by AttributeInfo."

    pass

# Child classes of the attribute information classes.

# Each 'init' method below decodes the object from the start of 'data' and
# returns the data remaining after the decoded bytes.

class ExceptionInfo:

    "An exception table entry of eight bytes, found in a code attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An inner class table entry of eight bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A line number table entry of four bytes."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A local variable table entry of ten bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of this variable."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the offending tag when a constant has an unsupported tag."

    pass

class UnknownAttribute(Exception):

    "Raised with the offending name when an attribute has an unsupported name."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # The classes used to decode constants, indexed by tag.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # The classes used to decode attributes, indexed by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        UnknownTag or UnknownAttribute is raised upon encountering a constant
        tag or an attribute name which is not supported.
        """

        # The first eight bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    # The methods below all take the undecoded data 's' and return a tuple
    # of (decoded result, remaining data).

    def _decode_const(self, s):
        "Decode the constant starting with a tag byte at the start of 's'."
        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = const_class()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constants for a table declaring the given 'count'. Since
        indexes are 1-based, 'count' exceeds the number of slots by one.
        """

        constants = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            constants.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                constants.append(None)
                i += 1
            i += 1
        return constants, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' consecutive objects of the class 'cls'."
        items = []
        for i in range(0, number):
            item = cls()
            s = item.init(s, self)
            items.append(item)
        return items, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode one attribute, choosing the class from the name found in the
        constant referred to by the leading two-byte index.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            # NOTE(review): The specification is believed to ask readers to
            # NOTE(review): ignore unrecognised attributes; rejecting them is
            # NOTE(review): retained here since callers may depend on it.
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' consecutive attributes."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # The methods below read a two-byte count or index from the start of 's'
    # before decoding whatever it describes.

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):
        "Return the constant for the superclass, or None where the index is zero."
        index = u2(s[0:2])
        if index != 0:
            return self.constants[index - 1], s[2:]
        else:
            return None, s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    f = open(sys.argv[1], "rb")
    # Close the file even where the class file cannot be decoded.
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4