#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
#
# Each decoder interprets the leading bytes of 'data' as a big-endian value
# and ignores anything after them. struct.error is raised when 'data' holds
# fewer bytes than the value requires.

def u1(data):
    "Return the unsigned 8-bit integer at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Return the signed 16-bit integer at the start of 'data'."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit floating point value at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit floating point value at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# Maps the first character of a field descriptor to the name of the Python
# type used to represent values of that kind.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share the value
# 0x0020: both names are bound to the same bit.
PUBLIC       = 0x0001
PRIVATE      = 0x0002
PROTECTED    = 0x0004
STATIC       = 0x0008
FINAL        = 0x0010
SUPER        = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE     = 0x0040
TRANSIENT    = 0x0080
NATIVE       = 0x0100
INTERFACE    = 0x0200
ABSTRACT     = 0x0400
STRICT       = 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the iterable 'desired' is set in the bit mask
    'flags'. An empty 'desired' is always satisfied.
    """

    # Fold with a plain loop rather than reduce(): reduce is only a builtin in
    # Python 2, whereas this form behaves identically on every version.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
class PythonMethodUtils:

    """
    A mix-in which derives Python-compatible names for methods. The class it
    is mixed into must provide 'get_name' and 'get_descriptor', the latter
    returning a (parameters, return type) pair as produced by the
    module-level 'get_method_descriptor' function.
    """

    symbol_sep = "___"      # was "$"
    type_sep = "__"         # replaces "/"
    array_sep = "_array_"   # was "[]"
    base_seps = ("_", "_")  # was "<" and ">"

    def get_unqualified_python_name(self):

        """
        Return the method name without any parameter information, mapping the
        special names "<init>" and "<clinit>" to "__init__" and "__clinit__".
        """

        # Convert once; the name may be an object which is only a string
        # after conversion.
        name = str(self.get_name())
        if name == "<init>":
            return "__init__"
        elif name == "<clinit>":
            return "__clinit__"
        return name

    def get_python_name(self):

        """
        Return the method name qualified by its parameter types. The name
        "__clinit__" is returned as it is, without qualification.
        """

        name = self.get_unqualified_python_name()
        if name == "__clinit__":
            return name
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the parameter types joined by 'symbol_sep' as a single name."

        parameters = self.get_descriptor()[0]
        return self.symbol_sep.join(
            [self._get_type_as_name(parameter) for parameter in parameters]
            )

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the given 'descriptor_type', a (base type, object
        type, array type) tuple. The suffix 's' accumulates one 'array_sep'
        for each array dimension encountered while descending to the element
        type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    """
    A mix-in which derives dotted Python-style names. The class it is mixed
    into must provide 'get_name'.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". A name starting with "[" is
        treated as an array descriptor and the name of its element type is
        returned instead.
        """

        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if not name.startswith("["):
            return name.replace("/", ".")
        # The bare name below is the module-level function, not a method.
        return self._get_type_name(get_field_descriptor(name)).replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the object type name or mapped base type name for the given
        'descriptor_type', descending through any array dimensions.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    """
    A mix-in for objects having 'name_index' and 'class_file' attributes.
    """

    def get_name(self):

        """
        Return the constant referenced by 'name_index', or None if the index
        is zero. Constant indexes are one-based, hence the adjustment.
        """

        if self.name_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    """
    A mix-in for objects having 'name_and_type_index', 'class_index' and
    'class_file' attributes. Each accessor delegates to the constant
    referenced by 'name_and_type_index' and returns None if that index is
    zero.
    """

    def get_name(self):
        "Return the name provided by the referenced name-and-type constant."
        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_and_type_index - 1].get_name()

    def get_field_descriptor(self):
        "Return the referenced constant's descriptor parsed as a field type."
        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()

    def get_method_descriptor(self):
        "Return the referenced constant's descriptor parsed as a method type."
        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()

    def get_class(self):
        "Return the constant referenced by 'class_index'."
        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.
#
# A parsed type is a (base type, object type, array type) tuple: the base
# type is the first character of the descriptor, the object type is the class
# name for "L" descriptors (otherwise None), and the array type is the parsed
# element type for "[" descriptors (otherwise None).

def get_method_descriptor(s):

    """
    Parse the method descriptor 's', which must start with "(", and return a
    tuple (parameter types, return type). The return type is None for "V".
    A descriptor lacking the closing ")" or a return type causes an
    IndexError.
    """

    assert s[0] == "("
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # Here s[0] is ")" and s[1] starts the return type.
    if s[1] != "V":
        return_type, s = _get_field_type(s[1:])
    else:
        return_type, s = None, s[1:]
    return params, return_type

def get_field_descriptor(s):
    "Parse the field descriptor 's' and return the parsed type."
    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):
    "Parse one parameter type from 's', returning (type, remaining text)."
    return _get_field_type(s)

def _get_component_type(s):
    "Parse an array element type from 's', returning (type, remaining text)."
    return _get_field_type(s)

def _get_field_type(s):

    """
    Parse one type from the start of 's' and return a tuple of the parsed
    (base type, object type, array type) and the remaining text.
    """

    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):
    "Return (first character, remaining text), or (None, s) if 's' is empty."
    if len(s) > 0:
        return s[0], s[1:]
    else:
        return None, s

def _get_object_type(s):

    """
    Return (class name, remaining text) where the class name is everything up
    to the first ";" in 's'. Return (None, s) if 's' is empty.
    """

    if len(s) > 0:
        s_end = s.find(";")
        assert s_end != -1
        return s[:s_end], s[s_end+1:]
    else:
        return None, s

def _get_array_type(s):
    "Return (element type, remaining text), or (None, s) if 's' is empty."
    if len(s) > 0:
        return _get_component_type(s)
    else:
        return None, s

# Constant information.
#
# Each class provides an 'init' method which decodes its fields from the start
# of 'data' and returns the data remaining after the entry.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant naming a class via 'name_index'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant referring to a member via class and name-and-type indexes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference to a field."

    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference to a method."

    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A reference to an interface method."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant pairing a name index with a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field type."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method type."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        # NOTE(review): the bytes are decoded as standard UTF-8; confirm
        # whether the class file encoding differs for any inputs of interest.
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        return str(self)

class StringInfo:

    "A constant referring to another constant which holds the string data."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        return str(self)

class SmallNumInfo:

    "The base class of constants occupying four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        "Return the constant as a signed 32-bit integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        "Return the constant as a 32-bit floating point value."
        return f4(self.bytes)

class LargeNumInfo:

    "The base class of constants occupying eight bytes, stored as two halves."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):
    def get_value(self):
        "Return the constant as a signed 64-bit integer."
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        "Return the constant as a 64-bit floating point value."
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):

    """
    The base class of field and method entries: access flags, a name index, a
    descriptor index and a table of attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        # The attributes follow the fixed fields and are decoded by the class
        # file, which also returns the data remaining after them.
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        "Return the field's descriptor as a parsed type."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        "Return the method's descriptor as (parameter types, return type)."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    The base class of attributes. Unless overridden, the attribute contents
    are kept undecoded in 'info'.
    """

    def init(self, data, class_file):

        """
        Read the four-byte attribute length and that many bytes of contents
        from 'data', returning the data which follows the attribute.
        """

        # Retain the class file, as every other 'init' method in this module
        # does, so that attributes using this implementation are not the only
        # decoded objects lacking the reference.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.
class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of the constant naming the source file."

    def init(self, data, class_file):

        """
        Decode the attribute length and source file index from 'data',
        returning the data found after those six bytes.
        """

        length_field, index_field = data[0:4], data[4:6]
        self.class_file = class_file
        self.attribute_length = u4(length_field)
        source_index = u2(index_field)
        # Permit the NameUtils mix-in.
        self.name_index = source_index
        self.sourcefile_index = source_index
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant providing a value."

    def init(self, data, class_file):

        """
        Decode the attribute length and constant index from 'data', returning
        the data found after the attribute. The attribute must occupy exactly
        six bytes including its length field.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        consumed = 4 + self.attribute_length
        assert consumed == 6
        return data[consumed:]

    def get_value(self):
        "Return the value provided by the referenced constant."
        constant = self.class_file.constants[self.constant_value_index - 1]
        return constant.get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding stack and local variable limits, the code itself, an
    exception table and a nested table of attributes.
    """

    def init(self, data, class_file):

        """
        Decode the fixed fields, the code, the exception table entries and
        the nested attributes from 'data', returning whatever data follows.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])

        # The code starts after the twelve bytes of fixed fields and is
        # followed directly by the two-byte exception table length.
        code_start = 12
        code_end = code_start + self.code_length
        table_start = code_end + 2
        self.code = data[code_start:code_end]
        self.exception_table_length = u2(data[code_end:table_start])

        # Each entry consumes its own bytes and hands back the rest.
        self.exception_table = []
        remainder = data[table_start:]
        entries_left = self.exception_table_length
        while entries_left > 0:
            entry = ExceptionInfo()
            remainder = entry.init(remainder)
            self.exception_table.append(entry)
            entries_left -= 1

        self.attributes, remainder = self.class_file._get_attributes(remainder)
        return remainder

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of indexes of exception constants."

    def init(self, data, class_file):

        """
        Decode the attribute length, the number of exceptions and that many
        two-byte indexes from 'data', returning the data after the table.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        # The indexes are packed back to back starting at offset six.
        table_end = 6 + 2 * self.number_of_exceptions
        for offset in range(6, table_end, 2):
            self.exception_index_table.append(u2(data[offset:offset+2]))
        return data[table_end:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        return self.class_file.constants[self.exception_index_table[i] - 1]
class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of inner class entries."

    def init(self, data, class_file):

        """
        Decode the attribute length, the number of classes and that many
        entries from 'data', returning the data found after the table.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        remainder = data[6:]
        entries_left = self.number_of_classes
        # Each entry consumes its own bytes and hands back the rest.
        while entries_left > 0:
            entry = InnerClassInfo()
            remainder = entry.init(remainder, self.class_file)
            self.classes.append(entry)
            entries_left -= 1
        return remainder

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded using the inherited, generic 'init' method."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of line number entries."

    def init(self, data, class_file):

        """
        Decode the attribute length, the table length and that many entries
        from 'data', returning the data found after the table.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        remainder = data[6:]
        entries_left = self.line_number_table_length
        # Each entry consumes its own bytes and hands back the rest.
        while entries_left > 0:
            entry = LineNumberInfo()
            remainder = entry.init(remainder)
            self.line_number_table.append(entry)
            entries_left -= 1
        return remainder

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of local variable entries."

    def init(self, data, class_file):

        """
        Decode the attribute length, the table length and that many entries
        from 'data', returning the data found after the table.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        remainder = data[6:]
        entries_left = self.local_variable_table_length
        # Each entry consumes its own bytes and hands back the rest.
        while entries_left > 0:
            entry = LocalVariableInfo()
            remainder = entry.init(remainder, self.class_file)
            self.local_variable_table.append(entry)
            entries_left -= 1
        return remainder

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded using the inherited, generic 'init' method."

    pass

# Child classes of the attribute information classes.
class ExceptionInfo:

    "An exception table entry consisting of four two-byte fields."

    def init(self, data):
        "Decode the entry from 'data', returning the data which follows it."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An inner class table entry consisting of four two-byte fields."

    def init(self, data, class_file):
        "Decode the entry from 'data', returning the data which follows it."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A line number table entry consisting of two two-byte fields."

    def init(self, data):
        "Decode the entry from 'data', returning the data which follows it."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A local variable table entry consisting of five two-byte fields."

    def init(self, data, class_file):
        "Decode the entry from 'data', returning the data which follows it."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the variable's descriptor as a parsed type."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the offending tag when a constant has an unsupported tag."

    pass

class UnknownAttribute(Exception):

    "Raised with the attribute name when an attribute is not supported."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # The classes used to decode constants, indexed by constant tag.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # The classes used to decode attributes, indexed by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        The first eight bytes of 's', which precede the constant pool, are
        skipped without being examined. UnknownTag or UnknownAttribute is
        raised if an unsupported constant or attribute is encountered.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning a tuple of the
        constant object and the remaining data. UnknownTag is raised if the
        leading tag byte is not supported.
        """

        tag = u1(s[0:1])
        cls = self._constant_classes.get(tag)
        if cls is None:
            raise UnknownTag(tag)
        const = cls()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the one-based index reaches 'count',
        returning a tuple of the list of constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' objects of the given 'cls' from 's', returning a tuple
        of the list of objects and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' methods from 's'."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' fields from 's'."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing its class from the
        name given by the leading two-byte constant index. Return a tuple of
        the attribute object and the remaining data. UnknownAttribute is
        raised if the name is not supported.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        cls = self._attribute_classes.get(constant_name)
        if cls is None:
            # NOTE(review): unrecognised attributes could instead be skipped
            # using the generic AttributeInfo; kept as an error because
            # callers may rely on this exception - confirm before changing.
            raise UnknownAttribute(constant_name)
        attribute = cls()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's', returning a tuple of the list of
        attributes and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the constant count and the constants themselves from 's'."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return a tuple of the two-byte access flags and the remaining data."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):

        """
        Return a tuple of the constant referenced by the two-byte index at the
        start of 's' and the remaining data.
        """

        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return a tuple of the superclass constant referenced by the two-byte
        index at the start of 's' and the remaining data. The constant is
        None if the index is zero, meaning that there is no superclass.
        """

        index = u2(s[0:2])
        if index == 0:
            # Without this test, index - 1 would be -1 and the last constant
            # in the pool would wrongly be returned as the superclass.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):

        """
        Decode the interface count and that many two-byte constant indexes
        from 's', returning a tuple of the list of referenced constants and
        the remaining data.
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the field count and the fields themselves from 's'."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode the attribute count and the attributes themselves from 's'."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the method count and the methods themselves from 's'."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    f = open(sys.argv[1], "rb")
    # Close the file even if the class file cannot be decoded.
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4