#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each function decodes one big-endian value from the start of 'data' and
# ignores any bytes following the value.

def u1(data):
    "Return the unsigned 8-bit integer found at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer found at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Return the signed 16-bit integer found at the start of 'data'."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer found at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer found at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer found at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit floating point number found at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit floating point number found at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# Mapping from the first character of a field descriptor to the name of a
# Python type.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share a value.
PUBLIC = 0x0001
PRIVATE = 0x0002
PROTECTED = 0x0004
STATIC = 0x0008
FINAL = 0x0010
SUPER = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE = 0x0040
TRANSIENT = 0x0080
NATIVE = 0x0100
INTERFACE = 0x0200
ABSTRACT = 0x0400
STRICT = 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the sequence 'desired' is set in 'flags'.
    An empty 'desired' sequence always produces a true result.
    """

    # Combine the flags with a plain loop instead of the 'reduce' builtin,
    # which is not available as a builtin on all Python versions.
    desired_flags = 0
    for flag in desired:
        desired_flags = desired_flags | flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
class PythonMethodUtils:

    "Mix-in producing Python-style names for method-like objects."

    def get_python_name(self):

        """
        Return a name for the method built from its own name and its parameter
        types. The class initialiser "<clinit>" is always "__clinit__";
        "<init>" is presented as "__init__" with the parameter suffix.
        """

        method_name = str(self.get_name())
        if method_name == "<clinit>":
            return "__clinit__"
        if method_name == "<init>":
            method_name = "__init__"
        return method_name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the parameter type names joined using '$'."
        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for 'descriptor_type', a (base type, object type, array
        type) tuple, with the suffix 's' appended. Each array dimension adds
        "[]" to the suffix.
        """

        suffix = s
        base_type, object_type, array_type = descriptor_type
        # Descend through the array dimensions to reach the element type.
        while base_type == "[":
            suffix = suffix + "[]"
            base_type, object_type, array_type = array_type
        if base_type == "L":
            return object_type + suffix
        return "<" + base_type + ">" + suffix

class PythonNameUtils:

    "Mix-in producing Python-style (dotted) names for named objects."

    def get_python_name(self):

        """
        Return the object's name with "/" replaced by ".". Names starting with
        "[" are treated as array descriptors and yield the element type name.
        """

        # NOTE: This may not be comprehensive.
        plain_name = str(self.get_name())
        if plain_name.startswith("["):
            plain_name = self._get_type_name(get_field_descriptor(plain_name))
        return plain_name.replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the name of the innermost type of 'descriptor_type': the object
        type for objects, otherwise the mapped name of the base type.
        """

        base_type, object_type, array_type = descriptor_type
        # Strip away any array dimensions.
        while base_type == "[":
            base_type, object_type, array_type = array_type
        if base_type == "L":
            return object_type
        return descriptor_base_type_mapping[base_type]

class NameUtils:

    "Mix-in for objects referring to their name using 'name_index'."

    def get_name(self):
        "Return the constant holding the name, or None for a zero index."
        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    "Mix-in for objects referring to a name and type constant."

    def _get_name_and_type(self):
        "Return the name and type constant, or None for a zero index."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        return self.class_file.constants[self.name_and_type_index - 1]

    def get_name(self):
        "Return the name from the name and type constant, or None."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_name()

    def get_field_descriptor(self):
        "Return the field descriptor from the name and type constant, or None."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_field_descriptor()

    def get_method_descriptor(self):
        "Return the method descriptor from the name and type constant, or None."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_method_descriptor()

    def get_class(self):
        "Return the constant found using 'class_index'."
        pool = self.class_file.constants
        return pool[self.class_index - 1]

# Symbol parsing.

def get_method_descriptor(s):

    """
    Parse the method descriptor string 's', returning a tuple containing the
    list of parameter types and the return type (None for "V"). Each type is a
    (base type, object type, array type) tuple.
    """

    assert s[0] == "("
    remaining = s[1:]
    parameters = []
    while remaining[0] != ")":
        parameter, remaining = _get_parameter_descriptor(remaining)
        parameters.append(parameter)
    # Move past the closing bracket to reach the return type.
    remaining = remaining[1:]
    if remaining[0] == "V":
        return parameters, None
    return parameters, _get_field_type(remaining)[0]

def get_field_descriptor(s):
    "Parse the field descriptor string 's', returning a type tuple."
    field_type, remaining = _get_field_type(s)
    return field_type

def _get_parameter_descriptor(s):
    "Return the parameter type at the start of 's' and the remaining text."
    return _get_field_type(s)

def _get_component_type(s):
    "Return the array component type at the start of 's' and the remaining text."
    return _get_field_type(s)

def _get_field_type(s):

    """
    Return a (base type, object type, array type) tuple for the type at the
    start of 's', together with the remaining text.
    """

    base_type, remaining = _get_base_type(s)
    if base_type == "L":
        object_type, remaining = _get_object_type(remaining)
        return (base_type, object_type, None), remaining
    if base_type == "[":
        array_type, remaining = _get_array_type(remaining)
        return (base_type, None, array_type), remaining
    return (base_type, None, None), remaining

def _get_base_type(s):
    "Return the first character of 's' (None if empty) and the remaining text."
    if len(s) == 0:
        return None, s
    return s[0], s[1:]

def _get_object_type(s):
    "Return the text before the first ';' in 's' (None if empty) and the rest."
    if len(s) == 0:
        return None, s
    terminator = s.find(";")
    assert terminator != -1
    return s[:terminator], s[terminator + 1:]

def _get_array_type(s):
    "Return the component type at the start of 's' (None if empty) and the rest."
    if len(s) == 0:
        return None, s
    return _get_component_type(s)

# Constant information.
# Each information class provides an 'init' method which decodes the object's
# details from the start of the given data and returns the remaining data.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant referring to a class by name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant referring to a member of a class."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A constant referring to a field."

    def get_descriptor(self):
        "Return the parsed field descriptor of the field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A constant referring to a method."

    def get_descriptor(self):
        "Return the parsed method descriptor of the method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A constant referring to an interface method."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant pairing a name with a descriptor."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the descriptor constant parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the descriptor constant parsed as a method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding encoded text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        # NOTE: The class file format specifies a modified form of UTF-8;
        # NOTE: text using the modified encodings may not decode exactly.
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        "Return the stored text as a plain string."
        return str(self)

class StringInfo:

    "A string constant referring to its text through another constant."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        "Return the referenced text as a plain string."
        return str(self)

class SmallNumInfo:

    "A numeric constant occupying four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A signed 32-bit integer constant."

    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A 32-bit floating point constant."

    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    "A numeric constant occupying eight bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):

    "A signed 64-bit integer constant."

    def get_value(self):
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):

    "A 64-bit floating point constant."

    def get_value(self):
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):

    "The common structure of fields and methods."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field of the class."

    def get_descriptor(self):
        "Return the parsed field descriptor of the field."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method of the class."

    def get_descriptor(self):
        "Return the parsed method descriptor of the method."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose content is kept undecoded in 'info'. The data
    given to 'init' starts at the attribute length, after the name index.
    """

    def init(self, data, class_file):
        # Record the class file as every other information class does.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute naming the source file of the class."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute referring to the constant value of a field."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content must consist only of the two byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "An attribute holding bytecode, an exception table and nested attributes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute listing constant pool indexes of exceptions."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant for the exception at position 'i' in the table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of inner class details."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded generically by AttributeInfo."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of line number details."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of local variable details."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded generically by AttributeInfo."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An entry in the exception table of a code attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an inner classes attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a line number attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "An entry in the table of a local variable attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of the variable."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised for a constant pool tag which is not supported."

    pass

class UnknownAttribute(Exception):

    "Raised for an attribute name which is not supported."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant pool tags and the classes used to decode the tagged entries.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # Attribute names and the classes used to decode the named attributes.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details. The first eight bytes of 's' are skipped without being
        examined. UnknownTag or UnknownAttribute is raised if an unsupported
        constant or attribute is encountered.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning the constant and the
        remaining data. UnknownTag is raised for an unsupported tag.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' for a constant pool having the given 'count',
        returning the list of constants and the remaining data. The constant
        numbered n in the class file is found at position n - 1 in the list.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' instances of 'cls' from 's'; return them and the rest."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' methods from 's'; return them and the rest."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' fields from 's'; return them and the rest."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning the attribute and
        the remaining data. UnknownAttribute is raised for an unsupported
        attribute name.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes from 's'; return them and the rest."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the counted constant pool at the start of 's'."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return the access flags at the start of 's' and the remaining data."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return the constant indexed at the start of 's' and the remaining data."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the superclass constant indexed at the start of 's' and the
        remaining data. An index of zero indicates that there is no superclass,
        in which case None is returned in place of a constant.
        """

        index = u2(s[0:2])
        # Without this test, a zero index would select the last constant.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Return the list of interface constants from 's' and the remaining data."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the counted field table at the start of 's'."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode the counted attribute table at the start of 's'."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the counted method table at the start of 's'."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them in binary mode and make sure that
    # the file is closed even if reading fails.
    f = open(sys.argv[1], "rb")
    try:
        data = f.read()
    finally:
        f.close()
    c = ClassFile(data)

# vim: tabstop=4 expandtab shiftwidth=4