#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
#
# Each decoder reads one big-endian value from the start of 'data' and ignores
# any bytes beyond the width it needs. struct.error is raised by the struct
# module if 'data' holds fewer bytes than that width.

def u1(data):
    "Return the unsigned 8-bit integer found in the first byte of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer found in the first two bytes of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Return the signed 16-bit integer found in the first two bytes of 'data'."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer found in the first four bytes of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer found in the first four bytes of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer found in the first eight bytes of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit float found in the first four bytes of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit float found in the first eight bytes of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# Maps the base type character of a descriptor to the name of a Python type.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks. SUPER and SYNCHRONIZED intentionally share 0x0020:
# the class file format uses that bit for both, on different kinds of item.
PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL, SUPER, SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \
    0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the 'desired' sequence is set in 'flags'.

    An empty 'desired' sequence always produces a true result.
    """

    # An explicit loop avoids relying on the 'reduce' built-in, which is not
    # available as a built-in in Python 3.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
class PythonMethodUtils:

    """
    Mix-in producing Python-friendly names for methods.

    Classes using this mix-in must provide 'get_name' and 'get_descriptor',
    where the latter returns a (parameters, return type) pair as produced by
    the 'get_method_descriptor' function.
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_python_name(self):

        """
        Return a name for the method which encodes its parameter types.

        "<init>" is presented as "__init__" followed by the encoded
        parameter types, whereas "<clinit>" is always presented as the plain
        name "__clinit__".
        """

        name = self.get_name()
        if str(name) == "<init>":
            name = "__init__"
        elif str(name) == "<clinit>":
            return "__clinit__"
        else:
            name = str(name)
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the encoded parameter types joined by the symbol separator."

        # Only the parameters (element 0 of the descriptor) take part in the
        # name; the return type is ignored.
        names = []
        for descriptor_type in self.get_descriptor()[0]:
            names.append(self._get_type_as_name(descriptor_type))
        return self.symbol_sep.join(names)

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return the name fragment for the given 'descriptor_type', a
        (base type, object type, array type) tuple. The suffix 's' collects
        one array separator for each array dimension encountered.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    """
    Mix-in producing dotted Python-style names. Classes using this mix-in must
    provide 'get_name'.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Names starting with "[" are
        treated as array descriptors and the name of their innermost element
        type is used instead.
        """

        # NOTE: This may not be comprehensive.
        if not str(self.get_name()).startswith("["):
            return str(self.get_name()).replace("/", ".")
        else:
            return self._get_type_name(
                get_field_descriptor(
                    str(self.get_name())
                    )
                ).replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the type name for the given 'descriptor_type' tuple, looking
        through any array dimensions to the element type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    """
    Mix-in for objects having 'name_index' and 'class_file' attributes.
    """

    def get_name(self):

        "Return the constant referenced by 'name_index', or None if it is zero."

        # Constant indexes start at one, but the constants list starts at zero.
        if self.name_index != 0:
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    Mix-in for objects having 'name_and_type_index', 'class_index' and
    'class_file' attributes. Requests are delegated to the referenced
    name-and-type constant.
    """

    def get_name(self):

        "Return the name from the referenced constant, or None for a zero index."

        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):

        "Return the referenced field descriptor, or None for a zero index."

        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):

        "Return the referenced method descriptor, or None for a zero index."

        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_class(self):

        "Return the constant referenced by 'class_index'."

        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.
#
# Descriptor types are represented as (base type, object type, array type)
# tuples: the base type is the first character of the type; the object type is
# the class name for "L" types (otherwise None); the array type is the nested
# tuple describing the element type for "[" types (otherwise None).

def get_method_descriptor(s):

    """
    Parse the method descriptor string 's', which must start with "(".
    Return a tuple of the form (parameter types, return type) where the
    return type is None for methods returning void ("V").
    """

    assert s[0] == "("
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # Here, s[0] is ")" and the return type starts at s[1].
    if s[1] != "V":
        return_type, s = _get_field_type(s[1:])
    else:
        return_type, s = None, s[1:]
    return params, return_type

def get_field_descriptor(s):

    "Parse the field descriptor string 's' and return its descriptor type."

    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):

    "Parse one parameter type from 's'; return (descriptor type, remainder)."

    return _get_field_type(s)

def _get_component_type(s):

    "Parse an array element type from 's'; return (descriptor type, remainder)."

    return _get_field_type(s)

def _get_field_type(s):

    "Parse one type from the start of 's'; return (descriptor type, remainder)."

    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):

    "Return (first character, remainder) for 's', or (None, s) if 's' is empty."

    if len(s) > 0:
        return s[0], s[1:]
    else:
        return None, s

def _get_object_type(s):

    """
    Return (class name, remainder) where the class name is the text in 's' up
    to the terminating ";". Return (None, s) if 's' is empty.
    """

    if len(s) > 0:
        s_end = s.find(";")
        assert s_end != -1
        return s[:s_end], s[s_end+1:]
    else:
        return None, s

def _get_array_type(s):

    "Return (element descriptor type, remainder), or (None, s) if 's' is empty."

    if len(s) > 0:
        return _get_component_type(s)
    else:
        return None, s

# Constant information.
#
# Each 'init' method decodes an entry from the start of 'data' (positioned
# after the tag byte) and returns the data remaining after the entry.

class ClassInfo(NameUtils, PythonNameUtils):

    "A class constant referring to its name through 'name_index'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "Common details of constants referring to a class and a name-and-type."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A field reference constant."

    def get_descriptor(self):
        "Return the field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A method reference constant."

    def get_descriptor(self):
        "Return the method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "An interface method reference constant."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant pairing a name with a descriptor."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A text constant stored as a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        "Return the stored bytes without decoding them."
        return self.bytes

    def __unicode__(self):
        # NOTE(review): class files are specified to use a modified form of
        # NOTE(review): UTF-8; presumably some valid constants cannot be
        # NOTE(review): decoded by the standard codec - confirm.
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        "Return the value of the constant as a plain string."
        return str(self)

class StringInfo:

    "A string constant referring to its text through 'string_index'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        "Return the value of the constant as a plain string."
        return str(self)

class SmallNumInfo:

    "Common details of numeric constants occupying four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A four byte signed integer constant."

    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A four byte floating point constant."

    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    "Common details of numeric constants occupying eight bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):

    "An eight byte signed integer constant."

    def get_value(self):
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):

    "An eight byte floating point constant."

    def get_value(self):
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):

    """
    Common details of fields and methods: access flags, name, descriptor and
    a table of attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        # The attribute table is decoded by the class file, which also
        # provides the data remaining after the table.
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field defined by the class."

    def get_descriptor(self):
        "Return the parsed field descriptor of the field."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method defined by the class."

    def get_descriptor(self):
        "Return the parsed method descriptor of the method."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose content is kept undecoded in 'info'.

    The 'init' method receives 'data' positioned at the attribute length (the
    attribute name index having been consumed by the caller) and returns the
    data remaining after the attribute.
    """

    def init(self, data, class_file):
        # Retain the class file as every other 'init' method does, so that
        # attributes using this generic implementation are also aware of it.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.
# Each 'init' method below receives 'data' positioned at the four byte
# attribute length and returns the data remaining after the attribute.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute referring to a source file name constant."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        sourcefile_index = u2(data[4:6])
        # Permit the NameUtils mix-in.
        self.name_index = sourcefile_index
        self.sourcefile_index = sourcefile_index
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute referring to a constant providing a value."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The length field plus the declared content must cover exactly the
        # six bytes decoded above.
        consumed = 4 + self.attribute_length
        assert consumed == 6
        return data[consumed:]

    def get_value(self):
        "Return the value of the referenced constant."
        constant = self.class_file.constants[self.constant_value_index - 1]
        return constant.get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding code together with its stack and local variable
    limits, an exception table and a nested table of attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])

        # The code starts at offset 12 and is followed directly by the two
        # byte exception table length.
        code_end = 12 + self.code_length
        table_start = code_end + 2
        self.code = data[12:code_end]
        self.exception_table_length = u2(data[code_end:table_start])

        self.exception_table = []
        remaining = data[table_start:]
        for _ in range(self.exception_table_length):
            entry = ExceptionInfo()
            remaining = entry.init(remaining)
            self.exception_table.append(entry)

        # The class file decodes the nested attributes and provides the data
        # remaining after them.
        self.attributes, remaining = self.class_file._get_attributes(remaining)
        return remaining

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of indexes referring to exception constants."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        # The two byte index entries start at offset 6.
        offset = 6
        for _ in range(self.number_of_exceptions):
            entry = data[offset:offset + 2]
            self.exception_index_table.append(u2(entry))
            offset += 2
        return data[offset:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        return self.class_file.constants[self.exception_index_table[i] - 1]
# Each 'init' method below receives 'data' positioned at the four byte
# attribute length, followed by a two byte entry count and the entries
# themselves, and returns the data remaining after the last entry.

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo entries in 'classes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        remaining = data[6:]
        for _ in range(self.number_of_classes):
            entry = InnerClassInfo()
            remaining = entry.init(remaining, self.class_file)
            self.classes.append(entry)
        return remaining

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded using the generic AttributeInfo behaviour."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo entries."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        remaining = data[6:]
        for _ in range(self.line_number_table_length):
            entry = LineNumberInfo()
            # Line number entries are not given the class file.
            remaining = entry.init(remaining)
            self.line_number_table.append(entry)
        return remaining

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo entries."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        remaining = data[6:]
        for _ in range(self.local_variable_table_length):
            entry = LocalVariableInfo()
            remaining = entry.init(remaining, self.class_file)
            self.local_variable_table.append(entry)
        return remaining

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded using the generic AttributeInfo behaviour."

    pass

# Child classes of the attribute information classes.
# Each 'init' method below decodes a fixed-size entry from the start of 'data'
# and returns the data remaining after the entry.

class ExceptionInfo:

    "An exception table entry occupying eight bytes."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An inner class table entry occupying eight bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A line number table entry occupying four bytes."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A local variable table entry occupying ten bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of the variable."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the tag value when a constant has an unsupported tag."

    pass

class UnknownAttribute(Exception):

    "Raised with the attribute name when an attribute is not supported."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes of the file (the magic number and version
        # numbers in the class file format) are skipped without being checked.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', selecting its class using the
        leading tag byte. Return the constant and the remaining data. Raise
        UnknownTag for unsupported tags.
        """

        tag = u1(s[0:1])
        if tag == 1:
            const = Utf8Info()
        elif tag == 3:
            const = IntegerInfo()
        elif tag == 4:
            const = FloatInfo()
        elif tag == 5:
            const = LongInfo()
        elif tag == 6:
            const = DoubleInfo()
        elif tag == 7:
            const = ClassInfo()
        elif tag == 8:
            const = StringInfo()
        elif tag == 9:
            const = FieldRefInfo()
        elif tag == 10:
            const = MethodRefInfo()
        elif tag == 11:
            const = InterfaceMethodRefInfo()
        elif tag == 12:
            const = NameAndTypeInfo()
        else:
            # The call form of raise is accepted by both Python 2 and 3.
            raise UnknownTag(tag)

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the index 'count' is reached. Return
        the list of constants and the remaining data. The constant with index
        N is stored at position N - 1 of the list.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            # Such entries use up two indexes, so the placeholder keeps the
            # positions of subsequent constants aligned with their indexes.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' instances of 'cls' from 's'. Return the list of
        instances and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' methods from 's'."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' fields from 's'."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', selecting its class using
        the name constant referenced by the leading two byte index. Return the
        attribute and the remaining data. Raise UnknownAttribute for
        unsupported attribute names.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        if constant_name == "SourceFile":
            attribute = SourceFileAttributeInfo()
        elif constant_name == "ConstantValue":
            attribute = ConstantValueAttributeInfo()
        elif constant_name == "Code":
            attribute = CodeAttributeInfo()
        elif constant_name == "Exceptions":
            attribute = ExceptionsAttributeInfo()
        elif constant_name == "InnerClasses":
            attribute = InnerClassesAttributeInfo()
        elif constant_name == "Synthetic":
            attribute = SyntheticAttributeInfo()
        elif constant_name == "LineNumberTable":
            attribute = LineNumberAttributeInfo()
        elif constant_name == "LocalVariableTable":
            attribute = LocalVariableAttributeInfo()
        elif constant_name == "Deprecated":
            attribute = DeprecatedAttributeInfo()
        else:
            # The call form of raise is accepted by both Python 2 and 3.
            raise UnknownAttribute(constant_name)
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's'. Return the list of attributes and
        the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # The methods below each read a two byte count or index from the start of
    # 's' and return the decoded details together with the remaining data.

    def _get_constants(self, s):
        "Return the constants found in 's' and the remaining data."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return the access flags found in 's' and the remaining data."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return the constant referenced at the start of 's' and the remaining data."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the superclass constant referenced at the start of 's' and the
        remaining data.

        A zero index indicates that there is no superclass (as is the case for
        java.lang.Object), and None is returned in place of a constant.
        """

        index = u2(s[0:2])
        if index == 0:
            # Without this test, constants[-1] would silently provide the
            # last constant in the table.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Return the interface constants referenced in 's' and the remaining data."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Return the fields found in 's' and the remaining data."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Return the attributes found in 's' and the remaining data."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Return the methods found in 's' and the remaining data."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files contain binary data, so the file is read in binary mode, and
    # it is always closed afterwards.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4