#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each decoder reads a big-endian value from the start of 'data'; any bytes
# after the value are ignored.

def u1(data):
    "Return the unsigned 1-byte integer at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 2-byte integer at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Return the signed 2-byte integer at the start of 'data'."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Return the unsigned 4-byte integer at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 4-byte integer at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 8-byte integer at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 4-byte floating point number at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 8-byte floating point number at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# Mapping from descriptor base type characters to Python type names.

descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks. SUPER and SYNCHRONIZED share the value 0x0020.

PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL, SUPER, SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \
    0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the sequence 'desired' is set in the bit mask
    'flags'. An empty 'desired' sequence is always satisfied.
    """

    # Combine the flags with a plain loop: 'reduce' is only available as a
    # builtin on Python 2.

    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
class PythonMethodUtils:

    "Naming support for objects providing get_name and get_descriptor."

    def get_python_name(self):

        """
        Return a Python-friendly name for the method: the class initialiser
        becomes "__clinit__", whereas all other names (with "<init>" becoming
        "__init__") are suffixed with "$" and the parameter type names.
        """

        java_name = str(self.get_name())
        if java_name == "<clinit>":
            return "__clinit__"
        if java_name == "<init>":
            java_name = "__init__"
        return java_name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the names of the parameter types joined using '$'."

        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the given 'descriptor_type' tuple, adding "[]" to
        the suffix 's' for each level of array nesting.
        """

        suffix = s
        base_type, object_type, array_type = descriptor_type

        # Unwrap arrays until the element type is reached.

        while base_type == "[":
            suffix = suffix + "[]"
            base_type, object_type, array_type = array_type

        if base_type == "L":
            return object_type + suffix
        return "<" + base_type + ">" + suffix

class PythonNameUtils:

    "Naming support for objects providing get_name."

    def get_python_name(self):

        "Return the name with each '/' replaced by '.'."

        java_name = str(self.get_name())
        return ".".join(java_name.split("/"))

class NameUtils:

    "Name lookup for objects having name_index and class_file attributes."

    def get_name(self):

        "Return the constant found using the name index, or None if zero."

        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    """
    Lookup support for objects having name_and_type_index, class_index and
    class_file attributes.
    """

    def get_name(self):

        "Return the name from the referenced constant, or None if zero."

        position = self.name_and_type_index
        # Some name indexes are zero to indicate special conditions.
        if position == 0:
            return None
        return self.class_file.constants[position - 1].get_name()

    def get_field_descriptor(self):

        "Return the field descriptor from the referenced constant, or None."

        position = self.name_and_type_index
        # Some name indexes are zero to indicate special conditions.
        if position == 0:
            return None
        return self.class_file.constants[position - 1].get_field_descriptor()

    def get_method_descriptor(self):

        "Return the method descriptor from the referenced constant, or None."

        position = self.name_and_type_index
        # Some name indexes are zero to indicate special conditions.
        if position == 0:
            return None
        return self.class_file.constants[position - 1].get_method_descriptor()

    def get_class(self):

        "Return the constant found using the class index."

        constants = self.class_file.constants
        return constants[self.class_index - 1]

class DescriptorUtils:

    "Symbol parsing."

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', returning a list of parameter types
        and the return type (None where the return type is "V").
        """

        assert s[0] == "("
        remaining = s[1:]
        parameters = []
        while remaining[0] != ")":
            parameter, remaining = self._get_parameter_descriptor(remaining)
            parameters.append(parameter)

        # Examine what follows the closing bracket.

        return_part = remaining[1:]
        if return_part[0] == "V":
            return parameters, None
        return_type, return_part = self._get_field_type(return_part)
        return parameters, return_type

    def _get_parameter_descriptor(self, s):
        "Parse a parameter type at the start of 's'."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field type at the start of 's'."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse an array component type at the start of 's'."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse a type at the start of 's', returning a tuple of the form
        (base type, object type, array type) and the unparsed text.
        """

        base_type, rest = self._get_base_type(s)
        object_type = array_type = None
        if base_type == "[":
            array_type, rest = self._get_array_type(rest)
        elif base_type == "L":
            object_type, rest = self._get_object_type(rest)
        return (base_type, object_type, array_type), rest

    def _get_base_type(self, s):

        "Return the first character of 's' (None if empty) and the rest."

        if not s:
            return None, s
        return s[0], s[1:]

    def _get_object_type(self, s):

        "Return the text before the first ';' in 's' and the text after it."

        if not s:
            return None, s
        terminator = s.find(";")
        assert terminator != -1
        return s[:terminator], s[terminator + 1:]

    def _get_array_type(self, s):

        "Return the component type at the start of 's' and the rest."

        if not s:
            return None, s
        return self._get_component_type(s)

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
# Each init method below decodes an object from the start of the given data,
# returning the data which follows the decoded object.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant referring to a class by means of a name index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name-and-type index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference to a field."

    def get_descriptor(self):
        "Return the parsed field descriptor."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference to a method."

    def get_descriptor(self):
        "Return the parsed method descriptor."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A reference to an interface method."

    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A constant holding a name index and a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        "Return the stored bytes without decoding them."
        return self.bytes

    def __unicode__(self):
        "Return the stored bytes decoded as UTF-8."
        # NOTE(review): class files are understood to use a modified UTF-8
        # NOTE(review): encoding, so some values may not decode - confirm.
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        return str(self)

class StringInfo:

    "A constant referring to another constant providing the string value."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        return str(self)

class SmallNumInfo:

    "A constant whose value occupies four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a signed integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a floating point number."
        return f4(self.bytes)

class LargeNumInfo:

    "A constant whose value occupies eight bytes, stored as two halves."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):
    def get_value(self):
        "Return the combined halves decoded as a signed integer."
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        "Return the combined halves decoded as a floating point number."
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "An item having access flags, a name, a descriptor and attributes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        # The attributes are decoded by the class file object itself.
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    "An attribute whose contents are retained without being decoded."

    def init(self, data, class_file):

        """
        Store the length-prefixed contents found at the start of 'data' in the
        info attribute, returning the data following the contents.
        """

        # Remember the class file as all the other init methods accepting it
        # do, so that subclasses inheriting this method also provide it.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.
class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):

        "Decode the attribute, returning the data which follows it."

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        index = u2(data[4:6])
        self.sourcefile_index = index
        # Permit the NameUtils mix-in.
        self.name_index = index
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant value."

    def init(self, data, class_file):

        "Decode the attribute, returning the data which follows it."

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        end_of_attribute = 4 + self.attribute_length
        # The length prefix and the index together occupy six bytes.
        assert end_of_attribute == 6
        return data[end_of_attribute:]

    def get_value(self):

        "Return the value of the referenced constant."

        constant = self.class_file.constants[self.constant_value_index - 1]
        return constant.get_value()

class CodeAttributeInfo(AttributeInfo):

    "An attribute holding code, an exception table and nested attributes."

    def init(self, data, class_file):

        "Decode the attribute, returning the data which follows it."

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])

        # The code is followed by the size of the exception table.

        code_end = 12 + self.code_length
        table_start = code_end + 2
        self.code = data[12:code_end]
        self.exception_table_length = u2(data[code_end:table_start])

        entries = self.exception_table = []
        remaining = data[table_start:]
        for _ in range(self.exception_table_length):
            entry = ExceptionInfo()
            remaining = entry.init(remaining)
            entries.append(entry)

        # Nested attributes are decoded by the class file object.

        self.attributes, remaining = self.class_file._get_attributes(remaining)
        return remaining

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of exception indexes."

    def init(self, data, class_file):

        "Decode the attribute, returning the data which follows it."

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])

        # Two bytes per index follow the six bytes already examined.

        table = self.exception_index_table = []
        end_of_table = 6 + 2 * self.number_of_exceptions
        for offset in range(6, end_of_table, 2):
            table.append(u2(data[offset:offset + 2]))
        return data[end_of_table:]

    def get_exception(self, i):

        "Return the constant referenced by entry 'i' in the index table."

        constants = self.class_file.constants
        return constants[self.exception_index_table[i] - 1]
class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of inner class details."

    def init(self, data, class_file):

        "Decode the attribute, returning the data which follows it."

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        entries = self.classes = []
        remaining = data[6:]
        for _ in range(self.number_of_classes):
            entry = InnerClassInfo()
            remaining = entry.init(remaining, class_file)
            entries.append(entry)
        return remaining

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded using the general attribute support."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of line number details."

    def init(self, data, class_file):

        "Decode the attribute, returning the data which follows it."

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        entries = self.line_number_table = []
        remaining = data[6:]
        for _ in range(self.line_number_table_length):
            entry = LineNumberInfo()
            remaining = entry.init(remaining)
            entries.append(entry)
        return remaining

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of local variable details."

    def init(self, data, class_file):

        "Decode the attribute, returning the data which follows it."

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        entries = self.local_variable_table = []
        remaining = data[6:]
        for _ in range(self.local_variable_table_length):
            entry = LocalVariableInfo()
            remaining = entry.init(remaining, class_file)
            entries.append(entry)
        return remaining

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded using the general attribute support."

    pass

# Child classes of the attribute information classes.
# Each init method below decodes an object from the start of the given data,
# returning the data which follows the decoded object.

class ExceptionInfo:

    "An entry in the exception table of a code attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an inner classes attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a line number attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils is included to provide the _get_field_descriptor method used
# by get_descriptor.

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "An entry in the table of a local variable attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the tag value when a constant has an unsupported tag."

    pass

class UnknownAttribute(Exception):

    "Raised with the attribute name when an attribute is not supported."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant tags and the classes used to decode the tagged constants.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # Attribute names and the classes used to decode the named attributes.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        The first eight bytes of 's' are skipped without being examined. The
        super_class attribute is None where the class file gives a super class
        index of zero.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning the constant and
        the remaining data. Raise UnknownTag for unsupported tag values.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' for indexes from 1 up to but not including
        'count', returning a list of constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode the given 'number' of 'cls' instances from 's', returning a
        list of instances and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning the attribute and
        the remaining data. Raise UnknownAttribute for unsupported names.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # Each method below reads a two byte value from the start of 's',
    # returning the decoded details and the remaining data.

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant for the super class, or None where the index is
        zero, together with the remaining data.
        """

        index = u2(s[0:2])
        # Avoid looking up a zero index: it would select the last constant.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them as such and always close the file.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4