#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each function decodes a single big-endian value from the start of 'data',
# ignoring any bytes beyond the width it needs.

def u1(data):
    "Return the unsigned 8-bit integer found at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer found at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Return the signed 16-bit integer found at the start of 'data'."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer found at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer found at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer found at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit floating point value found at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit floating point value found at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# Maps the leading character of a descriptor type to a Python type name.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share a value.
PUBLIC       = 0x0001
PRIVATE      = 0x0002
PROTECTED    = 0x0004
STATIC       = 0x0008
FINAL        = 0x0010
SUPER        = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE     = 0x0040
TRANSIENT    = 0x0080
NATIVE       = 0x0100
INTERFACE    = 0x0200
ABSTRACT     = 0x0400
STRICT       = 0x0800

def has_flags(flags, desired):

    """
    Return whether every bit mask in the sequence 'desired' is set in the
    integer 'flags'. An empty 'desired' sequence is always satisfied.
    """

    # Combine the masks with a plain loop rather than the 'reduce' builtin,
    # which is not available as a builtin on all Python versions.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
class PythonMethodUtils:

    "Mix-in producing Python-style names for methods."

    def get_python_name(self):

        """
        Return a Python name for the method: the static initialiser
        "<clinit>" becomes "__clinit__" alone, whereas all other names
        ("<init>" being replaced by "__init__") are followed by "$" and the
        parameter types taken from the descriptor.
        """

        method_name = str(self.get_name())
        if method_name == "<clinit>":
            return "__clinit__"
        if method_name == "<init>":
            method_name = "__init__"
        return method_name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the names of the parameter types joined using '$'."
        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base type, object type, array type) tuple
        'descriptor_type', with 's' appended. Each level of array nesting
        adds "[]" to the suffix; base types other than objects and arrays
        are shown within angle brackets.
        """

        suffix = s
        current = descriptor_type
        while 1:
            base_type, object_type, array_type = current
            if base_type == "L":
                return object_type + suffix
            if base_type != "[":
                return "<" + base_type + ">" + suffix
            # Descend into the component type of the array.
            current = array_type
            suffix = suffix + "[]"

class PythonNameUtils:

    "Mix-in producing Python-style names from slash-separated names."

    def get_python_name(self):
        "Return the string form of the name with each '/' replaced by '.'."
        slashed = str(self.get_name())
        return slashed.replace("/", ".")

class NameUtils:

    "Mix-in for objects having a 'name_index' and a 'class_file'."

    def get_name(self):

        """
        Return the constant referenced by 'name_index' (the index being one
        greater than the position in the constants list), or None for a zero
        index.
        """

        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    """
    Mix-in for objects having a 'name_and_type_index' and a 'class_file'.
    Each method delegates to the referenced constant, or returns None where
    the index is zero.
    """

    def get_name(self):
        "Return the name provided by the referenced constant, or None."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        entry = self.class_file.constants[self.name_and_type_index - 1]
        return entry.get_name()

    def get_field_descriptor(self):
        "Return the field descriptor of the referenced constant, or None."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        entry = self.class_file.constants[self.name_and_type_index - 1]
        return entry.get_field_descriptor()

    def get_method_descriptor(self):
        "Return the method descriptor of the referenced constant, or None."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        entry = self.class_file.constants[self.name_and_type_index - 1]
        return entry.get_method_descriptor()

class DescriptorUtils:

    """
    Symbol parsing.

    Types are represented by (base type, object type, array type) tuples. The
    field-level parsing methods return a (result, remaining string) pair.
    """

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', which must start with "(", returning
        a (parameter types, return type) pair. The return type is None where
        the descriptor gives "V".
        """

        assert s[0] == "("
        remaining = s[1:]
        params = []
        while remaining[0] != ")":
            parameter_descriptor, remaining = self._get_parameter_descriptor(remaining)
            params.append(parameter_descriptor)
        # Here, remaining[0] is the closing bracket; the return type follows.
        if remaining[1] == "V":
            return params, None
        return_type, remaining = self._get_field_type(remaining[1:])
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse a parameter type at the start of 's'; see _get_field_type."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field type at the start of 's'; see _get_field_type."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse an array component type at the start of 's'."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type at the start of 's', returning a pair consisting of a
        (base type, object type, array type) tuple and the unparsed remainder
        of the string.
        """

        base_type, remaining = self._get_base_type(s)
        object_type = array_type = None
        if base_type == "L":
            object_type, remaining = self._get_object_type(remaining)
        elif base_type == "[":
            array_type, remaining = self._get_array_type(remaining)
        return (base_type, object_type, array_type), remaining

    def _get_base_type(self, s):
        "Return the first character of 's' and the rest, or (None, s) if empty."
        if not s:
            return None, s
        return s[0], s[1:]

    def _get_object_type(self, s):

        """
        Return the text of 's' before the first ";" together with the text
        following that ";", or (None, s) if 's' is empty.
        """

        if not s:
            return None, s
        terminator = s.find(";")
        assert terminator != -1
        return s[:terminator], s[terminator + 1:]

    def _get_array_type(self, s):
        "Parse the component type at the start of 's', or return (None, s)."
        if not s:
            return None, s
        return self._get_component_type(s)

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
class ClassInfo(NameUtils, PythonNameUtils):

    "A constant holding the index of a name."

    def init(self, data, class_file):

        """
        Read the name index from the first two bytes of 'data', remember
        'class_file', and return the data following this entry.
        """

        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name-and-type index."

    def init(self, data, class_file):

        """
        Read the class index and the name-and-type index (two bytes each)
        from 'data', remember 'class_file', and return the data following
        this entry.
        """

        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference whose descriptor is interpreted as a field descriptor."

    def get_descriptor(self):
        "Return the field descriptor of the referenced name and type."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference whose descriptor is interpreted as a method descriptor."

    def get_descriptor(self):
        "Return the method descriptor of the referenced name and type."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A method reference decoded identically to MethodRefInfo."

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A constant holding a name index and a descriptor index."

    def init(self, data, class_file):

        """
        Read the name index and the descriptor index (two bytes each) from
        'data', remember 'class_file', and return the data following this
        entry.
        """

        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor constant parsed as a field type."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor constant parsed as a method type."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):

        """
        Read the two-byte length and then that many bytes from 'data',
        remember 'class_file', and return the data following this entry.
        """

        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        "Return the stored bytes without any decoding."
        return self.bytes

    def __unicode__(self):
        "Return the stored bytes decoded as UTF-8."
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        "Return the same value as str(self)."
        return str(self)

class StringInfo:

    "A constant holding the index of the constant providing its text."

    def init(self, data, class_file):

        """
        Read the two-byte string index from 'data', remember 'class_file',
        and return the data following this entry.
        """

        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        "Return the str() form of the referenced constant."
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        "Return the unicode() form of the referenced constant."
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        "Return the same value as str(self)."
        return str(self)

class SmallNumInfo:

    "Base class for numeric constants occupying four bytes."

    def init(self, data, class_file):

        """
        Keep the first four bytes of 'data' undecoded, remember 'class_file',
        and return the data following this entry.
        """

        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A four-byte signed integer constant."

    def get_value(self):
        "Return the stored bytes decoded as a signed 32-bit integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A four-byte floating point constant."

    def get_value(self):
        "Return the stored bytes decoded as a 32-bit floating point value."
        return f4(self.bytes)

class LargeNumInfo:

    "Base class for numeric constants occupying eight bytes."

    def init(self, data, class_file):

        """
        Keep the first eight bytes of 'data' undecoded as two four-byte
        halves, remember 'class_file', and return the data following this
        entry.
        """

        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):

    "An eight-byte signed integer constant."

    def get_value(self):
        "Return both halves decoded together as a signed 64-bit integer."
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):

    "An eight-byte floating point constant."

    def get_value(self):
        "Return both halves decoded together as a 64-bit floating point value."
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "Base class for entries having flags, a name, a descriptor and attributes."

    def init(self, data, class_file):

        """
        Read the access flags, name index and descriptor index (two bytes
        each) from 'data', then let 'class_file' decode the attributes which
        follow. Return the data remaining after those attributes.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "An item whose descriptor is interpreted as a field descriptor."

    def get_descriptor(self):
        "Return the referenced descriptor constant parsed as a field type."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "An item whose descriptor is interpreted as a method descriptor."

    def get_descriptor(self):
        "Return the referenced descriptor constant parsed as a method type."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    "Base class for attributes; keeps the attribute's content undecoded."

    def init(self, data, class_file):

        """
        Read the four-byte attribute length from 'data', keep that many of
        the following bytes in 'info', remember 'class_file', and return the
        data following the attribute.
        """

        # Remember the class file, as the other 'init' methods accepting
        # 'class_file' do, so that instances expose the same attribute.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.
class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):

        """
        Read the attribute length (four bytes) and the source file index (two
        bytes) from 'data', remember 'class_file', and return the data after
        those six bytes.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        sourcefile_index = u2(data[4:6])
        # Permit the NameUtils mix-in.
        self.name_index = sourcefile_index
        self.sourcefile_index = sourcefile_index
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant providing a value."

    def init(self, data, class_file):

        """
        Read the attribute length and the constant value index from 'data',
        remember 'class_file', and return the data following the attribute.
        The declared length is asserted to cover exactly the two-byte index.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        end_of_attribute = 4 + self.attribute_length
        assert end_of_attribute == 6
        return data[end_of_attribute:]

    def get_value(self):
        "Return the value of the constant referenced by this attribute."
        constant = self.class_file.constants[self.constant_value_index - 1]
        return constant.get_value()

class CodeAttributeInfo(AttributeInfo):

    "An attribute holding code, an exception table and nested attributes."

    def init(self, data, class_file):

        """
        Decode from 'data' the attribute length, the stack and local variable
        limits, the code itself (kept as undecoded bytes), the exception
        table and finally the nested attributes, which 'class_file' decodes.
        Return the data remaining after the nested attributes.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])

        # The code occupies 'code_length' bytes after the 12 byte header.
        code_end = 12 + self.code_length
        self.code = data[12:code_end]

        # A two-byte count precedes the exception table entries.
        self.exception_table_length = u2(data[code_end:code_end + 2])
        handlers = self.exception_table = []
        remaining = data[code_end + 2:]
        for _ in range(self.exception_table_length):
            handler = ExceptionInfo()
            remaining = handler.init(remaining)
            handlers.append(handler)

        self.attributes, remaining = self.class_file._get_attributes(remaining)
        return remaining

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of constant indexes for exceptions."

    def init(self, data, class_file):

        """
        Read the attribute length, the number of exceptions and then one
        two-byte index per exception from 'data', remember 'class_file', and
        return the data following the table.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        indexes = self.exception_index_table = []
        # The table starts after the six bytes read above.
        end_of_table = 6 + 2 * self.number_of_exceptions
        for offset in range(6, end_of_table, 2):
            indexes.append(u2(data[offset:offset + 2]))
        return data[end_of_table:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        constant_index = self.exception_index_table[i]
        return self.class_file.constants[constant_index - 1]
class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo entries."

    def init(self, data, class_file):

        """
        Read the attribute length and the number of classes from 'data', then
        decode that many InnerClassInfo entries. Return the data following
        the last entry.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        entries = self.classes = []
        remaining = data[6:]
        for _ in range(self.number_of_classes):
            entry = InnerClassInfo()
            remaining = entry.init(remaining, self.class_file)
            entries.append(entry)
        return remaining

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded solely using the AttributeInfo behaviour."

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo entries."

    def init(self, data, class_file):

        """
        Read the attribute length and the table length from 'data', then
        decode that many LineNumberInfo entries. Return the data following
        the last entry.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        entries = self.line_number_table = []
        remaining = data[6:]
        for _ in range(self.line_number_table_length):
            entry = LineNumberInfo()
            remaining = entry.init(remaining)
            entries.append(entry)
        return remaining

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo entries."

    def init(self, data, class_file):

        """
        Read the attribute length and the table length from 'data', then
        decode that many LocalVariableInfo entries. Return the data following
        the last entry.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        entries = self.local_variable_table = []
        remaining = data[6:]
        for _ in range(self.local_variable_table_length):
            entry = LocalVariableInfo()
            remaining = entry.init(remaining, self.class_file)
            entries.append(entry)
        return remaining

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded solely using the AttributeInfo behaviour."

# Child classes of the attribute information classes.
class ExceptionInfo:

    "An entry in the exception table of a CodeAttributeInfo object."

    def init(self, data):

        """
        Read the start, end and handler positions and the catch type (two
        bytes each) from 'data', returning the data following the entry.
        """

        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an InnerClassesAttributeInfo object."

    def init(self, data, class_file):

        """
        Read the inner class, outer class and inner name indexes and the
        access flags (two bytes each) from 'data', remember 'class_file', and
        return the data following the entry.
        """

        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a LineNumberAttributeInfo object."

    def init(self, data):

        """
        Read the start position and the line number (two bytes each) from
        'data', returning the data following the entry.
        """

        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils is required as a base class since get_descriptor relies on
# _get_field_descriptor.

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "An entry in the table of a LocalVariableAttributeInfo object."

    def init(self, data, class_file):

        """
        Read the start position, length, name index, descriptor index and
        variable index (two bytes each) from 'data', remember 'class_file',
        and return the data following the entry.
        """

        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the referenced descriptor constant parsed as a field type."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    "Raised for a constant whose tag value is not recognised."

class UnknownAttribute(Exception):
    "Raised for an attribute whose name is not recognised."

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant tag values and the classes used to decode the constants.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # Attribute names and the classes used to decode the attributes.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        The first eight bytes of 's' are skipped without being examined.
        UnknownTag or UnknownAttribute may be raised for unrecognised
        constants or attributes.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's' according to its one-byte
        tag, returning the constant object and the remaining data. Raise
        UnknownTag for an unrecognised tag.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = const_class()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until 'count' - 1 list positions have been
        filled, returning the list of constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' instances of 'cls' from 's', returning the list of
        instances and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' MethodInfo objects from 's'."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' FieldInfo objects from 's'."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing the class to use
        from the name found using the two-byte attribute name index. Return
        the attribute and the remaining data, raising UnknownAttribute for an
        unrecognised name.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's', returning the list of attributes
        and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Read the two-byte constant count from 's' and decode the constants."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return the two-byte access flags from 's' and the remaining data."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):

        """
        Return the constant referenced by the two-byte index at the start of
        's' together with the remaining data.
        """

        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant referenced by the two-byte index at the start of
        's', or None where the index is zero, together with the remaining
        data.
        """

        index = u2(s[0:2])
        # A zero index denotes the absence of a superclass and must not be
        # used to index the constants, since it would select the last entry.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):

        """
        Read a two-byte count from 's' and then that many two-byte constant
        indexes, returning the referenced constants and the remaining data.
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Read the two-byte field count from 's' and decode the fields."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Read the two-byte attribute count from 's' and decode the attributes."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Read the two-byte method count from 's' and decode the methods."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them as such and release the file
    # promptly, even if decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4