#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each function decodes one big-endian value from the start of 'data' and
# ignores any bytes after it. struct.error is raised where 'data' is too short.

def u1(data):
    "Return the unsigned 8-bit integer at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit floating point number at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit floating point number at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# Maps the leading character of a descriptor type to a Python type name.

descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks.
# NOTE: SUPER and SYNCHRONIZED deliberately share the value 0x0020; the class
# NOTE: file specification uses that bit for classes and methods respectively.

PUBLIC       = 0x0001
PRIVATE      = 0x0002
PROTECTED    = 0x0004
STATIC       = 0x0008
FINAL        = 0x0010
SUPER        = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE     = 0x0040
TRANSIENT    = 0x0080
NATIVE       = 0x0100
INTERFACE    = 0x0200
ABSTRACT     = 0x0400
STRICT       = 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the iterable 'desired' is set in 'flags'.
    An empty 'desired' always gives a true result.
    """

    # Combine the desired flags with an explicit loop: the 'reduce' built-in
    # is not available on Python 3.

    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
class PythonMethodUtils:

    """
    Mix-in giving methods a Python-style name. The host class must provide
    'get_name' and 'get_descriptor'.
    """

    def get_python_name(self):

        """
        Return the method name followed by "$" and the parameter type names.
        "<init>" is presented as "__init__", whereas "<clinit>" always gives
        "__clinit__" with no parameter suffix.
        """

        name = str(self.get_name())
        if name == "<clinit>":
            return "__clinit__"
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the names of the parameter types joined using '$'."

        # The first element of a method descriptor is the parameter list.

        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base type, object type, array type) tuple
        'descriptor_type'. The suffix 's' gains "[]" for each array dimension.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    "Mix-in giving named items a dotted, Python-style name."

    def get_python_name(self):
        "Return the name with each '/' replaced by '.'."
        return str(self.get_name()).replace("/", ".")

class NameUtils:

    "Mix-in resolving 'name_index' using the constants of 'class_file'."

    def get_name(self):
        "Return the constant holding the name, or None for a zero index."
        if self.name_index != 0:
            # Indexes into the constant pool are one-based.
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    Mix-in delegating to the constant found using 'name_and_type_index'. Each
    method returns None where that index is zero.
    """

    def get_name(self):
        "Return the name provided by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        "Return the field descriptor provided by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        "Return the method descriptor provided by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class DescriptorUtils:

    "Symbol parsing."

    # A parsed type is a tuple (base type, object type, array type) where the
    # base type is the leading descriptor character, the object type is the
    # class name for "L" types, and the array type is the parsed component
    # type for "[" types. Unused elements are None.

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', such as "(I[Ljava/lang/String;)V".
        Return a tuple (parameter types, return type) where the return type is
        None for "V". Raise ValueError for a descriptor lacking the opening
        parenthesis, the closing parenthesis or the return type.
        """

        # Explicit checks are used since assertions vanish under "python -O".

        if s[0:1] != "(":
            raise ValueError("method descriptor does not start with '(': %r" % (s,))
        params = []
        s = s[1:]
        while s[0:1] != ")":
            if not s:
                raise ValueError("method descriptor has no ')' ending the parameters")
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)

        # Step over the closing parenthesis to reach the return type.

        s = s[1:]
        if not s:
            raise ValueError("method descriptor has no return type")
        if s[0:1] == "V":
            return_type = None
        else:
            return_type, s = self._get_field_type(s)
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse a parameter type; see '_get_field_type'."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field descriptor; see '_get_field_type'."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse the component type of an array; see '_get_field_type'."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the start of 's'. Return a tuple (parsed type,
        remaining text). An empty 's' gives a base type of None.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Return (first character, remaining text), or (None, s) for empty 's'."
        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Return (class name, remaining text) where the class name is the text
        before the first ';' in 's', or (None, s) for empty 's'. Raise
        ValueError where 's' is not empty and has no ';'.
        """

        if len(s) > 0:
            s_end = s.find(";")
            if s_end == -1:
                raise ValueError("object type is not terminated by ';': %r" % (s,))
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):
        "Return (component type, remaining text), or (None, s) for empty 's'."
        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
# Conversion of Utf8Info constants to text on both Python 2 and Python 3.

try:
    _to_text = unicode # Python 2: employs Utf8Info.__unicode__
except NameError:
    _to_text = str # Python 3: employs Utf8Info.__str__

# NOTE: Each 'init' method below reads its structure from the start of 'data'
# NOTE: and returns the part of 'data' following that structure.

class ClassInfo(NameUtils, PythonNameUtils):

    "A class constant, holding the index of the constant naming the class."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A reference constant, holding a class index and a name and type index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference to a field."

    def get_descriptor(self):
        "Return the parsed field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference to a method."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A reference to an interface method."

    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A name and type constant, holding a name index and a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the descriptor constant parsed as a field descriptor."
        return self._get_field_descriptor(_to_text(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the descriptor constant parsed as a method descriptor."
        return self._get_method_descriptor(_to_text(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A text constant: a two-byte length followed by that many bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):

        """
        Return the constant as a native string: the stored bytes on Python 2,
        or those bytes decoded as UTF-8 on Python 3 (where '__str__' is not
        permitted to return bytes).
        """

        if isinstance(self.bytes, str):
            return self.bytes
        return self.bytes.decode("utf-8")

    def __unicode__(self):
        "Return the stored bytes decoded as UTF-8."
        return self.bytes.decode("utf-8")

class StringInfo:

    "A string constant, holding the index of the constant with its text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "A four-byte numeric constant, kept as the undecoded bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A signed 32-bit integer constant."

    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A 32-bit floating point constant."

    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    "An eight-byte numeric constant, kept as two unsigned 32-bit halves."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_bytes(self):

        "Return the eight bytes from which the two halves were decoded."

        # The halves are integers, so they must be packed together again
        # before being decoded as a single 64-bit value.

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "A signed 64-bit integer constant."

    def get_value(self):
        return s8(self._get_bytes())

class DoubleInfo(LargeNumInfo):

    "A 64-bit floating point constant."

    def get_value(self):
        return f8(self._get_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "A field or method: access flags, name, descriptor and attributes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field of the class."

    def get_descriptor(self):
        "Return the descriptor constant parsed as a field descriptor."
        return self._get_field_descriptor(_to_text(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method of the class."

    def get_descriptor(self):
        "Return the descriptor constant parsed as a method descriptor."
        return self._get_method_descriptor(_to_text(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    An attribute whose content is kept undecoded in 'info'. The 'data' given
    to 'init' starts at the four-byte attribute length, the attribute name
    index having been consumed by the caller.
    """

    def init(self, data, class_file):
        # Keep the class file as the more specific attribute classes do.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "The attribute holding the index of the source file name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned so that decoding can continue.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "The attribute holding the index of the constant value of a field."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content must be exactly the two-byte index read above.
        if self.attribute_length != 2:
            raise ValueError("ConstantValue attribute length must be 2, not %d" % self.attribute_length)
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "The attribute holding bytecode, an exception table and attributes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # The end of the attribute is found by decoding the nested attributes
        # rather than by using the attribute length.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "The attribute holding a table of exception constant indexes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        # The two-byte indexes follow the count, which ends at offset 6.
        end_of_table = 6 + 2 * self.number_of_exceptions
        self.exception_index_table = [u2(data[index:index+2]) for index in range(6, end_of_table, 2)]
        return data[end_of_table:]

    def get_exception(self, i):
        "Return the constant for entry 'i' of the exception index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "The attribute holding a table of inner class details."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "The 'Synthetic' attribute, whose content is left undecoded."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "The attribute holding a table of line number details."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "The attribute holding a table of local variable details."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "The 'Deprecated' attribute, whose content is left undecoded."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An exception table entry; 'init' takes only the data."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of inner classes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A line number table entry; 'init' takes only the data."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils is required here to provide '_get_field_descriptor'.

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "An entry in the table of local variables."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the descriptor constant parsed as a field descriptor."
        return self._get_field_descriptor(_to_text(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the tag value for an unsupported kind of constant."

    pass

class UnknownAttribute(Exception):

    "Raised with the attribute name for an unsupported kind of attribute."

    pass

# Tables selecting the classes used to decode constants and attributes.

_constant_classes = {
    1 : Utf8Info,
    3 : IntegerInfo,
    4 : FloatInfo,
    5 : LongInfo,
    6 : DoubleInfo,
    7 : ClassInfo,
    8 : StringInfo,
    9 : FieldRefInfo,
    10 : MethodRefInfo,
    11 : InterfaceMethodRefInfo,
    12 : NameAndTypeInfo
    }

_attribute_classes = {
    "SourceFile" : SourceFileAttributeInfo,
    "ConstantValue" : ConstantValueAttributeInfo,
    "Code" : CodeAttributeInfo,
    "Exceptions" : ExceptionsAttributeInfo,
    "InnerClasses" : InnerClassesAttributeInfo,
    "Synthetic" : SyntheticAttributeInfo,
    "LineNumberTable" : LineNumberAttributeInfo,
    "LocalVariableTable" : LocalVariableAttributeInfo,
    "Deprecated" : DeprecatedAttributeInfo
    }

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details. The first eight bytes of 's' are skipped unexamined.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    # The methods below each return a tuple (result, remaining data).

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', its kind being selected by
        the leading tag byte. Raise UnknownTag for an unsupported tag.
        """

        tag = u1(s[0:1])
        const_class = _constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = const_class()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the constant index reaches 'count'.
        Constant index i is found at position i - 1 of the resulting list.
        """

        constants = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            constants.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                constants.append(None)
                i += 1
            i += 1
        return constants, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' instances of 'cls' from 's'."
        items = []
        for i in range(number):
            item = cls()
            s = item.init(s, self)
            items.append(item)
        return items, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' methods from 's'."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' fields from 's'."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', its kind being selected by
        the name found using the leading two-byte constant index. Raise
        UnknownAttribute for an unsupported name.
        """

        attribute_name_index = u2(s[0:2])

        # Obtain a native string so that the name can be compared with string
        # literals on Python 3, where the stored bytes never equal them.

        constant_name = str(self.constants[attribute_name_index - 1])
        attribute_class = _attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes from 's'."
        attributes = []
        for i in range(number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the constant count and then the constants from 's'."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Decode the two-byte access flags from 's'."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Decode a two-byte constant index from 's', giving the constant."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Decode a two-byte constant index from 's', giving the constant, or
        None where the index is zero (the specification's way of indicating
        that there is no superclass).
        """

        index = u2(s[0:2])
        if index == 0:
            # Avoid selecting the last constant through an index of -1.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Decode the interface count and then that many constants from 's'."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the field count and then the fields from 's'."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode the attribute count and then the attributes from 's'."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the method count and then the methods from 's'."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data; the file is closed once it has been read.
    with open(sys.argv[1], "rb") as f:
        c = ClassFile(f.read())

# vim: tabstop=4 expandtab shiftwidth=4