#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each function decodes a big-endian value from the start of the given data,
# ignoring any bytes beyond those needed.

def u1(data):
    "Return the unsigned 8-bit integer at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit floating point number at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit floating point number at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Mapping from descriptor base type characters to Python type names.

descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Useful mix-ins.

class PythonMethodUtils:

    """
    A mix-in providing Python-style names for methods. The host class must
    provide 'get_name' and 'get_descriptor', the latter returning a
    (parameters, return type) tuple as produced by
    'DescriptorUtils._get_method_descriptor'.
    """

    def get_python_name(self):

        """
        Return the method name followed by "$" and the "$"-separated names of
        the parameter types. The special name "<init>" becomes "__init__".
        """

        name = str(self.get_name())
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the parameter types of the descriptor joined using '$'."
        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the given 'descriptor_type' - a (base type, object
        type, array type) tuple - with the suffix 's' appended. Each array
        dimension contributes "[]" to the suffix.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    "A mix-in exposing the result of 'get_name' as the Python name."

    def get_python_name(self):
        return self.get_name()

class NameUtils:

    """
    A mix-in for objects having 'name_index' and 'class_file' attributes.
    """

    def get_name(self):

        """
        Return the constant referenced by 'name_index', or None if the index
        is zero. Indexes count from one whereas the constants list counts from
        zero.
        """

        if self.name_index != 0:
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    A mix-in for objects having 'name_and_type_index' and 'class_file'
    attributes. Each method delegates to the referenced constant, returning
    None where the index is zero.
    """

    def get_name(self):
        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_and_type_index - 1].get_name()

    def get_field_descriptor(self):
        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()

    def get_method_descriptor(self):
        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()

class DescriptorUtils:

    """
    Symbol parsing.

    Types are represented as (base type, object type, array type) tuples:
    the object type is only set for the "L" base type; the array type is only
    set for the "[" base type and is itself such a tuple describing the
    component type. The lower-level methods return a (result, remaining
    string) tuple.
    """

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', returning a tuple containing the list
        of parameter types and the return type, where the return type is None
        for "V".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here, s[0] is the closing bracket and s[1] starts the return type.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type, s = None, s[1:]
        return params, return_type

    def _get_parameter_descriptor(self, s):
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        return self._get_field_type(s)

    def _get_component_type(self, s):
        return self._get_field_type(s)

    def _get_field_type(self, s):
        "Parse a single type from the start of 's'."
        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Return the first character of 's', or None if 's' is empty."
        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):
        "Return the text of 's' up to the terminating ';', which is discarded."
        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):
        "Return the component type found at the start of 's'."
        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
# Each 'init' method decodes the object from the start of 'data' and returns
# the data remaining afterwards.

class ClassInfo(NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):
    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):
    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):
    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A length-prefixed string constant whose raw content is kept in 'bytes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        return self.bytes.decode("utf-8")

class StringInfo:
    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "A four byte numeric constant whose raw content is kept in 'bytes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    """
    An eight byte numeric constant, kept as two unsigned 32-bit integers in
    'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_raw_bytes(self):

        """
        Return the original eight bytes, rebuilt from the two halves. The
        halves are integers, so they cannot be added together and passed to
        the decoding functions, which require the raw data.
        """

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):
    def get_value(self):
        "Return the constant as a signed 64-bit integer."
        return s8(self._get_raw_bytes())

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        "Return the constant as a 64-bit floating point number."
        return f8(self._get_raw_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "Common decoding of fields and methods, including their attributes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose undecoded content is kept in 'info'. The data
    given to 'init' starts at the attribute length field.
    """

    def init(self, data, class_file):
        # Record the class file for consistency with the other classes.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.
# The data given to each 'init' method starts at the attribute length field,
# and each method returns the data remaining after the attribute.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the constant index of a source file name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # Return the remaining data like the other attributes; without this,
        # the caller would attempt to continue decoding using None.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute referring to a constant providing a value."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The attribute should only contain the two byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding the raw code of a method in 'code', along with an
    exception table and nested attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # The nested attributes follow the exception table.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of constant indexes, one per exception."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo objects in 'classes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # Local variables need the class file in order to look up their
            # names and descriptors.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.
# Each 'init' method decodes the object from the start of 'data' and returns
# the data remaining afterwards.

class ExceptionInfo:

    "An entry in the exception table of a code attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an inner classes attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a line number attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils is required by get_descriptor, which would otherwise fail
# upon trying to call the missing _get_field_descriptor method.

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "An entry in the table of a local variable attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of the variable."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Classes used to decode constants, indexed by tag value.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # Classes used to decode attributes, indexed by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        # NOTE: These are presumably the magic number and version details
        # NOTE: described in the specification.

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    # The methods below each return a tuple containing the decoded result and
    # the data remaining after that result.

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', choosing the class of the
        constant using the leading tag byte. UnknownTag is raised for
        unsupported tag values.
        """

        tag = u1(s[0:1])
        cls = self._constant_classes.get(tag)
        if cls is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = cls()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the entry index reaches 'count'.
        Entry numbering starts at one.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' instances of 'cls' from 's'."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing the class of the
        attribute using the name found in the constants. UnknownAttribute is
        raised for unsupported attribute names.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        cls = self._attribute_classes.get(constant_name)
        if cls is None:
            raise UnknownAttribute(constant_name)
        attribute = cls()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes from 's'."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):

        """
        Return the constant referenced by the index at the start of 's', or
        None if the index is zero.
        """

        index = u2(s[0:2])
        if index == 0:
            # A zero index refers to no constant at all; without this test,
            # the last constant in the list would be returned by mistake.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    # The superclass is decoded identically, where a zero index is presumably
    # used by classes without a superclass.

    _get_super_class = _get_this_class

    def _get_interfaces(self, s):
        "Return the constants referenced by the table of interface indexes."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data, so newline translation must be avoided.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4