#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html

The module is written to run unchanged on Python 2 and Python 3: class file
data is handled as a byte string throughout, and text is only produced where
a name or a descriptor is needed.
"""

import struct # for general decoding of class files

try:
    unicode
except NameError:
    # Python 3 has no separate "unicode" type; "str" is the text type there.
    unicode = str

# Utility functions.

def u1(data):
    "Return the big-endian unsigned 8-bit integer at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the big-endian unsigned 16-bit integer at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def u4(data):
    "Return the big-endian unsigned 32-bit integer at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the big-endian signed 32-bit integer at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the big-endian signed 64-bit integer at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the big-endian 32-bit float at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the big-endian 64-bit float at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful mix-ins.

class PythonMethodUtils:

    """
    Mix-in providing a Python-level name built from the method name and its
    parameter types. Requires 'get_name' and 'get_descriptor' on the object.
    """

    def get_python_name(self):

        """
        Return the name, then "$", then the "$"-joined parameter type names.
        The special name "<init>" is presented as "__init__".
        """

        name = str(self.get_name())
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the names of the descriptor's parameter types joined by '$'."
        # Element 0 of a method descriptor is the list of parameter types.
        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base_type, object_type, array_type) tuple
        'descriptor_type'. 's' accumulates one "[]" suffix per array
        dimension while recursing into the component type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    "Mix-in for objects whose Python-level name is simply their name."

    def get_python_name(self):
        "Return the result of 'get_name' unchanged."
        return self.get_name()

class NameUtils:

    "Mix-in resolving 'name_index' against the class file's constants."

    def get_name(self):
        "Return the constant referred to by 'name_index', or None if it is 0."
        if self.name_index != 0:
            # Constant indexes are 1-based; the constants list is 0-based.
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    Mix-in delegating name and descriptor queries to the constant referred
    to by 'name_and_type_index'. Each method returns None when the index is 0.
    """

    def get_name(self):
        "Return the name held by the referenced name-and-type constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        "Return the field descriptor held by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        "Return the method descriptor held by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class DescriptorUtils:

    """
    Symbol parsing.

    A parsed type is a tuple (base_type, object_type, array_type): 'base_type'
    is the first character of the type, 'object_type' is the class name when
    the base type is "L" (otherwise None), and 'array_type' is the parsed
    component type when the base type is "[" (otherwise None).
    """

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', which must start with "(".
        Return (params, return_type) where 'params' is a list of parsed types
        and 'return_type' is a parsed type, or None for a "V" (void) return.
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here s[0] is ")" and s[1] starts the return type.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type, s = None, s[1:]
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse one parameter type; return (parsed type, remaining text)."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field descriptor; return (parsed type, remaining text)."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse an array component type; return (parsed type, remaining text)."
        return self._get_field_type(s)

    def _get_field_type(self, s):
        "Parse one type from the start of 's'; return (parsed type, remaining text)."
        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Return (first character, remaining text), or (None, s) if 's' is empty."
        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Return (class name, remaining text) where the class name is the text
        up to the terminating ";", or (None, s) if 's' is empty.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):
        "Return (parsed component type, remaining text), or (None, s) if 's' is empty."
        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
#
# Every 'init' method below takes the data following the part already decoded
# (for constants: following the tag byte; for attributes: following the
# attribute name index) and returns the data remaining after the structure.

class ClassInfo(NameUtils, PythonNameUtils):

    "Constant pool entry with tag 7: a reference to the name of a class."

    def init(self, data, class_file):
        "Decode the name index; return the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "Common base of the field, method and interface method reference entries."

    def init(self, data, class_file):
        "Decode the class and name-and-type indexes; return the remaining data."
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "Constant pool entry with tag 9: a reference to a field."

    def get_descriptor(self):
        "Return the parsed field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "Constant pool entry with tag 10: a reference to a method."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "Constant pool entry with tag 11: a reference to an interface method."

    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonMethodUtils):

    "Constant pool entry with tag 12: a name paired with a descriptor."

    def init(self, data, class_file):
        "Decode the name and descriptor indexes; return the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "Constant pool entry with tag 1: a length-prefixed string of bytes."

    def init(self, data, class_file):
        "Decode the length and the bytes; return the remaining data."
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):

        """
        Return the entry as a native string: the raw bytes on Python 2 and
        the decoded text on Python 3 (where __str__ must not return bytes).
        """

        if str is bytes:
            return self.bytes
        return self.__unicode__()

    def __unicode__(self):
        "Return the bytes decoded as UTF-8 text."
        # NOTE: Class files use a modified UTF-8; the few sequences that
        # NOTE: differ from standard UTF-8 are not treated specially here.
        return self.bytes.decode("utf-8")

class StringInfo:

    "Constant pool entry with tag 8: a reference to the text of a string."

    def init(self, data, class_file):
        "Decode the string index; return the remaining data."
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "Common base of the 4-byte numeric entries; keeps the raw bytes."

    def init(self, data, class_file):
        "Store the four value bytes; return the remaining data."
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "Constant pool entry with tag 3: a signed 32-bit integer."

    def get_value(self):
        "Return the value as an integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "Constant pool entry with tag 4: a 32-bit float."

    def get_value(self):
        "Return the value as a float."
        return f4(self.bytes)

class LargeNumInfo:

    """
    Common base of the 8-byte numeric entries. The value is kept as two
    unsigned 32-bit integers, 'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        "Decode the two 32-bit halves; return the remaining data."
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_raw_bytes(self):
        "Return the original 8 bytes rebuilt from the two decoded halves."
        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "Constant pool entry with tag 5: a signed 64-bit integer."

    def get_value(self):
        "Return the value as an integer."
        # The halves are integers, so they must be repacked before decoding.
        return s8(self._get_raw_bytes())

class DoubleInfo(LargeNumInfo):

    "Constant pool entry with tag 6: a 64-bit float."

    def get_value(self):
        "Return the value as a float."
        # The halves are integers, so they must be repacked before decoding.
        return f8(self._get_raw_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils, PythonMethodUtils):

    "Common base of field and method entries."

    def init(self, data, class_file):
        "Decode the flags, indexes and attributes; return the remaining data."
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):

    "An entry of the class file's field table."

    def get_descriptor(self):
        "Return (parsed type, remaining text) for the field's descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_python_name(self):

        """
        Return the field's name. Fields have no parameter list, so the
        method name mangling inherited from ItemInfo cannot be applied to
        them; the plain name is used, as it is for FieldRefInfo.
        """

        return self.get_name()

class MethodInfo(ItemInfo):

    "An entry of the class file's method table."

    def get_descriptor(self):
        "Return (params, return_type) for the method's descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    "Generic attribute: keeps the attribute's content undecoded in 'info'."

    def init(self, data, class_file):
        "Store the length and raw content; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "The 'SourceFile' attribute; 'get_name' yields the source file name."

    def init(self, data, class_file):
        "Decode the source file index; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be handed back so that decoding can continue.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "The 'ConstantValue' attribute of a field."

    def init(self, data, class_file):
        "Decode the constant value index; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant via its 'get_value'."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "The 'Code' attribute of a method."

    def init(self, data, class_file):

        """
        Decode the stack/locals limits, the code bytes, the exception table
        and the nested attributes; return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "The 'Exceptions' attribute of a method."

    def init(self, data, class_file):
        "Decode the table of exception indexes; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        # The table of 2-byte indexes starts at offset 6.
        end = 6 + 2 * self.number_of_exceptions
        self.exception_index_table = [u2(data[i:i+2]) for i in range(6, end, 2)]
        return data[end:]

    def get_exception(self, i):
        "Return the constant referred to by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "The 'InnerClasses' attribute of a class."

    def init(self, data, class_file):
        "Decode the table of inner classes; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "The 'Synthetic' attribute; handled as a generic attribute."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "The 'LineNumberTable' attribute."

    def init(self, data, class_file):
        "Decode the line number table; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "The 'LocalVariableTable' attribute."

    def init(self, data, class_file):
        "Decode the local variable table; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # Entries resolve names through the class file, so it is passed on.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "The 'Deprecated' attribute; handled as a generic attribute."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An entry of the exception table of a 'Code' attribute."

    def init(self, data):
        "Decode the four 2-byte fields; return the remaining data."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry of the table of an 'InnerClasses' attribute."

    def init(self, data, class_file):
        "Decode the indexes and access flags; return the remaining data."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry of the table of a 'LineNumberTable' attribute."

    def init(self, data):
        "Decode the start pc and line number; return the remaining data."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "An entry of the table of a 'LocalVariableTable' attribute."

    def init(self, data, class_file):
        "Decode the five 2-byte fields; return the remaining data."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return (parsed type, remaining text) for the variable's descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised for a constant pool tag with no decoder; carries the tag."

    pass

class UnknownAttribute(Exception):

    "Raised for an attribute name with no decoder; carries the name."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Decoder class for each supported constant pool tag.
    _constant_classes = {
        1: Utf8Info,
        3: IntegerInfo,
        4: FloatInfo,
        5: LongInfo,
        6: DoubleInfo,
        7: ClassInfo,
        8: StringInfo,
        9: FieldRefInfo,
        10: MethodRefInfo,
        11: InterfaceMethodRefInfo,
        12: NameAndTypeInfo,
        }

    # Decoder class for each supported attribute name.
    _attribute_classes = {
        "SourceFile": SourceFileAttributeInfo,
        "ConstantValue": ConstantValueAttributeInfo,
        "Code": CodeAttributeInfo,
        "Exceptions": ExceptionsAttributeInfo,
        "InnerClasses": InnerClassesAttributeInfo,
        "Synthetic": SyntheticAttributeInfo,
        "LineNumberTable": LineNumberAttributeInfo,
        "LocalVariableTable": LocalVariableAttributeInfo,
        "Deprecated": DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        's' must be the complete content of a class file as a byte string.
        UnknownTag or UnknownAttribute is raised for entries which have no
        decoder.
        """

        # The first 8 bytes (magic number and version numbers) are skipped.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', choosing the decoder from
        its tag byte. Return (constant, remaining data).
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constants for a table whose declared count is 'count'.
        Return (list of constants, remaining data); the list holds None in
        the slot following each long or double entry.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' objects of class 'cls'; return (list, remaining data)."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' methods; return (list, remaining data)."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' fields; return (list, remaining data)."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing the decoder from
        the attribute's name. Return (attribute, remaining data).
        """

        attribute_name_index = u2(s[0:2])
        # Use the native string form so that the comparison with the names in
        # the table works on both Python 2 and Python 3.
        constant_name = str(self.constants[attribute_name_index - 1])
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes; return (list, remaining data)."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the count-prefixed constant pool; return (list, remaining data)."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return (access flags, remaining data)."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return (constant describing this class, remaining data)."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return (constant describing the superclass, remaining data). The
        constant is None when the index is zero, which the specification
        uses for a class with no superclass.
        """

        index = u2(s[0:2])
        if index == 0:
            # Without this check the lookup would yield the last constant.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Decode the count-prefixed interface table; return (list, remaining data)."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the count-prefixed field table; return (list, remaining data)."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode a count-prefixed attribute table; return (list, remaining data)."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the count-prefixed method table; return (list, remaining data)."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them as such and close the file.
    with open(sys.argv[1], "rb") as f:
        c = ClassFile(f.read())

# vim: tabstop=4 expandtab shiftwidth=4