paul@0 | 1 | #!/usr/bin/env python |
paul@0 | 2 | |
paul@0 | 3 | """ |
paul@0 | 4 | Java class file decoder. Specification found at the following URL: |
paul@0 | 5 | http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html |
paul@0 | 6 | """ |
paul@0 | 7 | |
paul@0 | 8 | import struct |
paul@0 | 9 | |
paul@1 | 10 | # Utility functions. |
paul@1 | 11 | |
def u1(data):

    """
    Return the first byte of 'data' interpreted as an unsigned 8-bit integer.
    """

    (value,) = struct.unpack(">B", data[:1])
    return value
paul@1 | 14 | |
def u2(data):

    """
    Return the first two bytes of 'data' interpreted as a big-endian unsigned
    16-bit integer.
    """

    (value,) = struct.unpack(">H", data[:2])
    return value
paul@1 | 17 | |
def u4(data):

    """
    Return the first four bytes of 'data' interpreted as a big-endian unsigned
    32-bit integer.
    """

    (value,) = struct.unpack(">L", data[:4])
    return value
paul@1 | 20 | |
paul@0 | 21 | # Constant information. |
paul@1 | 22 | # Objects of these classes are not directly aware of the class they reside in. |
paul@0 | 23 | |
class ClassInfo:

    "A constant holding the index of a name (decoded for constant tag 7)."

    def init(self, data):

        """
        Read the two-byte name index from the start of 'data' and return the
        data following it.
        """

        index_field, remainder = data[:2], data[2:]
        self.name_index = u2(index_field)
        return remainder
paul@0 | 28 | |
class RefInfo:

    "Base for constants holding a class index and a name-and-type index."

    def init(self, data):

        """
        Read two consecutive two-byte indexes from the start of 'data' and
        return the data following them.
        """

        for offset, field in ((0, "class_index"), (2, "name_and_type_index")):
            setattr(self, field, u2(data[offset:offset + 2]))
        return data[4:]
paul@0 | 34 | |
class FieldRefInfo(RefInfo):

    "A reference constant decoded for constant tag 9; adds nothing to RefInfo."
paul@0 | 37 | |
class MethodRefInfo(RefInfo):

    "A reference constant decoded for constant tag 10; adds nothing to RefInfo."
paul@0 | 40 | |
class InterfaceMethodRefInfo(RefInfo):

    "A reference constant decoded for constant tag 11; adds nothing to RefInfo."
paul@0 | 43 | |
class NameAndTypeInfo:

    "A constant holding a name index and a descriptor index."

    def init(self, data):

        """
        Read two consecutive two-byte indexes from the start of 'data' and
        return the data following them.
        """

        for offset, field in ((0, "name_index"), (2, "descriptor_index")):
            setattr(self, field, u2(data[offset:offset + 2]))
        return data[4:]
paul@0 | 49 | |
class Utf8Info:

    "A constant holding a length-prefixed sequence of UTF-8 encoded bytes."

    def init(self, data):

        """
        Read a two-byte length followed by that many bytes from the start of
        'data', storing them as 'length' and 'bytes'. Return the data following
        the constant.
        """

        self.length = u2(data[0:2])
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):

        """
        Return the constant as a native string: the raw bytes on Python 2 and
        the decoded text on Python 3.
        """

        # On Python 2 a byte string already is a 'str' and is returned as-is.
        # On Python 3 __str__ must return text, so the bytes are decoded.
        if isinstance(self.bytes, str):
            return self.bytes
        return self.bytes.decode("utf-8")

    def __unicode__(self):

        "Return the constant decoded from UTF-8 as text."

        # Decoding via the bytes object avoids the Python-2-only 'unicode'
        # builtin while producing the same result there.
        # NOTE(review): class files may use a modified UTF-8 encoding which a
        # strict UTF-8 decoder can reject - confirm against real inputs.
        return self.bytes.decode("utf-8")
paul@0 | 61 | |
class StringInfo:

    "A constant holding the index of a string (decoded for constant tag 8)."

    def init(self, data):

        """
        Read the two-byte string index from the start of 'data' and return the
        data following it.
        """

        index_field, remainder = data[:2], data[2:]
        self.string_index = u2(index_field)
        return remainder
paul@0 | 66 | |
class SmallNumInfo:

    "Base for numeric constants occupying four bytes."

    def init(self, data):

        """
        Store the first four bytes of 'data' in 'bytes' as an unsigned 32-bit
        integer and return the data following them.
        """

        value_field, remainder = data[:4], data[4:]
        self.bytes = u4(value_field)
        return remainder
paul@0 | 71 | |
class IntegerInfo(SmallNumInfo):

    "A four-byte constant decoded for constant tag 3."
paul@0 | 74 | |
class FloatInfo(SmallNumInfo):

    """
    A four-byte constant decoded for constant tag 4. The value is kept as the
    unsigned integer read by SmallNumInfo; no float conversion is done here.
    """
paul@0 | 77 | |
class LargeNumInfo:

    "Base for numeric constants occupying eight bytes."

    def init(self, data):

        """
        Store the first eight bytes of 'data' as two unsigned 32-bit integers,
        'high_bytes' then 'low_bytes', and return the data following them.
        """

        for offset, field in ((0, "high_bytes"), (4, "low_bytes")):
            setattr(self, field, u4(data[offset:offset + 4]))
        return data[8:]
paul@0 | 83 | |
class LongInfo(LargeNumInfo):

    "An eight-byte constant decoded for constant tag 5."
paul@0 | 86 | |
class DoubleInfo(LargeNumInfo):

    """
    An eight-byte constant decoded for constant tag 6. The value is kept as the
    two unsigned integers read by LargeNumInfo; no float conversion is done
    here.
    """
paul@0 | 89 | |
paul@0 | 90 | # Other information. |
paul@1 | 91 | # Objects of these classes are generally aware of the class they reside in. |
paul@0 | 92 | |
class ItemInfo:

    "Common decoding and descriptor parsing for field and method entries."

    def init(self, data, class_file):

        """
        Decode the access flags, name index, descriptor index and attributes
        from the start of 'data', remembering the 'class_file' used to look up
        constants. Return the data remaining after the item.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    # Symbol parsing.

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', which must start with "(". Return a
        tuple (params, return_type) where 'params' is a list of parsed
        parameter types and 'return_type' is a parsed type, or None when the
        descriptor's return type is "V".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here s[0] is ")" and s[1] is the first character of the return type.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type, s = None, s[1:]
        return params, return_type

    def _get_parameter_descriptor(self, s):

        "Parse one parameter type from the start of 's'; see _get_field_type."

        return self._get_field_type(s)

    def _get_field_descriptor(self, s):

        "Parse a field descriptor from the start of 's'; see _get_field_type."

        return self._get_field_type(s)

    def _get_component_type(self, s):

        "Parse an array component type from 's'; see _get_field_type."

        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the start of 's'. Return a tuple
        ((base_type, object_type, array_type), remaining) where 'base_type' is
        the first character of 's' (or None if 's' is empty), 'object_type' is
        the name following an "L" up to the next ";", and 'array_type' is the
        parsed component type following a "[".
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):

        "Split the first character from 's', giving (None, s) if 's' is empty."

        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Split 's' at its first ";", returning the text before it and the text
        after it, or (None, s) if 's' is empty.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):

        """
        Parse the component type of an array from the start of 's', returning
        (component_type, remaining), or (None, s) if 's' is empty.
        """

        if len(s) > 0:
            # The "[" was already consumed by _get_base_type, so the component
            # type starts at the first character of 's'. Skipping another
            # character here would drop the component's own type character.
            return self._get_component_type(s)
        else:
            return None, s

    # Processed details.

    def get_name(self):

        "Return the item's name, decoded from the constant at 'name_index'."

        # Constant indexes are one-based, hence the adjustment. The bytes are
        # decoded directly so that this works without the Python-2-only
        # 'unicode' builtin.
        name_constant = self.class_file.constants[self.name_index - 1]
        return name_constant.bytes.decode("utf-8")
paul@0 | 160 | |
class FieldInfo(ItemInfo):

    "A field entry of a class file."

    def get_descriptor(self):

        """
        Return the result of parsing this field's descriptor: a tuple
        ((base_type, object_type, array_type), remaining).
        """

        # Constant indexes are one-based. The bytes are decoded directly so
        # that this works without the Python-2-only 'unicode' builtin.
        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return self._get_field_descriptor(descriptor.bytes.decode("utf-8"))
paul@0 | 164 | |
class MethodInfo(ItemInfo):

    "A method entry of a class file."

    def get_descriptor(self):

        """
        Return the result of parsing this method's descriptor: a tuple
        (params, return_type).
        """

        # Constant indexes are one-based. The bytes are decoded directly so
        # that this works without the Python-2-only 'unicode' builtin.
        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return self._get_method_descriptor(descriptor.bytes.decode("utf-8"))
paul@0 | 168 | |
class AttributeInfo:

    "A generic attribute whose contents are kept undecoded in 'info'."

    def init(self, data, class_file):

        """
        Read a four-byte length from the start of 'data', store that many
        following bytes in 'info', and return the data after them. The
        'class_file' argument is not used here.
        """

        self.attribute_length = u4(data[0:4])
        end_of_info = 4 + self.attribute_length
        self.info = data[4:end_of_info]
        return data[end_of_info:]
paul@0 | 174 | |
paul@0 | 175 | # NOTE: Decode the different attribute formats. |
paul@0 | 176 | |
class SourceFileAttributeInfo(AttributeInfo):

    "The attribute used for the name 'SourceFile'; contents are not decoded."
paul@0 | 179 | |
class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant value."

    def init(self, data, class_file):

        """
        Read the four-byte length and two-byte constant value index from the
        start of 'data', check that the attribute occupies exactly six bytes,
        and return the data following it.
        """

        length_field, index_field = data[:4], data[4:6]
        self.attribute_length = u4(length_field)
        self.constant_value_index = u2(index_field)
        end_of_attribute = 4 + self.attribute_length
        assert end_of_attribute == 6
        return data[end_of_attribute:]
paul@0 | 186 | |
class CodeAttributeInfo(AttributeInfo):

    "An attribute holding code, an exception table and nested attributes."

    def init(self, data, class_file):

        """
        Decode the attribute from the start of 'data': its length, stack and
        local variable limits, the code bytes, the exception table and any
        nested attributes (read using 'class_file'). Return the data remaining
        after the attribute.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12 + self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code + 2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            # Keep each decoded entry; without this the table stays empty.
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data
paul@0 | 204 | |
paul@0 | 205 | class ExceptionsAttributeInfo(AttributeInfo): |
paul@1 | 206 | def init(self, data, class_file): |
paul@1 | 207 | self.class_file = class_file |
paul@1 | 208 | self.attribute_length = u4(data[0:4]) |
paul@1 | 209 | self.number_of_exceptions = u2(data[4:6]) |
paul@1 | 210 | self.exception_index_table = [] |
paul@1 | 211 | index = 6 |
paul@1 | 212 | for i in range(0, self.number_of_exceptions): |
paul@1 | 213 | self.exception_index_table.append(u2(data[index:index+2])) |
paul@1 | 214 | index += 2 |
paul@1 | 215 | return data[index:] |
paul@1 | 216 | |
paul@1 | 217 | def get_exception(self, i): |
paul@1 | 218 | exception_index = self.exception_index_table[i] |
paul@1 | 219 | return self.class_file.constants[exception_index - 1] |
paul@0 | 220 | |
class InnerClassesAttributeInfo(AttributeInfo):

    "The attribute used for the name 'InnerClasses'; contents are not decoded."
paul@0 | 223 | |
class SyntheticAttributeInfo(AttributeInfo):

    "The attribute used for the name 'Synthetic'; contents are not decoded."
paul@0 | 226 | |
class LineNumberAttributeInfo(AttributeInfo):

    "The attribute used for the name 'LineNumberTable'; contents are not decoded."
paul@0 | 229 | |
class LocalVariableAttributeInfo(AttributeInfo):

    "The attribute used for the name 'LocalVariableTable'; contents are not decoded."
paul@0 | 232 | |
class DeprecatedAttributeInfo(AttributeInfo):

    "The attribute used for the name 'Deprecated'; contents are not decoded."
paul@0 | 235 | |
class ExceptionInfo:

    "An entry holding four two-byte values read from an exception table."

    def __init__(self):

        "Create an entry with all four fields unset."

        self.start_pc = None
        self.end_pc = None
        self.handler_pc = None
        self.catch_type = None

    def init(self, data):

        """
        Read 'start_pc', 'end_pc', 'handler_pc' and 'catch_type', in that
        order, as two-byte values from the start of 'data'. Return the data
        following them.
        """

        fields = ("start_pc", "end_pc", "handler_pc", "catch_type")
        for position, field in enumerate(fields):
            offset = 2 * position
            setattr(self, field, u2(data[offset:offset + 2]))
        return data[8:]
paul@0 | 246 | |
class UnknownTag(Exception):

    "Raised when a constant has a tag for which no constant class is known."
paul@0 | 249 | |
class UnknownAttribute(Exception):

    "Raised when an attribute has a name for which no attribute class is known."
paul@0 | 252 | |
paul@0 | 253 | # Abstractions for the main structures. |
paul@0 | 254 | |
class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without inspection.
        # NOTE(review): presumably the magic number and version fields -
        # confirm against the class file specification.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode one constant from the start of 's', choosing its class from the
        leading tag byte. Return the constant and the remaining data. Raise
        UnknownTag for a tag with no associated class.
        """

        tag = u1(s[0:1])
        # Built per call so that the class names are only needed at run time.
        constant_classes = {
            1: Utf8Info,
            3: IntegerInfo,
            4: FloatInfo,
            5: LongInfo,
            6: DoubleInfo,
            7: ClassInfo,
            8: StringInfo,
            9: FieldRefInfo,
            10: MethodRefInfo,
            11: InterfaceMethodRefInfo,
            12: NameAndTypeInfo,
        }
        constant_class = constant_classes.get(tag)
        if constant_class is None:
            raise UnknownTag(tag)
        const = constant_class()
        s = const.init(s[1:])
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the running index reaches 'count'.
        Return the list of constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' items from 's', each an instance of 'cls' initialised
        with the data and this class file. Return the items and the remaining
        data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):

        "Decode 'number' MethodInfo items from 's'."

        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):

        "Decode 'number' FieldInfo items from 's'."

        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode one attribute from the start of 's', choosing its class from
        the name found via the leading two-byte constant index. Return the
        attribute and the remaining data. Raise UnknownAttribute for a name
        with no associated class.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        # On Python 3 the name is a bytes object which never equals a text
        # literal, so it is decoded before the lookup. On Python 2 it is
        # already a 'str' and is left untouched.
        if not isinstance(constant_name, str):
            constant_name = constant_name.decode("utf-8")
        # Built per call so that the class names are only needed at run time.
        attribute_classes = {
            "SourceFile": SourceFileAttributeInfo,
            "ConstantValue": ConstantValueAttributeInfo,
            "Code": CodeAttributeInfo,
            "Exceptions": ExceptionsAttributeInfo,
            "InnerClasses": InnerClassesAttributeInfo,
            "Synthetic": SyntheticAttributeInfo,
            "LineNumberTable": LineNumberAttributeInfo,
            "LocalVariableTable": LocalVariableAttributeInfo,
            "Deprecated": DeprecatedAttributeInfo,
        }
        attribute_class = attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        "Decode 'number' attributes from 's'; return them and the remaining data."

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):

        "Read a two-byte count from 's' and decode the constants following it."

        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):

        "Read the two-byte access flags from 's'; return them and the remaining data."

        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):

        """
        Read a two-byte constant index from 's' and return the constant it
        refers to, along with the remaining data.
        """

        index = u2(s[0:2])
        # Constant indexes are one-based, hence the adjustment.
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Read a two-byte constant index from 's' and return the constant it
        refers to, or None if the index is zero, along with the remaining data.
        """

        index = u2(s[0:2])
        # An index of zero refers to no constant at all. Looking it up would
        # wrap around to constants[-1] and silently return the last constant.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):

        """
        Read a two-byte count from 's' followed by that many two-byte constant
        indexes. Return the referenced constants and the remaining data.
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):

        "Read a two-byte count from 's' and decode the fields following it."

        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):

        "Read a two-byte count from 's' and decode the attributes following it."

        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):

        "Read a two-byte count from 's' and decode the methods following it."

        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])
paul@0 | 399 | |
if __name__ == "__main__":
    import sys
    # The decoder slices and unpacks raw bytes, so the file is read in binary
    # mode, and it is closed as soon as its contents have been read.
    with open(sys.argv[1], "rb") as f:
        c = ClassFile(f.read())
paul@0 | 404 | |
paul@0 | 405 | # vim: tabstop=4 expandtab shiftwidth=4 |