paul@0 | 1 | #!/usr/bin/env python |
paul@0 | 2 | |
paul@0 | 3 | """ |
paul@0 | 4 | Java class file decoder. Specification found at the following URL: |
paul@0 | 5 | http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html |
paul@0 | 6 | """ |
paul@0 | 7 | |
paul@0 | 8 | import struct |
paul@0 | 9 | |
paul@0 | 10 | # Constant information. |
paul@0 | 11 | |
class ClassInfo:

    "A constant pool entry recording the index of a class name."

    def init(self, data):

        """
        Read the big-endian 16-bit name index from the first two bytes of
        'data' and return the data following those bytes.
        """

        (index,) = struct.unpack(">H", data[:2])
        self.name_index = index
        return data[2:]
paul@0 | 16 | |
class RefInfo:

    "The layout shared by the field, method and interface method references."

    def init(self, data):

        """
        Read two consecutive big-endian 16-bit values from 'data': the class
        index followed by the name-and-type index. Return the data following
        those four bytes.
        """

        (class_index,) = struct.unpack(">H", data[:2])
        self.class_index = class_index
        (name_and_type_index,) = struct.unpack(">H", data[2:4])
        self.name_and_type_index = name_and_type_index
        return data[4:]

class FieldRefInfo(RefInfo):

    "A field reference; decoded exactly as RefInfo."

class MethodRefInfo(RefInfo):

    "A method reference; decoded exactly as RefInfo."

class InterfaceMethodRefInfo(RefInfo):

    "An interface method reference; decoded exactly as RefInfo."
paul@0 | 31 | |
class NameAndTypeInfo:

    "A constant pool entry pairing a name index with a descriptor index."

    def init(self, data):

        """
        Read the name index and then the descriptor index, each a big-endian
        16-bit value, from the start of 'data'. Return the data following
        those four bytes.
        """

        (name_index,) = struct.unpack(">H", data[:2])
        self.name_index = name_index
        (descriptor_index,) = struct.unpack(">H", data[2:4])
        self.descriptor_index = descriptor_index
        return data[4:]
paul@0 | 37 | |
class Utf8Info:

    "A constant pool entry holding a length-prefixed run of bytes."

    def init(self, data):

        """
        Read a big-endian 16-bit length from 'data', keep that many of the
        bytes which follow in the 'bytes' attribute, and return whatever comes
        after them.
        """

        (self.length,) = struct.unpack(">H", data[:2])
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):

        "Return the stored bytes unchanged."

        return self.bytes

    def __unicode__(self):

        "Return the stored bytes decoded as UTF-8."

        return unicode(self.bytes, "utf-8")
paul@0 | 49 | |
class StringInfo:

    "A constant pool entry recording the index of a string's contents."

    def init(self, data):

        """
        Read the big-endian 16-bit string index from the first two bytes of
        'data' and return the data following those bytes.
        """

        (index,) = struct.unpack(">H", data[:2])
        self.string_index = index
        return data[2:]
paul@0 | 54 | |
class SmallNumInfo:

    "A constant pool entry holding a single 32-bit quantity."

    def init(self, data):

        """
        Read the first four bytes of 'data' as a big-endian unsigned 32-bit
        number, keep it in the 'bytes' attribute, and return the data which
        follows.
        """

        (value,) = struct.unpack(">L", data[:4])
        self.bytes = value
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "An integer constant; the value is kept as read by SmallNumInfo."

class FloatInfo(SmallNumInfo):

    "A float constant; the raw 32 bits are kept as read by SmallNumInfo."
paul@0 | 65 | |
class LargeNumInfo:

    "A constant pool entry holding a 64-bit quantity as two 32-bit halves."

    def init(self, data):

        """
        Read two consecutive big-endian unsigned 32-bit numbers from 'data',
        keeping the first as 'high_bytes' and the second as 'low_bytes'.
        Return the data following those eight bytes.
        """

        (high,) = struct.unpack(">L", data[:4])
        self.high_bytes = high
        (low,) = struct.unpack(">L", data[4:8])
        self.low_bytes = low
        return data[8:]

class LongInfo(LargeNumInfo):

    "A long constant; the halves are kept as read by LargeNumInfo."

class DoubleInfo(LargeNumInfo):

    "A double constant; the halves are kept as read by LargeNumInfo."
paul@0 | 77 | |
paul@0 | 78 | # Other information. |
paul@0 | 79 | |
class ItemInfo:

    "The details shared by the field and method entries of a class file."

    def init(self, data, class_file):

        """
        Decode the access flags, name index and descriptor index (each a
        big-endian 16-bit value) from the start of 'data', then let the given
        'class_file' decode the attributes which follow. Return the data
        remaining after the entry.
        """

        self.class_file = class_file
        self.access_flags = struct.unpack(">H", data[0:2])[0]
        self.name_index = struct.unpack(">H", data[2:4])[0]
        self.descriptor_index = struct.unpack(">H", data[4:6])[0]
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    # Symbol parsing.

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', which must start with "(". Return a
        tuple containing the list of parameter types and the return type,
        where each type has the form produced by _get_field_type and the
        return type is None for "V" (void).
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)

        # Here s[0] is the closing bracket, so s[1] starts the return type.

        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type, s = None, s[1:]
        return params, return_type

    def _get_parameter_descriptor(self, s):

        "Parse a parameter descriptor; this is the same as a field type."

        return self._get_field_type(s)

    def _get_field_descriptor(self, s):

        "Parse a field descriptor; this is the same as a field type."

        return self._get_field_type(s)

    def _get_component_type(self, s):

        "Parse an array component type; this is the same as a field type."

        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the start of 's'. Return a tuple of the form
        ((base_type, object_type, array_type), remaining), where 'base_type'
        is the leading character, 'object_type' is the class name when the
        base type is "L", and 'array_type' is the nested component type tuple
        when the base type is "[". Unused positions are None.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):

        """
        Split the first character from 's', returning it together with the
        rest of the string, or return None and 's' itself if 's' is empty.
        """

        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Return the text of 's' up to the first ";" together with the text
        following that ";", or return None and 's' itself if 's' is empty.
        The ";" terminator must be present in a non-empty 's'.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):

        """
        Parse the component type of an array from 's', returning the component
        type and the remaining text, or None and 's' itself if 's' is empty.
        """

        # The "[" has already been consumed by _get_base_type, so 's' starts
        # directly with the component type and nothing more must be skipped.

        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

    # Processed details.

    def get_name(self):

        """
        Return the name of this item as a Unicode object, obtained from the
        constant found at 'name_index' (the index is adjusted by one to
        address the list of constants).
        """

        return unicode(self.class_file.constants[self.name_index - 1])

class FieldInfo(ItemInfo):

    "A field entry of a class file."

    def get_descriptor(self):

        """
        Return the parsed field descriptor, as produced by _get_field_type,
        for the constant found at 'descriptor_index'.
        """

        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):

    "A method entry of a class file."

    def get_descriptor(self):

        """
        Return the parsed method descriptor, as produced by
        _get_method_descriptor, for the constant found at 'descriptor_index'.
        """

        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))
paul@0 | 155 | |
class AttributeInfo:

    "A generic attribute whose contents are kept undecoded."

    def init(self, data, class_file):

        """
        Read the big-endian 32-bit attribute length from 'data', keep that
        many of the following bytes in the 'info' attribute, and return the
        data after them. The 'class_file' argument is not used here.
        """

        self.attribute_length = struct.unpack(">L", data[0:4])[0]
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo):

    "The SourceFile attribute; its contents are left undecoded in 'info'."

class ConstantValueAttributeInfo(AttributeInfo):

    "The ConstantValue attribute, holding the index of a constant."

    def init(self, data, class_file):

        """
        Read the attribute length and the big-endian 16-bit constant value
        index from 'data', returning the data following the attribute. The
        attribute length must be 2. The 'class_file' argument is not used.
        """

        self.attribute_length = struct.unpack(">L", data[0:4])[0]
        self.constant_value_index = struct.unpack(">H", data[4:6])[0]
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

class CodeAttributeInfo(AttributeInfo):

    "The Code attribute, holding the bytecode of a method."

    def init(self, data, class_file):

        """
        Decode the stack and local variable limits, the code itself, the
        exception table and any nested attributes from 'data', using the given
        'class_file' to decode the nested attributes. Return the data
        following the attribute.

        The decoded exception table entries are collected, in order, in the
        'exception_table' attribute as ExceptionInfo objects. Unlike the
        generic attribute, no 'info' attribute is set.
        """

        self.class_file = class_file
        self.attribute_length = struct.unpack(">L", data[0:4])[0]
        self.max_stack = struct.unpack(">H", data[4:6])[0]
        self.max_locals = struct.unpack(">H", data[6:8])[0]
        self.code_length = struct.unpack(">L", data[8:12])[0]
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = struct.unpack(">H", data[end_of_code:end_of_code+2])[0]
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)

            # Keep each decoded entry; without this the table stays empty.

            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "The Exceptions attribute; its contents are left undecoded in 'info'."

class InnerClassesAttributeInfo(AttributeInfo):

    "The InnerClasses attribute; its contents are left undecoded in 'info'."

class SyntheticAttributeInfo(AttributeInfo):

    "The Synthetic attribute; its contents are left undecoded in 'info'."

class LineNumberAttributeInfo(AttributeInfo):

    "The LineNumberTable attribute; its contents are left undecoded in 'info'."

class LocalVariableAttributeInfo(AttributeInfo):

    "The LocalVariableTable attribute; its contents are left undecoded in 'info'."

class DeprecatedAttributeInfo(AttributeInfo):

    "The Deprecated attribute; its contents are left undecoded in 'info'."

class ExceptionInfo:

    "An entry in the exception table of a Code attribute."

    def __init__(self):
        self.start_pc, self.end_pc, self.handler_pc, self.catch_type = None, None, None, None

    def init(self, data):

        """
        Read the start, end and handler positions and the catch type, each a
        big-endian 16-bit value, from 'data'. Return the data following those
        eight bytes.
        """

        self.start_pc = struct.unpack(">H", data[0:2])[0]
        self.end_pc = struct.unpack(">H", data[2:4])[0]
        self.handler_pc = struct.unpack(">H", data[4:6])[0]
        self.catch_type = struct.unpack(">H", data[6:8])[0]
        return data[8:]
paul@0 | 220 | |
class UnknownTag(Exception):

    "Raised when a constant has a tag which is not recognised."
paul@0 | 223 | |
class UnknownAttribute(Exception):

    "Raised when an attribute has a name which is not recognised."
paul@0 | 226 | |
paul@0 | 227 | # Abstractions for the main structures. |
paul@0 | 228 | |
class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes (the magic number and the version numbers in
        # the class file format) are skipped; the remaining sections are
        # decoded in the order in which they appear.

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', choosing its class from the
        leading tag byte. Return the constant and the remaining data. Raise
        UnknownTag, with the tag as its argument, for an unrecognised tag.
        """

        tag = struct.unpack(">B", s[0:1])[0]
        constant_classes = {
            1 : Utf8Info,
            3 : IntegerInfo,
            4 : FloatInfo,
            5 : LongInfo,
            6 : DoubleInfo,
            7 : ClassInfo,
            8 : StringInfo,
            9 : FieldRefInfo,
            10 : MethodRefInfo,
            11 : InterfaceMethodRefInfo,
            12 : NameAndTypeInfo,
            }
        constant_class = constant_classes.get(tag)
        if constant_class is None:
            raise UnknownTag(tag)
        const = constant_class()
        s = const.init(s[1:])
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the running index reaches 'count',
        returning the list of constants and the remaining data. The index
        starts at one, so at most 'count' - 1 list positions are filled.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' consecutive items from 's', each an instance of 'cls'
        initialised with the data and this object. Return the list of items
        and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):

        "Decode 'number' MethodInfo items from 's'."

        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):

        "Decode 'number' FieldInfo items from 's'."

        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing its class from the
        name found via the leading 16-bit constant index. Return the attribute
        and the remaining data. Raise UnknownAttribute, with the name as its
        argument, for an unrecognised name.
        """

        attribute_name_index = struct.unpack(">H", s[0:2])[0]
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_classes = {
            "SourceFile" : SourceFileAttributeInfo,
            "ConstantValue" : ConstantValueAttributeInfo,
            "Code" : CodeAttributeInfo,
            "Exceptions" : ExceptionsAttributeInfo,
            "InnerClasses" : InnerClassesAttributeInfo,
            "Synthetic" : SyntheticAttributeInfo,
            "LineNumberTable" : LineNumberAttributeInfo,
            "LocalVariableTable" : LocalVariableAttributeInfo,
            "Deprecated" : DeprecatedAttributeInfo,
            }
        attribute_class = attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' consecutive attributes from 's', returning the list of
        attributes and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):

        "Read the 16-bit constant count from 's' and decode the constants."

        count = struct.unpack(">H", s[0:2])[0]
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):

        "Return the 16-bit access flags read from 's' and the remaining data."

        return struct.unpack(">H", s[0:2])[0], s[2:]

    def _get_this_class(self, s):

        """
        Read a 16-bit constant index from 's', returning the constant it
        refers to (the index is adjusted by one to address the list of
        constants) and the remaining data.
        """

        index = struct.unpack(">H", s[0:2])[0]
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Read a 16-bit constant index from 's', returning the constant it
        refers to and the remaining data. An index of zero, used by the class
        file format for a class without a superclass, yields None instead of
        a constant.
        """

        index = struct.unpack(">H", s[0:2])[0]

        # Without this test, index zero would address constants[-1] and
        # silently yield the last constant in the list.

        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):

        """
        Read a 16-bit count from 's' followed by that many 16-bit constant
        indexes, returning the list of constants referred to and the
        remaining data.
        """

        interfaces = []
        number = struct.unpack(">H", s[0:2])[0]
        s = s[2:]
        for i in range(0, number):
            index = struct.unpack(">H", s[0:2])[0]
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):

        "Read the 16-bit field count from 's' and decode the fields."

        number = struct.unpack(">H", s[0:2])[0]
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):

        "Read the 16-bit attribute count from 's' and decode the attributes."

        number = struct.unpack(">H", s[0:2])[0]
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):

        "Read the 16-bit method count from 's' and decode the methods."

        number = struct.unpack(">H", s[0:2])[0]
        return self._get_methods_from_table(number, s[2:])
paul@0 | 373 | |
if __name__ == "__main__":
    import sys

    # Class files are binary data, so the file is opened in binary mode, and
    # it is closed again even if decoding fails.

    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()
paul@0 | 378 | |
paul@0 | 379 | # vim: tabstop=4 expandtab shiftwidth=4 |