#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct

# Constant information.
#
# Each class below decodes one kind of constant pool entry. The tag byte which
# selects the class has already been consumed by the caller: 'init' receives
# the data immediately following the tag, stores the decoded fields on the
# object and returns whatever data follows the entry.

class ClassInfo:

    "A class reference: holds the constant pool index of the class name."

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.name_index = struct.unpack(">H", data[0:2])[0]
        return data[2:]

class RefInfo:

    """
    Common decoding for field, method and interface method references: a class
    index followed by a name-and-type index, both into the constant pool.
    """

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.class_index, self.name_and_type_index = struct.unpack(">HH", data[0:4])
        return data[4:]

class FieldRefInfo(RefInfo):
    pass

class MethodRefInfo(RefInfo):
    pass

class InterfaceMethodRefInfo(RefInfo):
    pass

class NameAndTypeInfo:

    "A name and descriptor pair, each given as a constant pool index."

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.name_index, self.descriptor_index = struct.unpack(">HH", data[0:4])
        return data[4:]

class Utf8Info:

    """
    A length-prefixed string constant. The undecoded contents are kept in the
    'bytes' attribute and their declared size in the 'length' attribute.
    """

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.length = struct.unpack(">H", data[0:2])[0]
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):

        """
        Return the contents as a native string: the raw byte string where that
        is the native string type (Python 2), the decoded text otherwise.
        """

        if isinstance(self.bytes, str):
            return self.bytes
        # Returning a bytes object from __str__ is an error on Python 3.
        return self.bytes.decode("utf-8")

    def __unicode__(self):

        "Return the contents decoded as UTF-8 text."

        # NOTE(review): the class file format stores strings in a modified
        # UTF-8, so a strict UTF-8 decode may reject some valid constants -
        # confirm whether this matters for the files being processed.
        return self.bytes.decode("utf-8")

class StringInfo:

    "A string constant: holds the constant pool index of its contents."

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.string_index = struct.unpack(">H", data[0:2])[0]
        return data[2:]

class SmallNumInfo:

    """
    A four byte numeric constant. The value is not interpreted: 'bytes' holds
    the four bytes read as an unsigned big-endian integer.
    """

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.bytes = struct.unpack(">L", data[0:4])[0]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    pass

class FloatInfo(SmallNumInfo):
    pass

class LargeNumInfo:

    """
    An eight byte numeric constant. The value is not interpreted: 'high_bytes'
    and 'low_bytes' hold the two halves as unsigned big-endian integers.
    """

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.high_bytes, self.low_bytes = struct.unpack(">LL", data[0:8])
        return data[8:]

class LongInfo(LargeNumInfo):
    pass

class DoubleInfo(LargeNumInfo):
    pass

# Other information.
try:
    _text = unicode     # Python 2: conversion goes through __unicode__
except NameError:
    _text = str         # no separate unicode type: str is the text type

class ItemInfo:

    """
    Common decoding for the field and method tables: access flags, a name
    index, a descriptor index and a table of attributes.
    """

    def init(self, data, class_file):

        """
        Decode the entry at the start of 'data', using 'class_file' to decode
        the entry's attributes, and return the remaining data.
        """

        self.class_file = class_file
        self.access_flags, self.name_index, self.descriptor_index = struct.unpack(">HHH", data[0:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    # Symbol parsing.
    #
    # A decoded type is a tuple (base_type, object_type, array_type) where
    # base_type is the leading descriptor character, object_type is the class
    # name for "L" types (otherwise None) and array_type is the decoded
    # component type for "[" types (otherwise None).

    def _get_method_descriptor(self, s):

        """
        Decode the method descriptor 's', returning a tuple containing the list
        of decoded parameter types and the decoded return type. The return type
        is None where the descriptor ends in "V".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here s[0] is ")" and s[1] starts the return descriptor.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type = None
        return params, return_type

    def _get_parameter_descriptor(self, s):
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        return self._get_field_type(s)

    def _get_component_type(self, s):
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Decode one type from the start of 's', returning a tuple containing the
        (base_type, object_type, array_type) tuple and the unconsumed text.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):

        "Split the first character from 's'; yields None for empty text."

        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Read a class name terminated by ";" from 's' (the leading "L" having
        been consumed already), returning the name and the unconsumed text.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):

        """
        Decode the component type of an array from 's' (the leading "[" having
        been consumed already), returning it with the unconsumed text.
        """

        if len(s) > 0:
            # The "[" was removed by _get_base_type, so the component type
            # starts at the first character: skipping another one here would
            # lose the component's own base type.
            return self._get_component_type(s)
        else:
            return None, s

    # Processed details.

    def get_name(self):

        "Return the name found at this entry's name index as text."

        return _text(self.class_file.constants[self.name_index - 1])

class FieldInfo(ItemInfo):
    def get_descriptor(self):

        "Return the decoded field descriptor as produced by _get_field_type."

        return self._get_field_descriptor(_text(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):
    def get_descriptor(self):

        "Return the decoded (parameter types, return type) tuple."

        return self._get_method_descriptor(_text(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose contents are kept undecoded in 'info'. The
    attribute name index has already been consumed by the caller.
    """

    def init(self, data, class_file):

        """
        Read the length-prefixed attribute at the start of 'data' and return
        the remaining data. The 'class_file' is not used here.
        """

        self.attribute_length = struct.unpack(">L", data[0:4])[0]
        end = 4 + self.attribute_length
        self.info = data[4:end]
        return data[end:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo):
    pass

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding a single constant pool index."

    def init(self, data, class_file):

        """
        Decode the attribute at the start of 'data', returning the remaining
        data. The declared length must be exactly that of the index.
        """

        self.attribute_length = struct.unpack(">L", data[0:4])[0]
        self.constant_value_index = struct.unpack(">H", data[4:6])[0]
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

class CodeAttributeInfo(AttributeInfo):

    """
    A method's code: stack and local variable limits, the bytecode itself, the
    exception handler table and a nested table of attributes.
    """

    def init(self, data, class_file):

        """
        Decode the attribute at the start of 'data', using 'class_file' to
        decode the nested attributes, and return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length, self.max_stack, self.max_locals, self.code_length = \
            struct.unpack(">LHHL", data[0:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = struct.unpack(">H", data[end_of_code:end_of_code+2])[0]
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            # Keep each decoded handler: without this the table stays empty.
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    pass

class InnerClassesAttributeInfo(AttributeInfo):
    pass

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    pass

class LocalVariableAttributeInfo(AttributeInfo):
    pass

class DeprecatedAttributeInfo(AttributeInfo):
    pass

class ExceptionInfo:

    "An entry in the exception handler table of a code attribute."

    def __init__(self):
        self.start_pc, self.end_pc, self.handler_pc, self.catch_type = None, None, None, None

    def init(self, data):

        "Decode the entry at the start of 'data', returning the remaining data."

        self.start_pc, self.end_pc, self.handler_pc, self.catch_type = \
            struct.unpack(">HHHH", data[0:8])
        return data[8:]

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Attribute classes by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning the constant object
        and the remaining data. UnknownTag is raised, with the tag value, for
        an unrecognised tag.
        """

        tag = struct.unpack(">B", s[0:1])[0]
        if tag == 1:
            const = Utf8Info()
        elif tag == 3:
            const = IntegerInfo()
        elif tag == 4:
            const = FloatInfo()
        elif tag == 5:
            const = LongInfo()
        elif tag == 6:
            const = DoubleInfo()
        elif tag == 7:
            const = ClassInfo()
        elif tag == 8:
            const = StringInfo()
        elif tag == 9:
            const = FieldRefInfo()
        elif tag == 10:
            const = MethodRefInfo()
        elif tag == 11:
            const = InterfaceMethodRefInfo()
        elif tag == 12:
            const = NameAndTypeInfo()
        else:
            raise UnknownTag(tag)
        s = const.init(s[1:])
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until index 'count' is reached, returning the
        list of constants and the remaining data. Index n of the class file
        corresponds to position n - 1 of the list.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' entries from 's' as instances of 'cls', returning the
        list of entries and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing the attribute class
        from the attribute's name, and return the attribute with the remaining
        data. UnknownAttribute is raised, with the name, for an unrecognised
        name.
        """

        attribute_name_index = struct.unpack(">H", s[0:2])[0]
        constant_name = self.constants[attribute_name_index - 1].bytes
        if isinstance(constant_name, str):
            key = constant_name
        else:
            # The name is a bytes object distinct from str: convert it for the
            # lookup only. The recognised names are plain ASCII, so anything
            # replaced here could not have matched anyway.
            key = constant_name.decode("ascii", "replace")
        attribute_class = self._attribute_classes.get(key)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's', returning the list of attributes
        and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # Each method below reads a leading two byte big-endian value from 's' and
    # returns a tuple containing the decoded result and the remaining data.

    def _get_constants(self, s):
        count = struct.unpack(">H", s[0:2])[0]
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return struct.unpack(">H", s[0:2])[0], s[2:]

    def _get_this_class(self, s):
        index = struct.unpack(">H", s[0:2])[0]
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant for the superclass together with the remaining
        data, or None as the constant where the class file gives index zero.
        """

        index = struct.unpack(">H", s[0:2])[0]
        if index == 0:
            # Index zero means "no superclass"; looking it up would silently
            # yield the last constant via position -1.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = struct.unpack(">H", s[0:2])[0]
        s = s[2:]
        for i in range(0, number):
            index = struct.unpack(">H", s[0:2])[0]
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = struct.unpack(">H", s[0:2])[0]
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = struct.unpack(">H", s[0:2])[0]
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = struct.unpack(">H", s[0:2])[0]
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them unchanged and release the file.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4