javaclass

classfile.py

133:3e197ad55b82
2005-01-18 Paul Boddie Added more test programs.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def s2(data):    19     return struct.unpack(">h", data[0:2])[0]    20     21 def u4(data):    22     return struct.unpack(">L", data[0:4])[0]    23     24 def s4(data):    25     return struct.unpack(">l", data[0:4])[0]    26     27 def s8(data):    28     return struct.unpack(">q", data[0:8])[0]    29     30 def f4(data):    31     return struct.unpack(">f", data[0:4])[0]    32     33 def f8(data):    34     return struct.unpack(">d", data[0:8])[0]    35     36 # Useful tables and constants.    37     38 descriptor_base_type_mapping = {    39     "B" : "int",    40     "C" : "str",    41     "D" : "float",    42     "F" : "float",    43     "I" : "int",    44     "J" : "int",    45     "L" : "object",    46     "S" : "int",    47     "Z" : "bool",    48     "[" : "list"    49     }    50     51 PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \    52 0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800    53     54 def has_flags(flags, desired):    55     desired_flags = reduce(lambda a, b: a | b, desired, 0)    56     return (flags & desired_flags) == desired_flags    57     58 # Useful mix-ins.    

class PythonMethodUtils:

    """
    A mix-in producing Python-compatible names for methods. Users of this
    class must provide 'get_name' and 'get_descriptor' methods, the latter
    returning a (parameters, return type) pair.
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):

        """
        Return the method name without parameter details, translating the
        special "<init>" and "<clinit>" names to "__init__" and "__clinit__".
        """

        text = str(self.get_name())
        if text == "<init>":
            return "__init__"
        if text == "<clinit>":
            return "__clinit__"
        return text

    def get_python_name(self):

        """
        Return the method name qualified with a suffix derived from the
        parameter types. The "__clinit__" name is returned without a suffix.
        """

        unqualified = self.get_unqualified_python_name()
        if unqualified != "__clinit__":
            return unqualified + self.symbol_sep + self._get_descriptor_as_name()
        return unqualified

    def _get_descriptor_as_name(self):

        "Return the parameter types as names joined by the symbol separator."

        parameters = self.get_descriptor()[0]
        return self.symbol_sep.join(
            [self._get_type_as_name(parameter) for parameter in parameters]
            )

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the given 'descriptor_type', a (base type, object
        type, array type) tuple, followed by the suffix 's'.
        """

        suffix = s
        base_type, object_type, array_type = descriptor_type

        # Unwrap array types, adding one array marker per dimension.

        while base_type == "[":
            suffix = suffix + self.array_sep
            base_type, object_type, array_type = array_type

        if base_type == "L":
            return object_type.replace("/", self.type_sep) + suffix
        return self.base_seps[0] + base_type + self.base_seps[1] + suffix

class PythonNameUtils:

    """
    A mix-in producing dotted Python names. Users of this class must provide
    a 'get_name' method.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Names starting with "[" are
        treated as array descriptors and the name of the element type is
        returned instead.
        """

        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if name.startswith("["):
            descriptor = get_field_descriptor(name)
            return self._get_type_name(descriptor).replace("/", ".")
        return name.replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the object type name or the mapped base type name found inside
        the given 'descriptor_type' once all array levels are removed.
        """

        base_type, object_type, array_type = descriptor_type
        while base_type == "[":
            base_type, object_type, array_type = array_type
        if base_type == "L":
            return object_type
        return descriptor_base_type_mapping[base_type]

class NameUtils:

    """
    A mix-in for objects having 'name_index' and 'class_file' attributes.
    """

    def get_name(self):

        "Return the constant referenced by the name index, or None if zero."

        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    """
    A mix-in for objects having 'name_and_type_index', 'class_index' and
    'class_file' attributes. Each accessor below returns None when the name
    and type index is zero.
    """

    def get_name(self):

        "Return the name provided by the referenced name and type constant."

        # Some name indexes are zero to indicate special conditions.
        index = self.name_and_type_index
        if index == 0:
            return None
        return self.class_file.constants[index - 1].get_name()

    def get_field_descriptor(self):

        "Return the field descriptor of the referenced name and type constant."

        # Some name indexes are zero to indicate special conditions.
        index = self.name_and_type_index
        if index == 0:
            return None
        return self.class_file.constants[index - 1].get_field_descriptor()

    def get_method_descriptor(self):

        "Return the method descriptor of the referenced name and type constant."

        # Some name indexes are zero to indicate special conditions.
        index = self.name_and_type_index
        if index == 0:
            return None
        return self.class_file.constants[index - 1].get_method_descriptor()

    def get_class(self):

        "Return the constant referenced by the class index."

        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.
# Types are represented as (base type, object type, array type) tuples where
# the object type is only set for "L" and the array type only for "[".

def get_method_descriptor(s):

    """
    Parse the method descriptor 's', returning a (parameters, return type)
    tuple where the parameters are a list of types and the return type is a
    type or None for "V" (void).
    """

    assert s[0] == "("
    remaining = s[1:]
    parameters = []
    while remaining[0] != ")":
        parameter, remaining = _get_parameter_descriptor(remaining)
        parameters.append(parameter)

    # The return descriptor follows the closing bracket.

    if remaining[1] == "V":
        return_type = None
    else:
        return_type = _get_field_type(remaining[1:])[0]
    return parameters, return_type

def get_field_descriptor(s):

    "Parse the field descriptor 's', returning only the type found."

    field_type, remaining = _get_field_type(s)
    return field_type

def _get_parameter_descriptor(s):

    "Return the parameter type at the start of 's' and the remaining text."

    return _get_field_type(s)

def _get_component_type(s):

    "Return the component type at the start of 's' and the remaining text."

    return _get_field_type(s)

def _get_field_type(s):

    "Return the type at the start of 's' and the remaining text."

    base_type, rest = _get_base_type(s)
    object_type = array_type = None
    if base_type == "L":
        object_type, rest = _get_object_type(rest)
    elif base_type == "[":
        array_type, rest = _get_array_type(rest)
    return (base_type, object_type, array_type), rest

def _get_base_type(s):

    "Return the first character of 's', or None if empty, and the rest."

    if len(s) == 0:
        return None, s
    return s[0], s[1:]

def _get_object_type(s):

    """
    Return the text before the first ";" in 's' and the text following it, or
    None and 's' itself if 's' is empty.
    """

    if len(s) == 0:
        return None, s
    s_end = s.find(";")
    assert s_end != -1
    return s[:s_end], s[s_end+1:]

def _get_array_type(s):

    "Return the array component type from 's', or None if 's' is empty."

    if len(s) == 0:
        return None, s
    return _get_component_type(s)

# Constant information.

# Each class below provides an 'init' method which decodes an entry from the
# start of 'data', remembers the given 'class_file', and returns the data
# remaining after the entry.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant holding the index of a name."

    def init(self, data, class_file):
        remaining = data[2:]
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return remaining

class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name and type index."

    def init(self, data, class_file):
        class_part, name_and_type_part = data[0:2], data[2:4]
        self.class_file = class_file
        self.class_index = u2(class_part)
        self.name_and_type_index = u2(name_and_type_part)
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference whose descriptor is a field descriptor."

    def get_descriptor(self):
        descriptor = RefInfo.get_field_descriptor(self)
        return descriptor

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference whose descriptor is a method descriptor."

    def get_descriptor(self):
        descriptor = RefInfo.get_method_descriptor(self)
        return descriptor

class InterfaceMethodRefInfo(MethodRefInfo):

    "A method reference decoded in the same way as MethodRefInfo."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant holding a name index and a descriptor index."

    def init(self, data, class_file):
        name_part, descriptor_part = data[0:2], data[2:4]
        self.class_file = class_file
        self.name_index = u2(name_part)
        self.descriptor_index = u2(descriptor_part)
        return data[4:]

    def get_field_descriptor(self):

        "Return the referenced descriptor text parsed as a field descriptor."

        constant = self.class_file.constants[self.descriptor_index - 1]
        return get_field_descriptor(unicode(constant))

    def get_method_descriptor(self):

        "Return the referenced descriptor text parsed as a method descriptor."

        constant = self.class_file.constants[self.descriptor_index - 1]
        return get_method_descriptor(unicode(constant))

class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])

        # The bytes follow the two byte length field.

        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        # NOTE(review): the class file format is understood to use a modified
        # NOTE(review): form of UTF-8; confirm that plain UTF-8 decoding is
        # NOTE(review): sufficient for the strings encountered.
        return unicode(self.bytes, "utf-8")

    def get_value(self):

        "Return the plain string form of this constant."

        return str(self)

class StringInfo:

    "A constant holding the index of the constant providing its text."

    def init(self, data, class_file):
        remaining = data[2:]
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return remaining

    def __str__(self):
        target = self.class_file.constants[self.string_index - 1]
        return str(target)

    def __unicode__(self):
        target = self.class_file.constants[self.string_index - 1]
        return unicode(target)

    def get_value(self):

        "Return the plain string form of this constant."

        return str(self)

class SmallNumInfo:

    "A constant holding four bytes of numeric data."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes, remaining = data[0:4], data[4:]
        return remaining

class IntegerInfo(SmallNumInfo):

    "A four byte constant interpreted as a signed integer."

    def get_value(self):
        value = s4(self.bytes)
        return value

class FloatInfo(SmallNumInfo):

    "A four byte constant interpreted as a float."

    def get_value(self):
        value = f4(self.bytes)
        return value

class LargeNumInfo:

    "A constant holding eight bytes of numeric data in two halves."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes, self.low_bytes = data[0:4], data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):

    "An eight byte constant interpreted as a signed integer."

    def get_value(self):
        combined = self.high_bytes + self.low_bytes
        return s8(combined)

class DoubleInfo(LargeNumInfo):

    "An eight byte constant interpreted as a double precision float."

    def get_value(self):
        combined = self.high_bytes + self.low_bytes
        return f8(combined)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

# Each class below provides an 'init' method which decodes a structure from
# the start of 'data' and returns the data remaining after that structure.

class ItemInfo(NameUtils):

    "The details common to fields and methods."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field whose descriptor is parsed as a field descriptor."

    def get_descriptor(self):
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method whose descriptor is parsed as a method descriptor."

    def get_descriptor(self):
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose content is kept undecoded in the 'info'
    attribute. The attribute name index is consumed by the caller before
    'init' is invoked.
    """

    def init(self, data, class_file):
        # Remember the class file as all the specialised attributes do.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant value."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content must consist of the two byte index alone.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):

        "Return the value of the referenced constant."

        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding the undecoded code of a method together with its
    exception table and nested attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of exception constant indexes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):

        "Return the constant referenced by entry 'i' of the index table."

        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded generically."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded generically."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An eight byte exception table entry."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An eight byte inner class table entry."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A four byte line number table entry."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A ten byte local variable table entry."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):

        "Return the referenced descriptor text parsed as a field descriptor."

        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the tag value when a constant has an unsupported tag."

    pass

class UnknownAttribute(Exception):

    "Raised with the attribute name when an attribute is not supported."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # The classes used to decode constants, indexed by constant tag.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # The classes used to decode attributes, indexed by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning the constant and
        the remaining data. UnknownTag is raised for unsupported tags.
        """

        tag = u1(s[0:1])
        try:
            const_class = self._constant_classes[tag]
        except KeyError:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = const_class()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constants from 's' where 'count' is one greater than the
        number of entries, returning the constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode the given 'number' of 'cls' objects from 's', returning the
        objects and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning the attribute and
        the remaining data. UnknownAttribute is raised for unsupported names.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        try:
            attribute_class = self._attribute_classes[constant_name]
        except KeyError:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode the given 'number' of attributes from 's', returning the
        attributes and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # The methods below each decode one section from the start of 's',
    # returning the decoded details and the remaining data.

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant referenced by the superclass index, or None where
        the index is zero, along with the remaining data.
        """

        index = u2(s[0:2])
        # A zero index refers to no constant at all: without this test, the
        # lookup would wrap around and produce the last constant in the pool.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    f = open(sys.argv[1], "rb")
    # Close the file even if decoding fails.
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4