javaclass

classfile.py

33:ef53ab16c02e
2004-11-11 Paul Boddie Added type information to method names.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful mix-ins.    34     35 class PythonMethodUtils:    36     def get_python_name(self):    37         name = self.get_name()    38         if str(name) == "<init>":    39             name = "__init__"    40         else:    41             name = str(name)    42         return name + "$" + self._get_descriptor_as_name()    43     44     def _get_descriptor_as_name(self):    45         l = []    46         for descriptor_type in self.get_descriptor()[0]:    47             l.append(self._get_type_as_name(descriptor_type))    48         return "$".join(l)    49     50     def _get_type_as_name(self, descriptor_type, s=""):    51         base_type, object_type, array_type = descriptor_type    52         if base_type == "L":    53             return object_type + s    54         elif base_type == "[":    55             return self._get_type_as_name(array_type, s + "[]")    56         else:    57             return "<" + base_type + ">" + s    58     59 class PythonNameUtils:    60     def get_python_name(self):    61         return self.get_name()    62     63 class NameUtils:    64     def get_name(self):    65         if self.name_index != 0:    66             return self.class_file.constants[self.name_index - 1]    67         else:    68             # Some name indexes are zero to indicate special conditions.    69             return None    70     71 class NameAndTypeUtils:    72     def get_name(self):    73         if self.name_and_type_index != 0:    74             return self.class_file.constants[self.name_and_type_index - 1].get_name()    75         else:    76             # Some name indexes are zero to indicate special conditions.    77             return None    78     79     def get_field_descriptor(self):    80         if self.name_and_type_index != 0:    81             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()    82         else:    83             # Some name indexes are zero to indicate special conditions.    84             return None    85     86     def get_method_descriptor(self):    87         if self.name_and_type_index != 0:    88             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()    89         else:    90             # Some name indexes are zero to indicate special conditions.    91             return None    92     93 class DescriptorUtils:    94     95     "Symbol parsing."    96     97     def _get_method_descriptor(self, s):    98         assert s[0] == "("    99         params = []   100         s = s[1:]   101         while s[0] != ")":   102             parameter_descriptor, s = self._get_parameter_descriptor(s)   103             params.append(parameter_descriptor)   104         if s[1] != "V":   105             return_type, s = self._get_field_type(s[1:])   106         else:   107             return_type, s = None, s[1:]   108         return params, return_type   109    110     def _get_parameter_descriptor(self, s):   111         return self._get_field_type(s)   112    113     def _get_field_descriptor(self, s):   114         return self._get_field_type(s)   115    116     def _get_component_type(self, s):   117         return self._get_field_type(s)   118    119     def _get_field_type(self, s):   120         base_type, s = self._get_base_type(s)   121         object_type = None   122         array_type = None   123         if base_type == "L":   124             object_type, s = self._get_object_type(s)   125         elif base_type == "[":   126             array_type, s = self._get_array_type(s)   127         return (base_type, object_type, array_type), s   128    129     def _get_base_type(self, s):   130         if len(s) > 0:   131             return s[0], s[1:]   132         else:   133             return None, s   134    135     def _get_object_type(self, s):   136         if len(s) > 0:   137             s_end = s.find(";")   138             assert s_end != -1   139             return s[:s_end], s[s_end+1:]   140         else:   141             return None, s   142    143     def _get_array_type(self, s):   144         if len(s) > 0:   145             return self._get_component_type(s)   146         else:   147             return None, s   148    149 # Constant information.   150 # Objects of these classes are not directly aware of the class they reside in.   151    152 class ClassInfo(NameUtils, PythonNameUtils):   153     def init(self, data, class_file):   154         self.class_file = class_file   155         self.name_index = u2(data[0:2])   156         return data[2:]   157    158 class RefInfo(NameAndTypeUtils):   159     def init(self, data, class_file):   160         self.class_file = class_file   161         self.class_index = u2(data[0:2])   162         self.name_and_type_index = u2(data[2:4])   163         return data[4:]   164    165 class FieldRefInfo(RefInfo, PythonNameUtils):   166     def get_descriptor(self):   167         return RefInfo.get_field_descriptor(self)   168    169 class MethodRefInfo(RefInfo, PythonMethodUtils):   170     def get_descriptor(self):   171         return RefInfo.get_method_descriptor(self)   172    173 class InterfaceMethodRefInfo(MethodRefInfo):   174     pass   175    176 class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonMethodUtils):   177     def init(self, data, class_file):   178         self.class_file = class_file   179         self.name_index = u2(data[0:2])   180         self.descriptor_index = u2(data[2:4])   181         return data[4:]   182    183     def get_field_descriptor(self):   184         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   185    186     def get_method_descriptor(self):   187         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   188    189 class Utf8Info:   190     def init(self, data, class_file):   191         self.class_file = class_file   192         self.length = u2(data[0:2])   193         self.bytes = data[2:2+self.length]   194         return data[2+self.length:]   195    196     def __str__(self):   197         return self.bytes   198    199     def __unicode__(self):   200         return unicode(self.bytes, "utf-8")   201    202 class StringInfo:   203     def init(self, data, class_file):   204         self.class_file = class_file   205         self.string_index = u2(data[0:2])   206         return data[2:]   207    208 class SmallNumInfo:   209     def init(self, data, class_file):   210         self.class_file = class_file   211         self.bytes = data[0:4]   212         return data[4:]   213    214 class IntegerInfo(SmallNumInfo):   215     def get_value(self):   216         return s4(self.bytes)   217    218 class FloatInfo(SmallNumInfo):   219     def get_value(self):   220         return f4(self.bytes)   221    222 class LargeNumInfo:   223     def init(self, data, class_file):   224         self.class_file = class_file   225         self.high_bytes = u4(data[0:4])   226         self.low_bytes = u4(data[4:8])   227         return data[8:]   228    229 class LongInfo(LargeNumInfo):   230     def get_value(self):   231         return s8(self.high_bytes + self.low_bytes)   232    233 class DoubleInfo(LargeNumInfo):   234     def get_value(self):   235         return f8(self.high_bytes + self.low_bytes)   236    237 # Other information.   238 # Objects of these classes are generally aware of the class they reside in.   239    240 class ItemInfo(NameUtils, DescriptorUtils, PythonMethodUtils):   241     def init(self, data, class_file):   242         self.class_file = class_file   243         self.access_flags = u2(data[0:2])   244         self.name_index = u2(data[2:4])   245         self.descriptor_index = u2(data[4:6])   246         self.attributes, data = self.class_file._get_attributes(data[6:])   247         return data   248    249 class FieldInfo(ItemInfo):   250     def get_descriptor(self):   251         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   252    253 class MethodInfo(ItemInfo):   254     def get_descriptor(self):   255         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   256    257 class AttributeInfo:   258     def init(self, data, class_file):   259         self.attribute_length = u4(data[0:4])   260         self.info = data[4:4+self.attribute_length]   261         return data[4+self.attribute_length:]   262    263 # NOTE: Decode the different attribute formats.   264    265 class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):   266     def init(self, data, class_file):   267         self.class_file = class_file   268         self.attribute_length = u4(data[0:4])   269         # Permit the NameUtils mix-in.   270         self.name_index = self.sourcefile_index = u2(data[4:6])   271    272 class ConstantValueAttributeInfo(AttributeInfo):   273     def init(self, data, class_file):   274         self.class_file = class_file   275         self.attribute_length = u4(data[0:4])   276         self.constant_value_index = u2(data[4:6])   277         assert 4+self.attribute_length == 6   278         return data[4+self.attribute_length:]   279    280     def get_value(self):   281         return self.class_file.constants[self.constant_value_index - 1].get_value()   282    283 class CodeAttributeInfo(AttributeInfo):   284     def init(self, data, class_file):   285         self.class_file = class_file   286         self.attribute_length = u4(data[0:4])   287         self.max_stack = u2(data[4:6])   288         self.max_locals = u2(data[6:8])   289         self.code_length = u4(data[8:12])   290         end_of_code = 12+self.code_length   291         self.code = data[12:end_of_code]   292         self.exception_table_length = u2(data[end_of_code:end_of_code+2])   293         self.exception_table = []   294         data = data[end_of_code + 2:]   295         for i in range(0, self.exception_table_length):   296             exception = ExceptionInfo()   297             data = exception.init(data)   298             self.exception_table.append(exception)   299         self.attributes, data = self.class_file._get_attributes(data)   300         return data   301    302 class ExceptionsAttributeInfo(AttributeInfo):   303     def init(self, data, class_file):   304         self.class_file = class_file   305         self.attribute_length = u4(data[0:4])   306         self.number_of_exceptions = u2(data[4:6])   307         self.exception_index_table = []   308         index = 6   309         for i in range(0, self.number_of_exceptions):   310             self.exception_index_table.append(u2(data[index:index+2]))   311             index += 2   312         return data[index:]   313    314     def get_exception(self, i):   315         exception_index = self.exception_index_table[i]   316         return self.class_file.constants[exception_index - 1]   317    318 class InnerClassesAttributeInfo(AttributeInfo):   319     def init(self, data, class_file):   320         self.class_file = class_file   321         self.attribute_length = u4(data[0:4])   322         self.number_of_classes = u2(data[4:6])   323         self.classes = []   324         data = data[6:]   325         for i in range(0, self.number_of_classes):   326             inner_class = InnerClassInfo()   327             data = inner_class.init(data, self.class_file)   328             self.classes.append(inner_class)   329         return data   330    331 class SyntheticAttributeInfo(AttributeInfo):   332     pass   333    334 class LineNumberAttributeInfo(AttributeInfo):   335     def init(self, data, class_file):   336         self.class_file = class_file   337         self.attribute_length = u4(data[0:4])   338         self.line_number_table_length = u2(data[4:6])   339         self.line_number_table = []   340         data = data[6:]   341         for i in range(0, self.line_number_table_length):   342             line_number = LineNumberInfo()   343             data = line_number.init(data)   344             self.line_number_table.append(line_number)   345         return data   346    347 class LocalVariableAttributeInfo(AttributeInfo):   348     def init(self, data, class_file):   349         self.class_file = class_file   350         self.attribute_length = u4(data[0:4])   351         self.local_variable_table_length = u2(data[4:6])   352         self.local_variable_table = []   353         data = data[6:]   354         for i in range(0, self.local_variable_table_length):   355             local_variable = LocalVariableInfo()   356             data = local_variable.init(data)   357             self.local_variable_table.append(local_variable)   358         return data   359    360 class DeprecatedAttributeInfo(AttributeInfo):   361     pass   362    363 # Child classes of the attribute information classes.   364    365 class ExceptionInfo:   366     def init(self, data):   367         self.start_pc = u2(data[0:2])   368         self.end_pc = u2(data[2:4])   369         self.handler_pc = u2(data[4:6])   370         self.catch_type = u2(data[6:8])   371         return data[8:]   372    373 class InnerClassInfo(NameUtils):   374     def init(self, data, class_file):   375         self.class_file = class_file   376         self.inner_class_info_index = u2(data[0:2])   377         self.outer_class_info_index = u2(data[2:4])   378         # Permit the NameUtils mix-in.   379         self.name_index = self.inner_name_index = u2(data[4:6])   380         self.inner_class_access_flags = u2(data[6:8])   381         return data[8:]   382    383 class LineNumberInfo:   384     def init(self, data):   385         self.start_pc = u2(data[0:2])   386         self.line_number = u2(data[2:4])   387         return data[4:]   388    389 class LocalVariableInfo(NameUtils, PythonNameUtils):   390     def init(self, data, class_file):   391         self.class_file = class_file   392         self.start_pc = u2(data[0:2])   393         self.length = u2(data[2:4])   394         self.name_index = u2(data[4:6])   395         self.descriptor_index = u2(data[6:8])   396         self.index = u2(data[8:10])   397         return data[10:]   398    399     def get_descriptor(self):   400         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   401    402 # Exceptions.   403    404 class UnknownTag(Exception):   405     pass   406    407 class UnknownAttribute(Exception):   408     pass   409    410 # Abstractions for the main structures.   411    412 class ClassFile:   413    414     "A class representing a Java class file."   415    416     def __init__(self, s):   417    418         """   419         Process the given string 's', populating the object with the class   420         file's details.   421         """   422    423         self.constants, s = self._get_constants(s[8:])   424         self.access_flags, s = self._get_access_flags(s)   425         self.this_class, s = self._get_this_class(s)   426         self.super_class, s = self._get_super_class(s)   427         self.interfaces, s = self._get_interfaces(s)   428         self.fields, s = self._get_fields(s)   429         self.methods, s = self._get_methods(s)   430         self.attributes, s = self._get_attributes(s)   431    432     def _decode_const(self, s):   433         tag = u1(s[0:1])   434         if tag == 1:   435             const = Utf8Info()   436         elif tag == 3:   437             const = IntegerInfo()   438         elif tag == 4:   439             const = FloatInfo()   440         elif tag == 5:   441             const = LongInfo()   442         elif tag == 6:   443             const = DoubleInfo()   444         elif tag == 7:   445             const = ClassInfo()   446         elif tag == 8:   447             const = StringInfo()   448         elif tag == 9:   449             const = FieldRefInfo()   450         elif tag == 10:   451             const = MethodRefInfo()   452         elif tag == 11:   453             const = InterfaceMethodRefInfo()   454         elif tag == 12:   455             const = NameAndTypeInfo()   456         else:   457             raise UnknownTag, tag   458    459         # Initialise the constant object.   460    461         s = const.init(s[1:], self)   462         return const, s   463    464     def _get_constants_from_table(self, count, s):   465         l = []   466         # Have to skip certain entries specially.   467         i = 1   468         while i < count:   469             c, s = self._decode_const(s)   470             l.append(c)   471             # Add a blank entry after "large" entries.   472             if isinstance(c, LargeNumInfo):   473                 l.append(None)   474                 i += 1   475             i += 1   476         return l, s   477    478     def _get_items_from_table(self, cls, number, s):   479         l = []   480         for i in range(0, number):   481             f = cls()   482             s = f.init(s, self)   483             l.append(f)   484         return l, s   485    486     def _get_methods_from_table(self, number, s):   487         return self._get_items_from_table(MethodInfo, number, s)   488    489     def _get_fields_from_table(self, number, s):   490         return self._get_items_from_table(FieldInfo, number, s)   491    492     def _get_attribute_from_table(self, s):   493         attribute_name_index = u2(s[0:2])   494         constant_name = self.constants[attribute_name_index - 1].bytes   495         if constant_name == "SourceFile":   496             attribute = SourceFileAttributeInfo()   497         elif constant_name == "ConstantValue":   498             attribute = ConstantValueAttributeInfo()   499         elif constant_name == "Code":   500             attribute = CodeAttributeInfo()   501         elif constant_name == "Exceptions":   502             attribute = ExceptionsAttributeInfo()   503         elif constant_name == "InnerClasses":   504             attribute = InnerClassesAttributeInfo()   505         elif constant_name == "Synthetic":   506             attribute = SyntheticAttributeInfo()   507         elif constant_name == "LineNumberTable":   508             attribute = LineNumberAttributeInfo()   509         elif constant_name == "LocalVariableTable":   510             attribute = LocalVariableAttributeInfo()   511         elif constant_name == "Deprecated":   512             attribute = DeprecatedAttributeInfo()   513         else:   514             raise UnknownAttribute, constant_name   515         s = attribute.init(s[2:], self)   516         return attribute, s   517    518     def _get_attributes_from_table(self, number, s):   519         attributes = []   520         for i in range(0, number):   521             attribute, s = self._get_attribute_from_table(s)   522             attributes.append(attribute)   523         return attributes, s   524    525     def _get_constants(self, s):   526         count = u2(s[0:2])   527         return self._get_constants_from_table(count, s[2:])   528    529     def _get_access_flags(self, s):   530         return u2(s[0:2]), s[2:]   531    532     def _get_this_class(self, s):   533         index = u2(s[0:2])   534         return self.constants[index - 1], s[2:]   535    536     _get_super_class = _get_this_class   537    538     def _get_interfaces(self, s):   539         interfaces = []   540         number = u2(s[0:2])   541         s = s[2:]   542         for i in range(0, number):   543             index = u2(s[0:2])   544             interfaces.append(self.constants[index - 1])   545             s = s[2:]   546         return interfaces, s   547    548     def _get_fields(self, s):   549         number = u2(s[0:2])   550         return self._get_fields_from_table(number, s[2:])   551    552     def _get_attributes(self, s):   553         number = u2(s[0:2])   554         return self._get_attributes_from_table(number, s[2:])   555    556     def _get_methods(self, s):   557         number = u2(s[0:2])   558         return self._get_methods_from_table(number, s[2:])   559    560 if __name__ == "__main__":   561     import sys   562     f = open(sys.argv[1])   563     c = ClassFile(f.read())   564    565 # vim: tabstop=4 expandtab shiftwidth=4