javaclass

classfile.py

73:5eb3ab12a1b8
2004-11-21 Paul Boddie Added tentative library implementations.
     #!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each function decodes a single big-endian value from the start of 'data';
# any bytes following the value are ignored.

def u1(data):
    "Return the unsigned 8-bit integer found at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer found at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Return the signed 16-bit integer found at the start of 'data'."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer found at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer found at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer found at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit floating point number found at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit floating point number found at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful tables and constants.

# A mapping from the leading character of a descriptor to a Python type name.

descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Access flag bit masks. Note that SUPER and SYNCHRONIZED have the same value.

PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \
0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800

def has_flags(flags, desired):

    """
    Return whether every bit mask in the 'desired' sequence is set in 'flags'.
    An empty 'desired' sequence is always satisfied.
    """

    # Combine the masks with a plain loop rather than the 'reduce' builtin,
    # which is only available as a builtin in Python 2.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.
class PythonMethodUtils:

    """
    A mix-in producing Python-style names for methods. Users of this class
    must provide 'get_name' and 'get_descriptor' methods, where the latter
    returns a (parameters, return type) tuple as produced by the
    'get_method_descriptor' function.
    """

    def get_python_name(self):

        """
        Return the method name followed by "$" and the parameter types, using
        "__init__" in place of "<init>". For "<clinit>", "__clinit__" is
        returned without any parameter details.
        """

        name = str(self.get_name())
        if name == "<init>":
            name = "__init__"
        elif name == "<clinit>":
            return "__clinit__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the parameter types of the descriptor joined using '$'."

        parameters = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(parameter) for parameter in parameters])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the given 'descriptor_type' tuple. Each level of
        array nesting adds "[]" to the suffix 's'; object types yield their
        class name and all other types yield the base type in angle brackets.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    """
    A mix-in producing Python-style (dotted) names. Users of this class must
    provide a 'get_name' method.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Names starting with "[" are
        treated as field descriptors and reduced to their innermost type.
        """

        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if not name.startswith("["):
            return name.replace("/", ".")
        return self._get_type_name(get_field_descriptor(name)).replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the object type name or the mapped base type name for the given
        'descriptor_type' tuple, looking through any levels of array nesting.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    """
    A mix-in for objects having 'name_index' and 'class_file' attributes.
    """

    def get_name(self):

        "Return the constant indicated by the name index, or None if it is zero."

        if self.name_index != 0:
            # Indexes stored in the file are one-based.
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    A mix-in for objects having 'name_and_type_index', 'class_index' and
    'class_file' attributes. The name and descriptor methods return None where
    the name and type index is zero.
    """

    def get_name(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_class(self):
        "Return the constant indicated by the class index."
        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.
# Types are represented by (base_type, object_type, array_type) tuples, where
# 'object_type' is only set for "L" types and 'array_type' (itself such a
# tuple) is only set for "[" types.

def get_method_descriptor(s):

    """
    Parse the method descriptor 's', returning a (parameters, return type)
    tuple where the parameters are a list of type tuples and the return type
    is a type tuple, or None for a "V" return type.

    An AssertionError is raised if 's' does not start with "(". An IndexError
    occurs if 's' is empty or ends before the return type.
    """

    # Raise explicitly rather than using an assert statement so that the
    # check also applies when running with optimisation (-O) enabled.
    if s[0] != "(":
        raise AssertionError("method descriptor does not start with '(': %r" % (s,))
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # Here, s[0] is ")" and the return type follows it.
    if s[1] != "V":
        return_type = _get_field_type(s[1:])[0]
    else:
        return_type = None
    return params, return_type

def get_field_descriptor(s):
    "Parse the field descriptor 's', returning a type tuple."
    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):
    "Return a (type tuple, remaining text) tuple for the parameter starting 's'."
    return _get_field_type(s)

def _get_component_type(s):
    "Return a (type tuple, remaining text) tuple for the array component starting 's'."
    return _get_field_type(s)

def _get_field_type(s):

    """
    Parse a single type from the start of 's', returning a tuple containing
    the type tuple and the remaining text.
    """

    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):
    "Return the first character of 's' (None if 's' is empty) and the remaining text."
    if len(s) > 0:
        return s[0], s[1:]
    else:
        return None, s

def _get_object_type(s):

    """
    Return the text of 's' before the first ";" and the text after it. For an
    empty 's', None and 's' are returned. An AssertionError is raised if a
    non-empty 's' contains no ";".
    """

    if len(s) > 0:
        s_end = s.find(";")
        # Raise explicitly so that the check survives optimisation (-O).
        if s_end == -1:
            raise AssertionError("object type is not terminated by ';': %r" % (s,))
        return s[:s_end], s[s_end+1:]
    else:
        return None, s

def _get_array_type(s):
    "Return the component type tuple for 's' (None if 's' is empty) and the remaining text."
    if len(s) > 0:
        return _get_component_type(s)
    else:
        return None, s

# Constant information.
# Each 'init' method below decodes an entry from the start of 'data', which
# begins immediately after the entry's tag byte, and returns the data
# remaining after the entry. Indexes into the constants are one-based.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant holding the index of a name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "The common form of constants holding a class index and a name and type index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference whose descriptor is a field descriptor."

    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference whose descriptor is a method descriptor."

    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):
    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant holding the index of a name and the index of a descriptor."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the indicated descriptor constant parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the indicated descriptor constant parsed as a method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding a two-byte length followed by that many bytes of text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        # The stored bytes are returned without being decoded.
        return self.bytes

    def __unicode__(self):
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        return str(self)

class StringInfo:

    "A constant holding the index of the constant providing its text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        return str(self)

class SmallNumInfo:

    "The common form of constants stored using four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a signed 32-bit integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a 32-bit floating point number."
        return f4(self.bytes)

class LargeNumInfo:

    "The common form of constants stored using eight bytes, kept as two halves."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a signed 64-bit integer."
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a 64-bit floating point number."
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.
# Each 'init' method below decodes an entry from the start of 'data' and
# returns the data remaining after the entry. For attributes, 'data' begins
# immediately after the attribute's name index.

class ItemInfo(NameUtils):

    "The common form of field and method entries."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        "Return the indicated descriptor constant parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        "Return the indicated descriptor constant parsed as a method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    "A generic attribute whose content is kept undecoded in 'info'."

    def init(self, data, class_file):
        # Remember the class file, as done by every other 'init' method.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The attribute must consist of the two-byte index alone. Raise
        # explicitly so that the check survives optimisation (-O).
        if 4+self.attribute_length != 6:
            raise AssertionError("unexpected ConstantValue attribute length: %r" % (self.attribute_length,))
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the constant indicated by the constant value index."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # The exception table is followed by the attributes of the code.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant indicated by entry 'i' of the exception index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An entry in the exception table of a code attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an inner classes attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a line number attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "An entry in the table of a local variable attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the indicated descriptor constant parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # The classes used to represent constants, indexed by tag.

    _constant_types = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # The classes used to represent attributes, indexed by attribute name.

    _attribute_types = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        # NOTE(review): these are presumably the magic number and version
        # NOTE(review): fields described by the specification - confirm.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant starting 's', returning the constant object and
        the remaining data. UnknownTag is raised for unsupported tag values.
        """

        tag = u1(s[0:1])
        constant_type = self._constant_types.get(tag)
        if constant_type is None:
            raise UnknownTag(tag)
        const = constant_type()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the one-based index reaches 'count',
        returning the list of constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' instances of 'cls' from 's', returning the list of
        instances and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute starting 's', returning the attribute object and
        the remaining data. UnknownAttribute is raised for attribute names
        which are not supported.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_type = self._attribute_types.get(constant_name)
        if attribute_type is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_type()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # The following methods each read an item from the start of 's' and
    # return the item together with the remaining data.

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant indicated by the super class index, or None where
        the index is zero, together with the remaining data.
        """

        index = u2(s[0:2])
        # A zero index must not be used to access the constants since it
        # would select the last constant (at position -1).
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: open in binary mode and always close.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4