javaclass

classfile.py

68:318c92a01ff6
2004-11-21 Paul Boddie Added workarounds so that exceptions may be represented by new-style classes, too.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def s2(data):    19     return struct.unpack(">h", data[0:2])[0]    20     21 def u4(data):    22     return struct.unpack(">L", data[0:4])[0]    23     24 def s4(data):    25     return struct.unpack(">l", data[0:4])[0]    26     27 def s8(data):    28     return struct.unpack(">q", data[0:8])[0]    29     30 def f4(data):    31     return struct.unpack(">f", data[0:4])[0]    32     33 def f8(data):    34     return struct.unpack(">d", data[0:8])[0]    35     36 # Useful tables and constants.    37     38 descriptor_base_type_mapping = {    39     "B" : "int",    40     "C" : "str",    41     "D" : "float",    42     "F" : "float",    43     "I" : "int",    44     "J" : "int",    45     "L" : "object",    46     "S" : "int",    47     "Z" : "bool",    48     "[" : "list"    49     }    50     51 PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \    52 0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800    53     54 def has_flags(flags, desired):    55     desired_flags = reduce(lambda a, b: a | b, desired, 0)    56     return (flags & desired_flags) == desired_flags    57     58 # Useful mix-ins.    

# Text compatibility: descriptors and names are converted with unicode().
# Where that built-in is missing (Python 3), str is the Unicode text type.

try:
    unicode
except NameError:
    unicode = str

class PythonMethodUtils:

    "Mix-in building Python-level names for methods from name and descriptor."

    def get_python_name(self):

        """
        Return the method name followed by "$" and the parameter types.
        "<init>" is renamed to "__init__"; "<clinit>" becomes "__clinit__"
        with no parameter suffix.
        """

        name = str(self.get_name())
        if name == "<clinit>":
            return "__clinit__"
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the parameter types of the descriptor joined by '$'."
        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base_type, object_type, array_type) tuple
        'descriptor_type'. Each array dimension appends "[]" to the suffix 's'.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    "Mix-in building Python-level names by replacing '/' with '.'."

    def get_python_name(self):
        return str(self.get_name()).replace("/", ".")

class NameUtils:

    "Mix-in resolving 'name_index' against the constants of 'class_file'."

    def get_name(self):
        "Return the constant holding the name, or None if the index is zero."
        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        # Stored indexes are one-based; the constants list is zero-based.
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    "Mix-in resolving 'name_and_type_index' and 'class_index' constants."

    def _get_name_and_type(self):
        "Return the referenced name-and-type constant, or None for index zero."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        return self.class_file.constants[self.name_and_type_index - 1]

    def get_name(self):
        "Return the name from the name-and-type constant, or None."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_name()

    def get_field_descriptor(self):
        "Return the decoded field descriptor, or None."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_field_descriptor()

    def get_method_descriptor(self):
        "Return the decoded method descriptor, or None."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_method_descriptor()

    def get_class(self):
        "Return the constant referenced by 'class_index'."
        return self.class_file.constants[self.class_index - 1]

class DescriptorUtils:

    "Symbol parsing."

    # Except for _get_method_descriptor, each parsing method returns a pair:
    # the parsed item and the unparsed remainder of the string.

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', which must start with "(".
        Return (params, return_type) where 'params' is a list of type tuples
        and 'return_type' is a type tuple, or None for a "V" return type.
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # s[0] is now ")"; the return type follows it.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type = None
        return params, return_type

    def _get_parameter_descriptor(self, s):
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        return self._get_field_type(s)

    def _get_component_type(self, s):
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the start of 's'. Return a pair consisting of the
        tuple (base_type, object_type, array_type) and the rest of 's'.
        'object_type' is only set for "L" types and 'array_type' (itself a
        type tuple) only for "[" types.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Split off the first character of 's'; None if 's' is empty."
        if s:
            return s[0], s[1:]
        return None, s

    def _get_object_type(self, s):
        "Split off the class name terminated by ';'; None if 's' is empty."
        if s:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        return None, s

    def _get_array_type(self, s):
        "Parse the component type of an array; None if 's' is empty."
        if s:
            return self._get_component_type(s)
        return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
# Every init method decodes the object from the start of 'data' and returns
# the data remaining after it.

class ClassInfo(NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):
    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):
    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):
    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:
    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        # Where the raw data already is a str (Python 2), return it unchanged
        # as before. Otherwise __str__ must return text, so decode the data.
        if isinstance(self.bytes, str):
            return self.bytes
        return unicode(self.bytes, "utf-8")

    def __unicode__(self):
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        return str(self)

class StringInfo:
    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        return str(self)

class SmallNumInfo:

    "Base class for constants stored in four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    "Base class for constants stored in eight bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):
    def get_value(self):
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "Base class for fields and methods."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    "Generic attribute: keeps the raw contents in 'info' without decoding."

    def init(self, data, class_file):
        # Remember the class file like every other init method does.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for _ in range(self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # Nested attributes follow the exception table.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        # The table is a run of two-byte indexes starting at offset 6.
        end = 6 + 2 * self.number_of_exceptions
        self.exception_index_table = [u2(data[i:i+2]) for i in range(6, end, 2)]
        return data[end:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for _ in range(self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for _ in range(self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for _ in range(self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.

class ExceptionInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils is required here: get_descriptor uses _get_field_descriptor.

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant tag values and the classes decoding them.
    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # Attribute names and the classes decoding them.
    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's'. Return the constant object
        and the remaining data. Raise UnknownTag for an unsupported tag.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = const_class()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):
        "Decode constants until index 'count' is reached; return (list, rest)."
        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' consecutive 'cls' objects; return (list, rest)."
        l = []
        for _ in range(number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's'. Return the attribute object
        and the remaining data. Raise UnknownAttribute for an unsupported
        attribute name.
        """

        attribute_name_index = u2(s[0:2])
        # Compare names as text so the lookup does not depend on whether the
        # raw constant data is a byte string or a text string.
        constant_name = str(self.constants[attribute_name_index - 1])
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        attributes = []
        for _ in range(number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the super class constant and the remaining data. The constant
        is None if the index is zero, as with other zero indexes in this
        module.
        """

        index = u2(s[0:2])
        if index == 0:
            # Without this guard, constants[-1] would yield the last constant.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for _ in range(number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them in binary mode and make sure the
    # file is closed afterwards.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4