javaclass

classfile.py

95:055b402634bc
2005-01-09 Paul Boddie Added a method to provide the "unqualified Python name" of methods. This is useful when generating wrappers of existing Python classes where the additional type information usually included in "Python names" is not present in the wrapped method names.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def s2(data):    19     return struct.unpack(">h", data[0:2])[0]    20     21 def u4(data):    22     return struct.unpack(">L", data[0:4])[0]    23     24 def s4(data):    25     return struct.unpack(">l", data[0:4])[0]    26     27 def s8(data):    28     return struct.unpack(">q", data[0:8])[0]    29     30 def f4(data):    31     return struct.unpack(">f", data[0:4])[0]    32     33 def f8(data):    34     return struct.unpack(">d", data[0:8])[0]    35     36 # Useful tables and constants.    37     38 descriptor_base_type_mapping = {    39     "B" : "int",    40     "C" : "str",    41     "D" : "float",    42     "F" : "float",    43     "I" : "int",    44     "J" : "int",    45     "L" : "object",    46     "S" : "int",    47     "Z" : "bool",    48     "[" : "list"    49     }    50     51 PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \    52 0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800    53     54 def has_flags(flags, desired):    55     desired_flags = reduce(lambda a, b: a | b, desired, 0)    56     return (flags & desired_flags) == desired_flags    57     58 # Useful mix-ins.    
59     60 class PythonMethodUtils:    61     symbol_sep = "___" # was "$"    62     type_sep = "__" # replaces "/"    63     array_sep = "_array_" # was "[]"    64     base_seps = ("_", "_") # was "<" and ">"    65     66     def get_unqualified_python_name(self):    67         name = self.get_name()    68         if str(name) == "<init>":    69             return "__init__"    70         elif str(name) == "<clinit>":    71             return "__clinit__"    72         else:    73             return str(name)    74     75     def get_python_name(self):    76         name = self.get_unqualified_python_name()    77         if name == "__clinit__":    78             return name    79         return name + self.symbol_sep + self._get_descriptor_as_name()    80     81     def _get_descriptor_as_name(self):    82         l = []    83         for descriptor_type in self.get_descriptor()[0]:    84             l.append(self._get_type_as_name(descriptor_type))    85         return self.symbol_sep.join(l)    86     87     def _get_type_as_name(self, descriptor_type, s=""):    88         base_type, object_type, array_type = descriptor_type    89         if base_type == "L":    90             return object_type.replace("/", self.type_sep) + s    91         elif base_type == "[":    92             return self._get_type_as_name(array_type, s + self.array_sep)    93         else:    94             return self.base_seps[0] + base_type + self.base_seps[1] + s    95     96 class PythonNameUtils:    97     def get_python_name(self):    98         # NOTE: This may not be comprehensive.    
99         if not str(self.get_name()).startswith("["):   100             return str(self.get_name()).replace("/", ".")   101         else:   102             return self._get_type_name(   103                 get_field_descriptor(   104                     str(self.get_name())   105                     )   106                 ).replace("/", ".")   107    108     def _get_type_name(self, descriptor_type):   109         base_type, object_type, array_type = descriptor_type   110         if base_type == "L":   111             return object_type   112         elif base_type == "[":   113             return self._get_type_name(array_type)   114         else:   115             return descriptor_base_type_mapping[base_type]   116    117 class NameUtils:   118     def get_name(self):   119         if self.name_index != 0:   120             return self.class_file.constants[self.name_index - 1]   121         else:   122             # Some name indexes are zero to indicate special conditions.   123             return None   124    125 class NameAndTypeUtils:   126     def get_name(self):   127         if self.name_and_type_index != 0:   128             return self.class_file.constants[self.name_and_type_index - 1].get_name()   129         else:   130             # Some name indexes are zero to indicate special conditions.   131             return None   132    133     def get_field_descriptor(self):   134         if self.name_and_type_index != 0:   135             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()   136         else:   137             # Some name indexes are zero to indicate special conditions.   138             return None   139    140     def get_method_descriptor(self):   141         if self.name_and_type_index != 0:   142             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()   143         else:   144             # Some name indexes are zero to indicate special conditions.   
145             return None   146    147     def get_class(self):   148         return self.class_file.constants[self.class_index - 1]   149    150 # Symbol parsing.   151    152 def get_method_descriptor(s):   153     assert s[0] == "("   154     params = []   155     s = s[1:]   156     while s[0] != ")":   157         parameter_descriptor, s = _get_parameter_descriptor(s)   158         params.append(parameter_descriptor)   159     if s[1] != "V":   160         return_type, s = _get_field_type(s[1:])   161     else:   162         return_type, s = None, s[1:]   163     return params, return_type   164    165 def get_field_descriptor(s):   166     return _get_field_type(s)[0]   167    168 def _get_parameter_descriptor(s):   169     return _get_field_type(s)   170    171 def _get_component_type(s):   172     return _get_field_type(s)   173    174 def _get_field_type(s):   175     base_type, s = _get_base_type(s)   176     object_type = None   177     array_type = None   178     if base_type == "L":   179         object_type, s = _get_object_type(s)   180     elif base_type == "[":   181         array_type, s = _get_array_type(s)   182     return (base_type, object_type, array_type), s   183    184 def _get_base_type(s):   185     if len(s) > 0:   186         return s[0], s[1:]   187     else:   188         return None, s   189    190 def _get_object_type(s):   191     if len(s) > 0:   192         s_end = s.find(";")   193         assert s_end != -1   194         return s[:s_end], s[s_end+1:]   195     else:   196         return None, s   197    198 def _get_array_type(s):   199     if len(s) > 0:   200         return _get_component_type(s)   201     else:   202         return None, s   203    204 # Constant information.   

# Each constant class provides an 'init' method which decodes the constant from
# the start of 'data' (the tag byte having already been removed by the caller),
# remembers the 'class_file' it belongs to, and returns the unused data.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant referring, through its name index, to the name of a class."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name and type index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference to a field."

    def get_descriptor(self):
        "Return the field descriptor as a type tuple."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference to a method."

    def get_descriptor(self):
        "Return the method descriptor as a (parameters, return type) tuple."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A reference to an interface method."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant holding a name index and a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding encoded text, preceded in the data by a 16-bit length."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        "Return the text exactly as encoded in the class file."
        return self.bytes

    def __unicode__(self):

        """
        Return the decoded text. An encoding error is raised by the built-in
        decoder if the stored bytes are not valid.
        """

        # Class files use "modified UTF-8" in which the null character is
        # written as the two bytes 0xC0 0x80. A strict UTF-8 decoder rejects
        # that sequence, so it is translated before decoding. The byte 0xC0
        # cannot occur for any other reason in valid data, making the
        # replacement safe.
        return unicode(self.bytes.replace("\xc0\x80", "\x00"), "utf-8")

    def get_value(self):
        "Return the text as a plain string (see __str__)."
        return str(self)

class StringInfo:

    "A string constant referring, through its string index, to its text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        "Return the plain string form of the referenced constant."
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        "Return the Unicode form of the referenced constant."
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        "Return the text as a plain string (see __str__)."
        return str(self)

class SmallNumInfo:

    "A base class for numeric constants stored in four bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A signed 32-bit integer constant."

    def get_value(self):
        "Return the constant as a Python integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A 32-bit floating point constant."

    def get_value(self):
        "Return the constant as a Python float."
        return f4(self.bytes)

class LargeNumInfo:

    """
    A base class for numeric constants stored in eight bytes, kept as separate
    high and low four-byte halves.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):

    "A signed 64-bit integer constant."

    def get_value(self):
        "Return the constant as a Python integer."
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):

    "A 64-bit floating point constant."

    def get_value(self):
        "Return the constant as a Python float."
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):

    """
    A base class for fields and methods: access flags, a name index, a
    descriptor index and a table of attributes.
    """

    def init(self, data, class_file):

        """
        Decode the item from the start of 'data', remembering 'class_file', and
        return the unused data.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field defined by a class."

    def get_descriptor(self):
        "Return the field's descriptor as a type tuple."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method defined by a class."

    def get_descriptor(self):
        "Return the method's descriptor as a (parameters, return type) tuple."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Each attribute class provides an 'init' method which decodes the attribute
# from the start of 'data' (the attribute name index having already been
# removed by the caller) and returns the unused data.

class AttributeInfo:

    """
    A generic attribute whose content is kept undecoded in the 'info'
    attribute. Note that this base implementation of 'init' does not record the
    given 'class_file'.
    """

    def init(self, data, class_file):
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute referring to the name of a source file."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute referring to a constant providing a value."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content must consist only of the two-byte constant index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding bytecode (kept undecoded in the 'code' attribute), an
    exception table and a table of nested attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of constant indexes identifying exceptions."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the exception index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded using the generic AttributeInfo behaviour."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded using the generic AttributeInfo behaviour."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An exception table entry consisting of four 16-bit values."

    def init(self, data):
        "Decode the entry from the start of 'data', returning the unused data."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an InnerClassesAttributeInfo object."

    def init(self, data, class_file):
        "Decode the entry from the start of 'data', returning the unused data."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A line number table entry pairing a 'start_pc' value with a line number."

    def init(self, data):
        "Decode the entry from the start of 'data', returning the unused data."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "An entry in the table of a LocalVariableAttributeInfo object."

    def init(self, data, class_file):
        "Decode the entry from the start of 'data', returning the unused data."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the variable's descriptor as a type tuple."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the offending tag when a constant's tag is not recognised."

    pass

class UnknownAttribute(Exception):

    "Raised with the offending name when an attribute's name is not recognised."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode a single constant from the start of 's', returning the constant
        object and the unused data. UnknownTag is raised for unrecognised tags.
        """

        tag = u1(s[0:1])
        if tag == 1:
            const = Utf8Info()
        elif tag == 3:
            const = IntegerInfo()
        elif tag == 4:
            const = FloatInfo()
        elif tag == 5:
            const = LongInfo()
        elif tag == 6:
            const = DoubleInfo()
        elif tag == 7:
            const = ClassInfo()
        elif tag == 8:
            const = StringInfo()
        elif tag == 9:
            const = FieldRefInfo()
        elif tag == 10:
            const = MethodRefInfo()
        elif tag == 11:
            const = InterfaceMethodRefInfo()
        elif tag == 12:
            const = NameAndTypeInfo()
        else:
            raise UnknownTag(tag)

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until 'count' - 1 table positions have been
        filled, returning the list of constants and the unused data. Constant
        indexes count from one, so index n is found at position n - 1.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' objects of the class 'cls' from 's', returning the list
        of objects and the unused data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' methods from 's', returning them with the unused data."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' fields from 's', returning them with the unused data."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode a single attribute from the start of 's', choosing the class of
        the attribute from the name it refers to, and return the attribute with
        the unused data. UnknownAttribute is raised for unrecognised names.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        if constant_name == "SourceFile":
            attribute = SourceFileAttributeInfo()
        elif constant_name == "ConstantValue":
            attribute = ConstantValueAttributeInfo()
        elif constant_name == "Code":
            attribute = CodeAttributeInfo()
        elif constant_name == "Exceptions":
            attribute = ExceptionsAttributeInfo()
        elif constant_name == "InnerClasses":
            attribute = InnerClassesAttributeInfo()
        elif constant_name == "Synthetic":
            attribute = SyntheticAttributeInfo()
        elif constant_name == "LineNumberTable":
            attribute = LineNumberAttributeInfo()
        elif constant_name == "LocalVariableTable":
            attribute = LocalVariableAttributeInfo()
        elif constant_name == "Deprecated":
            attribute = DeprecatedAttributeInfo()
        else:
            raise UnknownAttribute(constant_name)
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes from 's', returning them with the unused data."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the counted constant table at the start of 's'."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return the 16-bit access flags at the start of 's' and the unused data."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return the constant referenced at the start of 's' and the unused data."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant referenced at the start of 's' and the unused data.
        The specification uses a zero index for a class without a superclass
        (java.lang.Object), in which case None is returned in place of the
        constant.
        """

        index = u2(s[0:2])
        # A zero index must not be used for lookup: it would become -1 and
        # silently select the last constant in the table.
        if index != 0:
            return self.constants[index - 1], s[2:]
        else:
            return None, s[2:]

    def _get_interfaces(self, s):

        """
        Decode the counted table of interface indexes at the start of 's',
        returning the referenced constants and the unused data.
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the counted field table at the start of 's'."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode the counted attribute table at the start of 's'."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the counted method table at the start of 's'."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    f = open(sys.argv[1], "rb")
    # Make sure that the file is closed even if decoding fails.
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4