javaclass

classfile.py

35:630f6b688af0
2004-11-11 Paul Boddie Fixed invokespecial for foreign object constructors. Moved much of the main program into the ClassTranslator class, introducing trivial aliases for each method name mapping to only one "real" method.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful mix-ins.    34     35 class PythonMethodUtils:    36     def get_python_name(self):    37         name = self.get_name()    38         if str(name) == "<init>":    39             name = "__init__"    40         else:    41             name = str(name)    42         return name + "$" + self._get_descriptor_as_name()    43     44     def _get_descriptor_as_name(self):    45         l = []    46         for descriptor_type in self.get_descriptor()[0]:    47             l.append(self._get_type_as_name(descriptor_type))    48         return "$".join(l)    49     50     def _get_type_as_name(self, descriptor_type, s=""):    51         base_type, object_type, array_type = descriptor_type    52         if base_type == "L":    53             return object_type + s    54         elif base_type == "[":    55             return self._get_type_as_name(array_type, s + "[]")    56         else:    57             return "<" + base_type + ">" + s    58     59 class PythonNameUtils:    60     def get_python_name(self):    61         return self.get_name()    62     63 class NameUtils:    64     def get_name(self):    65     
    if self.name_index != 0:    66             return self.class_file.constants[self.name_index - 1]    67         else:    68             # Some name indexes are zero to indicate special conditions.    69             return None    70     71 class NameAndTypeUtils:    72     def get_name(self):    73         if self.name_and_type_index != 0:    74             return self.class_file.constants[self.name_and_type_index - 1].get_name()    75         else:    76             # Some name indexes are zero to indicate special conditions.    77             return None    78     79     def get_field_descriptor(self):    80         if self.name_and_type_index != 0:    81             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()    82         else:    83             # Some name indexes are zero to indicate special conditions.    84             return None    85     86     def get_method_descriptor(self):    87         if self.name_and_type_index != 0:    88             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()    89         else:    90             # Some name indexes are zero to indicate special conditions.    91             return None    92     93 class DescriptorUtils:    94     95     "Symbol parsing."    
96     97     def _get_method_descriptor(self, s):    98         assert s[0] == "("    99         params = []   100         s = s[1:]   101         while s[0] != ")":   102             parameter_descriptor, s = self._get_parameter_descriptor(s)   103             params.append(parameter_descriptor)   104         if s[1] != "V":   105             return_type, s = self._get_field_type(s[1:])   106         else:   107             return_type, s = None, s[1:]   108         return params, return_type   109    110     def _get_parameter_descriptor(self, s):   111         return self._get_field_type(s)   112    113     def _get_field_descriptor(self, s):   114         return self._get_field_type(s)   115    116     def _get_component_type(self, s):   117         return self._get_field_type(s)   118    119     def _get_field_type(self, s):   120         base_type, s = self._get_base_type(s)   121         object_type = None   122         array_type = None   123         if base_type == "L":   124             object_type, s = self._get_object_type(s)   125         elif base_type == "[":   126             array_type, s = self._get_array_type(s)   127         return (base_type, object_type, array_type), s   128    129     def _get_base_type(self, s):   130         if len(s) > 0:   131             return s[0], s[1:]   132         else:   133             return None, s   134    135     def _get_object_type(self, s):   136         if len(s) > 0:   137             s_end = s.find(";")   138             assert s_end != -1   139             return s[:s_end], s[s_end+1:]   140         else:   141             return None, s   142    143     def _get_array_type(self, s):   144         if len(s) > 0:   145             return self._get_component_type(s)   146         else:   147             return None, s   148    149 # Constant information.   150 # Objects of these classes are not directly aware of the class they reside in.   
class ClassInfo(NameUtils, PythonNameUtils):

    "A class constant: a reference to the constant holding the class name."

    def init(self, data, class_file):

        """
        Decode the constant from the start of 'data', remembering
        'class_file' for later name lookups. Return the remaining data.
        """

        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "Common base for constants which refer to a member of a class."

    def init(self, data, class_file):
        "Decode the class and name-and-type indexes; return the remaining data."
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A field reference constant."

    def get_descriptor(self):
        "Return the field descriptor of the referenced name-and-type constant."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A method reference constant."

    def get_descriptor(self):
        "Return the method descriptor of the referenced name-and-type constant."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "An interface method reference constant; decoded like a method reference."

    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A constant pairing a name with a descriptor."

    def init(self, data, class_file):
        "Decode the name and descriptor indexes; return the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def _get_descriptor_text(self):
        "Return the text of the constant selected by 'descriptor_index'."
        # Constant indexes are one-based.
        return unicode(self.class_file.constants[self.descriptor_index - 1])

    def get_field_descriptor(self):
        "Return the descriptor text parsed as a field descriptor."
        return self._get_field_descriptor(self._get_descriptor_text())

    def get_method_descriptor(self):
        "Return the descriptor text parsed as a method descriptor."
        return self._get_method_descriptor(self._get_descriptor_text())

class Utf8Info:

    "A constant holding length-prefixed text."

    def init(self, data, class_file):

        """
        Decode the two-byte length and then that many bytes of text from
        'data'. Return the remaining data.
        """

        self.class_file = class_file
        self.length = u2(data[0:2])
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        return unicode(self.bytes, "utf-8")

class StringInfo:

    "A string constant: a reference to the constant holding the text."

    def init(self, data, class_file):
        "Decode the string index; return the remaining data."
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "Common base for four-byte numeric constants; keeps the raw bytes."

    def init(self, data, class_file):
        "Store the first four bytes of 'data'; return the remaining data."
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "An integer constant."

    def get_value(self):
        "Return the stored bytes decoded as a signed 32-bit integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A float constant."

    def get_value(self):
        "Return the stored bytes decoded as a 32-bit float."
        return f4(self.bytes)

class LargeNumInfo:

    """
    Common base for eight-byte numeric constants. The value is kept as two
    unsigned 32-bit integers, 'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        "Decode both 32-bit halves from 'data'; return the remaining data."
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_raw_bytes(self):

        """
        Return the original eight big-endian bytes. The halves are stored as
        integers, so they must be packed again before the value is decoded;
        adding them together does not give a byte string.
        """

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "A long constant."

    def get_value(self):
        "Return the value as a signed 64-bit integer."
        return s8(self._get_raw_bytes())

class DoubleInfo(LargeNumInfo):

    "A double constant."

    def get_value(self):
        "Return the value as a 64-bit float."
        return f8(self._get_raw_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.
class ItemInfo(NameUtils, DescriptorUtils):

    "Common base for the fields and methods of a class."

    def init(self, data, class_file):

        """
        Decode the access flags, name index, descriptor index and attributes
        from the start of 'data'. Return the remaining data.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field of a class."

    def get_descriptor(self):
        "Return the parsed field descriptor as (type, remaining text)."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method of a class."

    def get_descriptor(self):
        "Return the parsed method descriptor as (parameter types, return type)."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    Generic attribute: the payload is kept undecoded in 'info'. In all
    attribute classes, 'data' starts at the four-byte attribute length, the
    attribute name index having been consumed by the caller.
    """

    def init(self, data, class_file):
        "Store the length and raw payload; return the remaining data."
        self.attribute_length = u4(data[0:4])
        end = 4 + self.attribute_length
        self.info = data[4:end]
        return data[end:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "Attribute referring to the constant naming the source file."

    def init(self, data, class_file):

        """
        Decode the source file index and return the remaining data. The
        remaining data must be returned or the caller loses its position in
        the class file.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "Attribute referring to the constant holding a field's value."

    def init(self, data, class_file):
        "Decode the constant value index; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The payload is exactly the two-byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "Attribute holding a method's bytecode, exception table and attributes."

    def init(self, data, class_file):

        """
        Decode the stack and local variable limits, the bytecode, the
        exception table and the nested attributes. Return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "Attribute listing the constant indexes of a method's exceptions."

    def init(self, data, class_file):
        "Decode the table of exception indexes; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        # The two-byte indexes follow the count directly.
        end = 6 + 2 * self.number_of_exceptions
        self.exception_index_table = [u2(data[index:index+2]) for index in range(6, end, 2)]
        return data[end:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "Attribute holding a table of inner class descriptions."

    def init(self, data, class_file):
        "Decode the inner class entries; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "Attribute decoded generically; see AttributeInfo.init."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "Attribute holding a table relating code positions to line numbers."

    def init(self, data, class_file):
        "Decode the line number entries; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "Attribute holding a table of local variable descriptions."

    def init(self, data, class_file):
        "Decode the local variable entries; return the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo.init requires the class file for name lookups.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "Attribute decoded generically; see AttributeInfo.init."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An entry in the exception table of a code attribute."

    def init(self, data):
        "Decode the eight-byte entry; return the remaining data."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an inner classes attribute."

    def init(self, data, class_file):
        "Decode the eight-byte entry; return the remaining data."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a line number attribute."

    def init(self, data):
        "Decode the four-byte entry; return the remaining data."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    """
    An entry in the table of a local variable attribute. DescriptorUtils is
    needed because 'get_descriptor' relies on '_get_field_descriptor'.
    """

    def init(self, data, class_file):
        "Decode the ten-byte entry; return the remaining data."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor as (type, remaining text)."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised for a constant pool tag which has no decoder."

    pass

class UnknownAttribute(Exception):

    "Raised for an attribute name which has no decoder."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant pool tags mapped to the classes decoding the entries.

    _constant_classes = {
        1: Utf8Info,
        3: IntegerInfo,
        4: FloatInfo,
        5: LongInfo,
        6: DoubleInfo,
        7: ClassInfo,
        8: StringInfo,
        9: FieldRefInfo,
        10: MethodRefInfo,
        11: InterfaceMethodRefInfo,
        12: NameAndTypeInfo,
        }

    # Attribute names mapped to the classes decoding the attributes.

    _attribute_classes = {
        "SourceFile": SourceFileAttributeInfo,
        "ConstantValue": ConstantValueAttributeInfo,
        "Code": CodeAttributeInfo,
        "Exceptions": ExceptionsAttributeInfo,
        "InnerClasses": InnerClassesAttributeInfo,
        "Synthetic": SyntheticAttributeInfo,
        "LineNumberTable": LineNumberAttributeInfo,
        "LocalVariableTable": LocalVariableAttributeInfo,
        "Deprecated": DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    # Each of the following methods decodes something from the start of 's'
    # and returns (result, remaining data).

    def _decode_const(self, s):

        """
        Decode one constant whose tag is the first byte of 's'. Raise
        UnknownTag if no decoder exists for the tag.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constants with indexes from 1 up to but not including
        'count', so that constant n is found at position n - 1 of the
        returned list.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' consecutive items, each an instance of 'cls'."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' consecutive methods."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' consecutive fields."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode one attribute, choosing its class by the text of the constant
        selected by the leading name index. Raise UnknownAttribute if no
        decoder exists for the name.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' consecutive attributes."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the constant count and then the constants themselves."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Decode the two-byte access flags."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Decode a constant index and return the constant it selects."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Decode the superclass index and return the constant it selects, or
        None if the index is zero and there is no superclass.
        """

        index = u2(s[0:2])
        if index == 0:
            # Indexing with -1 would wrongly select the last constant.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Decode the interface count and the constants the indexes select."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the field count and then the fields themselves."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode the attribute count and then the attributes themselves."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the method count and then the methods themselves."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them as such and always close the file.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4