javaclass

classfile.py

48:3c6102b22bfd
2004-11-14 Paul Boddie Adjusted the dispatcher method code, adding comments, where the only real changes are the removal of pop_top at the end of the code for testing each method (since it had been assumed that the iterator was still on the stack, but it was probably something else) and the moving of setup_loop upwards before the preparation of the argument-to-type mapping.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 descriptor_base_type_mapping = {    34     "B" : "int",    35     "C" : "str",    36     "D" : "float",    37     "F" : "float",    38     "I" : "int",    39     "J" : "int",    40     "L" : "object",    41     "S" : "int",    42     "Z" : "bool",    43     "[" : "list"    44     }    45     46 # Useful mix-ins.    

class PythonMethodUtils:

    "A mix-in providing Python-level names for methods."

    def get_python_name(self):

        """
        Return the method name, a "$" and the parameter type names joined by
        "$". The special name "<init>" is replaced by "__init__".
        """

        name = str(self.get_name())
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the names of all parameter types, joined by '$'."

        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the given 'descriptor_type', a tuple of the form
        (base_type, object_type, array_type). The suffix 's' accumulates one
        "[]" for each level of array nesting.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    "A mix-in providing Python-level names for classes and similar objects."

    def get_python_name(self):
        "Return the name with each '/' replaced by '.'."
        return str(self.get_name()).replace("/", ".")

class NameUtils:

    "A mix-in for objects having a 'name_index' into the constant table."

    def get_name(self):

        "Return the constant providing the name, or None for a zero index."

        if self.name_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        # Constant indexes are one-based; the stored list is zero-based.
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    "A mix-in for objects having a 'name_and_type_index'."

    def _get_from_name_and_type(self, method_name):

        """
        Call the method called 'method_name' on the constant referenced by
        'name_and_type_index' and return its result, or return None where the
        index is zero.
        """

        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        entry = self.class_file.constants[self.name_and_type_index - 1]
        return getattr(entry, method_name)()

    def get_name(self):
        "Return the name from the referenced constant, or None."
        return self._get_from_name_and_type("get_name")

    def get_field_descriptor(self):
        "Return the field descriptor from the referenced constant, or None."
        return self._get_from_name_and_type("get_field_descriptor")

    def get_method_descriptor(self):
        "Return the method descriptor from the referenced constant, or None."
        return self._get_from_name_and_type("get_method_descriptor")

class DescriptorUtils:

    "Symbol parsing."

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor string 's', which must start with "(".
        Return a tuple (params, return_type) where 'params' is a list of
        parameter types and 'return_type' is a type, or None where "V"
        follows the closing ")". Each type is a tuple of the form
        (base_type, object_type, array_type).
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here, s[0] is ")" and s[1] starts the return type.
        if s[1] == "V":
            return_type = None
        else:
            return_type, s = self._get_field_type(s[1:])
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse a parameter type from 's', returning (type, remaining string)."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field type from 's', returning (type, remaining string)."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse an array component type from 's', returning (type, remaining string)."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the start of 's', returning a tuple of the form
        ((base_type, object_type, array_type), remaining string). Only a base
        type of "L" sets 'object_type' and only "[" sets 'array_type'; the
        unused members are None.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Return (first character of 's', rest of 's'), or (None, s) if empty."
        if s:
            return s[0], s[1:]
        return None, s

    def _get_object_type(self, s):

        """
        Return (text before the first ";", text after it), or (None, s) where
        's' is empty. A non-empty 's' without ";" fails an assertion.
        """

        if not s:
            return None, s
        s_end = s.find(";")
        assert s_end != -1
        return s[:s_end], s[s_end+1:]

    def _get_array_type(self, s):
        "Return (component type, remaining string), or (None, s) if empty."
        if s:
            return self._get_component_type(s)
        return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant holding the index of a class name."

    def init(self, data, class_file):
        "Decode the name index from 'data', returning the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name-and-type index."

    def init(self, data, class_file):
        "Decode both indexes from 'data', returning the remaining data."
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference to a field."

    def get_descriptor(self):
        "Return the parsed field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference to a method."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A reference to an interface method, decoded like any method reference."

    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A constant holding a name index and a descriptor index."

    def init(self, data, class_file):
        "Decode both indexes from 'data', returning the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        "Decode the length and the bytes from 'data', returning the remaining data."
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        "Return the stored bytes unchanged."
        return self.bytes

    def __unicode__(self):
        # NOTE(review): class files are specified to use a modified form of
        # UTF-8; strict "utf-8" decoding may presumably reject some entries -
        # confirm against the specification.
        return unicode(self.bytes, "utf-8")

class StringInfo:

    "A constant holding the index of the constant providing a string's text."

    def init(self, data, class_file):
        "Decode the string index from 'data', returning the remaining data."
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "A constant holding a four byte number."

    def init(self, data, class_file):
        "Store the first four bytes of 'data', returning the remaining data."
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A four byte signed integer constant."

    def get_value(self):
        "Return the stored bytes decoded as a signed integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A four byte floating point constant."

    def get_value(self):
        "Return the stored bytes decoded as a float."
        return f4(self.bytes)

class LargeNumInfo:

    "A constant holding an eight byte number as two unsigned 32-bit halves."

    def init(self, data, class_file):
        "Decode the high and low halves from 'data', returning the remaining data."
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_raw(self):

        """
        Return the eight bytes of the number, rebuilt from the integer halves
        with the high half first.
        """

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "An eight byte signed integer constant."

    def get_value(self):
        "Return the number as a signed 64-bit integer."
        # The halves are integers, so they are repacked into bytes before
        # decoding; adding them together would neither give the right
        # number nor give anything which can be unpacked.
        return struct.unpack(">q", self._get_raw())[0]

class DoubleInfo(LargeNumInfo):

    "An eight byte floating point constant."

    def get_value(self):
        "Return the number as a 64-bit float."
        return struct.unpack(">d", self._get_raw())[0]

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "Details common to fields and methods."

    def init(self, data, class_file):

        """
        Decode the access flags, name index, descriptor index and attributes
        from 'data', returning the remaining data.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field of a class."

    def get_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method of a class."

    def get_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    "A generic attribute whose content is kept undecoded in 'info'."

    def init(self, data, class_file):

        """
        Decode the attribute length from 'data' and keep that many following
        bytes in 'info', returning the remaining data.
        """

        # Kept for consistency with the more specific attribute classes.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):

        """
        Decode the attribute length and source file index from 'data',
        returning the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The caller continues decoding from the returned data, so the
        # remainder must be handed back as in every other attribute class.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant value."

    def init(self, data, class_file):

        """
        Decode the attribute length and constant value index from 'data',
        returning the remaining data. The attribute content must occupy
        exactly two bytes.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "An attribute holding bytecode, an exception table and nested attributes."

    def init(self, data, class_file):

        """
        Decode the stack and locals limits, the code, the exception table and
        any nested attributes from 'data', returning the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of indexes of exception constants."

    def init(self, data, class_file):

        """
        Decode the attribute length and the table of exception indexes from
        'data', returning the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the exception table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of inner class details."

    def init(self, data, class_file):

        """
        Decode the attribute length and the inner class entries from 'data',
        returning the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded generically, without interpreting its content."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of line number entries."

    def init(self, data, class_file):

        """
        Decode the attribute length and the line number entries from 'data',
        returning the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of local variable entries."

    def init(self, data, class_file):

        """
        Decode the attribute length and the local variable entries from
        'data', returning the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

   373 class DeprecatedAttributeInfo(AttributeInfo):   374     pass   375    376 # Child classes of the attribute information classes.   377    378 class ExceptionInfo:   379     def init(self, data):   380         self.start_pc = u2(data[0:2])   381         self.end_pc = u2(data[2:4])   382         self.handler_pc = u2(data[4:6])   383         self.catch_type = u2(data[6:8])   384         return data[8:]   385    386 class InnerClassInfo(NameUtils):   387     def init(self, data, class_file):   388         self.class_file = class_file   389         self.inner_class_info_index = u2(data[0:2])   390         self.outer_class_info_index = u2(data[2:4])   391         # Permit the NameUtils mix-in.   392         self.name_index = self.inner_name_index = u2(data[4:6])   393         self.inner_class_access_flags = u2(data[6:8])   394         return data[8:]   395    396 class LineNumberInfo:   397     def init(self, data):   398         self.start_pc = u2(data[0:2])   399         self.line_number = u2(data[2:4])   400         return data[4:]   401    402 class LocalVariableInfo(NameUtils, PythonNameUtils):   403     def init(self, data, class_file):   404         self.class_file = class_file   405         self.start_pc = u2(data[0:2])   406         self.length = u2(data[2:4])   407         self.name_index = u2(data[4:6])   408         self.descriptor_index = u2(data[6:8])   409         self.index = u2(data[8:10])   410         return data[10:]   411    412     def get_descriptor(self):   413         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   414    415 # Exceptions.   416    417 class UnknownTag(Exception):   418     pass   419    420 class UnknownAttribute(Exception):   421     pass   422    423 # Abstractions for the main structures.   424    425 class ClassFile:   426    427     "A class representing a Java class file."   
428    429     def __init__(self, s):   430    431         """   432         Process the given string 's', populating the object with the class   433         file's details.   434         """   435    436         self.constants, s = self._get_constants(s[8:])   437         self.access_flags, s = self._get_access_flags(s)   438         self.this_class, s = self._get_this_class(s)   439         self.super_class, s = self._get_super_class(s)   440         self.interfaces, s = self._get_interfaces(s)   441         self.fields, s = self._get_fields(s)   442         self.methods, s = self._get_methods(s)   443         self.attributes, s = self._get_attributes(s)   444    445     def _decode_const(self, s):   446         tag = u1(s[0:1])   447         if tag == 1:   448             const = Utf8Info()   449         elif tag == 3:   450             const = IntegerInfo()   451         elif tag == 4:   452             const = FloatInfo()   453         elif tag == 5:   454             const = LongInfo()   455         elif tag == 6:   456             const = DoubleInfo()   457         elif tag == 7:   458             const = ClassInfo()   459         elif tag == 8:   460             const = StringInfo()   461         elif tag == 9:   462             const = FieldRefInfo()   463         elif tag == 10:   464             const = MethodRefInfo()   465         elif tag == 11:   466             const = InterfaceMethodRefInfo()   467         elif tag == 12:   468             const = NameAndTypeInfo()   469         else:   470             raise UnknownTag, tag   471    472         # Initialise the constant object.   473    474         s = const.init(s[1:], self)   475         return const, s   476    477     def _get_constants_from_table(self, count, s):   478         l = []   479         # Have to skip certain entries specially.   
480         i = 1   481         while i < count:   482             c, s = self._decode_const(s)   483             l.append(c)   484             # Add a blank entry after "large" entries.   485             if isinstance(c, LargeNumInfo):   486                 l.append(None)   487                 i += 1   488             i += 1   489         return l, s   490    491     def _get_items_from_table(self, cls, number, s):   492         l = []   493         for i in range(0, number):   494             f = cls()   495             s = f.init(s, self)   496             l.append(f)   497         return l, s   498    499     def _get_methods_from_table(self, number, s):   500         return self._get_items_from_table(MethodInfo, number, s)   501    502     def _get_fields_from_table(self, number, s):   503         return self._get_items_from_table(FieldInfo, number, s)   504    505     def _get_attribute_from_table(self, s):   506         attribute_name_index = u2(s[0:2])   507         constant_name = self.constants[attribute_name_index - 1].bytes   508         if constant_name == "SourceFile":   509             attribute = SourceFileAttributeInfo()   510         elif constant_name == "ConstantValue":   511             attribute = ConstantValueAttributeInfo()   512         elif constant_name == "Code":   513             attribute = CodeAttributeInfo()   514         elif constant_name == "Exceptions":   515             attribute = ExceptionsAttributeInfo()   516         elif constant_name == "InnerClasses":   517             attribute = InnerClassesAttributeInfo()   518         elif constant_name == "Synthetic":   519             attribute = SyntheticAttributeInfo()   520         elif constant_name == "LineNumberTable":   521             attribute = LineNumberAttributeInfo()   522         elif constant_name == "LocalVariableTable":   523             attribute = LocalVariableAttributeInfo()   524         elif constant_name == "Deprecated":   525             attribute = 
DeprecatedAttributeInfo()   526         else:   527             raise UnknownAttribute, constant_name   528         s = attribute.init(s[2:], self)   529         return attribute, s   530    531     def _get_attributes_from_table(self, number, s):   532         attributes = []   533         for i in range(0, number):   534             attribute, s = self._get_attribute_from_table(s)   535             attributes.append(attribute)   536         return attributes, s   537    538     def _get_constants(self, s):   539         count = u2(s[0:2])   540         return self._get_constants_from_table(count, s[2:])   541    542     def _get_access_flags(self, s):   543         return u2(s[0:2]), s[2:]   544    545     def _get_this_class(self, s):   546         index = u2(s[0:2])   547         return self.constants[index - 1], s[2:]   548    549     _get_super_class = _get_this_class   550    551     def _get_interfaces(self, s):   552         interfaces = []   553         number = u2(s[0:2])   554         s = s[2:]   555         for i in range(0, number):   556             index = u2(s[0:2])   557             interfaces.append(self.constants[index - 1])   558             s = s[2:]   559         return interfaces, s   560    561     def _get_fields(self, s):   562         number = u2(s[0:2])   563         return self._get_fields_from_table(number, s[2:])   564    565     def _get_attributes(self, s):   566         number = u2(s[0:2])   567         return self._get_attributes_from_table(number, s[2:])   568    569     def _get_methods(self, s):   570         number = u2(s[0:2])   571         return self._get_methods_from_table(number, s[2:])   572    573 if __name__ == "__main__":   574     import sys   575     f = open(sys.argv[1])   576     c = ClassFile(f.read())   577    578 # vim: tabstop=4 expandtab shiftwidth=4