javaclass

classfile.py

39:e64b08b9681f
2004-11-13 Paul Boddie Fixed 8-bit limited value rewriting. Added code generation avoidance for interfaces. Added an import hook mechanism for importing directories of classes.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 descriptor_base_type_mapping = {    34     "B" : "int",    35     "C" : "str",    36     "D" : "float",    37     "F" : "float",    38     "I" : "int",    39     "J" : "int",    40     "L" : "object",    41     "S" : "int",    42     "Z" : "bool",    43     "[" : "list"    44     }    45     46 # Useful mix-ins.    

class PythonMethodUtils:

    """
    Mix-in building Python-level names for methods. The class it is mixed
    into must provide get_name and get_descriptor.
    """

    def get_python_name(self):

        """
        Return the method name followed by "$" and a rendering of the
        parameter types. The name "<init>" is replaced by "__init__".
        """

        name = str(self.get_name())
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the parameter types of the descriptor joined by '$'."

        names = []
        # Element 0 of a method descriptor is the list of parameter types.
        for descriptor_type in self.get_descriptor()[0]:
            names.append(self._get_type_as_name(descriptor_type))
        return "$".join(names)

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base_type, object_type, array_type) tuple
        'descriptor_type'. Object types yield the object type name, array
        types yield the component type name with "[]" added per dimension,
        and other base types yield "<X>". The suffix 's' accumulates the
        "[]" markers during recursion.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    "Mix-in for objects whose Python name is the same as their name."

    def get_python_name(self):
        "Return the result of get_name unchanged."
        return self.get_name()

class NameUtils:

    "Mix-in resolving the 'name_index' attribute through the constants."

    def get_name(self):

        """
        Return the constant referred to by 'name_index', or None if the
        index is zero.
        """

        if self.name_index != 0:
            # Indexes into the constants start at one.
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    Mix-in delegating to the constant referred to by the
    'name_and_type_index' attribute. Each method returns None if the index
    is zero.
    """

    def get_name(self):
        "Return the name provided by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        "Return the field descriptor provided by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        "Return the method descriptor provided by the referenced constant."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class DescriptorUtils:

    "Symbol parsing."

    # A parsed type is a tuple (base_type, object_type, array_type) where
    # 'object_type' is only set for base type "L" and 'array_type' (itself a
    # parsed type) is only set for base type "[".

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor string 's', which must start with "(".
        Return a tuple (params, return_type) where 'params' is a list of
        parsed types and 'return_type' is a parsed type, or None where the
        return type is "V".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here s[0] is ")" and the return type starts at s[1].
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type, s = None, s[1:]
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse one parameter type; see _get_field_type."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field descriptor; see _get_field_type."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse an array component type; see _get_field_type."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the start of 's'. Return a tuple consisting of
        the parsed type and the remainder of 's'.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):

        """
        Return the first character of 's' and the remainder, or None and
        's' itself if 's' is empty.
        """

        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Return the text of 's' up to the first ";" and the text after it,
        or None and 's' itself if 's' is empty. A missing ";" fails the
        assertion.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):

        """
        Return the parsed component type at the start of 's' and the
        remainder, or None and 's' itself if 's' is empty.
        """

        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
# Each 'init' method decodes the object from the start of 'data', which must
# begin just after the tag byte, and returns the data left over.

class ClassInfo(NameUtils, PythonNameUtils):

    "A class constant, holding the index of its name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A reference constant, holding a class index and a name-and-type index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A field reference constant."

    def get_descriptor(self):
        "Return the parsed field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A method reference constant."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "An interface method reference constant, decoded like MethodRefInfo."

    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A name-and-type constant, holding a name index and a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A text constant stored as a 16-bit length followed by that many bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        "Return the stored bytes without decoding them."
        return self.bytes

    def __unicode__(self):
        "Return the stored bytes decoded as UTF-8."
        return unicode(self.bytes, "utf-8")

class StringInfo:

    "A string constant, holding the index of its text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "A numeric constant kept as its 4 undecoded bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "An integer constant."

    def get_value(self):
        "Return the stored bytes decoded as a signed 32-bit integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A float constant."

    def get_value(self):
        "Return the stored bytes decoded as a 32-bit float."
        return f4(self.bytes)

class LargeNumInfo:

    """
    A numeric constant kept as two unsigned 32-bit integers, 'high_bytes'
    and 'low_bytes', decoded from the first and second group of 4 bytes
    respectively.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_bytes(self):

        """
        Return the 8 bytes from which 'high_bytes' and 'low_bytes' were
        decoded. The halves are integers, so they cannot simply be added
        together and decoded; they have to be packed again first.
        """

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "A long integer constant."

    def get_value(self):
        "Return the value as a signed 64-bit integer."
        return struct.unpack(">q", self._get_bytes())[0]

class DoubleInfo(LargeNumInfo):

    "A double precision float constant."

    def get_value(self):
        "Return the value as a 64-bit float."
        return struct.unpack(">d", self._get_bytes())[0]

# Other information.
# Objects of these classes are generally aware of the class they reside in.

# Each 'init' method below decodes the object from the start of 'data' and
# returns the data left over, so that the caller can continue decoding.

class ItemInfo(NameUtils, DescriptorUtils):

    """
    Common decoding for fields and methods: access flags, a name index, a
    descriptor index and a table of attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field of the class."

    def get_descriptor(self):
        "Return the descriptor constant parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method of the class."

    def get_descriptor(self):
        "Return the descriptor constant parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# For attributes, 'data' starts at the 32-bit attribute length, the attribute
# name index having been consumed by the caller.

class AttributeInfo:

    "An attribute whose contents are kept undecoded in 'info'."

    def init(self, data, class_file):
        # Kept for consistency with the other attribute classes.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned: the caller continues decoding
        # with whatever this method returns.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant value."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The contents must be exactly the 2 byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding stack and local variable limits, the undecoded
    code in 'code', an exception table and a table of nested attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of exception indexes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referred to by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of InnerClassInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded generically by AttributeInfo."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of LineNumberInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of LocalVariableInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo.init requires the class file in order to
            # resolve names and descriptors later.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded generically by AttributeInfo."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An exception table entry: three code positions and a catch type index."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An inner class table entry."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A line number table entry: a code position and a line number."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A local variable table entry."

    # DescriptorUtils is required by get_descriptor, which would otherwise
    # fail to find _get_field_descriptor.

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the descriptor constant parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the tag value where a constant has an unrecognised tag."

    pass

class UnknownAttribute(Exception):

    "Raised with the attribute name where an attribute is not recognised."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Classes used to decode constants, selected by tag value.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # Classes used to decode attributes, selected by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first 8 bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning the constant and
        the remaining data. UnknownTag is raised for an unrecognised tag.
        """

        tag = u1(s[0:1])
        cls = self._constant_classes.get(tag)
        if cls is None:
            raise UnknownTag(tag)
        const = cls()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the index reaches 'count', returning
        the list of constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' objects of the class 'cls' from 's', returning the
        list of objects and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' MethodInfo objects from 's'."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' FieldInfo objects from 's'."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning the attribute
        and the remaining data. The class used is chosen by the attribute
        name; UnknownAttribute is raised for an unrecognised name.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        cls = self._attribute_classes.get(constant_name)
        if cls is None:
            raise UnknownAttribute(constant_name)
        attribute = cls()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's', returning the list of
        attributes and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # Each method below reads a 16-bit count or index from the start of 's'
    # and returns the decoded result together with the remaining data.

    def _get_constants(self, s):
        "Decode the constants, whose count is given first."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Decode the access flags as an integer."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return the constant referred to by the index at the start of 's'."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant referred to by the index at the start of 's',
        or None where the index is zero.
        """

        index = u2(s[0:2])
        if index == 0:
            # Looking up index zero would select the last constant, which is
            # unrelated. NOTE(review): the specification cited in the module
            # docstring reserves zero for a class with no superclass.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Return the constants referred to by a counted table of indexes."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode a counted table of fields."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode a counted table of attributes."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode a counted table of methods."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data; the file is closed even if decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4