javaclass

classfile.py

85:85e3410a7de9
2004-12-10 Paul Boddie Moved class name loading (in translated instructions) into a top-level function in the bytecode module. Added initialisation order processing to classhook.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def s2(data):    19     return struct.unpack(">h", data[0:2])[0]    20     21 def u4(data):    22     return struct.unpack(">L", data[0:4])[0]    23     24 def s4(data):    25     return struct.unpack(">l", data[0:4])[0]    26     27 def s8(data):    28     return struct.unpack(">q", data[0:8])[0]    29     30 def f4(data):    31     return struct.unpack(">f", data[0:4])[0]    32     33 def f8(data):    34     return struct.unpack(">d", data[0:8])[0]    35     36 # Useful tables and constants.    37     38 descriptor_base_type_mapping = {    39     "B" : "int",    40     "C" : "str",    41     "D" : "float",    42     "F" : "float",    43     "I" : "int",    44     "J" : "int",    45     "L" : "object",    46     "S" : "int",    47     "Z" : "bool",    48     "[" : "list"    49     }    50     51 PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \    52 0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800    53     54 def has_flags(flags, desired):    55     desired_flags = reduce(lambda a, b: a | b, desired, 0)    56     return (flags & desired_flags) == desired_flags    57     58 # Useful mix-ins.    
class PythonMethodUtils:

    """
    Mix-in which derives Python-compatible method names from Java method names
    and descriptors. The class using it must provide 'get_name' and
    'get_descriptor', the latter returning a (parameter types, return type)
    pair like the one produced by the 'get_method_descriptor' function.
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_python_name(self):

        """
        Return the Python name of the method: the method name, the symbol
        separator and the encoded parameter types. "<init>" is renamed to
        "__init__" before encoding, whereas "<clinit>" always yields exactly
        "__clinit__" without any parameter information.
        """

        name = str(self.get_name())
        if name == "<clinit>":
            return "__clinit__"
        if name == "<init>":
            name = "__init__"
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the encoded parameter types joined by the symbol separator."

        parameter_types = self.get_descriptor()[0]
        return self.symbol_sep.join(
            [self._get_type_as_name(descriptor_type) for descriptor_type in parameter_types]
            )

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return the encoded name of the given 'descriptor_type', which is a
        (base type, object type, array type) tuple. The suffix 's' collects one
        array separator for each array dimension passed through on the way to
        the element type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    """
    Mix-in which converts the name provided by 'get_name' into a dotted name.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Where the name starts with
        "[" it is parsed as a field descriptor and the name of the innermost
        element type is used instead.
        """

        # NOTE: This may not be comprehensive.
        # The name is converted once and reused below.
        name = str(self.get_name())
        if not name.startswith("["):
            return name.replace("/", ".")
        return self._get_type_name(get_field_descriptor(name)).replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the object type for object descriptors, the element type name
        for array descriptors, and the mapped Python type name for anything
        else.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    """
    Mix-in for objects having 'class_file' and 'name_index' attributes.
    """

    def get_name(self):

        """
        Return the constant referenced by the name index, or None where the
        index is zero.
        """

        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        # Indexes start at one, whereas the constants list starts at zero.
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    """
    Mix-in for objects having 'class_file', 'class_index' and
    'name_and_type_index' attributes. The name and descriptor methods return
    None where the name and type index is zero.
    """

    def _get_name_and_type(self):
        "Return the constant referenced by the name and type index."
        return self.class_file.constants[self.name_and_type_index - 1]

    def get_name(self):
        "Return the name provided by the referenced name and type constant."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        return self._get_name_and_type().get_name()

    def get_field_descriptor(self):
        "Return the field descriptor provided by the referenced constant."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        return self._get_name_and_type().get_field_descriptor()

    def get_method_descriptor(self):
        "Return the method descriptor provided by the referenced constant."
        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        return self._get_name_and_type().get_method_descriptor()

    def get_class(self):
        "Return the constant referenced by the class index."
        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.
# Types are represented by (base type, object type, array type) tuples where
# the object type is only set for "L" base types, the array type (itself such a
# tuple) is only set for "[" base types, and unused elements are None.

def get_method_descriptor(s):

    """
    Parse the method descriptor 's', returning a tuple containing the list of
    parameter types and the return type. The return type is None for "V".

    AssertionError is raised where 's' does not start with "("; this exception
    type is retained from the assertion originally used here. IndexError is
    raised where 's' is empty or ends before the return type.
    """

    # An explicit test is used since assert statements are removed by -O.
    if s[0] != "(":
        raise AssertionError("method descriptor %r does not start with '('" % (s,))
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # Here, s[0] is ")" and the return type follows it.
    if s[1] == "V":
        return params, None
    return params, _get_field_type(s[1:])[0]

def get_field_descriptor(s):
    "Parse the field descriptor 's', returning the type found at its start."
    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):
    "Return the type found at the start of 's' and the rest of 's'."
    return _get_field_type(s)

def _get_component_type(s):
    "Return the type found at the start of 's' and the rest of 's'."
    return _get_field_type(s)

def _get_field_type(s):

    """
    Return the type found at the start of 's' and the rest of 's'. An empty 's'
    produces a type whose elements are all None.
    """

    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):
    "Return the first character of 's' (None if 's' is empty) and the rest."
    if len(s) == 0:
        return None, s
    return s[0], s[1:]

def _get_object_type(s):

    """
    Return the text before the first ";" in 's' and the text after it, or None
    and 's' where 's' is empty.

    AssertionError is raised where a non-empty 's' has no ";"; this exception
    type is retained from the assertion originally used here.
    """

    if len(s) == 0:
        return None, s
    s_end = s.find(";")
    # An explicit test is used since assert statements are removed by -O,
    # which would let an index of -1 produce a wrong name silently.
    if s_end == -1:
        raise AssertionError("object type %r is not terminated by ';'" % (s,))
    return s[:s_end], s[s_end+1:]

def _get_array_type(s):
    "Return the component type at the start of 's' (None if empty) and the rest."
    if len(s) == 0:
        return None, s
    return _get_component_type(s)

# Constant information.
class ClassInfo(NameUtils, PythonNameUtils):

    "A constant referring to a class through the index of its name."

    def init(self, data, class_file):
        "Read the name index from 'data', returning the unread remainder."
        self.class_file = class_file
        index_bytes, remainder = data[:2], data[2:]
        self.name_index = u2(index_bytes)
        return remainder

class RefInfo(NameAndTypeUtils):

    "The common form of constants referring to members of classes."

    def init(self, data, class_file):
        "Read the class and name/type indexes, returning the unread remainder."
        self.class_file = class_file
        self.class_index, self.name_and_type_index = u2(data[:2]), u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A constant referring to a field."

    def get_descriptor(self):
        "Return the field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A constant referring to a method."

    def get_descriptor(self):
        "Return the method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A constant referring to an interface method."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant combining a name index with a descriptor index."

    def init(self, data, class_file):
        "Read the name and descriptor indexes, returning the unread remainder."
        self.class_file = class_file
        self.name_index, self.descriptor_index = u2(data[:2]), u2(data[2:4])
        return data[4:]

    def _get_descriptor_text(self):
        "Return the referenced descriptor constant as a Unicode string."
        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return unicode(descriptor)

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return get_field_descriptor(self._get_descriptor_text())

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return get_method_descriptor(self._get_descriptor_text())

class Utf8Info:

    "A constant holding text as a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        "Read the length and that many bytes, returning the unread remainder."
        self.class_file = class_file
        self.length = u2(data[:2])
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):
        "Return the stored bytes unchanged."
        return self.bytes

    def __unicode__(self):
        "Return the stored bytes decoded as UTF-8."
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        "Return the plain string form of this constant."
        return str(self)

class StringInfo:

    "A constant referring to the text of a string through another constant."

    def init(self, data, class_file):
        "Read the string index from 'data', returning the unread remainder."
        self.class_file = class_file
        index_bytes, remainder = data[:2], data[2:]
        self.string_index = u2(index_bytes)
        return remainder

    def _get_text_constant(self):
        "Return the constant referenced by the string index."
        return self.class_file.constants[self.string_index - 1]

    def __str__(self):
        "Return the plain string form of the referenced constant."
        return str(self._get_text_constant())

    def __unicode__(self):
        "Return the Unicode form of the referenced constant."
        return unicode(self._get_text_constant())

    def get_value(self):
        "Return the plain string form of this constant."
        return str(self)

class SmallNumInfo:

    "The common form of numeric constants occupying four bytes."

    def init(self, data, class_file):
        "Keep the first four bytes of 'data', returning the unread remainder."
        self.class_file = class_file
        self.bytes, remainder = data[:4], data[4:]
        return remainder

class IntegerInfo(SmallNumInfo):

    "A four byte integer constant."

    def get_value(self):
        "Return the stored bytes decoded as a signed integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A four byte floating point constant."

    def get_value(self):
        "Return the stored bytes decoded as a floating point number."
        return f4(self.bytes)

class LargeNumInfo:

    "The common form of numeric constants occupying eight bytes."

    def init(self, data, class_file):
        "Keep the first eight bytes of 'data' as two halves, returning the rest."
        self.class_file = class_file
        self.high_bytes, self.low_bytes = data[:4], data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):

    "An eight byte integer constant."

    def get_value(self):
        "Return both halves together decoded as a signed integer."
        all_bytes = self.high_bytes + self.low_bytes
        return s8(all_bytes)

class DoubleInfo(LargeNumInfo):

    "An eight byte floating point constant."

    def get_value(self):
        "Return both halves together decoded as a floating point number."
        all_bytes = self.high_bytes + self.low_bytes
        return f8(all_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.
class ItemInfo(NameUtils):

    "The common form of field and method entries."

    def init(self, data, class_file):

        """
        Decode the entry at the start of 'data' for the given 'class_file',
        returning the data which follows the entry and its attributes.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field entry."

    def get_descriptor(self):
        "Return the parsed field descriptor of this field."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method entry."

    def get_descriptor(self):
        "Return the parsed method descriptor of this method."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose contents are kept undecoded in 'info'. In all
    attribute classes, the data given to 'init' starts at the attribute length,
    the attribute name index having been consumed by the caller.
    """

    def init(self, data, class_file):

        """
        Decode the attribute at the start of 'data' for the given 'class_file',
        returning the data which follows the attribute.
        """

        # Record the class file like the specialised attributes in this module.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute referring to the name of a source file."

    def init(self, data, class_file):
        "Decode the attribute, returning the data which follows it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute referring to a constant providing a value."

    def init(self, data, class_file):

        """
        Decode the attribute, returning the data which follows it.
        AssertionError is raised where the attribute length is not that of the
        single index held by the attribute; this exception type is retained
        from the assertion originally used here.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # An explicit test is used since assert statements are removed by -O.
        if 4+self.attribute_length != 6:
            raise AssertionError("unexpected ConstantValue attribute length: %r" % (self.attribute_length,))
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "An attribute holding code, an exception table and further attributes."

    def init(self, data, class_file):
        "Decode the attribute, returning the data which follows it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # The end of the attribute is found by decoding the nested attributes
        # and not by using the attribute length.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of constant indexes for exceptions."

    def init(self, data, class_file):
        "Decode the attribute, returning the data which follows it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of inner class records."

    def init(self, data, class_file):
        "Decode the attribute, returning the data which follows it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):

    "An attribute decoded generically."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of line number records."

    def init(self, data, class_file):
        "Decode the attribute, returning the data which follows it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of local variable records."

    def init(self, data, class_file):
        "Decode the attribute, returning the data which follows it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute decoded generically."

    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An eight byte record found in the exception table of a code attribute."

    def init(self, data):
        "Decode the record, returning the data which follows it."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An eight byte record found in an inner classes attribute."

    def init(self, data, class_file):
        "Decode the record, returning the data which follows it."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A four byte record found in a line number attribute."

    def init(self, data):
        "Decode the record, returning the data which follows it."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A ten byte record found in a local variable attribute."

    def init(self, data, class_file):
        "Decode the record, returning the data which follows it."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of this local variable."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the tag value where a constant has an unsupported tag."

    pass

class UnknownAttribute(Exception):

    "Raised with the attribute name where an attribute is not supported."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Classes used to decode constants, indexed by constant tag.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo
        }

    # Classes used to decode attributes, indexed by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        # NOTE: According to the specification, these hold the magic number and
        # NOTE: the version of the class file format.

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning the constant and the
        data which follows it. UnknownTag is raised for unsupported tags.
        """

        tag = u1(s[0:1])
        constant_class = self._constant_classes.get(tag)
        if constant_class is None:
            raise UnknownTag(tag)
        const = constant_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the given 'count' is reached, returning
        the list of constants and the data which follows them. Since indexes
        start at one, the list has count - 1 entries.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode the given 'number' of 'cls' instances from 's', returning the
        list of instances and the data which follows them.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' methods from 's', returning them and the data left."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' fields from 's', returning them and the data left."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning the attribute and
        the data which follows it. UnknownAttribute is raised where the name of
        the attribute is not supported.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes from 's', returning them and the data left."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Read the constant count and the constants, returning them and the data left."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return the access flags found at the start of 's' and the data left."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return the constant referenced at the start of 's' and the data left."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant referenced at the start of 's' and the data left.
        None is returned instead of a constant where the index is zero.
        """

        index = u2(s[0:2])
        # Without this test, a zero index would select the last constant, since
        # index - 1 would then be -1.
        # NOTE: According to the specification, a zero index is used by
        # NOTE: java.lang.Object, which has no superclass.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Read the interface count and references, returning the constants and the data left."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Read the field count and the fields, returning them and the data left."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Read the attribute count and the attributes, returning them and the data left."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Read the method count and the methods, returning them and the data left."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files contain binary data, so the file is opened in binary mode,
    # and it is closed even if decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4