javaclass

javaclass/classfile.py

168:1f3fa26a27e2
2005-02-13 Paul Boddie Attempted to simplify the class loading mechanism by adding instance-level state to remember which classes have been imported so far. Only at the top level will such classes be created and initialised. Added a convenience method to ClassTranslator.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def s2(data):    19     return struct.unpack(">h", data[0:2])[0]    20     21 def u4(data):    22     return struct.unpack(">L", data[0:4])[0]    23     24 def s4(data):    25     return struct.unpack(">l", data[0:4])[0]    26     27 def s8(data):    28     return struct.unpack(">q", data[0:8])[0]    29     30 def f4(data):    31     return struct.unpack(">f", data[0:4])[0]    32     33 def f8(data):    34     return struct.unpack(">d", data[0:8])[0]    35     36 # Useful tables and constants.    37     38 descriptor_base_type_mapping = {    39     "B" : "int",    40     "C" : "str",    41     "D" : "float",    42     "F" : "float",    43     "I" : "int",    44     "J" : "int",    45     "L" : "object",    46     "S" : "int",    47     "Z" : "bool",    48     "[" : "list"    49     }    50     51 type_names_to_default_values = {    52     "int" : 0,    53     "str" : u"",    54     "float" : 0.0,    55     "object" : None,    56     "bool" : 0, # NOTE: Should be False.    57     "list" : []    58     }    59     60 def get_default_for_type(type_name):    61     global type_names_to_default_values    62     return type_names_to_default_values.get(type_name)    63     64 PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \    65 0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800    66     67 def has_flags(flags, desired):    68     desired_flags = reduce(lambda a, b: a | b, desired, 0)    69     return (flags & desired_flags) == desired_flags    70     71 # Useful mix-ins.    72     73 class PythonMethodUtils:    74     symbol_sep = "___" # was "$"    75     type_sep = "__" # replaces "/"    76     array_sep = "_array_" # was "[]"    77     base_seps = ("_", "_") # was "<" and ">"    78     79     def get_unqualified_python_name(self):    80         name = self.get_name()    81         if str(name) == "<init>":    82             return "__init__"    83         elif str(name) == "<clinit>":    84             return "__clinit__"    85         else:    86             return str(name)    87     88     def get_python_name(self):    89         name = self.get_unqualified_python_name()    90         if name == "__clinit__":    91             return name    92         return name + self.symbol_sep + self._get_descriptor_as_name()    93     94     def _get_descriptor_as_name(self):    95         l = []    96         for descriptor_type in self.get_descriptor()[0]:    97             l.append(self._get_type_as_name(descriptor_type))    98         return self.symbol_sep.join(l)    99    100     def _get_type_as_name(self, descriptor_type, s=""):   101         base_type, object_type, array_type = descriptor_type   102         if base_type == "L":   103             return object_type.replace("/", self.type_sep) + s   104         elif base_type == "[":   105             return self._get_type_as_name(array_type, s + self.array_sep)   106         else:   107             return self.base_seps[0] + base_type + self.base_seps[1] + s   108    109 class PythonNameUtils:   110     def get_python_name(self):   111         # NOTE: This may not be comprehensive.   112         if not str(self.get_name()).startswith("["):   113             return str(self.get_name()).replace("/", ".")   114         else:   115             return self._get_type_name(   116                 get_field_descriptor(   117                     str(self.get_name())   118                     )   119                 ).replace("/", ".")   120    121     def _get_type_name(self, descriptor_type):   122         base_type, object_type, array_type = descriptor_type   123         if base_type == "L":   124             return object_type   125         elif base_type == "[":   126             return self._get_type_name(array_type)   127         else:   128             return descriptor_base_type_mapping[base_type]   129    130 class NameUtils:   131     def get_name(self):   132         if self.name_index != 0:   133             return self.class_file.constants[self.name_index - 1]   134         else:   135             # Some name indexes are zero to indicate special conditions.   136             return None   137    138 class NameAndTypeUtils:   139     def get_name(self):   140         if self.name_and_type_index != 0:   141             return self.class_file.constants[self.name_and_type_index - 1].get_name()   142         else:   143             # Some name indexes are zero to indicate special conditions.   144             return None   145    146     def get_field_descriptor(self):   147         if self.name_and_type_index != 0:   148             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()   149         else:   150             # Some name indexes are zero to indicate special conditions.   151             return None   152    153     def get_method_descriptor(self):   154         if self.name_and_type_index != 0:   155             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()   156         else:   157             # Some name indexes are zero to indicate special conditions.   158             return None   159    160     def get_class(self):   161         return self.class_file.constants[self.class_index - 1]   162    163 # Symbol parsing.   164    165 def get_method_descriptor(s):   166     assert s[0] == "("   167     params = []   168     s = s[1:]   169     while s[0] != ")":   170         parameter_descriptor, s = _get_parameter_descriptor(s)   171         params.append(parameter_descriptor)   172     if s[1] != "V":   173         return_type, s = _get_field_type(s[1:])   174     else:   175         return_type, s = None, s[1:]   176     return params, return_type   177    178 def get_field_descriptor(s):   179     return _get_field_type(s)[0]   180    181 def _get_parameter_descriptor(s):   182     return _get_field_type(s)   183    184 def _get_component_type(s):   185     return _get_field_type(s)   186    187 def _get_field_type(s):   188     base_type, s = _get_base_type(s)   189     object_type = None   190     array_type = None   191     if base_type == "L":   192         object_type, s = _get_object_type(s)   193     elif base_type == "[":   194         array_type, s = _get_array_type(s)   195     return (base_type, object_type, array_type), s   196    197 def _get_base_type(s):   198     if len(s) > 0:   199         return s[0], s[1:]   200     else:   201         return None, s   202    203 def _get_object_type(s):   204     if len(s) > 0:   205         s_end = s.find(";")   206         assert s_end != -1   207         return s[:s_end], s[s_end+1:]   208     else:   209         return None, s   210    211 def _get_array_type(s):   212     if len(s) > 0:   213         return _get_component_type(s)   214     else:   215         return None, s   216    217 # Constant information.   218    219 class ClassInfo(NameUtils, PythonNameUtils):   220     def init(self, data, class_file):   221         self.class_file = class_file   222         self.name_index = u2(data[0:2])   223         return data[2:]   224    225 class RefInfo(NameAndTypeUtils):   226     def init(self, data, class_file):   227         self.class_file = class_file   228         self.class_index = u2(data[0:2])   229         self.name_and_type_index = u2(data[2:4])   230         return data[4:]   231    232 class FieldRefInfo(RefInfo, PythonNameUtils):   233     def get_descriptor(self):   234         return RefInfo.get_field_descriptor(self)   235    236 class MethodRefInfo(RefInfo, PythonMethodUtils):   237     def get_descriptor(self):   238         return RefInfo.get_method_descriptor(self)   239    240 class InterfaceMethodRefInfo(MethodRefInfo):   241     pass   242    243 class NameAndTypeInfo(NameUtils, PythonNameUtils):   244     def init(self, data, class_file):   245         self.class_file = class_file   246         self.name_index = u2(data[0:2])   247         self.descriptor_index = u2(data[2:4])   248         return data[4:]   249    250     def get_field_descriptor(self):   251         return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   252    253     def get_method_descriptor(self):   254         return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   255    256 class Utf8Info:   257     def init(self, data, class_file):   258         self.class_file = class_file   259         self.length = u2(data[0:2])   260         self.bytes = data[2:2+self.length]   261         return data[2+self.length:]   262    263     def __str__(self):   264         return self.bytes   265    266     def __unicode__(self):   267         return unicode(self.bytes, "utf-8")   268    269     def get_value(self):   270         return str(self)   271    272 class StringInfo:   273     def init(self, data, class_file):   274         self.class_file = class_file   275         self.string_index = u2(data[0:2])   276         return data[2:]   277    278     def __str__(self):   279         return str(self.class_file.constants[self.string_index - 1])   280    281     def __unicode__(self):   282         return unicode(self.class_file.constants[self.string_index - 1])   283    284     def get_value(self):   285         return str(self)   286    287 class SmallNumInfo:   288     def init(self, data, class_file):   289         self.class_file = class_file   290         self.bytes = data[0:4]   291         return data[4:]   292    293 class IntegerInfo(SmallNumInfo):   294     def get_value(self):   295         return s4(self.bytes)   296    297 class FloatInfo(SmallNumInfo):   298     def get_value(self):   299         return f4(self.bytes)   300    301 class LargeNumInfo:   302     def init(self, data, class_file):   303         self.class_file = class_file   304         self.high_bytes = data[0:4]   305         self.low_bytes = data[4:8]   306         return data[8:]   307    308 class LongInfo(LargeNumInfo):   309     def get_value(self):   310         return s8(self.high_bytes + self.low_bytes)   311    312 class DoubleInfo(LargeNumInfo):   313     def get_value(self):   314         return f8(self.high_bytes + self.low_bytes)   315    316 # Other information.   317 # Objects of these classes are generally aware of the class they reside in.   318    319 class ItemInfo(NameUtils):   320     def init(self, data, class_file):   321         self.class_file = class_file   322         self.access_flags = u2(data[0:2])   323         self.name_index = u2(data[2:4])   324         self.descriptor_index = u2(data[4:6])   325         self.attributes, data = self.class_file._get_attributes(data[6:])   326         return data   327    328 class FieldInfo(ItemInfo, PythonNameUtils):   329     def get_descriptor(self):   330         return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   331    332 class MethodInfo(ItemInfo, PythonMethodUtils):   333     def get_descriptor(self):   334         return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   335    336 class AttributeInfo:   337     def init(self, data, class_file):   338         self.attribute_length = u4(data[0:4])   339         self.info = data[4:4+self.attribute_length]   340         return data[4+self.attribute_length:]   341    342 # NOTE: Decode the different attribute formats.   343    344 class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):   345     def init(self, data, class_file):   346         self.class_file = class_file   347         self.attribute_length = u4(data[0:4])   348         # Permit the NameUtils mix-in.   349         self.name_index = self.sourcefile_index = u2(data[4:6])   350         return data[6:]   351    352 class ConstantValueAttributeInfo(AttributeInfo):   353     def init(self, data, class_file):   354         self.class_file = class_file   355         self.attribute_length = u4(data[0:4])   356         self.constant_value_index = u2(data[4:6])   357         assert 4+self.attribute_length == 6   358         return data[4+self.attribute_length:]   359    360     def get_value(self):   361         return self.class_file.constants[self.constant_value_index - 1].get_value()   362    363 class CodeAttributeInfo(AttributeInfo):   364     def init(self, data, class_file):   365         self.class_file = class_file   366         self.attribute_length = u4(data[0:4])   367         self.max_stack = u2(data[4:6])   368         self.max_locals = u2(data[6:8])   369         self.code_length = u4(data[8:12])   370         end_of_code = 12+self.code_length   371         self.code = data[12:end_of_code]   372         self.exception_table_length = u2(data[end_of_code:end_of_code+2])   373         self.exception_table = []   374         data = data[end_of_code + 2:]   375         for i in range(0, self.exception_table_length):   376             exception = ExceptionInfo()   377             data = exception.init(data)   378             self.exception_table.append(exception)   379         self.attributes, data = self.class_file._get_attributes(data)   380         return data   381    382 class ExceptionsAttributeInfo(AttributeInfo):   383     def init(self, data, class_file):   384         self.class_file = class_file   385         self.attribute_length = u4(data[0:4])   386         self.number_of_exceptions = u2(data[4:6])   387         self.exception_index_table = []   388         index = 6   389         for i in range(0, self.number_of_exceptions):   390             self.exception_index_table.append(u2(data[index:index+2]))   391             index += 2   392         return data[index:]   393    394     def get_exception(self, i):   395         exception_index = self.exception_index_table[i]   396         return self.class_file.constants[exception_index - 1]   397    398 class InnerClassesAttributeInfo(AttributeInfo):   399     def init(self, data, class_file):   400         self.class_file = class_file   401         self.attribute_length = u4(data[0:4])   402         self.number_of_classes = u2(data[4:6])   403         self.classes = []   404         data = data[6:]   405         for i in range(0, self.number_of_classes):   406             inner_class = InnerClassInfo()   407             data = inner_class.init(data, self.class_file)   408             self.classes.append(inner_class)   409         return data   410    411 class SyntheticAttributeInfo(AttributeInfo):   412     pass   413    414 class LineNumberAttributeInfo(AttributeInfo):   415     def init(self, data, class_file):   416         self.class_file = class_file   417         self.attribute_length = u4(data[0:4])   418         self.line_number_table_length = u2(data[4:6])   419         self.line_number_table = []   420         data = data[6:]   421         for i in range(0, self.line_number_table_length):   422             line_number = LineNumberInfo()   423             data = line_number.init(data)   424             self.line_number_table.append(line_number)   425         return data   426    427 class LocalVariableAttributeInfo(AttributeInfo):   428     def init(self, data, class_file):   429         self.class_file = class_file   430         self.attribute_length = u4(data[0:4])   431         self.local_variable_table_length = u2(data[4:6])   432         self.local_variable_table = []   433         data = data[6:]   434         for i in range(0, self.local_variable_table_length):   435             local_variable = LocalVariableInfo()   436             data = local_variable.init(data, self.class_file)   437             self.local_variable_table.append(local_variable)   438         return data   439    440 class DeprecatedAttributeInfo(AttributeInfo):   441     pass   442    443 # Child classes of the attribute information classes.   444    445 class ExceptionInfo:   446     def init(self, data):   447         self.start_pc = u2(data[0:2])   448         self.end_pc = u2(data[2:4])   449         self.handler_pc = u2(data[4:6])   450         self.catch_type = u2(data[6:8])   451         return data[8:]   452    453 class InnerClassInfo(NameUtils):   454     def init(self, data, class_file):   455         self.class_file = class_file   456         self.inner_class_info_index = u2(data[0:2])   457         self.outer_class_info_index = u2(data[2:4])   458         # Permit the NameUtils mix-in.   459         self.name_index = self.inner_name_index = u2(data[4:6])   460         self.inner_class_access_flags = u2(data[6:8])   461         return data[8:]   462    463 class LineNumberInfo:   464     def init(self, data):   465         self.start_pc = u2(data[0:2])   466         self.line_number = u2(data[2:4])   467         return data[4:]   468    469 class LocalVariableInfo(NameUtils, PythonNameUtils):   470     def init(self, data, class_file):   471         self.class_file = class_file   472         self.start_pc = u2(data[0:2])   473         self.length = u2(data[2:4])   474         self.name_index = u2(data[4:6])   475         self.descriptor_index = u2(data[6:8])   476         self.index = u2(data[8:10])   477         return data[10:]   478    479     def get_descriptor(self):   480         return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   481    482 # Exceptions.   483    484 class UnknownTag(Exception):   485     pass   486    487 class UnknownAttribute(Exception):   488     pass   489    490 # Abstractions for the main structures.   491    492 class ClassFile:   493    494     "A class representing a Java class file."   495    496     def __init__(self, s):   497    498         """   499         Process the given string 's', populating the object with the class   500         file's details.   501         """   502    503         self.constants, s = self._get_constants(s[8:])   504         self.access_flags, s = self._get_access_flags(s)   505         self.this_class, s = self._get_this_class(s)   506         self.super_class, s = self._get_super_class(s)   507         self.interfaces, s = self._get_interfaces(s)   508         self.fields, s = self._get_fields(s)   509         self.methods, s = self._get_methods(s)   510         self.attributes, s = self._get_attributes(s)   511    512     def _decode_const(self, s):   513         tag = u1(s[0:1])   514         if tag == 1:   515             const = Utf8Info()   516         elif tag == 3:   517             const = IntegerInfo()   518         elif tag == 4:   519             const = FloatInfo()   520         elif tag == 5:   521             const = LongInfo()   522         elif tag == 6:   523             const = DoubleInfo()   524         elif tag == 7:   525             const = ClassInfo()   526         elif tag == 8:   527             const = StringInfo()   528         elif tag == 9:   529             const = FieldRefInfo()   530         elif tag == 10:   531             const = MethodRefInfo()   532         elif tag == 11:   533             const = InterfaceMethodRefInfo()   534         elif tag == 12:   535             const = NameAndTypeInfo()   536         else:   537             raise UnknownTag, tag   538    539         # Initialise the constant object.   540    541         s = const.init(s[1:], self)   542         return const, s   543    544     def _get_constants_from_table(self, count, s):   545         l = []   546         # Have to skip certain entries specially.   547         i = 1   548         while i < count:   549             c, s = self._decode_const(s)   550             l.append(c)   551             # Add a blank entry after "large" entries.   552             if isinstance(c, LargeNumInfo):   553                 l.append(None)   554                 i += 1   555             i += 1   556         return l, s   557    558     def _get_items_from_table(self, cls, number, s):   559         l = []   560         for i in range(0, number):   561             f = cls()   562             s = f.init(s, self)   563             l.append(f)   564         return l, s   565    566     def _get_methods_from_table(self, number, s):   567         return self._get_items_from_table(MethodInfo, number, s)   568    569     def _get_fields_from_table(self, number, s):   570         return self._get_items_from_table(FieldInfo, number, s)   571    572     def _get_attribute_from_table(self, s):   573         attribute_name_index = u2(s[0:2])   574         constant_name = self.constants[attribute_name_index - 1].bytes   575         if constant_name == "SourceFile":   576             attribute = SourceFileAttributeInfo()   577         elif constant_name == "ConstantValue":   578             attribute = ConstantValueAttributeInfo()   579         elif constant_name == "Code":   580             attribute = CodeAttributeInfo()   581         elif constant_name == "Exceptions":   582             attribute = ExceptionsAttributeInfo()   583         elif constant_name == "InnerClasses":   584             attribute = InnerClassesAttributeInfo()   585         elif constant_name == "Synthetic":   586             attribute = SyntheticAttributeInfo()   587         elif constant_name == "LineNumberTable":   588             attribute = LineNumberAttributeInfo()   589         elif constant_name == "LocalVariableTable":   590             attribute = LocalVariableAttributeInfo()   591         elif constant_name == "Deprecated":   592             attribute = DeprecatedAttributeInfo()   593         else:   594             raise UnknownAttribute, constant_name   595         s = attribute.init(s[2:], self)   596         return attribute, s   597    598     def _get_attributes_from_table(self, number, s):   599         attributes = []   600         for i in range(0, number):   601             attribute, s = self._get_attribute_from_table(s)   602             attributes.append(attribute)   603         return attributes, s   604    605     def _get_constants(self, s):   606         count = u2(s[0:2])   607         return self._get_constants_from_table(count, s[2:])   608    609     def _get_access_flags(self, s):   610         return u2(s[0:2]), s[2:]   611    612     def _get_this_class(self, s):   613         index = u2(s[0:2])   614         return self.constants[index - 1], s[2:]   615    616     def _get_super_class(self, s):   617         index = u2(s[0:2])   618         if index != 0:   619             return self.constants[index - 1], s[2:]   620         else:   621             return None, s[2:]   622    623     def _get_interfaces(self, s):   624         interfaces = []   625         number = u2(s[0:2])   626         s = s[2:]   627         for i in range(0, number):   628             index = u2(s[0:2])   629             interfaces.append(self.constants[index - 1])   630             s = s[2:]   631         return interfaces, s   632    633     def _get_fields(self, s):   634         number = u2(s[0:2])   635         return self._get_fields_from_table(number, s[2:])   636    637     def _get_attributes(self, s):   638         number = u2(s[0:2])   639         return self._get_attributes_from_table(number, s[2:])   640    641     def _get_methods(self, s):   642         number = u2(s[0:2])   643         return self._get_methods_from_table(number, s[2:])   644    645 if __name__ == "__main__":   646     import sys   647     f = open(sys.argv[1], "rb")   648     c = ClassFile(f.read())   649     f.close()   650    651 # vim: tabstop=4 expandtab shiftwidth=4