javaclass

classfile.py

18:2a50b70c772b
2004-11-09 Paul Boddie Fixed comparison plus branch (if_acmp*) by removing the comparison result from the operand stack. Added stack top duplication before RAISE_VARARGS so that the appropriate handlers can save the value. Changed the value stored in the load_const_ret method so that None is stored (although this may need verifying). When the translated ret instruction (END_FINALLY in the Python VM, again requiring verification) is executed, the value loaded just prior to its execution should be the same as that saved at the beginning of the handler, and this should be None or a raised exception. NOTE: The load_const_ret translation should arguably retrieve the saved NOTE: value from the beginning of any active handler instead of just NOTE: loading None. This may be assured in try...finally constructs NOTE: (without catch sections), however. Rearranged Python VM instruction insertion for exceptions. Changed some load_global(None) usage to load_const(None). Added instruction positions in the disassembly output.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful mix-ins.    34     35 class PythonNameUtils:    36     def get_python_name(self):    37         name = self.get_name()    38         if str(name) == "<init>":    39             return "__java_init__"    40         else:    41             return name    42     43 class NameUtils(PythonNameUtils):    44     def get_name(self):    45         if self.name_index != 0:    46             return self.class_file.constants[self.name_index - 1]    47         else:    48             # Some name indexes are zero to indicate special conditions.    49             return None    50     51 class NameAndTypeUtils(PythonNameUtils):    52     def get_name(self):    53         if self.name_and_type_index != 0:    54             return self.class_file.constants[self.name_and_type_index - 1].get_name()    55         else:    56             # Some name indexes are zero to indicate special conditions.    
57             return None    58     59     def get_field_descriptor(self):    60         if self.name_and_type_index != 0:    61             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()    62         else:    63             # Some name indexes are zero to indicate special conditions.    64             return None    65     66     def get_method_descriptor(self):    67         if self.name_and_type_index != 0:    68             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()    69         else:    70             # Some name indexes are zero to indicate special conditions.    71             return None    72     73 class DescriptorUtils:    74     75     "Symbol parsing."    76     77     def _get_method_descriptor(self, s):    78         assert s[0] == "("    79         params = []    80         s = s[1:]    81         while s[0] != ")":    82             parameter_descriptor, s = self._get_parameter_descriptor(s)    83             params.append(parameter_descriptor)    84         if s[1] != "V":    85             return_type, s = self._get_field_type(s[1:])    86         else:    87             return_type, s = None, s[1:]    88         return params, return_type    89     90     def _get_parameter_descriptor(self, s):    91         return self._get_field_type(s)    92     93     def _get_field_descriptor(self, s):    94         return self._get_field_type(s)    95     96     def _get_component_type(self, s):    97         return self._get_field_type(s)    98     99     def _get_field_type(self, s):   100         base_type, s = self._get_base_type(s)   101         object_type = None   102         array_type = None   103         if base_type == "L":   104             object_type, s = self._get_object_type(s)   105         elif base_type == "[":   106             array_type, s = self._get_array_type(s)   107         return (base_type, object_type, array_type), s   108    109     def 
_get_base_type(self, s):   110         if len(s) > 0:   111             return s[0], s[1:]   112         else:   113             return None, s   114    115     def _get_object_type(self, s):   116         if len(s) > 0:   117             s_end = s.find(";")   118             assert s_end != -1   119             return s[:s_end], s[s_end+1:]   120         else:   121             return None, s   122    123     def _get_array_type(self, s):   124         if len(s) > 0:   125             return self._get_component_type(s)   126         else:   127             return None, s   128    129 # Constant information.   130 # Objects of these classes are not directly aware of the class they reside in.   131    132 class ClassInfo(NameUtils):   133     def init(self, data, class_file):   134         self.class_file = class_file   135         self.name_index = u2(data[0:2])   136         return data[2:]   137    138 class RefInfo(NameAndTypeUtils):   139     def init(self, data, class_file):   140         self.class_file = class_file   141         self.class_index = u2(data[0:2])   142         self.name_and_type_index = u2(data[2:4])   143         return data[4:]   144    145 class FieldRefInfo(RefInfo):   146     def get_descriptor(self):   147         return RefInfo.get_field_descriptor(self)   148    149 class MethodRefInfo(RefInfo):   150     def get_descriptor(self):   151         return RefInfo.get_method_descriptor(self)   152    153 class InterfaceMethodRefInfo(RefInfo):   154     def get_descriptor(self):   155         return RefInfo.get_method_descriptor(self)   156    157 class NameAndTypeInfo(NameUtils, DescriptorUtils):   158     def init(self, data, class_file):   159         self.class_file = class_file   160         self.name_index = u2(data[0:2])   161         self.descriptor_index = u2(data[2:4])   162         return data[4:]   163    164     def get_field_descriptor(self):   165         return 
self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   166    167     def get_method_descriptor(self):   168         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   169    170 class Utf8Info:   171     def init(self, data, class_file):   172         self.class_file = class_file   173         self.length = u2(data[0:2])   174         self.bytes = data[2:2+self.length]   175         return data[2+self.length:]   176    177     def __str__(self):   178         return self.bytes   179    180     def __unicode__(self):   181         return unicode(self.bytes, "utf-8")   182    183 class StringInfo:   184     def init(self, data, class_file):   185         self.class_file = class_file   186         self.string_index = u2(data[0:2])   187         return data[2:]   188    189 class SmallNumInfo:   190     def init(self, data, class_file):   191         self.class_file = class_file   192         self.bytes = data[0:4]   193         return data[4:]   194    195 class IntegerInfo(SmallNumInfo):   196     def get_value(self):   197         return s4(self.bytes)   198    199 class FloatInfo(SmallNumInfo):   200     def get_value(self):   201         return f4(self.bytes)   202    203 class LargeNumInfo:   204     def init(self, data, class_file):   205         self.class_file = class_file   206         self.high_bytes = u4(data[0:4])   207         self.low_bytes = u4(data[4:8])   208         return data[8:]   209    210 class LongInfo(LargeNumInfo):   211     def get_value(self):   212         return s8(self.high_bytes + self.low_bytes)   213    214 class DoubleInfo(LargeNumInfo):   215     def get_value(self):   216         return f8(self.high_bytes + self.low_bytes)   217    218 # Other information.   219 # Objects of these classes are generally aware of the class they reside in.   

# Each 'init' method below decodes the object's details from the start of
# 'data' and returns the data remaining after those details.

class ItemInfo(NameUtils, DescriptorUtils):

    """
    Common details of fields and methods: access flags, name and descriptor
    indexes, and a list of attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):

    "A field of a class."

    def get_descriptor(self):
        "Return the referenced descriptor constant parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):

    "A method of a class."

    def get_descriptor(self):
        "Return the referenced descriptor constant parsed as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute: a four-byte length followed by that many bytes of
    undecoded information, kept in 'info'.
    """

    def init(self, data, class_file):
        # Record the class file as the more specific attribute classes do.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The caller continues decoding from the returned data, so the data
        # following this attribute must be handed back.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant value."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The attribute body must consist of the two-byte index only.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant via its 'get_value' method."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    An attribute holding bytecode in 'code', along with stack and local
    variable limits, an exception table and nested attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of exception constant indexes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the exception index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a list of InnerClassInfo objects in 'classes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a list of LineNumberInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a list of LocalVariableInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo needs the class file to resolve its name and
            # descriptor indexes.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An exception table entry: a code range, a handler position and a catch type."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An inner class entry holding class, name and access flag details."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "A line number table entry pairing a code position with a line number."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils):

    """
    A local variable table entry. DescriptorUtils is mixed in because
    'get_descriptor' relies on its parsing methods.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the referenced descriptor constant parsed as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes of 's' are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode a single constant from the start of 's', choosing the class of
        the constant from its leading tag byte. Return (constant, remaining
        data). Raise UnknownTag for an unrecognised tag.
        """

        tag = u1(s[0:1])
        if tag == 1:
            const = Utf8Info()
        elif tag == 3:
            const = IntegerInfo()
        elif tag == 4:
            const = FloatInfo()
        elif tag == 5:
            const = LongInfo()
        elif tag == 6:
            const = DoubleInfo()
        elif tag == 7:
            const = ClassInfo()
        elif tag == 8:
            const = StringInfo()
        elif tag == 9:
            const = FieldRefInfo()
        elif tag == 10:
            const = MethodRefInfo()
        elif tag == 11:
            const = InterfaceMethodRefInfo()
        elif tag == 12:
            const = NameAndTypeInfo()
        else:
            raise UnknownTag(tag)

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the running index reaches 'count'.
        Return (list of constants, remaining data).
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' objects of the class 'cls' from 's'. Return (list of
        objects, remaining data).
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' MethodInfo objects from 's'."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' FieldInfo objects from 's'."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode a single attribute from the start of 's', choosing the class of
        the attribute from the name constant referenced by its first two
        bytes. Return (attribute, remaining data). Raise UnknownAttribute for
        an unrecognised name.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        if constant_name == "SourceFile":
            attribute = SourceFileAttributeInfo()
        elif constant_name == "ConstantValue":
            attribute = ConstantValueAttributeInfo()
        elif constant_name == "Code":
            attribute = CodeAttributeInfo()
        elif constant_name == "Exceptions":
            attribute = ExceptionsAttributeInfo()
        elif constant_name == "InnerClasses":
            attribute = InnerClassesAttributeInfo()
        elif constant_name == "Synthetic":
            attribute = SyntheticAttributeInfo()
        elif constant_name == "LineNumberTable":
            attribute = LineNumberAttributeInfo()
        elif constant_name == "LocalVariableTable":
            attribute = LocalVariableAttributeInfo()
        elif constant_name == "Deprecated":
            attribute = DeprecatedAttributeInfo()
        else:
            raise UnknownAttribute(constant_name)
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes from 's'. Return (attributes, remaining data)."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode a two-byte count and the constants following it."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return (access flags, remaining data)."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return (constant referenced by the two-byte index, remaining data)."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return (constant referenced by the two-byte index, remaining data),
        with None in place of the constant where the index is zero.
        """

        index = u2(s[0:2])
        if index == 0:
            # A zero index references no constant; constants[-1] would
            # wrongly yield the last constant in the table.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):

        """
        Decode a two-byte count followed by that many two-byte constant
        indexes. Return (list of referenced constants, remaining data).
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode a two-byte count and the fields following it."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode a two-byte count and the attributes following it."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode a two-byte count and the methods following it."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read in binary mode and always close the
    # file, even if decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4