javaclass

javaclass/classfile.py

189:8391014fcbde
2011-10-08 Paul Boddie Minor formatting changes.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6      7 Copyright (C) 2004, 2005, 2006, 2011 Paul Boddie <paul@boddie.org.uk>     8 Copyright (C) 2010 Braden Thomas <bradenthomas@me.com>     9 Copyright (C) 2011 David Drysdale <dmd@lurklurk.org>    10     11 This program is free software; you can redistribute it and/or modify it under    12 the terms of the GNU Lesser General Public License as published by the Free    13 Software Foundation; either version 3 of the License, or (at your option) any    14 later version.    15     16 This program is distributed in the hope that it will be useful, but WITHOUT    17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    18 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more    19 details.    20     21 You should have received a copy of the GNU Lesser General Public License along    22 with this program.  If not, see <http://www.gnu.org/licenses/>.    23 """    24     25 import struct # for general decoding of class files    26     27 # Utility functions.    
def u1(data):
    "Decode the first byte of 'data' as an unsigned 8-bit integer."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Decode the first 2 bytes of 'data' as a big-endian unsigned 16-bit integer."
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    "Decode the first 2 bytes of 'data' as a big-endian signed 16-bit integer."
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    "Decode the first 4 bytes of 'data' as a big-endian unsigned 32-bit integer."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Decode the first 4 bytes of 'data' as a big-endian signed 32-bit integer."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Decode the first 8 bytes of 'data' as a big-endian signed 64-bit integer."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Decode the first 4 bytes of 'data' as a big-endian 32-bit float."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Decode the first 8 bytes of 'data' as a big-endian 64-bit float."
    return struct.unpack(">d", data[0:8])[0]

def su1(value):
    "Encode 'value' as an unsigned 8-bit integer (inverse of u1)."
    return struct.pack(">B", value)

def su2(value):
    "Encode 'value' as a big-endian unsigned 16-bit integer (inverse of u2)."
    return struct.pack(">H", value)

def ss2(value):
    "Encode 'value' as a big-endian signed 16-bit integer (inverse of s2)."
    return struct.pack(">h", value)

def su4(value):
    "Encode 'value' as a big-endian unsigned 32-bit integer (inverse of u4)."
    return struct.pack(">L", value)

def ss4(value):
    "Encode 'value' as a big-endian signed 32-bit integer (inverse of s4)."
    return struct.pack(">l", value)

def ss8(value):
    "Encode 'value' as a big-endian signed 64-bit integer (inverse of s8)."
    return struct.pack(">q", value)

def sf4(value):
    "Encode 'value' as a big-endian 32-bit float (inverse of f4)."
    return struct.pack(">f", value)

def sf8(value):
    "Encode 'value' as a big-endian 64-bit float (inverse of f8)."
    return struct.pack(">d", value)

# Useful tables and constants.

# Maps the leading character of a descriptor to a Python type name.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Maps the Python type names used above to a default value for that type.
type_names_to_default_values = {
    "int" : 0,
    "str" : u"",
    "float" : 0.0,
    "object" : None,
    "bool" : 0, # NOTE: Should be False.
    "list" : []
    }

def get_default_for_type(type_name):

    """
    Return the default value registered for 'type_name', or None if the name
    is not known. A list default is returned as a fresh copy so that callers
    mutating the result cannot alter the shared table entry.
    """

    value = type_names_to_default_values.get(type_name)
    if isinstance(value, list):
        return list(value)
    return value

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share 0x0020.

PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \
0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800

def has_flags(flags, desired):

    """
    Return whether every bit mask in the 'desired' sequence is set in 'flags'.
    An empty 'desired' sequence always yields a true result.
    """

    # Combine with a plain loop so that no reduce function is required.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.

class PythonMethodUtils:

    """
    A mix-in producing Python-friendly method names. Users of this class must
    provide get_name and get_descriptor methods, where get_descriptor returns
    a (parameter types, return type) pair.
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):
        "Return the method name, mapping <init> and <clinit> to Python names."
        name = str(self.get_name())
        if name == "<init>":
            return "__init__"
        elif name == "<clinit>":
            return "__clinit__"
        else:
            return name

    def get_python_name(self):
        "Return the name qualified by the parameter types (except __clinit__)."
        name = self.get_unqualified_python_name()
        if name == "__clinit__":
            return name
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the parameter types joined into a single name fragment."
        names = []
        for descriptor_type in self.get_descriptor()[0]:
            names.append(self._get_type_as_name(descriptor_type))
        return self.symbol_sep.join(names)

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name fragment for the (base type, object type, array type)
        tuple 'descriptor_type'. The suffix 's' accumulates one array marker
        per array dimension while recursing into the component type.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    "A mix-in producing dotted Python names; users must provide get_name."

    def get_python_name(self):
        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if not name.startswith("["):
            return name.replace("/", ".")
        else:
            # Array names are parsed as field descriptors to find the
            # innermost component type.
            return self._get_type_name(get_field_descriptor(name)).replace("/", ".")

    def _get_type_name(self, descriptor_type):
        "Return the innermost type name of the given descriptor tuple."
        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    "A mix-in resolving name_index against the class file's constants."

    def get_name(self):
        if self.name_index != 0:
            # Indexes stored in the file are one-based.
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    A mix-in resolving name_and_type_index and class_index against the class
    file's constants.
    """

    def _get_name_and_type(self):
        "Return the referenced name-and-type constant, or None for index zero."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_name(self):
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_name()

    def get_field_descriptor(self):
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_field_descriptor()

    def get_method_descriptor(self):
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_method_descriptor()

    def get_class(self):
        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.

def get_method_descriptor(s):

    """
    Parse the method descriptor string 's', which must start with "(".
    Return a (parameter types, return type) pair where each type is a
    (base type, object type, array type) tuple and the return type is None
    for a "V" return.
    """

    assert s[0] == "("
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # s[0] is now ")" and the return type follows it.
    if s[1] != "V":
        return_type, s = _get_field_type(s[1:])
    else:
        return_type, s = None, s[1:]
    return params, return_type

def get_field_descriptor(s):
    "Parse 's' and return its (base type, object type, array type) tuple."
    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):
    return _get_field_type(s)

def _get_component_type(s):
    return _get_field_type(s)

def _get_field_type(s):

    """
    Parse one type from the start of 's'. Return the type tuple together with
    the unparsed remainder of the string.
    """

    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):
    "Split off the first character of 's'; yield None for an empty string."
    if len(s) > 0:
        return s[0], s[1:]
    else:
        return None, s

def _get_object_type(s):
    "Split off the text up to the terminating ';', which is discarded."
    if len(s) > 0:
        s_end = s.find(";")
        assert s_end != -1
        return s[:s_end], s[s_end+1:]
    else:
        return None, s

def _get_array_type(s):
    "Parse the component type of an array; yield None for an empty string."
    if len(s) > 0:
        return _get_component_type(s)
    else:
        return None, s

# Constant information.
# Each init method decodes the start of 'data' and returns the unused rest;
# each serialize method returns the encoded form of the decoded fields.

class ClassInfo(NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]
    def serialize(self):
        return su2(self.name_index)

class RefInfo(NameAndTypeUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]
    def serialize(self):
        return su2(self.class_index)+su2(self.name_and_type_index)

class FieldRefInfo(RefInfo, PythonNameUtils):
    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):
    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):
    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def serialize(self):
        return su2(self.name_index)+su2(self.descriptor_index)

    def get_field_descriptor(self):
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A length-prefixed byte string constant."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def serialize(self):
        return su2(self.length)+self.bytes

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        return str(self)

class StringInfo:

    "A constant referring, via string_index, to another constant."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        return su2(self.string_index)

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        return str(self)

class SmallNumInfo:

    "A 4-byte numeric constant kept in its encoded form."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]
    def serialize(self):
        return self.bytes

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    "An 8-byte numeric constant kept as two encoded 4-byte halves."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]
    def serialize(self):
        return self.high_bytes+self.low_bytes


class LongInfo(LargeNumInfo):
    def get_value(self):
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):

    "Common decoding of flags, name, descriptor and attributes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        # The class file decodes the attributes and hands back the rest.
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    def serialize(self):
        od = su2(self.access_flags)+su2(self.name_index)+su2(self.descriptor_index)
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute: a 4-byte length followed by that many bytes of
    undecoded content. Subclasses decode the content of specific attributes.
    """

    def init(self, data, class_file):
        # Record the class file as every other init method does, so that
        # attributes relying on this generic implementation also have it.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

    def serialize(self):
        return su4(self.attribute_length)+self.info

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

    def serialize(self):
        return su4(self.attribute_length)+su2(self.name_index)

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content must be exactly the 2-byte index read above.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        return self.class_file.constants[self.constant_value_index - 1].get_value()

    def serialize(self):
        return su4(self.attribute_length)+su2(self.constant_value_index)

class CodeAttributeInfo(AttributeInfo):

    "Stack/locals limits, the code bytes, an exception table and attributes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.max_stack)+su2(self.max_locals)+su4(self.code_length)+self.code
        od += su2(self.exception_table_length)
        for e in self.exception_table:
            od += e.serialize()
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class ExceptionsAttributeInfo(AttributeInfo):

    "A counted table of 2-byte constant indexes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.number_of_exceptions)
        for ei in self.exception_index_table:
            od += su2(ei)
        return od

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.number_of_classes)
        for c in self.classes:
            od += c.serialize()
        return od

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.line_number_table_length)
        for ln in self.line_number_table:
            od += ln.serialize()
        return od

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.local_variable_table_length)
        for lv in self.local_variable_table:
            od += lv.serialize()
        return od

class LocalVariableTypeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        local_variable_type_table_length = u2(data[4:6])
        data = data[6:]
        self.local_variable_type_table = []
        for i in range(0, local_variable_type_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_type_table.append(local_variable)
        return data

    def serialize(self):
        # The table length is not stored, so it is derived from the table.
        od = su4(self.attribute_length)+su2(len(self.local_variable_type_table))
        for lv in self.local_variable_type_table:
            od += lv.serialize()
        return od

class DeprecatedAttributeInfo(AttributeInfo):
    pass

class VerificationTypeInfo(object):

    """
    A verification type identified by a 1-byte tag. Subclasses define TAG and
    may decode additional data following the tag.
    """

    def __init__(self, tag):
        self.tag = tag

    def init(self, data, class_file):
        self.class_file = class_file
        tag = u1(data[0:1])
        # The object is created from a lookahead of this same byte.
        assert(tag == self.tag)
        return data[1:]

    def serialize(self):
        return su1(self.tag)

class TopVariableInfo(VerificationTypeInfo):
    TAG = 0

class IntegerVariableInfo(VerificationTypeInfo):
    TAG = 1

class FloatVariableInfo(VerificationTypeInfo):
    TAG = 2

class DoubleVariableInfo(VerificationTypeInfo):
    TAG = 3

class LongVariableInfo(VerificationTypeInfo):
    TAG = 4

class NullVariableInfo(VerificationTypeInfo):
    TAG = 5

class UninitializedThisVariableInfo(VerificationTypeInfo):
    TAG = 6

class ObjectVariableInfo(VerificationTypeInfo):
    TAG = 7

    def init(self, data, class_file):
        data = super(ObjectVariableInfo, self).init(data, class_file)
        self.cpool_index = u2(data)
        return data[2:]

    def serialize(self):
        return super(ObjectVariableInfo, self).serialize() + su2(self.cpool_index)

class UninitializedVariableInfo(VerificationTypeInfo):
    TAG = 8

    def init(self, data, class_file):
        data = super(UninitializedVariableInfo, self).init(data, class_file)
        self.offset = u2(data)
        return data[2:]

    def serialize(self):
        return super(UninitializedVariableInfo, self).serialize() + su2(self.offset)

VARIABLE_INFO_CLASSES = (TopVariableInfo, IntegerVariableInfo, FloatVariableInfo, DoubleVariableInfo,
                         LongVariableInfo, NullVariableInfo, UninitializedThisVariableInfo,
                         ObjectVariableInfo, UninitializedVariableInfo)
VARIABLE_INFO_TAG_MAP = dict([(cls.TAG, cls) for cls in VARIABLE_INFO_CLASSES])

# Exception.

class UnknownVariableInfo(Exception):

    "Raised when a verification type tag has no corresponding class."

    def __init__(self, tag):
        Exception.__init__(self, tag)
        self.tag = tag

    def __str__(self):
        return repr(self.tag)

def create_verification_type_info(data):

    """
    Return a new, undecoded verification type object chosen by the tag byte at
    the start of 'data'. Raise UnknownVariableInfo for an unrecognised tag.
    """

    # Does not consume data, just does lookahead.
    tag = u1(data[0:1])
    if tag in VARIABLE_INFO_TAG_MAP:
        return VARIABLE_INFO_TAG_MAP[tag](tag)
    else:
        raise UnknownVariableInfo(tag)

class StackMapFrame(object):

    """
    A stack map frame identified by a 1-byte frame type. Subclasses define the
    inclusive TYPE_LOWER..TYPE_UPPER range of frame types they handle.
    """

    def __init__(self, frame_type):
        self.frame_type = frame_type

    def init(self, data, class_file):
        self.class_file = class_file
        frame_type = u1(data[0:1])
        # The object is created from a lookahead of this same byte.
        assert(frame_type == self.frame_type)
        return data[1:]

    def serialize(self):
        return su1(self.frame_type)

class SameFrame(StackMapFrame):
    TYPE_LOWER = 0
    TYPE_UPPER = 63

class SameLocals1StackItemFrame(StackMapFrame):
    TYPE_LOWER = 64
    TYPE_UPPER = 127

    def init(self, data, class_file):
        data = super(SameLocals1StackItemFrame, self).init(data, class_file)
        # The offset is encoded in the frame type itself.
        self.offset_delta = self.frame_type - 64
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)

    def serialize(self):
        return super(SameLocals1StackItemFrame, self).serialize()+self.stack[0].serialize()

class SameLocals1StackItemFrameExtended(StackMapFrame):
    TYPE_LOWER = 247
    TYPE_UPPER = 247

    def init(self, data, class_file):
        data = super(SameLocals1StackItemFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)

    def serialize(self):
        return super(SameLocals1StackItemFrameExtended, self).serialize()+su2(self.offset_delta)+self.stack[0].serialize()

class ChopFrame(StackMapFrame):
    TYPE_LOWER = 248
    TYPE_UPPER = 250

    def init(self, data, class_file):
        data = super(ChopFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]

    def serialize(self):
        return super(ChopFrame, self).serialize()+su2(self.offset_delta)

class SameFrameExtended(StackMapFrame):
    TYPE_LOWER = 251
    TYPE_UPPER = 251

    def init(self, data, class_file):
        data = super(SameFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]

    def serialize(self):
        return super(SameFrameExtended, self).serialize()+su2(self.offset_delta)

class AppendFrame(StackMapFrame):
    TYPE_LOWER = 252
    TYPE_UPPER = 254

    def init(self, data, class_file):
        data = super(AppendFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        # The number of locals is encoded in the frame type itself.
        num_locals = self.frame_type - 251
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        return data

    def serialize(self):
        od = super(AppendFrame, self).serialize()+su2(self.offset_delta)
        for l in self.locals:
            od += l.serialize()
        return od

class FullFrame(StackMapFrame):
    TYPE_LOWER = 255
    TYPE_UPPER = 255

    def init(self, data, class_file):
        data = super(FullFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        num_locals = u2(data[2:4])
        data = data[4:]
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        num_stack_items = u2(data[0:2])
        data = data[2:]
        self.stack = []
        for ii in range(num_stack_items):
            stack_item = create_verification_type_info(data)
            data = stack_item.init(data, class_file)
            self.stack.append(stack_item)
        return data

    def serialize(self):
        od = super(FullFrame, self).serialize()+su2(self.offset_delta)+su2(len(self.locals))
        for l in self.locals:
            od += l.serialize()
        od += su2(len(self.stack))
        for s in self.stack:
            od += s.serialize()
        return od

FRAME_CLASSES = (SameFrame, SameLocals1StackItemFrame, SameLocals1StackItemFrameExtended,
                 ChopFrame, SameFrameExtended, AppendFrame, FullFrame)

# Exception.
class UnknownStackFrame(Exception):

    "Raised when a frame type is not covered by any stack map frame class."

    def __init__(self, frame_type):
        Exception.__init__(self, frame_type)
        self.frame_type = frame_type
    def __str__(self):
        return repr(self.frame_type)

def create_stack_frame(data):

    """
    Return a new, undecoded frame object chosen by the frame type byte at the
    start of 'data'. Raise UnknownStackFrame if no class in FRAME_CLASSES
    covers that frame type.
    """

    # Does not consume data, just does lookahead.
    frame_type = u1(data[0:1])
    for cls in FRAME_CLASSES:
        if frame_type >= cls.TYPE_LOWER and frame_type <= cls.TYPE_UPPER:
            return cls(frame_type)
    raise UnknownStackFrame(frame_type)

class StackMapTableAttributeInfo(AttributeInfo):

    "A counted sequence of stack map frames."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_entries = u2(data[4:6])
        self.entries = []
        data = data[6:]
        for i in range(0, num_entries):
            frame = create_stack_frame(data)
            data = frame.init(data, class_file)
            self.entries.append(frame)
        return data

    def serialize(self):
        # The entry count is not stored, so it is derived from the entries.
        od = su4(self.attribute_length)+su2(len(self.entries))
        for e in self.entries:
            od += e.serialize()
        return od

class EnclosingMethodAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.class_index = u2(data[4:6])
        self.method_index = u2(data[6:8])
        return data[8:]

    def serialize(self):
        return su4(self.attribute_length)+su2(self.class_index)+su2(self.method_index)

class SignatureAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.signature_index = u2(data[4:6])
        return data[6:]

    def serialize(self):
        return su4(self.attribute_length)+su2(self.signature_index)

class SourceDebugExtensionAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # The content is kept as undecoded bytes.
        self.debug_extension = data[4:(4 + self.attribute_length)]
        return data[(4+ self.attribute_length):]

    def serialize(self):
        return su4(self.attribute_length)+self.debug_extension

class ElementValue(object):

    """
    An element value identified by a one-character tag, which is stored in the
    data as a single byte. Subclasses decode the data following the tag.
    """

    def __init__(self, tag):
        self.tag = tag

    def init(self, data, class_file):
        self.class_file = class_file
        tag = chr(u1(data[0:1]))
        # The object is created from a lookahead of this same byte.
        assert(tag == self.tag)
        return data[1:]

    def serialize(self):
        return su1(ord(self.tag))

class ConstValue(ElementValue):
    def init(self, data, class_file):
        data = super(ConstValue, self).init(data, class_file)
        self.const_value_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        return super(ConstValue, self).serialize()+su2(self.const_value_index)

class EnumConstValue(ElementValue):
    def init(self, data, class_file):
        data = super(EnumConstValue, self).init(data, class_file)
        self.type_name_index = u2(data[0:2])
        self.const_name_index = u2(data[2:4])
        return data[4:]

    def serialize(self):
        return super(EnumConstValue, self).serialize()+su2(self.type_name_index)+su2(self.const_name_index)

class ClassInfoValue(ElementValue):
    def init(self, data, class_file):
        data = super(ClassInfoValue, self).init(data, class_file)
        self.class_info_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        return super(ClassInfoValue, self).serialize()+su2(self.class_info_index)

class AnnotationValue(ElementValue):
    def init(self, data, class_file):
        data = super(AnnotationValue, self).init(data, class_file)
        self.annotation_value = Annotation()
        return self.annotation_value.init(data, class_file)

    def serialize(self):
        return super(AnnotationValue, self).serialize()+self.annotation_value.serialize()

class ArrayValue(ElementValue):

    "A counted sequence of nested element values."

    def init(self, data, class_file):
        data = super(ArrayValue, self).init(data, class_file)
        num_values = u2(data[0:2])
        data = data[2:]
        self.values = []
        for ii in range(num_values):
            element_value = create_element_value(data)
            data = element_value.init(data, class_file)
            self.values.append(element_value)
        return data

    def serialize(self):
        # The value count is not stored, so it is derived from the values.
        od = super(ArrayValue, self).serialize()+su2(len(self.values))
        for v in self.values:
            od += v.serialize()
        return od

# Exception.

class UnknownElementValue(Exception):

    """
    An exception raised when an element value has an unrecognised tag. The
    offending tag is available as the 'tag' attribute.
    """

    def __init__(self, tag):
        Exception.__init__(self, tag)
        self.tag = tag

    def __str__(self):
        return repr(self.tag)

def create_element_value(data):

    """
    Return a new, uninitialised element value object of the class selected by
    the one-byte tag at the start of 'data'. The data is only inspected, not
    consumed: the caller passes the same data to the object's 'init' method.
    Raise UnknownElementValue for an unrecognised tag.
    """

    tag = chr(u1(data[0:1]))
    if tag in ('B', 'C', 'D', 'F', 'I', 'J', 'S', 'Z', 's'):
        return ConstValue(tag)
    elif tag == 'e':
        return EnumConstValue(tag)
    elif tag == 'c':
        return ClassInfoValue(tag)
    elif tag == '@':
        return AnnotationValue(tag)
    elif tag == '[':
        return ArrayValue(tag)
    else:
        raise UnknownElementValue(tag)

class Annotation(object):

    "An annotation: a type index plus a list of (name index, value) pairs."

    def init(self, data, class_file):

        """
        Read the type index, the number of element/value pairs and the pairs
        themselves from 'data', returning the remaining data.
        """

        self.class_file = class_file
        self.type_index = u2(data[0:2])
        num_element_value_pairs = u2(data[2:4])
        data = data[4:]
        self.element_value_pairs = []
        for ii in xrange(num_element_value_pairs):
            element_name_index = u2(data[0:2])
            data = data[2:]
            element_value = create_element_value(data)
            data = element_value.init(data, class_file)
            self.element_value_pairs.append((element_name_index, element_value))
        return data

    def serialize(self):

        "Return the type index, the number of pairs and each serialised pair."

        od = su2(self.type_index)+su2(len(self.element_value_pairs))
        od += "".join([su2(name_index)+value.serialize()
                       for (name_index, value) in self.element_value_pairs])
        return od

class RuntimeAnnotationsAttributeInfo(AttributeInfo):

    "An attribute holding a list of annotations."

    def init(self, data, class_file):

        """
        Read the attribute length, the number of annotations and the
        annotations themselves from 'data', returning the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_annotations = u2(data[4:6])
        data = data[6:]
        self.annotations = []
        for ii in xrange(num_annotations):
            annotation = Annotation()
            data = annotation.init(data, class_file)
            self.annotations.append(annotation)
        return data

    def serialize(self):

        """
        Return the stored attribute length, the number of annotations and each
        serialised annotation. The length is the value read by 'init'; it is
        not recomputed from the annotations.
        """

        od = su4(self.attribute_length)+su2(len(self.annotations))
        od += "".join([a.serialize() for a in self.annotations])
        return od

class RuntimeVisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):
    pass

class RuntimeInvisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):
    pass

class RuntimeParameterAnnotationsAttributeInfo(AttributeInfo):

    "An attribute holding a list of annotations for each of several parameters."

    def init(self, data, class_file):

        """
        Read the attribute length, a one-byte parameter count and, for each
        parameter, a two-byte annotation count followed by the annotations.
        Return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_parameters = u1(data[4:5])
        data = data[5:]
        self.parameter_annotations = []
        for ii in xrange(num_parameters):
            num_annotations = u2(data[0:2])
            data = data[2:]
            annotations = []
            for jj in xrange(num_annotations):
                annotation = Annotation()
                data = annotation.init(data, class_file)
                annotations.append(annotation)
            self.parameter_annotations.append(annotations)
        return data

    def serialize(self):

        """
        Return the stored attribute length, the parameter count and, for each
        parameter, its annotation count and serialised annotations.
        """

        od = su4(self.attribute_length)+su1(len(self.parameter_annotations))
        for pa in self.parameter_annotations:
            od += su2(len(pa))
            od += "".join([a.serialize() for a in pa])
        return od

class RuntimeVisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):
    pass

class RuntimeInvisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):
    pass

class AnnotationDefaultAttributeInfo(AttributeInfo):

    "An attribute holding a single element value."

    def init(self, data, class_file):

        """
        Read the attribute length and one element value from 'data', returning
        the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        data = data[4:]
        self.default_value = create_element_value(data)
        return self.default_value.init(data, class_file)

    def serialize(self):

        "Return the stored attribute length and the serialised element value."

        return su4(self.attribute_length)+self.default_value.serialize()

# Child classes of the attribute information classes.

class ExceptionInfo:

    "A record of four two-byte values: start, end, handler and catch type."

    def init(self, data):

        "Read the four values from 'data', returning the remaining data."

        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

    def serialize(self):

        "Return the four values in the order in which they were read."

        return su2(self.start_pc)+su2(self.end_pc)+su2(self.handler_pc)+su2(self.catch_type)

class InnerClassInfo(NameUtils):

    "A record describing an inner class using three indexes and access flags."

    def init(self, data, class_file):

        """
        Read the inner class info index, outer class info index, inner name
        index and access flags from 'data', returning the remaining data.
        """

        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

    def serialize(self):

        """
        Return the four values in the order in which they were read. The name
        is taken from 'name_index', not 'inner_name_index'.
        """

        return su2(self.inner_class_info_index)+su2(self.outer_class_info_index)+su2(self.name_index)+su2(self.inner_class_access_flags)

class LineNumberInfo:

    "A record pairing a start position with a line number."

    def init(self, data):

        "Read the two values from 'data', returning the remaining data."

        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

    def serialize(self):

        "Return the start position followed by the line number."

        return su2(self.start_pc)+su2(self.line_number)

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A record describing a local variable using five two-byte values."

    def init(self, data, class_file):

        """
        Read the start position, length, name index, descriptor index and
        variable index from 'data', returning the remaining data.
        """

        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):

        """
        Return the result of get_field_descriptor for the constant referenced
        by the descriptor index. Constant indexes are one-based, hence the
        adjustment when accessing the constants list.
        """

        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def serialize(self):

        "Return the five values in the order in which they were read."

        return su2(self.start_pc)+su2(self.length)+su2(self.name_index)+su2(self.descriptor_index)+su2(self.index)

# Exceptions.

class UnknownTag(Exception):

    "An exception raised when a constant has an unrecognised tag."

    def __init__(self, tag):
        self.tag = tag
    def __str__(self):
        return repr(self.tag)

class UnknownAttribute(Exception):

    """
    An exception raised when an attribute cannot be associated with a known
    name. It is also raised by ClassFile for an unexpected magic number.
    """

    def __init__(self, name):
        self.name = name

# A mapping from attribute names to the classes used to represent them.

ATTR_NAMES_TO_CLASS = {"SourceFile": SourceFileAttributeInfo,
                       "ConstantValue": ConstantValueAttributeInfo,
                       "Code": CodeAttributeInfo,
                       "Exceptions": ExceptionsAttributeInfo,
                       "InnerClasses": InnerClassesAttributeInfo,
                       "Synthetic": SyntheticAttributeInfo,
                       "LineNumberTable": LineNumberAttributeInfo,
                       "LocalVariableTable": LocalVariableAttributeInfo,
                       "Deprecated": DeprecatedAttributeInfo,
                       # Java SE 1.6, class file >= 50.0, VMSpec v3 s4.7.4
                       "StackMapTable": StackMapTableAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.7
                       "EnclosingMethod": EnclosingMethodAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.9
                       "Signature": SignatureAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.11
                       "SourceDebugExtension": SourceDebugExtensionAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.14
                       "LocalVariableTypeTable": LocalVariableTypeAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.16
                       "RuntimeVisibleAnnotations": RuntimeVisibleAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.17
                       "RuntimeInvisibleAnnotations": RuntimeInvisibleAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.18
                       "RuntimeVisibleParameterAnnotations": RuntimeVisibleParameterAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.19
                       "RuntimeInvisibleParameterAnnotations": RuntimeInvisibleParameterAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.20
                       "AnnotationDefault": AnnotationDefaultAttributeInfo,
                       }

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details. UnknownAttribute is raised if 's' does not start with
        the expected magic number.
        """

        # A cache built on demand by _serialize_attributes.
        self.attribute_class_to_index = None
        magic = u4(s[0:])
        if magic != 0xCAFEBABE:
            raise UnknownAttribute(magic)
        self.minorv, self.majorv = u2(s[4:]), u2(s[6:])
        # Each stage returns its result together with the unconsumed data.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def serialize(self):

        """
        Return the class file as a string, emitting the sections in the order
        in which they are read by the initialiser.
        """

        od = su4(0xCAFEBABE)+su2(self.minorv)+su2(self.majorv)
        od += self._serialize_constants()
        od += self._serialize_access_flags()
        od += self._serialize_this_class()
        od += self._serialize_super_class()
        od += self._serialize_interfaces()
        od += self._serialize_fields()
        od += self._serialize_methods()
        od += self._serialize_attributes(self.attributes)
        return od

    def _encode_const(self, c):

        """
        Return the constant 'c' as a one-byte tag followed by its serialised
        form. An empty string is returned for anything which is not a known
        constant object, such as the None placeholders in the constants list.
        """

        # The order matters: the first matching class decides the tag.
        tagged_classes = (
            (Utf8Info, 1),
            (IntegerInfo, 3),
            (FloatInfo, 4),
            (LongInfo, 5),
            (DoubleInfo, 6),
            (ClassInfo, 7),
            (StringInfo, 8),
            (FieldRefInfo, 9),
            (InterfaceMethodRefInfo, 11), # check subclass first
            (MethodRefInfo, 10),
            (NameAndTypeInfo, 12),
            )
        for const_class, tag in tagged_classes:
            if isinstance(c, const_class):
                return su1(tag) + c.serialize()
        return ''

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning a tuple containing
        the constant object and the remaining data. UnknownTag is raised if
        the leading tag byte is not recognised.
        """

        tag = u1(s[0:1])
        classes_by_tag = {
            1 : Utf8Info,
            3 : IntegerInfo,
            4 : FloatInfo,
            5 : LongInfo,
            6 : DoubleInfo,
            7 : ClassInfo,
            8 : StringInfo,
            9 : FieldRefInfo,
            10 : MethodRefInfo,
            11 : InterfaceMethodRefInfo,
            12 : NameAndTypeInfo,
            }
        const_class = classes_by_tag.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the running index reaches 'count',
        returning a tuple containing the list of constants and the remaining
        data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Create the given 'number' of 'cls' instances, initialising each from
        's' in turn. Return a tuple containing the list of instances and the
        remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing its class using the
        name found via the leading two-byte constant index. Return a tuple
        containing the attribute and the remaining data. UnknownAttribute is
        raised if the name is not present in ATTR_NAMES_TO_CLASS.
        """

        attribute_name_index = u2(s[0:2])
        # Constant indexes are one-based.
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = ATTR_NAMES_TO_CLASS.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode the given 'number' of attributes from 's', returning a tuple
        containing the list of attributes and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _serialize_constants(self):

        """
        Return the constant count followed by the encoded constants. The count
        is one more than the length of the constants list.
        """

        return su2(len(self.constants)+1)+"".join([self._encode_const(c) for c in self.constants])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _serialize_access_flags(self):
        return su2(self.access_flags)

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _serialize_this_class(self):
        return su2(self.constants.index(self.this_class)+1)

    def _serialize_super_class(self):

        """
        Return the two-byte index of the superclass constant, or zero where
        no superclass was recorded.
        """

        # _get_super_class yields None for index zero. Searching the constants
        # list for None would either fail or find an unrelated placeholder.
        if self.super_class is None:
            return su2(0)
        return su2(self.constants.index(self.super_class)+1)

    def _get_super_class(self, s):

        """
        Return a tuple containing the superclass constant, or None where the
        stored index is zero, and the remaining data.
        """

        index = u2(s[0:2])
        if index != 0:
            return self.constants[index - 1], s[2:]
        else:
            return None, s[2:]

    def _get_interfaces(self, s):

        """
        Read a two-byte count and that many two-byte constant indexes from
        's', returning a tuple containing the list of referenced constants and
        the remaining data.
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _serialize_interfaces(self):
        return su2(len(self.interfaces))+"".join([su2(self.constants.index(interf)+1) for interf in self.interfaces])

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _serialize_fields(self):
        od = su2(len(self.fields))
        od += "".join([f.serialize() for f in self.fields])
        return od

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_attribute_class_to_index(self):

        """
        Return the mapping from attribute classes to the one-based indexes of
        the constants holding their names, building it on first use.
        """

        if self.attribute_class_to_index is None:
            # Build the mapping completely before caching it so that a failure
            # part-way through does not leave a partial cache behind.
            mapping = {}
            for position, c in enumerate(self.constants):
                if isinstance(c, Utf8Info):
                    name = str(c)
                    if name in ATTR_NAMES_TO_CLASS:
                        mapping[ATTR_NAMES_TO_CLASS[name]] = position + 1
            self.attribute_class_to_index = mapping
        return self.attribute_class_to_index

    def _get_attribute_name_index(self, attribute, mapping):

        """
        Return the name index for 'attribute' using the given 'mapping' of
        classes to indexes. UnknownAttribute is raised if no index is found.
        """

        # Prefer the attribute's own class so that the result does not depend
        # on the iteration order of the mapping.
        name_index = mapping.get(attribute.__class__)
        if name_index is not None:
            return name_index
        for classtype, name_index in mapping.items():
            if isinstance(attribute, classtype):
                return name_index
        # Emitting the attribute without a name index would corrupt the output.
        raise UnknownAttribute(attribute.__class__.__name__)

    def _serialize_attributes(self, attrs):

        """
        Return the number of attributes in 'attrs' followed by each attribute
        as a two-byte name index and its serialised form. UnknownAttribute is
        raised for an attribute whose name index cannot be determined.
        """

        od = su2(len(attrs))
        if not attrs:
            return od
        mapping = self._get_attribute_class_to_index()
        for attribute in attrs:
            od += su2(self._get_attribute_name_index(attribute, mapping))
            od += attribute.serialize()
        return od

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

    def _serialize_methods(self):
        od = su2(len(self.methods))
        od += "".join([m.serialize() for m in self.methods])
        return od


if __name__ == "__main__":
    import sys

    # Decode the named class file and check that serialising it reproduces the
    # original data.

    f = open(sys.argv[1], "rb")
    try:
        in_data = f.read()
    finally:
        f.close()
    c = ClassFile(in_data)
    out_data = c.serialize()
    # Raised explicitly so that the check is not removed when running with -O.
    if in_data != out_data:
        raise AssertionError("serialised data differs from the original data")

# vim: tabstop=4 expandtab shiftwidth=4