Lichen (file common.py at ed201ad4d8e1)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     # Namespace management.   190    191     def enter_namespace(self, name):   192    193         "Enter the namespace having the given 'name'."   194    195         self.namespace_path.append(name)   196    197     def exit_namespace(self):   198    199         "Exit the current namespace."   200    201         self.namespace_path.pop()   202    203     # Constant reference naming.   204    205     def get_constant_name(self, value, value_type, encoding=None):   206    207         """   208         Add a new constant to the current namespace for 'value' with   209         'value_type'.   210         """   211    212         path = self.get_namespace_path()   213         init_item(self.constants, path, dict)   214         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   215    216     # Literal reference naming.   217    218     def get_literal_name(self):   219    220         "Add a new literal to the current namespace."   221    222         path = self.get_namespace_path()   223         init_item(self.literals, path, lambda: 0)   224         return "$C%d" % self.literals[path]   225    226     def next_literal(self):   227         self.literals[self.get_namespace_path()] += 1   228    229     # Temporary variable naming.   230    231     def get_temporary_name(self):   232         path = self.get_namespace_path()   233         init_item(self.temp, path, lambda: 0)   234         return "$t%d" % self.temp[path]   235    236     def next_temporary(self):   237         self.temp[self.get_namespace_path()] += 1   238    239     # Arbitrary function naming.   240    241     def get_lambda_name(self):   242         path = self.get_namespace_path()   243         init_item(self.lambdas, path, lambda: 0)   244         name = "$l%d" % self.lambdas[path]   245         self.lambdas[path] += 1   246         return name   247    248     def reset_lambdas(self):   249         self.lambdas = {}   250    251     # Constant and literal recording.   252    253     def get_constant_value(self, value, literals=None):   254    255         """   256         Encode the 'value' if appropriate, returning a value, a typename and any   257         encoding.   258         """   259    260         if isinstance(value, unicode):   261             return value.encode("utf-8"), "unicode", self.encoding   262    263         # Attempt to convert plain strings to text.   264    265         elif isinstance(value, str) and self.encoding:   266             try:   267                 return get_string_details(literals, self.encoding)   268             except UnicodeDecodeError:   269                 pass   270    271         return value, value.__class__.__name__, None   272    273     def get_constant_reference(self, ref, value, encoding=None):   274    275         """   276         Return a constant reference for the given 'ref' type and 'value', with   277         the optional 'encoding' applying to text values.   278         """   279    280         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   281    282         # Return a reference for the constant.   283    284         objpath = self.get_object_path(constant_name)   285         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   286    287         # Record the value and type for the constant.   288    289         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   290         return name_ref   291    292     def reserve_constant(self, objpath, value, origin, encoding=None):   293    294         """   295         Reserve a constant within 'objpath' with the given 'value' and having a   296         type with the given 'origin', with the optional 'encoding' applying to   297         text values.   298         """   299    300         constant_name = self.get_constant_name(value, origin)   301         objpath = self.get_object_path(constant_name)   302         self._reserve_constant(objpath, value, origin, encoding)   303    304     def _reserve_constant(self, objpath, value, origin, encoding):   305    306         """   307         Store a constant for 'objpath' with the given 'value' and 'origin', with   308         the optional 'encoding' applying to text values.   309         """   310    311         self.constant_values[objpath] = value, origin, encoding   312    313     def get_literal_reference(self, name, ref, items, cls):   314    315         """   316         Return a literal reference for the given type 'name', literal 'ref',   317         node 'items' and employing the given 'cls' as the class of the returned   318         reference object.   319         """   320    321         # Construct an invocation using the items as arguments.   322    323         typename = "$L%s" % name   324    325         invocation = compiler.ast.CallFunc(   326             compiler.ast.Name(typename),   327             items   328             )   329    330         # Get a name for the actual literal.   331    332         instname = self.get_literal_name()   333         self.next_literal()   334    335         # Record the type for the literal.   336    337         objpath = self.get_object_path(instname)   338         self.literal_types[objpath] = ref.get_origin()   339    340         # Return a wrapper for the invocation exposing the items.   341    342         return cls(   343             instname,   344             ref.instance_of(),   345             self.process_structure_node(invocation),   346             invocation.args   347             )   348    349     # Node handling.   350    351     def process_structure(self, node):   352    353         """   354         Within the given 'node', process the program structure.   355    356         During inspection, this will process global declarations, adjusting the   357         module namespace, and import statements, building a module dependency   358         hierarchy.   359    360         During translation, this will consult deduced program information and   361         output translated code.   362         """   363    364         l = []   365         for n in node.getChildNodes():   366             l.append(self.process_structure_node(n))   367         return l   368    369     def process_augassign_node(self, n):   370    371         "Process the given augmented assignment node 'n'."   372    373         op = operator_functions[n.op]   374    375         if isinstance(n.node, compiler.ast.Getattr):   376             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   377         elif isinstance(n.node, compiler.ast.Name):   378             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   379         else:   380             target = n.node   381    382         assignment = compiler.ast.Assign(   383             [target],   384             compiler.ast.CallFunc(   385                 compiler.ast.Name("$op%s" % op),   386                 [n.node, n.expr]))   387    388         return self.process_structure_node(assignment)   389    390     def process_assignment_for_object(self, original_name, source):   391    392         """   393         Return an assignment operation making 'original_name' refer to the given   394         'source'.   395         """   396    397         assignment = compiler.ast.Assign(   398             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   399             source   400             )   401    402         return self.process_structure_node(assignment)   403    404     def process_assignment_node_items(self, n, expr):   405    406         """   407         Process the given assignment node 'n' whose children are to be assigned   408         items of 'expr'.   409         """   410    411         name_ref = self.process_structure_node(expr)   412    413         # Either unpack the items and present them directly to each assignment   414         # node.   415    416         if isinstance(name_ref, LiteralSequenceRef) and \   417            self.process_literal_sequence_items(n, name_ref):   418    419             pass   420    421         # Or have the assignment nodes access each item via the sequence API.   422    423         else:   424             self.process_assignment_node_items_by_position(n, expr, name_ref)   425    426     def process_assignment_node_items_by_position(self, n, expr, name_ref):   427    428         """   429         Process the given sequence assignment node 'n', converting the node to   430         the separate assignment of each target using positional access on a   431         temporary variable representing the sequence. Use 'expr' as the assigned   432         value and 'name_ref' as the reference providing any existing temporary   433         variable.   434         """   435    436         assignments = []   437    438         # Employ existing names to access the sequence.   439         # Literal sequences do not provide names of accessible objects.   440    441         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   442             temp = name_ref.name   443    444         # For other expressions, create a temporary name to reference the items.   445    446         else:   447             temp = self.get_temporary_name()   448             self.next_temporary()   449    450             assignments.append(   451                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   452                 )   453    454         # Assign the items to the target nodes.   455    456         for i, node in enumerate(n.nodes):   457             assignments.append(   458                 compiler.ast.Assign([node], compiler.ast.Subscript(   459                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   460                 )   461    462         return self.process_structure_node(compiler.ast.Stmt(assignments))   463    464     def process_literal_sequence_items(self, n, name_ref):   465    466         """   467         Process the given assignment node 'n', obtaining from the given   468         'name_ref' the items to be assigned to the assignment targets.   469    470         Return whether this method was able to process the assignment node as   471         a sequence of direct assignments.   472         """   473    474         if len(n.nodes) == len(name_ref.items):   475             assigned_names, count = get_names_from_nodes(n.nodes)   476             accessed_names, _count = get_names_from_nodes(name_ref.items)   477    478             # Only assign directly between items if all assigned names are   479             # plain names (not attribute assignments), and if the assigned names   480             # do not appear in the accessed names.   481    482             if len(assigned_names) == count and \   483                not assigned_names.intersection(accessed_names):   484    485                 for node, item in zip(n.nodes, name_ref.items):   486                     self.process_assignment_node(node, item)   487    488                 return True   489    490             # Otherwise, use the position-based mechanism to obtain values.   491    492             else:   493                 return False   494         else:   495             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   496                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   497    498     def process_compare_node(self, n):   499    500         """   501         Process the given comparison node 'n', converting an operator sequence   502         from...   503    504         <expr1> <op1> <expr2> <op2> <expr3>   505    506         ...to...   507    508         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   509         """   510    511         invocations = []   512         last = n.expr   513    514         for op, op_node in n.ops:   515             op = operator_functions.get(op)   516    517             invocations.append(compiler.ast.CallFunc(   518                 compiler.ast.Name("$op%s" % op),   519                 [last, op_node]))   520    521             last = op_node   522    523         if len(invocations) > 1:   524             result = compiler.ast.And(invocations)   525         else:   526             result = invocations[0]   527    528         return self.process_structure_node(result)   529    530     def process_dict_node(self, node):   531    532         """   533         Process the given dictionary 'node', returning a list of (key, value)   534         tuples.   535         """   536    537         l = []   538         for key, value in node.items:   539             l.append((   540                 self.process_structure_node(key),   541                 self.process_structure_node(value)))   542         return l   543    544     def process_for_node(self, n):   545    546         """   547         Generate attribute accesses for {n.list}.__iter__ and the next method on   548         the iterator, producing a replacement node for the original.   549         """   550    551         t0 = self.get_temporary_name()   552         self.next_temporary()   553         t1 = self.get_temporary_name()   554         self.next_temporary()   555         t2 = self.get_temporary_name()   556         self.next_temporary()   557    558         node = compiler.ast.Stmt([   559    560             # <t0> = {n.list}   561             # <t1> = <t0>.__iter__()   562    563             compiler.ast.Assign(   564                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   565                 n.list),   566    567             compiler.ast.Assign(   568                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   569                 compiler.ast.CallFunc(   570                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   571                     [])),   572    573             # <t2> = <t1>.next   574             # try:   575             #     while True:   576             #         <var>... = <t2>()   577             #         ...   578             # except StopIteration:   579             #     pass   580    581             compiler.ast.Assign(   582                 [compiler.ast.AssName(t2, "OP_ASSIGN")],   583                 compiler.ast.Getattr(compiler.ast.Name(t1), "next")),   584    585             compiler.ast.TryExcept(   586                 compiler.ast.While(   587                     compiler.ast.Name("True"),   588                     compiler.ast.Stmt([   589                         compiler.ast.Assign(   590                             [n.assign],   591                             compiler.ast.CallFunc(   592                                 compiler.ast.Name(t2),   593                                 []   594                                 )),   595                         n.body]),   596                     None),   597                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   598                 None)   599             ])   600    601         self.process_structure_node(node)   602    603     def process_literal_sequence_node(self, n, name, ref, cls):   604    605         """   606         Process the given literal sequence node 'n' as a function invocation,   607         with 'name' indicating the type of the sequence, and 'ref' being a   608         reference to the type. The 'cls' is used to instantiate a suitable name   609         reference.   610         """   611    612         if name == "dict":   613             items = []   614             for key, value in n.items:   615                 items.append(compiler.ast.Tuple([key, value]))   616         else: # name in ("list", "tuple"):   617             items = n.nodes   618    619         return self.get_literal_reference(name, ref, items, cls)   620    621     def process_operator_node(self, n):   622    623         """   624         Process the given operator node 'n' as an operator function invocation.   625         """   626    627         opname = n.__class__.__name__   628         operands = n.getChildNodes()   629    630         # Convert a unary operation to an invocation.   631    632         op = unary_operator_functions.get(opname)   633    634         if op:   635             invocation = compiler.ast.CallFunc(   636                 compiler.ast.Name("$op%s" % op),   637                 [operands[0]]   638                 )   639    640         # Convert a single operator with a list of operands to a combination of   641         # pairwise operations.   642    643         else:   644             op = operator_functions[opname]   645             invocation = self._process_operator_node(op, operands)   646    647         return self.process_structure_node(invocation)   648    649     def _process_operator_node(self, op, operands):   650    651         """   652         Process the given 'op', being an operator function, together with the   653         supplied 'operands', returning either a single remaining operand or an   654         invocation combining the operands.   655         """   656    657         remaining = operands[1:]   658         if not remaining:   659             return operands[0]   660    661         return compiler.ast.CallFunc(   662             compiler.ast.Name("$op%s" % op),   663             [operands[0], self._process_operator_node(op, remaining)]   664             )   665    666     def process_print_node(self, n):   667    668         """   669         Process the given print node 'n' as an invocation on a stream of the   670         form...   671    672         $print(dest, args, nl)   673    674         The special function name will be translated elsewhere.   675         """   676    677         nl = isinstance(n, compiler.ast.Printnl)   678         invocation = compiler.ast.CallFunc(   679             compiler.ast.Name("$print"),   680             [n.dest or compiler.ast.Name("None"),   681              compiler.ast.List(list(n.nodes)),   682              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   683             )   684         return self.process_structure_node(invocation)   685    686     def process_slice_node(self, n, expr=None):   687    688         """   689         Process the given slice node 'n' as a method invocation.   690         """   691    692         if n.flags == "OP_ASSIGN": op = "__setslice__"   693         elif n.flags == "OP_DELETE": op = "__delslice__"   694         else: op = "__getslice__"   695    696         invocation = compiler.ast.CallFunc(   697             compiler.ast.Getattr(n.expr, op),   698             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   699                 (expr and [expr] or [])   700             )   701    702         # Fix parse tree structure.   703    704         if op == "__delslice__":   705             invocation = compiler.ast.Discard(invocation)   706    707         return self.process_structure_node(invocation)   708    709     def process_sliceobj_node(self, n):   710    711         """   712         Process the given slice object node 'n' as a slice constructor.   713         """   714    715         op = "slice"   716         invocation = compiler.ast.CallFunc(   717             compiler.ast.Name("$op%s" % op),   718             n.nodes   719             )   720         return self.process_structure_node(invocation)   721    722     def process_subscript_node(self, n, expr=None):   723    724         """   725         Process the given subscript node 'n' as a method invocation.   726         """   727    728         if n.flags == "OP_ASSIGN": op = "__setitem__"   729         elif n.flags == "OP_DELETE": op = "__delitem__"   730         else: op = "__getitem__"   731    732         invocation = compiler.ast.CallFunc(   733             compiler.ast.Getattr(n.expr, op),   734             list(n.subs) + (expr and [expr] or [])   735             )   736    737         # Fix parse tree structure.   738    739         if op == "__delitem__":   740             invocation = compiler.ast.Discard(invocation)   741    742         return self.process_structure_node(invocation)   743    744     def process_attribute_chain(self, n):   745    746         """   747         Process the given attribute access node 'n'. Return a reference   748         describing the expression.   749         """   750    751         # AssAttr/Getattr are nested with the outermost access being the last   752         # access in any chain.   753    754         self.attrs.insert(0, n.attrname)   755         attrs = self.attrs   756    757         # Break attribute chains where non-access nodes are found.   758    759         if not self.have_access_expression(n):   760             self.reset_attribute_chain()   761    762         # Descend into the expression, extending backwards any existing chain,   763         # or building another for the expression.   764    765         name_ref = self.process_structure_node(n.expr)   766    767         # Restore chain information applying to this node.   768    769         if not self.have_access_expression(n):   770             self.restore_attribute_chain(attrs)   771    772         # Return immediately if the expression was another access and thus a   773         # continuation backwards along the chain. The above processing will   774         # have followed the chain all the way to its conclusion.   775    776         if self.have_access_expression(n):   777             del self.attrs[0]   778    779         return name_ref   780    781     # Attribute chain handling.   782    783     def reset_attribute_chain(self):   784    785         "Reset the attribute chain for a subexpression of an attribute access."   786    787         self.attrs = []   788         self.chain_assignment.append(self.in_assignment)   789         self.chain_invocation.append(self.in_invocation)   790         self.in_assignment = None   791         self.in_invocation = None   792    793     def restore_attribute_chain(self, attrs):   794    795         "Restore the attribute chain for an attribute access."   796    797         self.attrs = attrs   798         self.in_assignment = self.chain_assignment.pop()   799         self.in_invocation = self.chain_invocation.pop()   800    801     def have_access_expression(self, node):   802    803         "Return whether the expression associated with 'node' is Getattr."   804    805         return isinstance(node.expr, compiler.ast.Getattr)   806    807     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   808    809         """   810         Return the name to be used for attribute usage observations involving   811         the given 'name' in the current namespace.   812    813         If the name is being used outside a function, and if 'name_ref' is   814         given and indicates a global or if 'is_global' is specified as a true   815         value, a path featuring the name in the global namespace is returned.   816         Otherwise, a path computed using the current namespace and the given   817         name is returned.   818    819         The intention of this method is to provide a suitably-qualified name   820         that can be tracked across namespaces. Where globals are being   821         referenced in class namespaces, they should be referenced using their   822         path within the module, not using a path within each class.   823    824         It may not be possible to identify a global within a function at the   825         time of inspection (since a global may appear later in a file).   826         Consequently, globals are identified by their local name rather than   827         their module-qualified path.   828         """   829    830         # For functions, use the appropriate local names.   831    832         if self.in_function:   833             return name   834    835         # For global names outside functions, use a global name.   836    837         elif is_global or name_ref and name_ref.is_global_name():   838             return self.get_global_path(name)   839    840         # Otherwise, establish a name in the current namespace.   841    842         else:   843             return self.get_object_path(name)   844    845     def get_path_for_access(self):   846    847         "Outside functions, register accesses at the module level."   848    849         if not self.in_function:   850             return self.name   851         else:   852             return self.get_namespace_path()   853    854     def get_module_name(self, node):   855    856         """   857         Using the given From 'node' in this module, calculate any relative import   858         information, returning a tuple containing a module to import along with any   859         names to import based on the node's name information.   860    861         Where the returned module is given as None, whole module imports should   862         be performed for the returned modules using the returned names.   863         """   864    865         # Absolute import.   866    867         if node.level == 0:   868             return node.modname, node.names   869    870         # Relative to an ancestor of this module.   871    872         else:   873             path = self.name.split(".")   874             level = node.level   875    876             # Relative imports treat package roots as submodules.   877    878             if split(self.filename)[-1] == "__init__.py":   879                 level -= 1   880    881             if level > len(path):   882                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   883                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   884    885             basename = ".".join(path[:len(path)-level])   886    887         # Name imports from a module.   888    889         if node.modname:   890             return "%s.%s" % (basename, node.modname), node.names   891    892         # Relative whole module imports.   893    894         else:   895             return basename, node.names   896    897 def get_argnames(args):   898    899     """   900     Return a list of all names provided by 'args'. Since tuples may be   901     employed, the arguments are traversed depth-first.   902     """   903    904     l = []   905     for arg in args:   906         if isinstance(arg, tuple):   907             l += get_argnames(arg)   908         else:   909             l.append(arg)   910     return l   911    912 def get_names_from_nodes(nodes):   913    914     """   915     Return the names employed in the given 'nodes' along with the number of   916     nodes excluding sequences.   917     """   918    919     names = set()   920     count = 0   921    922     for node in nodes:   923    924         # Add names and count them.   925    926         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   927             names.add(node.name)   928             count += 1   929    930         # Add names from sequences and incorporate their counts.   931    932         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   933                                compiler.ast.List, compiler.ast.Set,   934                                compiler.ast.Tuple)):   935             _names, _count = get_names_from_nodes(node.nodes)   936             names.update(_names)   937             count += _count   938    939         # Count non-name, non-sequence nodes.   940    941         else:   942             count += 1   943    944     return names, count   945    946 # Location classes.   947    948 class Location:   949    950     "A generic program location."   951    952     def __init__(self, path, name, attrnames=None, version=None, access_number=None):   953         self.path = path   954         self.name = name   955         self.attrnames = attrnames   956         self.version = version   957         self.access_number = access_number   958    959     def __repr__(self):   960         return "Location(%r, %r, %r, %r, %r)" % self.as_tuple()   961    962     def as_tuple(self):   963         return (self.path, self.name, self.attrnames, self.version, self.access_number)   964    965     def __hash__(self):   966         return hash(self.as_tuple())   967    968     def __eq__(self, other):   969         return self.as_tuple() == other.as_tuple()   970    971     def __cmp__(self, other):   972         return cmp(self.as_tuple(), other.as_tuple())   973    974     def get_attrname(self):   975    976         """   977         Extract the first attribute from the attribute names employed in this   978         location.   979         """   980    981         attrnames = self.attrnames   982         if not attrnames:   983             return attrnames   984         return get_attrnames(attrnames)[0]   985    986 class AccessLocation(Location):   987    988     "A specialised access location."   989    990     def __init__(self, path, name, attrnames, access_number):   991    992         """   993         Initialise an access location featuring 'path', 'name', 'attrnames' and   994         'access_number'.   995         """   996    997         Location.__init__(self, path, name, attrnames, None, access_number)   998    999     def __repr__(self):  1000         return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number)  1001   1002 # Result classes.  1003   1004 class InstructionSequence:  1005   1006     "A generic sequence of instructions."  1007   1008     def __init__(self, instructions):  1009         self.instructions = instructions  1010   1011     def get_value_instruction(self):  1012         return self.instructions[-1]  1013   1014     def get_init_instructions(self):  1015         return self.instructions[:-1]  1016   1017 # Dictionary utilities.  1018   1019 def init_item(d, key, fn):  1020   1021     """  1022     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial  1023     value where no entry already exists.  1024     """  1025   1026     if not d.has_key(key):  1027         d[key] = fn()  1028     return d[key]  1029   1030 def dict_for_keys(d, keys):  1031   1032     "Return a new dictionary containing entries from 'd' for the given 'keys'."  1033   1034     nd = {}  1035     for key in keys:  1036         if d.has_key(key):  1037             nd[key] = d[key]  1038     return nd  1039   1040 def make_key(s):  1041   1042     "Make sequence 's' into a tuple-based key, first sorting its contents."  1043   1044     l = list(s)  1045     l.sort()  1046     return tuple(l)  1047   1048 def add_counter_item(d, key):  1049   1050     """  1051     Make a mapping in 'd' for 'key' to the number of keys added before it, thus  1052     maintaining a mapping of keys to their order of insertion.  1053     """  1054   1055     if not d.has_key(key):  1056         d[key] = len(d.keys())  1057     return d[key]   1058   1059 def remove_items(d1, d2):  1060   1061     "Remove from 'd1' all items from 'd2'."  1062   1063     for key in d2.keys():  1064         if d1.has_key(key):  1065             del d1[key]  1066   1067 # Set utilities.  1068   1069 def first(s):  1070     return list(s)[0]  1071   1072 def same(s1, s2):  1073     return set(s1) == set(s2)  1074   1075 def order_dependencies(all_depends):  1076   1077     """  1078     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1079     has the form "A depends on B, C...". The result will order A, B, C, and so  1080     on.  1081     """  1082   1083     usage = init_reverse_dependencies(all_depends)  1084   1085     # Produce an ordering by obtaining exposed items (required by items already  1086     # processed) and putting them at the start of the list.  1087   1088     ordered = []  1089   1090     while usage:  1091         have_next = False  1092   1093         for key, n in usage.items():  1094   1095             # Add items needed by no other items to the ordering.  1096   1097             if not n:  1098                 remove_dependency(key, all_depends, usage, ordered)  1099                 have_next = True  1100   1101         if not have_next:  1102             raise ValueError, usage  1103   1104     return ordered  1105   1106 def order_dependencies_partial(all_depends):  1107   1108     """  1109     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1110     has the form "A depends on B, C...". The result will order A, B, C, and so  1111     on. Where cycles exist, they will be broken and a partial ordering returned.  1112     """  1113   1114     usage = init_reverse_dependencies(all_depends)  1115   1116     # Duplicate the dependencies for subsequent modification.  1117   1118     new_depends = {}  1119     for key, values in all_depends.items():  1120         new_depends[key] = set(values)  1121   1122     all_depends = new_depends  1123   1124     # Produce an ordering by obtaining exposed items (required by items already  1125     # processed) and putting them at the start of the list.  1126   1127     ordered = []  1128   1129     while usage:  1130         least = None  1131         least_key = None  1132   1133         for key, n in usage.items():  1134   1135             # Add items needed by no other items to the ordering.  1136   1137             if not n:  1138                 remove_dependency(key, all_depends, usage, ordered)  1139                 least = 0  1140   1141             # When breaking cycles, note the least used items.  1142   1143             elif least is None or len(n) < least:  1144                 least_key = key  1145                 least = len(n)  1146   1147         if least:  1148             transfer_dependencies(least_key, all_depends, usage, ordered)  1149   1150     return ordered  1151   1152 def init_reverse_dependencies(all_depends):  1153   1154     """  1155     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1156     record the reverse dependencies, making a mapping of the form  1157     "B is needed by A", "C is needed by A", and so on.  1158     """  1159   1160     usage = {}  1161   1162     # Record path-based dependencies.  1163   1164     for key in all_depends.keys():  1165         usage[key] = set()  1166   1167     for key, depends in all_depends.items():  1168         for depend in depends:  1169             init_item(usage, depend, set)  1170             usage[depend].add(key)  1171   1172     return usage  1173   1174 def transfer_dependencies(key, all_depends, usage, ordered):  1175   1176     """  1177     Transfer items needed by 'key' to those items needing 'key', found using  1178     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1179     collection of dependencies.  1180   1181     If "A is needed by X" and "B is needed by A", then transferring items needed  1182     by A will cause "B is needed by X" to be recorded as a consequence.  1183   1184     Transferring items also needs to occur in the reverse mapping, so that  1185     "A needs B" and "X needs A", then the consequence must be recorded as  1186     "X needs B".  1187     """  1188   1189     ordered.insert(0, key)  1190   1191     needing = usage[key]                        # A is needed by X  1192     needed = all_depends.get(key)               # A needs B  1193   1194     if needing:  1195         for depend in needing:  1196             l = all_depends.get(depend)  1197             if not l:  1198                 continue  1199   1200             l.remove(key)                       # X needs (A)  1201   1202             if needed:  1203                 l.update(needed)                # X needs B...  1204   1205                 # Prevent self references.  1206   1207                 if depend in needed:  1208                     l.remove(depend)  1209   1210     if needed:  1211         for depend in needed:  1212             l = usage.get(depend)  1213             if not l:  1214                 continue  1215   1216             l.remove(key)                       # B is needed by (A)  1217             l.update(needing)                   # B is needed by X...  1218   1219             # Prevent self references.  1220   1221             if depend in needing:  1222                 l.remove(depend)  1223   1224     if needed:  1225         del all_depends[key]  1226     del usage[key]  1227   1228 def remove_dependency(key, all_depends, usage, ordered):  1229   1230     """  1231     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1232     'ordered' collection of dependencies.  1233   1234     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1235     upon removing A from 'usage', the outcome is that all keys needing A will  1236     have A removed from their 'usage' records.  1237   1238     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1239     as a consequence.  1240     """  1241   1242     ordered.insert(0, key)  1243   1244     depends = all_depends.get(key)  1245   1246     # Reduce usage of the referenced items.  1247   1248     if depends:  1249         for depend in depends:  1250             usage[depend].remove(key)  1251   1252     del usage[key]  1253   1254 # General input/output.  1255   1256 def readfile(filename):  1257   1258     "Return the contents of 'filename'."  1259   1260     f = open(filename)  1261     try:  1262         return f.read()  1263     finally:  1264         f.close()  1265   1266 def writefile(filename, s):  1267   1268     "Write to 'filename' the string 's'."  1269   1270     f = open(filename, "w")  1271     try:  1272         f.write(s)  1273     finally:  1274         f.close()  1275   1276 # General encoding.  1277   1278 def sorted_output(x):  1279   1280     "Sort sequence 'x' and return a string with commas separating the values."  1281   1282     x = map(str, x)  1283     x.sort()  1284     return ", ".join(x)  1285   1286 def get_string_details(literals, encoding):  1287   1288     """  1289     Determine whether 'literals' represent Unicode strings or byte strings,  1290     using 'encoding' to reproduce byte sequences.  1291   1292     Each literal is the full program representation including prefix and quotes  1293     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1294     string needs to be translated back to its original encoding.  1295   1296     Return a single encoded literal value, a type name, and the original  1297     encoding as a tuple.  1298     """  1299   1300     typename = "unicode"  1301   1302     l = []  1303   1304     for s in literals:  1305         out, _typename = get_literal_details(s)  1306         if _typename == "str":  1307             typename = "str"  1308         l.append(out)  1309   1310     out = "".join(l)  1311   1312     # For Unicode values, convert to the UTF-8 program representation.  1313   1314     if typename == "unicode":  1315         return out.encode("utf-8"), typename, encoding  1316   1317     # For byte string values, convert back to the original encoding.  1318   1319     else:  1320         return out.encode(encoding), typename, encoding  1321   1322 def get_literal_details(s):  1323   1324     """  1325     Determine whether 's' represents a Unicode string or a byte string, where  1326     's' contains the full program representation of a literal including prefix  1327     and quotes, recoded by the parser to UTF-8.  1328   1329     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1330     and byte or Unicode values starting with <backslash><octal digit> or  1331     <backslash>x.  1332   1333     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1334     to be considered as Unicode values. Otherwise, they produce byte values and  1335     cause unprefixed strings to be considered as byte strings.  1336   1337     Literals prefixed with "r" do not have their backslash-encoded values  1338     converted unless also prefixed with "u", in which case only the above value  1339     formats are converted, not any of the other special sequences for things  1340     like newlines.  1341   1342     Return the literal value as a Unicode object together with the appropriate  1343     type name in a tuple.  1344     """  1345   1346     l = []  1347   1348     # Identify the quote character and use it to identify the prefix.  1349   1350     quote_type = s[-1]  1351     prefix_end = s.find(quote_type)  1352     prefix = s[:prefix_end].lower()  1353   1354     if prefix not in ("", "b", "br", "r", "u", "ur"):  1355         raise ValueError, "String literal does not have a supported prefix: %s" % s  1356   1357     if "b" in prefix:  1358         typename = "str"  1359     else:  1360         typename = "unicode"  1361   1362     # Identify triple quotes or single quotes.  1363   1364     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1365         quote = s[prefix_end:prefix_end+3]  1366         current = prefix_end + 3  1367         end = len(s) - 3  1368     else:  1369         quote = s[prefix_end]  1370         current = prefix_end + 1  1371         end = len(s) - 1  1372   1373     # Conversions of some quoted values.  1374   1375     searches = {  1376         "u" : (6, 16),  1377         "U" : (10, 16),  1378         "x" : (4, 16),  1379         }  1380   1381     octal_digits = map(str, range(0, 8))  1382   1383     # Translations of some quoted values.  1384   1385     escaped = {  1386         "\\" : "\\", "'" : "'", '"' : '"',  1387         "a" : "\a", "b" : "\b", "f" : "\f",  1388         "n" : "\n", "r" : "\r", "t" : "\t",  1389         }  1390   1391     while current < end:  1392   1393         # Look for quoted values.  1394   1395         index = s.find("\\", current)  1396         if index == -1 or index + 1 == end:  1397             l.append(s[current:end])  1398             break  1399   1400         # Add the preceding text.  1401   1402         l.append(s[current:index])  1403   1404         # Handle quoted text.  1405   1406         term = s[index+1]  1407   1408         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1409         # produce Unicode values.  1410   1411         if typename == "unicode" and (  1412             term in ("u", "U") or   1413             "u" in prefix and (term == "x" or term in octal_digits)):  1414   1415             needed, base = searches.get(term, (4, 8))  1416             value = convert_quoted_value(s, index, needed, end, base, unichr)  1417             l.append(value)  1418             current = index + needed  1419   1420         # Add raw byte values, changing the string type.  1421   1422         elif "r" not in prefix and (  1423              term == "x" or term in octal_digits):  1424   1425             needed, base = searches.get(term, (4, 8))  1426             value = convert_quoted_value(s, index, needed, end, base, chr)  1427             l.append(value)  1428             typename = "str"  1429             current = index + needed  1430   1431         # Add other escaped values.  1432   1433         elif "r" not in prefix and escaped.has_key(term):  1434             l.append(escaped[term])  1435             current = index + 2  1436   1437         # Add other text as found.  1438   1439         else:  1440             l.append(s[index:index+2])  1441             current = index + 2  1442   1443     # Collect the components into a single Unicode object. Since the literal  1444     # text was already in UTF-8 form, interpret plain strings as UTF-8  1445     # sequences.  1446   1447     out = []  1448   1449     for value in l:  1450         if isinstance(value, unicode):  1451             out.append(value)  1452         else:  1453             out.append(unicode(value, "utf-8"))  1454   1455     return "".join(out), typename  1456   1457 def convert_quoted_value(s, index, needed, end, base, fn):  1458   1459     """  1460     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1461     positions, and with the given 'end' indicating the first position after the  1462     end of the actual string content.  1463   1464     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1465     to convert the value to an appropriate type.  1466     """  1467   1468     s = s[index:min(index+needed, end)]  1469   1470     # Not a complete occurrence.  1471   1472     if len(s) < needed:  1473         return s  1474   1475     # Test for a well-formed value.  1476   1477     try:  1478         first = base == 8 and 1 or 2  1479         value = int(s[first:needed], base)  1480     except ValueError:  1481         return s  1482     else:  1483         return fn(value)  1484   1485 # Attribute chain decoding.  1486   1487 def get_attrnames(attrnames):  1488   1489     """  1490     Split the qualified attribute chain 'attrnames' into its components,  1491     handling special attributes starting with "#" that indicate type  1492     conformance.  1493     """  1494   1495     if attrnames.startswith("#"):  1496         return [attrnames]  1497     else:  1498         return attrnames.split(".")  1499   1500 def get_name_path(path, name):  1501   1502     "Return a suitable qualified name from the given 'path' and 'name'."  1503   1504     if "." in name:  1505         return name  1506     else:  1507         return "%s.%s" % (path, name)  1508   1509 # Usage-related functions.  1510   1511 def get_types_for_usage(attrnames, objects):  1512   1513     """  1514     Identify the types that can support the given 'attrnames', using the  1515     given 'objects' as the catalogue of type details.  1516     """  1517   1518     types = []  1519     for name, _attrnames in objects.items():  1520         if set(attrnames).issubset(_attrnames):  1521             types.append(name)  1522     return types  1523   1524 def get_invoked_attributes(usage):  1525   1526     "Obtain invoked attribute from the given 'usage'."  1527   1528     invoked = []  1529     if usage:  1530         for attrname, invocation, assignment in usage:  1531             if invocation:  1532                 invoked.append(attrname)  1533     return invoked  1534   1535 def get_assigned_attributes(usage):  1536   1537     "Obtain assigned attribute from the given 'usage'."  1538   1539     assigned = []  1540     if usage:  1541         for attrname, invocation, assignment in usage:  1542             if assignment:  1543                 assigned.append(attrname)  1544     return assigned  1545   1546 # Type and module functions.  1547 # NOTE: This makes assumptions about the __builtins__ structure.  1548   1549 def get_builtin_module(name):  1550   1551     "Return the module name containing the given type 'name'."  1552   1553     if name == "string":  1554         modname = "str"  1555     elif name == "utf8string":  1556         modname = "unicode"  1557     elif name == "NoneType":  1558         modname = "none"  1559     else:  1560         modname = name  1561   1562     return "__builtins__.%s" % modname  1563   1564 def get_builtin_type(name):  1565   1566     "Return the type name provided by the given Python value 'name'."  1567   1568     if name == "str":  1569         return "string"  1570     elif name == "unicode":  1571         return "utf8string"  1572     else:  1573         return name  1574   1575 def get_builtin_class(name):  1576   1577     "Return the full name of the built-in class having the given 'name'."  1578   1579     typename = get_builtin_type(name)  1580     module = get_builtin_module(typename)  1581     return "%s.%s" % (module, typename)  1582   1583 # Useful data.  1584   1585 predefined_constants = "False", "None", "NotImplemented", "True"  1586   1587 unary_operator_functions = {  1588   1589     # Unary operations.  1590   1591     "Invert" : "invert",  1592     "UnaryAdd" : "pos",  1593     "UnarySub" : "neg",  1594     }  1595   1596 operator_functions = {  1597   1598     # Fundamental operations.  1599   1600     "is" : "is_",  1601     "is not" : "is_not",  1602   1603     # Binary operations.  1604   1605     "in" : "in_",  1606     "not in" : "not_in",  1607     "Add" : "add",  1608     "Bitand" : "and_",  1609     "Bitor" : "or_",  1610     "Bitxor" : "xor",  1611     "Div" : "div",  1612     "FloorDiv" : "floordiv",  1613     "LeftShift" : "lshift",  1614     "Mod" : "mod",  1615     "Mul" : "mul",  1616     "Power" : "pow",  1617     "RightShift" : "rshift",  1618     "Sub" : "sub",  1619   1620     # Augmented assignment.  1621   1622     "+=" : "iadd",  1623     "-=" : "isub",  1624     "*=" : "imul",  1625     "/=" : "idiv",  1626     "//=" : "ifloordiv",  1627     "%=" : "imod",  1628     "**=" : "ipow",  1629     "<<=" : "ilshift",  1630     ">>=" : "irshift",  1631     "&=" : "iand",  1632     "^=" : "ixor",  1633     "|=" : "ior",  1634   1635     # Comparisons.  1636   1637     "==" : "eq",  1638     "!=" : "ne",  1639     "<" : "lt",  1640     "<=" : "le",  1641     ">=" : "ge",  1642     ">" : "gt",  1643     }  1644   1645 operator_functions.update(unary_operator_functions)  1646   1647 # vim: tabstop=4 expandtab shiftwidth=4