Lichen

common.py

755:5fd3c0524204
2017-03-21 Paul Boddie Added support for an integer cache containing specific pre-allocated objects. integer-cache
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     def get_parent_path(self):   190         return ".".join([self.name] + self.namespace_path[:-1])   191    192     # Namespace management.   193    194     def enter_namespace(self, name):   195    196         "Enter the namespace having the given 'name'."   197    198         self.namespace_path.append(name)   199    200     def exit_namespace(self):   201    202         "Exit the current namespace."   203    204         self.namespace_path.pop()   205    206     # Constant reference naming.   207    208     def get_constant_name(self, value, value_type, encoding=None):   209    210         """   211         Add a new constant to the current namespace for 'value' with   212         'value_type'.   213         """   214    215         path = self.get_namespace_path()   216         init_item(self.constants, path, dict)   217         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   218    219     # Literal reference naming.   220    221     def get_literal_name(self):   222    223         "Add a new literal to the current namespace."   224    225         path = self.get_namespace_path()   226         init_item(self.literals, path, lambda: 0)   227         return "$C%d" % self.literals[path]   228    229     def next_literal(self):   230         self.literals[self.get_namespace_path()] += 1   231    232     # Temporary variable naming.   233    234     def get_temporary_name(self):   235         path = self.get_namespace_path()   236         init_item(self.temp, path, lambda: 0)   237         return "$t%d" % self.temp[path]   238    239     def next_temporary(self):   240         self.temp[self.get_namespace_path()] += 1   241    242     # Arbitrary function naming.   243    244     def get_lambda_name(self):   245         path = self.get_namespace_path()   246         init_item(self.lambdas, path, lambda: 0)   247         name = "$l%d" % self.lambdas[path]   248         self.lambdas[path] += 1   249         return name   250    251     def reset_lambdas(self):   252         self.lambdas = {}   253    254     # Constant and literal recording.   255    256     def get_constant_value(self, value, literals=None):   257    258         """   259         Encode the 'value' if appropriate, returning a value, a typename and any   260         encoding.   261         """   262    263         if isinstance(value, unicode):   264             return value.encode("utf-8"), "unicode", self.encoding   265    266         # Attempt to convert plain strings to text.   267    268         elif isinstance(value, str) and self.encoding:   269             try:   270                 return get_string_details(literals, self.encoding)   271             except UnicodeDecodeError:   272                 pass   273    274         return value, value.__class__.__name__, None   275    276     def get_constant_reference(self, ref, value, encoding=None):   277    278         """   279         Return a constant reference for the given 'ref' type and 'value', with   280         the optional 'encoding' applying to text values.   281         """   282    283         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   284    285         # Return a reference for the constant.   286    287         objpath = self.get_object_path(constant_name)   288         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   289    290         # Record the value and type for the constant.   291    292         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   293         return name_ref   294    295     def reserve_constant(self, objpath, value, origin, encoding=None):   296    297         """   298         Reserve a constant within 'objpath' with the given 'value' and having a   299         type with the given 'origin', with the optional 'encoding' applying to   300         text values.   301         """   302    303         constant_name = self.get_constant_name(value, origin)   304         objpath = self.get_object_path(constant_name)   305         self._reserve_constant(objpath, value, origin, encoding)   306    307     def _reserve_constant(self, objpath, value, origin, encoding):   308    309         """   310         Store a constant for 'objpath' with the given 'value' and 'origin', with   311         the optional 'encoding' applying to text values.   312         """   313    314         self.constant_values[objpath] = value, origin, encoding   315    316     def get_literal_reference(self, name, ref, items, cls):   317    318         """   319         Return a literal reference for the given type 'name', literal 'ref',   320         node 'items' and employing the given 'cls' as the class of the returned   321         reference object.   322         """   323    324         # Construct an invocation using the items as arguments.   325    326         typename = "$L%s" % name   327    328         invocation = compiler.ast.CallFunc(   329             compiler.ast.Name(typename),   330             items   331             )   332    333         # Get a name for the actual literal.   334    335         instname = self.get_literal_name()   336         self.next_literal()   337    338         # Record the type for the literal.   339    340         objpath = self.get_object_path(instname)   341         self.literal_types[objpath] = ref.get_origin()   342    343         # Return a wrapper for the invocation exposing the items.   344    345         return cls(   346             instname,   347             ref.instance_of(),   348             self.process_structure_node(invocation),   349             invocation.args   350             )   351    352     # Node handling.   353    354     def process_structure(self, node):   355    356         """   357         Within the given 'node', process the program structure.   358    359         During inspection, this will process global declarations, adjusting the   360         module namespace, and import statements, building a module dependency   361         hierarchy.   362    363         During translation, this will consult deduced program information and   364         output translated code.   365         """   366    367         l = []   368         for n in node.getChildNodes():   369             l.append(self.process_structure_node(n))   370         return l   371    372     def process_augassign_node(self, n):   373    374         "Process the given augmented assignment node 'n'."   375    376         op = operator_functions[n.op]   377    378         if isinstance(n.node, compiler.ast.Getattr):   379             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   380         elif isinstance(n.node, compiler.ast.Name):   381             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   382         else:   383             target = n.node   384    385         assignment = compiler.ast.Assign(   386             [target],   387             compiler.ast.CallFunc(   388                 compiler.ast.Name("$op%s" % op),   389                 [n.node, n.expr]))   390    391         return self.process_structure_node(assignment)   392    393     def process_assignment_for_object(self, original_name, source):   394    395         """   396         Return an assignment operation making 'original_name' refer to the given   397         'source'.   398         """   399    400         assignment = compiler.ast.Assign(   401             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   402             source   403             )   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_node_items(self, n, expr):   408    409         """   410         Process the given assignment node 'n' whose children are to be assigned   411         items of 'expr'.   412         """   413    414         name_ref = self.process_structure_node(expr)   415    416         # Either unpack the items and present them directly to each assignment   417         # node.   418    419         if isinstance(name_ref, LiteralSequenceRef) and \   420            self.process_literal_sequence_items(n, name_ref):   421    422             pass   423    424         # Or have the assignment nodes access each item via the sequence API.   425    426         else:   427             self.process_assignment_node_items_by_position(n, expr, name_ref)   428    429     def process_assignment_node_items_by_position(self, n, expr, name_ref):   430    431         """   432         Process the given sequence assignment node 'n', converting the node to   433         the separate assignment of each target using positional access on a   434         temporary variable representing the sequence. Use 'expr' as the assigned   435         value and 'name_ref' as the reference providing any existing temporary   436         variable.   437         """   438    439         assignments = []   440    441         # Employ existing names to access the sequence.   442         # Literal sequences do not provide names of accessible objects.   443    444         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   445             temp = name_ref.name   446    447         # For other expressions, create a temporary name to reference the items.   448    449         else:   450             temp = self.get_temporary_name()   451             self.next_temporary()   452    453             assignments.append(   454                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   455                 )   456    457         # Assign the items to the target nodes.   458    459         for i, node in enumerate(n.nodes):   460             assignments.append(   461                 compiler.ast.Assign([node], compiler.ast.Subscript(   462                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   463                 )   464    465         return self.process_structure_node(compiler.ast.Stmt(assignments))   466    467     def process_literal_sequence_items(self, n, name_ref):   468    469         """   470         Process the given assignment node 'n', obtaining from the given   471         'name_ref' the items to be assigned to the assignment targets.   472    473         Return whether this method was able to process the assignment node as   474         a sequence of direct assignments.   475         """   476    477         if len(n.nodes) == len(name_ref.items):   478             assigned_names, count = get_names_from_nodes(n.nodes)   479             accessed_names, _count = get_names_from_nodes(name_ref.items)   480    481             # Only assign directly between items if all assigned names are   482             # plain names (not attribute assignments), and if the assigned names   483             # do not appear in the accessed names.   484    485             if len(assigned_names) == count and \   486                not assigned_names.intersection(accessed_names):   487    488                 for node, item in zip(n.nodes, name_ref.items):   489                     self.process_assignment_node(node, item)   490    491                 return True   492    493             # Otherwise, use the position-based mechanism to obtain values.   494    495             else:   496                 return False   497         else:   498             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   499                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   500    501     def process_compare_node(self, n):   502    503         """   504         Process the given comparison node 'n', converting an operator sequence   505         from...   506    507         <expr1> <op1> <expr2> <op2> <expr3>   508    509         ...to...   510    511         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   512         """   513    514         invocations = []   515         last = n.expr   516    517         for op, op_node in n.ops:   518             op = operator_functions.get(op)   519    520             invocations.append(compiler.ast.CallFunc(   521                 compiler.ast.Name("$op%s" % op),   522                 [last, op_node]))   523    524             last = op_node   525    526         if len(invocations) > 1:   527             result = compiler.ast.And(invocations)   528         else:   529             result = invocations[0]   530    531         return self.process_structure_node(result)   532    533     def process_dict_node(self, node):   534    535         """   536         Process the given dictionary 'node', returning a list of (key, value)   537         tuples.   538         """   539    540         l = []   541         for key, value in node.items:   542             l.append((   543                 self.process_structure_node(key),   544                 self.process_structure_node(value)))   545         return l   546    547     def process_for_node(self, n):   548    549         """   550         Generate attribute accesses for {n.list}.__iter__ and the next method on   551         the iterator, producing a replacement node for the original.   552         """   553    554         t0 = self.get_temporary_name()   555         self.next_temporary()   556         t1 = self.get_temporary_name()   557         self.next_temporary()   558         i0 = self.get_temporary_name()   559         self.next_temporary()   560    561         node = compiler.ast.Stmt([   562    563             # <t0> = {n.list}   564             # <t1> = <t0>.__iter__()   565             # <next> = <t1>.next   566    567             compiler.ast.Assign(   568                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   569                 n.list),   570    571             compiler.ast.Assign(   572                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   573                 compiler.ast.CallFunc(   574                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   575                     [])),   576    577             compiler.ast.Assign(   578                 [compiler.ast.AssName(i0, "OP_ASSIGN")],   579                 compiler.ast.Getattr(compiler.ast.Name(t1), "next")),   580    581             # try:   582             #     while True:   583             #         <var>... = <next>()   584             #         ...   585             # except StopIteration:   586             #     pass   587    588             compiler.ast.TryExcept(   589                 compiler.ast.While(   590                     compiler.ast.Name("True"),   591                     compiler.ast.Stmt([   592                         compiler.ast.Assign(   593                             [n.assign],   594                             compiler.ast.CallFunc(   595                                 compiler.ast.Name(i0),   596                                 []   597                                 )),   598                         n.body]),   599                     None),   600                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   601                 None)   602             ])   603    604         self.process_structure_node(node)   605    606     def process_literal_sequence_node(self, n, name, ref, cls):   607    608         """   609         Process the given literal sequence node 'n' as a function invocation,   610         with 'name' indicating the type of the sequence, and 'ref' being a   611         reference to the type. The 'cls' is used to instantiate a suitable name   612         reference.   613         """   614    615         if name == "dict":   616             items = []   617             for key, value in n.items:   618                 items.append(compiler.ast.Tuple([key, value]))   619         else: # name in ("list", "tuple"):   620             items = n.nodes   621    622         return self.get_literal_reference(name, ref, items, cls)   623    624     def process_operator_node(self, n):   625    626         """   627         Process the given operator node 'n' as an operator function invocation.   628         """   629    630         op = operator_functions[n.__class__.__name__]   631         invocation = compiler.ast.CallFunc(   632             compiler.ast.Name("$op%s" % op),   633             list(n.getChildNodes())   634             )   635         return self.process_structure_node(invocation)   636    637     def process_print_node(self, n):   638    639         """   640         Process the given print node 'n' as an invocation on a stream of the   641         form...   642    643         $print(dest, args, nl)   644    645         The special function name will be translated elsewhere.   646         """   647    648         nl = isinstance(n, compiler.ast.Printnl)   649         invocation = compiler.ast.CallFunc(   650             compiler.ast.Name("$print"),   651             [n.dest or compiler.ast.Name("None"),   652              compiler.ast.List(list(n.nodes)),   653              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   654             )   655         return self.process_structure_node(invocation)   656    657     def process_slice_node(self, n, expr=None):   658    659         """   660         Process the given slice node 'n' as an operator function invocation.   661         """   662    663         if n.flags == "OP_ASSIGN": op = "setslice"   664         elif n.flags == "OP_DELETE": op = "delslice"   665         else: op = "getslice"   666    667         invocation = compiler.ast.CallFunc(   668             compiler.ast.Name("$op%s" % op),   669             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   670                 (expr and [expr] or [])   671             )   672    673         # Fix parse tree structure.   674    675         if op == "delslice":   676             invocation = compiler.ast.Discard(invocation)   677    678         return self.process_structure_node(invocation)   679    680     def process_sliceobj_node(self, n):   681    682         """   683         Process the given slice object node 'n' as a slice constructor.   684         """   685    686         op = "slice"   687         invocation = compiler.ast.CallFunc(   688             compiler.ast.Name("$op%s" % op),   689             n.nodes   690             )   691         return self.process_structure_node(invocation)   692    693     def process_subscript_node(self, n, expr=None):   694    695         """   696         Process the given subscript node 'n' as an operator function invocation.   697         """   698    699         if n.flags == "OP_ASSIGN": op = "setitem"   700         elif n.flags == "OP_DELETE": op = "delitem"   701         else: op = "getitem"   702    703         invocation = compiler.ast.CallFunc(   704             compiler.ast.Name("$op%s" % op),   705             [n.expr] + list(n.subs) + (expr and [expr] or [])   706             )   707    708         # Fix parse tree structure.   709    710         if op == "delitem":   711             invocation = compiler.ast.Discard(invocation)   712    713         return self.process_structure_node(invocation)   714    715     def process_attribute_chain(self, n):   716    717         """   718         Process the given attribute access node 'n'. Return a reference   719         describing the expression.   720         """   721    722         # AssAttr/Getattr are nested with the outermost access being the last   723         # access in any chain.   724    725         self.attrs.insert(0, n.attrname)   726         attrs = self.attrs   727    728         # Break attribute chains where non-access nodes are found.   729    730         if not self.have_access_expression(n):   731             self.reset_attribute_chain()   732    733         # Descend into the expression, extending backwards any existing chain,   734         # or building another for the expression.   735    736         name_ref = self.process_structure_node(n.expr)   737    738         # Restore chain information applying to this node.   739    740         if not self.have_access_expression(n):   741             self.restore_attribute_chain(attrs)   742    743         # Return immediately if the expression was another access and thus a   744         # continuation backwards along the chain. The above processing will   745         # have followed the chain all the way to its conclusion.   746    747         if self.have_access_expression(n):   748             del self.attrs[0]   749    750         return name_ref   751    752     # Attribute chain handling.   753    754     def reset_attribute_chain(self):   755    756         "Reset the attribute chain for a subexpression of an attribute access."   757    758         self.attrs = []   759         self.chain_assignment.append(self.in_assignment)   760         self.chain_invocation.append(self.in_invocation)   761         self.in_assignment = None   762         self.in_invocation = None   763    764     def restore_attribute_chain(self, attrs):   765    766         "Restore the attribute chain for an attribute access."   767    768         self.attrs = attrs   769         self.in_assignment = self.chain_assignment.pop()   770         self.in_invocation = self.chain_invocation.pop()   771    772     def have_access_expression(self, node):   773    774         "Return whether the expression associated with 'node' is Getattr."   775    776         return isinstance(node.expr, compiler.ast.Getattr)   777    778     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   779    780         """   781         Return the name to be used for attribute usage observations involving   782         the given 'name' in the current namespace.   783    784         If the name is being used outside a function, and if 'name_ref' is   785         given and indicates a global or if 'is_global' is specified as a true   786         value, a path featuring the name in the global namespace is returned.   787         Otherwise, a path computed using the current namespace and the given   788         name is returned.   789    790         The intention of this method is to provide a suitably-qualified name   791         that can be tracked across namespaces. Where globals are being   792         referenced in class namespaces, they should be referenced using their   793         path within the module, not using a path within each class.   794    795         It may not be possible to identify a global within a function at the   796         time of inspection (since a global may appear later in a file).   797         Consequently, globals are identified by their local name rather than   798         their module-qualified path.   799         """   800    801         # For functions, use the appropriate local names.   802    803         if self.in_function:   804             return name   805    806         # For global names outside functions, use a global name.   807    808         elif is_global or name_ref and name_ref.is_global_name():   809             return self.get_global_path(name)   810    811         # Otherwise, establish a name in the current namespace.   812    813         else:   814             return self.get_object_path(name)   815    816     def get_path_for_access(self):   817    818         "Outside functions, register accesses at the module level."   819    820         if not self.in_function:   821             return self.name   822         else:   823             return self.get_namespace_path()   824    825     def get_module_name(self, node):   826    827         """   828         Using the given From 'node' in this module, calculate any relative import   829         information, returning a tuple containing a module to import along with any   830         names to import based on the node's name information.   831    832         Where the returned module is given as None, whole module imports should   833         be performed for the returned modules using the returned names.   834         """   835    836         # Absolute import.   837    838         if node.level == 0:   839             return node.modname, node.names   840    841         # Relative to an ancestor of this module.   842    843         else:   844             path = self.name.split(".")   845             level = node.level   846    847             # Relative imports treat package roots as submodules.   848    849             if split(self.filename)[-1] == "__init__.py":   850                 level -= 1   851    852             if level > len(path):   853                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   854                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   855    856             basename = ".".join(path[:len(path)-level])   857    858         # Name imports from a module.   859    860         if node.modname:   861             return "%s.%s" % (basename, node.modname), node.names   862    863         # Relative whole module imports.   864    865         else:   866             return basename, node.names   867    868 def get_argnames(args):   869    870     """   871     Return a list of all names provided by 'args'. Since tuples may be   872     employed, the arguments are traversed depth-first.   873     """   874    875     l = []   876     for arg in args:   877         if isinstance(arg, tuple):   878             l += get_argnames(arg)   879         else:   880             l.append(arg)   881     return l   882    883 def get_names_from_nodes(nodes):   884    885     """   886     Return the names employed in the given 'nodes' along with the number of   887     nodes excluding sequences.   888     """   889    890     names = set()   891     count = 0   892    893     for node in nodes:   894    895         # Add names and count them.   896    897         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   898             names.add(node.name)   899             count += 1   900    901         # Add names from sequences and incorporate their counts.   902    903         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   904                                compiler.ast.List, compiler.ast.Set,   905                                compiler.ast.Tuple)):   906             _names, _count = get_names_from_nodes(node.nodes)   907             names.update(_names)   908             count += _count   909    910         # Count non-name, non-sequence nodes.   911    912         else:   913             count += 1   914    915     return names, count   916    917 # Result classes.   918    919 class InstructionSequence:   920    921     "A generic sequence of instructions."   922    923     def __init__(self, instructions):   924         self.instructions = instructions   925    926     def get_value_instruction(self):   927         return self.instructions[-1]   928    929     def get_init_instructions(self):   930         return self.instructions[:-1]   931    932 # Dictionary utilities.   933    934 def init_item(d, key, fn):   935    936     """   937     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   938     value where no entry already exists.   939     """   940    941     if not d.has_key(key):   942         d[key] = fn()   943     return d[key]   944    945 def dict_for_keys(d, keys):   946    947     "Return a new dictionary containing entries from 'd' for the given 'keys'."   948    949     nd = {}   950     for key in keys:   951         if d.has_key(key):   952             nd[key] = d[key]   953     return nd   954    955 def make_key(s):   956    957     "Make sequence 's' into a tuple-based key, first sorting its contents."   958    959     l = list(s)   960     l.sort()   961     return tuple(l)   962    963 def add_counter_item(d, key):   964    965     """   966     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   967     maintaining a mapping of keys to their order of insertion.   968     """   969    970     if not d.has_key(key):   971         d[key] = len(d.keys())   972     return d[key]    973    974 def remove_items(d1, d2):   975    976     "Remove from 'd1' all items from 'd2'."   977    978     for key in d2.keys():   979         if d1.has_key(key):   980             del d1[key]   981    982 # Set utilities.   983    984 def first(s):   985     return list(s)[0]   986    987 def same(s1, s2):   988     return set(s1) == set(s2)   989    990 def order_dependencies(all_depends):   991    992     """   993     Produce a dependency ordering for the 'all_depends' mapping. This mapping   994     has the form "A depends on B, C...". The result will order A, B, C, and so   995     on.   996     """   997    998     usage = init_reverse_dependencies(all_depends)   999   1000     # Produce an ordering by obtaining exposed items (required by items already  1001     # processed) and putting them at the start of the list.  1002   1003     ordered = []  1004   1005     while usage:  1006         have_next = False  1007   1008         for key, n in usage.items():  1009   1010             # Add items needed by no other items to the ordering.  1011   1012             if not n:  1013                 remove_dependency(key, all_depends, usage, ordered)  1014                 have_next = True  1015   1016         if not have_next:  1017             raise ValueError, usage  1018   1019     return ordered  1020   1021 def order_dependencies_partial(all_depends):  1022   1023     """  1024     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1025     has the form "A depends on B, C...". The result will order A, B, C, and so  1026     on. Where cycles exist, they will be broken and a partial ordering returned.  1027     """  1028   1029     usage = init_reverse_dependencies(all_depends)  1030   1031     # Duplicate the dependencies for subsequent modification.  1032   1033     new_depends = {}  1034     for key, values in all_depends.items():  1035         new_depends[key] = set(values)  1036   1037     all_depends = new_depends  1038   1039     # Produce an ordering by obtaining exposed items (required by items already  1040     # processed) and putting them at the start of the list.  1041   1042     ordered = []  1043   1044     while usage:  1045         least = None  1046         least_key = None  1047   1048         for key, n in usage.items():  1049   1050             # Add items needed by no other items to the ordering.  1051   1052             if not n:  1053                 remove_dependency(key, all_depends, usage, ordered)  1054                 least = 0  1055   1056             # When breaking cycles, note the least used items.  1057   1058             elif least is None or len(n) < least:  1059                 least_key = key  1060                 least = len(n)  1061   1062         if least:  1063             transfer_dependencies(least_key, all_depends, usage, ordered)  1064   1065     return ordered  1066   1067 def init_reverse_dependencies(all_depends):  1068   1069     """  1070     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1071     record the reverse dependencies, making a mapping of the form  1072     "B is needed by A", "C is needed by A", and so on.  1073     """  1074   1075     usage = {}  1076   1077     # Record path-based dependencies.  1078   1079     for key in all_depends.keys():  1080         usage[key] = set()  1081   1082     for key, depends in all_depends.items():  1083         for depend in depends:  1084             init_item(usage, depend, set)  1085             usage[depend].add(key)  1086   1087     return usage  1088   1089 def transfer_dependencies(key, all_depends, usage, ordered):  1090   1091     """  1092     Transfer items needed by 'key' to those items needing 'key', found using  1093     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1094     collection of dependencies.  1095   1096     If "A is needed by X" and "B is needed by A", then transferring items needed  1097     by A will cause "B is needed by X" to be recorded as a consequence.  1098   1099     Transferring items also needs to occur in the reverse mapping, so that  1100     "A needs B" and "X needs A", then the consequence must be recorded as  1101     "X needs B".  1102     """  1103   1104     ordered.insert(0, key)  1105   1106     needing = usage[key]                        # A is needed by X  1107     needed = all_depends.get(key)               # A needs B  1108   1109     if needing:  1110         for depend in needing:  1111             l = all_depends.get(depend)  1112             if not l:  1113                 continue  1114   1115             l.remove(key)                       # X needs (A)  1116   1117             if needed:  1118                 l.update(needed)                # X needs B...  1119   1120                 # Prevent self references.  1121   1122                 if depend in needed:  1123                     l.remove(depend)  1124   1125     if needed:  1126         for depend in needed:  1127             l = usage.get(depend)  1128             if not l:  1129                 continue  1130   1131             l.remove(key)                       # B is needed by (A)  1132             l.update(needing)                   # B is needed by X...  1133   1134             # Prevent self references.  1135   1136             if depend in needing:  1137                 l.remove(depend)  1138   1139     if needed:  1140         del all_depends[key]  1141     del usage[key]  1142   1143 def remove_dependency(key, all_depends, usage, ordered):  1144   1145     """  1146     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1147     'ordered' collection of dependencies.  1148   1149     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1150     upon removing A from 'usage', the outcome is that all keys needing A will  1151     have A removed from their 'usage' records.  1152   1153     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1154     as a consequence.  1155     """  1156   1157     ordered.insert(0, key)  1158   1159     depends = all_depends.get(key)  1160   1161     # Reduce usage of the referenced items.  1162   1163     if depends:  1164         for depend in depends:  1165             usage[depend].remove(key)  1166   1167     del usage[key]  1168   1169 # General input/output.  1170   1171 def readfile(filename):  1172   1173     "Return the contents of 'filename'."  1174   1175     f = open(filename)  1176     try:  1177         return f.read()  1178     finally:  1179         f.close()  1180   1181 def writefile(filename, s):  1182   1183     "Write to 'filename' the string 's'."  1184   1185     f = open(filename, "w")  1186     try:  1187         f.write(s)  1188     finally:  1189         f.close()  1190   1191 def indent(indent, s):  1192   1193     "Insert 'indent' before every line in 's'."  1194   1195     l = []  1196     for line in s.split("\n"):  1197         l.append(indent + line)  1198     return "\n".join(l)  1199   1200 # General encoding.  1201   1202 def sorted_output(x):  1203   1204     "Sort sequence 'x' and return a string with commas separating the values."  1205   1206     x = map(str, x)  1207     x.sort()  1208     return ", ".join(x)  1209   1210 def get_string_details(literals, encoding):  1211   1212     """  1213     Determine whether 'literals' represent Unicode strings or byte strings,  1214     using 'encoding' to reproduce byte sequences.  1215   1216     Each literal is the full program representation including prefix and quotes  1217     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1218     string needs to be translated back to its original encoding.  1219   1220     Return a single encoded literal value, a type name, and the original  1221     encoding as a tuple.  1222     """  1223   1224     typename = "unicode"  1225   1226     l = []  1227   1228     for s in literals:  1229         out, _typename = get_literal_details(s)  1230         if _typename == "str":  1231             typename = "str"  1232         l.append(out)  1233   1234     out = "".join(l)  1235   1236     # For Unicode values, convert to the UTF-8 program representation.  1237   1238     if typename == "unicode":  1239         return out.encode("utf-8"), typename, encoding  1240   1241     # For byte string values, convert back to the original encoding.  1242   1243     else:  1244         return out.encode(encoding), typename, encoding  1245   1246 def get_literal_details(s):  1247   1248     """  1249     Determine whether 's' represents a Unicode string or a byte string, where  1250     's' contains the full program representation of a literal including prefix  1251     and quotes, recoded by the parser to UTF-8.  1252   1253     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1254     and byte or Unicode values starting with <backslash><octal digit> or  1255     <backslash>x.  1256   1257     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1258     to be considered as Unicode values. Otherwise, they produce byte values and  1259     cause unprefixed strings to be considered as byte strings.  1260   1261     Literals prefixed with "r" do not have their backslash-encoded values  1262     converted unless also prefixed with "u", in which case only the above value  1263     formats are converted, not any of the other special sequences for things  1264     like newlines.  1265   1266     Return the literal value as a Unicode object together with the appropriate  1267     type name in a tuple.  1268     """  1269   1270     l = []  1271   1272     # Identify the quote character and use it to identify the prefix.  1273   1274     quote_type = s[-1]  1275     prefix_end = s.find(quote_type)  1276     prefix = s[:prefix_end].lower()  1277   1278     if prefix not in ("", "b", "br", "r", "u", "ur"):  1279         raise ValueError, "String literal does not have a supported prefix: %s" % s  1280   1281     if "b" in prefix:  1282         typename = "str"  1283     else:  1284         typename = "unicode"  1285   1286     # Identify triple quotes or single quotes.  1287   1288     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1289         quote = s[prefix_end:prefix_end+3]  1290         current = prefix_end + 3  1291         end = len(s) - 3  1292     else:  1293         quote = s[prefix_end]  1294         current = prefix_end + 1  1295         end = len(s) - 1  1296   1297     # Conversions of some quoted values.  1298   1299     searches = {  1300         "u" : (6, 16),  1301         "U" : (10, 16),  1302         "x" : (4, 16),  1303         }  1304   1305     octal_digits = map(str, range(0, 8))  1306   1307     # Translations of some quoted values.  1308   1309     escaped = {  1310         "\\" : "\\", "'" : "'", '"' : '"',  1311         "a" : "\a", "b" : "\b", "f" : "\f",  1312         "n" : "\n", "r" : "\r", "t" : "\t",  1313         }  1314   1315     while current < end:  1316   1317         # Look for quoted values.  1318   1319         index = s.find("\\", current)  1320         if index == -1 or index + 1 == end:  1321             l.append(s[current:end])  1322             break  1323   1324         # Add the preceding text.  1325   1326         l.append(s[current:index])  1327   1328         # Handle quoted text.  1329   1330         term = s[index+1]  1331   1332         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1333         # produce Unicode values.  1334   1335         if typename == "unicode" and (  1336             term in ("u", "U") or   1337             "u" in prefix and (term == "x" or term in octal_digits)):  1338   1339             needed, base = searches.get(term, (4, 8))  1340             value = convert_quoted_value(s, index, needed, end, base, unichr)  1341             l.append(value)  1342             current = index + needed  1343   1344         # Add raw byte values, changing the string type.  1345   1346         elif "r" not in prefix and (  1347              term == "x" or term in octal_digits):  1348   1349             needed, base = searches.get(term, (4, 8))  1350             value = convert_quoted_value(s, index, needed, end, base, chr)  1351             l.append(value)  1352             typename = "str"  1353             current = index + needed  1354   1355         # Add other escaped values.  1356   1357         elif "r" not in prefix and escaped.has_key(term):  1358             l.append(escaped[term])  1359             current = index + 2  1360   1361         # Add other text as found.  1362   1363         else:  1364             l.append(s[index:index+2])  1365             current = index + 2  1366   1367     # Collect the components into a single Unicode object. Since the literal  1368     # text was already in UTF-8 form, interpret plain strings as UTF-8  1369     # sequences.  1370   1371     out = []  1372   1373     for value in l:  1374         if isinstance(value, unicode):  1375             out.append(value)  1376         else:  1377             out.append(unicode(value, "utf-8"))  1378   1379     return "".join(out), typename  1380   1381 def convert_quoted_value(s, index, needed, end, base, fn):  1382   1383     """  1384     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1385     positions, and with the given 'end' indicating the first position after the  1386     end of the actual string content.  1387   1388     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1389     to convert the value to an appropriate type.  1390     """  1391   1392     s = s[index:min(index+needed, end)]  1393   1394     # Not a complete occurrence.  1395   1396     if len(s) < needed:  1397         return s  1398   1399     # Test for a well-formed value.  1400   1401     try:  1402         first = base == 8 and 1 or 2  1403         value = int(s[first:needed], base)  1404     except ValueError:  1405         return s  1406     else:  1407         return fn(value)  1408   1409 # Attribute chain decoding.  1410   1411 def get_attrnames(attrnames):  1412   1413     """  1414     Split the qualified attribute chain 'attrnames' into its components,  1415     handling special attributes starting with "#" that indicate type  1416     conformance.  1417     """  1418   1419     if attrnames.startswith("#"):  1420         return [attrnames]  1421     else:  1422         return attrnames.split(".")  1423   1424 def get_attrname_from_location(location):  1425   1426     """  1427     Extract the first attribute from the attribute names employed in a  1428     'location'.  1429     """  1430   1431     path, name, attrnames, access = location  1432     if not attrnames:  1433         return attrnames  1434     return get_attrnames(attrnames)[0]  1435   1436 def get_name_path(path, name):  1437   1438     "Return a suitable qualified name from the given 'path' and 'name'."  1439   1440     if "." in name:  1441         return name  1442     else:  1443         return "%s.%s" % (path, name)  1444   1445 # Usage-related functions.  1446   1447 def get_types_for_usage(attrnames, objects):  1448   1449     """  1450     Identify the types that can support the given 'attrnames', using the  1451     given 'objects' as the catalogue of type details.  1452     """  1453   1454     types = []  1455     for name, _attrnames in objects.items():  1456         if set(attrnames).issubset(_attrnames):  1457             types.append(name)  1458     return types  1459   1460 def get_invoked_attributes(usage):  1461   1462     "Obtain invoked attribute from the given 'usage'."  1463   1464     invoked = []  1465     if usage:  1466         for attrname, invocation, assignment in usage:  1467             if invocation:  1468                 invoked.append(attrname)  1469     return invoked  1470   1471 def get_assigned_attributes(usage):  1472   1473     "Obtain assigned attribute from the given 'usage'."  1474   1475     assigned = []  1476     if usage:  1477         for attrname, invocation, assignment in usage:  1478             if assignment:  1479                 assigned.append(attrname)  1480     return assigned  1481   1482 # Type and module functions.  1483 # NOTE: This makes assumptions about the __builtins__ structure.  1484   1485 def get_builtin_module(name):  1486   1487     "Return the module name containing the given type 'name'."  1488   1489     if name == "string":  1490         modname = "str"  1491     elif name == "utf8string":  1492         modname = "unicode"  1493     elif name == "NoneType":  1494         modname = "none"  1495     else:  1496         modname = name  1497   1498     return "__builtins__.%s" % modname  1499   1500 def get_builtin_type(name):  1501   1502     "Return the type name provided by the given Python value 'name'."  1503   1504     if name == "str":  1505         return "string"  1506     elif name == "unicode":  1507         return "utf8string"  1508     else:  1509         return name  1510   1511 def get_builtin_class(name):  1512   1513     "Return the full name of the built-in class having the given 'name'."  1514   1515     typename = get_builtin_type(name)  1516     module = get_builtin_module(typename)  1517     return "%s.%s" % (module, typename)  1518   1519 # Useful data.  1520   1521 predefined_constants = "False", "None", "NotImplemented", "True"  1522   1523 operator_functions = {  1524   1525     # Fundamental operations.  1526   1527     "is" : "is_",  1528     "is not" : "is_not",  1529   1530     # Binary operations.  1531   1532     "in" : "in_",  1533     "not in" : "not_in",  1534     "Add" : "add",  1535     "Bitand" : "and_",  1536     "Bitor" : "or_",  1537     "Bitxor" : "xor",  1538     "Div" : "div",  1539     "FloorDiv" : "floordiv",  1540     "LeftShift" : "lshift",  1541     "Mod" : "mod",  1542     "Mul" : "mul",  1543     "Power" : "pow",  1544     "RightShift" : "rshift",  1545     "Sub" : "sub",  1546   1547     # Unary operations.  1548   1549     "Invert" : "invert",  1550     "UnaryAdd" : "pos",  1551     "UnarySub" : "neg",  1552   1553     # Augmented assignment.  1554   1555     "+=" : "iadd",  1556     "-=" : "isub",  1557     "*=" : "imul",  1558     "/=" : "idiv",  1559     "//=" : "ifloordiv",  1560     "%=" : "imod",  1561     "**=" : "ipow",  1562     "<<=" : "ilshift",  1563     ">>=" : "irshift",  1564     "&=" : "iand",  1565     "^=" : "ixor",  1566     "|=" : "ior",  1567   1568     # Comparisons.  1569   1570     "==" : "eq",  1571     "!=" : "ne",  1572     "<" : "lt",  1573     "<=" : "le",  1574     ">=" : "ge",  1575     ">" : "gt",  1576     }  1577   1578 # vim: tabstop=4 expandtab shiftwidth=4