Lichen (file common.py at c2106f746c88)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     def get_parent_path(self):   190         return ".".join([self.name] + self.namespace_path[:-1])   191    192     # Namespace management.   193    194     def enter_namespace(self, name):   195    196         "Enter the namespace having the given 'name'."   197    198         self.namespace_path.append(name)   199    200     def exit_namespace(self):   201    202         "Exit the current namespace."   203    204         self.namespace_path.pop()   205    206     # Constant reference naming.   207    208     def get_constant_name(self, value, value_type, encoding=None):   209    210         """   211         Add a new constant to the current namespace for 'value' with   212         'value_type'.   213         """   214    215         path = self.get_namespace_path()   216         init_item(self.constants, path, dict)   217         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   218    219     # Literal reference naming.   220    221     def get_literal_name(self):   222    223         "Add a new literal to the current namespace."   224    225         path = self.get_namespace_path()   226         init_item(self.literals, path, lambda: 0)   227         return "$C%d" % self.literals[path]   228    229     def next_literal(self):   230         self.literals[self.get_namespace_path()] += 1   231    232     # Temporary variable naming.   233    234     def get_temporary_name(self):   235         path = self.get_namespace_path()   236         init_item(self.temp, path, lambda: 0)   237         return "$t%d" % self.temp[path]   238    239     def next_temporary(self):   240         self.temp[self.get_namespace_path()] += 1   241    242     # Arbitrary function naming.   243    244     def get_lambda_name(self):   245         path = self.get_namespace_path()   246         init_item(self.lambdas, path, lambda: 0)   247         name = "$l%d" % self.lambdas[path]   248         self.lambdas[path] += 1   249         return name   250    251     def reset_lambdas(self):   252         self.lambdas = {}   253    254     # Constant and literal recording.   255    256     def get_constant_value(self, value, literals=None):   257    258         """   259         Encode the 'value' if appropriate, returning a value, a typename and any   260         encoding.   261         """   262    263         if isinstance(value, unicode):   264             return value.encode("utf-8"), "unicode", self.encoding   265    266         # Attempt to convert plain strings to text.   267    268         elif isinstance(value, str) and self.encoding:   269             try:   270                 return get_string_details(literals, self.encoding)   271             except UnicodeDecodeError:   272                 pass   273    274         return value, value.__class__.__name__, None   275    276     def get_constant_reference(self, ref, value, encoding=None):   277    278         """   279         Return a constant reference for the given 'ref' type and 'value', with   280         the optional 'encoding' applying to text values.   281         """   282    283         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   284    285         # Return a reference for the constant.   286    287         objpath = self.get_object_path(constant_name)   288         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   289    290         # Record the value and type for the constant.   291    292         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   293         return name_ref   294    295     def reserve_constant(self, objpath, value, origin, encoding=None):   296    297         """   298         Reserve a constant within 'objpath' with the given 'value' and having a   299         type with the given 'origin', with the optional 'encoding' applying to   300         text values.   301         """   302    303         constant_name = self.get_constant_name(value, origin)   304         objpath = self.get_object_path(constant_name)   305         self._reserve_constant(objpath, value, origin, encoding)   306    307     def _reserve_constant(self, objpath, value, origin, encoding):   308    309         """   310         Store a constant for 'objpath' with the given 'value' and 'origin', with   311         the optional 'encoding' applying to text values.   312         """   313    314         self.constant_values[objpath] = value, origin, encoding   315    316     def get_literal_reference(self, name, ref, items, cls):   317    318         """   319         Return a literal reference for the given type 'name', literal 'ref',   320         node 'items' and employing the given 'cls' as the class of the returned   321         reference object.   322         """   323    324         # Construct an invocation using the items as arguments.   325    326         typename = "$L%s" % name   327    328         invocation = compiler.ast.CallFunc(   329             compiler.ast.Name(typename),   330             items   331             )   332    333         # Get a name for the actual literal.   334    335         instname = self.get_literal_name()   336         self.next_literal()   337    338         # Record the type for the literal.   339    340         objpath = self.get_object_path(instname)   341         self.literal_types[objpath] = ref.get_origin()   342    343         # Return a wrapper for the invocation exposing the items.   344    345         return cls(   346             instname,   347             ref.instance_of(),   348             self.process_structure_node(invocation),   349             invocation.args   350             )   351    352     # Node handling.   353    354     def process_structure(self, node):   355    356         """   357         Within the given 'node', process the program structure.   358    359         During inspection, this will process global declarations, adjusting the   360         module namespace, and import statements, building a module dependency   361         hierarchy.   362    363         During translation, this will consult deduced program information and   364         output translated code.   365         """   366    367         l = []   368         for n in node.getChildNodes():   369             l.append(self.process_structure_node(n))   370         return l   371    372     def process_augassign_node(self, n):   373    374         "Process the given augmented assignment node 'n'."   375    376         op = operator_functions[n.op]   377    378         if isinstance(n.node, compiler.ast.Getattr):   379             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   380         elif isinstance(n.node, compiler.ast.Name):   381             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   382         else:   383             target = n.node   384    385         assignment = compiler.ast.Assign(   386             [target],   387             compiler.ast.CallFunc(   388                 compiler.ast.Name("$op%s" % op),   389                 [n.node, n.expr]))   390    391         return self.process_structure_node(assignment)   392    393     def process_assignment_for_object(self, original_name, source):   394    395         """   396         Return an assignment operation making 'original_name' refer to the given   397         'source'.   398         """   399    400         assignment = compiler.ast.Assign(   401             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   402             source   403             )   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_node_items(self, n, expr):   408    409         """   410         Process the given assignment node 'n' whose children are to be assigned   411         items of 'expr'.   412         """   413    414         name_ref = self.process_structure_node(expr)   415    416         # Either unpack the items and present them directly to each assignment   417         # node.   418    419         if isinstance(name_ref, LiteralSequenceRef) and \   420            self.process_literal_sequence_items(n, name_ref):   421    422             pass   423    424         # Or have the assignment nodes access each item via the sequence API.   425    426         else:   427             self.process_assignment_node_items_by_position(n, expr, name_ref)   428    429     def process_assignment_node_items_by_position(self, n, expr, name_ref):   430    431         """   432         Process the given sequence assignment node 'n', converting the node to   433         the separate assignment of each target using positional access on a   434         temporary variable representing the sequence. Use 'expr' as the assigned   435         value and 'name_ref' as the reference providing any existing temporary   436         variable.   437         """   438    439         assignments = []   440    441         # Employ existing names to access the sequence.   442         # Literal sequences do not provide names of accessible objects.   443    444         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   445             temp = name_ref.name   446    447         # For other expressions, create a temporary name to reference the items.   448    449         else:   450             temp = self.get_temporary_name()   451             self.next_temporary()   452    453             assignments.append(   454                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   455                 )   456    457         # Assign the items to the target nodes.   458    459         for i, node in enumerate(n.nodes):   460             assignments.append(   461                 compiler.ast.Assign([node], compiler.ast.Subscript(   462                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   463                 )   464    465         return self.process_structure_node(compiler.ast.Stmt(assignments))   466    467     def process_literal_sequence_items(self, n, name_ref):   468    469         """   470         Process the given assignment node 'n', obtaining from the given   471         'name_ref' the items to be assigned to the assignment targets.   472    473         Return whether this method was able to process the assignment node as   474         a sequence of direct assignments.   475         """   476    477         if len(n.nodes) == len(name_ref.items):   478             assigned_names, count = get_names_from_nodes(n.nodes)   479             accessed_names, _count = get_names_from_nodes(name_ref.items)   480    481             # Only assign directly between items if all assigned names are   482             # plain names (not attribute assignments), and if the assigned names   483             # do not appear in the accessed names.   484    485             if len(assigned_names) == count and \   486                not assigned_names.intersection(accessed_names):   487    488                 for node, item in zip(n.nodes, name_ref.items):   489                     self.process_assignment_node(node, item)   490    491                 return True   492    493             # Otherwise, use the position-based mechanism to obtain values.   494    495             else:   496                 return False   497         else:   498             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   499                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   500    501     def process_compare_node(self, n):   502    503         """   504         Process the given comparison node 'n', converting an operator sequence   505         from...   506    507         <expr1> <op1> <expr2> <op2> <expr3>   508    509         ...to...   510    511         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   512         """   513    514         invocations = []   515         last = n.expr   516    517         for op, op_node in n.ops:   518             op = operator_functions.get(op)   519    520             invocations.append(compiler.ast.CallFunc(   521                 compiler.ast.Name("$op%s" % op),   522                 [last, op_node]))   523    524             last = op_node   525    526         if len(invocations) > 1:   527             result = compiler.ast.And(invocations)   528         else:   529             result = invocations[0]   530    531         return self.process_structure_node(result)   532    533     def process_dict_node(self, node):   534    535         """   536         Process the given dictionary 'node', returning a list of (key, value)   537         tuples.   538         """   539    540         l = []   541         for key, value in node.items:   542             l.append((   543                 self.process_structure_node(key),   544                 self.process_structure_node(value)))   545         return l   546    547     def process_for_node(self, n):   548    549         """   550         Generate attribute accesses for {n.list}.__iter__ and the next method on   551         the iterator, producing a replacement node for the original.   552         """   553    554         t0 = self.get_temporary_name()   555         self.next_temporary()   556         t1 = self.get_temporary_name()   557         self.next_temporary()   558         i0 = self.get_temporary_name()   559         self.next_temporary()   560    561         node = compiler.ast.Stmt([   562    563             # <t0> = {n.list}   564             # <t1> = <t0>.__iter__()   565             # <i0> = <t1>.next   566    567             compiler.ast.Assign(   568                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   569                 n.list),   570    571             compiler.ast.Assign(   572                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   573                 compiler.ast.CallFunc(   574                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   575                     [])),   576    577             compiler.ast.Assign(   578                 [compiler.ast.AssName(i0, "OP_ASSIGN")],   579                 compiler.ast.Getattr(compiler.ast.Name(t1), "next")),   580    581             # try:   582             #     while True:   583             #         <var>... = <next>()   584             #         ...   585             # except StopIteration:   586             #     pass   587    588             compiler.ast.TryExcept(   589                 compiler.ast.While(   590                     compiler.ast.Name("True"),   591                     compiler.ast.Stmt([   592                         compiler.ast.Assign(   593                             [n.assign],   594                             compiler.ast.CallFunc(   595                                 compiler.ast.Name(i0),   596                                 []   597                                 )),   598                         n.body]),   599                     None),   600                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   601                 None)   602             ])   603    604         self.process_structure_node(node)   605    606     def process_literal_sequence_node(self, n, name, ref, cls):   607    608         """   609         Process the given literal sequence node 'n' as a function invocation,   610         with 'name' indicating the type of the sequence, and 'ref' being a   611         reference to the type. The 'cls' is used to instantiate a suitable name   612         reference.   613         """   614    615         if name == "dict":   616             items = []   617             for key, value in n.items:   618                 items.append(compiler.ast.Tuple([key, value]))   619         else: # name in ("list", "tuple"):   620             items = n.nodes   621    622         return self.get_literal_reference(name, ref, items, cls)   623    624     def process_operator_node(self, n):   625    626         """   627         Process the given operator node 'n' as an operator function invocation.   628         """   629    630         op = operator_functions[n.__class__.__name__]   631         invocation = compiler.ast.CallFunc(   632             compiler.ast.Name("$op%s" % op),   633             list(n.getChildNodes())   634             )   635         return self.process_structure_node(invocation)   636    637     def process_print_node(self, n):   638    639         """   640         Process the given print node 'n' as an invocation on a stream of the   641         form...   642    643         $print(dest, args, nl)   644    645         The special function name will be translated elsewhere.   646         """   647    648         nl = isinstance(n, compiler.ast.Printnl)   649         invocation = compiler.ast.CallFunc(   650             compiler.ast.Name("$print"),   651             [n.dest or compiler.ast.Name("None"),   652              compiler.ast.List(list(n.nodes)),   653              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   654             )   655         return self.process_structure_node(invocation)   656    657     def process_slice_node(self, n, expr=None):   658    659         """   660         Process the given slice node 'n' as an operator function invocation.   661         """   662    663         if n.flags == "OP_ASSIGN": op = "setslice"   664         elif n.flags == "OP_DELETE": op = "delslice"   665         else: op = "getslice"   666    667         invocation = compiler.ast.CallFunc(   668             compiler.ast.Name("$op%s" % op),   669             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   670                 (expr and [expr] or [])   671             )   672    673         # Fix parse tree structure.   674    675         if op == "delslice":   676             invocation = compiler.ast.Discard(invocation)   677    678         return self.process_structure_node(invocation)   679    680     def process_sliceobj_node(self, n):   681    682         """   683         Process the given slice object node 'n' as a slice constructor.   684         """   685    686         op = "slice"   687         invocation = compiler.ast.CallFunc(   688             compiler.ast.Name("$op%s" % op),   689             n.nodes   690             )   691         return self.process_structure_node(invocation)   692    693     def process_subscript_node(self, n, expr=None):   694    695         """   696         Process the given subscript node 'n' as an operator function invocation.   697         """   698    699         if n.flags == "OP_ASSIGN": op = "setitem"   700         elif n.flags == "OP_DELETE": op = "delitem"   701         else: op = "getitem"   702    703         invocation = compiler.ast.CallFunc(   704             compiler.ast.Name("$op%s" % op),   705             [n.expr] + list(n.subs) + (expr and [expr] or [])   706             )   707    708         # Fix parse tree structure.   709    710         if op == "delitem":   711             invocation = compiler.ast.Discard(invocation)   712    713         return self.process_structure_node(invocation)   714    715     def process_attribute_chain(self, n):   716    717         """   718         Process the given attribute access node 'n'. Return a reference   719         describing the expression.   720         """   721    722         # AssAttr/Getattr are nested with the outermost access being the last   723         # access in any chain.   724    725         self.attrs.insert(0, n.attrname)   726         attrs = self.attrs   727    728         # Break attribute chains where non-access nodes are found.   729    730         if not self.have_access_expression(n):   731             self.reset_attribute_chain()   732    733         # Descend into the expression, extending backwards any existing chain,   734         # or building another for the expression.   735    736         name_ref = self.process_structure_node(n.expr)   737    738         # Restore chain information applying to this node.   739    740         if not self.have_access_expression(n):   741             self.restore_attribute_chain(attrs)   742    743         # Return immediately if the expression was another access and thus a   744         # continuation backwards along the chain. The above processing will   745         # have followed the chain all the way to its conclusion.   746    747         if self.have_access_expression(n):   748             del self.attrs[0]   749    750         return name_ref   751    752     # Attribute chain handling.   753    754     def reset_attribute_chain(self):   755    756         "Reset the attribute chain for a subexpression of an attribute access."   757    758         self.attrs = []   759         self.chain_assignment.append(self.in_assignment)   760         self.chain_invocation.append(self.in_invocation)   761         self.in_assignment = None   762         self.in_invocation = None   763    764     def restore_attribute_chain(self, attrs):   765    766         "Restore the attribute chain for an attribute access."   767    768         self.attrs = attrs   769         self.in_assignment = self.chain_assignment.pop()   770         self.in_invocation = self.chain_invocation.pop()   771    772     def have_access_expression(self, node):   773    774         "Return whether the expression associated with 'node' is Getattr."   775    776         return isinstance(node.expr, compiler.ast.Getattr)   777    778     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   779    780         """   781         Return the name to be used for attribute usage observations involving   782         the given 'name' in the current namespace.   783    784         If the name is being used outside a function, and if 'name_ref' is   785         given and indicates a global or if 'is_global' is specified as a true   786         value, a path featuring the name in the global namespace is returned.   787         Otherwise, a path computed using the current namespace and the given   788         name is returned.   789    790         The intention of this method is to provide a suitably-qualified name   791         that can be tracked across namespaces. Where globals are being   792         referenced in class namespaces, they should be referenced using their   793         path within the module, not using a path within each class.   794    795         It may not be possible to identify a global within a function at the   796         time of inspection (since a global may appear later in a file).   797         Consequently, globals are identified by their local name rather than   798         their module-qualified path.   799         """   800    801         # For functions, use the appropriate local names.   802    803         if self.in_function:   804             return name   805    806         # For global names outside functions, use a global name.   807    808         elif is_global or name_ref and name_ref.is_global_name():   809             return self.get_global_path(name)   810    811         # Otherwise, establish a name in the current namespace.   812    813         else:   814             return self.get_object_path(name)   815    816     def get_path_for_access(self):   817    818         "Outside functions, register accesses at the module level."   819    820         if not self.in_function:   821             return self.name   822         else:   823             return self.get_namespace_path()   824    825     def get_module_name(self, node):   826    827         """   828         Using the given From 'node' in this module, calculate any relative import   829         information, returning a tuple containing a module to import along with any   830         names to import based on the node's name information.   831    832         Where the returned module is given as None, whole module imports should   833         be performed for the returned modules using the returned names.   834         """   835    836         # Absolute import.   837    838         if node.level == 0:   839             return node.modname, node.names   840    841         # Relative to an ancestor of this module.   842    843         else:   844             path = self.name.split(".")   845             level = node.level   846    847             # Relative imports treat package roots as submodules.   848    849             if split(self.filename)[-1] == "__init__.py":   850                 level -= 1   851    852             if level > len(path):   853                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   854                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   855    856             basename = ".".join(path[:len(path)-level])   857    858         # Name imports from a module.   859    860         if node.modname:   861             return "%s.%s" % (basename, node.modname), node.names   862    863         # Relative whole module imports.   864    865         else:   866             return basename, node.names   867    868 def get_argnames(args):   869    870     """   871     Return a list of all names provided by 'args'. Since tuples may be   872     employed, the arguments are traversed depth-first.   873     """   874    875     l = []   876     for arg in args:   877         if isinstance(arg, tuple):   878             l += get_argnames(arg)   879         else:   880             l.append(arg)   881     return l   882    883 def get_names_from_nodes(nodes):   884    885     """   886     Return the names employed in the given 'nodes' along with the number of   887     nodes excluding sequences.   888     """   889    890     names = set()   891     count = 0   892    893     for node in nodes:   894    895         # Add names and count them.   896    897         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   898             names.add(node.name)   899             count += 1   900    901         # Add names from sequences and incorporate their counts.   902    903         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   904                                compiler.ast.List, compiler.ast.Set,   905                                compiler.ast.Tuple)):   906             _names, _count = get_names_from_nodes(node.nodes)   907             names.update(_names)   908             count += _count   909    910         # Count non-name, non-sequence nodes.   911    912         else:   913             count += 1   914    915     return names, count   916    917 # Result classes.   918    919 class InstructionSequence:   920    921     "A generic sequence of instructions."   922    923     def __init__(self, instructions):   924         self.instructions = instructions   925    926     def get_value_instruction(self):   927         return self.instructions[-1]   928    929     def get_init_instructions(self):   930         return self.instructions[:-1]   931    932 # Dictionary utilities.   933    934 def init_item(d, key, fn):   935    936     """   937     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   938     value where no entry already exists.   939     """   940    941     if not d.has_key(key):   942         d[key] = fn()   943     return d[key]   944    945 def dict_for_keys(d, keys):   946    947     "Return a new dictionary containing entries from 'd' for the given 'keys'."   948    949     nd = {}   950     for key in keys:   951         if d.has_key(key):   952             nd[key] = d[key]   953     return nd   954    955 def make_key(s):   956    957     "Make sequence 's' into a tuple-based key, first sorting its contents."   958    959     l = list(s)   960     l.sort()   961     return tuple(l)   962    963 def add_counter_item(d, key):   964    965     """   966     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   967     maintaining a mapping of keys to their order of insertion.   968     """   969    970     if not d.has_key(key):   971         d[key] = len(d.keys())   972     return d[key]    973    974 def remove_items(d1, d2):   975    976     "Remove from 'd1' all items from 'd2'."   977    978     for key in d2.keys():   979         if d1.has_key(key):   980             del d1[key]   981    982 # Set utilities.   983    984 def first(s):   985     return list(s)[0]   986    987 def same(s1, s2):   988     return set(s1) == set(s2)   989    990 # General input/output.   991    992 def readfile(filename):   993    994     "Return the contents of 'filename'."   995    996     f = open(filename)   997     try:   998         return f.read()   999     finally:  1000         f.close()  1001   1002 def writefile(filename, s):  1003   1004     "Write to 'filename' the string 's'."  1005   1006     f = open(filename, "w")  1007     try:  1008         f.write(s)  1009     finally:  1010         f.close()  1011   1012 # General encoding.  1013   1014 def sorted_output(x):  1015   1016     "Sort sequence 'x' and return a string with commas separating the values."  1017   1018     x = map(str, x)  1019     x.sort()  1020     return ", ".join(x)  1021   1022 def get_string_details(literals, encoding):  1023   1024     """  1025     Determine whether 'literals' represent Unicode strings or byte strings,  1026     using 'encoding' to reproduce byte sequences.  1027   1028     Each literal is the full program representation including prefix and quotes  1029     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1030     string needs to be translated back to its original encoding.  1031   1032     Return a single encoded literal value, a type name, and the original  1033     encoding as a tuple.  1034     """  1035   1036     typename = "unicode"  1037   1038     l = []  1039   1040     for s in literals:  1041         out, _typename = get_literal_details(s)  1042         if _typename == "str":  1043             typename = "str"  1044         l.append(out)  1045   1046     out = "".join(l)  1047   1048     # For Unicode values, convert to the UTF-8 program representation.  1049   1050     if typename == "unicode":  1051         return out.encode("utf-8"), typename, encoding  1052   1053     # For byte string values, convert back to the original encoding.  1054   1055     else:  1056         return out.encode(encoding), typename, encoding  1057   1058 def get_literal_details(s):  1059   1060     """  1061     Determine whether 's' represents a Unicode string or a byte string, where  1062     's' contains the full program representation of a literal including prefix  1063     and quotes, recoded by the parser to UTF-8.  1064   1065     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1066     and byte or Unicode values starting with <backslash><octal digit> or  1067     <backslash>x.  1068   1069     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1070     to be considered as Unicode values. Otherwise, they produce byte values and  1071     cause unprefixed strings to be considered as byte strings.  1072   1073     Literals prefixed with "r" do not have their backslash-encoded values  1074     converted unless also prefixed with "u", in which case only the above value  1075     formats are converted, not any of the other special sequences for things  1076     like newlines.  1077   1078     Return the literal value as a Unicode object together with the appropriate  1079     type name in a tuple.  1080     """  1081   1082     l = []  1083   1084     # Identify the quote character and use it to identify the prefix.  1085   1086     quote_type = s[-1]  1087     prefix_end = s.find(quote_type)  1088     prefix = s[:prefix_end].lower()  1089   1090     if prefix not in ("", "b", "br", "r", "u", "ur"):  1091         raise ValueError, "String literal does not have a supported prefix: %s" % s  1092   1093     if "b" in prefix:  1094         typename = "str"  1095     else:  1096         typename = "unicode"  1097   1098     # Identify triple quotes or single quotes.  1099   1100     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1101         quote = s[prefix_end:prefix_end+3]  1102         current = prefix_end + 3  1103         end = len(s) - 3  1104     else:  1105         quote = s[prefix_end]  1106         current = prefix_end + 1  1107         end = len(s) - 1  1108   1109     # Conversions of some quoted values.  1110   1111     searches = {  1112         "u" : (6, 16),  1113         "U" : (10, 16),  1114         "x" : (4, 16),  1115         }  1116   1117     octal_digits = map(str, range(0, 8))  1118   1119     # Translations of some quoted values.  1120   1121     escaped = {  1122         "\\" : "\\", "'" : "'", '"' : '"',  1123         "a" : "\a", "b" : "\b", "f" : "\f",  1124         "n" : "\n", "r" : "\r", "t" : "\t",  1125         }  1126   1127     while current < end:  1128   1129         # Look for quoted values.  1130   1131         index = s.find("\\", current)  1132         if index == -1 or index + 1 == end:  1133             l.append(s[current:end])  1134             break  1135   1136         # Add the preceding text.  1137   1138         l.append(s[current:index])  1139   1140         # Handle quoted text.  1141   1142         term = s[index+1]  1143   1144         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1145         # produce Unicode values.  1146   1147         if typename == "unicode" and (  1148             term in ("u", "U") or   1149             "u" in prefix and (term == "x" or term in octal_digits)):  1150   1151             needed, base = searches.get(term, (4, 8))  1152             value = convert_quoted_value(s, index, needed, end, base, unichr)  1153             l.append(value)  1154             current = index + needed  1155   1156         # Add raw byte values, changing the string type.  1157   1158         elif "r" not in prefix and (  1159              term == "x" or term in octal_digits):  1160   1161             needed, base = searches.get(term, (4, 8))  1162             value = convert_quoted_value(s, index, needed, end, base, chr)  1163             l.append(value)  1164             typename = "str"  1165             current = index + needed  1166   1167         # Add other escaped values.  1168   1169         elif "r" not in prefix and escaped.has_key(term):  1170             l.append(escaped[term])  1171             current = index + 2  1172   1173         # Add other text as found.  1174   1175         else:  1176             l.append(s[index:index+2])  1177             current = index + 2  1178   1179     # Collect the components into a single Unicode object. Since the literal  1180     # text was already in UTF-8 form, interpret plain strings as UTF-8  1181     # sequences.  1182   1183     out = []  1184   1185     for value in l:  1186         if isinstance(value, unicode):  1187             out.append(value)  1188         else:  1189             out.append(unicode(value, "utf-8"))  1190   1191     return "".join(out), typename  1192   1193 def convert_quoted_value(s, index, needed, end, base, fn):  1194   1195     """  1196     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1197     positions, and with the given 'end' indicating the first position after the  1198     end of the actual string content.  1199   1200     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1201     to convert the value to an appropriate type.  1202     """  1203   1204     s = s[index:min(index+needed, end)]  1205   1206     # Not a complete occurrence.  1207   1208     if len(s) < needed:  1209         return s  1210   1211     # Test for a well-formed value.  1212   1213     try:  1214         first = base == 8 and 1 or 2  1215         value = int(s[first:needed], base)  1216     except ValueError:  1217         return s  1218     else:  1219         return fn(value)  1220   1221 # Attribute chain decoding.  1222   1223 def get_attrnames(attrnames):  1224   1225     """  1226     Split the qualified attribute chain 'attrnames' into its components,  1227     handling special attributes starting with "#" that indicate type  1228     conformance.  1229     """  1230   1231     if attrnames.startswith("#"):  1232         return [attrnames]  1233     else:  1234         return attrnames.split(".")  1235   1236 def get_attrname_from_location(location):  1237   1238     """  1239     Extract the first attribute from the attribute names employed in a  1240     'location'.  1241     """  1242   1243     path, name, attrnames, access = location  1244     if not attrnames:  1245         return attrnames  1246     return get_attrnames(attrnames)[0]  1247   1248 def get_name_path(path, name):  1249   1250     "Return a suitable qualified name from the given 'path' and 'name'."  1251   1252     if "." in name:  1253         return name  1254     else:  1255         return "%s.%s" % (path, name)  1256   1257 # Usage-related functions.  1258   1259 def get_types_for_usage(attrnames, objects):  1260   1261     """  1262     Identify the types that can support the given 'attrnames', using the  1263     given 'objects' as the catalogue of type details.  1264     """  1265   1266     types = []  1267     for name, _attrnames in objects.items():  1268         if set(attrnames).issubset(_attrnames):  1269             types.append(name)  1270     return types  1271   1272 def get_invoked_attributes(usage):  1273   1274     "Obtain invoked attribute from the given 'usage'."  1275   1276     invoked = []  1277     if usage:  1278         for attrname, invocation, assignment in usage:  1279             if invocation:  1280                 invoked.append(attrname)  1281     return invoked  1282   1283 def get_assigned_attributes(usage):  1284   1285     "Obtain assigned attribute from the given 'usage'."  1286   1287     assigned = []  1288     if usage:  1289         for attrname, invocation, assignment in usage:  1290             if assignment:  1291                 assigned.append(attrname)  1292     return assigned  1293   1294 # Type and module functions.  1295 # NOTE: This makes assumptions about the __builtins__ structure.  1296   1297 def get_builtin_module(name):  1298   1299     "Return the module name containing the given type 'name'."  1300   1301     if name == "string":  1302         modname = "str"  1303     elif name == "utf8string":  1304         modname = "unicode"  1305     elif name == "NoneType":  1306         modname = "none"  1307     else:  1308         modname = name  1309   1310     return "__builtins__.%s" % modname  1311   1312 def get_builtin_type(name):  1313   1314     "Return the type name provided by the given Python value 'name'."  1315   1316     if name == "str":  1317         return "string"  1318     elif name == "unicode":  1319         return "utf8string"  1320     else:  1321         return name  1322   1323 def get_builtin_class(name):  1324   1325     "Return the full name of the built-in class having the given 'name'."  1326   1327     typename = get_builtin_type(name)  1328     module = get_builtin_module(typename)  1329     return "%s.%s" % (module, typename)  1330   1331 # Useful data.  1332   1333 predefined_constants = "False", "None", "NotImplemented", "True"  1334   1335 operator_functions = {  1336   1337     # Fundamental operations.  1338   1339     "is" : "is_",  1340     "is not" : "is_not",  1341   1342     # Binary operations.  1343   1344     "in" : "in_",  1345     "not in" : "not_in",  1346     "Add" : "add",  1347     "Bitand" : "and_",  1348     "Bitor" : "or_",  1349     "Bitxor" : "xor",  1350     "Div" : "div",  1351     "FloorDiv" : "floordiv",  1352     "LeftShift" : "lshift",  1353     "Mod" : "mod",  1354     "Mul" : "mul",  1355     "Power" : "pow",  1356     "RightShift" : "rshift",  1357     "Sub" : "sub",  1358   1359     # Unary operations.  1360   1361     "Invert" : "invert",  1362     "UnaryAdd" : "pos",  1363     "UnarySub" : "neg",  1364   1365     # Augmented assignment.  1366   1367     "+=" : "iadd",  1368     "-=" : "isub",  1369     "*=" : "imul",  1370     "/=" : "idiv",  1371     "//=" : "ifloordiv",  1372     "%=" : "imod",  1373     "**=" : "ipow",  1374     "<<=" : "ilshift",  1375     ">>=" : "irshift",  1376     "&=" : "iand",  1377     "^=" : "ixor",  1378     "|=" : "ior",  1379   1380     # Comparisons.  1381   1382     "==" : "eq",  1383     "!=" : "ne",  1384     "<" : "lt",  1385     "<=" : "le",  1386     ">=" : "ge",  1387     ">" : "gt",  1388     }  1389   1390 # vim: tabstop=4 expandtab shiftwidth=4