Lichen (file common.py at eab034cdda85)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     def get_parent_path(self):   190         return ".".join([self.name] + self.namespace_path[:-1])   191    192     # Namespace management.   193    194     def enter_namespace(self, name):   195    196         "Enter the namespace having the given 'name'."   197    198         self.namespace_path.append(name)   199    200     def exit_namespace(self):   201    202         "Exit the current namespace."   203    204         self.namespace_path.pop()   205    206     # Constant reference naming.   207    208     def get_constant_name(self, value, value_type, encoding=None):   209    210         """   211         Add a new constant to the current namespace for 'value' with   212         'value_type'.   213         """   214    215         path = self.get_namespace_path()   216         init_item(self.constants, path, dict)   217         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   218    219     # Literal reference naming.   220    221     def get_literal_name(self):   222    223         "Add a new literal to the current namespace."   224    225         path = self.get_namespace_path()   226         init_item(self.literals, path, lambda: 0)   227         return "$C%d" % self.literals[path]   228    229     def next_literal(self):   230         self.literals[self.get_namespace_path()] += 1   231    232     # Temporary variable naming.   233    234     def get_temporary_name(self):   235         path = self.get_namespace_path()   236         init_item(self.temp, path, lambda: 0)   237         return "$t%d" % self.temp[path]   238    239     def next_temporary(self):   240         self.temp[self.get_namespace_path()] += 1   241    242     # Arbitrary function naming.   243    244     def get_lambda_name(self):   245         path = self.get_namespace_path()   246         init_item(self.lambdas, path, lambda: 0)   247         name = "$l%d" % self.lambdas[path]   248         self.lambdas[path] += 1   249         return name   250    251     def reset_lambdas(self):   252         self.lambdas = {}   253    254     # Constant and literal recording.   255    256     def get_constant_value(self, value, literals=None):   257    258         """   259         Encode the 'value' if appropriate, returning a value, a typename and any   260         encoding.   261         """   262    263         if isinstance(value, unicode):   264             return value.encode("utf-8"), "unicode", self.encoding   265    266         # Attempt to convert plain strings to text.   267    268         elif isinstance(value, str) and self.encoding:   269             try:   270                 return get_string_details(literals, self.encoding)   271             except UnicodeDecodeError:   272                 pass   273    274         return value, value.__class__.__name__, None   275    276     def get_constant_reference(self, ref, value, encoding=None):   277    278         """   279         Return a constant reference for the given 'ref' type and 'value', with   280         the optional 'encoding' applying to text values.   281         """   282    283         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   284    285         # Return a reference for the constant.   286    287         objpath = self.get_object_path(constant_name)   288         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   289    290         # Record the value and type for the constant.   291    292         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   293         return name_ref   294    295     def reserve_constant(self, objpath, value, origin, encoding=None):   296    297         """   298         Reserve a constant within 'objpath' with the given 'value' and having a   299         type with the given 'origin', with the optional 'encoding' applying to   300         text values.   301         """   302    303         constant_name = self.get_constant_name(value, origin)   304         objpath = self.get_object_path(constant_name)   305         self._reserve_constant(objpath, value, origin, encoding)   306    307     def _reserve_constant(self, objpath, value, origin, encoding):   308    309         """   310         Store a constant for 'objpath' with the given 'value' and 'origin', with   311         the optional 'encoding' applying to text values.   312         """   313    314         self.constant_values[objpath] = value, origin, encoding   315    316     def get_literal_reference(self, name, ref, items, cls):   317    318         """   319         Return a literal reference for the given type 'name', literal 'ref',   320         node 'items' and employing the given 'cls' as the class of the returned   321         reference object.   322         """   323    324         # Construct an invocation using the items as arguments.   325    326         typename = "$L%s" % name   327    328         invocation = compiler.ast.CallFunc(   329             compiler.ast.Name(typename),   330             items   331             )   332    333         # Get a name for the actual literal.   334    335         instname = self.get_literal_name()   336         self.next_literal()   337    338         # Record the type for the literal.   339    340         objpath = self.get_object_path(instname)   341         self.literal_types[objpath] = ref.get_origin()   342    343         # Return a wrapper for the invocation exposing the items.   344    345         return cls(   346             instname,   347             ref.instance_of(),   348             self.process_structure_node(invocation),   349             invocation.args   350             )   351    352     # Node handling.   353    354     def process_structure(self, node):   355    356         """   357         Within the given 'node', process the program structure.   358    359         During inspection, this will process global declarations, adjusting the   360         module namespace, and import statements, building a module dependency   361         hierarchy.   362    363         During translation, this will consult deduced program information and   364         output translated code.   365         """   366    367         l = []   368         for n in node.getChildNodes():   369             l.append(self.process_structure_node(n))   370         return l   371    372     def process_augassign_node(self, n):   373    374         "Process the given augmented assignment node 'n'."   375    376         op = operator_functions[n.op]   377    378         if isinstance(n.node, compiler.ast.Getattr):   379             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   380         elif isinstance(n.node, compiler.ast.Name):   381             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   382         else:   383             target = n.node   384    385         assignment = compiler.ast.Assign(   386             [target],   387             compiler.ast.CallFunc(   388                 compiler.ast.Name("$op%s" % op),   389                 [n.node, n.expr]))   390    391         return self.process_structure_node(assignment)   392    393     def process_assignment_for_object(self, original_name, source):   394    395         """   396         Return an assignment operation making 'original_name' refer to the given   397         'source'.   398         """   399    400         assignment = compiler.ast.Assign(   401             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   402             source   403             )   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_node_items(self, n, expr):   408    409         """   410         Process the given assignment node 'n' whose children are to be assigned   411         items of 'expr'.   412         """   413    414         name_ref = self.process_structure_node(expr)   415    416         # Either unpack the items and present them directly to each assignment   417         # node.   418    419         if isinstance(name_ref, LiteralSequenceRef) and \   420            self.process_literal_sequence_items(n, name_ref):   421    422             pass   423    424         # Or have the assignment nodes access each item via the sequence API.   425    426         else:   427             self.process_assignment_node_items_by_position(n, expr, name_ref)   428    429     def process_assignment_node_items_by_position(self, n, expr, name_ref):   430    431         """   432         Process the given sequence assignment node 'n', converting the node to   433         the separate assignment of each target using positional access on a   434         temporary variable representing the sequence. Use 'expr' as the assigned   435         value and 'name_ref' as the reference providing any existing temporary   436         variable.   437         """   438    439         assignments = []   440    441         # Employ existing names to access the sequence.   442         # Literal sequences do not provide names of accessible objects.   443    444         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   445             temp = name_ref.name   446    447         # For other expressions, create a temporary name to reference the items.   448    449         else:   450             temp = self.get_temporary_name()   451             self.next_temporary()   452    453             assignments.append(   454                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   455                 )   456    457         # Assign the items to the target nodes.   458    459         for i, node in enumerate(n.nodes):   460             assignments.append(   461                 compiler.ast.Assign([node], compiler.ast.Subscript(   462                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   463                 )   464    465         return self.process_structure_node(compiler.ast.Stmt(assignments))   466    467     def process_literal_sequence_items(self, n, name_ref):   468    469         """   470         Process the given assignment node 'n', obtaining from the given   471         'name_ref' the items to be assigned to the assignment targets.   472    473         Return whether this method was able to process the assignment node as   474         a sequence of direct assignments.   475         """   476    477         if len(n.nodes) == len(name_ref.items):   478             assigned_names, count = get_names_from_nodes(n.nodes)   479             accessed_names, _count = get_names_from_nodes(name_ref.items)   480    481             # Only assign directly between items if all assigned names are   482             # plain names (not attribute assignments), and if the assigned names   483             # do not appear in the accessed names.   484    485             if len(assigned_names) == count and \   486                not assigned_names.intersection(accessed_names):   487    488                 for node, item in zip(n.nodes, name_ref.items):   489                     self.process_assignment_node(node, item)   490    491                 return True   492    493             # Otherwise, use the position-based mechanism to obtain values.   494    495             else:   496                 return False   497         else:   498             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   499                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   500    501     def process_compare_node(self, n):   502    503         """   504         Process the given comparison node 'n', converting an operator sequence   505         from...   506    507         <expr1> <op1> <expr2> <op2> <expr3>   508    509         ...to...   510    511         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   512         """   513    514         invocations = []   515         last = n.expr   516    517         for op, op_node in n.ops:   518             op = operator_functions.get(op)   519    520             invocations.append(compiler.ast.CallFunc(   521                 compiler.ast.Name("$op%s" % op),   522                 [last, op_node]))   523    524             last = op_node   525    526         if len(invocations) > 1:   527             result = compiler.ast.And(invocations)   528         else:   529             result = invocations[0]   530    531         return self.process_structure_node(result)   532    533     def process_dict_node(self, node):   534    535         """   536         Process the given dictionary 'node', returning a list of (key, value)   537         tuples.   538         """   539    540         l = []   541         for key, value in node.items:   542             l.append((   543                 self.process_structure_node(key),   544                 self.process_structure_node(value)))   545         return l   546    547     def process_for_node(self, n):   548    549         """   550         Generate attribute accesses for {n.list}.__iter__ and the next method on   551         the iterator, producing a replacement node for the original.   552         """   553    554         i0 = self.get_temporary_name()   555         self.next_temporary()   556    557         node = compiler.ast.Stmt([   558    559             # <next> = {n.list}.__iter__().next   560    561             compiler.ast.Assign(   562                 [compiler.ast.AssName(i0, "OP_ASSIGN")],   563                 compiler.ast.Getattr(   564                     compiler.ast.CallFunc(   565                         compiler.ast.Getattr(n.list, "__iter__"),   566                         []   567                         ), "next")),   568    569             # try:   570             #     while True:   571             #         <var>... = <next>()   572             #         ...   573             # except StopIteration:   574             #     pass   575    576             compiler.ast.TryExcept(   577                 compiler.ast.While(   578                     compiler.ast.Name("True"),   579                     compiler.ast.Stmt([   580                         compiler.ast.Assign(   581                             [n.assign],   582                             compiler.ast.CallFunc(   583                                 compiler.ast.Name(i0),   584                                 []   585                                 )),   586                         n.body]),   587                     None),   588                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   589                 None)   590             ])   591    592         self.process_structure_node(node)   593    594     def process_literal_sequence_node(self, n, name, ref, cls):   595    596         """   597         Process the given literal sequence node 'n' as a function invocation,   598         with 'name' indicating the type of the sequence, and 'ref' being a   599         reference to the type. The 'cls' is used to instantiate a suitable name   600         reference.   601         """   602    603         if name == "dict":   604             items = []   605             for key, value in n.items:   606                 items.append(compiler.ast.Tuple([key, value]))   607         else: # name in ("list", "tuple"):   608             items = n.nodes   609    610         return self.get_literal_reference(name, ref, items, cls)   611    612     def process_operator_node(self, n):   613    614         """   615         Process the given operator node 'n' as an operator function invocation.   616         """   617    618         op = operator_functions[n.__class__.__name__]   619         invocation = compiler.ast.CallFunc(   620             compiler.ast.Name("$op%s" % op),   621             list(n.getChildNodes())   622             )   623         return self.process_structure_node(invocation)   624    625     def process_print_node(self, n):   626    627         """   628         Process the given print node 'n' as an invocation on a stream of the   629         form...   630    631         $print(dest, args, nl)   632    633         The special function name will be translated elsewhere.   634         """   635    636         nl = isinstance(n, compiler.ast.Printnl)   637         invocation = compiler.ast.CallFunc(   638             compiler.ast.Name("$print"),   639             [n.dest or compiler.ast.Name("None"),   640              compiler.ast.List(list(n.nodes)),   641              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   642             )   643         return self.process_structure_node(invocation)   644    645     def process_slice_node(self, n, expr=None):   646    647         """   648         Process the given slice node 'n' as an operator function invocation.   649         """   650    651         if n.flags == "OP_ASSIGN": op = "setslice"   652         elif n.flags == "OP_DELETE": op = "delslice"   653         else: op = "getslice"   654    655         invocation = compiler.ast.CallFunc(   656             compiler.ast.Name("$op%s" % op),   657             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   658                 (expr and [expr] or [])   659             )   660    661         # Fix parse tree structure.   662    663         if op == "delslice":   664             invocation = compiler.ast.Discard(invocation)   665    666         return self.process_structure_node(invocation)   667    668     def process_sliceobj_node(self, n):   669    670         """   671         Process the given slice object node 'n' as a slice constructor.   672         """   673    674         op = "slice"   675         invocation = compiler.ast.CallFunc(   676             compiler.ast.Name("$op%s" % op),   677             n.nodes   678             )   679         return self.process_structure_node(invocation)   680    681     def process_subscript_node(self, n, expr=None):   682    683         """   684         Process the given subscript node 'n' as an operator function invocation.   685         """   686    687         if n.flags == "OP_ASSIGN": op = "setitem"   688         elif n.flags == "OP_DELETE": op = "delitem"   689         else: op = "getitem"   690    691         invocation = compiler.ast.CallFunc(   692             compiler.ast.Name("$op%s" % op),   693             [n.expr] + list(n.subs) + (expr and [expr] or [])   694             )   695    696         # Fix parse tree structure.   697    698         if op == "delitem":   699             invocation = compiler.ast.Discard(invocation)   700    701         return self.process_structure_node(invocation)   702    703     def process_attribute_chain(self, n):   704    705         """   706         Process the given attribute access node 'n'. Return a reference   707         describing the expression.   708         """   709    710         # AssAttr/Getattr are nested with the outermost access being the last   711         # access in any chain.   712    713         self.attrs.insert(0, n.attrname)   714         attrs = self.attrs   715    716         # Break attribute chains where non-access nodes are found.   717    718         if not self.have_access_expression(n):   719             self.reset_attribute_chain()   720    721         # Descend into the expression, extending backwards any existing chain,   722         # or building another for the expression.   723    724         name_ref = self.process_structure_node(n.expr)   725    726         # Restore chain information applying to this node.   727    728         if not self.have_access_expression(n):   729             self.restore_attribute_chain(attrs)   730    731         # Return immediately if the expression was another access and thus a   732         # continuation backwards along the chain. The above processing will   733         # have followed the chain all the way to its conclusion.   734    735         if self.have_access_expression(n):   736             del self.attrs[0]   737    738         return name_ref   739    740     # Attribute chain handling.   741    742     def reset_attribute_chain(self):   743    744         "Reset the attribute chain for a subexpression of an attribute access."   745    746         self.attrs = []   747         self.chain_assignment.append(self.in_assignment)   748         self.chain_invocation.append(self.in_invocation)   749         self.in_assignment = None   750         self.in_invocation = None   751    752     def restore_attribute_chain(self, attrs):   753    754         "Restore the attribute chain for an attribute access."   755    756         self.attrs = attrs   757         self.in_assignment = self.chain_assignment.pop()   758         self.in_invocation = self.chain_invocation.pop()   759    760     def have_access_expression(self, node):   761    762         "Return whether the expression associated with 'node' is Getattr."   763    764         return isinstance(node.expr, compiler.ast.Getattr)   765    766     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   767    768         """   769         Return the name to be used for attribute usage observations involving   770         the given 'name' in the current namespace.   771    772         If the name is being used outside a function, and if 'name_ref' is   773         given and indicates a global or if 'is_global' is specified as a true   774         value, a path featuring the name in the global namespace is returned.   775         Otherwise, a path computed using the current namespace and the given   776         name is returned.   777    778         The intention of this method is to provide a suitably-qualified name   779         that can be tracked across namespaces. Where globals are being   780         referenced in class namespaces, they should be referenced using their   781         path within the module, not using a path within each class.   782    783         It may not be possible to identify a global within a function at the   784         time of inspection (since a global may appear later in a file).   785         Consequently, globals are identified by their local name rather than   786         their module-qualified path.   787         """   788    789         # For functions, use the appropriate local names.   790    791         if self.in_function:   792             return name   793    794         # For global names outside functions, use a global name.   795    796         elif is_global or name_ref and name_ref.is_global_name():   797             return self.get_global_path(name)   798    799         # Otherwise, establish a name in the current namespace.   800    801         else:   802             return self.get_object_path(name)   803    804     def get_path_for_access(self):   805    806         "Outside functions, register accesses at the module level."   807    808         if not self.in_function:   809             return self.name   810         else:   811             return self.get_namespace_path()   812    813     def get_module_name(self, node):   814    815         """   816         Using the given From 'node' in this module, calculate any relative import   817         information, returning a tuple containing a module to import along with any   818         names to import based on the node's name information.   819    820         Where the returned module is given as None, whole module imports should   821         be performed for the returned modules using the returned names.   822         """   823    824         # Absolute import.   825    826         if node.level == 0:   827             return node.modname, node.names   828    829         # Relative to an ancestor of this module.   830    831         else:   832             path = self.name.split(".")   833             level = node.level   834    835             # Relative imports treat package roots as submodules.   836    837             if split(self.filename)[-1] == "__init__.py":   838                 level -= 1   839    840             if level > len(path):   841                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   842                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   843    844             basename = ".".join(path[:len(path)-level])   845    846         # Name imports from a module.   847    848         if node.modname:   849             return "%s.%s" % (basename, node.modname), node.names   850    851         # Relative whole module imports.   852    853         else:   854             return basename, node.names   855    856 def get_argnames(args):   857    858     """   859     Return a list of all names provided by 'args'. Since tuples may be   860     employed, the arguments are traversed depth-first.   861     """   862    863     l = []   864     for arg in args:   865         if isinstance(arg, tuple):   866             l += get_argnames(arg)   867         else:   868             l.append(arg)   869     return l   870    871 def get_names_from_nodes(nodes):   872    873     """   874     Return the names employed in the given 'nodes' along with the number of   875     nodes excluding sequences.   876     """   877    878     names = set()   879     count = 0   880    881     for node in nodes:   882    883         # Add names and count them.   884    885         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   886             names.add(node.name)   887             count += 1   888    889         # Add names from sequences and incorporate their counts.   890    891         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   892                                compiler.ast.List, compiler.ast.Set,   893                                compiler.ast.Tuple)):   894             _names, _count = get_names_from_nodes(node.nodes)   895             names.update(_names)   896             count += _count   897    898         # Count non-name, non-sequence nodes.   899    900         else:   901             count += 1   902    903     return names, count   904    905 # Result classes.   906    907 class InstructionSequence:   908    909     "A generic sequence of instructions."   910    911     def __init__(self, instructions):   912         self.instructions = instructions   913    914     def get_value_instruction(self):   915         return self.instructions[-1]   916    917     def get_init_instructions(self):   918         return self.instructions[:-1]   919    920 # Dictionary utilities.   921    922 def init_item(d, key, fn):   923    924     """   925     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   926     value where no entry already exists.   927     """   928    929     if not d.has_key(key):   930         d[key] = fn()   931     return d[key]   932    933 def dict_for_keys(d, keys):   934    935     "Return a new dictionary containing entries from 'd' for the given 'keys'."   936    937     nd = {}   938     for key in keys:   939         if d.has_key(key):   940             nd[key] = d[key]   941     return nd   942    943 def make_key(s):   944    945     "Make sequence 's' into a tuple-based key, first sorting its contents."   946    947     l = list(s)   948     l.sort()   949     return tuple(l)   950    951 def add_counter_item(d, key):   952    953     """   954     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   955     maintaining a mapping of keys to their order of insertion.   956     """   957    958     if not d.has_key(key):   959         d[key] = len(d.keys())   960     return d[key]    961    962 def remove_items(d1, d2):   963    964     "Remove from 'd1' all items from 'd2'."   965    966     for key in d2.keys():   967         if d1.has_key(key):   968             del d1[key]   969    970 # Set utilities.   971    972 def first(s):   973     return list(s)[0]   974    975 def same(s1, s2):   976     return set(s1) == set(s2)   977    978 # General input/output.   979    980 def readfile(filename):   981    982     "Return the contents of 'filename'."   983    984     f = open(filename)   985     try:   986         return f.read()   987     finally:   988         f.close()   989    990 def writefile(filename, s):   991    992     "Write to 'filename' the string 's'."   993    994     f = open(filename, "w")   995     try:   996         f.write(s)   997     finally:   998         f.close()   999   1000 # General encoding.  1001   1002 def sorted_output(x):  1003   1004     "Sort sequence 'x' and return a string with commas separating the values."  1005   1006     x = map(str, x)  1007     x.sort()  1008     return ", ".join(x)  1009   1010 def get_string_details(literals, encoding):  1011   1012     """  1013     Determine whether 'literals' represent Unicode strings or byte strings,  1014     using 'encoding' to reproduce byte sequences.  1015   1016     Each literal is the full program representation including prefix and quotes  1017     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1018     string needs to be translated back to its original encoding.  1019   1020     Return a single encoded literal value, a type name, and the original  1021     encoding as a tuple.  1022     """  1023   1024     typename = "unicode"  1025   1026     l = []  1027   1028     for s in literals:  1029         out, _typename = get_literal_details(s)  1030         if _typename == "str":  1031             typename = "str"  1032         l.append(out)  1033   1034     out = "".join(l)  1035   1036     # For Unicode values, convert to the UTF-8 program representation.  1037   1038     if typename == "unicode":  1039         return out.encode("utf-8"), typename, encoding  1040   1041     # For byte string values, convert back to the original encoding.  1042   1043     else:  1044         return out.encode(encoding), typename, encoding  1045   1046 def get_literal_details(s):  1047   1048     """  1049     Determine whether 's' represents a Unicode string or a byte string, where  1050     's' contains the full program representation of a literal including prefix  1051     and quotes, recoded by the parser to UTF-8.  1052   1053     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1054     and byte or Unicode values starting with <backslash><octal digit> or  1055     <backslash>x.  1056   1057     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1058     to be considered as Unicode values. Otherwise, they produce byte values and  1059     cause unprefixed strings to be considered as byte strings.  1060   1061     Literals prefixed with "r" do not have their backslash-encoded values  1062     converted unless also prefixed with "u", in which case only the above value  1063     formats are converted, not any of the other special sequences for things  1064     like newlines.  1065   1066     Return the literal value as a Unicode object together with the appropriate  1067     type name in a tuple.  1068     """  1069   1070     l = []  1071   1072     # Identify the quote character and use it to identify the prefix.  1073   1074     quote_type = s[-1]  1075     prefix_end = s.find(quote_type)  1076     prefix = s[:prefix_end].lower()  1077   1078     if prefix not in ("", "b", "br", "r", "u", "ur"):  1079         raise ValueError, "String literal does not have a supported prefix: %s" % s  1080   1081     if "b" in prefix:  1082         typename = "str"  1083     else:  1084         typename = "unicode"  1085   1086     # Identify triple quotes or single quotes.  1087   1088     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1089         quote = s[prefix_end:prefix_end+3]  1090         current = prefix_end + 3  1091         end = len(s) - 3  1092     else:  1093         quote = s[prefix_end]  1094         current = prefix_end + 1  1095         end = len(s) - 1  1096   1097     # Conversions of some quoted values.  1098   1099     searches = {  1100         "u" : (6, 16),  1101         "U" : (10, 16),  1102         "x" : (4, 16),  1103         }  1104   1105     octal_digits = map(str, range(0, 8))  1106   1107     # Translations of some quoted values.  1108   1109     escaped = {  1110         "\\" : "\\", "'" : "'", '"' : '"',  1111         "a" : "\a", "b" : "\b", "f" : "\f",  1112         "n" : "\n", "r" : "\r", "t" : "\t",  1113         }  1114   1115     while current < end:  1116   1117         # Look for quoted values.  1118   1119         index = s.find("\\", current)  1120         if index == -1 or index + 1 == end:  1121             l.append(s[current:end])  1122             break  1123   1124         # Add the preceding text.  1125   1126         l.append(s[current:index])  1127   1128         # Handle quoted text.  1129   1130         term = s[index+1]  1131   1132         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1133         # produce Unicode values.  1134   1135         if typename == "unicode" and (  1136             term in ("u", "U") or   1137             "u" in prefix and (term == "x" or term in octal_digits)):  1138   1139             needed, base = searches.get(term, (4, 8))  1140             value = convert_quoted_value(s, index, needed, end, base, unichr)  1141             l.append(value)  1142             current = index + needed  1143   1144         # Add raw byte values, changing the string type.  1145   1146         elif "r" not in prefix and (  1147              term == "x" or term in octal_digits):  1148   1149             needed, base = searches.get(term, (4, 8))  1150             value = convert_quoted_value(s, index, needed, end, base, chr)  1151             l.append(value)  1152             typename = "str"  1153             current = index + needed  1154   1155         # Add other escaped values.  1156   1157         elif "r" not in prefix and escaped.has_key(term):  1158             l.append(escaped[term])  1159             current = index + 2  1160   1161         # Add other text as found.  1162   1163         else:  1164             l.append(s[index:index+2])  1165             current = index + 2  1166   1167     # Collect the components into a single Unicode object. Since the literal  1168     # text was already in UTF-8 form, interpret plain strings as UTF-8  1169     # sequences.  1170   1171     out = []  1172   1173     for value in l:  1174         if isinstance(value, unicode):  1175             out.append(value)  1176         else:  1177             out.append(unicode(value, "utf-8"))  1178   1179     return "".join(out), typename  1180   1181 def convert_quoted_value(s, index, needed, end, base, fn):  1182   1183     """  1184     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1185     positions, and with the given 'end' indicating the first position after the  1186     end of the actual string content.  1187   1188     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1189     to convert the value to an appropriate type.  1190     """  1191   1192     s = s[index:min(index+needed, end)]  1193   1194     # Not a complete occurrence.  1195   1196     if len(s) < needed:  1197         return s  1198   1199     # Test for a well-formed value.  1200   1201     try:  1202         first = base == 8 and 1 or 2  1203         value = int(s[first:needed], base)  1204     except ValueError:  1205         return s  1206     else:  1207         return fn(value)  1208   1209 # Attribute chain decoding.  1210   1211 def get_attrnames(attrnames):  1212   1213     """  1214     Split the qualified attribute chain 'attrnames' into its components,  1215     handling special attributes starting with "#" that indicate type  1216     conformance.  1217     """  1218   1219     if attrnames.startswith("#"):  1220         return [attrnames]  1221     else:  1222         return attrnames.split(".")  1223   1224 def get_attrname_from_location(location):  1225   1226     """  1227     Extract the first attribute from the attribute names employed in a  1228     'location'.  1229     """  1230   1231     path, name, attrnames, access = location  1232     if not attrnames:  1233         return attrnames  1234     return get_attrnames(attrnames)[0]  1235   1236 def get_name_path(path, name):  1237   1238     "Return a suitable qualified name from the given 'path' and 'name'."  1239   1240     if "." in name:  1241         return name  1242     else:  1243         return "%s.%s" % (path, name)  1244   1245 # Usage-related functions.  1246   1247 def get_types_for_usage(attrnames, objects):  1248   1249     """  1250     Identify the types that can support the given 'attrnames', using the  1251     given 'objects' as the catalogue of type details.  1252     """  1253   1254     types = []  1255     for name, _attrnames in objects.items():  1256         if set(attrnames).issubset(_attrnames):  1257             types.append(name)  1258     return types  1259   1260 def get_invoked_attributes(usage):  1261   1262     "Obtain invoked attribute from the given 'usage'."  1263   1264     invoked = []  1265     if usage:  1266         for attrname, invocation, assignment in usage:  1267             if invocation:  1268                 invoked.append(attrname)  1269     return invoked  1270   1271 def get_assigned_attributes(usage):  1272   1273     "Obtain assigned attribute from the given 'usage'."  1274   1275     assigned = []  1276     if usage:  1277         for attrname, invocation, assignment in usage:  1278             if assignment:  1279                 assigned.append(attrname)  1280     return assigned  1281   1282 # Type and module functions.  1283 # NOTE: This makes assumptions about the __builtins__ structure.  1284   1285 def get_builtin_module(name):  1286   1287     "Return the module name containing the given type 'name'."  1288   1289     if name == "string":  1290         modname = "str"  1291     elif name == "utf8string":  1292         modname = "unicode"  1293     elif name == "NoneType":  1294         modname = "none"  1295     else:  1296         modname = name  1297   1298     return "__builtins__.%s" % modname  1299   1300 def get_builtin_type(name):  1301   1302     "Return the type name provided by the given Python value 'name'."  1303   1304     if name == "str":  1305         return "string"  1306     elif name == "unicode":  1307         return "utf8string"  1308     else:  1309         return name  1310   1311 def get_builtin_class(name):  1312   1313     "Return the full name of the built-in class having the given 'name'."  1314   1315     typename = get_builtin_type(name)  1316     module = get_builtin_module(typename)  1317     return "%s.%s" % (module, typename)  1318   1319 # Useful data.  1320   1321 predefined_constants = "False", "None", "NotImplemented", "True"  1322   1323 operator_functions = {  1324   1325     # Fundamental operations.  1326   1327     "is" : "is_",  1328     "is not" : "is_not",  1329   1330     # Binary operations.  1331   1332     "in" : "in_",  1333     "not in" : "not_in",  1334     "Add" : "add",  1335     "Bitand" : "and_",  1336     "Bitor" : "or_",  1337     "Bitxor" : "xor",  1338     "Div" : "div",  1339     "FloorDiv" : "floordiv",  1340     "LeftShift" : "lshift",  1341     "Mod" : "mod",  1342     "Mul" : "mul",  1343     "Power" : "pow",  1344     "RightShift" : "rshift",  1345     "Sub" : "sub",  1346   1347     # Unary operations.  1348   1349     "Invert" : "invert",  1350     "UnaryAdd" : "pos",  1351     "UnarySub" : "neg",  1352   1353     # Augmented assignment.  1354   1355     "+=" : "iadd",  1356     "-=" : "isub",  1357     "*=" : "imul",  1358     "/=" : "idiv",  1359     "//=" : "ifloordiv",  1360     "%=" : "imod",  1361     "**=" : "ipow",  1362     "<<=" : "ilshift",  1363     ">>=" : "irshift",  1364     "&=" : "iand",  1365     "^=" : "ixor",  1366     "|=" : "ior",  1367   1368     # Comparisons.  1369   1370     "==" : "eq",  1371     "!=" : "ne",  1372     "<" : "lt",  1373     "<=" : "le",  1374     ">=" : "ge",  1375     ">" : "gt",  1376     }  1377   1378 # vim: tabstop=4 expandtab shiftwidth=4