Lichen

branching.py

230:c21393bbe002
2016-11-24 Paul Boddie Added get_using support. Also added various representation methods.
     1 #!/usr/bin/env python     2      3 """     4 Track attribute usage for names.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from common import dict_for_keys, init_item    24     25 class Branch:    26     27     """    28     A control-flow branch capturing local attribute usage for names.    29     Branches typically begin with assignments or function parameters and are    30     connected to others introduced by conditional and loop nodes.    31     32     Branches hosting accesses, and thus providing usage information, are    33     contributors to preceding branches.    34     35     Branches also provide a route from accesses back to assignments which are    36     the ultimate suppliers of the names involved.    37     """    38     39     def __init__(self, names, assigning=False, values=None):    40     41         """    42         Capture attribute usage for the given 'names', with the accompanying    43         'values' indicating assigned values for each name, if indicated.    44         """    45     46         self.contributors = set()    47         self.suppliers = {}    48         self.assignments = set(assigning and names or [])    49         self.usage = {}    50         self.values = {}    51     52         # Initialise usage for each name.    53     54         for name in names:    55             self.usage[name] = set()    56     57         # Initialise assigned values if any were provided.    58     59         if values:    60             for name, value in zip(names, values):    61                 if value:    62                     self.values[name] = value    63     64         # Computed results.    65     66         self.combined_usage = None    67     68     def get_assignment_sources(self, name):    69     70         """    71         Return the sources of 'name' from this branch's assignment information,    72         returning a list containing only this branch itself if it is the source.    73         """    74     75         if name in self.assignments:    76             return [self]    77         else:    78             return [b for b in self.get_all_suppliers(name) if name in b.assignments]    79     80     def set_usage(self, name, attrname, invocation=False, assignment=False):    81     82         """    83         Record usage on the given 'name' of the attribute 'attrname', noting the    84         invocation of the attribute if 'invocation' is set to a true value, or    85         noting the assignment of the attribute if 'assignment' is set to a true    86         value.    87         """    88     89         if self.usage.has_key(name):    90             self.usage[name].add((attrname, invocation, assignment))    91     92     def get_usage(self):    93     94         """    95         Obtain usage from this node, combined with usage observed by its    96         contributors. Unlike the local usage which involves only a single set of    97         attribute names for a given variable name, the returned usage is a set    98         of attribute name combinations for a given variable name. For example:    99    100         {'a': set([('p', 'q', 'r'), ('p', 'r')])}   101         """   102    103         if self.combined_usage is None:   104    105             # Accumulate usage observations from contributors.   106    107             all_usage = []   108    109             for contributor in self.contributors:   110    111                 # Record any usage that can be returned.   112    113                 all_usage.append(contributor.get_usage())   114    115             # Merge usage from the contributors.   116    117             merged_usage = merge_dicts(all_usage)   118    119             # Make the local usage compatible with the combined usage.   120    121             usage = deepen_dict(self.usage)   122    123             self.combined_usage = combine_dicts(usage, merged_usage, combine_sets)   124    125         return self.combined_usage   126    127     def get_all_suppliers(self, name, all_suppliers=None):   128    129         "Return all branches supplying this branch with definitions of 'name'."   130    131         all_suppliers = all_suppliers or set()   132         all_suppliers.add(self)   133    134         if self.suppliers.has_key(name):   135             for supplier in self.suppliers[name]:   136                 if supplier not in all_suppliers:   137                     supplier.get_all_suppliers(name, all_suppliers)   138    139         return all_suppliers   140    141     def __repr__(self):   142         return "Branch(%r, %r)" % (self.usage.keys(),   143             self.assignments and True or False)   144    145 class BranchTracker:   146    147     """   148     A tracker of attribute usage for names in a namespace. This tracker directs   149     usage observations to branches which are the ultimate repositories of   150     attribute usage information.   151    152     As a program unit is inspected, the branches associated with names may   153     change. Assignments reset the branches; control-flow operations cause   154     branches to be accumulated from different code paths.   155     """   156    157     def __init__(self):   158    159         # Track assignments.   160    161         self.assignments = {}   162    163         # Details of attributes at each active branch level.   164    165         self.attribute_branches = [{}]          # stack of branches for names   166         self.attribute_branch_shelves = []      # stack of shelved branches   167    168         # Suspended branch details plus loop details.   169    170         self.suspended_broken_branches = []     # stack of lists of dicts   171         self.suspended_continuing_branches = [] # stack of lists of dicts   172    173         # Abandoned usage, useful for reviving usage for exception handlers.   174    175         self.abandoned_branches = [[]]          # stack of lists of branches   176    177         # Returning branches are like abandoned branches but are only revived in   178         # finally clauses.   179    180         self.returning_branches = [[]]   181    182         # Branches active when starting loops.   183    184         self.loop_branches = []   185    186     # Structure assembly methods.   187    188     def new_branchpoint(self, loop_node=False):   189    190         """   191         Indicate that branches diverge, initialising resources dependent on   192         any given 'loop_node'.   193         """   194    195         self.attribute_branch_shelves.append([])   196    197         if loop_node:   198             self.suspended_broken_branches.append([])   199             self.suspended_continuing_branches.append([])   200    201         # Retain a record of abandoned branches.   202    203         self.abandoned_branches.append([])   204         self.returning_branches.append([])   205    206     def new_branch(self, loop_node=False):   207    208         "Create a new branch."   209    210         attribute_branches = self.attribute_branches[-1]   211    212         branch, new_branches = self._new_branch(attribute_branches)   213    214         if branch and loop_node:   215             self.loop_branches.append(branch)   216    217         # Start using the branch for known names.   218    219         self.attribute_branches.append(new_branches)   220    221     def _new_branch(self, attribute_branches):   222    223         """   224         Define a new branch that will record attribute usage on known names from   225         'attribute_branches'.   226         """   227    228         # Detect abandoned branches.   229    230         if isinstance(attribute_branches, AbandonedDict):   231             return None, AbandonedDict()   232    233         # Otherwise, define a new branch.   234    235         names = attribute_branches.keys()   236    237         new_branches = {}   238         branch = Branch(names)   239    240         for name in names:   241             new_branches[name] = [branch]   242    243         # Add this new branch as a contributor to the previously active   244         # branches.   245    246         self._connect_branches(attribute_branches, branch)   247    248         return branch, new_branches   249    250     def shelve_branch(self, loop_node=False):   251    252         "Retain the current branch for later merging."   253    254         branches = self.attribute_branches.pop()   255         self.attribute_branch_shelves[-1].append(branches)   256    257         # Connect any loop branch to the active branches as contributors.   258    259         if loop_node:   260             branch = self.loop_branches.pop()   261             self._connect_branches(branches, branch, loop_node)   262    263     def abandon_branch(self):   264    265         "Abandon the current branch, retaining it for later."   266    267         attribute_branches = self.attribute_branches[-1]   268         self._abandon_branch()   269         self.abandoned_branches[-1].append(attribute_branches)   270    271     def abandon_returning_branch(self):   272    273         "Abandon the current branch, retaining it for later."   274    275         attribute_branches = self.attribute_branches[-1]   276         self._abandon_branch()   277         self.returning_branches[-1].append(attribute_branches)   278    279     def suspend_broken_branch(self):   280    281         "Suspend a branch for breaking out of a loop."   282    283         attribute_branches = self.attribute_branches[-1]   284    285         branches = self.suspended_broken_branches[-1]   286         branches.append(attribute_branches)   287         self._abandon_branch()   288    289     def suspend_continuing_branch(self):   290    291         "Suspend a branch for loop continuation."   292    293         attribute_branches = self.attribute_branches[-1]   294    295         branches = self.suspended_continuing_branches[-1]   296         branches.append(attribute_branches)   297         self._abandon_branch()   298    299     def _abandon_branch(self):   300    301         "Abandon the current branch."   302    303         self.attribute_branches[-1] = AbandonedDict()   304    305     def resume_abandoned_branches(self):   306    307         """   308         Resume branches previously abandoned.   309    310         Abandoned branches are not reset because they may not be handled by   311         exception handlers after all.   312         """   313    314         current_branches = self.attribute_branches[-1]   315         abandoned_branches = self.abandoned_branches[-1]   316         merged_branches = merge_dicts(abandoned_branches + [current_branches])   317    318         # Replace the combined branches with a new branch applying to all active   319         # names, connected to the supplying branches.   320    321         branch, new_branches = self._new_branch(merged_branches)   322         self.attribute_branches.append(new_branches)   323    324         # Although returning branches should not be considered as accumulating   325         # usage, they do provide sources of assignments.   326    327         if branch:   328             for returning_branches in self.returning_branches[-1]:   329                 self._connect_suppliers(returning_branches, branch)   330    331     def resume_all_abandoned_branches(self):   332    333         """   334         Resume branches previously abandoned including returning branches.   335    336         Abandoned branches are not reset because they may not be handled by   337         exception handlers after all.   338         """   339    340         current_branches = self.attribute_branches[-1]   341         abandoned_branches = self.abandoned_branches[-1]   342         returning_branches = self.returning_branches[-1]   343         merged_branches = merge_dicts(abandoned_branches + returning_branches + [current_branches])   344         self.replace_branches(merged_branches)   345    346         # Return the previously-active branches for later restoration.   347    348         return current_branches   349    350     def resume_broken_branches(self):   351    352         "Resume branches previously suspended for breaking out of a loop."   353    354         suspended_branches = self.suspended_broken_branches.pop()   355         current_branches = self.attribute_branches[-1]   356    357         # Merge suspended branches with the current branch.   358    359         merged_branches = merge_dicts(suspended_branches + [current_branches])   360         self.replace_branches(merged_branches)   361    362     def resume_continuing_branches(self):   363    364         "Resume branches previously suspended for loop continuation."   365    366         suspended_branches = self.suspended_continuing_branches.pop()   367         current_branches = self.attribute_branches[-1]   368    369         # Merge suspended branches with the current branch.   370    371         merged_branches = merge_dicts(suspended_branches + [current_branches])   372         self.replace_branches(merged_branches)   373    374     def replace_branches(self, merged_branches):   375    376         """   377         Replace the 'merged_branches' with a new branch applying to all active   378         names, connected to the supplying branches.   379         """   380    381         branch, new_branches = self._new_branch(merged_branches)   382         self.attribute_branches[-1] = new_branches   383    384     def restore_active_branches(self, branches):   385    386         "Restore the active 'branches'."   387    388         self.attribute_branches[-1] = branches   389    390     def merge_branches(self):   391    392         "Merge branches."   393    394         # Combine the attribute branches. This ensures that a list of branches   395         # affected by attribute usage is maintained for the current branch.   396    397         all_shelved_branches = self.attribute_branch_shelves.pop()   398         merged_branches = merge_dicts(all_shelved_branches, missing=make_missing)   399         self.replace_branches(merged_branches)   400    401         # Abandoned branches are retained for exception handling purposes.   402    403         all_abandoned_branches = self.abandoned_branches.pop()   404         new_abandoned_branches = merge_dicts(all_abandoned_branches)   405         self.abandoned_branches[-1].append(new_abandoned_branches)   406    407         # Returning branches are retained for finally clauses.   408    409         all_returning_branches = self.returning_branches.pop()   410         new_returning_branches = merge_dicts(all_returning_branches)   411         self.returning_branches[-1].append(new_returning_branches)   412    413     # Internal structure assembly methods.   414    415     def _connect_branches(self, attribute_branches, contributor, loop_node=False):   416    417         """   418         Given the 'attribute_branches' mapping, connect the branches referenced   419         in the mapping to the given 'contributor' branch. If 'loop_node' is   420         set to a true value, connect only the branches so that the 'contributor'   421         references the nodes supplying it with name information.   422         """   423    424         all_branches = self._connect_suppliers(attribute_branches, contributor)   425         if not loop_node:   426             self._connect_contributor(contributor, all_branches)   427    428     def _connect_suppliers(self, attribute_branches, contributor):   429    430         "Connect the 'attribute_branches' to the given 'contributor'."   431    432         # Gather branches involved with all known names into a single set.   433    434         all_branches = set()   435    436         for name, branches in attribute_branches.items():   437             all_branches.update(branches)   438    439             # Also note receiving branches on the contributor.   440    441             for branch in branches:   442                 init_item(contributor.suppliers, name, set)   443                 contributor.suppliers[name].add(branch)   444    445         return all_branches   446    447     def _connect_contributor(self, contributor, branches):   448    449         "Connect the given 'contributor' branch to the given 'branches'."   450    451         for branch in branches:   452             branch.contributors.add(contributor)   453    454     # Attribute usage methods.   455    456     def tracking_name(self, name):   457    458         """   459         Return whether 'name' is being tracked, returning all branches doing so   460         if it is.   461         """   462    463         return self.assignments.has_key(name) and self.have_name(name)   464    465     def have_name(self, name):   466    467         "Return whether 'name' is known."   468    469         return self.attribute_branches[-1].get(name)   470    471     def assign_names(self, names, values=None):   472    473         """   474         Define the start of usage tracking for the given 'names', each being   475         assigned with the corresponding 'values' if indicated.   476         """   477    478         branches = self.attribute_branches[-1]   479         branch = Branch(names, True, values)   480    481         for name in names:   482             branches[name] = [branch]   483             init_item(self.assignments, name, list)   484             self.assignments[name].append(branch)   485    486         return branch   487    488     def use_attribute(self, name, attrname, invocation=False, assignment=False):   489    490         """   491         Indicate the use on the given 'name' of an attribute with the given   492         'attrname', optionally involving an invocation of the attribute if   493         'invocation' is set to a true value, or involving an assignment of the   494         attribute if 'assignment' is set to a true value.   495    496         Return all branches that support 'name'.   497         """   498    499         branches = self.attribute_branches[-1]   500    501         # Add the usage to all current branches.   502    503         if branches.has_key(name):   504             for branch in branches[name]:   505                 branch.set_usage(name, attrname, invocation, assignment)   506             return branches[name]   507         else:   508             return None   509    510     # Query methods.   511    512     def get_assignment_positions_for_branches(self, name, branches, missing=True):   513    514         """   515         Return the positions of assignments involving the given 'name' affected   516         by the given 'branches'. If 'missing' is set to a false value, branches   517         with missing name details will be excluded instead of contributing the   518         value None to the list of positions.   519         """   520    521         if not branches:   522             return [None]   523    524         positions = set()   525         assignments = self.assignments[name]   526    527         for assignment in self.get_assignments_for_branches(name, branches):   528    529             # Use None to indicate a branch without assignment information.   530    531             if missing and isinstance(assignment, MissingBranch):   532                 positions.add(None)   533             else:   534                 pos = assignments.index(assignment)   535                 positions.add(pos)   536    537         positions = list(positions)   538         positions.sort()   539         return positions   540    541     def get_assignments_for_branches(self, name, branches, missing=True):   542    543         """   544         Return the origins of assignments involving the given 'name' affected   545         by the given 'branches'. The origins are a list of branches where names   546         are defined using assignments. If 'missing' is set to a false value,   547         branches with missing name details are excluded.   548         """   549    550         all_branches = []   551         assignments = self.assignments[name]   552    553         # Obtain the assignments recorded for each branch.   554    555         for branch in branches:   556    557             # Find the branch representing the definition of some names in the   558             # scope's assignments, making sure that the given name is involved.   559    560             for assignment in branch.get_assignment_sources(name):   561    562                 # Capture branches without assignment information as well as   563                 # genuine assignment branches.   564    565                 if assignment in assignments or missing and isinstance(assignment, MissingBranch):   566                     all_branches.append(assignment)   567    568         return all_branches   569    570     def get_all_usage(self):   571    572         """   573         Convert usage observations from the tracker to a simple mapping of   574         names to sets of attribute names.   575         """   576    577         d = {}   578         for name, branches in self.assignments.items():   579             d[name] = self.get_usage_from_branches_for_name(branches, name)   580         return d   581    582     def get_usage_from_branches_for_name(self, branches, name):   583    584         """   585         Convert usage observations from the 'branches' to a simple list of   586         usage sets for the given 'name'.   587         """   588    589         l = []   590         for branch in branches:   591             l.append(branch.get_usage()[name])   592         return l   593    594     def get_all_values(self):   595    596         "Return a mapping from names to lists of assigned values."   597    598         d = {}   599         for name, branches in self.assignments.items():   600             d[name] = [branch.values.get(name) for branch in branches]   601         return d   602    603 # Special objects.   604    605 class AbandonedDict(dict):   606    607     "A dictionary representing mappings in an abandoned branch."   608    609     def __repr__(self):   610         return "AbandonedDict()"   611    612 class MissingBranch(Branch):   613    614     "A branch introduced during dictionary merging."   615    616     def __repr__(self):   617         return "MissingBranch(%r, %r)" % (self.usage.keys(),   618             self.assignments and True or False)   619    620 def make_missing(name):   621    622     "Make a special branch indicating missing name information."   623    624     return set([MissingBranch([name], True)])   625    626 # Dictionary utilities.   627    628 def merge_dicts(dicts, ignored=AbandonedDict, missing=None):   629    630     """   631     Merge the given 'dicts' mapping keys to sets of values.   632    633     Where 'ignored' is specified, any dictionary of the given type is ignored.   634     Where all dictionaries to be merged are of the given type, an instance of   635     the type is returned as the merged dictionary.   636    637     Where 'missing' is specified, it provides a callable that produces a set of   638     suitable values for a given name.   639     """   640    641     new_dict = {}   642     all_names = set()   643    644     # Determine all known names.   645    646     for old_dict in dicts:   647         all_names.update(old_dict.keys())   648    649     # Merge the dictionaries, looking for all known names in each one.   650    651     have_dicts = False   652    653     for old_dict in dicts:   654    655         # Abandoned dictionaries should not contribute information.   656    657         if isinstance(old_dict, ignored):   658             continue   659         else:   660             have_dicts = True   661    662         for name in all_names:   663    664             # Find branches providing each name.   665    666             if old_dict.has_key(name):   667                 values = old_dict[name]   668    669             # Branches not providing names may indicate usage before assignment.   670    671             elif missing:   672                 values = missing(name)   673             else:   674                 continue   675    676             # Initialise mappings in the resulting dictionary.   677    678             if not new_dict.has_key(name):   679                 new_dict[name] = set(values)   680             else:   681                 new_dict[name].update(values)   682    683     # Where no dictionaries contributed, all branches were abandoned.   684    685     if have_dicts:   686         return new_dict   687     else:   688         return ignored()   689    690 def deepen_dict(d):   691    692     """   693     Return a version of dictionary 'd' with its values converted to sets   694     containing each original value as a single element in each new value.   695     Original values are assumed to be sequences. Thus...   696    697     {"self" : ("x", "y")}   698    699     ...would become...   700    701     {"self" : set([("x", "y")])}   702    703     ...allowing other such values to be added to the set alongside the original   704     value.   705     """   706    707     l = []   708    709     for key, value in d.items():   710    711         # Sort the attribute name details for stable comparisons.   712    713         value = list(value)   714         value.sort()   715         l.append((key, set([tuple(value)])))   716    717     return dict(l)   718    719 def combine_sets(s1, s2):   720    721     "Combine elements from sets 's1' and 's2'."   722    723     if not s1:   724         return s2   725     elif not s2:   726         return s1   727    728     s = set()   729    730     for i1 in s1:   731         for i2 in s2:   732    733             # Sort the attribute name details for stable comparisons.   734    735             l = list(set(i1 + i2))   736             l.sort()   737             s.add(tuple(l))   738    739     return s   740    741 def combine_dicts(d1, d2, combine=combine_sets):   742    743     """   744     Combine dictionaries 'd1' and 'd2' such that the values for common keys   745     are themselves combined in the result.   746     """   747    748     d = {}   749    750     for key in d1.keys():   751         if d2.has_key(key):   752             d[key] = combine(d1[key], d2[key])   753         else:   754             d[key] = d1[key]   755    756     return d   757    758 # vim: tabstop=4 expandtab shiftwidth=4