1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 assignments = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 assignments.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Assign the items to the target nodes. 455 456 for i, node in enumerate(n.nodes): 457 assignments.append( 458 compiler.ast.Assign([node], compiler.ast.Subscript( 459 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 460 ) 461 462 return self.process_structure_node(compiler.ast.Stmt(assignments)) 463 464 def process_literal_sequence_items(self, n, name_ref): 465 466 """ 467 Process the given assignment node 'n', obtaining from the given 468 'name_ref' the items to be assigned to the assignment targets. 469 470 Return whether this method was able to process the assignment node as 471 a sequence of direct assignments. 472 """ 473 474 if len(n.nodes) == len(name_ref.items): 475 assigned_names, count = get_names_from_nodes(n.nodes) 476 accessed_names, _count = get_names_from_nodes(name_ref.items) 477 478 # Only assign directly between items if all assigned names are 479 # plain names (not attribute assignments), and if the assigned names 480 # do not appear in the accessed names. 481 482 if len(assigned_names) == count and \ 483 not assigned_names.intersection(accessed_names): 484 485 for node, item in zip(n.nodes, name_ref.items): 486 self.process_assignment_node(node, item) 487 488 return True 489 490 # Otherwise, use the position-based mechanism to obtain values. 491 492 else: 493 return False 494 else: 495 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 496 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 497 498 def process_compare_node(self, n): 499 500 """ 501 Process the given comparison node 'n', converting an operator sequence 502 from... 503 504 <expr1> <op1> <expr2> <op2> <expr3> 505 506 ...to... 507 508 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 509 """ 510 511 invocations = [] 512 last = n.expr 513 514 for op, op_node in n.ops: 515 op = operator_functions.get(op) 516 517 invocations.append(compiler.ast.CallFunc( 518 compiler.ast.Name("$op%s" % op), 519 [last, op_node])) 520 521 last = op_node 522 523 if len(invocations) > 1: 524 result = compiler.ast.And(invocations) 525 else: 526 result = invocations[0] 527 528 return self.process_structure_node(result) 529 530 def process_dict_node(self, node): 531 532 """ 533 Process the given dictionary 'node', returning a list of (key, value) 534 tuples. 535 """ 536 537 l = [] 538 for key, value in node.items: 539 l.append(( 540 self.process_structure_node(key), 541 self.process_structure_node(value))) 542 return l 543 544 def process_for_node(self, n): 545 546 """ 547 Generate attribute accesses for {n.list}.__iter__ and the next method on 548 the iterator, producing a replacement node for the original. 549 """ 550 551 t0 = self.get_temporary_name() 552 self.next_temporary() 553 t1 = self.get_temporary_name() 554 self.next_temporary() 555 t2 = self.get_temporary_name() 556 self.next_temporary() 557 558 node = compiler.ast.Stmt([ 559 560 # <t0> = {n.list} 561 # <t1> = <t0>.__iter__() 562 563 compiler.ast.Assign( 564 [compiler.ast.AssName(t0, "OP_ASSIGN")], 565 n.list), 566 567 compiler.ast.Assign( 568 [compiler.ast.AssName(t1, "OP_ASSIGN")], 569 compiler.ast.CallFunc( 570 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 571 [])), 572 573 # <t2> = <t1>.next 574 # try: 575 # while True: 576 # <var>... = <t2>() 577 # ... 578 # except StopIteration: 579 # pass 580 581 compiler.ast.Assign( 582 [compiler.ast.AssName(t2, "OP_ASSIGN")], 583 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 584 585 compiler.ast.TryExcept( 586 compiler.ast.While( 587 compiler.ast.Name("True"), 588 compiler.ast.Stmt([ 589 compiler.ast.Assign( 590 [n.assign], 591 compiler.ast.CallFunc( 592 compiler.ast.Name(t2), 593 [] 594 )), 595 n.body]), 596 None), 597 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 598 None) 599 ]) 600 601 self.process_structure_node(node) 602 603 def process_literal_sequence_node(self, n, name, ref, cls): 604 605 """ 606 Process the given literal sequence node 'n' as a function invocation, 607 with 'name' indicating the type of the sequence, and 'ref' being a 608 reference to the type. The 'cls' is used to instantiate a suitable name 609 reference. 610 """ 611 612 if name == "dict": 613 items = [] 614 for key, value in n.items: 615 items.append(compiler.ast.Tuple([key, value])) 616 else: # name in ("list", "tuple"): 617 items = n.nodes 618 619 return self.get_literal_reference(name, ref, items, cls) 620 621 def process_operator_node(self, n): 622 623 """ 624 Process the given operator node 'n' as an operator function invocation. 625 """ 626 627 opname = n.__class__.__name__ 628 operands = n.getChildNodes() 629 630 # Convert a unary operation to an invocation. 631 632 op = unary_operator_functions.get(opname) 633 634 if op: 635 invocation = compiler.ast.CallFunc( 636 compiler.ast.Name("$op%s" % op), 637 [operands[0]] 638 ) 639 640 # Convert a single operator with a list of operands to a combination of 641 # pairwise operations. 642 643 else: 644 op = operator_functions[opname] 645 invocation = self._process_operator_node(op, operands) 646 647 return self.process_structure_node(invocation) 648 649 def _process_operator_node(self, op, operands): 650 651 """ 652 Process the given 'op', being an operator function, together with the 653 supplied 'operands', returning either a single remaining operand or an 654 invocation combining the operands. 655 """ 656 657 remaining = operands[1:] 658 if not remaining: 659 return operands[0] 660 661 return compiler.ast.CallFunc( 662 compiler.ast.Name("$op%s" % op), 663 [operands[0], self._process_operator_node(op, remaining)] 664 ) 665 666 def process_print_node(self, n): 667 668 """ 669 Process the given print node 'n' as an invocation on a stream of the 670 form... 671 672 $print(dest, args, nl) 673 674 The special function name will be translated elsewhere. 675 """ 676 677 nl = isinstance(n, compiler.ast.Printnl) 678 invocation = compiler.ast.CallFunc( 679 compiler.ast.Name("$print"), 680 [n.dest or compiler.ast.Name("None"), 681 compiler.ast.List(list(n.nodes)), 682 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 683 ) 684 return self.process_structure_node(invocation) 685 686 def process_slice_node(self, n, expr=None): 687 688 """ 689 Process the given slice node 'n' as a method invocation. 690 """ 691 692 if n.flags == "OP_ASSIGN": op = "__setslice__" 693 elif n.flags == "OP_DELETE": op = "__delslice__" 694 else: op = "__getslice__" 695 696 invocation = compiler.ast.CallFunc( 697 compiler.ast.Getattr(n.expr, op), 698 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 699 (expr and [expr] or []) 700 ) 701 702 # Fix parse tree structure. 703 704 if op == "__delslice__": 705 invocation = compiler.ast.Discard(invocation) 706 707 return self.process_structure_node(invocation) 708 709 def process_sliceobj_node(self, n): 710 711 """ 712 Process the given slice object node 'n' as a slice constructor. 713 """ 714 715 op = "slice" 716 invocation = compiler.ast.CallFunc( 717 compiler.ast.Name("$op%s" % op), 718 n.nodes 719 ) 720 return self.process_structure_node(invocation) 721 722 def process_subscript_node(self, n, expr=None): 723 724 """ 725 Process the given subscript node 'n' as a method invocation. 726 """ 727 728 if n.flags == "OP_ASSIGN": op = "__setitem__" 729 elif n.flags == "OP_DELETE": op = "__delitem__" 730 else: op = "__getitem__" 731 732 invocation = compiler.ast.CallFunc( 733 compiler.ast.Getattr(n.expr, op), 734 list(n.subs) + (expr and [expr] or []) 735 ) 736 737 # Fix parse tree structure. 738 739 if op == "__delitem__": 740 invocation = compiler.ast.Discard(invocation) 741 742 return self.process_structure_node(invocation) 743 744 def process_attribute_chain(self, n): 745 746 """ 747 Process the given attribute access node 'n'. Return a reference 748 describing the expression. 749 """ 750 751 # AssAttr/Getattr are nested with the outermost access being the last 752 # access in any chain. 753 754 self.attrs.insert(0, n.attrname) 755 attrs = self.attrs 756 757 # Break attribute chains where non-access nodes are found. 758 759 if not self.have_access_expression(n): 760 self.reset_attribute_chain() 761 762 # Descend into the expression, extending backwards any existing chain, 763 # or building another for the expression. 764 765 name_ref = self.process_structure_node(n.expr) 766 767 # Restore chain information applying to this node. 768 769 if not self.have_access_expression(n): 770 self.restore_attribute_chain(attrs) 771 772 # Return immediately if the expression was another access and thus a 773 # continuation backwards along the chain. The above processing will 774 # have followed the chain all the way to its conclusion. 775 776 if self.have_access_expression(n): 777 del self.attrs[0] 778 779 return name_ref 780 781 # Attribute chain handling. 782 783 def reset_attribute_chain(self): 784 785 "Reset the attribute chain for a subexpression of an attribute access." 786 787 self.attrs = [] 788 self.chain_assignment.append(self.in_assignment) 789 self.chain_invocation.append(self.in_invocation) 790 self.in_assignment = None 791 self.in_invocation = None 792 793 def restore_attribute_chain(self, attrs): 794 795 "Restore the attribute chain for an attribute access." 796 797 self.attrs = attrs 798 self.in_assignment = self.chain_assignment.pop() 799 self.in_invocation = self.chain_invocation.pop() 800 801 def have_access_expression(self, node): 802 803 "Return whether the expression associated with 'node' is Getattr." 804 805 return isinstance(node.expr, compiler.ast.Getattr) 806 807 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 808 809 """ 810 Return the name to be used for attribute usage observations involving 811 the given 'name' in the current namespace. 812 813 If the name is being used outside a function, and if 'name_ref' is 814 given and indicates a global or if 'is_global' is specified as a true 815 value, a path featuring the name in the global namespace is returned. 816 Otherwise, a path computed using the current namespace and the given 817 name is returned. 818 819 The intention of this method is to provide a suitably-qualified name 820 that can be tracked across namespaces. Where globals are being 821 referenced in class namespaces, they should be referenced using their 822 path within the module, not using a path within each class. 823 824 It may not be possible to identify a global within a function at the 825 time of inspection (since a global may appear later in a file). 826 Consequently, globals are identified by their local name rather than 827 their module-qualified path. 828 """ 829 830 # For functions, use the appropriate local names. 831 832 if self.in_function: 833 return name 834 835 # For global names outside functions, use a global name. 836 837 elif is_global or name_ref and name_ref.is_global_name(): 838 return self.get_global_path(name) 839 840 # Otherwise, establish a name in the current namespace. 841 842 else: 843 return self.get_object_path(name) 844 845 def get_path_for_access(self): 846 847 "Outside functions, register accesses at the module level." 848 849 if not self.in_function: 850 return self.name 851 else: 852 return self.get_namespace_path() 853 854 def get_module_name(self, node): 855 856 """ 857 Using the given From 'node' in this module, calculate any relative import 858 information, returning a tuple containing a module to import along with any 859 names to import based on the node's name information. 860 861 Where the returned module is given as None, whole module imports should 862 be performed for the returned modules using the returned names. 863 """ 864 865 # Absolute import. 866 867 if node.level == 0: 868 return node.modname, node.names 869 870 # Relative to an ancestor of this module. 871 872 else: 873 path = self.name.split(".") 874 level = node.level 875 876 # Relative imports treat package roots as submodules. 877 878 if split(self.filename)[-1] == "__init__.py": 879 level -= 1 880 881 if level > len(path): 882 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 883 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 884 885 basename = ".".join(path[:len(path)-level]) 886 887 # Name imports from a module. 888 889 if node.modname: 890 return "%s.%s" % (basename, node.modname), node.names 891 892 # Relative whole module imports. 893 894 else: 895 return basename, node.names 896 897 def get_argnames(args): 898 899 """ 900 Return a list of all names provided by 'args'. Since tuples may be 901 employed, the arguments are traversed depth-first. 902 """ 903 904 l = [] 905 for arg in args: 906 if isinstance(arg, tuple): 907 l += get_argnames(arg) 908 else: 909 l.append(arg) 910 return l 911 912 def get_names_from_nodes(nodes): 913 914 """ 915 Return the names employed in the given 'nodes' along with the number of 916 nodes excluding sequences. 917 """ 918 919 names = set() 920 count = 0 921 922 for node in nodes: 923 924 # Add names and count them. 925 926 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 927 names.add(node.name) 928 count += 1 929 930 # Add names from sequences and incorporate their counts. 931 932 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 933 compiler.ast.List, compiler.ast.Set, 934 compiler.ast.Tuple)): 935 _names, _count = get_names_from_nodes(node.nodes) 936 names.update(_names) 937 count += _count 938 939 # Count non-name, non-sequence nodes. 940 941 else: 942 count += 1 943 944 return names, count 945 946 # Location classes. 947 948 class Location: 949 950 "A generic program location." 951 952 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 953 self.path = path 954 self.name = name 955 self.attrnames = attrnames 956 self.version = version 957 self.access_number = access_number 958 959 def __repr__(self): 960 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 961 962 def as_tuple(self): 963 return (self.path, self.name, self.attrnames, self.version, self.access_number) 964 965 def __hash__(self): 966 return hash(self.as_tuple()) 967 968 def __eq__(self, other): 969 return self.as_tuple() == other.as_tuple() 970 971 def __cmp__(self, other): 972 return cmp(self.as_tuple(), other.as_tuple()) 973 974 def get_attrname(self): 975 976 """ 977 Extract the first attribute from the attribute names employed in this 978 location. 979 """ 980 981 attrnames = self.attrnames 982 if not attrnames: 983 return attrnames 984 return get_attrnames(attrnames)[0] 985 986 class AccessLocation(Location): 987 988 "A specialised access location." 989 990 def __init__(self, path, name, attrnames, access_number): 991 992 """ 993 Initialise an access location featuring 'path', 'name', 'attrnames' and 994 'access_number'. 995 """ 996 997 Location.__init__(self, path, name, attrnames, None, access_number) 998 999 def __repr__(self): 1000 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1001 1002 # Result classes. 1003 1004 class InstructionSequence: 1005 1006 "A generic sequence of instructions." 1007 1008 def __init__(self, instructions): 1009 self.instructions = instructions 1010 1011 def get_value_instruction(self): 1012 return self.instructions[-1] 1013 1014 def get_init_instructions(self): 1015 return self.instructions[:-1] 1016 1017 # Dictionary utilities. 1018 1019 def init_item(d, key, fn): 1020 1021 """ 1022 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1023 value where no entry already exists. 1024 """ 1025 1026 if not d.has_key(key): 1027 d[key] = fn() 1028 return d[key] 1029 1030 def dict_for_keys(d, keys): 1031 1032 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1033 1034 nd = {} 1035 for key in keys: 1036 if d.has_key(key): 1037 nd[key] = d[key] 1038 return nd 1039 1040 def make_key(s): 1041 1042 "Make sequence 's' into a tuple-based key, first sorting its contents." 1043 1044 l = list(s) 1045 l.sort() 1046 return tuple(l) 1047 1048 def add_counter_item(d, key): 1049 1050 """ 1051 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1052 maintaining a mapping of keys to their order of insertion. 1053 """ 1054 1055 if not d.has_key(key): 1056 d[key] = len(d.keys()) 1057 return d[key] 1058 1059 def remove_items(d1, d2): 1060 1061 "Remove from 'd1' all items from 'd2'." 1062 1063 for key in d2.keys(): 1064 if d1.has_key(key): 1065 del d1[key] 1066 1067 # Set utilities. 1068 1069 def first(s): 1070 return list(s)[0] 1071 1072 def same(s1, s2): 1073 return set(s1) == set(s2) 1074 1075 def order_dependencies(all_depends): 1076 1077 """ 1078 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1079 has the form "A depends on B, C...". The result will order A, B, C, and so 1080 on. 1081 """ 1082 1083 usage = init_reverse_dependencies(all_depends) 1084 1085 # Produce an ordering by obtaining exposed items (required by items already 1086 # processed) and putting them at the start of the list. 1087 1088 ordered = [] 1089 1090 while usage: 1091 have_next = False 1092 1093 for key, n in usage.items(): 1094 1095 # Add items needed by no other items to the ordering. 1096 1097 if not n: 1098 remove_dependency(key, all_depends, usage, ordered) 1099 have_next = True 1100 1101 if not have_next: 1102 raise ValueError, usage 1103 1104 return ordered 1105 1106 def order_dependencies_partial(all_depends): 1107 1108 """ 1109 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1110 has the form "A depends on B, C...". The result will order A, B, C, and so 1111 on. Where cycles exist, they will be broken and a partial ordering returned. 1112 """ 1113 1114 usage = init_reverse_dependencies(all_depends) 1115 1116 # Duplicate the dependencies for subsequent modification. 1117 1118 new_depends = {} 1119 for key, values in all_depends.items(): 1120 new_depends[key] = set(values) 1121 1122 all_depends = new_depends 1123 1124 # Produce an ordering by obtaining exposed items (required by items already 1125 # processed) and putting them at the start of the list. 1126 1127 ordered = [] 1128 1129 while usage: 1130 least = None 1131 least_key = None 1132 1133 for key, n in usage.items(): 1134 1135 # Add items needed by no other items to the ordering. 1136 1137 if not n: 1138 remove_dependency(key, all_depends, usage, ordered) 1139 least = 0 1140 1141 # When breaking cycles, note the least used items. 1142 1143 elif least is None or len(n) < least: 1144 least_key = key 1145 least = len(n) 1146 1147 if least: 1148 transfer_dependencies(least_key, all_depends, usage, ordered) 1149 1150 return ordered 1151 1152 def init_reverse_dependencies(all_depends): 1153 1154 """ 1155 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1156 record the reverse dependencies, making a mapping of the form 1157 "B is needed by A", "C is needed by A", and so on. 1158 """ 1159 1160 usage = {} 1161 1162 # Record path-based dependencies. 1163 1164 for key in all_depends.keys(): 1165 usage[key] = set() 1166 1167 for key, depends in all_depends.items(): 1168 for depend in depends: 1169 init_item(usage, depend, set) 1170 usage[depend].add(key) 1171 1172 return usage 1173 1174 def transfer_dependencies(key, all_depends, usage, ordered): 1175 1176 """ 1177 Transfer items needed by 'key' to those items needing 'key', found using 1178 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1179 collection of dependencies. 1180 1181 If "A is needed by X" and "B is needed by A", then transferring items needed 1182 by A will cause "B is needed by X" to be recorded as a consequence. 1183 1184 Transferring items also needs to occur in the reverse mapping, so that 1185 "A needs B" and "X needs A", then the consequence must be recorded as 1186 "X needs B". 1187 """ 1188 1189 ordered.insert(0, key) 1190 1191 needing = usage[key] # A is needed by X 1192 needed = all_depends.get(key) # A needs B 1193 1194 if needing: 1195 for depend in needing: 1196 l = all_depends.get(depend) 1197 if not l: 1198 continue 1199 1200 l.remove(key) # X needs (A) 1201 1202 if needed: 1203 l.update(needed) # X needs B... 1204 1205 # Prevent self references. 1206 1207 if depend in needed: 1208 l.remove(depend) 1209 1210 if needed: 1211 for depend in needed: 1212 l = usage.get(depend) 1213 if not l: 1214 continue 1215 1216 l.remove(key) # B is needed by (A) 1217 l.update(needing) # B is needed by X... 1218 1219 # Prevent self references. 1220 1221 if depend in needing: 1222 l.remove(depend) 1223 1224 if needed: 1225 del all_depends[key] 1226 del usage[key] 1227 1228 def remove_dependency(key, all_depends, usage, ordered): 1229 1230 """ 1231 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1232 'ordered' collection of dependencies. 1233 1234 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1235 upon removing A from 'usage', the outcome is that all keys needing A will 1236 have A removed from their 'usage' records. 1237 1238 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1239 as a consequence. 1240 """ 1241 1242 ordered.insert(0, key) 1243 1244 depends = all_depends.get(key) 1245 1246 # Reduce usage of the referenced items. 1247 1248 if depends: 1249 for depend in depends: 1250 usage[depend].remove(key) 1251 1252 del usage[key] 1253 1254 # General input/output. 1255 1256 def readfile(filename): 1257 1258 "Return the contents of 'filename'." 1259 1260 f = open(filename) 1261 try: 1262 return f.read() 1263 finally: 1264 f.close() 1265 1266 def writefile(filename, s): 1267 1268 "Write to 'filename' the string 's'." 1269 1270 f = open(filename, "w") 1271 try: 1272 f.write(s) 1273 finally: 1274 f.close() 1275 1276 # General encoding. 1277 1278 def sorted_output(x): 1279 1280 "Sort sequence 'x' and return a string with commas separating the values." 1281 1282 x = map(str, x) 1283 x.sort() 1284 return ", ".join(x) 1285 1286 def get_string_details(literals, encoding): 1287 1288 """ 1289 Determine whether 'literals' represent Unicode strings or byte strings, 1290 using 'encoding' to reproduce byte sequences. 1291 1292 Each literal is the full program representation including prefix and quotes 1293 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1294 string needs to be translated back to its original encoding. 1295 1296 Return a single encoded literal value, a type name, and the original 1297 encoding as a tuple. 1298 """ 1299 1300 typename = "unicode" 1301 1302 l = [] 1303 1304 for s in literals: 1305 out, _typename = get_literal_details(s) 1306 if _typename == "str": 1307 typename = "str" 1308 l.append(out) 1309 1310 out = "".join(l) 1311 1312 # For Unicode values, convert to the UTF-8 program representation. 1313 1314 if typename == "unicode": 1315 return out.encode("utf-8"), typename, encoding 1316 1317 # For byte string values, convert back to the original encoding. 1318 1319 else: 1320 return out.encode(encoding), typename, encoding 1321 1322 def get_literal_details(s): 1323 1324 """ 1325 Determine whether 's' represents a Unicode string or a byte string, where 1326 's' contains the full program representation of a literal including prefix 1327 and quotes, recoded by the parser to UTF-8. 1328 1329 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1330 and byte or Unicode values starting with <backslash><octal digit> or 1331 <backslash>x. 1332 1333 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1334 to be considered as Unicode values. Otherwise, they produce byte values and 1335 cause unprefixed strings to be considered as byte strings. 1336 1337 Literals prefixed with "r" do not have their backslash-encoded values 1338 converted unless also prefixed with "u", in which case only the above value 1339 formats are converted, not any of the other special sequences for things 1340 like newlines. 1341 1342 Return the literal value as a Unicode object together with the appropriate 1343 type name in a tuple. 1344 """ 1345 1346 l = [] 1347 1348 # Identify the quote character and use it to identify the prefix. 1349 1350 quote_type = s[-1] 1351 prefix_end = s.find(quote_type) 1352 prefix = s[:prefix_end].lower() 1353 1354 if prefix not in ("", "b", "br", "r", "u", "ur"): 1355 raise ValueError, "String literal does not have a supported prefix: %s" % s 1356 1357 if "b" in prefix: 1358 typename = "str" 1359 else: 1360 typename = "unicode" 1361 1362 # Identify triple quotes or single quotes. 1363 1364 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1365 quote = s[prefix_end:prefix_end+3] 1366 current = prefix_end + 3 1367 end = len(s) - 3 1368 else: 1369 quote = s[prefix_end] 1370 current = prefix_end + 1 1371 end = len(s) - 1 1372 1373 # Conversions of some quoted values. 1374 1375 searches = { 1376 "u" : (6, 16), 1377 "U" : (10, 16), 1378 "x" : (4, 16), 1379 } 1380 1381 octal_digits = map(str, range(0, 8)) 1382 1383 # Translations of some quoted values. 1384 1385 escaped = { 1386 "\\" : "\\", "'" : "'", '"' : '"', 1387 "a" : "\a", "b" : "\b", "f" : "\f", 1388 "n" : "\n", "r" : "\r", "t" : "\t", 1389 } 1390 1391 while current < end: 1392 1393 # Look for quoted values. 1394 1395 index = s.find("\\", current) 1396 if index == -1 or index + 1 == end: 1397 l.append(s[current:end]) 1398 break 1399 1400 # Add the preceding text. 1401 1402 l.append(s[current:index]) 1403 1404 # Handle quoted text. 1405 1406 term = s[index+1] 1407 1408 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1409 # produce Unicode values. 1410 1411 if typename == "unicode" and ( 1412 term in ("u", "U") or 1413 "u" in prefix and (term == "x" or term in octal_digits)): 1414 1415 needed, base = searches.get(term, (4, 8)) 1416 value = convert_quoted_value(s, index, needed, end, base, unichr) 1417 l.append(value) 1418 current = index + needed 1419 1420 # Add raw byte values, changing the string type. 1421 1422 elif "r" not in prefix and ( 1423 term == "x" or term in octal_digits): 1424 1425 needed, base = searches.get(term, (4, 8)) 1426 value = convert_quoted_value(s, index, needed, end, base, chr) 1427 l.append(value) 1428 typename = "str" 1429 current = index + needed 1430 1431 # Add other escaped values. 1432 1433 elif "r" not in prefix and escaped.has_key(term): 1434 l.append(escaped[term]) 1435 current = index + 2 1436 1437 # Add other text as found. 1438 1439 else: 1440 l.append(s[index:index+2]) 1441 current = index + 2 1442 1443 # Collect the components into a single Unicode object. Since the literal 1444 # text was already in UTF-8 form, interpret plain strings as UTF-8 1445 # sequences. 1446 1447 out = [] 1448 1449 for value in l: 1450 if isinstance(value, unicode): 1451 out.append(value) 1452 else: 1453 out.append(unicode(value, "utf-8")) 1454 1455 return "".join(out), typename 1456 1457 def convert_quoted_value(s, index, needed, end, base, fn): 1458 1459 """ 1460 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1461 positions, and with the given 'end' indicating the first position after the 1462 end of the actual string content. 1463 1464 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1465 to convert the value to an appropriate type. 1466 """ 1467 1468 s = s[index:min(index+needed, end)] 1469 1470 # Not a complete occurrence. 1471 1472 if len(s) < needed: 1473 return s 1474 1475 # Test for a well-formed value. 1476 1477 try: 1478 first = base == 8 and 1 or 2 1479 value = int(s[first:needed], base) 1480 except ValueError: 1481 return s 1482 else: 1483 return fn(value) 1484 1485 # Attribute chain decoding. 1486 1487 def get_attrnames(attrnames): 1488 1489 """ 1490 Split the qualified attribute chain 'attrnames' into its components, 1491 handling special attributes starting with "#" that indicate type 1492 conformance. 1493 """ 1494 1495 if attrnames.startswith("#"): 1496 return [attrnames] 1497 else: 1498 return attrnames.split(".") 1499 1500 def get_name_path(path, name): 1501 1502 "Return a suitable qualified name from the given 'path' and 'name'." 1503 1504 if "." in name: 1505 return name 1506 else: 1507 return "%s.%s" % (path, name) 1508 1509 # Usage-related functions. 1510 1511 def get_types_for_usage(attrnames, objects): 1512 1513 """ 1514 Identify the types that can support the given 'attrnames', using the 1515 given 'objects' as the catalogue of type details. 1516 """ 1517 1518 types = [] 1519 for name, _attrnames in objects.items(): 1520 if set(attrnames).issubset(_attrnames): 1521 types.append(name) 1522 return types 1523 1524 def get_invoked_attributes(usage): 1525 1526 "Obtain invoked attribute from the given 'usage'." 1527 1528 invoked = [] 1529 if usage: 1530 for attrname, invocation, assignment in usage: 1531 if invocation: 1532 invoked.append(attrname) 1533 return invoked 1534 1535 def get_assigned_attributes(usage): 1536 1537 "Obtain assigned attribute from the given 'usage'." 1538 1539 assigned = [] 1540 if usage: 1541 for attrname, invocation, assignment in usage: 1542 if assignment: 1543 assigned.append(attrname) 1544 return assigned 1545 1546 # Type and module functions. 1547 # NOTE: This makes assumptions about the __builtins__ structure. 1548 1549 def get_builtin_module(name): 1550 1551 "Return the module name containing the given type 'name'." 1552 1553 if name == "string": 1554 modname = "str" 1555 elif name == "utf8string": 1556 modname = "unicode" 1557 elif name == "NoneType": 1558 modname = "none" 1559 else: 1560 modname = name 1561 1562 return "__builtins__.%s" % modname 1563 1564 def get_builtin_type(name): 1565 1566 "Return the type name provided by the given Python value 'name'." 1567 1568 if name == "str": 1569 return "string" 1570 elif name == "unicode": 1571 return "utf8string" 1572 else: 1573 return name 1574 1575 def get_builtin_class(name): 1576 1577 "Return the full name of the built-in class having the given 'name'." 1578 1579 typename = get_builtin_type(name) 1580 module = get_builtin_module(typename) 1581 return "%s.%s" % (module, typename) 1582 1583 # Useful data. 1584 1585 predefined_constants = "False", "None", "NotImplemented", "True" 1586 1587 unary_operator_functions = { 1588 1589 # Unary operations. 1590 1591 "Invert" : "invert", 1592 "UnaryAdd" : "pos", 1593 "UnarySub" : "neg", 1594 } 1595 1596 operator_functions = { 1597 1598 # Fundamental operations. 1599 1600 "is" : "is_", 1601 "is not" : "is_not", 1602 1603 # Binary operations. 1604 1605 "in" : "in_", 1606 "not in" : "not_in", 1607 "Add" : "add", 1608 "Bitand" : "and_", 1609 "Bitor" : "or_", 1610 "Bitxor" : "xor", 1611 "Div" : "div", 1612 "FloorDiv" : "floordiv", 1613 "LeftShift" : "lshift", 1614 "Mod" : "mod", 1615 "Mul" : "mul", 1616 "Power" : "pow", 1617 "RightShift" : "rshift", 1618 "Sub" : "sub", 1619 1620 # Augmented assignment. 1621 1622 "+=" : "iadd", 1623 "-=" : "isub", 1624 "*=" : "imul", 1625 "/=" : "idiv", 1626 "//=" : "ifloordiv", 1627 "%=" : "imod", 1628 "**=" : "ipow", 1629 "<<=" : "ilshift", 1630 ">>=" : "irshift", 1631 "&=" : "iand", 1632 "^=" : "ixor", 1633 "|=" : "ior", 1634 1635 # Comparisons. 1636 1637 "==" : "eq", 1638 "!=" : "ne", 1639 "<" : "lt", 1640 "<=" : "le", 1641 ">=" : "ge", 1642 ">" : "gt", 1643 } 1644 1645 operator_functions.update(unary_operator_functions) 1646 1647 # vim: tabstop=4 expandtab shiftwidth=4