1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007-2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from compiler.transformer import Transformer 23 from errors import InspectError 24 from os import listdir, makedirs, remove 25 from os.path import exists, getmtime, isdir, join, split 26 from results import ConstantValueRef, LiteralSequenceRef, NameRef 27 import compiler.ast 28 29 class CommonOutput: 30 31 "Common output functionality." 32 33 def check_output(self, options=None): 34 35 "Check the existing output and remove it if irrelevant." 36 37 if not exists(self.output): 38 makedirs(self.output) 39 40 details = self.importer.get_cache_details() 41 recorded_details = self.get_output_details() 42 43 # Combine cache details with any options. 44 45 full_details = options and (details + " " + options) or details 46 47 if recorded_details != full_details: 48 self.remove_output() 49 50 writefile(self.get_output_details_filename(), full_details) 51 52 def get_output_details_filename(self): 53 54 "Return the output details filename." 55 56 return join(self.output, "$details") 57 58 def get_output_details(self): 59 60 "Return details of the existing output." 61 62 details_filename = self.get_output_details_filename() 63 64 if not exists(details_filename): 65 return None 66 else: 67 return readfile(details_filename) 68 69 def remove_output(self, dirname=None): 70 71 "Remove the output." 72 73 dirname = dirname or self.output 74 75 for filename in listdir(dirname): 76 path = join(dirname, filename) 77 if isdir(path): 78 self.remove_output(path) 79 else: 80 remove(path) 81 82 def copy(source, target, only_if_newer=True): 83 84 "Copy a text file from 'source' to 'target'." 85 86 if isdir(target): 87 target = join(target, split(source)[-1]) 88 89 if only_if_newer and not is_newer(source, target): 90 return 91 92 infile = open(source) 93 outfile = open(target, "w") 94 95 try: 96 outfile.write(infile.read()) 97 finally: 98 outfile.close() 99 infile.close() 100 101 def is_newer(source, target): 102 103 "Return whether 'source' is newer than 'target'." 104 105 if exists(target): 106 target_mtime = getmtime(target) 107 source_mtime = getmtime(source) 108 return source_mtime > target_mtime 109 110 return True 111 112 class CommonModule: 113 114 "A common module representation." 115 116 def __init__(self, name, importer): 117 118 """ 119 Initialise this module with the given 'name' and an 'importer' which is 120 used to provide access to other modules when required. 121 """ 122 123 self.name = name 124 self.importer = importer 125 self.filename = None 126 127 # Inspection-related attributes. 128 129 self.astnode = None 130 self.encoding = None 131 self.temp = {} 132 self.lambdas = {} 133 134 # Constants, literals and values. 135 136 self.constants = {} 137 self.constant_values = {} 138 self.literals = {} 139 self.literal_types = {} 140 141 # Nested namespaces. 142 143 self.namespace_path = [] 144 self.in_function = False 145 146 # Retain the assignment value expression and track invocations. 147 148 self.in_assignment = None 149 self.in_invocation = None 150 151 # Attribute chain state management. 152 153 self.attrs = [] 154 self.chain_assignment = [] 155 self.chain_invocation = [] 156 157 def __repr__(self): 158 return "CommonModule(%r, %r)" % (self.name, self.importer) 159 160 def parse_file(self, filename): 161 162 "Parse the file with the given 'filename', initialising attributes." 163 164 self.filename = filename 165 166 # Use the Transformer directly to obtain encoding information. 167 168 t = Transformer() 169 f = open(filename) 170 171 try: 172 self.astnode = t.parsesuite(f.read() + "\n") 173 self.encoding = t.encoding 174 finally: 175 f.close() 176 177 # Module-relative naming. 178 179 def get_global_path(self, name): 180 return "%s.%s" % (self.name, name) 181 182 def get_namespace_path(self): 183 return ".".join([self.name] + self.namespace_path) 184 185 def get_object_path(self, name): 186 return ".".join([self.name] + self.namespace_path + [name]) 187 188 # Namespace management. 189 190 def enter_namespace(self, name): 191 192 "Enter the namespace having the given 'name'." 193 194 self.namespace_path.append(name) 195 196 def exit_namespace(self): 197 198 "Exit the current namespace." 199 200 self.namespace_path.pop() 201 202 # Constant reference naming. 203 204 def get_constant_name(self, value, value_type, encoding=None): 205 206 """ 207 Add a new constant to the current namespace for 'value' with 208 'value_type'. 209 """ 210 211 path = self.get_namespace_path() 212 init_item(self.constants, path, dict) 213 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 214 215 # Literal reference naming. 216 217 def get_literal_name(self): 218 219 "Add a new literal to the current namespace." 220 221 path = self.get_namespace_path() 222 init_item(self.literals, path, lambda: 0) 223 return "$C%d" % self.literals[path] 224 225 def next_literal(self): 226 self.literals[self.get_namespace_path()] += 1 227 228 # Temporary variable naming. 229 230 def get_temporary_name(self): 231 path = self.get_namespace_path() 232 init_item(self.temp, path, lambda: 0) 233 return "$t%d" % self.temp[path] 234 235 def next_temporary(self): 236 self.temp[self.get_namespace_path()] += 1 237 238 # Arbitrary function naming. 239 240 def get_lambda_name(self): 241 path = self.get_namespace_path() 242 init_item(self.lambdas, path, lambda: 0) 243 name = "$l%d" % self.lambdas[path] 244 self.lambdas[path] += 1 245 return name 246 247 def reset_lambdas(self): 248 self.lambdas = {} 249 250 # Constant and literal recording. 251 252 def get_constant_value(self, value, literals=None): 253 254 """ 255 Encode the 'value' if appropriate, returning a value, a typename and any 256 encoding. 257 """ 258 259 if isinstance(value, unicode): 260 return value.encode("utf-8"), "unicode", self.encoding 261 262 # Attempt to convert plain strings to text. 263 264 elif isinstance(value, str) and self.encoding: 265 try: 266 return get_string_details(literals, self.encoding) 267 except UnicodeDecodeError: 268 pass 269 270 return value, value.__class__.__name__, None 271 272 def get_constant_reference(self, ref, value, encoding=None): 273 274 """ 275 Return a constant reference for the given 'ref' type and 'value', with 276 the optional 'encoding' applying to text values. 277 """ 278 279 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 280 281 # Return a reference for the constant. 282 283 objpath = self.get_object_path(constant_name) 284 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 285 286 # Record the value and type for the constant. 287 288 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 289 return name_ref 290 291 def reserve_constant(self, objpath, value, origin, encoding=None): 292 293 """ 294 Reserve a constant within 'objpath' with the given 'value' and having a 295 type with the given 'origin', with the optional 'encoding' applying to 296 text values. 297 """ 298 299 constant_name = self.get_constant_name(value, origin) 300 objpath = self.get_object_path(constant_name) 301 self._reserve_constant(objpath, value, origin, encoding) 302 303 def _reserve_constant(self, objpath, value, origin, encoding): 304 305 """ 306 Store a constant for 'objpath' with the given 'value' and 'origin', with 307 the optional 'encoding' applying to text values. 308 """ 309 310 self.constant_values[objpath] = value, origin, encoding 311 312 def get_literal_reference(self, name, ref, items, cls): 313 314 """ 315 Return a literal reference for the given type 'name', literal 'ref', 316 node 'items' and employing the given 'cls' as the class of the returned 317 reference object. 318 """ 319 320 # Construct an invocation using the items as arguments. 321 322 typename = "$L%s" % name 323 324 invocation = compiler.ast.CallFunc( 325 compiler.ast.Name(typename), 326 items 327 ) 328 329 # Get a name for the actual literal. 330 331 instname = self.get_literal_name() 332 self.next_literal() 333 334 # Record the type for the literal. 335 336 objpath = self.get_object_path(instname) 337 self.literal_types[objpath] = ref.get_origin() 338 339 # Return a wrapper for the invocation exposing the items. 340 341 return cls( 342 instname, 343 ref.instance_of(), 344 self.process_structure_node(invocation), 345 invocation.args 346 ) 347 348 # Node handling. 349 350 def process_structure(self, node): 351 352 """ 353 Within the given 'node', process the program structure. 354 355 During inspection, this will process global declarations, adjusting the 356 module namespace, and import statements, building a module dependency 357 hierarchy. 358 359 During translation, this will consult deduced program information and 360 output translated code. 361 """ 362 363 l = [] 364 for n in node.getChildNodes(): 365 l.append(self.process_statement_node(n)) 366 return l 367 368 def process_statement_node(self, node): 369 370 "Process the given statement 'node'." 371 372 return self.process_structure_node(node) 373 374 def process_augassign_node(self, n): 375 376 "Process the given augmented assignment node 'n'." 377 378 op = operator_functions[n.op] 379 380 if isinstance(n.node, compiler.ast.Getattr): 381 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 382 elif isinstance(n.node, compiler.ast.Name): 383 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 384 else: 385 target = n.node 386 387 assignment = compiler.ast.Assign( 388 [target], 389 compiler.ast.CallFunc( 390 compiler.ast.Name("$op%s" % op), 391 [n.node, n.expr])) 392 393 return self.process_structure_node(assignment) 394 395 def process_assignment_for_object(self, original_name, source): 396 397 """ 398 Return an assignment operation making 'original_name' refer to the given 399 'source'. 400 """ 401 402 assignment = compiler.ast.Assign( 403 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 404 source 405 ) 406 407 return self.process_structure_node(assignment) 408 409 def process_assignment_node_items(self, n, expr): 410 411 """ 412 Process the given assignment node 'n' whose children are to be assigned 413 items of 'expr'. 414 """ 415 416 name_ref = self.process_structure_node(expr) 417 418 # Either unpack the items and present them directly to each assignment 419 # node. 420 421 if isinstance(name_ref, LiteralSequenceRef) and \ 422 self.process_literal_sequence_items(n, name_ref): 423 424 pass 425 426 # Or have the assignment nodes access each item via the sequence API. 427 428 else: 429 self.process_assignment_node_items_by_position(n, expr, name_ref) 430 431 def process_assignment_node_items_by_position(self, n, expr, name_ref): 432 433 """ 434 Process the given sequence assignment node 'n', converting the node to 435 the separate assignment of each target using positional access on a 436 temporary variable representing the sequence. Use 'expr' as the assigned 437 value and 'name_ref' as the reference providing any existing temporary 438 variable. 439 """ 440 441 statements = [] 442 443 # Employ existing names to access the sequence. 444 # Literal sequences do not provide names of accessible objects. 445 446 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 447 temp = name_ref.name 448 449 # For other expressions, create a temporary name to reference the items. 450 451 else: 452 temp = self.get_temporary_name() 453 self.next_temporary() 454 455 statements.append( 456 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 457 ) 458 459 # Generate a test for the length of the expression object. 460 461 statements.append(compiler.ast.Discard( 462 compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"), 463 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))]))) 464 465 # Assign the items to the target nodes. 466 467 for i, node in enumerate(n.nodes): 468 statements.append( 469 compiler.ast.Assign([node], compiler.ast.CallFunc( 470 compiler.ast.Getattr(compiler.ast.Name(temp), 471 "__get_single_item_unchecked__", 472 privileged=True), 473 [compiler.ast.Const(i, str(i))])) 474 ) 475 476 return self.process_structure_node(compiler.ast.Stmt(statements)) 477 478 def process_literal_sequence_items(self, n, name_ref): 479 480 """ 481 Process the given assignment node 'n', obtaining from the given 482 'name_ref' the items to be assigned to the assignment targets. 483 484 Return whether this method was able to process the assignment node as 485 a sequence of direct assignments. 486 """ 487 488 if len(n.nodes) == len(name_ref.items): 489 assigned_names, count = get_names_from_nodes(n.nodes) 490 accessed_names, _count = get_names_from_nodes(name_ref.items) 491 492 # Only assign directly between items if all assigned names are 493 # plain names (not attribute assignments), and if the assigned names 494 # do not appear in the accessed names. 495 496 if len(assigned_names) == count and \ 497 not assigned_names.intersection(accessed_names): 498 499 for node, item in zip(n.nodes, name_ref.items): 500 self.process_assignment_node(node, item) 501 502 return True 503 504 # Otherwise, use the position-based mechanism to obtain values. 505 506 else: 507 return False 508 else: 509 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 510 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 511 512 def process_compare_node(self, n): 513 514 """ 515 Process the given comparison node 'n', converting an operator sequence 516 from... 517 518 <expr1> <op1> <expr2> <op2> <expr3> 519 520 ...to... 521 522 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 523 """ 524 525 invocations = [] 526 last = n.expr 527 528 for op, op_node in n.ops: 529 op = operator_functions.get(op) 530 531 invocations.append(compiler.ast.CallFunc( 532 compiler.ast.Name("$op%s" % op), 533 [last, op_node])) 534 535 last = op_node 536 537 if len(invocations) > 1: 538 result = compiler.ast.And(invocations) 539 else: 540 result = invocations[0] 541 542 return self.process_structure_node(result) 543 544 def process_dict_node(self, node): 545 546 """ 547 Process the given dictionary 'node', returning a list of (key, value) 548 tuples. 549 """ 550 551 l = [] 552 for key, value in node.items: 553 l.append(( 554 self.process_structure_node(key), 555 self.process_structure_node(value))) 556 return l 557 558 def process_for_node(self, n): 559 560 """ 561 Generate attribute accesses for {n.list}.__iter__ and the next method on 562 the iterator, producing a replacement node for the original. 563 """ 564 565 t0 = self.get_temporary_name() 566 self.next_temporary() 567 t1 = self.get_temporary_name() 568 self.next_temporary() 569 t2 = self.get_temporary_name() 570 self.next_temporary() 571 t3 = self.get_temporary_name() 572 self.next_temporary() 573 574 node = compiler.ast.Stmt([ 575 576 # <t0> = {n.list} 577 # <t1> = <t0>.__iter__() 578 579 compiler.ast.Assign( 580 [compiler.ast.AssName(t0, "OP_ASSIGN")], 581 n.list), 582 583 compiler.ast.Assign( 584 [compiler.ast.AssName(t1, "OP_ASSIGN")], 585 compiler.ast.CallFunc( 586 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 587 [])), 588 589 # <t2> = <t1>.next 590 # try: 591 # while True: 592 # try: 593 # <t3> = <t2>() 594 # except StopIteration: 595 # raise LoopExit 596 # <var>... = <t3> 597 # {n.body} 598 # except LoopExit: 599 # {n.else_} 600 # pass 601 602 compiler.ast.Assign( 603 [compiler.ast.AssName(t2, "OP_ASSIGN")], 604 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 605 606 # try: 607 608 compiler.ast.TryExcept( 609 610 # while True: 611 612 compiler.ast.While( 613 compiler.ast.Name("True"), 614 compiler.ast.Stmt([ 615 616 # try: 617 618 compiler.ast.TryExcept( 619 620 # <t3> = <t2>() 621 622 compiler.ast.Assign( 623 [compiler.ast.AssName(t3, "OP_ASSIGN")], 624 compiler.ast.CallFunc( 625 compiler.ast.Name(t2), 626 [])), 627 628 # except StopIteration: 629 # raise LoopExit 630 631 [(compiler.ast.Name("StopIteration"), None, 632 compiler.ast.Raise(compiler.ast.Name("LoopExit")))], 633 None), 634 635 # <var>... = <t3> 636 637 compiler.ast.Assign( 638 [n.assign], 639 compiler.ast.Name(t3)), 640 n.body]), 641 None), 642 643 # except LoopExit: 644 # {n.else_} 645 # pass 646 647 [(compiler.ast.Name("LoopExit"), None, n.else_ or compiler.ast.Pass())], 648 None) 649 ]) 650 651 self.process_structure_node(node) 652 653 def process_literal_sequence_node(self, n, name, ref, cls): 654 655 """ 656 Process the given literal sequence node 'n' as a function invocation, 657 with 'name' indicating the type of the sequence, and 'ref' being a 658 reference to the type. The 'cls' is used to instantiate a suitable name 659 reference. 660 """ 661 662 if name == "dict": 663 items = [] 664 for key, value in n.items: 665 items.append(compiler.ast.Tuple([key, value])) 666 else: # name in ("list", "tuple"): 667 items = n.nodes 668 669 return self.get_literal_reference(name, ref, items, cls) 670 671 def process_operator_node(self, n): 672 673 """ 674 Process the given operator node 'n' as an operator function invocation. 675 """ 676 677 opname = n.__class__.__name__ 678 operands = n.getChildNodes() 679 680 # Convert a unary operation to an invocation. 681 682 op = unary_operator_functions.get(opname) 683 684 if op: 685 invocation = compiler.ast.CallFunc( 686 compiler.ast.Name("$op%s" % op), 687 [operands[0]] 688 ) 689 690 # Convert a single operator with a list of operands to a combination of 691 # pairwise operations. 692 693 else: 694 op = operator_functions[opname] 695 invocation = self._process_operator_node(op, operands) 696 697 return self.process_structure_node(invocation) 698 699 def _process_operator_node(self, op, operands): 700 701 """ 702 Process the given 'op', being an operator function, together with the 703 supplied 'operands', returning either a single remaining operand or an 704 invocation combining the operands. 705 """ 706 707 remaining = operands[1:] 708 if not remaining: 709 return operands[0] 710 711 return compiler.ast.CallFunc( 712 compiler.ast.Name("$op%s" % op), 713 [operands[0], self._process_operator_node(op, remaining)] 714 ) 715 716 def process_print_node(self, n): 717 718 """ 719 Process the given print node 'n' as an invocation on a stream of the 720 form... 721 722 $print(dest, args, nl) 723 724 The special function name will be translated elsewhere. 725 """ 726 727 nl = isinstance(n, compiler.ast.Printnl) 728 invocation = compiler.ast.CallFunc( 729 compiler.ast.Name("$print"), 730 [n.dest or compiler.ast.Name("None"), 731 compiler.ast.List(list(n.nodes)), 732 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 733 ) 734 return self.process_structure_node(invocation) 735 736 def process_slice_node(self, n, expr=None): 737 738 """ 739 Process the given slice node 'n' as a method invocation. 740 """ 741 742 if n.flags == "OP_ASSIGN": op = "__setslice__" 743 elif n.flags == "OP_DELETE": op = "__delslice__" 744 else: op = "__getslice__" 745 746 invocation = compiler.ast.CallFunc( 747 compiler.ast.Getattr(n.expr, op), 748 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 749 (expr and [expr] or []) 750 ) 751 752 # Fix parse tree structure. 753 754 if op == "__delslice__": 755 invocation = compiler.ast.Discard(invocation) 756 757 return self.process_structure_node(invocation) 758 759 def process_sliceobj_node(self, n): 760 761 """ 762 Process the given slice object node 'n' as a slice constructor. 763 """ 764 765 op = "slice" 766 invocation = compiler.ast.CallFunc( 767 compiler.ast.Name("$op%s" % op), 768 n.nodes 769 ) 770 return self.process_structure_node(invocation) 771 772 def process_subscript_node(self, n, expr=None): 773 774 """ 775 Process the given subscript node 'n' as a method invocation. 776 """ 777 778 if n.flags == "OP_ASSIGN": op = "__setitem__" 779 elif n.flags == "OP_DELETE": op = "__delitem__" 780 else: op = "__getitem__" 781 782 invocation = compiler.ast.CallFunc( 783 compiler.ast.Getattr(n.expr, op), 784 list(n.subs) + (expr and [expr] or []) 785 ) 786 787 # Fix parse tree structure. 788 789 if op == "__delitem__": 790 invocation = compiler.ast.Discard(invocation) 791 792 return self.process_structure_node(invocation) 793 794 def process_attribute_chain(self, n): 795 796 """ 797 Process the given attribute access node 'n'. Return a reference 798 describing the expression. 799 """ 800 801 # AssAttr/Getattr are nested with the outermost access being the last 802 # access in any chain. 803 804 self.attrs.insert(0, n.attrname) 805 attrs = self.attrs 806 807 # Break attribute chains where non-access nodes are found. 808 809 if not self.have_access_expression(n): 810 self.reset_attribute_chain() 811 812 # Descend into the expression, extending backwards any existing chain, 813 # or building another for the expression. 814 815 name_ref = self.process_structure_node(n.expr) 816 817 # Restore chain information applying to this node. 818 819 if not self.have_access_expression(n): 820 self.restore_attribute_chain(attrs) 821 822 # Return immediately if the expression was another access and thus a 823 # continuation backwards along the chain. The above processing will 824 # have followed the chain all the way to its conclusion. 825 826 if self.have_access_expression(n): 827 del self.attrs[0] 828 829 return name_ref 830 831 # Attribute chain handling. 832 833 def reset_attribute_chain(self): 834 835 "Reset the attribute chain for a subexpression of an attribute access." 836 837 self.attrs = [] 838 self.chain_assignment.append(self.in_assignment) 839 self.chain_invocation.append(self.in_invocation) 840 self.in_assignment = None 841 self.in_invocation = None 842 843 def restore_attribute_chain(self, attrs): 844 845 "Restore the attribute chain for an attribute access." 846 847 self.attrs = attrs 848 self.in_assignment = self.chain_assignment.pop() 849 self.in_invocation = self.chain_invocation.pop() 850 851 def have_access_expression(self, node): 852 853 "Return whether the expression associated with 'node' is Getattr." 854 855 return isinstance(node.expr, compiler.ast.Getattr) 856 857 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 858 859 """ 860 Return the name to be used for attribute usage observations involving 861 the given 'name' in the current namespace. 862 863 If the name is being used outside a function, and if 'name_ref' is 864 given and indicates a global or if 'is_global' is specified as a true 865 value, a path featuring the name in the global namespace is returned. 866 Otherwise, a path computed using the current namespace and the given 867 name is returned. 868 869 The intention of this method is to provide a suitably-qualified name 870 that can be tracked across namespaces. Where globals are being 871 referenced in class namespaces, they should be referenced using their 872 path within the module, not using a path within each class. 873 874 It may not be possible to identify a global within a function at the 875 time of inspection (since a global may appear later in a file). 876 Consequently, globals are identified by their local name rather than 877 their module-qualified path. 878 """ 879 880 # For functions, use the appropriate local names. 881 882 if self.in_function: 883 return name 884 885 # For global names outside functions, use a global name. 886 887 elif is_global or name_ref and name_ref.is_global_name(): 888 return self.get_global_path(name) 889 890 # Otherwise, establish a name in the current namespace. 891 892 else: 893 return self.get_object_path(name) 894 895 def get_path_for_access(self): 896 897 "Outside functions, register accesses at the module level." 898 899 if not self.in_function: 900 return self.name 901 else: 902 return self.get_namespace_path() 903 904 def get_module_name(self, node): 905 906 """ 907 Using the given From 'node' in this module, calculate any relative import 908 information, returning a tuple containing a module to import along with any 909 names to import based on the node's name information. 910 911 Where the returned module is given as None, whole module imports should 912 be performed for the returned modules using the returned names. 913 """ 914 915 # Absolute import. 916 917 if node.level == 0: 918 return node.modname, node.names 919 920 # Relative to an ancestor of this module. 921 922 else: 923 path = self.name.split(".") 924 level = node.level 925 926 # Relative imports treat package roots as submodules. 927 928 if split(self.filename)[-1] == "__init__.py": 929 level -= 1 930 931 if level > len(path): 932 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 933 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 934 935 basename = ".".join(path[:len(path)-level]) 936 937 # Name imports from a module. 938 939 if node.modname: 940 return "%s.%s" % (basename, node.modname), node.names 941 942 # Relative whole module imports. 943 944 else: 945 return basename, node.names 946 947 def get_argnames(args): 948 949 """ 950 Return a list of all names provided by 'args'. Since tuples may be 951 employed, the arguments are traversed depth-first. 952 """ 953 954 l = [] 955 for arg in args: 956 if isinstance(arg, tuple): 957 l += get_argnames(arg) 958 else: 959 l.append(arg) 960 return l 961 962 def get_names_from_nodes(nodes): 963 964 """ 965 Return the names employed in the given 'nodes' along with the number of 966 nodes excluding sequences. 967 """ 968 969 names = set() 970 count = 0 971 972 for node in nodes: 973 974 # Add names and count them. 975 976 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 977 names.add(node.name) 978 count += 1 979 980 # Add names from sequences and incorporate their counts. 981 982 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 983 compiler.ast.List, compiler.ast.Set, 984 compiler.ast.Tuple)): 985 _names, _count = get_names_from_nodes(node.nodes) 986 names.update(_names) 987 count += _count 988 989 # Count non-name, non-sequence nodes. 990 991 else: 992 count += 1 993 994 return names, count 995 996 # Location classes. 997 998 class Location: 999 1000 "A generic program location." 1001 1002 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 1003 self.path = path 1004 self.name = name 1005 self.attrnames = attrnames 1006 self.version = version 1007 self.access_number = access_number 1008 1009 def __repr__(self): 1010 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 1011 1012 def as_tuple(self): 1013 return (self.path, self.name, self.attrnames, self.version, self.access_number) 1014 1015 def __hash__(self): 1016 return hash(self.as_tuple()) 1017 1018 def __eq__(self, other): 1019 return self.as_tuple() == other.as_tuple() 1020 1021 def __cmp__(self, other): 1022 return cmp(self.as_tuple(), other.as_tuple()) 1023 1024 def get_attrname(self): 1025 1026 """ 1027 Extract the first attribute from the attribute names employed in this 1028 location. 1029 """ 1030 1031 attrnames = self.attrnames 1032 if not attrnames: 1033 return attrnames 1034 return get_attrnames(attrnames)[0] 1035 1036 class AccessLocation(Location): 1037 1038 "A specialised access location." 1039 1040 def __init__(self, path, name, attrnames, access_number): 1041 1042 """ 1043 Initialise an access location featuring 'path', 'name', 'attrnames' and 1044 'access_number'. 1045 """ 1046 1047 Location.__init__(self, path, name, attrnames, None, access_number) 1048 1049 def __repr__(self): 1050 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1051 1052 # Result classes. 1053 1054 class InstructionSequence: 1055 1056 "A generic sequence of instructions." 1057 1058 def __init__(self, instructions): 1059 self.instructions = instructions 1060 1061 def get_value_instruction(self): 1062 return self.instructions[-1] 1063 1064 def get_init_instructions(self): 1065 return self.instructions[:-1] 1066 1067 # Dictionary utilities. 1068 1069 def init_item(d, key, fn): 1070 1071 """ 1072 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1073 value where no entry already exists. 1074 """ 1075 1076 if not d.has_key(key): 1077 d[key] = fn() 1078 return d[key] 1079 1080 def dict_for_keys(d, keys): 1081 1082 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1083 1084 nd = {} 1085 for key in keys: 1086 if d.has_key(key): 1087 nd[key] = d[key] 1088 return nd 1089 1090 def make_key(s): 1091 1092 "Make sequence 's' into a tuple-based key, first sorting its contents." 1093 1094 l = list(s) 1095 l.sort() 1096 return tuple(l) 1097 1098 def add_counter_item(d, key): 1099 1100 """ 1101 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1102 maintaining a mapping of keys to their order of insertion. 1103 """ 1104 1105 if not d.has_key(key): 1106 d[key] = len(d.keys()) 1107 return d[key] 1108 1109 def remove_items(d1, d2): 1110 1111 "Remove from 'd1' all items from 'd2'." 1112 1113 for key in d2.keys(): 1114 if d1.has_key(key): 1115 del d1[key] 1116 1117 # Set utilities. 1118 1119 def first(s): 1120 return list(s)[0] 1121 1122 def same(s1, s2): 1123 return set(s1) == set(s2) 1124 1125 def order_dependencies(all_depends): 1126 1127 """ 1128 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1129 has the form "A depends on B, C...". The result will order A, B, C, and so 1130 on. 1131 """ 1132 1133 usage = init_reverse_dependencies(all_depends) 1134 1135 # Produce an ordering by obtaining exposed items (required by items already 1136 # processed) and putting them at the start of the list. 1137 1138 ordered = [] 1139 1140 while usage: 1141 have_next = False 1142 1143 for key, n in usage.items(): 1144 1145 # Add items needed by no other items to the ordering. 1146 1147 if not n: 1148 remove_dependency(key, all_depends, usage, ordered) 1149 have_next = True 1150 1151 if not have_next: 1152 raise ValueError, usage 1153 1154 return ordered 1155 1156 def order_dependencies_partial(all_depends): 1157 1158 """ 1159 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1160 has the form "A depends on B, C...". The result will order A, B, C, and so 1161 on. Where cycles exist, they will be broken and a partial ordering returned. 1162 """ 1163 1164 usage = init_reverse_dependencies(all_depends) 1165 1166 # Duplicate the dependencies for subsequent modification. 1167 1168 new_depends = {} 1169 for key, values in all_depends.items(): 1170 new_depends[key] = set(values) 1171 1172 all_depends = new_depends 1173 1174 # Produce an ordering by obtaining exposed items (required by items already 1175 # processed) and putting them at the start of the list. 1176 1177 ordered = [] 1178 1179 while usage: 1180 least = None 1181 least_key = None 1182 1183 for key, n in usage.items(): 1184 1185 # Add items needed by no other items to the ordering. 1186 1187 if not n: 1188 remove_dependency(key, all_depends, usage, ordered) 1189 least = 0 1190 1191 # When breaking cycles, note the least used items. 1192 1193 elif least is None or len(n) < least: 1194 least_key = key 1195 least = len(n) 1196 1197 if least: 1198 transfer_dependencies(least_key, all_depends, usage, ordered) 1199 1200 return ordered 1201 1202 def init_reverse_dependencies(all_depends): 1203 1204 """ 1205 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1206 record the reverse dependencies, making a mapping of the form 1207 "B is needed by A", "C is needed by A", and so on. 1208 """ 1209 1210 usage = {} 1211 1212 # Record path-based dependencies. 1213 1214 for key in all_depends.keys(): 1215 usage[key] = set() 1216 1217 for key, depends in all_depends.items(): 1218 for depend in depends: 1219 init_item(usage, depend, set) 1220 usage[depend].add(key) 1221 1222 return usage 1223 1224 def transfer_dependencies(key, all_depends, usage, ordered): 1225 1226 """ 1227 Transfer items needed by 'key' to those items needing 'key', found using 1228 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1229 collection of dependencies. 1230 1231 If "A is needed by X" and "B is needed by A", then transferring items needed 1232 by A will cause "B is needed by X" to be recorded as a consequence. 1233 1234 Transferring items also needs to occur in the reverse mapping, so that 1235 "A needs B" and "X needs A", then the consequence must be recorded as 1236 "X needs B". 1237 """ 1238 1239 ordered.insert(0, key) 1240 1241 needing = usage[key] # A is needed by X 1242 needed = all_depends.get(key) # A needs B 1243 1244 if needing: 1245 for depend in needing: 1246 l = all_depends.get(depend) 1247 if not l: 1248 continue 1249 1250 l.remove(key) # X needs (A) 1251 1252 if needed: 1253 l.update(needed) # X needs B... 1254 1255 # Prevent self references. 1256 1257 if depend in needed: 1258 l.remove(depend) 1259 1260 if needed: 1261 for depend in needed: 1262 l = usage.get(depend) 1263 if not l: 1264 continue 1265 1266 l.remove(key) # B is needed by (A) 1267 l.update(needing) # B is needed by X... 1268 1269 # Prevent self references. 1270 1271 if depend in needing: 1272 l.remove(depend) 1273 1274 if needed: 1275 del all_depends[key] 1276 del usage[key] 1277 1278 def remove_dependency(key, all_depends, usage, ordered): 1279 1280 """ 1281 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1282 'ordered' collection of dependencies. 1283 1284 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1285 upon removing A from 'usage', the outcome is that all keys needing A will 1286 have A removed from their 'usage' records. 1287 1288 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1289 as a consequence. 1290 """ 1291 1292 ordered.insert(0, key) 1293 1294 depends = all_depends.get(key) 1295 1296 # Reduce usage of the referenced items. 1297 1298 if depends: 1299 for depend in depends: 1300 usage[depend].remove(key) 1301 1302 del usage[key] 1303 1304 # General input/output. 1305 1306 def readfile(filename): 1307 1308 "Return the contents of 'filename'." 1309 1310 f = open(filename) 1311 try: 1312 return f.read() 1313 finally: 1314 f.close() 1315 1316 def writefile(filename, s): 1317 1318 "Write to 'filename' the string 's'." 1319 1320 f = open(filename, "w") 1321 try: 1322 f.write(s) 1323 finally: 1324 f.close() 1325 1326 # General encoding. 1327 1328 def sorted_output(x): 1329 1330 "Sort sequence 'x' and return a string with commas separating the values." 1331 1332 x = map(str, x) 1333 x.sort() 1334 return ", ".join(x) 1335 1336 def get_string_details(literals, encoding): 1337 1338 """ 1339 Determine whether 'literals' represent Unicode strings or byte strings, 1340 using 'encoding' to reproduce byte sequences. 1341 1342 Each literal is the full program representation including prefix and quotes 1343 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1344 string needs to be translated back to its original encoding. 1345 1346 Return a single encoded literal value, a type name, and the original 1347 encoding as a tuple. 1348 """ 1349 1350 typename = "unicode" 1351 1352 l = [] 1353 1354 for s in literals: 1355 out, _typename = get_literal_details(s) 1356 if _typename == "str": 1357 typename = "str" 1358 l.append(out) 1359 1360 out = "".join(l) 1361 1362 # For Unicode values, convert to the UTF-8 program representation. 1363 1364 if typename == "unicode": 1365 return out.encode("utf-8"), typename, encoding 1366 1367 # For byte string values, convert back to the original encoding. 1368 1369 else: 1370 return out.encode(encoding), typename, encoding 1371 1372 def get_literal_details(s): 1373 1374 """ 1375 Determine whether 's' represents a Unicode string or a byte string, where 1376 's' contains the full program representation of a literal including prefix 1377 and quotes, recoded by the parser to UTF-8. 1378 1379 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1380 and byte or Unicode values starting with <backslash><octal digit> or 1381 <backslash>x. 1382 1383 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1384 to be considered as Unicode values. Otherwise, they produce byte values and 1385 cause unprefixed strings to be considered as byte strings. 1386 1387 Literals prefixed with "r" do not have their backslash-encoded values 1388 converted unless also prefixed with "u", in which case only the above value 1389 formats are converted, not any of the other special sequences for things 1390 like newlines. 1391 1392 Return the literal value as a Unicode object together with the appropriate 1393 type name in a tuple. 1394 """ 1395 1396 l = [] 1397 1398 # Identify the quote character and use it to identify the prefix. 1399 1400 quote_type = s[-1] 1401 prefix_end = s.find(quote_type) 1402 prefix = s[:prefix_end].lower() 1403 1404 if prefix not in ("", "b", "br", "r", "u", "ur"): 1405 raise ValueError, "String literal does not have a supported prefix: %s" % s 1406 1407 if "b" in prefix: 1408 typename = "str" 1409 else: 1410 typename = "unicode" 1411 1412 # Identify triple quotes or single quotes. 1413 1414 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1415 quote = s[prefix_end:prefix_end+3] 1416 current = prefix_end + 3 1417 end = len(s) - 3 1418 else: 1419 quote = s[prefix_end] 1420 current = prefix_end + 1 1421 end = len(s) - 1 1422 1423 # Conversions of some quoted values. 1424 1425 searches = { 1426 "u" : (6, 16), 1427 "U" : (10, 16), 1428 "x" : (4, 16), 1429 } 1430 1431 octal_digits = map(str, range(0, 8)) 1432 1433 # Translations of some quoted values. 1434 1435 escaped = { 1436 "\\" : "\\", "'" : "'", '"' : '"', 1437 "a" : "\a", "b" : "\b", "f" : "\f", 1438 "n" : "\n", "r" : "\r", "t" : "\t", 1439 } 1440 1441 while current < end: 1442 1443 # Look for quoted values. 1444 1445 index = s.find("\\", current) 1446 if index == -1 or index + 1 == end: 1447 l.append(s[current:end]) 1448 break 1449 1450 # Add the preceding text. 1451 1452 l.append(s[current:index]) 1453 1454 # Handle quoted text. 1455 1456 term = s[index+1] 1457 1458 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1459 # produce Unicode values. 1460 1461 if typename == "unicode" and ( 1462 term in ("u", "U") or 1463 "u" in prefix and (term == "x" or term in octal_digits)): 1464 1465 needed, base = searches.get(term, (4, 8)) 1466 value = convert_quoted_value(s, index, needed, end, base, unichr) 1467 l.append(value) 1468 current = index + needed 1469 1470 # Add raw byte values, changing the string type. 1471 1472 elif "r" not in prefix and ( 1473 term == "x" or term in octal_digits): 1474 1475 needed, base = searches.get(term, (4, 8)) 1476 value = convert_quoted_value(s, index, needed, end, base, chr) 1477 l.append(value) 1478 typename = "str" 1479 current = index + needed 1480 1481 # Add other escaped values. 1482 1483 elif "r" not in prefix and escaped.has_key(term): 1484 l.append(escaped[term]) 1485 current = index + 2 1486 1487 # Add other text as found. 1488 1489 else: 1490 l.append(s[index:index+2]) 1491 current = index + 2 1492 1493 # Collect the components into a single Unicode object. Since the literal 1494 # text was already in UTF-8 form, interpret plain strings as UTF-8 1495 # sequences. 1496 1497 out = [] 1498 1499 for value in l: 1500 if isinstance(value, unicode): 1501 out.append(value) 1502 else: 1503 out.append(unicode(value, "utf-8")) 1504 1505 return "".join(out), typename 1506 1507 def convert_quoted_value(s, index, needed, end, base, fn): 1508 1509 """ 1510 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1511 positions, and with the given 'end' indicating the first position after the 1512 end of the actual string content. 1513 1514 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1515 to convert the value to an appropriate type. 1516 """ 1517 1518 s = s[index:min(index+needed, end)] 1519 1520 # Not a complete occurrence. 1521 1522 if len(s) < needed: 1523 return s 1524 1525 # Test for a well-formed value. 1526 1527 try: 1528 first = base == 8 and 1 or 2 1529 value = int(s[first:needed], base) 1530 except ValueError: 1531 return s 1532 else: 1533 return fn(value) 1534 1535 # Attribute chain decoding. 1536 1537 def get_attrnames(attrnames): 1538 1539 """ 1540 Split the qualified attribute chain 'attrnames' into its components, 1541 handling special attributes starting with "#" that indicate type 1542 conformance. 1543 """ 1544 1545 if attrnames.startswith("#"): 1546 return [attrnames] 1547 else: 1548 return attrnames.split(".") 1549 1550 def get_name_path(path, name): 1551 1552 "Return a suitable qualified name from the given 'path' and 'name'." 1553 1554 if "." in name: 1555 return name 1556 else: 1557 return "%s.%s" % (path, name) 1558 1559 # Usage-related functions. 1560 1561 def get_types_for_usage(attrnames, objects): 1562 1563 """ 1564 Identify the types that can support the given 'attrnames', using the 1565 given 'objects' as the catalogue of type details. 1566 """ 1567 1568 types = [] 1569 for name, _attrnames in objects.items(): 1570 if set(attrnames).issubset(_attrnames): 1571 types.append(name) 1572 return types 1573 1574 def get_invoked_attributes(usage): 1575 1576 "Obtain invoked attribute from the given 'usage'." 1577 1578 invoked = [] 1579 if usage: 1580 for attrname, invocation, assignment in usage: 1581 if invocation: 1582 invoked.append(attrname) 1583 return invoked 1584 1585 def get_assigned_attributes(usage): 1586 1587 "Obtain assigned attribute from the given 'usage'." 1588 1589 assigned = [] 1590 if usage: 1591 for attrname, invocation, assignment in usage: 1592 if assignment: 1593 assigned.append(attrname) 1594 return assigned 1595 1596 # Type and module functions. 1597 # NOTE: This makes assumptions about the __builtins__ structure. 1598 1599 def get_builtin_module(name): 1600 1601 "Return the module name containing the given type 'name'." 1602 1603 if name == "NoneType": 1604 modname = "none" 1605 else: 1606 modname = name 1607 1608 return "__builtins__.%s" % modname 1609 1610 def get_builtin_type(name): 1611 1612 "Return the type name provided by the given Python value 'name'." 1613 1614 return name 1615 1616 def get_builtin_class(name): 1617 1618 "Return the full name of the built-in class having the given 'name'." 1619 1620 typename = get_builtin_type(name) 1621 module = get_builtin_module(typename) 1622 return "%s.%s" % (module, typename) 1623 1624 # Useful data. 1625 1626 predefined_constants = "False", "None", "NotImplemented", "True" 1627 1628 privileged_attributes = [ 1629 "__get_single_item_unchecked__", 1630 ] 1631 1632 unary_operator_functions = { 1633 1634 # Unary operations. 1635 1636 "Invert" : "invert", 1637 "UnaryAdd" : "pos", 1638 "UnarySub" : "neg", 1639 } 1640 1641 operator_functions = { 1642 1643 # Fundamental operations. 1644 1645 "is" : "is_", 1646 "is not" : "is_not", 1647 1648 # Binary operations. 1649 1650 "in" : "in_", 1651 "not in" : "not_in", 1652 "Add" : "add", 1653 "Bitand" : "and_", 1654 "Bitor" : "or_", 1655 "Bitxor" : "xor", 1656 "Div" : "div", 1657 "FloorDiv" : "floordiv", 1658 "LeftShift" : "lshift", 1659 "Mod" : "mod", 1660 "Mul" : "mul", 1661 "Power" : "pow", 1662 "RightShift" : "rshift", 1663 "Sub" : "sub", 1664 1665 # Augmented assignment. 1666 1667 "+=" : "iadd", 1668 "-=" : "isub", 1669 "*=" : "imul", 1670 "/=" : "idiv", 1671 "//=" : "ifloordiv", 1672 "%=" : "imod", 1673 "**=" : "ipow", 1674 "<<=" : "ilshift", 1675 ">>=" : "irshift", 1676 "&=" : "iand", 1677 "^=" : "ixor", 1678 "|=" : "ior", 1679 1680 # Comparisons. 1681 1682 "==" : "eq", 1683 "!=" : "ne", 1684 "<" : "lt", 1685 "<=" : "le", 1686 ">=" : "ge", 1687 ">" : "gt", 1688 } 1689 1690 operator_functions.update(unary_operator_functions) 1691 1692 # vim: tabstop=4 expandtab shiftwidth=4