1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 7 2017, 2018, 2019 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 statements = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 statements.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Generate a test for the length of the expression object. 455 456 statements.append(compiler.ast.Discard( 457 compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"), 458 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))]))) 459 460 # Assign the items to the target nodes. 461 462 for i, node in enumerate(n.nodes): 463 statements.append( 464 compiler.ast.Assign([node], compiler.ast.CallFunc( 465 compiler.ast.Getattr(compiler.ast.Name(temp), 466 "__get_single_item_unchecked__", 467 privileged=True), 468 [compiler.ast.Const(i, str(i))])) 469 ) 470 471 return self.process_structure_node(compiler.ast.Stmt(statements)) 472 473 def process_literal_sequence_items(self, n, name_ref): 474 475 """ 476 Process the given assignment node 'n', obtaining from the given 477 'name_ref' the items to be assigned to the assignment targets. 478 479 Return whether this method was able to process the assignment node as 480 a sequence of direct assignments. 481 """ 482 483 if len(n.nodes) == len(name_ref.items): 484 assigned_names, count = get_names_from_nodes(n.nodes) 485 accessed_names, _count = get_names_from_nodes(name_ref.items) 486 487 # Only assign directly between items if all assigned names are 488 # plain names (not attribute assignments), and if the assigned names 489 # do not appear in the accessed names. 490 491 if len(assigned_names) == count and \ 492 not assigned_names.intersection(accessed_names): 493 494 for node, item in zip(n.nodes, name_ref.items): 495 self.process_assignment_node(node, item) 496 497 return True 498 499 # Otherwise, use the position-based mechanism to obtain values. 500 501 else: 502 return False 503 else: 504 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 505 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 506 507 def process_compare_node(self, n): 508 509 """ 510 Process the given comparison node 'n', converting an operator sequence 511 from... 512 513 <expr1> <op1> <expr2> <op2> <expr3> 514 515 ...to... 516 517 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 518 """ 519 520 invocations = [] 521 last = n.expr 522 523 for op, op_node in n.ops: 524 op = operator_functions.get(op) 525 526 invocations.append(compiler.ast.CallFunc( 527 compiler.ast.Name("$op%s" % op), 528 [last, op_node])) 529 530 last = op_node 531 532 if len(invocations) > 1: 533 result = compiler.ast.And(invocations) 534 else: 535 result = invocations[0] 536 537 return self.process_structure_node(result) 538 539 def process_dict_node(self, node): 540 541 """ 542 Process the given dictionary 'node', returning a list of (key, value) 543 tuples. 544 """ 545 546 l = [] 547 for key, value in node.items: 548 l.append(( 549 self.process_structure_node(key), 550 self.process_structure_node(value))) 551 return l 552 553 def process_for_node(self, n): 554 555 """ 556 Generate attribute accesses for {n.list}.__iter__ and the next method on 557 the iterator, producing a replacement node for the original. 558 """ 559 560 t0 = self.get_temporary_name() 561 self.next_temporary() 562 t1 = self.get_temporary_name() 563 self.next_temporary() 564 t2 = self.get_temporary_name() 565 self.next_temporary() 566 t3 = self.get_temporary_name() 567 self.next_temporary() 568 569 node = compiler.ast.Stmt([ 570 571 # <t0> = {n.list} 572 # <t1> = <t0>.__iter__() 573 574 compiler.ast.Assign( 575 [compiler.ast.AssName(t0, "OP_ASSIGN")], 576 n.list), 577 578 compiler.ast.Assign( 579 [compiler.ast.AssName(t1, "OP_ASSIGN")], 580 compiler.ast.CallFunc( 581 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 582 [])), 583 584 # <t2> = <t1>.next 585 # try: 586 # while True: 587 # try: 588 # <t3> = <t2>() 589 # except StopIteration: 590 # raise LoopExit 591 # <var>... = <t3> 592 # {n.body} 593 # except LoopExit: 594 # {n.else_} 595 # pass 596 597 compiler.ast.Assign( 598 [compiler.ast.AssName(t2, "OP_ASSIGN")], 599 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 600 601 # try: 602 603 compiler.ast.TryExcept( 604 605 # while True: 606 607 compiler.ast.While( 608 compiler.ast.Name("True"), 609 compiler.ast.Stmt([ 610 611 # try: 612 613 compiler.ast.TryExcept( 614 615 # <t3> = <t2>() 616 617 compiler.ast.Assign( 618 [compiler.ast.AssName(t3, "OP_ASSIGN")], 619 compiler.ast.CallFunc( 620 compiler.ast.Name(t2), 621 [])), 622 623 # except StopIteration: 624 # raise LoopExit 625 626 [(compiler.ast.Name("StopIteration"), None, 627 compiler.ast.Raise(compiler.ast.Name("LoopExit")))], 628 None), 629 630 # <var>... = <t3> 631 632 compiler.ast.Assign( 633 [n.assign], 634 compiler.ast.Name(t3)), 635 n.body]), 636 None), 637 638 # except LoopExit: 639 # {n.else_} 640 # pass 641 642 [(compiler.ast.Name("LoopExit"), None, n.else_ or compiler.ast.Pass())], 643 None) 644 ]) 645 646 self.process_structure_node(node) 647 648 def process_literal_sequence_node(self, n, name, ref, cls): 649 650 """ 651 Process the given literal sequence node 'n' as a function invocation, 652 with 'name' indicating the type of the sequence, and 'ref' being a 653 reference to the type. The 'cls' is used to instantiate a suitable name 654 reference. 655 """ 656 657 if name == "dict": 658 items = [] 659 for key, value in n.items: 660 items.append(compiler.ast.Tuple([key, value])) 661 else: # name in ("list", "tuple"): 662 items = n.nodes 663 664 return self.get_literal_reference(name, ref, items, cls) 665 666 def process_operator_node(self, n): 667 668 """ 669 Process the given operator node 'n' as an operator function invocation. 670 """ 671 672 opname = n.__class__.__name__ 673 operands = n.getChildNodes() 674 675 # Convert a unary operation to an invocation. 676 677 op = unary_operator_functions.get(opname) 678 679 if op: 680 invocation = compiler.ast.CallFunc( 681 compiler.ast.Name("$op%s" % op), 682 [operands[0]] 683 ) 684 685 # Convert a single operator with a list of operands to a combination of 686 # pairwise operations. 687 688 else: 689 op = operator_functions[opname] 690 invocation = self._process_operator_node(op, operands) 691 692 return self.process_structure_node(invocation) 693 694 def _process_operator_node(self, op, operands): 695 696 """ 697 Process the given 'op', being an operator function, together with the 698 supplied 'operands', returning either a single remaining operand or an 699 invocation combining the operands. 700 """ 701 702 remaining = operands[1:] 703 if not remaining: 704 return operands[0] 705 706 return compiler.ast.CallFunc( 707 compiler.ast.Name("$op%s" % op), 708 [operands[0], self._process_operator_node(op, remaining)] 709 ) 710 711 def process_print_node(self, n): 712 713 """ 714 Process the given print node 'n' as an invocation on a stream of the 715 form... 716 717 $print(dest, args, nl) 718 719 The special function name will be translated elsewhere. 720 """ 721 722 nl = isinstance(n, compiler.ast.Printnl) 723 invocation = compiler.ast.CallFunc( 724 compiler.ast.Name("$print"), 725 [n.dest or compiler.ast.Name("None"), 726 compiler.ast.List(list(n.nodes)), 727 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 728 ) 729 return self.process_structure_node(invocation) 730 731 def process_slice_node(self, n, expr=None): 732 733 """ 734 Process the given slice node 'n' as a method invocation. 735 """ 736 737 if n.flags == "OP_ASSIGN": op = "__setslice__" 738 elif n.flags == "OP_DELETE": op = "__delslice__" 739 else: op = "__getslice__" 740 741 invocation = compiler.ast.CallFunc( 742 compiler.ast.Getattr(n.expr, op), 743 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 744 (expr and [expr] or []) 745 ) 746 747 # Fix parse tree structure. 748 749 if op == "__delslice__": 750 invocation = compiler.ast.Discard(invocation) 751 752 return self.process_structure_node(invocation) 753 754 def process_sliceobj_node(self, n): 755 756 """ 757 Process the given slice object node 'n' as a slice constructor. 758 """ 759 760 op = "slice" 761 invocation = compiler.ast.CallFunc( 762 compiler.ast.Name("$op%s" % op), 763 n.nodes 764 ) 765 return self.process_structure_node(invocation) 766 767 def process_subscript_node(self, n, expr=None): 768 769 """ 770 Process the given subscript node 'n' as a method invocation. 771 """ 772 773 if n.flags == "OP_ASSIGN": op = "__setitem__" 774 elif n.flags == "OP_DELETE": op = "__delitem__" 775 else: op = "__getitem__" 776 777 invocation = compiler.ast.CallFunc( 778 compiler.ast.Getattr(n.expr, op), 779 list(n.subs) + (expr and [expr] or []) 780 ) 781 782 # Fix parse tree structure. 783 784 if op == "__delitem__": 785 invocation = compiler.ast.Discard(invocation) 786 787 return self.process_structure_node(invocation) 788 789 def process_attribute_chain(self, n): 790 791 """ 792 Process the given attribute access node 'n'. Return a reference 793 describing the expression. 794 """ 795 796 # AssAttr/Getattr are nested with the outermost access being the last 797 # access in any chain. 798 799 self.attrs.insert(0, n.attrname) 800 attrs = self.attrs 801 802 # Break attribute chains where non-access nodes are found. 803 804 if not self.have_access_expression(n): 805 self.reset_attribute_chain() 806 807 # Descend into the expression, extending backwards any existing chain, 808 # or building another for the expression. 809 810 name_ref = self.process_structure_node(n.expr) 811 812 # Restore chain information applying to this node. 813 814 if not self.have_access_expression(n): 815 self.restore_attribute_chain(attrs) 816 817 # Return immediately if the expression was another access and thus a 818 # continuation backwards along the chain. The above processing will 819 # have followed the chain all the way to its conclusion. 820 821 if self.have_access_expression(n): 822 del self.attrs[0] 823 824 return name_ref 825 826 # Attribute chain handling. 827 828 def reset_attribute_chain(self): 829 830 "Reset the attribute chain for a subexpression of an attribute access." 831 832 self.attrs = [] 833 self.chain_assignment.append(self.in_assignment) 834 self.chain_invocation.append(self.in_invocation) 835 self.in_assignment = None 836 self.in_invocation = None 837 838 def restore_attribute_chain(self, attrs): 839 840 "Restore the attribute chain for an attribute access." 841 842 self.attrs = attrs 843 self.in_assignment = self.chain_assignment.pop() 844 self.in_invocation = self.chain_invocation.pop() 845 846 def have_access_expression(self, node): 847 848 "Return whether the expression associated with 'node' is Getattr." 849 850 return isinstance(node.expr, compiler.ast.Getattr) 851 852 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 853 854 """ 855 Return the name to be used for attribute usage observations involving 856 the given 'name' in the current namespace. 857 858 If the name is being used outside a function, and if 'name_ref' is 859 given and indicates a global or if 'is_global' is specified as a true 860 value, a path featuring the name in the global namespace is returned. 861 Otherwise, a path computed using the current namespace and the given 862 name is returned. 863 864 The intention of this method is to provide a suitably-qualified name 865 that can be tracked across namespaces. Where globals are being 866 referenced in class namespaces, they should be referenced using their 867 path within the module, not using a path within each class. 868 869 It may not be possible to identify a global within a function at the 870 time of inspection (since a global may appear later in a file). 871 Consequently, globals are identified by their local name rather than 872 their module-qualified path. 873 """ 874 875 # For functions, use the appropriate local names. 876 877 if self.in_function: 878 return name 879 880 # For global names outside functions, use a global name. 881 882 elif is_global or name_ref and name_ref.is_global_name(): 883 return self.get_global_path(name) 884 885 # Otherwise, establish a name in the current namespace. 886 887 else: 888 return self.get_object_path(name) 889 890 def get_path_for_access(self): 891 892 "Outside functions, register accesses at the module level." 893 894 if not self.in_function: 895 return self.name 896 else: 897 return self.get_namespace_path() 898 899 def get_module_name(self, node): 900 901 """ 902 Using the given From 'node' in this module, calculate any relative import 903 information, returning a tuple containing a module to import along with any 904 names to import based on the node's name information. 905 906 Where the returned module is given as None, whole module imports should 907 be performed for the returned modules using the returned names. 908 """ 909 910 # Absolute import. 911 912 if node.level == 0: 913 return node.modname, node.names 914 915 # Relative to an ancestor of this module. 916 917 else: 918 path = self.name.split(".") 919 level = node.level 920 921 # Relative imports treat package roots as submodules. 922 923 if split(self.filename)[-1] == "__init__.py": 924 level -= 1 925 926 if level > len(path): 927 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 928 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 929 930 basename = ".".join(path[:len(path)-level]) 931 932 # Name imports from a module. 933 934 if node.modname: 935 return "%s.%s" % (basename, node.modname), node.names 936 937 # Relative whole module imports. 938 939 else: 940 return basename, node.names 941 942 def get_argnames(args): 943 944 """ 945 Return a list of all names provided by 'args'. Since tuples may be 946 employed, the arguments are traversed depth-first. 947 """ 948 949 l = [] 950 for arg in args: 951 if isinstance(arg, tuple): 952 l += get_argnames(arg) 953 else: 954 l.append(arg) 955 return l 956 957 def get_names_from_nodes(nodes): 958 959 """ 960 Return the names employed in the given 'nodes' along with the number of 961 nodes excluding sequences. 962 """ 963 964 names = set() 965 count = 0 966 967 for node in nodes: 968 969 # Add names and count them. 970 971 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 972 names.add(node.name) 973 count += 1 974 975 # Add names from sequences and incorporate their counts. 976 977 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 978 compiler.ast.List, compiler.ast.Set, 979 compiler.ast.Tuple)): 980 _names, _count = get_names_from_nodes(node.nodes) 981 names.update(_names) 982 count += _count 983 984 # Count non-name, non-sequence nodes. 985 986 else: 987 count += 1 988 989 return names, count 990 991 # Location classes. 992 993 class Location: 994 995 "A generic program location." 996 997 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 998 self.path = path 999 self.name = name 1000 self.attrnames = attrnames 1001 self.version = version 1002 self.access_number = access_number 1003 1004 def __repr__(self): 1005 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 1006 1007 def as_tuple(self): 1008 return (self.path, self.name, self.attrnames, self.version, self.access_number) 1009 1010 def __hash__(self): 1011 return hash(self.as_tuple()) 1012 1013 def __eq__(self, other): 1014 return self.as_tuple() == other.as_tuple() 1015 1016 def __cmp__(self, other): 1017 return cmp(self.as_tuple(), other.as_tuple()) 1018 1019 def get_attrname(self): 1020 1021 """ 1022 Extract the first attribute from the attribute names employed in this 1023 location. 1024 """ 1025 1026 attrnames = self.attrnames 1027 if not attrnames: 1028 return attrnames 1029 return get_attrnames(attrnames)[0] 1030 1031 class AccessLocation(Location): 1032 1033 "A specialised access location." 1034 1035 def __init__(self, path, name, attrnames, access_number): 1036 1037 """ 1038 Initialise an access location featuring 'path', 'name', 'attrnames' and 1039 'access_number'. 1040 """ 1041 1042 Location.__init__(self, path, name, attrnames, None, access_number) 1043 1044 def __repr__(self): 1045 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1046 1047 # Result classes. 1048 1049 class InstructionSequence: 1050 1051 "A generic sequence of instructions." 1052 1053 def __init__(self, instructions): 1054 self.instructions = instructions 1055 1056 def get_value_instruction(self): 1057 return self.instructions[-1] 1058 1059 def get_init_instructions(self): 1060 return self.instructions[:-1] 1061 1062 # Dictionary utilities. 1063 1064 def init_item(d, key, fn): 1065 1066 """ 1067 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1068 value where no entry already exists. 1069 """ 1070 1071 if not d.has_key(key): 1072 d[key] = fn() 1073 return d[key] 1074 1075 def dict_for_keys(d, keys): 1076 1077 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1078 1079 nd = {} 1080 for key in keys: 1081 if d.has_key(key): 1082 nd[key] = d[key] 1083 return nd 1084 1085 def make_key(s): 1086 1087 "Make sequence 's' into a tuple-based key, first sorting its contents." 1088 1089 l = list(s) 1090 l.sort() 1091 return tuple(l) 1092 1093 def add_counter_item(d, key): 1094 1095 """ 1096 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1097 maintaining a mapping of keys to their order of insertion. 1098 """ 1099 1100 if not d.has_key(key): 1101 d[key] = len(d.keys()) 1102 return d[key] 1103 1104 def remove_items(d1, d2): 1105 1106 "Remove from 'd1' all items from 'd2'." 1107 1108 for key in d2.keys(): 1109 if d1.has_key(key): 1110 del d1[key] 1111 1112 # Set utilities. 1113 1114 def first(s): 1115 return list(s)[0] 1116 1117 def same(s1, s2): 1118 return set(s1) == set(s2) 1119 1120 def order_dependencies(all_depends): 1121 1122 """ 1123 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1124 has the form "A depends on B, C...". The result will order A, B, C, and so 1125 on. 1126 """ 1127 1128 usage = init_reverse_dependencies(all_depends) 1129 1130 # Produce an ordering by obtaining exposed items (required by items already 1131 # processed) and putting them at the start of the list. 1132 1133 ordered = [] 1134 1135 while usage: 1136 have_next = False 1137 1138 for key, n in usage.items(): 1139 1140 # Add items needed by no other items to the ordering. 1141 1142 if not n: 1143 remove_dependency(key, all_depends, usage, ordered) 1144 have_next = True 1145 1146 if not have_next: 1147 raise ValueError, usage 1148 1149 return ordered 1150 1151 def order_dependencies_partial(all_depends): 1152 1153 """ 1154 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1155 has the form "A depends on B, C...". The result will order A, B, C, and so 1156 on. Where cycles exist, they will be broken and a partial ordering returned. 1157 """ 1158 1159 usage = init_reverse_dependencies(all_depends) 1160 1161 # Duplicate the dependencies for subsequent modification. 1162 1163 new_depends = {} 1164 for key, values in all_depends.items(): 1165 new_depends[key] = set(values) 1166 1167 all_depends = new_depends 1168 1169 # Produce an ordering by obtaining exposed items (required by items already 1170 # processed) and putting them at the start of the list. 1171 1172 ordered = [] 1173 1174 while usage: 1175 least = None 1176 least_key = None 1177 1178 for key, n in usage.items(): 1179 1180 # Add items needed by no other items to the ordering. 1181 1182 if not n: 1183 remove_dependency(key, all_depends, usage, ordered) 1184 least = 0 1185 1186 # When breaking cycles, note the least used items. 1187 1188 elif least is None or len(n) < least: 1189 least_key = key 1190 least = len(n) 1191 1192 if least: 1193 transfer_dependencies(least_key, all_depends, usage, ordered) 1194 1195 return ordered 1196 1197 def init_reverse_dependencies(all_depends): 1198 1199 """ 1200 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1201 record the reverse dependencies, making a mapping of the form 1202 "B is needed by A", "C is needed by A", and so on. 1203 """ 1204 1205 usage = {} 1206 1207 # Record path-based dependencies. 1208 1209 for key in all_depends.keys(): 1210 usage[key] = set() 1211 1212 for key, depends in all_depends.items(): 1213 for depend in depends: 1214 init_item(usage, depend, set) 1215 usage[depend].add(key) 1216 1217 return usage 1218 1219 def transfer_dependencies(key, all_depends, usage, ordered): 1220 1221 """ 1222 Transfer items needed by 'key' to those items needing 'key', found using 1223 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1224 collection of dependencies. 1225 1226 If "A is needed by X" and "B is needed by A", then transferring items needed 1227 by A will cause "B is needed by X" to be recorded as a consequence. 1228 1229 Transferring items also needs to occur in the reverse mapping, so that 1230 "A needs B" and "X needs A", then the consequence must be recorded as 1231 "X needs B". 1232 """ 1233 1234 ordered.insert(0, key) 1235 1236 needing = usage[key] # A is needed by X 1237 needed = all_depends.get(key) # A needs B 1238 1239 if needing: 1240 for depend in needing: 1241 l = all_depends.get(depend) 1242 if not l: 1243 continue 1244 1245 l.remove(key) # X needs (A) 1246 1247 if needed: 1248 l.update(needed) # X needs B... 1249 1250 # Prevent self references. 1251 1252 if depend in needed: 1253 l.remove(depend) 1254 1255 if needed: 1256 for depend in needed: 1257 l = usage.get(depend) 1258 if not l: 1259 continue 1260 1261 l.remove(key) # B is needed by (A) 1262 l.update(needing) # B is needed by X... 1263 1264 # Prevent self references. 1265 1266 if depend in needing: 1267 l.remove(depend) 1268 1269 if needed: 1270 del all_depends[key] 1271 del usage[key] 1272 1273 def remove_dependency(key, all_depends, usage, ordered): 1274 1275 """ 1276 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1277 'ordered' collection of dependencies. 1278 1279 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1280 upon removing A from 'usage', the outcome is that all keys needing A will 1281 have A removed from their 'usage' records. 1282 1283 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1284 as a consequence. 1285 """ 1286 1287 ordered.insert(0, key) 1288 1289 depends = all_depends.get(key) 1290 1291 # Reduce usage of the referenced items. 1292 1293 if depends: 1294 for depend in depends: 1295 usage[depend].remove(key) 1296 1297 del usage[key] 1298 1299 # General input/output. 1300 1301 def readfile(filename): 1302 1303 "Return the contents of 'filename'." 1304 1305 f = open(filename) 1306 try: 1307 return f.read() 1308 finally: 1309 f.close() 1310 1311 def writefile(filename, s): 1312 1313 "Write to 'filename' the string 's'." 1314 1315 f = open(filename, "w") 1316 try: 1317 f.write(s) 1318 finally: 1319 f.close() 1320 1321 # General encoding. 1322 1323 def sorted_output(x): 1324 1325 "Sort sequence 'x' and return a string with commas separating the values." 1326 1327 x = map(str, x) 1328 x.sort() 1329 return ", ".join(x) 1330 1331 def get_string_details(literals, encoding): 1332 1333 """ 1334 Determine whether 'literals' represent Unicode strings or byte strings, 1335 using 'encoding' to reproduce byte sequences. 1336 1337 Each literal is the full program representation including prefix and quotes 1338 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1339 string needs to be translated back to its original encoding. 1340 1341 Return a single encoded literal value, a type name, and the original 1342 encoding as a tuple. 1343 """ 1344 1345 typename = "unicode" 1346 1347 l = [] 1348 1349 for s in literals: 1350 out, _typename = get_literal_details(s) 1351 if _typename == "str": 1352 typename = "str" 1353 l.append(out) 1354 1355 out = "".join(l) 1356 1357 # For Unicode values, convert to the UTF-8 program representation. 1358 1359 if typename == "unicode": 1360 return out.encode("utf-8"), typename, encoding 1361 1362 # For byte string values, convert back to the original encoding. 1363 1364 else: 1365 return out.encode(encoding), typename, encoding 1366 1367 def get_literal_details(s): 1368 1369 """ 1370 Determine whether 's' represents a Unicode string or a byte string, where 1371 's' contains the full program representation of a literal including prefix 1372 and quotes, recoded by the parser to UTF-8. 1373 1374 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1375 and byte or Unicode values starting with <backslash><octal digit> or 1376 <backslash>x. 1377 1378 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1379 to be considered as Unicode values. Otherwise, they produce byte values and 1380 cause unprefixed strings to be considered as byte strings. 1381 1382 Literals prefixed with "r" do not have their backslash-encoded values 1383 converted unless also prefixed with "u", in which case only the above value 1384 formats are converted, not any of the other special sequences for things 1385 like newlines. 1386 1387 Return the literal value as a Unicode object together with the appropriate 1388 type name in a tuple. 1389 """ 1390 1391 l = [] 1392 1393 # Identify the quote character and use it to identify the prefix. 1394 1395 quote_type = s[-1] 1396 prefix_end = s.find(quote_type) 1397 prefix = s[:prefix_end].lower() 1398 1399 if prefix not in ("", "b", "br", "r", "u", "ur"): 1400 raise ValueError, "String literal does not have a supported prefix: %s" % s 1401 1402 if "b" in prefix: 1403 typename = "str" 1404 else: 1405 typename = "unicode" 1406 1407 # Identify triple quotes or single quotes. 1408 1409 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1410 quote = s[prefix_end:prefix_end+3] 1411 current = prefix_end + 3 1412 end = len(s) - 3 1413 else: 1414 quote = s[prefix_end] 1415 current = prefix_end + 1 1416 end = len(s) - 1 1417 1418 # Conversions of some quoted values. 1419 1420 searches = { 1421 "u" : (6, 16), 1422 "U" : (10, 16), 1423 "x" : (4, 16), 1424 } 1425 1426 octal_digits = map(str, range(0, 8)) 1427 1428 # Translations of some quoted values. 1429 1430 escaped = { 1431 "\\" : "\\", "'" : "'", '"' : '"', 1432 "a" : "\a", "b" : "\b", "f" : "\f", 1433 "n" : "\n", "r" : "\r", "t" : "\t", 1434 } 1435 1436 while current < end: 1437 1438 # Look for quoted values. 1439 1440 index = s.find("\\", current) 1441 if index == -1 or index + 1 == end: 1442 l.append(s[current:end]) 1443 break 1444 1445 # Add the preceding text. 1446 1447 l.append(s[current:index]) 1448 1449 # Handle quoted text. 1450 1451 term = s[index+1] 1452 1453 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1454 # produce Unicode values. 1455 1456 if typename == "unicode" and ( 1457 term in ("u", "U") or 1458 "u" in prefix and (term == "x" or term in octal_digits)): 1459 1460 needed, base = searches.get(term, (4, 8)) 1461 value = convert_quoted_value(s, index, needed, end, base, unichr) 1462 l.append(value) 1463 current = index + needed 1464 1465 # Add raw byte values, changing the string type. 1466 1467 elif "r" not in prefix and ( 1468 term == "x" or term in octal_digits): 1469 1470 needed, base = searches.get(term, (4, 8)) 1471 value = convert_quoted_value(s, index, needed, end, base, chr) 1472 l.append(value) 1473 typename = "str" 1474 current = index + needed 1475 1476 # Add other escaped values. 1477 1478 elif "r" not in prefix and escaped.has_key(term): 1479 l.append(escaped[term]) 1480 current = index + 2 1481 1482 # Add other text as found. 1483 1484 else: 1485 l.append(s[index:index+2]) 1486 current = index + 2 1487 1488 # Collect the components into a single Unicode object. Since the literal 1489 # text was already in UTF-8 form, interpret plain strings as UTF-8 1490 # sequences. 1491 1492 out = [] 1493 1494 for value in l: 1495 if isinstance(value, unicode): 1496 out.append(value) 1497 else: 1498 out.append(unicode(value, "utf-8")) 1499 1500 return "".join(out), typename 1501 1502 def convert_quoted_value(s, index, needed, end, base, fn): 1503 1504 """ 1505 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1506 positions, and with the given 'end' indicating the first position after the 1507 end of the actual string content. 1508 1509 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1510 to convert the value to an appropriate type. 1511 """ 1512 1513 s = s[index:min(index+needed, end)] 1514 1515 # Not a complete occurrence. 1516 1517 if len(s) < needed: 1518 return s 1519 1520 # Test for a well-formed value. 1521 1522 try: 1523 first = base == 8 and 1 or 2 1524 value = int(s[first:needed], base) 1525 except ValueError: 1526 return s 1527 else: 1528 return fn(value) 1529 1530 # Attribute chain decoding. 1531 1532 def get_attrnames(attrnames): 1533 1534 """ 1535 Split the qualified attribute chain 'attrnames' into its components, 1536 handling special attributes starting with "#" that indicate type 1537 conformance. 1538 """ 1539 1540 if attrnames.startswith("#"): 1541 return [attrnames] 1542 else: 1543 return attrnames.split(".") 1544 1545 def get_name_path(path, name): 1546 1547 "Return a suitable qualified name from the given 'path' and 'name'." 1548 1549 if "." in name: 1550 return name 1551 else: 1552 return "%s.%s" % (path, name) 1553 1554 # Usage-related functions. 1555 1556 def get_types_for_usage(attrnames, objects): 1557 1558 """ 1559 Identify the types that can support the given 'attrnames', using the 1560 given 'objects' as the catalogue of type details. 1561 """ 1562 1563 types = [] 1564 for name, _attrnames in objects.items(): 1565 if set(attrnames).issubset(_attrnames): 1566 types.append(name) 1567 return types 1568 1569 def get_invoked_attributes(usage): 1570 1571 "Obtain invoked attribute from the given 'usage'." 1572 1573 invoked = [] 1574 if usage: 1575 for attrname, invocation, assignment in usage: 1576 if invocation: 1577 invoked.append(attrname) 1578 return invoked 1579 1580 def get_assigned_attributes(usage): 1581 1582 "Obtain assigned attribute from the given 'usage'." 1583 1584 assigned = [] 1585 if usage: 1586 for attrname, invocation, assignment in usage: 1587 if assignment: 1588 assigned.append(attrname) 1589 return assigned 1590 1591 # Type and module functions. 1592 # NOTE: This makes assumptions about the __builtins__ structure. 1593 1594 def get_builtin_module(name): 1595 1596 "Return the module name containing the given type 'name'." 1597 1598 if name == "string": 1599 modname = "str" 1600 elif name == "utf8string": 1601 modname = "unicode" 1602 elif name == "NoneType": 1603 modname = "none" 1604 else: 1605 modname = name 1606 1607 return "__builtins__.%s" % modname 1608 1609 def get_builtin_type(name): 1610 1611 "Return the type name provided by the given Python value 'name'." 1612 1613 if name == "str": 1614 return "string" 1615 elif name == "unicode": 1616 return "utf8string" 1617 else: 1618 return name 1619 1620 def get_builtin_class(name): 1621 1622 "Return the full name of the built-in class having the given 'name'." 1623 1624 typename = get_builtin_type(name) 1625 module = get_builtin_module(typename) 1626 return "%s.%s" % (module, typename) 1627 1628 # Useful data. 1629 1630 predefined_constants = "False", "None", "NotImplemented", "True" 1631 1632 privileged_attributes = [ 1633 "__get_single_item_unchecked__", 1634 ] 1635 1636 unary_operator_functions = { 1637 1638 # Unary operations. 1639 1640 "Invert" : "invert", 1641 "UnaryAdd" : "pos", 1642 "UnarySub" : "neg", 1643 } 1644 1645 operator_functions = { 1646 1647 # Fundamental operations. 1648 1649 "is" : "is_", 1650 "is not" : "is_not", 1651 1652 # Binary operations. 1653 1654 "in" : "in_", 1655 "not in" : "not_in", 1656 "Add" : "add", 1657 "Bitand" : "and_", 1658 "Bitor" : "or_", 1659 "Bitxor" : "xor", 1660 "Div" : "div", 1661 "FloorDiv" : "floordiv", 1662 "LeftShift" : "lshift", 1663 "Mod" : "mod", 1664 "Mul" : "mul", 1665 "Power" : "pow", 1666 "RightShift" : "rshift", 1667 "Sub" : "sub", 1668 1669 # Augmented assignment. 1670 1671 "+=" : "iadd", 1672 "-=" : "isub", 1673 "*=" : "imul", 1674 "/=" : "idiv", 1675 "//=" : "ifloordiv", 1676 "%=" : "imod", 1677 "**=" : "ipow", 1678 "<<=" : "ilshift", 1679 ">>=" : "irshift", 1680 "&=" : "iand", 1681 "^=" : "ixor", 1682 "|=" : "ior", 1683 1684 # Comparisons. 1685 1686 "==" : "eq", 1687 "!=" : "ne", 1688 "<" : "lt", 1689 "<=" : "le", 1690 ">=" : "ge", 1691 ">" : "gt", 1692 } 1693 1694 operator_functions.update(unary_operator_functions) 1695 1696 # vim: tabstop=4 expandtab shiftwidth=4