1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 def get_parent_path(self): 190 return ".".join([self.name] + self.namespace_path[:-1]) 191 192 # Namespace management. 193 194 def enter_namespace(self, name): 195 196 "Enter the namespace having the given 'name'." 197 198 self.namespace_path.append(name) 199 200 def exit_namespace(self): 201 202 "Exit the current namespace." 203 204 self.namespace_path.pop() 205 206 # Constant reference naming. 207 208 def get_constant_name(self, value, value_type, encoding=None): 209 210 """ 211 Add a new constant to the current namespace for 'value' with 212 'value_type'. 213 """ 214 215 path = self.get_namespace_path() 216 init_item(self.constants, path, dict) 217 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 218 219 # Literal reference naming. 220 221 def get_literal_name(self): 222 223 "Add a new literal to the current namespace." 224 225 path = self.get_namespace_path() 226 init_item(self.literals, path, lambda: 0) 227 return "$C%d" % self.literals[path] 228 229 def next_literal(self): 230 self.literals[self.get_namespace_path()] += 1 231 232 # Temporary variable naming. 233 234 def get_temporary_name(self): 235 path = self.get_namespace_path() 236 init_item(self.temp, path, lambda: 0) 237 return "$t%d" % self.temp[path] 238 239 def next_temporary(self): 240 self.temp[self.get_namespace_path()] += 1 241 242 # Arbitrary function naming. 243 244 def get_lambda_name(self): 245 path = self.get_namespace_path() 246 init_item(self.lambdas, path, lambda: 0) 247 name = "$l%d" % self.lambdas[path] 248 self.lambdas[path] += 1 249 return name 250 251 def reset_lambdas(self): 252 self.lambdas = {} 253 254 # Constant and literal recording. 255 256 def get_constant_value(self, value, literals=None): 257 258 """ 259 Encode the 'value' if appropriate, returning a value, a typename and any 260 encoding. 261 """ 262 263 if isinstance(value, unicode): 264 return value.encode("utf-8"), "unicode", self.encoding 265 266 # Attempt to convert plain strings to text. 267 268 elif isinstance(value, str) and self.encoding: 269 try: 270 return get_string_details(literals, self.encoding) 271 except UnicodeDecodeError: 272 pass 273 274 return value, value.__class__.__name__, None 275 276 def get_constant_reference(self, ref, value, encoding=None): 277 278 """ 279 Return a constant reference for the given 'ref' type and 'value', with 280 the optional 'encoding' applying to text values. 281 """ 282 283 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 284 285 # Return a reference for the constant. 286 287 objpath = self.get_object_path(constant_name) 288 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 289 290 # Record the value and type for the constant. 291 292 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 293 return name_ref 294 295 def reserve_constant(self, objpath, value, origin, encoding=None): 296 297 """ 298 Reserve a constant within 'objpath' with the given 'value' and having a 299 type with the given 'origin', with the optional 'encoding' applying to 300 text values. 301 """ 302 303 constant_name = self.get_constant_name(value, origin) 304 objpath = self.get_object_path(constant_name) 305 self._reserve_constant(objpath, value, origin, encoding) 306 307 def _reserve_constant(self, objpath, value, origin, encoding): 308 309 """ 310 Store a constant for 'objpath' with the given 'value' and 'origin', with 311 the optional 'encoding' applying to text values. 312 """ 313 314 self.constant_values[objpath] = value, origin, encoding 315 316 def get_literal_reference(self, name, ref, items, cls): 317 318 """ 319 Return a literal reference for the given type 'name', literal 'ref', 320 node 'items' and employing the given 'cls' as the class of the returned 321 reference object. 322 """ 323 324 # Construct an invocation using the items as arguments. 325 326 typename = "$L%s" % name 327 328 invocation = compiler.ast.CallFunc( 329 compiler.ast.Name(typename), 330 items 331 ) 332 333 # Get a name for the actual literal. 334 335 instname = self.get_literal_name() 336 self.next_literal() 337 338 # Record the type for the literal. 339 340 objpath = self.get_object_path(instname) 341 self.literal_types[objpath] = ref.get_origin() 342 343 # Return a wrapper for the invocation exposing the items. 344 345 return cls( 346 instname, 347 ref.instance_of(), 348 self.process_structure_node(invocation), 349 invocation.args 350 ) 351 352 # Node handling. 353 354 def process_structure(self, node): 355 356 """ 357 Within the given 'node', process the program structure. 358 359 During inspection, this will process global declarations, adjusting the 360 module namespace, and import statements, building a module dependency 361 hierarchy. 362 363 During translation, this will consult deduced program information and 364 output translated code. 365 """ 366 367 l = [] 368 for n in node.getChildNodes(): 369 l.append(self.process_structure_node(n)) 370 return l 371 372 def process_augassign_node(self, n): 373 374 "Process the given augmented assignment node 'n'." 375 376 op = operator_functions[n.op] 377 378 if isinstance(n.node, compiler.ast.Getattr): 379 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 380 elif isinstance(n.node, compiler.ast.Name): 381 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 382 else: 383 target = n.node 384 385 assignment = compiler.ast.Assign( 386 [target], 387 compiler.ast.CallFunc( 388 compiler.ast.Name("$op%s" % op), 389 [n.node, n.expr])) 390 391 return self.process_structure_node(assignment) 392 393 def process_assignment_for_object(self, original_name, source): 394 395 """ 396 Return an assignment operation making 'original_name' refer to the given 397 'source'. 398 """ 399 400 assignment = compiler.ast.Assign( 401 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 402 source 403 ) 404 405 return self.process_structure_node(assignment) 406 407 def process_assignment_node_items(self, n, expr): 408 409 """ 410 Process the given assignment node 'n' whose children are to be assigned 411 items of 'expr'. 412 """ 413 414 name_ref = self.process_structure_node(expr) 415 416 # Either unpack the items and present them directly to each assignment 417 # node. 418 419 if isinstance(name_ref, LiteralSequenceRef) and \ 420 self.process_literal_sequence_items(n, name_ref): 421 422 pass 423 424 # Or have the assignment nodes access each item via the sequence API. 425 426 else: 427 self.process_assignment_node_items_by_position(n, expr, name_ref) 428 429 def process_assignment_node_items_by_position(self, n, expr, name_ref): 430 431 """ 432 Process the given sequence assignment node 'n', converting the node to 433 the separate assignment of each target using positional access on a 434 temporary variable representing the sequence. Use 'expr' as the assigned 435 value and 'name_ref' as the reference providing any existing temporary 436 variable. 437 """ 438 439 assignments = [] 440 441 # Employ existing names to access the sequence. 442 # Literal sequences do not provide names of accessible objects. 443 444 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 445 temp = name_ref.name 446 447 # For other expressions, create a temporary name to reference the items. 448 449 else: 450 temp = self.get_temporary_name() 451 self.next_temporary() 452 453 assignments.append( 454 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 455 ) 456 457 # Assign the items to the target nodes. 458 459 for i, node in enumerate(n.nodes): 460 assignments.append( 461 compiler.ast.Assign([node], compiler.ast.Subscript( 462 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 463 ) 464 465 return self.process_structure_node(compiler.ast.Stmt(assignments)) 466 467 def process_literal_sequence_items(self, n, name_ref): 468 469 """ 470 Process the given assignment node 'n', obtaining from the given 471 'name_ref' the items to be assigned to the assignment targets. 472 473 Return whether this method was able to process the assignment node as 474 a sequence of direct assignments. 475 """ 476 477 if len(n.nodes) == len(name_ref.items): 478 assigned_names, count = get_names_from_nodes(n.nodes) 479 accessed_names, _count = get_names_from_nodes(name_ref.items) 480 481 # Only assign directly between items if all assigned names are 482 # plain names (not attribute assignments), and if the assigned names 483 # do not appear in the accessed names. 484 485 if len(assigned_names) == count and \ 486 not assigned_names.intersection(accessed_names): 487 488 for node, item in zip(n.nodes, name_ref.items): 489 self.process_assignment_node(node, item) 490 491 return True 492 493 # Otherwise, use the position-based mechanism to obtain values. 494 495 else: 496 return False 497 else: 498 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 499 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 500 501 def process_compare_node(self, n): 502 503 """ 504 Process the given comparison node 'n', converting an operator sequence 505 from... 506 507 <expr1> <op1> <expr2> <op2> <expr3> 508 509 ...to... 510 511 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 512 """ 513 514 invocations = [] 515 last = n.expr 516 517 for op, op_node in n.ops: 518 op = operator_functions.get(op) 519 520 invocations.append(compiler.ast.CallFunc( 521 compiler.ast.Name("$op%s" % op), 522 [last, op_node])) 523 524 last = op_node 525 526 if len(invocations) > 1: 527 result = compiler.ast.And(invocations) 528 else: 529 result = invocations[0] 530 531 return self.process_structure_node(result) 532 533 def process_dict_node(self, node): 534 535 """ 536 Process the given dictionary 'node', returning a list of (key, value) 537 tuples. 538 """ 539 540 l = [] 541 for key, value in node.items: 542 l.append(( 543 self.process_structure_node(key), 544 self.process_structure_node(value))) 545 return l 546 547 def process_for_node(self, n): 548 549 """ 550 Generate attribute accesses for {n.list}.__iter__ and the next method on 551 the iterator, producing a replacement node for the original. 552 """ 553 554 i0 = self.get_temporary_name() 555 self.next_temporary() 556 557 node = compiler.ast.Stmt([ 558 559 # <next> = {n.list}.__iter__().next 560 561 compiler.ast.Assign( 562 [compiler.ast.AssName(i0, "OP_ASSIGN")], 563 compiler.ast.Getattr( 564 compiler.ast.CallFunc( 565 compiler.ast.Getattr(n.list, "__iter__"), 566 [] 567 ), "next")), 568 569 # try: 570 # while True: 571 # <var>... = <next>() 572 # ... 573 # except StopIteration: 574 # pass 575 576 compiler.ast.TryExcept( 577 compiler.ast.While( 578 compiler.ast.Name("True"), 579 compiler.ast.Stmt([ 580 compiler.ast.Assign( 581 [n.assign], 582 compiler.ast.CallFunc( 583 compiler.ast.Name(i0), 584 [] 585 )), 586 n.body]), 587 None), 588 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 589 None) 590 ]) 591 592 self.process_structure_node(node) 593 594 def process_literal_sequence_node(self, n, name, ref, cls): 595 596 """ 597 Process the given literal sequence node 'n' as a function invocation, 598 with 'name' indicating the type of the sequence, and 'ref' being a 599 reference to the type. The 'cls' is used to instantiate a suitable name 600 reference. 601 """ 602 603 if name == "dict": 604 items = [] 605 for key, value in n.items: 606 items.append(compiler.ast.Tuple([key, value])) 607 else: # name in ("list", "tuple"): 608 items = n.nodes 609 610 return self.get_literal_reference(name, ref, items, cls) 611 612 def process_operator_node(self, n): 613 614 """ 615 Process the given operator node 'n' as an operator function invocation. 616 """ 617 618 op = operator_functions[n.__class__.__name__] 619 invocation = compiler.ast.CallFunc( 620 compiler.ast.Name("$op%s" % op), 621 list(n.getChildNodes()) 622 ) 623 return self.process_structure_node(invocation) 624 625 def process_print_node(self, n): 626 627 """ 628 Process the given print node 'n' as an invocation on a stream of the 629 form... 630 631 $print(dest, args, nl) 632 633 The special function name will be translated elsewhere. 634 """ 635 636 nl = isinstance(n, compiler.ast.Printnl) 637 invocation = compiler.ast.CallFunc( 638 compiler.ast.Name("$print"), 639 [n.dest or compiler.ast.Name("None"), 640 compiler.ast.List(list(n.nodes)), 641 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 642 ) 643 return self.process_structure_node(invocation) 644 645 def process_slice_node(self, n, expr=None): 646 647 """ 648 Process the given slice node 'n' as an operator function invocation. 649 """ 650 651 if n.flags == "OP_ASSIGN": op = "setslice" 652 elif n.flags == "OP_DELETE": op = "delslice" 653 else: op = "getslice" 654 655 invocation = compiler.ast.CallFunc( 656 compiler.ast.Name("$op%s" % op), 657 [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 658 (expr and [expr] or []) 659 ) 660 661 # Fix parse tree structure. 662 663 if op == "delslice": 664 invocation = compiler.ast.Discard(invocation) 665 666 return self.process_structure_node(invocation) 667 668 def process_sliceobj_node(self, n): 669 670 """ 671 Process the given slice object node 'n' as a slice constructor. 672 """ 673 674 op = "slice" 675 invocation = compiler.ast.CallFunc( 676 compiler.ast.Name("$op%s" % op), 677 n.nodes 678 ) 679 return self.process_structure_node(invocation) 680 681 def process_subscript_node(self, n, expr=None): 682 683 """ 684 Process the given subscript node 'n' as an operator function invocation. 685 """ 686 687 if n.flags == "OP_ASSIGN": op = "setitem" 688 elif n.flags == "OP_DELETE": op = "delitem" 689 else: op = "getitem" 690 691 invocation = compiler.ast.CallFunc( 692 compiler.ast.Name("$op%s" % op), 693 [n.expr] + list(n.subs) + (expr and [expr] or []) 694 ) 695 696 # Fix parse tree structure. 697 698 if op == "delitem": 699 invocation = compiler.ast.Discard(invocation) 700 701 return self.process_structure_node(invocation) 702 703 def process_attribute_chain(self, n): 704 705 """ 706 Process the given attribute access node 'n'. Return a reference 707 describing the expression. 708 """ 709 710 # AssAttr/Getattr are nested with the outermost access being the last 711 # access in any chain. 712 713 self.attrs.insert(0, n.attrname) 714 attrs = self.attrs 715 716 # Break attribute chains where non-access nodes are found. 717 718 if not self.have_access_expression(n): 719 self.reset_attribute_chain() 720 721 # Descend into the expression, extending backwards any existing chain, 722 # or building another for the expression. 723 724 name_ref = self.process_structure_node(n.expr) 725 726 # Restore chain information applying to this node. 727 728 if not self.have_access_expression(n): 729 self.restore_attribute_chain(attrs) 730 731 # Return immediately if the expression was another access and thus a 732 # continuation backwards along the chain. The above processing will 733 # have followed the chain all the way to its conclusion. 734 735 if self.have_access_expression(n): 736 del self.attrs[0] 737 738 return name_ref 739 740 # Attribute chain handling. 741 742 def reset_attribute_chain(self): 743 744 "Reset the attribute chain for a subexpression of an attribute access." 745 746 self.attrs = [] 747 self.chain_assignment.append(self.in_assignment) 748 self.chain_invocation.append(self.in_invocation) 749 self.in_assignment = None 750 self.in_invocation = None 751 752 def restore_attribute_chain(self, attrs): 753 754 "Restore the attribute chain for an attribute access." 755 756 self.attrs = attrs 757 self.in_assignment = self.chain_assignment.pop() 758 self.in_invocation = self.chain_invocation.pop() 759 760 def have_access_expression(self, node): 761 762 "Return whether the expression associated with 'node' is Getattr." 763 764 return isinstance(node.expr, compiler.ast.Getattr) 765 766 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 767 768 """ 769 Return the name to be used for attribute usage observations involving 770 the given 'name' in the current namespace. 771 772 If the name is being used outside a function, and if 'name_ref' is 773 given and indicates a global or if 'is_global' is specified as a true 774 value, a path featuring the name in the global namespace is returned. 775 Otherwise, a path computed using the current namespace and the given 776 name is returned. 777 778 The intention of this method is to provide a suitably-qualified name 779 that can be tracked across namespaces. Where globals are being 780 referenced in class namespaces, they should be referenced using their 781 path within the module, not using a path within each class. 782 783 It may not be possible to identify a global within a function at the 784 time of inspection (since a global may appear later in a file). 785 Consequently, globals are identified by their local name rather than 786 their module-qualified path. 787 """ 788 789 # For functions, use the appropriate local names. 790 791 if self.in_function: 792 return name 793 794 # For global names outside functions, use a global name. 795 796 elif is_global or name_ref and name_ref.is_global_name(): 797 return self.get_global_path(name) 798 799 # Otherwise, establish a name in the current namespace. 800 801 else: 802 return self.get_object_path(name) 803 804 def get_path_for_access(self): 805 806 "Outside functions, register accesses at the module level." 807 808 if not self.in_function: 809 return self.name 810 else: 811 return self.get_namespace_path() 812 813 def get_module_name(self, node): 814 815 """ 816 Using the given From 'node' in this module, calculate any relative import 817 information, returning a tuple containing a module to import along with any 818 names to import based on the node's name information. 819 820 Where the returned module is given as None, whole module imports should 821 be performed for the returned modules using the returned names. 822 """ 823 824 # Absolute import. 825 826 if node.level == 0: 827 return node.modname, node.names 828 829 # Relative to an ancestor of this module. 830 831 else: 832 path = self.name.split(".") 833 level = node.level 834 835 # Relative imports treat package roots as submodules. 836 837 if split(self.filename)[-1] == "__init__.py": 838 level -= 1 839 840 if level > len(path): 841 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 842 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 843 844 basename = ".".join(path[:len(path)-level]) 845 846 # Name imports from a module. 847 848 if node.modname: 849 return "%s.%s" % (basename, node.modname), node.names 850 851 # Relative whole module imports. 852 853 else: 854 return basename, node.names 855 856 def get_argnames(args): 857 858 """ 859 Return a list of all names provided by 'args'. Since tuples may be 860 employed, the arguments are traversed depth-first. 861 """ 862 863 l = [] 864 for arg in args: 865 if isinstance(arg, tuple): 866 l += get_argnames(arg) 867 else: 868 l.append(arg) 869 return l 870 871 def get_names_from_nodes(nodes): 872 873 """ 874 Return the names employed in the given 'nodes' along with the number of 875 nodes excluding sequences. 876 """ 877 878 names = set() 879 count = 0 880 881 for node in nodes: 882 883 # Add names and count them. 884 885 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 886 names.add(node.name) 887 count += 1 888 889 # Add names from sequences and incorporate their counts. 890 891 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 892 compiler.ast.List, compiler.ast.Set, 893 compiler.ast.Tuple)): 894 _names, _count = get_names_from_nodes(node.nodes) 895 names.update(_names) 896 count += _count 897 898 # Count non-name, non-sequence nodes. 899 900 else: 901 count += 1 902 903 return names, count 904 905 # Result classes. 906 907 class InstructionSequence: 908 909 "A generic sequence of instructions." 910 911 def __init__(self, instructions): 912 self.instructions = instructions 913 914 def get_value_instruction(self): 915 return self.instructions[-1] 916 917 def get_init_instructions(self): 918 return self.instructions[:-1] 919 920 # Dictionary utilities. 921 922 def init_item(d, key, fn): 923 924 """ 925 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 926 value where no entry already exists. 927 """ 928 929 if not d.has_key(key): 930 d[key] = fn() 931 return d[key] 932 933 def dict_for_keys(d, keys): 934 935 "Return a new dictionary containing entries from 'd' for the given 'keys'." 936 937 nd = {} 938 for key in keys: 939 if d.has_key(key): 940 nd[key] = d[key] 941 return nd 942 943 def make_key(s): 944 945 "Make sequence 's' into a tuple-based key, first sorting its contents." 946 947 l = list(s) 948 l.sort() 949 return tuple(l) 950 951 def add_counter_item(d, key): 952 953 """ 954 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 955 maintaining a mapping of keys to their order of insertion. 956 """ 957 958 if not d.has_key(key): 959 d[key] = len(d.keys()) 960 return d[key] 961 962 def remove_items(d1, d2): 963 964 "Remove from 'd1' all items from 'd2'." 965 966 for key in d2.keys(): 967 if d1.has_key(key): 968 del d1[key] 969 970 # Set utilities. 971 972 def first(s): 973 return list(s)[0] 974 975 def same(s1, s2): 976 return set(s1) == set(s2) 977 978 # General input/output. 979 980 def readfile(filename): 981 982 "Return the contents of 'filename'." 983 984 f = open(filename) 985 try: 986 return f.read() 987 finally: 988 f.close() 989 990 def writefile(filename, s): 991 992 "Write to 'filename' the string 's'." 993 994 f = open(filename, "w") 995 try: 996 f.write(s) 997 finally: 998 f.close() 999 1000 # General encoding. 1001 1002 def sorted_output(x): 1003 1004 "Sort sequence 'x' and return a string with commas separating the values." 1005 1006 x = map(str, x) 1007 x.sort() 1008 return ", ".join(x) 1009 1010 def get_string_details(literals, encoding): 1011 1012 """ 1013 Determine whether 'literals' represent Unicode strings or byte strings, 1014 using 'encoding' to reproduce byte sequences. 1015 1016 Each literal is the full program representation including prefix and quotes 1017 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1018 string needs to be translated back to its original encoding. 1019 1020 Return a single encoded literal value, a type name, and the original 1021 encoding as a tuple. 1022 """ 1023 1024 typename = "unicode" 1025 1026 l = [] 1027 1028 for s in literals: 1029 out, _typename = get_literal_details(s) 1030 if _typename == "str": 1031 typename = "str" 1032 l.append(out) 1033 1034 out = "".join(l) 1035 1036 # For Unicode values, convert to the UTF-8 program representation. 1037 1038 if typename == "unicode": 1039 return out.encode("utf-8"), typename, encoding 1040 1041 # For byte string values, convert back to the original encoding. 1042 1043 else: 1044 return out.encode(encoding), typename, encoding 1045 1046 def get_literal_details(s): 1047 1048 """ 1049 Determine whether 's' represents a Unicode string or a byte string, where 1050 's' contains the full program representation of a literal including prefix 1051 and quotes, recoded by the parser to UTF-8. 1052 1053 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1054 and byte or Unicode values starting with <backslash><octal digit> or 1055 <backslash>x. 1056 1057 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1058 to be considered as Unicode values. Otherwise, they produce byte values and 1059 cause unprefixed strings to be considered as byte strings. 1060 1061 Literals prefixed with "r" do not have their backslash-encoded values 1062 converted unless also prefixed with "u", in which case only the above value 1063 formats are converted, not any of the other special sequences for things 1064 like newlines. 1065 1066 Return the literal value as a Unicode object together with the appropriate 1067 type name in a tuple. 1068 """ 1069 1070 l = [] 1071 1072 # Identify the quote character and use it to identify the prefix. 1073 1074 quote_type = s[-1] 1075 prefix_end = s.find(quote_type) 1076 prefix = s[:prefix_end].lower() 1077 1078 if prefix not in ("", "b", "br", "r", "u", "ur"): 1079 raise ValueError, "String literal does not have a supported prefix: %s" % s 1080 1081 if "b" in prefix: 1082 typename = "str" 1083 else: 1084 typename = "unicode" 1085 1086 # Identify triple quotes or single quotes. 1087 1088 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1089 quote = s[prefix_end:prefix_end+3] 1090 current = prefix_end + 3 1091 end = len(s) - 3 1092 else: 1093 quote = s[prefix_end] 1094 current = prefix_end + 1 1095 end = len(s) - 1 1096 1097 # Conversions of some quoted values. 1098 1099 searches = { 1100 "u" : (6, 16), 1101 "U" : (10, 16), 1102 "x" : (4, 16), 1103 } 1104 1105 octal_digits = map(str, range(0, 8)) 1106 1107 # Translations of some quoted values. 1108 1109 escaped = { 1110 "\\" : "\\", "'" : "'", '"' : '"', 1111 "a" : "\a", "b" : "\b", "f" : "\f", 1112 "n" : "\n", "r" : "\r", "t" : "\t", 1113 } 1114 1115 while current < end: 1116 1117 # Look for quoted values. 1118 1119 index = s.find("\\", current) 1120 if index == -1 or index + 1 == end: 1121 l.append(s[current:end]) 1122 break 1123 1124 # Add the preceding text. 1125 1126 l.append(s[current:index]) 1127 1128 # Handle quoted text. 1129 1130 term = s[index+1] 1131 1132 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1133 # produce Unicode values. 1134 1135 if typename == "unicode" and ( 1136 term in ("u", "U") or 1137 "u" in prefix and (term == "x" or term in octal_digits)): 1138 1139 needed, base = searches.get(term, (4, 8)) 1140 value = convert_quoted_value(s, index, needed, end, base, unichr) 1141 l.append(value) 1142 current = index + needed 1143 1144 # Add raw byte values, changing the string type. 1145 1146 elif "r" not in prefix and ( 1147 term == "x" or term in octal_digits): 1148 1149 needed, base = searches.get(term, (4, 8)) 1150 value = convert_quoted_value(s, index, needed, end, base, chr) 1151 l.append(value) 1152 typename = "str" 1153 current = index + needed 1154 1155 # Add other escaped values. 1156 1157 elif "r" not in prefix and escaped.has_key(term): 1158 l.append(escaped[term]) 1159 current = index + 2 1160 1161 # Add other text as found. 1162 1163 else: 1164 l.append(s[index:index+2]) 1165 current = index + 2 1166 1167 # Collect the components into a single Unicode object. Since the literal 1168 # text was already in UTF-8 form, interpret plain strings as UTF-8 1169 # sequences. 1170 1171 out = [] 1172 1173 for value in l: 1174 if isinstance(value, unicode): 1175 out.append(value) 1176 else: 1177 out.append(unicode(value, "utf-8")) 1178 1179 return "".join(out), typename 1180 1181 def convert_quoted_value(s, index, needed, end, base, fn): 1182 1183 """ 1184 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1185 positions, and with the given 'end' indicating the first position after the 1186 end of the actual string content. 1187 1188 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1189 to convert the value to an appropriate type. 1190 """ 1191 1192 s = s[index:min(index+needed, end)] 1193 1194 # Not a complete occurrence. 1195 1196 if len(s) < needed: 1197 return s 1198 1199 # Test for a well-formed value. 1200 1201 try: 1202 first = base == 8 and 1 or 2 1203 value = int(s[first:needed], base) 1204 except ValueError: 1205 return s 1206 else: 1207 return fn(value) 1208 1209 # Attribute chain decoding. 1210 1211 def get_attrnames(attrnames): 1212 1213 """ 1214 Split the qualified attribute chain 'attrnames' into its components, 1215 handling special attributes starting with "#" that indicate type 1216 conformance. 1217 """ 1218 1219 if attrnames.startswith("#"): 1220 return [attrnames] 1221 else: 1222 return attrnames.split(".") 1223 1224 def get_attrname_from_location(location): 1225 1226 """ 1227 Extract the first attribute from the attribute names employed in a 1228 'location'. 1229 """ 1230 1231 path, name, attrnames, access = location 1232 if not attrnames: 1233 return attrnames 1234 return get_attrnames(attrnames)[0] 1235 1236 def get_name_path(path, name): 1237 1238 "Return a suitable qualified name from the given 'path' and 'name'." 1239 1240 if "." in name: 1241 return name 1242 else: 1243 return "%s.%s" % (path, name) 1244 1245 # Usage-related functions. 1246 1247 def get_types_for_usage(attrnames, objects): 1248 1249 """ 1250 Identify the types that can support the given 'attrnames', using the 1251 given 'objects' as the catalogue of type details. 1252 """ 1253 1254 types = [] 1255 for name, _attrnames in objects.items(): 1256 if set(attrnames).issubset(_attrnames): 1257 types.append(name) 1258 return types 1259 1260 def get_invoked_attributes(usage): 1261 1262 "Obtain invoked attribute from the given 'usage'." 1263 1264 invoked = [] 1265 if usage: 1266 for attrname, invocation, assignment in usage: 1267 if invocation: 1268 invoked.append(attrname) 1269 return invoked 1270 1271 def get_assigned_attributes(usage): 1272 1273 "Obtain assigned attribute from the given 'usage'." 1274 1275 assigned = [] 1276 if usage: 1277 for attrname, invocation, assignment in usage: 1278 if assignment: 1279 assigned.append(attrname) 1280 return assigned 1281 1282 # Type and module functions. 1283 # NOTE: This makes assumptions about the __builtins__ structure. 1284 1285 def get_builtin_module(name): 1286 1287 "Return the module name containing the given type 'name'." 1288 1289 if name == "string": 1290 modname = "str" 1291 elif name == "utf8string": 1292 modname = "unicode" 1293 elif name == "NoneType": 1294 modname = "none" 1295 else: 1296 modname = name 1297 1298 return "__builtins__.%s" % modname 1299 1300 def get_builtin_type(name): 1301 1302 "Return the type name provided by the given Python value 'name'." 1303 1304 if name == "str": 1305 return "string" 1306 elif name == "unicode": 1307 return "utf8string" 1308 else: 1309 return name 1310 1311 def get_builtin_class(name): 1312 1313 "Return the full name of the built-in class having the given 'name'." 1314 1315 typename = get_builtin_type(name) 1316 module = get_builtin_module(typename) 1317 return "%s.%s" % (module, typename) 1318 1319 # Useful data. 1320 1321 predefined_constants = "False", "None", "NotImplemented", "True" 1322 1323 operator_functions = { 1324 1325 # Fundamental operations. 1326 1327 "is" : "is_", 1328 "is not" : "is_not", 1329 1330 # Binary operations. 1331 1332 "in" : "in_", 1333 "not in" : "not_in", 1334 "Add" : "add", 1335 "Bitand" : "and_", 1336 "Bitor" : "or_", 1337 "Bitxor" : "xor", 1338 "Div" : "div", 1339 "FloorDiv" : "floordiv", 1340 "LeftShift" : "lshift", 1341 "Mod" : "mod", 1342 "Mul" : "mul", 1343 "Power" : "pow", 1344 "RightShift" : "rshift", 1345 "Sub" : "sub", 1346 1347 # Unary operations. 1348 1349 "Invert" : "invert", 1350 "UnaryAdd" : "pos", 1351 "UnarySub" : "neg", 1352 1353 # Augmented assignment. 1354 1355 "+=" : "iadd", 1356 "-=" : "isub", 1357 "*=" : "imul", 1358 "/=" : "idiv", 1359 "//=" : "ifloordiv", 1360 "%=" : "imod", 1361 "**=" : "ipow", 1362 "<<=" : "ilshift", 1363 ">>=" : "irshift", 1364 "&=" : "iand", 1365 "^=" : "ixor", 1366 "|=" : "ior", 1367 1368 # Comparisons. 1369 1370 "==" : "eq", 1371 "!=" : "ne", 1372 "<" : "lt", 1373 "<=" : "le", 1374 ">=" : "ge", 1375 ">" : "gt", 1376 } 1377 1378 # vim: tabstop=4 expandtab shiftwidth=4