#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the given list of child 'nodes'."

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' to the children."

        for node in nodes:
            self.append(node)

    def add(self, node):

        """
        Add 'node' to the children, replacing the last child if that child
        reports itself as empty, and appending 'node' otherwise.
        """

        last = self.node(-1)
        if last and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        last = self.node(-1)

        # Only a trailing block accepts inline nodes on behalf of this
        # container.

        if isinstance(last, Block):
            return last
        else:
            return self

    def empty(self):

        "Return whether the container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        "Insert after 'old' in the children the 'new' node."

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        "Return the child at 'index' or None if there is no such child."

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        "Combine adjacent text nodes."

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def remove(self, node):

        "Remove 'node' from the children."

        self.nodes.remove(node)

    def replace(self, old, new):

        "Replace 'old' with 'new' in the children."

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.

        The new container is created by calling this container's class with
        only the list of following nodes, so the class must be constructible
        from a node list alone.
        """

        i = self.nodes.index(node)
        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)
            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

    def __str__(self):
        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Extend the list of lines 'l' with the pretty-printed form of each
        child, using a deeper level of 'indent', and return the lines joined
        by newlines.
        """

        # NOTE(review): the per-level indent is a single space as received;
        # confirm against any expected pretty-printed output.

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise each child in turn using 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        "Initialise the region with its child 'nodes' and details."

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def append_point(self):

        "Return the container to which inline nodes are added."

        # A transparent region passes inline nodes on to its last child. With
        # no children yet, the region itself accepts them instead of failing
        # on the attempt to access a last child.

        if self.transparent and self.nodes:
            return self.nodes[-1]
        else:
            return self

    def have_end(self, s):

        """
        Return a true value if this region has a non-zero level and 's' starts
        with "}" and has a length equal to that level.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the region and its children."

        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the region using 'out'."

        out.start_region(self.level, self.indent, self.type, self.args, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.formats[0], self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.args, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the block and its children."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the block using 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with its child 'nodes', 'pad' and 'extra'."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item using 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with its child 'nodes', 'pad' and 'extra'."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the term and its children."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the term using 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        "Initialise the span with its child 'nodes' and style flags."

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Clear the emphasis flag, first moving the children into a nested
        emphasis span if the strong flag is set. Return the strong flag.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
        self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Clear the strong flag, first moving the children into a nested strong
        span if the emphasis flag is set. Return the emphasis flag.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
        self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return a description of the span and its children."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the span using 'out'. Only one style is emitted: emphasis
        takes precedence over strong if both flags are set.
        """

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        "Initialise the heading with its child 'nodes', 'level' and details."

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def prettyprint(self, indent=""):

        "Return a description of the heading and its children."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
             " end_extra=%r identifier=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad,
            self.end_extra, self.identifier)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the heading using 'out'."

        out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class LinkLabel(Container):

    "A link or transclusion label."

    def __repr__(self):
        return "LinkLabel(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the label and its children."

        l = ["%sLinkLabel" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the child nodes of the label to 'out'."

        out.link_label(self.nodes)

class LinkParameter(Container):

    "A link or transclusion parameter."

    def __repr__(self):
        return "LinkParameter(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the parameter and its children."

        l = ["%sLinkParameter" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Pass the text content of the parameter to 'out' as a list obtained by
        splitting the text at the first "=" character, if present.
        """

        s = self.text_content()
        t = s.split("=", 1)
        out.link_parameter(t)

class List(Container):

    "A list."

    def __init__(self, nodes):

        "Initialise the list with its child 'nodes'."

        Container.__init__(self, nodes)
        self.init()

    def init(self):

        """
        Record the first child, if any, along with its indent, marker and num
        details as the details of the list. Without children, all of these
        are set to None.
        """

        if self.nodes:
            first = self.nodes[0]
            self.first = first
            self.indent = first.indent
            self.marker = first.marker
            self.num = first.num
        else:
            self.first = None
            self.indent = None
            self.marker = None
            self.num = None

    def __repr__(self):
        return "List(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the list and its children."

        # Children may have been added since the list was created.

        if not self.first:
            self.init()
        l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list using 'out'."

        # Children may have been added since the list was created.

        if not self.first:
            self.init()
        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        "Initialise the item with its child 'nodes' and details."

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item using 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __init__(self, nodes):

        "Initialise the collection with its child 'nodes'."

        Container.__init__(self, nodes)

        # Parsing state flags, inconsequential to any final document tree.
        # If incomplete remains set, the attributes are discarded.

        self.incomplete = True
        self.found_cell = False

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the collection and its children."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the attributes using 'out', only ending the attributes if
        they are not marked as incomplete.
        """

        out.start_table_attrs()
        out.table_attrs(self.nodes)
        if not self.incomplete:
            out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the table and its children."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table using 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None, leading="", padding=""):

        "Initialise the cell with its child 'nodes' and details."

        Container.__init__(self, nodes)
        self.attrs = attrs
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableCell(%r, %r, %r, %r)" % (self.nodes, self.attrs,
            self.leading, self.padding)

    def prettyprint(self, indent=""):

        "Return a description of the cell and its children."

        l = ["%sTableCell: leading=%r padding=%r" % (indent, self.leading,
            self.padding)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the cell using 'out'."

        out.start_table_cell(self.attrs, self.leading, self.padding)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing="", leading="", padding=""):

        "Initialise the row with its child 'nodes' and details."

        Container.__init__(self, nodes)
        self.trailing = trailing
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableRow(%r, %r, %r, %r)" % (self.nodes, self.trailing,
            self.leading, self.padding)

    def prettyprint(self, indent=""):

        "Return a description of the row and its children."

        l = ["%sTableRow: trailing=%r leading=%r padding=%r" % (
            indent, self.trailing, self.leading, self.padding)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the row using 'out'."

        out.start_table_row(self.leading, self.padding)
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    "Generic inline formatting."

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        "Return a description of the node, using its class name, and children."

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise the larger text using 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with its child 'nodes' and 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a description of the link and its children."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the target and child nodes of the link to 'out'."

        out.link(self.target, self.nodes)

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None, inline=True):

        """
        Initialise the macro with its 'name', 'args', 'parent', 'region',
        optional child 'nodes' and 'inline' status. If 'nodes' is omitted or
        empty, a new empty list of children is used.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region
        self.inline = inline

    def __repr__(self):
        return "Macro(%r, %r, %r, %r, %r, %r)" % (self.name, self.args,
                                                  self.parent, self.region,
                                                  self.nodes, self.inline)

    def prettyprint(self, indent=""):

        "Return a description of the macro and its children."

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the macro, together with any children, using 'out'."

        out.start_macro(self.name, self.args, self.nodes, self.inline)
        if self.nodes:
            self._to_string(out)
        out.end_macro(self.inline)

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise the monospaced text using 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise the smaller text using 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise the crossed-out text using 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise the subscripted text using 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise the superscripted text using 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Transclusion(Container):

    "Transclusion details."

    def __init__(self, nodes, target):

        "Initialise the transclusion with its child 'nodes' and 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Transclusion(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a description of the transclusion and its children."

        l = ["%sTransclusion: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the target and child nodes of the transclusion to 'out'."

        out.transclusion(self.target, self.nodes)

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise the underlined text using 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return whether the node is empty, this being false by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return a description of the anchor."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise the anchor using 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return a description of the break."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise the break using 'out'."

        out.break_()

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the given 'comment' and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the comment."

        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)

    def to_string(self, out):

        "Serialise the comment using 'out'."

        out.comment(self.comment, self.extra)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the given 'directive' and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the directive."

        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)

    def to_string(self, out):

        "Serialise the directive using 'out'."

        out.directive(self.directive, self.extra)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return a description of the line break."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise the line break using 'out'."

        out.linebreak()

class NonBreakingSpace(Node):

    "A non-breaking space within a block."

    def __repr__(self):
        return "NonBreakingSpace()"

    def prettyprint(self, indent=""):

        "Return a description of the non-breaking space."

        return "%sNonBreakingSpace" % indent

    def to_string(self, out):

        "Serialise the non-breaking space using 'out'."

        out.nbsp()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, height):

        "Initialise the rule with the given 'height'."

        self.height = height

    def __repr__(self):
        return "Rule(%d)" % self.height

    def prettyprint(self, indent=""):

        "Return a description of the rule."

        return "%sRule: height=%d" % (indent, self.height)

    def to_string(self, out):

        "Serialise the rule using 'out'."

        out.rule(self.height)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        "Initialise the attribute with the given 'name' and details."

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return a description of the attribute."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise the attribute using 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node has no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline."

        return "\n" in self.s

    def merge(self, text):

        "Append the string held by the 'text' node to this node's string."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return a description of the text."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise the text using 'out'."

        out.text(self.s)

class Verbatim(Node):

    "Verbatim text."

    def __init__(self, text):

        "Initialise the node with the given 'text'."

        self.text = text

    def __repr__(self):
        return "Verbatim(%r)" % self.text

    def prettyprint(self, indent=""):

        "Return a description of the verbatim text."

        return "%sVerbatim: text=%r" % (indent, self.text)

    def to_string(self, out):

        "Serialise the verbatim text using 'out'."

        out.verbatim(self.text)

# vim: tabstop=4 expandtab shiftwidth=4