#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018, 2019, 2020, 2021 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the list of child 'nodes'."

        # The given list is retained as is, not copied.

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' in order."

        for node in nodes:
            self.append(node)

    # By default, adding a node is the same as appending it. Region provides
    # its own add method.

    add = append

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline in order."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        """
        Return the container to which inline nodes are added: the last child
        if it is a Block, otherwise this container.
        """

        last = self.node(-1)

        if isinstance(last, Block):
            return last
        else:
            return self

    def empty(self):

        "Return whether the container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        "Insert after 'old' in the children the 'new' node."

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        "Return the child at 'index' or None if there is no such child."

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        "Combine adjacent text nodes."

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if not text:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text:
            self.append(text)

    def remove(self, node):

        "Remove 'node' from the children."

        self.nodes.remove(node)

    def replace(self, old, new):

        "Replace 'old' with 'new' in the children."

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.

        The given 'node' is itself removed from this container and is not
        placed in the new one. The new container is created by calling this
        container's class with the list of following nodes as the only
        argument.
        """

        i = self.nodes.index(node)
        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)
            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

    def __str__(self):

        # The prettyprint method is provided by the subclasses.

        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Add to the list 'l' the description of each child, produced with an
        extended 'indent', and return the items of 'l' joined by newlines.
        """

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise each child in order using 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        """
        Initialise the region with the given child 'nodes' together with the
        'level', 'indent', 'type', 'args', 'transparent' and 'extra' details,
        all of which are stored as attributes of the same name.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def add(self, node):

        """
        Add 'node' to the region, replacing the last child if that child
        reports itself as empty, or appending 'node' otherwise.
        """

        last = self.node(-1)
        if last is not None and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_point(self):

        """
        Return the container to which inline nodes are added: the last child
        for a transparent region, otherwise the region itself.
        """

        if self.transparent:
            last = self.node(-1)

            # A transparent region without children has nothing to delegate
            # to, so it accepts the inline nodes itself.

            if last is not None:
                return last

        return self

    def have_end(self, s):

        """
        Return a true value if the region has a non-zero level, 's' starts
        with a closing brace, and the length of 's' equals the level.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the region and its children."

        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the region using 'out'."

        out.start_region(self.level, self.indent, self.type, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.formats[0], self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the block and its children."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the block using 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with child 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item using 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with child 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the term and its children."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the term using 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        "Initialise the span with child 'nodes' and the given style flags."

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Close the emphasis in this span. If the span is also strong, the
        existing children are wrapped in a new emphasis-only span and this
        span loses its emphasis. Return whether the span is strong.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
            self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Close the strong style in this span. If the span is also emphasised,
        the existing children are wrapped in a new strong-only span and this
        span loses its strong style. Return whether the span is emphasised.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
            self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return a description of the span and its children."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the span using 'out'."

        # Only one style is emitted: emphasis takes precedence if both flags
        # are set.
        # NOTE(review): confirm that a span with both flags set never needs
        # to be serialised.

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        """
        Initialise the heading with child 'nodes', the heading 'level', and
        the 'start_extra', 'start_pad', 'end_pad', 'end_extra' and
        'identifier' details, all stored as attributes of the same name.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):

        # Use %r throughout so that the representation can always be produced.

        return "Heading(%r, %r, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def prettyprint(self, indent=""):

        "Return a description of the heading and its children."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
             " end_extra=%r identifier=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad,
            self.end_extra, self.identifier)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the heading using 'out'."

        out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class LinkLabel(Container):

    "A link or transclusion label."

    def __repr__(self):
        return "LinkLabel(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the label and its children."

        l = ["%sLinkLabel" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the children of the label to 'out' for serialisation."

        out.link_label(self.nodes)

class LinkParameter(Container):

    "A link or transclusion parameter."

    def __repr__(self):
        return "LinkParameter(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the parameter and its children."

        l = ["%sLinkParameter" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Pass the text content of the parameter to 'out' as a list obtained by
        splitting the text at the first "=" character, if present.
        """

        s = self.text_content()
        t = s.split("=", 1)
        out.link_parameter(t)

class List(Container):

    "A list."

    def __init__(self, nodes):

        "Initialise the list with the given child 'nodes'."

        Container.__init__(self, nodes)
        self.init()

    def init(self):

        """
        Record the first child together with its indent, marker and num
        details as the details of the list. Without any children, all of
        these details are None.
        """

        self.first = first = self.nodes[0] if self.nodes else None
        self.indent = first.indent if first else first
        self.marker = first.marker if first else first
        self.num = first.num if first else first

    def __repr__(self):
        return "List(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the list and its children."

        # Obtain the list details if children were added after initialisation.

        if not self.first:
            self.init()
        l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list using 'out'."

        # Obtain the list details if children were added after initialisation.

        if not self.first:
            self.init()
        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with child 'nodes' and the 'indent', 'marker',
        'space' and 'num' details, all stored as attributes of the same name.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item using 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __init__(self, nodes):

        "Initialise the collection with the given child 'nodes'."

        Container.__init__(self, nodes)

        # Parsing state flags, inconsequential to any final document tree.
        # If incomplete remains set, the attributes are discarded.

        self.incomplete = True
        self.found_cell = False

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the collection and its children."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the attributes using 'out', passing the children to 'out'
        and only ending the attributes if the collection is complete.
        """

        out.start_table_attrs()
        out.table_attrs(self.nodes)
        if not self.incomplete:
            out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the table and its children."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table using 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None, leading="", padding=""):

        """
        Initialise the cell with child 'nodes' and the 'attrs', 'leading' and
        'padding' details, all stored as attributes of the same name.
        """

        Container.__init__(self, nodes)
        self.attrs = attrs
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableCell(%r, %r, %r, %r)" % (self.nodes, self.attrs,
            self.leading, self.padding)

    def prettyprint(self, indent=""):

        "Return a description of the cell and its children."

        l = ["%sTableCell: leading=%r padding=%r" % (indent, self.leading,
            self.padding)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the cell using 'out'."

        out.start_table_cell(self.attrs, self.leading, self.padding)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing="", leading="", padding=""):

        """
        Initialise the row with child 'nodes' and the 'trailing', 'leading'
        and 'padding' details, all stored as attributes of the same name.
        """

        Container.__init__(self, nodes)
        self.trailing = trailing
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableRow(%r, %r, %r, %r)" % (self.nodes, self.trailing,
            self.leading, self.padding)

    def prettyprint(self, indent=""):

        "Return a description of the row and its children."

        l = ["%sTableRow: trailing=%r leading=%r padding=%r" % (
            indent, self.trailing, self.leading, self.padding)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the row using 'out'."

        out.start_table_row(self.leading, self.padding)
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    "Generic inline formatting."

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        "Return a description, using the class name, of the node and children."

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise the larger text using 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a description of the link and its children."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the target and children of the link to 'out' for serialisation."

        out.link(self.target, self.nodes)

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None, inline=True):

        """
        Initialise the macro with the given 'name', 'args', 'parent', 'region'
        and 'inline' details, all stored as attributes of the same name,
        together with any child 'nodes'. Without any 'nodes' being given, a
        new empty list is used.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region
        self.inline = inline

    def __repr__(self):
        return "Macro(%r, %r, %r, %r, %r, %r)" % (self.name, self.args,
                                                  self.parent, self.region,
                                                  self.nodes, self.inline)

    def prettyprint(self, indent=""):

        "Return a description of the macro and its children."

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the macro, with any children, using 'out'."

        out.start_macro(self.name, self.args, self.nodes, self.inline)
        if self.nodes:
            self._to_string(out)
        out.end_macro(self.inline)

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise the monospaced text using 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise the smaller text using 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise the crossed-out text using 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise the subscripted text using 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise the superscripted text using 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Transclusion(Container):

    "Transclusion details."

    def __init__(self, nodes, target):

        "Initialise the transclusion with child 'nodes' and the 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Transclusion(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a description of the transclusion and its children."

        l = ["%sTransclusion: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the target and children to 'out' for serialisation."

        out.transclusion(self.target, self.nodes)

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise the underlined text using 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return false since such nodes are not regarded as empty by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return a description of the anchor."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise the anchor using 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return a description of the break."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise the break using 'out'."

        out.break_()

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the given 'comment' and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the comment."

        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)

    def to_string(self, out):

        "Serialise the comment using 'out'."

        out.comment(self.comment, self.extra)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the given 'directive' and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the directive."

        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)

    def to_string(self, out):

        "Serialise the directive using 'out'."

        out.directive(self.directive, self.extra)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return a description of the line break."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise the line break using 'out'."

        out.linebreak()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, height):

        "Initialise the rule with the given 'height'."

        self.height = height

    def __repr__(self):

        # Use %r so that the representation can always be produced.

        return "Rule(%r)" % self.height

    def prettyprint(self, indent=""):

        "Return a description of the rule."

        return "%sRule: height=%d" % (indent, self.height)

    def to_string(self, out):

        "Serialise the rule using 'out'."

        out.rule(self.height)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name', 'value', 'concise' and
        'quote' details, all stored as attributes of the same name.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return a description of the attribute."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise the attribute using 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node has no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline."

        return "\n" in self.s

    def merge(self, text):

        "Add the string from the 'text' node to the end of this node's string."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return a description of the text."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise the text using 'out'."

        out.text(self.s)

class Verbatim(Node):

    "Verbatim text."

    def __init__(self, text):

        "Initialise the node with the given 'text'."

        self.text = text

    def __repr__(self):
        return "Verbatim(%r)" % self.text

    def prettyprint(self, indent=""):

        "Return a description of the verbatim text."

        return "%sVerbatim: text=%r" % (indent, self.text)

    def to_string(self, out):

        "Serialise the verbatim text using 'out'."

        out.verbatim(self.text)

# vim: tabstop=4 expandtab shiftwidth=4