#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018, 2019, 2020 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the given list of child 'nodes'."

        # The list is retained, not copied: later changes affect the caller's
        # list as well.

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' to the children."

        for node in nodes:
            self.append(node)

    # An alias bound to this class's append method. Subclasses may replace it
    # (see Region.add).

    add = append

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        # Inline content is placed in a trailing block if one exists.

        last = self.node(-1)

        if isinstance(last, Block):
            return last
        else:
            return self

    def empty(self):

        "Return whether the container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        "Insert after 'old' in the children the 'new' node."

        # A ValueError is raised by the list if 'old' is not a child.

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        "Return the child at 'index' or None if there is no such child."

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        "Combine adjacent text nodes."

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def remove(self, node):

        "Remove 'node' from the children."

        self.nodes.remove(node)

    def replace(self, old, new):

        "Replace 'old' with 'new' in the children."

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.
        """

        i = self.nodes.index(node)

        # NOTE: The new container is made by calling this object's class with
        # NOTE: only a node list, so classes whose initialisers need further
        # NOTE: arguments cannot be split this way, and any optional details
        # NOTE: revert to their defaults in the new container.

        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        # Descend into child containers; other childless nodes contribute
        # nothing.

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)
            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

    def __str__(self):

        # The prettyprint method is provided by the concrete node classes.

        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Add the prettyprinted children, using a deeper indent than 'indent',
        to the list 'l' of lines, returning the lines joined by newlines.
        """

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise each child in turn using 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        "Initialise the region with child 'nodes' and the given details."

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def add(self, node):

        "Add 'node', replacing the last child if that child is empty."

        last = self.node(-1)
        if last and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        # A transparent region passes inline nodes on to its last child, but
        # only where that child is itself a container: an empty region or one
        # ending in a childless node accepts the inline nodes itself instead of
        # failing.

        if self.transparent:
            last = self.node(-1)
            if isinstance(last, Container):
                return last

        return self

    def have_end(self, s):

        """
        Return a true value only if this region has a non-zero level, 's'
        starts with a closing brace, and the length of 's' equals the level.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this region and its children."

        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this region using 'out'."

        out.start_region(self.level, self.indent, self.type, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.format, self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this block and its children."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this block using 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with child 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this item and its children."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this definition item using 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with child 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this term and its children."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this definition term using 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        "Initialise the span with child 'nodes' and the given style flags."

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Clear the emphasis flag, returning the strong flag. Where the span is
        also strong, the children are first moved into a nested span having
        only emphasis.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
        self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Clear the strong flag, returning the emphasis flag. Where the span is
        also emphasised, the children are first moved into a nested span that
        is only strong.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
        self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this span and its children."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this span using 'out'."

        # NOTE: Where both flags are set, only emphasis is serialised.
        # NOTE: Presumably such spans are split before serialisation: confirm
        # NOTE: against the parser.

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        "Initialise the heading with child 'nodes', 'level' and other details."

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this heading and its children."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
             " end_extra=%r identifier=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad,
            self.end_extra, self.identifier)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this heading using 'out'."

        out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class LinkLabel(Container):

    "A link or transclusion label."

    def __repr__(self):
        return "LinkLabel(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this label and its children."

        l = ["%sLinkLabel" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the child nodes of this label to 'out' for serialisation."

        out.link_label(self.nodes)

class LinkParameter(Container):

    "A link or transclusion parameter."

    def __repr__(self):
        return "LinkParameter(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this parameter and its children."

        l = ["%sLinkParameter" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this parameter using 'out'."

        # Split the text at the first "=" only, giving a list of one or two
        # strings.

        s = self.text_content()
        t = s.split("=", 1)
        out.link_parameter(t)

class List(Container):

    "A list."

    def __init__(self, nodes):

        "Initialise the list with child 'nodes'."

        Container.__init__(self, nodes)
        self.init()

    def init(self):

        "Obtain the list details from the first child, if there is one."

        if self.nodes:
            first = self.nodes[0]
        else:
            first = None

        self.first = first

        # Without any children, the details remain undefined.

        if first is None:
            self.indent = None
            self.marker = None
            self.num = None
        else:
            self.indent = first.indent
            self.marker = first.marker
            self.num = first.num

    def __repr__(self):
        return "List(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this list and its children."

        # Obtain details from any items added since initialisation.

        if not self.first:
            self.init()
        l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this list using 'out'."

        # Obtain details from any items added since initialisation.

        if not self.first:
            self.init()
        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        "Initialise the item with child 'nodes' and the given item details."

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this item and its children."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this list item using 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __init__(self, nodes):

        "Initialise the collection with child 'nodes'."

        Container.__init__(self, nodes)

        # Parsing state flags, inconsequential to any final document tree.
        # If incomplete remains set, the attributes are discarded.

        self.incomplete = True
        self.found_cell = False

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of the attributes."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the attributes using 'out'."

        out.start_table_attrs()
        out.table_attrs(self.nodes)

        # Only complete collections are explicitly ended.

        if not self.incomplete:
            out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this table and its children."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this table using 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None):

        "Initialise the cell with child 'nodes' and any 'attrs'."

        Container.__init__(self, nodes)
        self.attrs = attrs

    def __repr__(self):
        return "TableCell(%r, %r)" % (self.nodes, self.attrs)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this cell and its children."

        l = ["%sTableCell:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this table cell using 'out'."

        out.start_table_cell(self.attrs)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing=""):

        "Initialise the row with child 'nodes' and any 'trailing' details."

        Container.__init__(self, nodes)
        self.trailing = trailing

    def __repr__(self):
        return "TableRow(%r, %r)" % (self.nodes, self.trailing)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this row and its children."

        l = ["%sTableRow: trailing=%r" % (indent, self.trailing)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this table row using 'out'."

        out.start_table_row()
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    "Generic inline formatting."

    # The name of the actual class is used so that subclasses need not define
    # their own representations.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this node and its children."

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise this node using 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this link and its children."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the target and child nodes of this link to 'out'."

        out.link(self.target, self.nodes)

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None, inline=True):

        """
        Initialise the macro with the given 'name', 'args', 'parent', 'region',
        any child 'nodes' and whether it is 'inline'.
        """

        # Provide a new list where no nodes (or an empty list) are given.

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region
        self.inline = inline

    def __repr__(self):
        return "Macro(%r, %r, %r, %r, %r, %r)" % (self.name, self.args,
                                                  self.parent, self.region,
                                                  self.nodes, self.inline)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this macro and its children."

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise this macro using 'out'."

        out.start_macro(self.name, self.args, self.nodes, self.inline)
        if self.nodes:
            self._to_string(out)
        out.end_macro(self.inline)

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise this node using 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise this node using 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise this node using 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise this node using 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise this node using 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Transclusion(Container):

    "Transclusion details."

    def __init__(self, nodes, target):

        "Initialise the transclusion with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Transclusion(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this node and its children."

        l = ["%sTransclusion: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the target and child nodes of this transclusion to 'out'."

        out.transclusion(self.target, self.nodes)

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise this node using 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return whether the node is empty, this being false by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this anchor."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise this anchor using 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this break."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise this break using 'out'."

        out.break_()

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the given 'comment' and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this comment."

        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)

    def to_string(self, out):

        "Serialise this comment using 'out'."

        out.comment(self.comment, self.extra)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the given 'directive' and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this directive."

        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)

    def to_string(self, out):

        "Serialise this directive using 'out'."

        out.directive(self.directive, self.extra)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this line break."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise this line break using 'out'."

        out.linebreak()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, height):

        "Initialise the rule with the given 'height'."

        self.height = height

    def __repr__(self):
        return "Rule(%d)" % self.height

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this rule."

        return "%sRule: height=%d" % (indent, self.height)

    def to_string(self, out):

        "Serialise this rule using 'out'."

        out.rule(self.height)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        "Initialise the attribute with the given 'name' and other details."

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this attribute."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise this attribute using 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node has no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline."

        return "\n" in self.s

    def merge(self, text):

        "Add the string from the 'text' node to the end of this node's string."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this text."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise this text using 'out'."

        out.text(self.s)

class Verbatim(Node):

    "Verbatim text."

    def __init__(self, text):

        "Initialise the node with the given 'text'."

        self.text = text

    def __repr__(self):
        return "Verbatim(%r)" % self.text

    def prettyprint(self, indent=""):

        "Return a readable, indented summary of this verbatim text."

        return "%sVerbatim: text=%r" % (indent, self.text)

    def to_string(self, out):

        "Serialise this verbatim text using 'out'."

        out.verbatim(self.text)

# vim: tabstop=4 expandtab shiftwidth=4