#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the given list of child 'nodes'."

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' to the children."

        for node in nodes:
            self.append(node)

    # Provide an alias bound to this class's append method. Subclasses
    # wanting different behaviour for add must override it explicitly.

    add = append

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        """
        Return the container to which inline nodes are added: the last child
        if it is a block, or this container otherwise.
        """

        last = self.node(-1)

        if isinstance(last, Block):
            return last
        else:
            return self

    def empty(self):

        "Return whether the container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        "Insert after 'old' in the children the 'new' node."

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        "Return the child at 'index' or None if there is no such child."

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        "Combine adjacent text nodes."

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def remove(self, node):

        "Remove 'node' from the children."

        self.nodes.remove(node)

    def replace(self, old, new):

        "Replace 'old' with 'new' in the children."

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.

        The new container is made by calling this container's class with only
        the list of following nodes, so the class must be constructible from
        a node list alone.
        """

        i = self.nodes.index(node)
        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)

            # Descend into nested containers to collect their text.

            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

    def __str__(self):
        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Add the indented descriptions of the children to the list 'l', which
        already holds this node's own description, returning the joined text.
        """

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise each of the children using 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        """
        Initialise the region with the given child 'nodes' together with the
        'level', 'indent', 'type', 'args', 'transparent' and 'extra' details.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def add(self, node):

        """
        Add 'node' to the region, replacing the last child if that child is
        empty, or appending the node otherwise.
        """

        last = self.node(-1)
        if last and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_point(self):

        """
        Return the container to which inline nodes are added: the last child
        of a transparent region, or the region itself otherwise.
        """

        # A transparent region without children has no child to accept
        # inline nodes, so the region itself is used instead of failing with
        # an IndexError.

        if self.transparent and self.nodes:
            return self.nodes[-1]
        else:
            return self

    def have_end(self, s):

        """
        Return a true value if 's' ends this region: the region must have a
        non-zero level, and 's' must start with "}" and have a length equal
        to that level.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        "Return an indented description of the region and its children."

        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the region using 'out'."

        out.start_region(self.level, self.indent, self.type, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.format, self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return an indented description of the block and its children."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the block using 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with child 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return an indented description of the item and its children."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the definition item using 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with child 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return an indented description of the term and its children."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the definition term using 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        "Initialise the span with child 'nodes' and the given style flags."

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Close emphasis for this span, returning whether strong styling remains
        in effect. Where it does, the existing children are moved into a
        nested emphasis span and this span becomes strong-only.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
            self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Close strong styling for this span, returning whether emphasis remains
        in effect. Where it does, the existing children are moved into a
        nested strong span and this span becomes emphasis-only.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
            self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return an indented description of the span and its children."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the span using 'out'. Emphasis takes precedence: if both
        flags are set, only the emphasis markers are emitted.
        """

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        """
        Initialise the heading with child 'nodes', the heading 'level', the
        'start_extra', 'start_pad', 'end_pad' and 'end_extra' strings, and an
        optional 'identifier'.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def prettyprint(self, indent=""):

        "Return an indented description of the heading and its children."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
             " end_extra=%r identifier=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad,
            self.end_extra, self.identifier)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the heading using 'out'."

        out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class LinkLabel(Container):

    "A link or transclusion label."

    def __repr__(self):
        return "LinkLabel(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return an indented description of the label and its children."

        l = ["%sLinkLabel" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the label by passing its children to 'out'."

        out.link_label(self.nodes)

class LinkParameter(Container):

    "A link or transclusion parameter."

    def __repr__(self):
        return "LinkParameter(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return an indented description of the parameter and its children."

        l = ["%sLinkParameter" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the parameter using 'out', passing the text content split
        at the first "=" into a list of one or two strings.
        """

        s = self.text_content()
        t = s.split("=", 1)
        out.link_parameter(t)

class List(Container):

    "A list."

    def __init__(self, nodes):

        "Initialise the list with the given child 'nodes'."

        Container.__init__(self, nodes)
        self.init()

    def init(self):

        """
        Record the first child and copy its indent, marker and num details.
        Where there are no children, all of these details are None.
        """

        self.first = first = self.nodes[0] if self.nodes else None

        if first is None:
            self.indent = self.marker = self.num = None
        else:
            self.indent = first.indent
            self.marker = first.marker
            self.num = first.num

    def __repr__(self):
        return "List(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return an indented description of the list and its children."

        # Obtain details from any items added since initialisation.

        if not self.first:
            self.init()
        l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list using 'out'."

        # Obtain details from any items added since initialisation.

        if not self.first:
            self.init()
        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with child 'nodes' and the 'indent', 'marker',
        'space' and 'num' details.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return an indented description of the item and its children."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list item using 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return an indented description of the attributes."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the attributes by passing the children to 'out'."

        out.start_table_attrs()
        out.table_attrs(self.nodes)
        out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return an indented description of the table and its children."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table using 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None):

        "Initialise the cell with child 'nodes' and any 'attrs'."

        Container.__init__(self, nodes)
        self.attrs = attrs

    def __repr__(self):
        return "TableCell(%r, %r)" % (self.nodes, self.attrs)

    def prettyprint(self, indent=""):

        "Return an indented description of the cell and its children."

        l = ["%sTableCell:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table cell using 'out'."

        out.start_table_cell(self.attrs)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing=""):

        "Initialise the row with child 'nodes' and any 'trailing' text."

        Container.__init__(self, nodes)
        self.trailing = trailing

    def __repr__(self):
        return "TableRow(%r, %r)" % (self.nodes, self.trailing)

    def prettyprint(self, indent=""):

        "Return an indented description of the row and its children."

        l = ["%sTableRow: trailing=%r" % (indent, self.trailing)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table row using 'out'."

        out.start_table_row()
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    "Generic inline formatting."

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        "Return an indented description using the name of the actual class."

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise the larger text using 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return an indented description of the link and its children."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the link by passing the target and children to 'out'."

        out.link(self.target, self.nodes)

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None, inline=True):

        """
        Initialise the macro with the given 'name', 'args', 'parent' and
        'region', any child 'nodes' (a new list being used if none are
        given), and whether the macro is 'inline'.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region
        self.inline = inline

    def __repr__(self):
        return "Macro(%r, %r, %r, %r, %r, %r)" % (self.name, self.args,
                                                  self.parent, self.region,
                                                  self.nodes, self.inline)

    def prettyprint(self, indent=""):

        "Return an indented description of the macro and its children."

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the macro, together with any children, using 'out'."

        out.start_macro(self.name, self.args, self.nodes, self.inline)
        if self.nodes:
            self._to_string(out)
        out.end_macro(self.inline)

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise the monospaced text using 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise the smaller text using 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise the crossed-out text using 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise the subscripted text using 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise the superscripted text using 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Transclusion(Container):

    "Transclusion details."

    def __init__(self, nodes, target):

        "Initialise the transclusion with child 'nodes' and the 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Transclusion(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return an indented description of the transclusion and children."

        l = ["%sTransclusion: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the transclusion by passing target and children to 'out'."

        out.transclusion(self.target, self.nodes)

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise the underlined text using 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return false since nodes of this kind are not regarded as empty."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return an indented description of the anchor."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise the anchor using 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return an indented description of the break."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise the paragraph break using 'out'."

        out.break_()

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the 'comment' text and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def prettyprint(self, indent=""):

        "Return an indented description of the comment."

        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)

    def to_string(self, out):

        "Serialise the comment using 'out'."

        out.comment(self.comment, self.extra)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the 'directive' text and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def prettyprint(self, indent=""):

        "Return an indented description of the directive."

        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)

    def to_string(self, out):

        "Serialise the directive using 'out'."

        out.directive(self.directive, self.extra)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return an indented description of the line break."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise the line break using 'out'."

        out.linebreak()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, height):

        "Initialise the rule with the given 'height'."

        self.height = height

    def __repr__(self):
        return "Rule(%d)" % self.height

    def prettyprint(self, indent=""):

        "Return an indented description of the rule."

        return "%sRule: height=%d" % (indent, self.height)

    def to_string(self, out):

        "Serialise the rule using 'out'."

        out.rule(self.height)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name' and any 'value',
        'concise' and 'quote' details.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return an indented description of the attribute."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise the attribute using 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node has no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline."

        return "\n" in self.s

    def merge(self, text):

        "Add the string held by the 'text' node to the end of this node."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return an indented description of the text."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise the text using 'out'."

        out.text(self.s)

class Verbatim(Node):

    "Verbatim text."

    def __init__(self, text):

        "Initialise the node with the given 'text'."

        self.text = text

    def __repr__(self):
        return "Verbatim(%r)" % self.text

    def prettyprint(self, indent=""):

        "Return an indented description of the verbatim text."

        return "%sVerbatim: text=%r" % (indent, self.text)

    def to_string(self, out):

        "Serialise the verbatim text using 'out'."

        out.verbatim(self.text)

# vim: tabstop=4 expandtab shiftwidth=4