#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        """
        Initialise the container with the given list of child 'nodes'. The list
        is retained and modified in place by the container's operations.
        """

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' to the children."

        for node in nodes:
            self.append(node)

    # Provide an alternative name for appending that subclasses may override
    # separately from the append method itself.

    add = append

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        return self

    def empty(self):

        "Return whether the container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        """
        Insert after 'old' in the children the 'new' node. A ValueError is
        raised by the underlying list if 'old' is not a child.
        """

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        """
        Return the child at the given 'index' or None if no such child exists.
        """

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        "Combine adjacent text nodes."

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def remove(self, node):

        "Remove 'node' from the children."

        self.nodes.remove(node)

    def replace(self, old, new):

        "Replace 'old' with 'new' in the children."

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.

        The new container is made by calling this container's class with only
        the list of following nodes, so classes whose initialisers demand
        further arguments cannot be split in this way.
        """

        i = self.nodes.index(node)
        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        # Only text nodes and containers contribute: other childless nodes are
        # skipped.

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)
            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

    def __str__(self):
        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Add to the list 'l' the descriptions of the children, each produced
        with a deeper 'indent', returning all descriptions joined by newlines.
        """

        # NOTE(review): the indent step is reproduced as found; confirm its
        # NOTE(review): width against the upstream file.

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise the children using 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        """
        Initialise the region with the given 'nodes' together with the 'level',
        'indent', 'type', 'args', 'transparent' status and 'extra' details.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def add(self, node):

        """
        Add 'node' to the region, replacing the last child if that child is
        empty, appending the node otherwise.
        """

        last = self.node(-1)
        if last and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        # A transparent region passes inline nodes on to its last child, but
        # only where that child is a container able to accept them. An empty
        # region, or one ending with a childless node, accepts the nodes
        # itself instead of failing.

        if self.transparent:
            last = self.node(-1)
            if isinstance(last, Container):
                return last

        return self

    def have_end(self, s):

        """
        Return a true value if 's' starts with "}" and has a length equal to
        the level of this region, with a level of zero never matching.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the region and its children."

        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the region using 'out'."

        out.start_region(self.level, self.indent, self.type, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.format, self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the block and its children."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the block using 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with the given 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item using 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with the given 'nodes', 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the term and its children."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the term using 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        """
        Initialise the styled text with the given 'nodes' and the 'emphasis'
        and 'strong' flags.
        """

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Close emphasis in this node. If the node is also strong, the existing
        children are moved into a nested emphasis node and this node retains
        only the strong style. Return whether this node is strong.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
            self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Close strong text in this node. If the node is also emphasised, the
        existing children are moved into a nested strong node and this node
        retains only the emphasis style. Return whether this node is
        emphasised.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
            self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return a description of the styled text and its children."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the styled text using 'out'. Only one style is emitted for
        the node, with emphasis taking precedence over strong.
        """

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        """
        Initialise the heading with the given 'nodes' and 'level', together
        with the 'start_extra', 'start_pad', 'end_pad' and 'end_extra' details
        and any 'identifier'.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def prettyprint(self, indent=""):

        "Return a description of the heading and its children."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
             " end_extra=%r identifier=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad,
            self.end_extra, self.identifier)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the heading using 'out'."

        out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class LinkLabel(Container):

    "A link or transclusion label."

    def __repr__(self):
        return "LinkLabel(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the label and its children."

        l = ["%sLinkLabel" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the children of the label to 'out' for serialisation."

        out.link_label(self.nodes)

class LinkParameter(Container):

    "A link or transclusion parameter."

    def __repr__(self):
        return "LinkParameter(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the parameter and its children."

        l = ["%sLinkParameter" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the parameter using 'out', supplying the text content split
        at the first "=" character, this yielding a list of one or two items.
        """

        s = self.text_content()
        t = s.split("=", 1)
        out.link_parameter(t)

class List(Container):

    "A list."

    def __init__(self, nodes):

        "Initialise the list with the given 'nodes'."

        Container.__init__(self, nodes)
        self.init()

    def init(self):

        """
        Record the first child together with its indent, marker and number,
        or record None for all of these if there are no children.
        """

        first = self.nodes[0] if self.nodes else None
        self.first = first

        if first is None:
            self.indent = None
            self.marker = None
            self.num = None
        else:
            self.indent = first.indent
            self.marker = first.marker
            self.num = first.num

    def __repr__(self):
        return "List(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the list and its children."

        # Children may have been added since initialisation.

        if not self.first:
            self.init()
        l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list using 'out'."

        # Children may have been added since initialisation.

        if not self.first:
            self.init()
        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with the given 'nodes', 'indent', 'marker', 'space'
        and 'num' details.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item using 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the attributes collection and its children."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the children of the collection to 'out' for serialisation."

        out.start_table_attrs()
        out.table_attrs(self.nodes)
        out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the table and its children."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table using 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None):

        "Initialise the cell with the given 'nodes' and any 'attrs'."

        Container.__init__(self, nodes)
        self.attrs = attrs

    def __repr__(self):
        return "TableCell(%r, %r)" % (self.nodes, self.attrs)

    def prettyprint(self, indent=""):

        "Return a description of the cell and its children."

        l = ["%sTableCell:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the cell using 'out'."

        out.start_table_cell(self.attrs)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing=""):

        "Initialise the row with the given 'nodes' and any 'trailing' details."

        Container.__init__(self, nodes)
        self.trailing = trailing

    def __repr__(self):
        return "TableRow(%r, %r)" % (self.nodes, self.trailing)

    def prettyprint(self, indent=""):

        "Return a description of the row and its children."

        l = ["%sTableRow: trailing=%r" % (indent, self.trailing)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the row using 'out'."

        out.start_table_row()
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    "Generic inline formatting."

    # The class name is used in descriptions so that subclasses need only
    # provide their own serialisation.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        "Return a description of the inline node and its children."

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise the larger text using 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with the given 'nodes' and 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a description of the link and its children."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Pass the target and children of the link to 'out' for serialisation."

        out.link(self.target, self.nodes)

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None):

        """
        Initialise the macro with the given 'name', 'args', 'parent' and
        'region', together with any 'nodes'. A new list of children is created
        if 'nodes' is omitted or empty.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region

    def __repr__(self):
        return "Macro(%r, %r, %r, %r, %r)" % (self.name, self.args, self.parent, self.region, self.nodes)

    def prettyprint(self, indent=""):

        "Return a description of the macro and its children."

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the macro using 'out', including any children."

        out.start_macro(self.name, self.args, self.nodes)
        if self.nodes:
            self._to_string(out)
        out.end_macro()

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise the monospaced text using 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise the smaller text using 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise the crossed-out text using 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise the subscripted text using 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise the superscripted text using 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Transclusion(Container):

    "Transclusion details."

    def __init__(self, nodes, target):

        "Initialise the transclusion with the given 'nodes' and 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Transclusion(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a description of the transclusion and its children."

        l = ["%sTransclusion: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Pass the target and children of the transclusion to 'out' for
        serialisation.
        """

        out.transclusion(self.target, self.nodes)

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise the underlined text using 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return whether the node is empty, this being false by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return a description of the anchor."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise the anchor using 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return a description of the break."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise the break using 'out'."

        out.break_()

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the given 'comment' and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the comment."

        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)

    def to_string(self, out):

        "Serialise the comment using 'out'."

        out.comment(self.comment, self.extra)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the given 'directive' and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the directive."

        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)

    def to_string(self, out):

        "Serialise the directive using 'out'."

        out.directive(self.directive, self.extra)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return a description of the line break."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise the line break using 'out'."

        out.linebreak()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, length):

        "Initialise the rule with the given 'length'."

        self.length = length

    def __repr__(self):
        return "Rule(%d)" % self.length

    def prettyprint(self, indent=""):

        "Return a description of the rule."

        return "%sRule: length=%d" % (indent, self.length)

    def to_string(self, out):

        "Serialise the rule using 'out'."

        out.rule(self.length)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name' and any 'value',
        'concise' and 'quote' details.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return a description of the attribute."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise the attribute using 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node has no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline."

        return "\n" in self.s

    def merge(self, text):

        "Add the content of the 'text' node to the end of this node's text."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return a description of the text."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise the text using 'out'."

        out.text(self.s)

class Verbatim(Node):

    "Verbatim text."

    def __init__(self, text):

        "Initialise the node with the given 'text'."

        self.text = text

    def __repr__(self):
        return "Verbatim(%r)" % self.text

    def prettyprint(self, indent=""):

        "Return a description of the verbatim text."

        return "%sVerbatim: text=%r" % (indent, self.text)

    def to_string(self, out):

        "Serialise the verbatim text using 'out'."

        out.verbatim(self.text)

# vim: tabstop=4 expandtab shiftwidth=4