1 #!/usr/bin/env python 2 3 """ 4 Moin wiki format document tree nodes. 5 6 Copyright (C) 2017, 2018, 2019 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 class Container: 23 24 "A container of document nodes." 25 26 def __init__(self, nodes): 27 self.nodes = nodes 28 29 # In principle, allow blocks within containers. Some nodes may forbid 30 # them to simplify the document structure. 31 32 self.allow_blocks = True 33 34 def append(self, node): 35 self.nodes.append(node) 36 37 def append_many(self, nodes): 38 for node in nodes: 39 self.append(node) 40 41 add = append 42 43 def append_inline(self, node): 44 45 "Append 'node' inline within the appropriate container." 46 47 n = self.append_point() 48 49 # Redirect the addition if another container is to accept the node. 50 51 if n is not self: 52 n.append_inline(node) 53 54 # Otherwise, append within this container. 55 56 else: 57 n.append(node) 58 59 def append_inline_many(self, nodes): 60 for node in nodes: 61 self.append_inline(node) 62 63 def append_point(self): 64 65 "Return the container to which inline nodes are added." 66 67 return self 68 69 def empty(self): 70 return not self.nodes 71 72 def insert_after(self, old, new): 73 74 "Insert after 'old' in the children the 'new' node." 75 76 index = self.nodes.index(old) 77 self.nodes.insert(index + 1, new) 78 79 def node(self, index): 80 try: 81 return self.nodes[index] 82 except IndexError: 83 return None 84 85 def normalise(self): 86 87 "Combine adjacent text nodes." 88 89 nodes = self.nodes 90 self.nodes = [] 91 text = None 92 93 for node in nodes: 94 95 # Open a text node or merge text into an open node. 96 97 if isinstance(node, Text): 98 if not text: 99 text = node 100 else: 101 text.merge(node) 102 103 # Close any open text node and append the current node. 104 105 else: 106 if text: 107 self.append(text) 108 text = None 109 self.append(node) 110 111 # Add any open text node. 112 113 if text: 114 self.append(text) 115 116 def remove(self, node): 117 118 "Remove 'node' from the children." 119 120 self.nodes.remove(node) 121 122 def replace(self, old, new): 123 124 "Replace 'old' with 'new' in the children." 125 126 i = self.nodes.index(old) 127 self.nodes[i] = new 128 129 def split_at(self, node): 130 131 """ 132 Split the container at 'node', returning a new container holding the 133 nodes following 'node' that are moved from this container. 134 """ 135 136 i = self.nodes.index(node) 137 following = self.__class__(self.nodes[i+1:]) 138 139 # Remove the node and the following parts from this container. 140 141 del self.nodes[i:] 142 return following 143 144 def text_content(self): 145 146 """ 147 Return a string containing the content of text nodes within this 148 container. 149 """ 150 151 l = [] 152 153 for node in self.nodes: 154 if isinstance(node, Text): 155 l.append(node.s) 156 elif isinstance(node, Container): 157 l.append(node.text_content()) 158 159 return "".join(l) 160 161 def whitespace_only(self): 162 163 "Return whether the container provides only whitespace text." 164 165 return not self.text_content().strip() 166 167 def __str__(self): 168 return self.prettyprint() 169 170 def _prettyprint(self, l, indent=""): 171 for node in self.nodes: 172 l.append(node.prettyprint(indent + " ")) 173 return "\n".join(l) 174 175 def _to_string(self, out): 176 for node in self.nodes: 177 node.to_string(out) 178 179 class Region(Container): 180 181 "A region of the page." 182 183 def __init__(self, nodes, level=0, indent=0, type=None, args=None, 184 transparent=True, extra=None): 185 Container.__init__(self, nodes) 186 self.level = level 187 self.indent = indent 188 self.type = type 189 self.args = args 190 self.transparent = transparent 191 self.extra = extra 192 193 def add(self, node): 194 last = self.node(-1) 195 if last and last.empty(): 196 self.nodes[-1] = node 197 else: 198 self.append(node) 199 200 def append_point(self): 201 202 "Return the container to which inline nodes are added." 203 204 if self.transparent: 205 return self.nodes[-1] 206 else: 207 return self 208 209 def have_end(self, s): 210 return self.level and s.startswith("}") and self.level == len(s) 211 212 def __repr__(self): 213 return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level, 214 self.indent, self.type, self.args, self.transparent, self.extra) 215 216 def prettyprint(self, indent=""): 217 l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent, 218 self.level, self.indent, self.type, self.args, self.extra)] 219 return self._prettyprint(l, indent) 220 221 def to_string(self, out): 222 out.start_region(self.level, self.indent, self.type, self.extra) 223 224 # Obtain a serialiser for the region from the same format family. 225 # Retain the same serialiser if no appropriate serialiser could be 226 # obtained. 227 228 serialiser_name = "%s.%s" % (out.format, self.type) 229 serialiser = out.get_serialiser(serialiser_name) 230 231 # Serialise the region. 232 233 self._to_string(serialiser) 234 235 # End the region with the previous serialiser. 236 237 out.end_region(self.level, self.indent, self.type, self.extra) 238 239 240 241 # Block nodes. 242 243 class Block(Container): 244 245 "A block in the page." 246 247 def __repr__(self): 248 return "Block(%r)" % self.nodes 249 250 def prettyprint(self, indent=""): 251 l = ["%sBlock" % indent] 252 return self._prettyprint(l, indent) 253 254 def to_string(self, out): 255 out.start_block() 256 self._to_string(out) 257 out.end_block() 258 259 class DefItem(Container): 260 261 "A definition item." 262 263 def __init__(self, nodes, pad, extra): 264 Container.__init__(self, nodes) 265 self.pad = pad 266 self.extra = extra 267 268 def __repr__(self): 269 return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra) 270 271 def prettyprint(self, indent=""): 272 l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)] 273 return self._prettyprint(l, indent) 274 275 def to_string(self, out): 276 out.start_defitem(self.pad, self.extra) 277 self._to_string(out) 278 out.end_defitem(self.pad, self.extra) 279 280 class DefTerm(Container): 281 282 "A definition term." 283 284 def __init__(self, nodes, pad, extra=""): 285 Container.__init__(self, nodes) 286 self.pad = pad 287 self.extra = extra 288 289 def __repr__(self): 290 return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra) 291 292 def prettyprint(self, indent=""): 293 l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)] 294 return self._prettyprint(l, indent) 295 296 def to_string(self, out): 297 out.start_defterm(self.pad, self.extra) 298 self._to_string(out) 299 out.end_defterm(self.pad, self.extra) 300 301 class FontStyle(Container): 302 303 "Emphasised and/or strong text." 304 305 def __init__(self, nodes, emphasis=False, strong=False): 306 Container.__init__(self, nodes) 307 self.emphasis = emphasis 308 self.strong = strong 309 310 def close_emphasis(self): 311 if self.strong: 312 span = FontStyle(self.nodes, emphasis=True) 313 self.nodes = [span] 314 self.emphasis = False 315 return self.strong 316 317 def close_strong(self): 318 if self.emphasis: 319 span = FontStyle(self.nodes, strong=True) 320 self.nodes = [span] 321 self.strong = False 322 return self.emphasis 323 324 def __repr__(self): 325 return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong) 326 327 def prettyprint(self, indent=""): 328 l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)] 329 return self._prettyprint(l, indent) 330 331 def to_string(self, out): 332 if self.emphasis: 333 out.start_emphasis() 334 elif self.strong: 335 out.start_strong() 336 self._to_string(out) 337 if self.emphasis: 338 out.end_emphasis() 339 elif self.strong: 340 out.end_strong() 341 342 class Heading(Container): 343 344 "A heading." 345 346 def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="", 347 identifier=None): 348 Container.__init__(self, nodes) 349 self.level = level 350 self.start_extra = start_extra 351 self.start_pad = start_pad 352 self.end_pad = end_pad 353 self.end_extra = end_extra 354 self.identifier = identifier 355 356 def __repr__(self): 357 return "Heading(%r, %d, %r, %r, %r, %r, %r)" % ( 358 self.nodes, self.level, self.start_extra, self.start_pad, 359 self.end_pad, self.end_extra, self.identifier) 360 361 def prettyprint(self, indent=""): 362 l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r" 363 " end_extra=%r identifier=%r" % ( 364 indent, self.level, self.start_extra, self.start_pad, self.end_pad, 365 self.end_extra, self.identifier)] 366 return self._prettyprint(l, indent) 367 368 def to_string(self, out): 369 out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier) 370 self._to_string(out) 371 out.end_heading(self.level, self.end_pad, self.end_extra) 372 373 class LinkLabel(Container): 374 375 "A link or transclusion label." 376 377 def __repr__(self): 378 return "LinkLabel(%r)" % self.nodes 379 380 def prettyprint(self, indent=""): 381 l = ["%sLinkLabel" % indent] 382 return self._prettyprint(l, indent) 383 384 def to_string(self, out): 385 out.link_label(self.nodes) 386 387 class LinkParameter(Container): 388 389 "A link or transclusion parameter." 390 391 def __repr__(self): 392 return "LinkParameter(%r)" % self.nodes 393 394 def prettyprint(self, indent=""): 395 l = ["%sLinkParameter" % indent] 396 return self._prettyprint(l, indent) 397 398 def to_string(self, out): 399 s = self.text_content() 400 t = s.split("=", 1) 401 out.link_parameter(t) 402 403 class List(Container): 404 405 "A list." 406 407 def __init__(self, nodes): 408 Container.__init__(self, nodes) 409 first = nodes and nodes[0] or None 410 self.indent = first and first.indent 411 self.marker = first and first.marker 412 self.num = first and first.num 413 414 def __repr__(self): 415 return "List(%r)" % self.nodes 416 417 def prettyprint(self, indent=""): 418 l = ["%sList: indent=%d marker=%r num=%r" % (indent, self.indent, self.marker, self.num)] 419 return self._prettyprint(l, indent) 420 421 def to_string(self, out): 422 out.start_list(self.indent, self.marker, self.num) 423 self._to_string(out) 424 out.end_list(self.indent, self.marker, self.num) 425 426 class ListItem(Container): 427 428 "A list item." 429 430 def __init__(self, nodes, indent, marker, space, num): 431 Container.__init__(self, nodes) 432 self.indent = indent 433 self.marker = marker 434 self.space = space 435 self.num = num 436 437 # Forbid blocks within list items for simpler structure. 438 439 self.allow_blocks = False 440 441 def __repr__(self): 442 return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num) 443 444 def prettyprint(self, indent=""): 445 l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)] 446 return self._prettyprint(l, indent) 447 448 def to_string(self, out): 449 out.start_listitem(self.indent, self.marker, self.space, self.num) 450 self._to_string(out) 451 out.end_listitem(self.indent, self.marker, self.space, self.num) 452 453 class TableAttrs(Container): 454 455 "A collection of table attributes." 456 457 def __repr__(self): 458 return "TableAttrs(%r)" % self.nodes 459 460 def prettyprint(self, indent=""): 461 l = ["%sTableAttrs:" % indent] 462 return self._prettyprint(l, indent) 463 464 def to_string(self, out): 465 out.start_table_attrs() 466 out.table_attrs(self.nodes) 467 out.end_table_attrs() 468 469 class Table(Container): 470 471 "A table." 472 473 def __repr__(self): 474 return "Table(%r)" % self.nodes 475 476 def prettyprint(self, indent=""): 477 l = ["%sTable:" % indent] 478 return self._prettyprint(l, indent) 479 480 def to_string(self, out): 481 out.start_table() 482 self._to_string(out) 483 out.end_table() 484 485 class TableCell(Container): 486 487 "A table cell." 488 489 def __init__(self, nodes, attrs=None): 490 Container.__init__(self, nodes) 491 self.attrs = attrs 492 493 def __repr__(self): 494 return "TableCell(%r, %r)" % (self.nodes, self.attrs) 495 496 def prettyprint(self, indent=""): 497 l = ["%sTableCell:" % indent] 498 return self._prettyprint(l, indent) 499 500 def to_string(self, out): 501 out.start_table_cell(self.attrs) 502 self._to_string(out) 503 out.end_table_cell() 504 505 class TableRow(Container): 506 507 "A table row." 508 509 def __init__(self, nodes, trailing=""): 510 Container.__init__(self, nodes) 511 self.trailing = trailing 512 513 def __repr__(self): 514 return "TableRow(%r, %r)" % (self.nodes, self.trailing) 515 516 def prettyprint(self, indent=""): 517 l = ["%sTableRow: trailing=%r" % (indent, self.trailing)] 518 return self._prettyprint(l, indent) 519 520 def to_string(self, out): 521 out.start_table_row() 522 self._to_string(out) 523 out.end_table_row(self.trailing) 524 525 526 527 # Inline nodes with children. 528 529 class Inline(Container): 530 531 "Generic inline formatting." 532 533 def __repr__(self): 534 return "%s(%r)" % (self.__class__.__name__, self.nodes) 535 536 def prettyprint(self, indent=""): 537 l = ["%s%s" % (indent, self.__class__.__name__)] 538 return self._prettyprint(l, indent) 539 540 class Larger(Inline): 541 542 "Larger text." 543 544 def to_string(self, out): 545 out.start_larger() 546 self._to_string(out) 547 out.end_larger() 548 549 class Link(Container): 550 551 "Link details." 552 553 def __init__(self, nodes, target): 554 Container.__init__(self, nodes) 555 self.target = target 556 557 def __repr__(self): 558 return "Link(%r, %r)" % (self.nodes, self.target) 559 560 def prettyprint(self, indent=""): 561 l = ["%sLink: target=%r" % (indent, self.target)] 562 return self._prettyprint(l, indent) 563 564 def to_string(self, out): 565 out.link(self.target, self.nodes) 566 567 class Macro(Container): 568 569 "Macro details." 570 571 def __init__(self, name, args, parent, region, nodes=None): 572 Container.__init__(self, nodes or []) 573 self.name = name 574 self.args = args 575 self.parent = parent 576 self.region = region 577 578 def __repr__(self): 579 return "Macro(%r, %r, %r, %r, %r)" % (self.name, self.args, self.parent, self.region, self.nodes) 580 581 def prettyprint(self, indent=""): 582 l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)] 583 return self._prettyprint(l, indent) 584 585 def to_string(self, out): 586 out.start_macro(self.name, self.args, self.nodes) 587 if self.nodes: 588 self._to_string(out) 589 out.end_macro() 590 591 class Monospace(Inline): 592 593 "Monospaced text." 594 595 def to_string(self, out): 596 out.start_monospace() 597 self._to_string(out) 598 out.end_monospace() 599 600 class Smaller(Inline): 601 602 "Smaller text." 603 604 def to_string(self, out): 605 out.start_smaller() 606 self._to_string(out) 607 out.end_smaller() 608 609 class Strikethrough(Inline): 610 611 "Crossed-out text." 612 613 def to_string(self, out): 614 out.start_strikethrough() 615 self._to_string(out) 616 out.end_strikethrough() 617 618 class Subscript(Inline): 619 620 "Subscripted text." 621 622 def to_string(self, out): 623 out.start_subscript() 624 self._to_string(out) 625 out.end_subscript() 626 627 class Superscript(Inline): 628 629 "Superscripted text." 630 631 def to_string(self, out): 632 out.start_superscript() 633 self._to_string(out) 634 out.end_superscript() 635 636 class Transclusion(Container): 637 638 "Transclusion details." 639 640 def __init__(self, nodes, target): 641 Container.__init__(self, nodes) 642 self.target = target 643 644 def __repr__(self): 645 return "Transclusion(%r, %r)" % (self.nodes, self.target) 646 647 def prettyprint(self, indent=""): 648 l = ["%sTransclusion: target=%r" % (indent, self.target)] 649 return self._prettyprint(l, indent) 650 651 def to_string(self, out): 652 out.transclusion(self.target, self.nodes) 653 654 class Underline(Inline): 655 656 "Underlined text." 657 658 def to_string(self, out): 659 out.start_underline() 660 self._to_string(out) 661 out.end_underline() 662 663 664 665 # Nodes without children. 666 667 class Node: 668 669 "A document node without children." 670 671 def empty(self): 672 return False 673 674 class Anchor(Node): 675 676 "Anchor details." 677 678 def __init__(self, target): 679 self.target = target 680 681 def __repr__(self): 682 return "Anchor(%r)" % self.target 683 684 def prettyprint(self, indent=""): 685 return "%sAnchor: target=%r" % (indent, self.target) 686 687 def to_string(self, out): 688 out.anchor(self.target) 689 690 class Break(Node): 691 692 "A paragraph break." 693 694 def __repr__(self): 695 return "Break()" 696 697 def prettyprint(self, indent=""): 698 return "%sBreak" % indent 699 700 def to_string(self, out): 701 out.break_() 702 703 class Comment(Node): 704 705 "A comment." 706 707 def __init__(self, comment, extra): 708 self.comment = comment 709 self.extra = extra 710 711 def __repr__(self): 712 return "Comment(%r, %r)" % (self.comment, self.extra) 713 714 def prettyprint(self, indent=""): 715 return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra) 716 717 def to_string(self, out): 718 out.comment(self.comment, self.extra) 719 720 class Directive(Node): 721 722 "A processing directive." 723 724 def __init__(self, directive, extra): 725 self.directive = directive 726 self.extra = extra 727 728 def __repr__(self): 729 return "Directive(%r, %r)" % (self.directive, self.extra) 730 731 def prettyprint(self, indent=""): 732 return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra) 733 734 def to_string(self, out): 735 out.directive(self.directive, self.extra) 736 737 class LineBreak(Node): 738 739 "A line break within a block." 740 741 def __repr__(self): 742 return "LineBreak()" 743 744 def prettyprint(self, indent=""): 745 return "%sLineBreak" % indent 746 747 def to_string(self, out): 748 out.linebreak() 749 750 class Rule(Node): 751 752 "A horizontal rule." 753 754 def __init__(self, length): 755 self.length = length 756 757 def __repr__(self): 758 return "Rule(%d)" % self.length 759 760 def prettyprint(self, indent=""): 761 return "%sRule: length=%d" % (indent, self.length) 762 763 def to_string(self, out): 764 out.rule(self.length) 765 766 class TableAttr(Node): 767 768 "A table attribute." 769 770 def __init__(self, name, value=None, concise=False, quote=None): 771 self.name = name 772 self.value = value 773 self.concise = concise 774 self.quote = quote 775 776 def __repr__(self): 777 return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote) 778 779 def prettyprint(self, indent=""): 780 return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote) 781 782 def to_string(self, out): 783 out.table_attr(self.name, self.value, self.concise, self.quote) 784 785 class Text(Node): 786 787 "A text node." 788 789 def __init__(self, s): 790 self.s = s 791 792 def empty(self): 793 return not self.s 794 795 def multiline(self): 796 return "\n" in self.s 797 798 def merge(self, text): 799 self.s += text.s 800 801 def __repr__(self): 802 return "Text(%r)" % self.s 803 804 def prettyprint(self, indent=""): 805 return "%sText: %r" % (indent, self.s) 806 807 def to_string(self, out): 808 out.text(self.s) 809 810 class Verbatim(Node): 811 812 "Verbatim text." 813 814 def __init__(self, text): 815 self.text = text 816 817 def __repr__(self): 818 return "Verbatim(%r)" % self.text 819 820 def prettyprint(self, indent=""): 821 return "%sVerbatim: text=%r" % (indent, self.text) 822 823 def to_string(self, out): 824 out.verbatim(self.text) 825 826 # vim: tabstop=4 expandtab shiftwidth=4