1 #!/usr/bin/env python 2 3 """ 4 Moin wiki format document tree nodes. 5 6 Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 class Container: 23 24 "A container of document nodes." 25 26 def __init__(self, nodes): 27 self.nodes = nodes 28 29 # In principle, allow blocks within containers. Some nodes may forbid 30 # them to simplify the document structure. 31 32 self.allow_blocks = True 33 34 def append(self, node): 35 self.nodes.append(node) 36 37 def append_many(self, nodes): 38 for node in nodes: 39 self.append(node) 40 41 add = append 42 43 def append_inline(self, node): 44 45 "Append 'node' inline within the appropriate container." 46 47 n = self.append_point() 48 49 # Redirect the addition if another container is to accept the node. 50 51 if n is not self: 52 n.append_inline(node) 53 54 # Otherwise, append within this container. 55 56 else: 57 n.append(node) 58 59 def append_inline_many(self, nodes): 60 for node in nodes: 61 self.append_inline(node) 62 63 def append_point(self): 64 65 "Return the container to which inline nodes are added." 66 67 return self 68 69 def empty(self): 70 return not self.nodes 71 72 def insert_after(self, old, new): 73 74 "Insert after 'old' in the children the 'new' node." 75 76 index = self.nodes.index(old) 77 self.nodes.insert(index + 1, new) 78 79 def node(self, index): 80 try: 81 return self.nodes[index] 82 except IndexError: 83 return None 84 85 def normalise(self): 86 87 "Combine adjacent text nodes." 88 89 nodes = self.nodes 90 self.nodes = [] 91 text = None 92 93 for node in nodes: 94 95 # Open a text node or merge text into an open node. 96 97 if isinstance(node, Text): 98 if not text: 99 text = node 100 else: 101 text.merge(node) 102 103 # Close any open text node and append the current node. 104 105 else: 106 if text: 107 self.append(text) 108 text = None 109 self.append(node) 110 111 # Add any open text node. 112 113 if text: 114 self.append(text) 115 116 def remove(self, node): 117 118 "Remove 'node' from the children." 119 120 self.nodes.remove(node) 121 122 def replace(self, old, new): 123 124 "Replace 'old' with 'new' in the children." 125 126 i = self.nodes.index(old) 127 self.nodes[i] = new 128 129 def split_at(self, node): 130 131 """ 132 Split the container at 'node', returning a new container holding the 133 nodes following 'node' that are moved from this container. 134 """ 135 136 i = self.nodes.index(node) 137 following = self.__class__(self.nodes[i+1:]) 138 139 # Remove the node and the following parts from this container. 140 141 del self.nodes[i:] 142 return following 143 144 def text_content(self): 145 146 """ 147 Return a string containing the content of text nodes within this 148 container. 149 """ 150 151 l = [] 152 153 for node in self.nodes: 154 if isinstance(node, Text): 155 l.append(node.s) 156 elif isinstance(node, Container): 157 l.append(node.text_content()) 158 159 return "".join(l) 160 161 def whitespace_only(self): 162 163 "Return whether the container provides only whitespace text." 164 165 return not self.text_content().strip() 166 167 def __str__(self): 168 return self.prettyprint() 169 170 def _prettyprint(self, l, indent=""): 171 for node in self.nodes: 172 l.append(node.prettyprint(indent + " ")) 173 return "\n".join(l) 174 175 def _to_string(self, out): 176 for node in self.nodes: 177 node.to_string(out) 178 179 class Region(Container): 180 181 "A region of the page." 182 183 def __init__(self, nodes, level=0, indent=0, type=None, args=None, 184 transparent=True, extra=None): 185 Container.__init__(self, nodes) 186 self.level = level 187 self.indent = indent 188 self.type = type 189 self.args = args 190 self.transparent = transparent 191 self.extra = extra 192 193 def add(self, node): 194 last = self.node(-1) 195 if last and last.empty(): 196 self.nodes[-1] = node 197 else: 198 self.append(node) 199 200 def append_point(self): 201 202 "Return the container to which inline nodes are added." 203 204 if self.transparent: 205 return self.nodes[-1] 206 else: 207 return self 208 209 def have_end(self, s): 210 return self.level and s.startswith("}") and self.level == len(s) 211 212 def __repr__(self): 213 return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level, 214 self.indent, self.type, self.args, self.transparent, self.extra) 215 216 def prettyprint(self, indent=""): 217 l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent, 218 self.level, self.indent, self.type, self.args, self.extra)] 219 return self._prettyprint(l, indent) 220 221 def to_string(self, out): 222 out.start_region(self.level, self.indent, self.type, self.extra) 223 224 # Obtain a serialiser for the region from the same format family. 225 # Retain the same serialiser if no appropriate serialiser could be 226 # obtained. 227 228 serialiser_name = "%s.%s" % (out.format, self.type) 229 serialiser = out.get_serialiser(serialiser_name) 230 231 # Serialise the region. 232 233 self._to_string(serialiser) 234 235 # End the region with the previous serialiser. 236 237 out.end_region(self.level, self.indent, self.type, self.extra) 238 239 240 241 # Block nodes. 242 243 class Block(Container): 244 245 "A block in the page." 246 247 def __repr__(self): 248 return "Block(%r)" % self.nodes 249 250 def prettyprint(self, indent=""): 251 l = ["%sBlock" % indent] 252 return self._prettyprint(l, indent) 253 254 def to_string(self, out): 255 out.start_block() 256 self._to_string(out) 257 out.end_block() 258 259 class DefItem(Container): 260 261 "A definition item." 262 263 def __init__(self, nodes, pad, extra): 264 Container.__init__(self, nodes) 265 self.pad = pad 266 self.extra = extra 267 268 def __repr__(self): 269 return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra) 270 271 def prettyprint(self, indent=""): 272 l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)] 273 return self._prettyprint(l, indent) 274 275 def to_string(self, out): 276 out.start_defitem(self.pad, self.extra) 277 self._to_string(out) 278 out.end_defitem(self.pad, self.extra) 279 280 class DefTerm(Container): 281 282 "A definition term." 283 284 def __init__(self, nodes, pad, extra=""): 285 Container.__init__(self, nodes) 286 self.pad = pad 287 self.extra = extra 288 289 def __repr__(self): 290 return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra) 291 292 def prettyprint(self, indent=""): 293 l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)] 294 return self._prettyprint(l, indent) 295 296 def to_string(self, out): 297 out.start_defterm(self.pad, self.extra) 298 self._to_string(out) 299 out.end_defterm(self.pad, self.extra) 300 301 class FontStyle(Container): 302 303 "Emphasised and/or strong text." 304 305 def __init__(self, nodes, emphasis=False, strong=False): 306 Container.__init__(self, nodes) 307 self.emphasis = emphasis 308 self.strong = strong 309 310 def close_emphasis(self): 311 if self.strong: 312 span = FontStyle(self.nodes, emphasis=True) 313 self.nodes = [span] 314 self.emphasis = False 315 return self.strong 316 317 def close_strong(self): 318 if self.emphasis: 319 span = FontStyle(self.nodes, strong=True) 320 self.nodes = [span] 321 self.strong = False 322 return self.emphasis 323 324 def __repr__(self): 325 return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong) 326 327 def prettyprint(self, indent=""): 328 l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)] 329 return self._prettyprint(l, indent) 330 331 def to_string(self, out): 332 if self.emphasis: 333 out.start_emphasis() 334 elif self.strong: 335 out.start_strong() 336 self._to_string(out) 337 if self.emphasis: 338 out.end_emphasis() 339 elif self.strong: 340 out.end_strong() 341 342 class Heading(Container): 343 344 "A heading." 345 346 def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="", 347 identifier=None): 348 Container.__init__(self, nodes) 349 self.level = level 350 self.start_extra = start_extra 351 self.start_pad = start_pad 352 self.end_pad = end_pad 353 self.end_extra = end_extra 354 self.identifier = identifier 355 356 def __repr__(self): 357 return "Heading(%r, %d, %r, %r, %r, %r, %r)" % ( 358 self.nodes, self.level, self.start_extra, self.start_pad, 359 self.end_pad, self.end_extra, self.identifier) 360 361 def prettyprint(self, indent=""): 362 l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r" 363 " end_extra=%r identifier=%r" % ( 364 indent, self.level, self.start_extra, self.start_pad, self.end_pad, 365 self.end_extra, self.identifier)] 366 return self._prettyprint(l, indent) 367 368 def to_string(self, out): 369 out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier) 370 self._to_string(out) 371 out.end_heading(self.level, self.end_pad, self.end_extra) 372 373 class LinkLabel(Container): 374 375 "A link or transclusion label." 376 377 def __repr__(self): 378 return "LinkLabel(%r)" % self.nodes 379 380 def prettyprint(self, indent=""): 381 l = ["%sLinkLabel" % indent] 382 return self._prettyprint(l, indent) 383 384 def to_string(self, out): 385 out.link_label(self.nodes) 386 387 class LinkParameter(Container): 388 389 "A link or transclusion parameter." 390 391 def __repr__(self): 392 return "LinkParameter(%r)" % self.nodes 393 394 def prettyprint(self, indent=""): 395 l = ["%sLinkParameter" % indent] 396 return self._prettyprint(l, indent) 397 398 def to_string(self, out): 399 s = self.text_content() 400 t = s.split("=", 1) 401 out.link_parameter(t) 402 403 class List(Container): 404 405 "A list." 406 407 def __init__(self, nodes, indent, marker, num): 408 Container.__init__(self, nodes) 409 self.indent = indent 410 self.marker = marker 411 self.num = num 412 413 def __repr__(self): 414 return "List(%r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.num) 415 416 def prettyprint(self, indent=""): 417 l = ["%sList: indent=%d marker=%r num=%r" % (indent, self.indent, self.marker, self.num)] 418 return self._prettyprint(l, indent) 419 420 def to_string(self, out): 421 out.start_list(self.indent, self.marker, self.num) 422 self._to_string(out) 423 out.end_list(self.indent, self.marker, self.num) 424 425 class ListItem(Container): 426 427 "A list item." 428 429 def __init__(self, nodes, indent, marker, space, num): 430 Container.__init__(self, nodes) 431 self.indent = indent 432 self.marker = marker 433 self.space = space 434 self.num = num 435 436 # Forbid blocks within list items for simpler structure. 437 438 self.allow_blocks = False 439 440 def __repr__(self): 441 return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num) 442 443 def prettyprint(self, indent=""): 444 l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)] 445 return self._prettyprint(l, indent) 446 447 def to_string(self, out): 448 out.start_listitem(self.indent, self.marker, self.space, self.num) 449 self._to_string(out) 450 out.end_listitem(self.indent, self.marker, self.space, self.num) 451 452 class TableAttrs(Container): 453 454 "A collection of table attributes." 455 456 def __repr__(self): 457 return "TableAttrs(%r)" % self.nodes 458 459 def prettyprint(self, indent=""): 460 l = ["%sTableAttrs:" % indent] 461 return self._prettyprint(l, indent) 462 463 def to_string(self, out): 464 out.start_table_attrs() 465 out.table_attrs(self.nodes) 466 out.end_table_attrs() 467 468 class Table(Container): 469 470 "A table." 471 472 def __repr__(self): 473 return "Table(%r)" % self.nodes 474 475 def prettyprint(self, indent=""): 476 l = ["%sTable:" % indent] 477 return self._prettyprint(l, indent) 478 479 def to_string(self, out): 480 out.start_table() 481 self._to_string(out) 482 out.end_table() 483 484 class TableCell(Container): 485 486 "A table cell." 487 488 def __init__(self, nodes, attrs=None): 489 Container.__init__(self, nodes) 490 self.attrs = attrs 491 492 def __repr__(self): 493 return "TableCell(%r, %r)" % (self.nodes, self.attrs) 494 495 def prettyprint(self, indent=""): 496 l = ["%sTableCell:" % indent] 497 return self._prettyprint(l, indent) 498 499 def to_string(self, out): 500 out.start_table_cell(self.attrs) 501 self._to_string(out) 502 out.end_table_cell() 503 504 class TableRow(Container): 505 506 "A table row." 507 508 def __init__(self, nodes, trailing=""): 509 Container.__init__(self, nodes) 510 self.trailing = trailing 511 512 def __repr__(self): 513 return "TableRow(%r, %r)" % (self.nodes, self.trailing) 514 515 def prettyprint(self, indent=""): 516 l = ["%sTableRow: trailing=%r" % (indent, self.trailing)] 517 return self._prettyprint(l, indent) 518 519 def to_string(self, out): 520 out.start_table_row() 521 self._to_string(out) 522 out.end_table_row(self.trailing) 523 524 525 526 # Inline nodes with children. 527 528 class Inline(Container): 529 530 "Generic inline formatting." 531 532 def __repr__(self): 533 return "%s(%r)" % (self.__class__.__name__, self.nodes) 534 535 def prettyprint(self, indent=""): 536 l = ["%s%s" % (indent, self.__class__.__name__)] 537 return self._prettyprint(l, indent) 538 539 class Larger(Inline): 540 541 "Larger text." 542 543 def to_string(self, out): 544 out.start_larger() 545 self._to_string(out) 546 out.end_larger() 547 548 class Link(Container): 549 550 "Link details." 551 552 def __init__(self, nodes, target): 553 Container.__init__(self, nodes) 554 self.target = target 555 556 def __repr__(self): 557 return "Link(%r, %r)" % (self.nodes, self.target) 558 559 def prettyprint(self, indent=""): 560 l = ["%sLink: target=%r" % (indent, self.target)] 561 return self._prettyprint(l, indent) 562 563 def to_string(self, out): 564 out.link(self.target, self.nodes) 565 566 class Macro(Container): 567 568 "Macro details." 569 570 def __init__(self, name, args, parent, region, nodes=None): 571 Container.__init__(self, nodes or []) 572 self.name = name 573 self.args = args 574 self.parent = parent 575 self.region = region 576 577 def __repr__(self): 578 return "Macro(%r, %r, %r, %r, %r)" % (self.name, self.args, self.parent, self.region, self.nodes) 579 580 def prettyprint(self, indent=""): 581 l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)] 582 return self._prettyprint(l, indent) 583 584 def to_string(self, out): 585 out.start_macro(self.name, self.args, self.nodes) 586 if self.nodes: 587 self._to_string(out) 588 out.end_macro() 589 590 class Monospace(Inline): 591 592 "Monospaced text." 593 594 def to_string(self, out): 595 out.start_monospace() 596 self._to_string(out) 597 out.end_monospace() 598 599 class Smaller(Inline): 600 601 "Smaller text." 602 603 def to_string(self, out): 604 out.start_smaller() 605 self._to_string(out) 606 out.end_smaller() 607 608 class Strikethrough(Inline): 609 610 "Crossed-out text." 611 612 def to_string(self, out): 613 out.start_strikethrough() 614 self._to_string(out) 615 out.end_strikethrough() 616 617 class Subscript(Inline): 618 619 "Subscripted text." 620 621 def to_string(self, out): 622 out.start_subscript() 623 self._to_string(out) 624 out.end_subscript() 625 626 class Superscript(Inline): 627 628 "Superscripted text." 629 630 def to_string(self, out): 631 out.start_superscript() 632 self._to_string(out) 633 out.end_superscript() 634 635 class Transclusion(Container): 636 637 "Transclusion details." 638 639 def __init__(self, nodes, target): 640 Container.__init__(self, nodes) 641 self.target = target 642 643 def __repr__(self): 644 return "Transclusion(%r, %r)" % (self.nodes, self.target) 645 646 def prettyprint(self, indent=""): 647 l = ["%sTransclusion: target=%r" % (indent, self.target)] 648 return self._prettyprint(l, indent) 649 650 def to_string(self, out): 651 out.transclusion(self.target, self.nodes) 652 653 class Underline(Inline): 654 655 "Underlined text." 656 657 def to_string(self, out): 658 out.start_underline() 659 self._to_string(out) 660 out.end_underline() 661 662 663 664 # Nodes without children. 665 666 class Node: 667 668 "A document node without children." 669 670 def empty(self): 671 return False 672 673 class Anchor(Node): 674 675 "Anchor details." 676 677 def __init__(self, target): 678 self.target = target 679 680 def __repr__(self): 681 return "Anchor(%r)" % self.target 682 683 def prettyprint(self, indent=""): 684 return "%sAnchor: target=%r" % (indent, self.target) 685 686 def to_string(self, out): 687 out.anchor(self.target) 688 689 class Break(Node): 690 691 "A paragraph break." 692 693 def __repr__(self): 694 return "Break()" 695 696 def prettyprint(self, indent=""): 697 return "%sBreak" % indent 698 699 def to_string(self, out): 700 out.break_() 701 702 class Comment(Node): 703 704 "A comment." 705 706 def __init__(self, comment, extra): 707 self.comment = comment 708 self.extra = extra 709 710 def __repr__(self): 711 return "Comment(%r, %r)" % (self.comment, self.extra) 712 713 def prettyprint(self, indent=""): 714 return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra) 715 716 def to_string(self, out): 717 out.comment(self.comment, self.extra) 718 719 class Directive(Node): 720 721 "A processing directive." 722 723 def __init__(self, directive, extra): 724 self.directive = directive 725 self.extra = extra 726 727 def __repr__(self): 728 return "Directive(%r, %r)" % (self.directive, self.extra) 729 730 def prettyprint(self, indent=""): 731 return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra) 732 733 def to_string(self, out): 734 out.directive(self.directive, self.extra) 735 736 class LineBreak(Node): 737 738 "A line break within a block." 739 740 def __repr__(self): 741 return "LineBreak()" 742 743 def prettyprint(self, indent=""): 744 return "%sLineBreak" % indent 745 746 def to_string(self, out): 747 out.linebreak() 748 749 class Rule(Node): 750 751 "A horizontal rule." 752 753 def __init__(self, length): 754 self.length = length 755 756 def __repr__(self): 757 return "Rule(%d)" % self.length 758 759 def prettyprint(self, indent=""): 760 return "%sRule: length=%d" % (indent, self.length) 761 762 def to_string(self, out): 763 out.rule(self.length) 764 765 class TableAttr(Node): 766 767 "A table attribute." 768 769 def __init__(self, name, value=None, concise=False, quote=None): 770 self.name = name 771 self.value = value 772 self.concise = concise 773 self.quote = quote 774 775 def __repr__(self): 776 return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote) 777 778 def prettyprint(self, indent=""): 779 return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote) 780 781 def to_string(self, out): 782 out.table_attr(self.name, self.value, self.concise, self.quote) 783 784 class Text(Node): 785 786 "A text node." 787 788 def __init__(self, s): 789 self.s = s 790 791 def empty(self): 792 return not self.s 793 794 def multiline(self): 795 return "\n" in self.s 796 797 def merge(self, text): 798 self.s += text.s 799 800 def __repr__(self): 801 return "Text(%r)" % self.s 802 803 def prettyprint(self, indent=""): 804 return "%sText: %r" % (indent, self.s) 805 806 def to_string(self, out): 807 out.text(self.s) 808 809 class Verbatim(Node): 810 811 "Verbatim text." 812 813 def __init__(self, text): 814 self.text = text 815 816 def __repr__(self): 817 return "Verbatim(%r)" % self.text 818 819 def prettyprint(self, indent=""): 820 return "%sVerbatim: text=%r" % (indent, self.text) 821 822 def to_string(self, out): 823 out.verbatim(self.text) 824 825 # vim: tabstop=4 expandtab shiftwidth=4