#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the given list of child 'nodes'."

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children of this container."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' in turn using the append method."

        for node in nodes:
            self.append(node)

    # By default, adding a node is the same as appending it. Subclasses may
    # define their own add method.

    add = append

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline in turn."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        return self

    def empty(self):

        "Return whether this container has no children."

        return not self.nodes

    def node(self, index):

        """
        Return the child at the given 'index' or None if there is no child at
        that position.
        """

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        "Combine adjacent text nodes."

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def replace(self, old, new):

        """
        Replace 'old' with 'new' in the children. A ValueError is raised by the
        underlying list search if 'old' is not a child.
        """

        i = self.nodes.index(old)
        self.nodes[i] = new

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)

            # Descend into nested containers to collect their text.

            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def __str__(self):

        "Return the prettyprinted form provided by the concrete node class."

        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Extend the list 'l', already holding the description of this node, with
        the prettyprinted form of each child at a deeper 'indent', returning
        the lines joined by newlines.
        """

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise each child in turn using 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, transparent=True, extra=None):

        """
        Initialise the region with the given child 'nodes' together with the
        'level', 'indent', 'type', 'transparent' and 'extra' details.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.transparent = transparent
        self.extra = extra

    def add(self, node):

        """
        Add 'node' to the region, replacing the last child if that child is
        empty, appending 'node' otherwise.
        """

        last = self.node(-1)
        if last is not None and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        # A transparent region passes inline nodes on to its last child. With
        # no children present, the region itself must accept the nodes since
        # there is nothing to delegate to.

        if self.transparent and self.nodes:
            return self.nodes[-1]
        else:
            return self

    def have_end(self, s):

        """
        Return a true value if 's' starts with a closing brace and has a length
        equal to the non-zero level of this region.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the region and its children at 'indent'."

        l = ["%sRegion: level=%d indent=%d type=%s extra=%r" % (indent,
            self.level, self.indent, self.type, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the region and its children using 'out'."

        out.start_region(self.level, self.indent, self.type, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.format, self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the block and its children at 'indent'."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the block and its children using 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with child 'nodes' plus 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children at 'indent'."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the definition item and its children using 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with child 'nodes' plus 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the term and its children at 'indent'."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the definition term and its children using 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        """
        Initialise the span with child 'nodes' and the 'emphasis' and 'strong'
        flags.
        """

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Clear the emphasis flag, returning the strong flag. If strong is set,
        the existing children are first wrapped in an emphasis-only span which
        becomes the only child of this node.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
        self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Clear the strong flag, returning the emphasis flag. If emphasis is set,
        the existing children are first wrapped in a strong-only span which
        becomes the only child of this node.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
        self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return a description of the span and its children at 'indent'."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the span and its children using 'out'. Only one style is
        emitted: emphasis takes precedence should both flags be set.
        """

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra=""):

        """
        Initialise the heading with child 'nodes', the heading 'level', and the
        'start_extra', 'start_pad', 'end_pad' and 'end_extra' details.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad, self.end_pad, self.end_extra)

    def prettyprint(self, indent=""):

        "Return a description of the heading and its children at 'indent'."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r end_extra=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad, self.end_extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the heading and its children using 'out', supplying the text
        content of the heading when starting it.
        """

        out.start_heading(self.level, self.start_extra, self.start_pad, self.text_content())
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class List(Container):

    "A list."

    def __init__(self, nodes, indent, marker, num):

        """
        Initialise the list with child 'nodes' plus the 'indent', 'marker' and
        'num' details.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.num = num

    def __repr__(self):
        return "List(%r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.num)

    def prettyprint(self, indent=""):

        "Return a description of the list and its children at 'indent'."

        l = ["%sList: indent=%d marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list and its children using 'out'."

        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with child 'nodes' plus the 'indent', 'marker',
        'space' and 'num' details.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return a description of the item and its children at 'indent'."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list item and its children using 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the collection and its children at 'indent'."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the attributes using 'out', passing the children as a whole
        to the serialiser instead of serialising each child individually.
        """

        out.start_table_attrs()
        out.table_attrs(self.nodes)
        out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a description of the table and its children at 'indent'."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table and its children using 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None):

        "Initialise the cell with child 'nodes' and any 'attrs'."

        Container.__init__(self, nodes)
        self.attrs = attrs

    def __repr__(self):
        return "TableCell(%r, %r)" % (self.nodes, self.attrs)

    def prettyprint(self, indent=""):

        "Return a description of the cell and its children at 'indent'."

        l = ["%sTableCell:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the cell and its children using 'out'."

        out.start_table_cell(self.attrs)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing=""):

        "Initialise the row with child 'nodes' and any 'trailing' details."

        Container.__init__(self, nodes)
        self.trailing = trailing

    def __repr__(self):
        return "TableRow(%r, %r)" % (self.nodes, self.trailing)

    def prettyprint(self, indent=""):

        "Return a description of the row and its children at 'indent'."

        l = ["%sTableRow: trailing=%r" % (indent, self.trailing)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the row and its children using 'out'."

        out.start_table_row()
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    """
    Generic inline formatting. This class provides representations named after
    the concrete class; the to_string method is defined by each subclass.
    """

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        "Return the class name and a description of the children at 'indent'."

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise the larger text and its children using 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a description of the link and its children at 'indent'."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the link using 'out', emitting link text only if the link has
        children.
        """

        out.start_link(self.target, self.nodes)
        if self.nodes:
            out.start_linktext()
            self._to_string(out)
            out.end_linktext()
        out.end_link()

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, nodes=None):

        """
        Initialise the macro with the given 'name', 'args' and 'parent', using
        a new empty list of children if 'nodes' is omitted or empty.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.parent = parent
        self.args = args

    def __repr__(self):
        return "Macro(%r, %r, %r, %r)" % (self.name, self.args, self.parent, self.nodes)

    def prettyprint(self, indent=""):

        "Return a description of the macro and its children at 'indent'."

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the macro and any children using 'out'."

        out.start_macro(self.name, self.args, self.nodes)
        if self.nodes:
            self._to_string(out)
        out.end_macro()

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise the monospaced text and its children using 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise the smaller text and its children using 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise the crossed-out text and its children using 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise the subscripted text and its children using 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise the superscripted text and its children using 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise the underlined text and its children using 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return false: nodes of this kind are not regarded as empty by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return a description of the anchor at 'indent'."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise the anchor using 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return a description of the break at 'indent'."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise the break using 'out'."

        out.break_()

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return a description of the line break at 'indent'."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise the line break using 'out'."

        out.linebreak()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, length):

        "Initialise the rule with the given 'length'."

        self.length = length

    def __repr__(self):
        return "Rule(%d)" % self.length

    def prettyprint(self, indent=""):

        "Return a description of the rule at 'indent'."

        return "%sRule: length=%d" % (indent, self.length)

    def to_string(self, out):

        "Serialise the rule using 'out'."

        out.rule(self.length)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name' plus any 'value',
        'concise' and 'quote' details.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return a description of the attribute at 'indent'."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise the attribute using 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node holds no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline."

        return "\n" in self.s

    def merge(self, text):

        "Append the string held by the 'text' node to the string of this node."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return a description of the text at 'indent'."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise the text using 'out'."

        out.text(self.s)

# vim: tabstop=4 expandtab shiftwidth=4