#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the given list of child 'nodes'."

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children of this container."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' in turn using the append method."

        for node in nodes:
            self.append(node)

    # By default, adding is the same as appending. Subclasses may override
    # this to treat added nodes specially.

    add = append

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline using append_inline."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        return self

    def empty(self):

        "Return whether this container has no children."

        return not self.nodes

    def node(self, index):

        """
        Return the child at the given 'index', or None if no child exists at
        that position.
        """

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        """
        Combine adjacent text nodes. The first text node of each run is
        retained and has the text of its successors merged into it.
        """

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def replace(self, old, new):

        """
        Replace 'old' with 'new' in the children. A ValueError is raised by the
        underlying list lookup if 'old' is not a child.
        """

        i = self.nodes.index(old)
        self.nodes[i] = new

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container. Nested containers are visited recursively; other nodes
        contribute nothing.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)
            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def __str__(self):

        "Return the prettyprinted form provided by the concrete subclass."

        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Extend the list 'l' of output lines with the prettyprinted children,
        each shown with an indent one level deeper than 'indent', returning the
        lines joined by newlines.
        """

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise each child in order using the serialiser 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        """
        Initialise the region with the given child 'nodes' together with the
        'level', 'indent', 'type', 'args', 'transparent' and 'extra' details,
        all of which are stored as attributes.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def add(self, node):

        """
        Add 'node' to the region, replacing the last child if that child
        reports itself as empty, appending 'node' otherwise.
        """

        last = self.node(-1)
        if last is not None and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_point(self):

        """
        Return the container to which inline nodes are added.

        A transparent region delegates to its last child so that inline nodes
        are placed within that child. Where no suitable child exists (the
        region has no children, or the last child is not a container and so
        cannot accept inline nodes), the region itself is returned instead of
        causing an IndexError or a later AttributeError.
        """

        if self.transparent:
            last = self.node(-1)
            if isinstance(last, Container):
                return last

        return self

    def have_end(self, s):

        """
        Return a true value if 's' ends this region: the region must have a
        non-zero level, and 's' must start with "}" and have a length equal to
        the level. The result is not necessarily a boolean (a zero level is
        returned as is).
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the region and its children."

        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the region and its children using the serialiser 'out'."

        out.start_region(self.level, self.indent, self.type, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.format, self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a multi-line description of the block and its children."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the block and its children using the serialiser 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with child 'nodes' plus 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the item and its children."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item and its children using the serialiser 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with child 'nodes' plus 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the term and its children."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the term and its children using the serialiser 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        """
        Initialise the span with child 'nodes' and flags indicating whether
        'emphasis' and/or 'strong' styles apply.
        """

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Close the emphasis style. If the strong style is also set, the current
        children are moved into a nested emphasis-only span and this node
        retains only the strong style. Return the strong flag.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
            self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Close the strong style. If the emphasis style is also set, the current
        children are moved into a nested strong-only span and this node
        retains only the emphasis style. Return the emphasis flag.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
            self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the span and its children."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the span and its children using the serialiser 'out'. Only
        one style is emitted: emphasis takes precedence over strong.
        """

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra=""):

        """
        Initialise the heading with child 'nodes', the heading 'level', and the
        'start_extra', 'start_pad', 'end_pad' and 'end_extra' details.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad, self.end_pad, self.end_extra)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the heading and its children."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r end_extra=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad, self.end_extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the heading and its children using the serialiser 'out',
        supplying the heading's text content when starting the heading.
        """

        out.start_heading(self.level, self.start_extra, self.start_pad, self.text_content())
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class List(Container):

    "A list."

    def __init__(self, nodes, indent, marker, num):

        "Initialise the list with child 'nodes', 'indent', 'marker' and 'num'."

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.num = num

    def __repr__(self):
        return "List(%r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.num)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the list and its children."

        l = ["%sList: indent=%d marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list and its children using the serialiser 'out'."

        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with child 'nodes', 'indent', 'marker', 'space' and
        'num' details.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the item and its children."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item and its children using the serialiser 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a multi-line description of the attributes."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the attributes using the serialiser 'out', passing the whole
        list of children to it rather than serialising each child separately.
        """

        out.start_table_attrs()
        out.table_attrs(self.nodes)
        out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a multi-line description of the table and its children."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table and its children using the serialiser 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None):

        "Initialise the cell with child 'nodes' and any 'attrs'."

        Container.__init__(self, nodes)
        self.attrs = attrs

    def __repr__(self):
        return "TableCell(%r, %r)" % (self.nodes, self.attrs)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the cell and its children."

        l = ["%sTableCell:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the cell and its children using the serialiser 'out'."

        out.start_table_cell(self.attrs)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing=""):

        "Initialise the row with child 'nodes' and any 'trailing' details."

        Container.__init__(self, nodes)
        self.trailing = trailing

    def __repr__(self):
        return "TableRow(%r, %r)" % (self.nodes, self.trailing)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the row and its children."

        l = ["%sTableRow: trailing=%r" % (indent, self.trailing)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the row and its children using the serialiser 'out'."

        out.start_table_row()
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    "Generic inline formatting."

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        """
        Return a multi-line description of the node and its children, using
        the name of the concrete class as the label.
        """

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise the node and its children using the serialiser 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the link and its children."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the link using the serialiser 'out'. The link text section
        is only emitted if the link has children.
        """

        out.start_link(self.target, self.nodes)
        if self.nodes:
            out.start_linktext()
            self._to_string(out)
            out.end_linktext()
        out.end_link()

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, nodes=None):

        """
        Initialise the macro with the given 'name', 'args' and 'parent'. Where
        'nodes' is omitted or empty, a new empty list of children is used.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.parent = parent
        self.args = args

    def __repr__(self):
        return "Macro(%r, %r, %r, %r)" % (self.name, self.args, self.parent, self.nodes)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the macro and its children."

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the macro and any children using the serialiser 'out'."

        out.start_macro(self.name, self.args, self.nodes)
        if self.nodes:
            self._to_string(out)
        out.end_macro()

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise the node and its children using the serialiser 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise the node and its children using the serialiser 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise the node and its children using the serialiser 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise the node and its children using the serialiser 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise the node and its children using the serialiser 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise the node and its children using the serialiser 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return false: by default, childless nodes are never empty."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return a one-line description of the anchor."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise the anchor using the serialiser 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return a one-line description of the break."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise the break using the serialiser 'out'."

        out.break_()

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return a one-line description of the line break."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise the line break using the serialiser 'out'."

        out.linebreak()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, length):

        "Initialise the rule with the given 'length'."

        self.length = length

    def __repr__(self):
        return "Rule(%d)" % self.length

    def prettyprint(self, indent=""):

        "Return a one-line description of the rule."

        return "%sRule: length=%d" % (indent, self.length)

    def to_string(self, out):

        "Serialise the rule using the serialiser 'out'."

        out.rule(self.length)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name' and optional 'value',
        'concise' and 'quote' details.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return a one-line description of the attribute."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise the attribute using the serialiser 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node holds no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline character."

        return "\n" in self.s

    def merge(self, text):

        "Append the string held by the 'text' node to this node's string."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return a one-line description of the text."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise the text using the serialiser 'out'."

        out.text(self.s)

# vim: tabstop=4 expandtab shiftwidth=4