#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022,
              2023 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        """
        Initialise the container with the given list of child 'nodes'. The
        list is retained, not copied, and is modified in place by the methods
        of this class.
        """

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children of this container."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' to the children of this container."

        for node in nodes:
            self.append(node)

    def add(self, node):

        """
        Add 'node' to the children, replacing the last child if that child
        reports itself as empty, or appending 'node' otherwise.
        """

        last = self.node(-1)

        if last is not None and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline using append_inline."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        """
        Return the container to which inline nodes are added: the last child
        if it is a block, or this container otherwise.
        """

        last = self.node(-1)

        if isinstance(last, Block):
            return last
        else:
            return self

    def empty(self):

        "Return whether this container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        """
        Insert after 'old' in the children the 'new' node. A ValueError is
        raised by the underlying list if 'old' is not a child.
        """

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        "Return the child at 'index' or None if there is no such child."

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        """
        Combine adjacent text nodes. The first text node of each run is kept
        and has the text of the following text nodes merged into it.
        """

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def remove(self, node):

        "Remove 'node' from the children."

        self.nodes.remove(node)

    def replace(self, old, new):

        "Replace 'old' with 'new' in the children."

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.

        NOTE: The new container is made by calling this container's class with
        only a list of nodes, so this works only for classes that can be
        constructed that way.
        """

        i = self.nodes.index(node)
        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container. Text is collected recursively from child containers, and
        other kinds of nodes contribute nothing.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)
            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        """
        Initialise the region with the given child 'nodes' and the optional
        'level', 'indent', 'type', 'args', 'transparent' and 'extra' details,
        all of which are stored as given.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def append_point(self):

        """
        Return the container to which inline nodes are added. A transparent
        region delegates to its last child. An opaque region, or a transparent
        one without any children yet, accepts inline nodes itself.
        """

        if self.transparent:
            last = self.node(-1)

            # Avoid failing on an empty region: there is no child to accept
            # the inline nodes, so the region must do so.

            if last is not None:
                return last

        return self

    def have_end(self, s):

        """
        Return whether 's' ends this region: the region must have a non-zero
        level, and 's' must start with "}" and have a length equal to that
        level.
        """

        return bool(self.level and s.startswith("}") and self.level == len(s))

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def visit(self, visitor):

        "Pass this node to the region method of 'visitor'."

        return visitor.region(self)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def visit(self, visitor):

        "Pass this node to the block method of 'visitor'."

        return visitor.block(self)

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with child 'nodes' plus 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def visit(self, visitor):

        "Pass this node to the defitem method of 'visitor'."

        return visitor.defitem(self)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with child 'nodes' plus 'pad' and 'extra' details."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def visit(self, visitor):

        "Pass this node to the defterm method of 'visitor'."

        return visitor.defterm(self)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        """
        Initialise the node with child 'nodes' and flags indicating whether
        'emphasis' and 'strong' styles apply.
        """

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Clear the emphasis flag. If the strong flag is set, the current
        children are first wrapped in a new emphasised node that becomes the
        only child of this node. Return the strong flag.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
        self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Clear the strong flag. If the emphasis flag is set, the current
        children are first wrapped in a new strong node that becomes the only
        child of this node. Return the emphasis flag.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
        self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def visit(self, visitor):

        "Pass this node to the fontstyle method of 'visitor'."

        return visitor.fontstyle(self)

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        """
        Initialise the heading with child 'nodes' and the given 'level', plus
        the optional 'start_extra', 'start_pad', 'end_pad', 'end_extra' and
        'identifier' details, all of which are stored as given.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def visit(self, visitor):

        "Pass this node to the heading method of 'visitor'."

        return visitor.heading(self)

class LinkLabel(Container):

    "A link or transclusion label."

    def __repr__(self):
        return "LinkLabel(%r)" % self.nodes

    def visit(self, visitor):

        "Pass this node to the link_label method of 'visitor'."

        return visitor.link_label(self)

class LinkParameter(Container):

    "A link or transclusion parameter."

    def __repr__(self):
        return "LinkParameter(%r)" % self.nodes

    def visit(self, visitor):

        "Pass this node to the link_parameter method of 'visitor'."

        return visitor.link_parameter(self)

class List(Container):

    "A list."

    def __init__(self, nodes):

        "Initialise the list with child 'nodes', taking details from the first."

        Container.__init__(self, nodes)
        self.init()

    def init(self):

        """
        Record the first child, if any, and copy its indent, marker and num
        details to this list. Without children, all of these are None.
        """

        self.first = first = self.nodes[0] if self.nodes else None
        self.indent = first.indent if first is not None else None
        self.marker = first.marker if first is not None else None
        self.num = first.num if first is not None else None

    def __repr__(self):
        return "List(%r)" % self.nodes

    def visit(self, visitor):

        "Pass this node to the list method of 'visitor'."

        # Obtain the list details if children were added after initialisation.

        if self.first is None:
            self.init()
        return visitor.list(self)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with child 'nodes' and the given 'indent',
        'marker', 'space' and 'num' details, all of which are stored as given.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def visit(self, visitor):

        "Pass this node to the listitem method of 'visitor'."

        return visitor.listitem(self)

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def visit(self, visitor):

        "Pass this node to the table method of 'visitor'."

        return visitor.table(self)

class TableAttrs(Container):

    "A collection of table attributes."

    def __init__(self, nodes):

        "Initialise the collection with child 'nodes' and parsing state flags."

        Container.__init__(self, nodes)

        # Parsing state flags, inconsequential to any final document tree.
        # If incomplete remains set, the attributes are discarded.

        self.incomplete = True
        self.found_cell = False

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def visit(self, visitor):

        "Pass this node to the table_attrs method of 'visitor'."

        return visitor.table_attrs(self)

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None, leading="", padding=""):

        """
        Initialise the cell with child 'nodes' and the optional 'attrs',
        'leading' and 'padding' details, all of which are stored as given.
        """

        Container.__init__(self, nodes)
        self.attrs = attrs
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableCell(%r, %r, %r, %r)" % (self.nodes, self.attrs,
            self.leading, self.padding)

    def visit(self, visitor):

        "Pass this node to the table_cell method of 'visitor'."

        return visitor.table_cell(self)

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing="", leading="", padding=""):

        """
        Initialise the row with child 'nodes' and the optional 'trailing',
        'leading' and 'padding' details, all of which are stored as given.
        """

        Container.__init__(self, nodes)
        self.trailing = trailing
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableRow(%r, %r, %r, %r)" % (self.nodes, self.trailing,
            self.leading, self.padding)

    def visit(self, visitor):

        "Pass this node to the table_row method of 'visitor'."

        return visitor.table_row(self)



# Inline nodes with children.

class Inline(Container):

    """
    Generic inline formatting. Subclasses provide the visit method; the
    representation is shared and employs the name of the actual class.
    """

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

class Larger(Inline):

    "Larger text."

    def visit(self, visitor):

        "Pass this node to the larger method of 'visitor'."

        return visitor.larger(self)

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def visit(self, visitor):

        "Pass this node to the link method of 'visitor'."

        return visitor.link(self)

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None, inline=True):

        """
        Initialise the macro with the given 'name', 'args', 'parent' and
        'region', plus optional child 'nodes' and 'inline' flag. Where 'nodes'
        is omitted or empty, a new list is employed for the children.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region
        self.inline = inline

    def __repr__(self):
        return "Macro(%r, %r, %r, %r, %r, %r)" % (self.name, self.args,
                                                  self.parent, self.region,
                                                  self.nodes, self.inline)

    def visit(self, visitor):

        "Pass this node to the macro method of 'visitor'."

        return visitor.macro(self)

class Monospace(Inline):

    "Monospaced text."

    def visit(self, visitor):

        "Pass this node to the monospace method of 'visitor'."

        return visitor.monospace(self)

class Smaller(Inline):

    "Smaller text."

    def visit(self, visitor):

        "Pass this node to the smaller method of 'visitor'."

        return visitor.smaller(self)

class Strikethrough(Inline):

    "Crossed-out text."

    def visit(self, visitor):

        "Pass this node to the strikethrough method of 'visitor'."

        return visitor.strikethrough(self)

class Subscript(Inline):

    "Subscripted text."

    def visit(self, visitor):

        "Pass this node to the subscript method of 'visitor'."

        return visitor.subscript(self)

class Superscript(Inline):

    "Superscripted text."

    def visit(self, visitor):

        "Pass this node to the superscript method of 'visitor'."

        return visitor.superscript(self)

class Transclusion(Container):

    "Transclusion details."

    def __init__(self, nodes, target):

        "Initialise the transclusion with child 'nodes' and the given 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Transclusion(%r, %r)" % (self.nodes, self.target)

    def visit(self, visitor):

        "Pass this node to the transclusion method of 'visitor'."

        return visitor.transclusion(self)

class Underline(Inline):

    "Underlined text."

    def visit(self, visitor):

        "Pass this node to the underline method of 'visitor'."

        return visitor.underline(self)



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return whether this node is empty, which is not the case by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def visit(self, visitor):

        "Pass this node to the anchor method of 'visitor'."

        return visitor.anchor(self)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def visit(self, visitor):

        "Pass this node to the break_ method of 'visitor'."

        return visitor.break_(self)

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the given 'comment' and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def visit(self, visitor):

        "Pass this node to the comment method of 'visitor'."

        return visitor.comment(self)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the given 'directive' and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def visit(self, visitor):

        "Pass this node to the directive method of 'visitor'."

        return visitor.directive(self)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def visit(self, visitor):

        "Pass this node to the linebreak method of 'visitor'."

        return visitor.linebreak(self)

class NonBreakingSpace(Node):

    "A non-breaking space within a block."

    def __repr__(self):
        return "NonBreakingSpace()"

    def visit(self, visitor):

        "Pass this node to the nbsp method of 'visitor'."

        return visitor.nbsp(self)

class Rule(Node):

    "A horizontal rule."

    def __init__(self, height):

        "Initialise the rule with the given 'height'."

        self.height = height

    def __repr__(self):
        return "Rule(%d)" % self.height

    def visit(self, visitor):

        "Pass this node to the rule method of 'visitor'."

        return visitor.rule(self)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name' and the optional
        'value', 'concise' and 'quote' details, all of which are stored as
        given.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def visit(self, visitor):

        "Pass this node to the table_attr method of 'visitor'."

        return visitor.table_attr(self)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether this node has no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline character."

        return "\n" in self.s

    def merge(self, text):

        "Append the string held by the 'text' node to the string of this node."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def visit(self, visitor):

        "Pass this node to the text method of 'visitor'."

        return visitor.text(self)

class Verbatim(Node):

    "Verbatim text."

    def __init__(self, text):

        "Initialise the node with the given 'text'."

        self.text = text

    def __repr__(self):
        return "Verbatim(%r)" % self.text

    def visit(self, visitor):

        "Pass this node to the verbatim method of 'visitor'."

        return visitor.verbatim(self)

# vim: tabstop=4 expandtab shiftwidth=4