paul@222 | 1 | #!/usr/bin/env python |
paul@222 | 2 | |
paul@222 | 3 | """ |
paul@222 | 4 | Link target parsing. |
paul@222 | 5 | |
paul@319 | 6 | Copyright (C) 2018, 2019, 2022 Paul Boddie <paul@boddie.org.uk> |
paul@222 | 7 | |
paul@222 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@222 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@222 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@222 | 11 | version. |
paul@222 | 12 | |
paul@222 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@222 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@222 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@222 | 16 | details. |
paul@222 | 17 | |
paul@222 | 18 | You should have received a copy of the GNU General Public License along with |
paul@222 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@222 | 20 | """ |
paul@222 | 21 | |
paul@222 | 22 | from urlparse import urlparse |
paul@222 | 23 | |
class LinkTarget:

    """
    A description of the destination of a link: the kind of target, the text
    originally used to express it, plus an optional identifier and an optional
    page name.
    """

    def __init__(self, type, text, identifier=None, pagename=None):

        """
        Record the link 'type' and the original 'text' of the target, together
        with the 'identifier' and 'pagename' where these are supplied.
        """

        self.type, self.text = type, text
        self.identifier, self.pagename = identifier, pagename

    def __repr__(self):
        details = (self.type, self.text, self.identifier, self.pagename)
        return "LinkTarget(%r, %r, %r, %r)" % details

    def __str__(self):
        return self.text

    # The Unicode representation is the same original text.

    __unicode__ = __str__

    # Accessor methods.

    def get_type(self):
        return self.type

    def get_text(self):
        return self.text

    def get_pagename(self):
        return self.pagename

    def get_identifier(self):

        "Return the identifier, or the original text if no identifier was given."

        if self.identifier is None:
            return self.text
        return self.identifier
paul@222 | 63 | |
paul@222 | 64 | # Parsing and recognition functions. |
paul@222 | 65 | |
def is_url(target):

    """
    Return the 'target' itself if it references a URL (in other words, if a
    URL scheme can be found in it), or None otherwise. A 'target' that cannot
    be parsed as a URL at all is not regarded as a URL.
    """

    # Parsing may fail outright for malformed targets (for example, a host
    # with an unbalanced IPv6 bracket such as "http://[::1"). Such text is
    # reported as not being a URL instead of propagating the parsing error.

    try:
        scheme = urlparse(target).scheme
    except ValueError:
        return None

    return target if scheme else None
paul@222 | 72 | |
def parse_link_target(target, metadata=None):

    """
    Classify the given link 'target' and return a link target object
    describing it. Any 'metadata' is passed on when testing for qualified
    (attachment or mapped prefix) targets.
    """

    # A leading hash indicates a fragment: the identifier omits all leading
    # hash characters.

    if target.startswith("#"):
        anchor = target.lstrip("#")
        return LinkTarget("fragment", target, anchor)

    # A leading slash indicates a sub-page: the identifier omits all leading
    # and trailing slashes.

    if target.startswith("/"):
        subpage = target.strip("/")
        return LinkTarget("sub-page", target, subpage)

    # A leading parent reference indicates a sibling (of ancestor) page: the
    # identifier omits only trailing slashes.

    if target.startswith("../"):
        sibling = target.rstrip("/")
        return LinkTarget("sibling-page", target, sibling)

    # Qualified targets: attachments and links with mapped prefixes.

    qualified = parse_qualified_link_target(target, metadata)
    if qualified:
        return qualified

    # Anything else is a plain URL if recognised as one, or a top-level page
    # otherwise.

    kind = "url" if is_url(target) else "page"
    return LinkTarget(kind, target)
paul@222 | 109 | |
def parse_qualified_link_target(target, metadata=None):

    """
    Interpret 'target' as a qualified link of the form "prefix:identifier",
    returning a link target object whose type is the prefix, or None if the
    target has no such form or the prefix is not recognised. A prefix is
    recognised if it is "attachment" or if the "mapping" entry of any given
    'metadata' provides a true value for it.
    """

    # Divide the target at the first colon only.

    prefix, separator, identifier = target.partition(":")

    if not separator:
        return None

    known_prefixes = metadata and metadata.get("mapping")

    if prefix == "attachment":
        recognised = True
    else:
        recognised = known_prefixes and known_prefixes.get(prefix)

    if not recognised:
        return None

    return LinkTarget(prefix, target, identifier)
paul@222 | 131 | |
paul@222 | 132 | # vim: tabstop=4 expandtab shiftwidth=4 |