#!/usr/bin/env python

"""
Link target parsing.

Copyright (C) 2018, 2019, 2022, 2023 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# The urlparse module was moved to urllib.parse in Python 3. Try the original
# location first so that behaviour under Python 2 is unchanged.

try:
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse

class LinkTarget:

    "A link target abstraction."

    def __init__(self, type, text, identifier=None, pagename=None):

        """
        Initialise the link with the given 'type', 'text', and optional
        'identifier' and 'pagename'.
        """

        self.type = type
        self.text = text
        self.identifier = identifier
        self.pagename = pagename

    def __repr__(self):
        return "LinkTarget(%r, %r, %r, %r)" % (self.type, self.text,
                                               self.identifier, self.pagename)

    def __str__(self):

        "Return the original text of the link target."

        return self.text

    # Python 2 uses __unicode__ for unicode() conversions.

    __unicode__ = __str__

    def get_identifier(self):

        """
        Return the identifier of the target, falling back to the complete
        target text if no separate identifier was given.
        """

        if self.identifier is not None:
            return self.identifier
        else:
            return self.text

    def get_pagename(self):

        "Return the page name associated with the target, or None if absent."

        return self.pagename

    def get_text(self):

        "Return the complete text of the target."

        return self.text

    def get_type(self):

        "Return the type of the target."

        return self.type

# Parsing and recognition functions.

def is_url(target):

    """
    Return whether the 'target' references a URL. The 'target' itself is
    returned if it has a URL scheme; otherwise, None is returned.
    """

    # Only the scheme (the first element of the parse result) is of interest.

    scheme = urlparse(target)[0]
    return target if scheme else None

def parse_link_target(target, metadata=None):

    """
    Parse a link 'target', returning a link target object. Use any 'metadata'
    to identify certain link types.

    The checks are made in order: fragments, sub-pages, sibling pages,
    qualified (attachment or interwiki) targets, plain URLs. Anything not
    recognised is regarded as a top-level page.
    """

    # Fragments.

    if target.startswith("#"):
        return LinkTarget("fragment", target, target.lstrip("#"))

    # Sub-pages.

    if target.startswith("/"):
        return LinkTarget("sub-page", target, target.strip("/"))

    # Sibling (of ancestor) pages.

    if target.startswith("../"):
        return LinkTarget("sibling-page", target, target.rstrip("/"))

    # Attachment or interwiki link. These are tested before plain URLs so that
    # a prefix present in the mapping takes precedence over a URL scheme.

    result = parse_qualified_link_target(target, metadata)
    if result:
        return result

    # Plain URL.

    if is_url(target):
        return LinkTarget("url", target)

    # Top-level pages.

    return LinkTarget("page", target)

def parse_qualified_link_target(target, metadata=None):

    """
    Parse a possible qualified link 'target', returning a link target object or
    None if the target is not suitable. Use any 'metadata' to identify certain
    link types.

    A qualified target has the form "prefix:identifier". It is accepted if the
    prefix is "attachment" or if the "mapping" entry in 'metadata' provides a
    true value for the prefix. The "pagename" entry in 'metadata', if present,
    is recorded in the returned object.
    """

    t = target.split(":", 1)

    if len(t) != 2:
        return None

    prefix, identifier = t

    # Absent or empty metadata yields None for both entries. The empty
    # metadata object must never be propagated as the page name.

    if metadata:
        mapping = metadata.get("mapping")
        pagename = metadata.get("pagename")
    else:
        mapping = None
        pagename = None

    if prefix == "attachment" or mapping and mapping.get(prefix):
        return LinkTarget(prefix, target, identifier, pagename)

    return None

# vim: tabstop=4 expandtab shiftwidth=4