#!/usr/bin/env python

"""
Link target parsing.

Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# The urlparse module was moved to urllib.parse in Python 3; support both.

try:
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse

class LinkTarget:

    "A link target abstraction."

    def __init__(self, type, text, identifier=None):

        "Initialise the link with the given 'type', 'text' and 'identifier'."

        self.type = type
        self.text = text
        self.identifier = identifier

    def __repr__(self):
        return "LinkTarget(%r, %r, %r)" % (self.type, self.text, self.identifier)

    def __str__(self):

        "Return the original text of the link target."

        return self.text

    __unicode__ = __str__

    def get_identifier(self):

        """
        Return the identifier of the target, falling back to the target text
        where no identifier was given.
        """

        if self.identifier is not None:
            return self.identifier
        else:
            return self.text

    def get_text(self):

        "Return the original text of the link target."

        return self.text

    def get_type(self):

        "Return the type of the link target."

        return self.type

# Parsing and recognition functions.

def is_url(target):

    """
    Return 'target' itself if it references a URL, or None otherwise. Only the
    presence of a scheme in 'target' is tested. A target that cannot be parsed
    as a URL is not considered to reference one.
    """

    # The parser raises ValueError for certain malformed targets (for example,
    # an unbalanced bracket in the network location). Such targets are treated
    # as not being URLs instead of causing link parsing to fail.

    try:
        scheme = urlparse(target).scheme
    except ValueError:
        return None

    if scheme:
        return target

    return None

def parse_link_target(target, metadata=None):

    """
    Parse a link 'target', returning a link target object. Use any 'metadata'
    to identify certain link types.

    The resulting object has one of the types "fragment", "sub-page",
    "sibling-page", "url" or "page", or the prefix of a qualified target as
    recognised by parse_qualified_link_target.
    """

    # Fragments: all leading hash characters are removed from the identifier.

    if target.startswith("#"):
        return LinkTarget("fragment", target, target.lstrip("#"))

    # Sub-pages: leading and trailing slashes are removed from the identifier.

    if target.startswith("/"):
        return LinkTarget("sub-page", target, target.strip("/"))

    # Sibling (of ancestor) pages: only trailing slashes are removed.

    if target.startswith("../"):
        return LinkTarget("sibling-page", target, target.rstrip("/"))

    # Attachment or interwiki link. These are tested before plain URLs because
    # the prefix of a qualified target would otherwise be taken as a scheme.

    result = parse_qualified_link_target(target, metadata)
    if result:
        return result

    # Plain URL.

    if is_url(target):
        return LinkTarget("url", target)

    # Top-level pages.

    return LinkTarget("page", target)

def parse_qualified_link_target(target, metadata=None):

    """
    Parse a possible qualified link 'target', returning a link target object or
    None if the target is not suitable. Use any 'metadata' to identify certain
    link types.

    A qualified target has the form "prefix:identifier". It is suitable if the
    prefix is "attachment" or if the "mapping" entry in 'metadata' provides a
    true value for the prefix. The prefix is used as the link target type.
    """

    prefix, separator, identifier = target.partition(":")

    # Without a colon, the target cannot be qualified.

    if not separator:
        return None

    mapping = metadata and metadata.get("mapping")

    if prefix == "attachment" or (mapping and mapping.get(prefix)):
        return LinkTarget(prefix, target, identifier)

    return None

# vim: tabstop=4 expandtab shiftwidth=4