#!/usr/bin/env python

"""
Link target parsing.

Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# The urlparse module was renamed to urllib.parse in Python 3. Prefer the
# original location so that behaviour under Python 2 is untouched, and fall
# back to the new location so that the module can still be imported elsewhere.

try:
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse

class LinkTarget:

    "A link target abstraction."

    def __init__(self, type, text, identifier=None):

        "Initialise the link with the given 'type', 'text' and 'identifier'."

        self.type = type
        self.text = text
        self.identifier = identifier

    def __repr__(self):
        return "LinkTarget(%r, %r, %r)" % (self.type, self.text, self.identifier)

    def __str__(self):

        "Return the text of the link target."

        return self.text

    __unicode__ = __str__

    def get_identifier(self):

        "Return the identifier, or the text if no identifier has been set."

        return self.identifier or self.text

    def get_text(self):

        "Return the text of the link target."

        return self.text

    def get_type(self):

        "Return the type of the link target."

        return self.type

# Parsing and recognition functions.

def is_url(target):

    """
    Return whether the 'target' references a URL. The result is the 'target'
    itself if it has a URL scheme, or None otherwise. A 'target' that cannot be
    parsed as a URL at all is not regarded as a URL, yielding None.
    """

    # Malformed text such as an unbalanced "[" in the host part makes urlparse
    # raise ValueError. Such text is simply not a URL.

    try:
        scheme = urlparse(target)[0]
    except ValueError:
        return None

    return target if scheme else None

def parse_link_target(target, metadata=None):

    """
    Parse a link 'target', returning a link target object. Use any 'metadata'
    to identify certain link types.

    The recognised types, tested in this order, are "fragment" (a leading "#"),
    "sub-page" (a leading "/"), "sibling-page" (a leading "../"), a qualified
    target (see parse_qualified_link_target), "url" (anything with a URL
    scheme) and finally "page" for everything else.
    """

    # Fragments.

    if target.startswith("#"):
        return LinkTarget("fragment", target, target.lstrip("#"))

    # Sub-pages. The identifier omits leading and trailing slashes.

    if target.startswith("/"):
        return LinkTarget("sub-page", target, target.strip("/"))

    # Sibling (of ancestor) pages. Only trailing slashes are removed so that
    # the leading "../" remains part of the identifier.

    if target.startswith("../"):
        return LinkTarget("sibling-page", target, target.rstrip("/"))

    # Attachment or interwiki link.

    result = parse_qualified_link_target(target, metadata)
    if result:
        return result

    # Plain URL.

    if is_url(target):
        return LinkTarget("url", target)

    # Top-level pages.

    return LinkTarget("page", target)

def parse_qualified_link_target(target, metadata=None):

    """
    Parse a possible qualified link 'target', returning a link target object or
    None if the target is not suitable. Use any 'metadata' to identify certain
    link types.

    A qualified target has the form "prefix:identifier". It is accepted if the
    prefix is "attachment" or if the "mapping" entry of 'metadata' provides a
    true value for the prefix. The prefix becomes the type of the result.
    """

    prefix, separator, identifier = target.partition(":")

    # Without a colon, the target is not qualified.

    if not separator:
        return None

    mapping = metadata and metadata.get("mapping")

    if prefix == "attachment" or mapping and mapping.get(prefix):
        return LinkTarget(prefix, target, identifier)

    return None

# vim: tabstop=4 expandtab shiftwidth=4