1.1 --- a/parser.py Mon Apr 23 22:55:08 2012 +0200
1.2 +++ b/parser.py Mon Apr 23 23:52:27 2012 +0200
1.3 @@ -33,9 +33,11 @@
1.4
1.5 import re
1.6
1.7 +URL_SCHEMES = ("http", "https", "ftp", "mailto")
1.8 +
1.9 # Section extraction.
1.10
1.11 -sections_regexp_str = r"(?<!{){(?P<type>[^{}\n:]+)(:[^}\n]+)?}.*?{(?P=type)}"
1.12 +sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(:[^}\n]+)?}.*?{(?P=type)}"
1.13 sections_regexp = re.compile(sections_regexp_str, re.DOTALL | re.MULTILINE)
1.14
1.15 def get_regions(s):
1.16 @@ -160,11 +162,14 @@
1.17
1.18 # Table row inspection.
1.19
1.20 +monospace_regexp_str = r"{{(?P<monotext>.*?)}}"
1.21 link_regexp_str = r"[[](?P<linktext>.*?)]"
1.22 image_regexp_str = r"!(?P<imagetext>.*?)!"
1.23 cellsep_regexp_str = r"(?P<celltype>[|]{1,2})"
1.24
1.25 content_regexp_str = (
1.26 + "(" + monospace_regexp_str + ")"
1.27 + "|"
1.28 "(" + link_regexp_str + ")"
1.29 "|"
1.30 "(" + image_regexp_str + ")"
1.31 @@ -183,26 +188,39 @@
1.32
1.33 "Translate the content described by the given 'match', returning a string."
1.34
1.35 - if match.group("linktext"):
1.36 + if match.group("monotext"):
1.37 + return "{{{%s}}}" % match.group("monotext")
1.38 +
1.39 + elif match.group("linktext"):
1.40 parts = match.group("linktext").split("|")
1.41
1.42 # NOTE: Proper detection of external links required.
1.43
1.44 - if len(parts) > 1 and parts[1].startswith("http"):
1.45 + if len(parts) == 1:
1.46 + label, target = None, parts[0]
1.47 + elif len(parts) == 2:
1.48 + label, target = parts
1.49 + else:
1.50 + label, target, title = parts
1.51 +
1.52 + if target.find(":") != -1:
1.53 prefix = ""
1.54 - elif parts[0].startswith("#"):
1.55 + space, rest = target.split(":", 1)
1.56 + if space not in URL_SCHEMES:
1.57 + target = "%s/%s" % (space, rest)
1.58 + elif target.startswith("#"):
1.59 prefix = ""
1.60 - elif parts[0].startswith("^"):
1.61 + elif target.startswith("^"):
1.62 prefix = "attachment:"
1.63 else:
1.64 prefix = "../"
1.65
1.66 if len(parts) == 1:
1.67 - return "[[%s%s]]" % (prefix, parts[0])
1.68 + return "[[%s%s]]" % (prefix, target)
1.69 elif len(parts) == 2:
1.70 - return "[[%s%s|%s]]" % (prefix, parts[1], parts[0])
1.71 + return "[[%s%s|%s]]" % (prefix, target, label)
1.72 else:
1.73 - return "[[%s%s|%s|title=%s]]" % (prefix, parts[1], parts[0], parts[2])
1.74 + return "[[%s%s|%s|title=%s]]" % (prefix, target, label, title)
1.75
1.76 elif match.group("imagetext"):
1.77 parts = match.group("imagetext").split("|")
1.78 @@ -219,7 +237,7 @@
1.79 if len(parts) == 1:
1.80 return "{{%s%s}}" % (prefix, parts[0])
1.81 else:
1.82 - return "{{%s%s|%s}}" % (prefix, parts[1], parts[0])
1.83 + return "{{%s%s|%s}}" % (prefix, parts[0], parts[1])
1.84
1.85 else:
1.86 return match.group()