Added monospaced text region support. Added various characters (-, _, *, +) to the set excluded from section type (and macro) names, since such characters can appear between { and } (for example, an asterisk used as {*}) to mark the start and end of text effect regions within words. Improved link processing, adding support for space-qualified links. Fixed the ordering of image targets and options.

     1.1 --- a/parser.py	Mon Apr 23 22:55:08 2012 +0200
     1.2 +++ b/parser.py	Mon Apr 23 23:52:27 2012 +0200
     1.3 @@ -33,9 +33,11 @@
     1.4  
     1.5  import re
     1.6  
     1.7 +URL_SCHEMES = ("http", "https", "ftp", "mailto")
     1.8 +
     1.9  # Section extraction.
    1.10  
    1.11 -sections_regexp_str = r"(?<!{){(?P<type>[^{}\n:]+)(:[^}\n]+)?}.*?{(?P=type)}"
    1.12 +sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(:[^}\n]+)?}.*?{(?P=type)}"
    1.13  sections_regexp = re.compile(sections_regexp_str, re.DOTALL | re.MULTILINE)
    1.14  
    1.15  def get_regions(s):
    1.16 @@ -160,11 +162,14 @@
    1.17  
    1.18  # Table row inspection.
    1.19  
    1.20 +monospace_regexp_str = r"{{(?P<monotext>.*?)}}"
    1.21  link_regexp_str = r"[[](?P<linktext>.*?)]"
    1.22  image_regexp_str = r"!(?P<imagetext>.*?)!"
    1.23  cellsep_regexp_str = r"(?P<celltype>[|]{1,2})"
    1.24  
    1.25  content_regexp_str = (
    1.26 +    "(" + monospace_regexp_str + ")"
    1.27 +    "|"
    1.28      "(" + link_regexp_str + ")"
    1.29      "|"
    1.30      "(" + image_regexp_str + ")"
    1.31 @@ -183,26 +188,39 @@
    1.32  
    1.33      "Translate the content described by the given 'match', returning a string."
    1.34  
    1.35 -    if match.group("linktext"):
    1.36 +    if match.group("monotext"):
    1.37 +        return "{{{%s}}}" % match.group("monotext")
    1.38 +
    1.39 +    elif match.group("linktext"):
    1.40          parts = match.group("linktext").split("|")
    1.41  
    1.42          # NOTE: Proper detection of external links required.
    1.43  
    1.44 -        if len(parts) > 1 and parts[1].startswith("http"):
    1.45 +        if len(parts) == 1:
    1.46 +            label, target = None, parts[0]
    1.47 +        elif len(parts) == 2:
    1.48 +            label, target = parts
    1.49 +        else:
    1.50 +            label, target, title = parts
    1.51 +
    1.52 +        if target.find(":") != -1:
    1.53              prefix = ""
    1.54 -        elif parts[0].startswith("#"):
    1.55 +            space, rest = target.split(":", 1)
    1.56 +            if space not in URL_SCHEMES:
    1.57 +                target = "%s/%s" % (space, rest)
    1.58 +        elif target.startswith("#"):
    1.59              prefix = ""
    1.60 -        elif parts[0].startswith("^"):
    1.61 +        elif target.startswith("^"):
    1.62              prefix = "attachment:"
    1.63          else:
    1.64              prefix = "../"
    1.65  
    1.66          if len(parts) == 1:
    1.67 -            return "[[%s%s]]" % (prefix, parts[0])
    1.68 +            return "[[%s%s]]" % (prefix, target)
    1.69          elif len(parts) == 2:
    1.70 -            return "[[%s%s|%s]]" % (prefix, parts[1], parts[0])
    1.71 +            return "[[%s%s|%s]]" % (prefix, target, label)
    1.72          else:
    1.73 -            return "[[%s%s|%s|title=%s]]" % (prefix, parts[1], parts[0], parts[2])
    1.74 +            return "[[%s%s|%s|title=%s]]" % (prefix, target, label, title)
    1.75  
    1.76      elif match.group("imagetext"):
    1.77          parts = match.group("imagetext").split("|")
    1.78 @@ -219,7 +237,7 @@
    1.79          if len(parts) == 1:
    1.80              return "{{%s%s}}" % (prefix, parts[0])
    1.81          else:
    1.82 -            return "{{%s%s|%s}}" % (prefix, parts[1], parts[0])
    1.83 +            return "{{%s%s|%s}}" % (prefix, parts[0], parts[1])
    1.84  
    1.85      else:
    1.86          return match.group()
2012-04-23	Paul Boddie	raw files shortlog changelog graph	Added monospaced text region support. Added various characters to the excluded set in section type (and macro) names, since certain characters can appear within { and } (for example, asterisk can be used as {*}) in order to mark the start and end of text effect regions within words. Improved link processing, adding support for space-qualified links. Fixed the ordering of image targets and options.
			parser.py (file)