1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/parser.py Sat Mar 31 18:01:47 2012 +0200
1.3 @@ -0,0 +1,54 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +import re
1.7 +
# Section extraction.
#
# Finds every "{type}...{type}" span in a document. The opening marker is a
# brace-delimited type containing no braces of its own; the body is matched
# non-greedily (and across newlines) up to the next identical marker. The
# negative lookbehind rejects an opening brace directly preceded by "{".

sections_regexp_str = r"(?<!{)(?P<section>{(?P<type>[^{}]+)}.*?{(?P=type)})"
sections_regexp = re.compile(
    sections_regexp_str,
    flags=re.MULTILINE | re.DOTALL,
)
1.12 +
# Section inspection.
#
# Splits one already-extracted "{type}body{type}" span into its type and its
# body. The body is matched greedily (and across newlines), so it extends to
# the last occurrence of the closing marker in the string being examined.

section_regexp_str = r"{(?P<sectiontype>.*?)}(?P<section>.*){(?P=sectiontype)}"
section_regexp = re.compile(
    section_regexp_str,
    flags=re.MULTILINE | re.DOTALL,
)
1.17 +
def get_regions(s):

    """
    Split 's' into an ordered list of (type, text) regions.

    Each section found by 'sections_regexp' is described by whatever
    'get_section_details' returns for the matched span. The text before each
    section, and the text after the last one, is recorded as a region whose
    type is None; such a region is emitted even when its text is empty.
    """

    found = []
    cursor = 0

    for hit in sections_regexp.finditer(s):
        found += [
            # Plain text between the previous section (or the start) and here.
            (None, s[cursor:hit.start()]),
            # The section itself, broken into its type and its body.
            get_section_details(hit.group()),
        ]
        cursor = hit.end()

    # Whatever follows the final section (the whole input if there were none).
    found.append((None, s[cursor:]))
    return found
1.34 +
def get_section_details(s):

    """
    Return the (type, text) pair describing the section held in 's'.

    When 's' does not start with something 'section_regexp' recognises, the
    result is (None, s), i.e. the input is handed back untyped.
    """

    found = section_regexp.match(s)

    if found is None:
        return None, s

    # Asking for several groups at once yields them as a tuple, in order.
    return found.group("sectiontype", "section")
1.44 +
if __name__ == "__main__":
    # Script mode: read a whole document from standard input and dump each
    # region found in it, followed by a separator line.
    import sys

    s = sys.stdin.read()

    # Each print call takes exactly one argument so that the statement parses
    # and produces the same output under both Python 2 and Python 3. The loop
    # variable is named 'kind' to avoid shadowing the builtin 'type'.
    for kind, text in get_regions(s):
        print("Region type: %s" % (kind,))
        print("Region:")
        print(text)
        print("")
        print("-" * 60)
1.56 +
1.57 +# vim: tabstop=4 expandtab shiftwidth=4