1.1 --- a/MoinSupport.py Sun Nov 18 21:17:05 2012 +0100
1.2 +++ b/MoinSupport.py Thu Nov 22 23:05:50 2012 +0100
1.3 @@ -33,6 +33,10 @@
1.4 marker_regexp_str = r"([{]{3,}|[}]{3,})"
1.5 marker_regexp = re.compile(marker_regexp_str, re.MULTILINE | re.DOTALL) # {{{... or }}}...
1.6
1.7 +# Extraction of headings: a line opened and closed by equal runs of "=" (e.g. "== title ==").
1.8 +
1.9 +heading_regexp = re.compile(r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$", re.UNICODE | re.MULTILINE)
1.10 +
1.11 # Category extraction from pages.
1.12
1.13 category_regexp = None
1.14 @@ -309,6 +313,25 @@
1.15
1.16 return getFragmentsFromRegions(getRegions(s, include_non_regions))
1.17
1.18 +# Heading extraction.
1.19 +
1.20 +def getHeadings(s):
1.21 +
1.22 + """
1.23 + Return a list of (level, title, span) tuples for the headings in string 's':
1.24 + 'level' is the length of the "=" marker, 'title' the unstripped text between
1.25 + the markers, and 'span' the (start, end) region of 's' matched by the heading.
1.26 + """
1.27 +
1.28 + headings = []
1.29 +
1.30 + for match in heading_regexp.finditer(s):
1.31 + headings.append(
1.32 + (len(match.group("level")), match.group("heading"), match.span())
1.33 + )
1.34 +
1.35 + return headings
1.36 +
1.37 # Region/section attribute parsing.
1.38
1.39 def parseAttributes(s, escape=True):
2.1 --- a/README.txt Sun Nov 18 21:17:05 2012 +0100
2.2 +++ b/README.txt Thu Nov 22 23:05:50 2012 +0100
2.3 @@ -68,6 +68,7 @@
2.4 * Added section argument processing functions from the ImprovedTableParser
2.5 distribution to MoinSupport.
2.6 * Added region/section parsing functions to MoinSupport.
2.7 + * Added heading extraction functionality from ImprovedMoinSearch.
2.8 * Added parsing/formatting-related functions from EventAggregator and
2.9 ImprovedTableParser to MoinSupport.
2.10 * Added category-, WikiDict- and various parsing/encoding-related functions