# HG changeset patch
# User Paul Boddie
# Date 1353621950 -3600
# Node ID 9c2d3c3d29ed0b3078dc404a8cb11a1e0ce8c702
# Parent 4589bad4665366522f31f654a41e4fef85f2ccd9
Moved heading extraction functionality from ImprovedMoinSearch.

diff -r 4589bad46653 -r 9c2d3c3d29ed MoinSupport.py
--- a/MoinSupport.py Sun Nov 18 21:17:05 2012 +0100
+++ b/MoinSupport.py Thu Nov 22 23:05:50 2012 +0100
@@ -33,6 +33,10 @@
 marker_regexp_str = r"([{]{3,}|[}]{3,})"
 marker_regexp = re.compile(marker_regexp_str, re.MULTILINE | re.DOTALL) # {{{... or }}}...
 
+# Extraction of headings.
+
+heading_regexp = re.compile(r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$", re.UNICODE | re.MULTILINE)
+
 # Category extraction from pages.
 
 category_regexp = None
@@ -309,6 +313,25 @@
 
     return getFragmentsFromRegions(getRegions(s, include_non_regions))
 
+# Heading extraction.
+
+def getHeadings(s):
+
+    """
+    Return tuples of the form (level, title, span) for headings found within the
+    given string 's'. The span is itself a (start, end) tuple indicating the
+    matching region of 's' for a heading declaration.
+    """
+
+    headings = []
+
+    for match in heading_regexp.finditer(s):
+        headings.append(
+            (len(match.group("level")), match.group("heading"), match.span())
+            )
+
+    return headings
+
 # Region/section attribute parsing.
 
 def parseAttributes(s, escape=True):
diff -r 4589bad46653 -r 9c2d3c3d29ed README.txt
--- a/README.txt Sun Nov 18 21:17:05 2012 +0100
+++ b/README.txt Thu Nov 22 23:05:50 2012 +0100
@@ -68,6 +68,7 @@
   * Added section argument processing functions from the ImprovedTableParser
     distribution to MoinSupport.
   * Added region/section parsing functions to MoinSupport.
+  * Added heading extraction functionality from ImprovedMoinSearch.
   * Added parsing/formatting-related functions from EventAggregator and
     ImprovedTableParser to MoinSupport.
   * Added category-, WikiDict- and various parsing/encoding-related functions