# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1353621950 -3600
# Node ID 9c2d3c3d29ed0b3078dc404a8cb11a1e0ce8c702
# Parent  4589bad4665366522f31f654a41e4fef85f2ccd9
Moved heading extraction functionality from ImprovedMoinSearch.

diff -r 4589bad46653 -r 9c2d3c3d29ed MoinSupport.py
--- a/MoinSupport.py	Sun Nov 18 21:17:05 2012 +0100
+++ b/MoinSupport.py	Thu Nov 22 23:05:50 2012 +0100
@@ -33,6 +33,10 @@
 marker_regexp_str = r"([{]{3,}|[}]{3,})"
 marker_regexp = re.compile(marker_regexp_str, re.MULTILINE | re.DOTALL) # {{{... or }}}...
 
+# Extraction of headings: lines opening and closing with the same run of "=".
+
+heading_regexp = re.compile(r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$", re.UNICODE | re.MULTILINE)
+
 # Category extraction from pages.
 
 category_regexp = None
@@ -309,6 +313,25 @@
 
     return getFragmentsFromRegions(getRegions(s, include_non_regions))
 
+# Heading extraction.
+
+def getHeadings(s):
+
+    """
+    Find the heading declarations in the string 's' and return them as a list
+    of (level, title, span) tuples, in order of appearance. The level is the
+    number of "=" characters in the heading marker, the title is the text found
+    between the opening and closing markers (not stripped), and the span is the
+    (start, end) extent of the whole declaration within 's'.
+    """
+
+    matches = heading_regexp.finditer(s)
+
+    return [
+        (len(found.group("level")), found.group("heading"), found.span())
+        for found in matches
+        ]
+
 # Region/section attribute parsing.
 
 def parseAttributes(s, escape=True):
diff -r 4589bad46653 -r 9c2d3c3d29ed README.txt
--- a/README.txt	Sun Nov 18 21:17:05 2012 +0100
+++ b/README.txt	Thu Nov 22 23:05:50 2012 +0100
@@ -68,6 +68,7 @@
   * Added section argument processing functions from the ImprovedTableParser
     distribution to MoinSupport.
   * Added region/section parsing functions to MoinSupport.
+  * Added heading extraction functionality from ImprovedMoinSearch.
   * Added parsing/formatting-related functions from EventAggregator and
     ImprovedTableParser to MoinSupport.
   * Added category-, WikiDict- and various parsing/encoding-related functions