ImprovedMoinSearch

Changeset

5:b9d34861708c
2010-09-26 Paul Boddie [raw | files | shortlog | changelog | graph] Added control over which parts of page names will be shown in search results.
ImprovedMoinSearch.py (file)
     1.1 --- a/ImprovedMoinSearch.py	Tue Sep 14 00:21:31 2010 +0200
     1.2 +++ b/ImprovedMoinSearch.py	Sun Sep 26 22:22:16 2010 +0200
     1.3 @@ -15,13 +15,30 @@
     1.4  paragraph_regexp = re.compile(r"(?P<paragraph>(?:^[^#=\s].*$\n)+)", re.UNICODE | re.MULTILINE)
     1.5  
     1.6  def range_groups(min_name, max_name):
     1.7 -    return r"(?P<%s>\d+)?(?:\s*-\s*(?P<%s>\d+))?" % (min_name, max_name)
     1.8 +    return r"(?P<%s>-?\d+)?(?:\s*-\s*(?P<%s>-?\d+))?" % (min_name, max_name)
     1.9  
    1.10  format_options_regexp = re.compile(r"("
    1.11      "(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + ")"
    1.12      "|(?P<paragraph>(paragraph|para|p)\s*(?P<paragraph_number>\d+)?)"
    1.13 +    "|(?P<name>(name|page)\s*" + range_groups("first", "last") + ")"
    1.14      ")", re.UNICODE)
    1.15  
    1.16 +def convert_index(i, length):
    1.17 +
    1.18 +    """
    1.19 +    Convert from a 1-based indexing scheme to a 0-based scheme for the given
    1.20 +    index 'i' in a sequence having the given 'length'.
    1.21 +    """
    1.22 +
    1.23 +    if i is None:
    1.24 +        return i
    1.25 +    elif i > 0:
    1.26 +        return i - 1
    1.27 +    elif i < 0:
    1.28 +        return length + i
    1.29 +    else:
    1.30 +        return i
    1.31 +
    1.32  def getSearchResultPages(request, query, **kw):
    1.33  
    1.34      """
    1.35 @@ -62,6 +79,14 @@
    1.36  
    1.37  def getParagraph(request, page, start=0, number=None):
    1.38  
    1.39 +    """
    1.40 +    Using the given 'request', return from the given 'page', starting from the
    1.41 +    optional 'start' offset (or the beginning, if no such offset is specified),
    1.42 +    the first paragraph or, if the optional 'number' is given, the paragraph
    1.43 +    whose position corresponds to that number, with a number of 1 being the
    1.44 +    first paragraph found, 2 being the second, and so on.
    1.45 +    """
    1.46 +
    1.47      full_page = Page(request, page.page_name)
    1.48      body = full_page.get_raw_body()
    1.49      if start != 0:
    1.50 @@ -73,8 +98,37 @@
    1.51  
    1.52      return None
    1.53  
    1.54 -def getPageName(request, page):
    1.55 -    return page.page_name
    1.56 +def getPageName(request, page, start=0, first=None, last=None):
    1.57 +
    1.58 +    """
    1.59 +    Using the given 'request', return the name of the given 'page'. The optional
    1.60 +    'start' offset refers to the body of the page and is returned as the start
    1.61 +    and end of the result span if specified.
    1.62 +
    1.63 +    If the optional 'first' or 'last' parameters are specified, only the
    1.64 +    specified span of parts extracted from the page name will be returned, where
    1.65 +    the parts of the name are obtained by splitting the full name where the
    1.66 +    slash ("/") character is found. The first part has an index of 1, and the
    1.67 +    last part can be referred to using an index of -1.
    1.68 +    """
    1.69 +
    1.70 +    parts = page.page_name.split("/")
    1.71 +
    1.72 +    first = convert_index(first, len(parts))
    1.73 +    last = convert_index(last, len(parts))
    1.74 +
    1.75 +    if first is None:
    1.76 +        if last is None:
    1.77 +            pass
    1.78 +        else:
    1.79 +            parts = parts[:last+1]
    1.80 +    else:
    1.81 +        if last is None:
    1.82 +            parts = parts[first:]
    1.83 +        else:
    1.84 +            parts = parts[first:last+1]
    1.85 +
    1.86 +    return "/".join(parts), (start, start)
    1.87  
    1.88  def formatResultPages(request, formatter, pages, paging, format, page_from=0):
    1.89  
    1.90 @@ -96,6 +150,8 @@
    1.91                  actions.append((getFirstPageHeading, map(int_or_none, (match.group("min_heading"), match.group("max_heading")))))
    1.92              elif match.group("paragraph"):
    1.93                  actions.append((getParagraph, map(int_or_none, (match.group("paragraph_number"),))))
    1.94 +            elif match.group("name"):
    1.95 +                actions.append((getPageName, map(int_or_none, (match.group("first"), match.group("last")))))
    1.96      else:
    1.97          actions.append((getPageName, ()))
    1.98