# HG changeset patch
# User Paul Boddie
# Date 1285532536 -7200
# Node ID b9d34861708c6557f4a8755e7bce60f9110841fd
# Parent f036b082e99b59be0e035e32dbaa0613ed31fc21
Added control over which parts of page names will be shown in search results.

diff -r f036b082e99b -r b9d34861708c ImprovedMoinSearch.py
--- a/ImprovedMoinSearch.py	Tue Sep 14 00:21:31 2010 +0200
+++ b/ImprovedMoinSearch.py	Sun Sep 26 22:22:16 2010 +0200
@@ -15,13 +15,30 @@
 paragraph_regexp = re.compile(r"(?P<paragraph>(?:^[^#=\s].*$\n)+)", re.UNICODE | re.MULTILINE)
 
 def range_groups(min_name, max_name):
-    return r"(?P<%s>\d+)?(?:\s*-\s*(?P<%s>\d+))?" % (min_name, max_name)
+    return r"(?P<%s>-?\d+)?(?:\s*-\s*(?P<%s>-?\d+))?" % (min_name, max_name)
 
 format_options_regexp = re.compile(r"("
     "(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + ")"
     "|(?P<paragraph>(paragraph|para|p)\s*(?P<paragraph_number>\d+)?)"
+    "|(?P<name>(name|page)\s*" + range_groups("first", "last") + ")"
     ")", re.UNICODE)
 
+def convert_index(i, length):
+
+    """
+    Convert from a 1-based indexing scheme to a 0-based scheme for the given
+    index 'i' in a sequence having the given 'length'.
+    """
+
+    if i is None:
+        return i
+    elif i > 0:
+        return i - 1
+    elif i < 0:
+        return length + i
+    else:
+        return i
+
 def getSearchResultPages(request, query, **kw):
 
     """
@@ -62,6 +79,14 @@
 
 def getParagraph(request, page, start=0, number=None):
 
+    """
+    Using the given 'request', return from the given 'page', starting from the
+    optional 'start' offset (or the beginning, if no such offset is specified),
+    the first paragraph or, if the optional 'number' is given, the paragraph
+    whose position corresponds to that number, with a number of 1 being the
+    first paragraph found, 2 being the second, and so on.
+    """
+
     full_page = Page(request, page.page_name)
     body = full_page.get_raw_body()
     if start != 0:
@@ -73,8 +98,37 @@
 
     return None
 
-def getPageName(request, page):
-    return page.page_name
+def getPageName(request, page, start=0, first=None, last=None):
+
+    """
+    Using the given 'request', return the name of the given 'page'. The optional
+    'start' offset refers to the body of the page and is returned as the start
+    and end of the result span if specified.
+
+    If the optional 'first' or 'last' parameters are specified, only the
+    specified span of parts extracted from the page name will be returned, where
+    the parts of the name are obtained by splitting the full name where the
+    slash ("/") character is found. The first part has an index of 1, and the
+    last part can be referred to using an index of -1.
+    """
+
+    parts = page.page_name.split("/")
+
+    first = convert_index(first, len(parts))
+    last = convert_index(last, len(parts))
+
+    if first is None:
+        if last is None:
+            pass
+        else:
+            parts = parts[:last+1]
+    else:
+        if last is None:
+            parts = parts[first:]
+        else:
+            parts = parts[first:last+1]
+
+    return "/".join(parts), (start, start)
 
 def formatResultPages(request, formatter, pages, paging, format, page_from=0):
 
@@ -96,6 +150,8 @@
             actions.append((getFirstPageHeading, map(int_or_none, (match.group("min_heading"), match.group("max_heading")))))
         elif match.group("paragraph"):
             actions.append((getParagraph, map(int_or_none, (match.group("paragraph_number"),))))
+        elif match.group("name"):
+            actions.append((getPageName, map(int_or_none, (match.group("first"), match.group("last")))))
         else:
             actions.append((getPageName, ()))
 