# ImprovedMoinSearch (file ImprovedMoinSearch.py at d9316d0e772d)

     # -*- coding: iso-8859-1 -*-
"""
    MoinMoin - ImprovedMoinSearch library

    @copyright: 2010, 2011 Paul Boddie <paul@boddie.org.uk>
    @license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from MoinMoin.search import searchPages
from MoinMoin.Page import Page
from MoinMoin import wikiutil
import re

# Wiki headings: a run of "=" characters, the heading text, and the same run of
# "=" characters again, all on a single line.

heading_regexp = re.compile(r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$", re.UNICODE | re.MULTILINE)

# Paragraphs: one or more consecutive lines that do not start with "#", "=" or
# whitespace. Each matched line includes its terminating newline.

paragraph_regexp = re.compile(r"(?P<paragraph>(?:^[^#=\s].*$\n)+)", re.UNICODE | re.MULTILINE)

def range_groups(min_name, max_name):

    """
    Return a regular expression fragment matching an optional bracketed range
    such as "(1)", "(1-3)" or "(-1)", where the first number is captured in a
    group called 'min_name' and the number after the dash is captured in a
    group called 'max_name'.
    """

    # The literals are parenthesised because a trailing comment ends a
    # backslash-continued line, which would leave the remaining literals as
    # unexpectedly indented statements.

    pattern = (
        r"(?:"                              # non-grouping containment of...
        r"\("                               # brackets around the arguments
        r"(?P<%s>-?\d+)?"                   # optional first numeric argument
        r"(?:\s*-\s*"                       # non-grouping containment of dash and spaces
        r"(?P<%s>-?\d+)"                    # second numeric argument
        r")?"                               # end containment, making dash and numeric argument optional
        r"\)"                               # closing bracket
        r")?"                               # end containment, making entire bracketed region optional
        )
    return pattern % (min_name, max_name)

# The name alternative is tried before the paragraph alternative: otherwise the
# "p" abbreviation for paragraphs would consume the first letter of "page" and
# the "page" keyword could never be selected.

format_options_regexp = re.compile(
    r"("                                    # optional formatting qualifiers...
    r"(?P<link>(link|l):)"                  # link:
    r"|(?P<strong>(strong|str|bold|b):)"    # strong:
    r"|(?P<em>(emphasis|em|italic|i):)"     # emphasis:
    r")*"
    r"("
    r"(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + ")"
    r"|(?P<name>(name|page)\s*" + range_groups("first", "last") + ")"
    r"|(?P<paragraph>(paragraph|para|p)\s*" + r"(?:\((?P<paragraph_number>\d+)?\))?" + ")"
    r"|(?P<break>(break|br))"
    r")",
    re.UNICODE)

def convert_index(i, length):

    """
    Convert from a 1-based indexing scheme to a 0-based scheme for the given
    index 'i' in a sequence having the given 'length'.

    Positive indexes are decremented, negative indexes are counted back from
    the end of the sequence, and None or zero are returned unchanged.
    """

    if i is None or i == 0:
        return i
    if i > 0:
        return i - 1
    return length + i

def getSearchResultPages(request, query, **kw):

    """
    Return matching pages using the given 'request' and search 'query'. Optional
    keyword arguments are passed to the underlying search infrastructure.
    """

    return searchPages(request, query, **kw).hits

# Action functions.

def _get_body_from(request, page, start):

    """
    Using the given 'request', return the raw body of the given 'page' from the
    given 'start' offset onwards.
    """

    body = Page(request, page.page_name).get_raw_body()
    if start != 0:
        body = body[start:]
    return body

def _absolute_span(match, start):

    """
    Return the span of the given 'match', found in a body sliced at the given
    'start' offset, as offsets into the complete body.
    """

    match_start, match_end = match.span()
    return match_start + start, match_end + start

def getFirstPageHeading(request, page, start=0, min_level=None, max_level=None):

    """
    Using the given 'request', return the first heading in the given 'page'
    from the given 'start' point (optional, defaulting to the start of the page)
    having a heading level of at least 'min_level' (which is undefined if not
    specified) and at most 'max_level' (which is undefined if not specified).

    A tuple containing the heading and the span (the start offset and the end
    offset as a tuple) is returned for a successful retrieval. Otherwise, None
    is returned.

    The span is expressed in offsets into the complete page body, regardless of
    the 'start' offset, so that the end offset can be used directly as the
    'start' of a subsequent retrieval.
    """

    body = _get_body_from(request, page, start)

    for match in heading_regexp.finditer(body):
        level = len(match.group("level"))

        if min_level is not None and level < min_level:
            continue
        if max_level is not None and level > max_level:
            continue

        return match.group("heading"), _absolute_span(match, start)

    return None

def getParagraph(request, page, start=0, number=None):

    """
    Using the given 'request', return from the given 'page', starting from the
    optional 'start' offset (or the beginning, if no such offset is specified),
    the first paragraph or, if the optional 'number' is given, the paragraph
    whose position corresponds to that number, with a number of 1 being the
    first paragraph found, 2 being the second, and so on. A 'number' of zero
    is treated like a 'number' of 1.

    A tuple containing the paragraph and its span (the start offset and the end
    offset as a tuple, both expressed in offsets into the complete page body)
    is returned for a successful retrieval. Otherwise, None is returned.
    """

    body = _get_body_from(request, page, start)

    if number is None:
        wanted = 0
    else:
        wanted = max(0, number - 1)

    for i, match in enumerate(paragraph_regexp.finditer(body)):
        if i == wanted:
            return match.group("paragraph"), _absolute_span(match, start)

    return None

def getPageName(request, page, start=0, first=None, last=None):

    """
    Using the given 'request', return the name of the given 'page'. The optional
    'start' offset refers to the body of the page and is returned as the start
    and end of the result span if specified.

    If the optional 'first' or 'last' parameters are specified, only the
    specified span of parts extracted from the page name will be returned, where
    the parts of the name are obtained by splitting the full name where the
    slash ("/") character is found. The first part has an index of 1, and the
    last part can be referred to using an index of -1.
    """

    parts = page.page_name.split("/")

    first = convert_index(first, len(parts))
    last = convert_index(last, len(parts))

    # The 'last' index is inclusive, hence the adjustment when slicing.

    if first is not None and last is not None:
        parts = parts[first:last+1]
    elif first is not None:
        parts = parts[first:]
    elif last is not None:
        parts = parts[:last+1]

    return "/".join(parts), (start, start)

# Formatting styles.

# Each style function accepts the remaining 'styles' to be applied, the
# 'formatter', the 'text' to be shown and the 'page' being described, returning
# the formatted output.

def asLink(styles, formatter, text, page):

    "Wrap the remaining 'styles' applied to 'text' in a link to 'page'."

    return u''.join([
        formatter.pagelink(on=1, pagename=page.page_name),
        asText(styles, formatter, text, page),
        formatter.pagelink(on=0),
        ])

def _asStyledText(styles, formatter, text, page, fn):

    """
    Wrap the remaining 'styles' applied to 'text' in the output of the
    formatter method 'fn', which is called with on=1 and then with on=0.
    """

    return u''.join([
        fn(on=1),
        asText(styles, formatter, text, page),
        fn(on=0),
        ])

def asStrong(styles, formatter, text, page):

    "Show 'text', with the remaining 'styles' applied, using strong markup."

    return _asStyledText(styles, formatter, text, page, formatter.strong)

def asEmphasis(styles, formatter, text, page):

    "Show 'text', with the remaining 'styles' applied, using emphasis markup."

    return _asStyledText(styles, formatter, text, page, formatter.emphasis)

def asText(styles, formatter, text, page):

    """
    Apply the remaining 'styles' to 'text' or, if there are none, return the
    'text' as plain formatted text.
    """

    if styles:
        return next_style(styles, formatter, text, page)
    return formatter.text(text)

def asBreak(styles, formatter, text, page):

    "Return a line break, ignoring the given 'styles' and 'text'."

    return formatter.linebreak(0)

def next_style(styles, formatter, text, page):

    "Invoke the first of the 'styles', passing it the styles that follow."

    return styles[0](styles[1:], formatter, text, page)

style_functions = {
    "link" : asLink,
    "strong" : asStrong,
    "em" : asEmphasis,
    }

# Formatting functions.

def _int_groups(match, *names):

    "Return the groups of 'match' having the given 'names' as integers or None."

    return [int_or_none(match.group(name)) for name in names]

def _get_format_actions(format):

    """
    Return a list of (action function, arguments, style functions) tuples
    describing the given 'format' string. Without a 'format', a single action
    showing the linked page name is returned.
    """

    if not format:
        return [(getPageName, (), [asLink])]

    actions = []

    for match in format_options_regexp.finditer(format):

        # Apply styles by gathering style functions.

        styles = [style_functions[style] for style in ("strong", "em", "link") if match.group(style)]
        styles.append(asText)

        # Add actions, arguments and styles. The arguments are held in lists
        # since they are reused for every result page.

        if match.group("heading"):
            actions.append((getFirstPageHeading, _int_groups(match, "min_heading", "max_heading"), styles))
        elif match.group("paragraph"):
            actions.append((getParagraph, _int_groups(match, "paragraph_number"), styles))
        elif match.group("name"):
            actions.append((getPageName, _int_groups(match, "first", "last"), styles))
        elif match.group("break"):
            actions.append((None, None, [asBreak]))

    return actions

def _format_result_page(request, formatter, page, actions):

    """
    Using the given 'request' and 'formatter', return a list of output
    fragments produced by applying the given 'actions' to the given 'page'.
    """

    output = []
    start = 0
    first = True

    for action, args, styles in actions:

        # Handle null actions, which have no text and do not consume any of
        # the page body.

        if action is None:
            text, span = None, None

        # Process requested actions, skipping those producing no result.

        else:
            result = action(request, page, start, *args)
            if result is None:
                continue
            text, span = result

        if not first:
            output.append(" ")

        output.append(next_style(styles, formatter, text, page))

        # Position the search for the next action directly after this result.
        # Spans are offsets into the complete page body, so the end offset is
        # used as it is: skipping a further character could hide a heading or
        # paragraph starting at that position.

        if span is not None:
            start = span[1]

        first = False

    return output

def formatResultPages(request, formatter, pages, paging, format, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the result 'pages', providing paging controls when 'paging' is set to a true
    value, and providing page details according to the given 'format'.

    If the optional 'page_from' parameter is set, the result pages from the
    given result (specified within a range from 0 to the length of the 'pages'
    collection) will be shown.
    """

    actions = _get_format_actions(format)

    # Use paging only when there are enough results, and only when a usable
    # number of results per page has been configured.

    results_per_page = request.cfg.search_results_per_page
    paging = paging and 0 < results_per_page < len(pages)

    if paging:
        pages_to_show = pages[page_from:page_from + results_per_page]
    else:
        pages_to_show = pages

    # Prepare the output.

    output = []
    output.append(formatter.number_list(on=1, start=page_from + 1))

    for page in pages_to_show:
        output.append(formatter.listitem(on=1))
        output.extend(_format_result_page(request, formatter, page, actions))
        output.append(formatter.listitem(on=0))

    output.append(formatter.number_list(on=0))

    # Show paging navigation.

    if paging:
        output.append(formatPagingNavigation(request, formatter, pages, page_from))

    return "".join(output)

def formatPagingNavigation(request, formatter, pages, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the paging navigation for the result 'pages', according to the 'page_from'
    indicator which provides the current position in the result set.

    Every page of results other than the current one is shown as a link back to
    the formatter's page with a suitable "from" query string parameter.
    """

    pagename = formatter.page.page_name
    _ = request.getText

    results_per_page = request.cfg.search_results_per_page
    number_of_results = len(pages)

    # Obtain the zero-based index of the last page of results and of the
    # current page. Floor division is used explicitly, and the last result
    # (not the number of results) determines the last page so that an exact
    # multiple of the page size does not produce an empty trailing page.

    last_page = (number_of_results - 1) // results_per_page
    current_page = page_from // results_per_page

    querydict = wikiutil.parseQueryString(request.query_string)

    def page_link(n):
        return [
            formatter.pagelink(on=1, pagename=pagename, querystr=getPagingQueryString(querydict, n * results_per_page)),
            formatter.text(str(n + 1)),
            formatter.pagelink(on=0),
            formatter.text(" "),
            ]

    output = []
    output.append(formatter.paragraph(on=1))
    output.append(formatter.text(_("Result pages:")))
    output.append(formatter.text(" "))

    # Pages before the current page.

    for n in range(0, current_page):
        output.extend(page_link(n))

    # The current page is not linked.

    output.append(formatter.text(str(current_page + 1)))
    output.append(formatter.text(" "))

    # Pages after the current page.

    for n in range(current_page + 1, last_page + 1):
        output.extend(page_link(n))

    output.append(formatter.paragraph(on=0))

    return "".join(output)

def getPagingQueryString(querydict, page_from):

    """
    Return a query string built from 'querydict' with its "from" entry set to
    'page_from'. The given 'querydict' is itself updated.
    """

    querydict["from"] = page_from
    return wikiutil.makeQueryString(querydict)

def int_or_none(x):

    "Return 'x' converted to an integer, or None if 'x' is None."

    if x is None:
        return None
    return int(x)

# vim: tabstop=4 expandtab shiftwidth=4