paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - ImprovedMoinSearch library |
paul@0 | 4 | |
paul@0 | 5 | @copyright: 2010 Paul Boddie <paul@boddie.org.uk> |
paul@0 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 7 | """ |
paul@0 | 8 | |
paul@0 | 9 | from MoinMoin.search import searchPages |
paul@0 | 10 | from MoinMoin.Page import Page |
paul@1 | 11 | from MoinMoin import wikiutil |
paul@0 | 12 | import re |
paul@0 | 13 | |
# Matches a wiki heading occupying a whole line, such as "== Title ==". The
# "level" group holds the leading run of "=" characters, which must be repeated
# at the end of the line, and the "heading" group holds the text in between
# (including any surrounding spaces).

heading_regexp = re.compile(r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$", re.UNICODE | re.MULTILINE)

# Matches a run of consecutive lines, none of which starts with "#", "=" or
# whitespace. The newline terminating each line is optional so that a final
# paragraph at the very end of the text is still found when the text does not
# end with a newline.

paragraph_regexp = re.compile(r"(?P<paragraph>(?:^[^#=\s].*$\n?)+)", re.UNICODE | re.MULTILINE)
paul@3 | 16 | |
def range_groups(min_name, max_name):

    """
    Return a regular expression fragment matching an optional integer
    (captured in a group named 'min_name'), optionally followed by a "-"
    separator and a second integer (captured in a group named 'max_name').
    Either integer may carry a leading minus sign.
    """

    return r"(?P<%s>-?\d+)?(?:\s*-\s*(?P<%s>-?\d+))?" % (min_name, max_name)

# Recognises the individual options found in a result format string:
#
#   heading, title or h    with an optional range of heading levels
#   name or page           with an optional range of page name parts
#   paragraph, para or p   with an optional paragraph number
#
# The name alternative must be tried before the paragraph alternative.
# Otherwise, the single-letter "p" keyword matches the start of "page" and the
# option is taken to be a paragraph instead of a page name.

format_options_regexp = re.compile(r"("
    r"(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + r")"
    r"|(?P<name>(name|page)\s*" + range_groups("first", "last") + r")"
    r"|(?P<paragraph>(paragraph|para|p)\s*(?P<paragraph_number>\d+)?)"
    r")", re.UNICODE)
paul@0 | 25 | |
def convert_index(i, length):

    """
    Translate the 1-based index 'i' into a 0-based index for a sequence of the
    given 'length'.

    Positive values are shifted down by one, negative values are counted back
    from 'length', and None or zero are returned unchanged.
    """

    if i is None or i == 0:
        return i

    if i < 0:
        return length + i

    return i - 1
paul@5 | 41 | |
def getSearchResultPages(request, query, **kw):

    """
    Run a search for 'query' using the given 'request', forwarding any extra
    keyword arguments to searchPages, and return the 'hits' attribute of the
    search results.
    """

    return searchPages(request, query, **kw).hits
paul@0 | 51 | |
def getFirstPageHeading(request, page, start=0, min_level=None, max_level=None):

    """
    Using the given 'request', return the first heading in the given 'page'
    from the given 'start' point (optional, defaulting to the start of the page)
    having a heading level of at least 'min_level' (which is undefined if not
    specified) and at most 'max_level' (which is undefined if not specified).

    A tuple containing the heading and the span (the start offset and the end
    offset as a tuple) is returned for a successful retrieval. The span offsets
    are relative to the beginning of the page body, not to 'start', so that the
    end offset can be used directly as the 'start' of a subsequent search.
    Otherwise, None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()
    if start != 0:
        body = body[start:]

    for match in heading_regexp.finditer(body):
        level = len(match.group("level"))

        if (min_level is None or level >= min_level) and \
            (max_level is None or level <= max_level):

            # The match offsets refer to the sliced body, so shift them back
            # into the coordinates of the complete page body.

            match_start, match_end = match.span()
            return match.group("heading"), (match_start + start, match_end + start)

    return None
paul@0 | 79 | |
def getParagraph(request, page, start=0, number=None):

    """
    Using the given 'request', return from the given 'page', starting from the
    optional 'start' offset (or the beginning, if no such offset is specified),
    the first paragraph or, if the optional 'number' is given, the paragraph
    whose position corresponds to that number, with a number of 1 being the
    first paragraph found, 2 being the second, and so on. Numbers below 1 also
    select the first paragraph.

    A tuple containing the paragraph text and the span (the start offset and
    the end offset as a tuple) is returned for a successful retrieval. The span
    offsets are relative to the beginning of the page body, not to 'start'.
    Otherwise, None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()
    if start != 0:
        body = body[start:]

    # Determine the 0-based position of the wanted paragraph once.

    if number is None:
        wanted = 0
    else:
        wanted = max(0, number - 1)

    for i, match in enumerate(paragraph_regexp.finditer(body)):
        if i == wanted:

            # The match offsets refer to the sliced body, so shift them back
            # into the coordinates of the complete page body.

            match_start, match_end = match.span()
            return match.group("paragraph"), (match_start + start, match_end + start)

    return None
paul@3 | 100 | |
def getPageName(request, page, start=0, first=None, last=None):

    """
    Return the name of the given 'page' together with a span. The 'request' is
    accepted for consistency with the other retrieval functions. The span is
    always the optional 'start' offset repeated as both start and end, since
    the name does not occupy any part of the page body.

    The page name is split into parts at each slash ("/") character. Where
    'first' and/or 'last' are given, only the parts from 'first' to 'last'
    inclusive are kept and joined again with slashes. Parts are numbered from
    1, and -1 refers to the last part.
    """

    components = page.page_name.split("/")
    count = len(components)

    lower = convert_index(first, count)
    upper = convert_index(last, count)

    # The 'last' part is included, so the slice must end one position later.

    if upper is not None:
        upper += 1

    selected = components[lower:upper]
    return "/".join(selected), (start, start)
paul@3 | 132 | |
def formatResultPages(request, formatter, pages, paging, format, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the result 'pages', providing paging controls when 'paging' is set to a true
    value, and providing page details according to the given 'format'.

    The 'format' is a string containing heading, paragraph and name options
    which are applied in order to each page. Where no 'format' is given, or
    where it contains no recognised options, the page name is shown.

    If the optional 'page_from' parameter is set, the result pages from the
    given result (specified within a range from 0 to the length of the 'pages'
    collection) will be shown.
    """

    actions = []

    if format:
        for match in format_options_regexp.finditer(format):
            if match.group("heading"):
                action, names = getFirstPageHeading, ("min_heading", "max_heading")
            elif match.group("paragraph"):
                action, names = getParagraph, ("paragraph_number",)
            elif match.group("name"):
                action, names = getPageName, ("first", "last")
            else:
                continue

            # Store the arguments as a tuple so that they can be reused for
            # every page (an iterator would be exhausted after the first).

            args = tuple([int_or_none(match.group(name)) for name in names])
            actions.append((action, args))

    # Show the page name if nothing else was requested.

    if not actions:
        actions.append((getPageName, ()))

    # Use paging only when there are enough results.

    results_per_page = request.cfg.search_results_per_page
    paging = paging and len(pages) > results_per_page

    if paging:
        pages_to_show = pages[page_from:page_from + results_per_page]
    else:
        pages_to_show = pages

    # Prepare the output.

    output = []
    output.append(formatter.number_list(on=1, start=page_from + 1))

    for page in pages_to_show:
        output.append(formatter.listitem(on=1))

        start = 0
        first = 1
        for action, args in actions:
            result = action(request, page, start, *args)

            if result is not None:

                # Only the first piece of text is made into a link to the page.

                if first:
                    output.append(formatter.pagelink(on=1, pagename=page.page_name))
                else:
                    output.append(" ")

                text, span = result
                output.append(formatter.text(text))

                # Position the search for the next action immediately after the
                # text just found. The end offset is exclusive, so adding one
                # would skip a character and could hide a heading or paragraph
                # starting at that position.

                _start, _end = span
                start = _end

                if first:
                    output.append(formatter.pagelink(on=0))

                first = 0

        output.append(formatter.listitem(on=0))

    output.append(formatter.number_list(on=0))

    # Show paging navigation.

    if paging:
        output.append(formatPagingNavigation(request, formatter, pages, page_from))

    return "".join(output)
paul@0 | 210 | |
def formatPagingNavigation(request, formatter, pages, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the paging navigation for the result 'pages', according to the 'page_from'
    indicator which provides the current position in the result set.

    Each result page other than the current one is shown as a numbered link to
    the page being formatted, with the "from" query parameter set to the
    position of the first result on that result page. The current result page
    is shown as a plain number.
    """

    page = formatter.page
    pagename = page.page_name
    _ = request.getText

    output = []

    results_per_page = request.cfg.search_results_per_page
    number_of_results = len(pages)

    # Use floor division to obtain 0-based result page numbers. The last result
    # page is derived from the position of the last result, so that a result
    # count which is an exact multiple of the page size does not produce an
    # additional, empty result page.

    current_page = page_from // results_per_page
    last_page = (number_of_results - 1) // results_per_page

    querydict = wikiutil.parseQueryString(request.query_string)

    output.append(formatter.paragraph(on=1))
    output.append(formatter.text(_("Result pages:")))
    output.append(formatter.text(" "))

    # Links to the result pages before the current one.

    n = 0
    while n < current_page:
        output.append(formatter.pagelink(on=1, pagename=pagename, querystr=getPagingQueryString(querydict, n * results_per_page)))
        output.append(formatter.text(str(n + 1)))
        output.append(formatter.pagelink(on=0))
        output.append(formatter.text(" "))
        n += 1

    # The current result page is not linked.

    output.append(formatter.text(str(n + 1)))
    output.append(formatter.text(" "))
    n += 1

    # Links to the result pages after the current one.

    while n <= last_page:
        output.append(formatter.pagelink(on=1, pagename=pagename, querystr=getPagingQueryString(querydict, n * results_per_page)))
        output.append(formatter.text(str(n + 1)))
        output.append(formatter.pagelink(on=0))
        output.append(formatter.text(" "))
        n += 1

    output.append(formatter.paragraph(on=0))

    return "".join(output)
paul@1 | 260 | |
def getPagingQueryString(querydict, page_from):

    """
    Store 'page_from' under the "from" key of the given 'querydict', which is
    modified in place, and return the query string produced from it by
    wikiutil.makeQueryString.
    """

    querydict["from"] = page_from
    query_string = wikiutil.makeQueryString(querydict)
    return query_string
paul@1 | 264 | |
def int_or_none(x):

    """
    Return 'x' converted to an integer, or None if 'x' is None.
    """

    if x is not None:
        return int(x)

    return None
paul@0 | 270 | |
paul@0 | 271 | # vim: tabstop=4 expandtab shiftwidth=4 |