paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - ImprovedMoinSearch library |
paul@0 | 4 | |
paul@0 | 5 | @copyright: 2010 Paul Boddie <paul@boddie.org.uk> |
paul@0 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 7 | """ |
paul@0 | 8 | |
paul@0 | 9 | from MoinMoin.search import searchPages |
paul@0 | 10 | from MoinMoin.Page import Page |
paul@1 | 11 | from MoinMoin import wikiutil |
paul@0 | 12 | import re |
paul@0 | 13 | |
# A heading is a whole line which starts with a run of "=" characters and ends
# with an identical run; the text in between is captured as 'heading'.

heading_regexp = re.compile(
    r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$",
    re.MULTILINE | re.UNICODE)

# A paragraph is a run of consecutive, newline-terminated lines, none of which
# starts with "#", "=" or whitespace (so blank lines end a paragraph).

paragraph_regexp = re.compile(
    r"(?P<paragraph>(?:^[^#=\s].*$\n)+)",
    re.MULTILINE | re.UNICODE)
paul@3 | 16 | |
def range_groups(min_name, max_name):

    """
    Return a regular expression fragment matching an optional integer,
    captured in a group called 'min_name', optionally followed by a "-"
    separator and a second integer, captured in a group called 'max_name'.
    Either integer may carry a leading minus sign.
    """

    return r"(?P<%s>-?\d+)?(?:\s*-\s*(?P<%s>-?\d+))?" % (min_name, max_name)

# Each format option is a keyword with optional numeric arguments, optionally
# enclosed in square brackets (captured in the 'link' group).
#
# The alternatives are tried from left to right, so the name alternative must
# precede the paragraph alternative: otherwise the single-letter "p" keyword
# would consume the first letter of "page" and that keyword could never select
# the page name.

format_options_regexp = re.compile(
    r"(?P<link>\[)?"
    r"("
    r"(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + ")"
    r"|(?P<name>(name|page)\s*" + range_groups("first", "last") + ")"
    r"|(?P<paragraph>(paragraph|para|p)\s*(?P<paragraph_number>\d+)?)"
    r")"
    r"(?(link)\]|)",
    re.UNICODE)
paul@0 | 29 | |
def convert_index(i, length):

    """
    Translate the 1-based index 'i' into a 0-based index for a sequence of the
    given 'length'.

    Positive indexes are shifted down by one, negative indexes are counted
    back from 'length', and None or zero are returned unchanged.
    """

    # Nothing to translate for an absent or zero index.

    if i is None or i == 0:
        return i

    if i > 0:
        return i - 1

    return length + i
paul@5 | 45 | |
def getSearchResultPages(request, query, **kw):

    """
    Perform a search for 'query' using the given 'request' and return the hits
    of the search results. Any additional keyword arguments are handed on
    unchanged to the search function.
    """

    return searchPages(request, query, **kw).hits
paul@0 | 55 | |
def getFirstPageHeading(request, page, start=0, min_level=None, max_level=None):

    """
    Using the given 'request', return the first heading in the given 'page'
    from the given 'start' point (optional, defaulting to the start of the page)
    having a heading level of at least 'min_level' (which is not enforced if
    not specified) and at most 'max_level' (which is not enforced if not
    specified).

    A tuple containing the heading and the span (the start offset and the end
    offset as a tuple) is returned for a successful retrieval. The offsets in
    the span refer to positions in the complete page body, regardless of the
    'start' point, so that the end offset can be used as the 'start' of a
    subsequent search. Otherwise, None is returned.

    The 'start' point is expected to be a non-negative offset.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()
    if start != 0:
        body = body[start:]

    for match in heading_regexp.finditer(body):
        level = len(match.group("level"))

        if (min_level is None or level >= min_level) and \
            (max_level is None or level <= max_level):

            # The match offsets are relative to the sliced body, so they are
            # shifted back into the coordinates of the complete body.

            match_start, match_end = match.span()
            return match.group("heading"), (start + match_start, start + match_end)

    return None
paul@0 | 83 | |
def getParagraph(request, page, start=0, number=None):

    """
    Using the given 'request', return from the given 'page', starting from the
    optional 'start' offset (or the beginning, if no such offset is specified),
    the first paragraph or, if the optional 'number' is given, the paragraph
    whose position corresponds to that number, with a number of 1 being the
    first paragraph found, 2 being the second, and so on. Numbers below 1 also
    select the first paragraph.

    A tuple containing the paragraph text and the span (the start offset and
    the end offset as a tuple) is returned for a successful retrieval. The
    offsets in the span refer to positions in the complete page body,
    regardless of the 'start' offset. Otherwise, None is returned.

    The 'start' offset is expected to be non-negative.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()
    if start != 0:
        body = body[start:]

    # Work out the 0-based position of the wanted paragraph once.

    if number is None:
        wanted = 0
    else:
        wanted = max(0, number - 1)

    for i, match in enumerate(paragraph_regexp.finditer(body)):
        if i == wanted:

            # The match offsets are relative to the sliced body, so they are
            # shifted back into the coordinates of the complete body.

            match_start, match_end = match.span()
            return match.group("paragraph"), (start + match_start, start + match_end)

    return None
paul@3 | 104 | |
def getPageName(request, page, start=0, first=None, last=None):

    """
    Return the name of the given 'page' together with a span. The 'request' is
    accepted for consistency with the other retrieval functions. The optional
    'start' offset is not used to examine the page: it is merely echoed back as
    both the start and the end of the returned span.

    The page name is split into parts at each slash ("/") character. Where
    'first' and/or 'last' are given, only the parts from 'first' up to and
    including 'last' are kept and joined together again with slashes. Parts are
    numbered from 1, and -1 refers to the final part.
    """

    parts = page.page_name.split("/")
    count = len(parts)

    lower = convert_index(first, count)
    upper = convert_index(last, count)

    # The 'last' part is inclusive, whereas a slice end is exclusive. An absent
    # bound leaves that side of the slice open.

    if upper is None:
        stop = None
    else:
        stop = upper + 1

    selected = parts[lower:stop]

    return "/".join(selected), (start, start)
paul@3 | 136 | |
def _parseResultFormat(format):

    """
    Return a list of (function, arguments, as_link) tuples describing how each
    result page should be presented according to the given 'format' string.
    Without a format, the page name is shown as a link.
    """

    if not format:
        return [(getPageName, (), True)]

    actions = []

    for match in format_options_regexp.finditer(format):
        if match.group("heading"):
            function, names = getFirstPageHeading, ("min_heading", "max_heading")
        elif match.group("paragraph"):
            function, names = getParagraph, ("paragraph_number",)
        elif match.group("name"):
            function, names = getPageName, ("first", "last")
        else:
            continue

        # The arguments are stored in a tuple because they are reused for every
        # result page; a lazily evaluated sequence would only work once.

        args = tuple([int_or_none(match.group(name)) for name in names])
        actions.append((function, args, match.group("link")))

    return actions

def formatResultPages(request, formatter, pages, paging, format, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the result 'pages', providing paging controls when 'paging' is set to a true
    value, and providing page details according to the given 'format'.

    If the optional 'page_from' parameter is set, the result pages from the
    given result (specified within a range from 0 to the length of the 'pages'
    collection) will be shown.

    Each option in the 'format' is applied in turn to each page, with each
    option searching the page from the point where the previous result ended.
    Results are separated by spaces, and options which find nothing are omitted.
    """

    actions = _parseResultFormat(format)

    # Use paging only when there are enough results.

    results_per_page = request.cfg.search_results_per_page
    paging = paging and len(pages) > results_per_page

    if paging:
        pages_to_show = pages[page_from:page_from + results_per_page]
    else:
        pages_to_show = pages

    # Prepare the output.

    output = []
    output.append(formatter.number_list(on=1, start=page_from + 1))

    for page in pages_to_show:
        output.append(formatter.listitem(on=1))

        start = 0
        first = True

        for action, args, as_link in actions:
            result = action(request, page, start, *args)

            if result is None:
                continue

            text, (_start, end) = result

            if not first:
                output.append(" ")
            if as_link:
                output.append(formatter.pagelink(on=1, pagename=page.page_name))

            output.append(formatter.text(text))

            if as_link:
                output.append(formatter.pagelink(on=0))

            # Position the search for the next action exactly where this result
            # ended, treating the span as offsets into the page body. Skipping
            # a further character would start the next search in the middle of
            # a line (and a page name result consumes no text at all).

            start = end
            first = False

        output.append(formatter.listitem(on=0))

    output.append(formatter.number_list(on=0))

    # Show paging navigation.

    if paging:
        output.append(formatPagingNavigation(request, formatter, pages, page_from))

    return "".join(output)
paul@0 | 215 | |
def formatPagingNavigation(request, formatter, pages, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the paging navigation for the result 'pages', according to the 'page_from'
    indicator which provides the current position in the result set.

    The navigation consists of one number for each page of results, numbered
    from 1. Every number except that of the current page is a link to the page
    being formatted, using the query string of the request with the "from"
    parameter set to the position of the first result on the linked page.
    """

    pagename = formatter.page.page_name
    _ = request.getText

    results_per_page = request.cfg.search_results_per_page
    number_of_results = len(pages)

    # Floor division keeps the page indexes integral. The last page is the one
    # holding the final result, so that a result count which is an exact
    # multiple of the page size does not produce a link to an empty page.

    current_page = max(0, page_from // results_per_page)
    last_page = (number_of_results - 1) // results_per_page

    querydict = wikiutil.parseQueryString(request.query_string)

    def page_link(n):

        "Return the output fragments linking to result page 'n' (0-based)."

        return [
            formatter.pagelink(on=1, pagename=pagename,
                querystr=getPagingQueryString(querydict, n * results_per_page)),
            formatter.text(str(n + 1)),
            formatter.pagelink(on=0),
            formatter.text(" "),
            ]

    output = []

    output.append(formatter.paragraph(on=1))
    output.append(formatter.text(_("Result pages:")))
    output.append(formatter.text(" "))

    # Links to the pages before the current page.

    for n in range(current_page):
        output.extend(page_link(n))

    # The current page is shown without a link.

    output.append(formatter.text(str(current_page + 1)))
    output.append(formatter.text(" "))

    # Links to the pages after the current page.

    for n in range(current_page + 1, last_page + 1):
        output.extend(page_link(n))

    output.append(formatter.paragraph(on=0))

    return "".join(output)
paul@1 | 265 | |
def getPagingQueryString(querydict, page_from):

    """
    Return a query string built from the given 'querydict' with its "from"
    entry set to 'page_from'. Note that 'querydict' itself is modified.
    """

    querydict["from"] = page_from
    query_string = wikiutil.makeQueryString(querydict)
    return query_string
paul@1 | 269 | |
def int_or_none(x):

    "Return 'x' converted to an integer, or None if 'x' is None."

    return None if x is None else int(x)
paul@0 | 275 | |
paul@0 | 276 | # vim: tabstop=4 expandtab shiftwidth=4 |