paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - ImprovedMoinSearch library |
paul@0 | 4 | |
paul@9 | 5 | @copyright: 2010, 2011, 2012 Paul Boddie <paul@boddie.org.uk> |
paul@0 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 7 | """ |
paul@0 | 8 | |
paul@0 | 9 | from MoinMoin.search import searchPages |
paul@0 | 10 | from MoinMoin.Page import Page |
paul@1 | 11 | from MoinMoin import wikiutil |
paul@9 | 12 | from MoinSupport import getHeadings |
paul@0 | 13 | import re |
paul@0 | 14 | |
# Match a paragraph: a run of one or more consecutive lines, none of which
# starts with whitespace, "=" or "#". The newline ending the last line of the
# run is optional so that a paragraph at the very end of the text is still
# found when the text lacks a trailing newline.

paragraph_regexp = re.compile(r"(?P<paragraph>(?:^[^#=\s].*$\n?)+)", re.UNICODE | re.MULTILINE)
paul@3 | 16 | |
def range_groups(min_name, max_name):

    """
    Return a regular expression fragment matching an optional, bracketed
    numeric range such as "(2)", "(1-3)", "(-2 - -1)" or "()".

    The first number, if present, is captured in a group named 'min_name' and
    the number following the dash, if present, is captured in a group named
    'max_name'. Both numbers may carry a leading minus sign. The entire
    bracketed region is optional, so the fragment also matches an empty string.
    """

    # The fragments are enclosed in brackets so that they are joined into a
    # single literal before the group names are substituted: a backslash
    # continuation cannot be used here because the comment on each line ends
    # the logical line.

    return (
        r"(?:"                  # non-grouping containment of...
        r"\("                   # brackets around the arguments
        r"(?P<%s>-?\d+)?"       # optional first numeric argument
        r"(?:\s*-\s*"           # non-grouping containment of dash and spaces
        r"(?P<%s>-?\d+)"        # second numeric argument
        r")?"                   # end containment, making dash and numeric argument optional
        r"\)"                   # closing bracket
        r")?"                   # end containment, making entire bracketed region optional
        ) % (min_name, max_name)
paul@3 | 27 | |
# Match a single directive of a format string: any number of style qualifiers
# ("link:", "strong:", "em:" or one of their aliases) followed by exactly one
# action keyword (heading, name, paragraph or break), where the heading, name
# and paragraph keywords accept optional bracketed numeric arguments.
#
# The "name" alternative is deliberately tried before "paragraph": the latter
# accepts the alias "p", which would otherwise match the first letter of the
# "page" keyword and prevent that keyword from ever being recognised.

format_options_regexp = re.compile(
    r"("                                    # optional formatting qualifiers...
    r"(?P<link>(link|l):)"                  # link:
    r"|(?P<strong>(strong|str|bold|b):)"    # strong:
    r"|(?P<em>(emphasis|em|italic|i):)"     # emphasis:
    r")*"
    r"("
    r"(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + ")"
    r"|(?P<name>(name|page)\s*" + range_groups("first", "last") + ")"
    r"|(?P<paragraph>(paragraph|para|p)\s*" + r"(?:\((?P<paragraph_number>\d+)?\))?" + ")"
    r"|(?P<break>(break|br))"
    r")",
    re.UNICODE)
paul@0 | 41 | |
def convert_index(i, length):

    """
    Translate the 1-based index 'i' into a 0-based index for a sequence having
    the given 'length'.

    Positive indexes are reduced by one, negative indexes count back from the
    end of the sequence (so that -1 yields 'length' - 1), and an index of None
    or zero is returned unchanged.
    """

    # Nothing to translate for an absent or zero index.

    if i is None or i == 0:
        return i

    # Negative indexes are relative to the end of the sequence.

    if i < 0:
        return length + i

    return i - 1
paul@5 | 57 | |
def getSearchResultPages(request, query, **kw):

    """
    Search using the given 'request' and 'query', returning the hits recorded
    by the search results. Any keyword arguments are handed on unchanged to the
    underlying searchPages function.
    """

    return searchPages(request, query, **kw).hits
paul@0 | 67 | |
paul@7 | 68 | # Action functions. |
paul@7 | 69 | |
def getFirstPageHeading(request, page, start=0, min_level=None, max_level=None):

    """
    Using the given 'request', return the first heading in the given 'page'
    from the given 'start' point (optional, defaulting to the start of the page)
    having a heading level of at least 'min_level' (which is not enforced if
    not specified) and at most 'max_level' (which is not enforced if not
    specified).

    A tuple containing the heading and the span (the start offset and the end
    offset as a tuple) is returned for a successful retrieval. The offsets in
    the span refer to positions in the complete page body, not to positions
    relative to 'start', so that the end offset can be used directly as the
    'start' of a subsequent search. Otherwise, None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()
    if start != 0:
        body = body[start:]

    for level, title, span in getHeadings(body):

        if (min_level is None or level >= min_level) and \
            (max_level is None or level <= max_level):

            # The span is assumed to be a (start, end) pair measured within
            # the text given to getHeadings, which omits everything before
            # 'start': shift it to obtain offsets within the whole page.

            span_start, span_end = span
            return title, (span_start + start, span_end + start)

    return None
paul@0 | 96 | |
def getParagraph(request, page, start=0, number=None):

    """
    Using the given 'request', return from the given 'page', starting from the
    optional 'start' offset (or the beginning, if no such offset is specified),
    the first paragraph or, if the optional 'number' is given, the paragraph
    whose position corresponds to that number, with a number of 1 being the
    first paragraph found, 2 being the second, and so on. Numbers less than 1
    also select the first paragraph.

    A tuple containing the paragraph text and its span (the start offset and
    the end offset as a tuple) is returned if such a paragraph exists. The
    offsets in the span refer to positions in the complete page body, not to
    positions relative to 'start'. Otherwise, None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()
    if start != 0:
        body = body[start:]

    # Determine the zero-based position of the requested paragraph.

    if number is None:
        wanted = 0
    else:
        wanted = max(0, number - 1)

    for i, match in enumerate(paragraph_regexp.finditer(body)):
        if i == wanted:

            # The match positions are relative to the sliced body: shift them
            # to obtain offsets within the whole page.

            match_start, match_end = match.span()
            return match.group("paragraph"), (match_start + start, match_end + start)

    return None
paul@3 | 117 | |
def getPageName(request, page, start=0, first=None, last=None):

    """
    Using the given 'request', return the name of the given 'page'. The optional
    'start' offset refers to the body of the page and is returned as the start
    and end of the result span if specified.

    If the optional 'first' or 'last' parameters are specified, only the
    specified span of parts extracted from the page name will be returned, where
    the parts of the name are obtained by splitting the full name where the
    slash ("/") character is found. The first part has an index of 1, and the
    last part can be referred to using an index of -1.

    Negative indexes reaching back beyond the first part are limited to the
    start of the name: such a 'first' selects from the first part onwards, and
    such a 'last' selects no parts at all, producing an empty name.

    A tuple containing the (possibly partial) name and the span is returned.
    """

    parts = page.page_name.split("/")

    first = convert_index(first, len(parts))
    last = convert_index(last, len(parts))

    # A negative index remaining after conversion refers to a position before
    # the first part. Left alone, it would count back from the end of the list
    # when slicing and select the wrong parts.

    if first is not None and first < 0:
        first = 0

    # Make the inclusive 'last' index into an exclusive slice limit.

    if last is None:
        limit = None
    else:
        limit = max(last + 1, 0)

    return "/".join(parts[first:limit]), (start, start)
paul@3 | 149 | |
paul@7 | 150 | # Formatting styles. |
paul@7 | 151 | |
def asLink(styles, formatter, text, page):

    """
    Return the given 'text' enclosed in a link to the given 'page', produced
    using the given 'formatter'. Any remaining 'styles' are applied to the text
    inside the link; without them, the text is emitted as plain text.
    """

    # The formatter calls are made in output order: link start, content, link
    # end.

    opening = formatter.pagelink(on=1, pagename=page.page_name)

    if styles:
        content = next_style(styles, formatter, text, page)
    else:
        content = asText(None, formatter, text, page)

    closing = formatter.pagelink(on=0)

    return u''.join((opening, content, closing))
paul@7 | 161 | |
def _asStyledText(styles, formatter, text, page, fn):

    """
    Return the given 'text' enclosed by the output of 'fn', which is called
    with on=1 before the content is produced and with on=0 afterwards. Any
    remaining 'styles' are applied to the content; without them, the text is
    emitted as plain text using the given 'formatter'.
    """

    # The calls are made in output order: style start, content, style end.

    opening = fn(on=1)

    if styles:
        content = next_style(styles, formatter, text, page)
    else:
        content = asText(None, formatter, text, page)

    closing = fn(on=0)

    return u''.join((opening, content, closing))
paul@7 | 171 | |
def asStrong(styles, formatter, text, page):

    """
    Return the given 'text' enclosed by the output of the 'formatter' object's
    strong method, applying any remaining 'styles' to the enclosed text.
    """

    enclose = formatter.strong
    return _asStyledText(styles, formatter, text, page, enclose)
paul@7 | 174 | |
def asEmphasis(styles, formatter, text, page):

    """
    Return the given 'text' enclosed by the output of the 'formatter' object's
    emphasis method, applying any remaining 'styles' to the enclosed text.
    """

    enclose = formatter.emphasis
    return _asStyledText(styles, formatter, text, page, enclose)
paul@7 | 177 | |
def asText(styles, formatter, text, page):

    """
    Return the given 'text' as produced by the text method of the given
    'formatter'. Where further 'styles' remain, the text is instead handed on
    to the next of those styles together with the given 'page'.
    """

    if styles:
        return next_style(styles, formatter, text, page)

    return formatter.text(text)
paul@7 | 183 | |
def asBreak(styles, formatter, text, page):

    """
    Return the result of calling the linebreak method of the given 'formatter'
    with an argument of 0. The 'styles', 'text' and 'page' arguments are
    accepted for compatibility with the other style functions but are ignored.
    """

    line_break = formatter.linebreak(0)
    return line_break
paul@7 | 186 | |
def next_style(styles, formatter, text, page):

    """
    Call the first of the given 'styles' with the remaining styles, together
    with the given 'formatter', 'text' and 'page', returning its result.
    """

    current, remaining = styles[0], styles[1:]
    return current(remaining, formatter, text, page)
paul@7 | 189 | |
# Mapping from the style group names used in the format directive syntax to
# the functions applying each style.

style_functions = dict(
    link=asLink,
    strong=asStrong,
    em=asEmphasis,
    )
paul@7 | 195 | |
paul@7 | 196 | # Formatting functions. |
paul@7 | 197 | |
def _getFormatActions(format):

    """
    Return a list of (action, arguments, styles) tuples describing the details
    to be shown for each result page according to the given 'format' string.

    Each action is one of the action functions or None (for a line break), the
    arguments are the extra values to be passed to the action, and the styles
    are the style functions to be applied to the result, outermost first.

    Where 'format' is empty, None or contains no recognised directives, a
    single action is returned causing a link labelled with the page name to be
    shown.
    """

    actions = []

    for match in format_options_regexp.finditer(format or ""):

        # Apply styles by gathering style functions.

        styles = [style_functions[style]
                  for style in ("strong", "em", "link")
                  if match.group(style)]
        styles.append(asText)

        # Add actions, arguments and styles. The arguments are held in lists
        # since they are used again for every result page.

        if match.group("heading"):
            args = [int_or_none(match.group(name)) for name in ("min_heading", "max_heading")]
            actions.append((getFirstPageHeading, args, styles))
        elif match.group("paragraph"):
            args = [int_or_none(match.group("paragraph_number"))]
            actions.append((getParagraph, args, styles))
        elif match.group("name"):
            args = [int_or_none(match.group(name)) for name in ("first", "last")]
            actions.append((getPageName, args, styles))
        elif match.group("break"):
            actions.append((None, None, [asBreak]))

    # Without any usable directives, show linked page names.

    if not actions:
        actions.append((getPageName, (), [asLink]))

    return actions

def formatResultPages(request, formatter, pages, paging, format, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the result 'pages', providing paging controls when 'paging' is set to a true
    value, and providing page details according to the given 'format'.

    Paging is only employed when there are more results than the configured
    number of search results per page. In that case, only the results starting
    from the position given by the optional 'page_from' parameter (specified
    within a range from 0 to the length of the 'pages' collection) are shown,
    numbered from that position, and paging navigation is added after the list.
    Otherwise, all results are shown, numbered from 1.

    For each result, the actions described by 'format' are performed in order,
    each one searching the page from the position following the result of the
    previous one, and actions not producing a result are omitted. The outputs
    of the actions are separated by spaces.
    """

    actions = _getFormatActions(format)

    # Use paging only when there are enough results.

    results_per_page = request.cfg.search_results_per_page
    paging = paging and len(pages) > results_per_page

    if paging:
        pages_to_show = pages[page_from:page_from + results_per_page]
        first_number = page_from + 1
    else:
        pages_to_show = pages
        first_number = 1

    # Prepare the output.

    output = []
    output.append(formatter.number_list(on=1, start=first_number))

    for page in pages_to_show:
        output.append(formatter.listitem(on=1))

        start = 0
        first = True

        for action, args, styles in actions:

            # Handle null actions.

            if action is None:
                text, span = None, None

            # Or process requested actions: where actions are performed, there
            # must be a result.

            else:
                result = action(request, page, start, *args)
                if result is None:
                    continue
                text, span = result

            if not first:
                output.append(" ")
            first = False

            output.append(next_style(styles, formatter, text, page))

            # Position the search for the next action: the end of a span is
            # the offset immediately following the located text.

            if span is not None:
                _start, _end = span
                start = _end

        output.append(formatter.listitem(on=0))

    output.append(formatter.number_list(on=0))

    # Show paging navigation.

    if paging:
        output.append(formatPagingNavigation(request, formatter, pages, page_from))

    return "".join(output)
paul@0 | 297 | |
def formatPagingNavigation(request, formatter, pages, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the paging navigation for the result 'pages', according to the 'page_from'
    indicator which provides the current position in the result set.

    The navigation is a paragraph listing the numbers of all pages of results,
    starting from 1. The number of the page containing 'page_from' is shown as
    plain text, whereas every other number is a link to the formatter's page
    using the query string of the request with its "from" parameter set to the
    position of the first result on the page concerned.
    """

    page = formatter.page
    pagename = page.page_name
    _ = request.getText

    results_per_page = request.cfg.search_results_per_page
    number_of_results = len(pages)

    # Round up when counting the pages so that a final, partly filled page is
    # included, but no empty page is added where the results exactly fill the
    # pages.

    number_of_pages = (number_of_results + results_per_page - 1) // results_per_page
    current = page_from // results_per_page

    querydict = wikiutil.parseQueryString(request.query_string)

    output = []
    output.append(formatter.paragraph(on=1))
    output.append(formatter.text(_("Result pages:")))
    output.append(formatter.text(" "))

    # Always show the current page, even if positioned beyond the results.

    for n in range(max(number_of_pages, current + 1)):
        if n == current:
            output.append(formatter.text(str(n + 1)))
        else:
            output.append(formatter.pagelink(on=1, pagename=pagename, querystr=getPagingQueryString(querydict, n * results_per_page)))
            output.append(formatter.text(str(n + 1)))
            output.append(formatter.pagelink(on=0))

        output.append(formatter.text(" "))

    output.append(formatter.paragraph(on=0))

    return "".join(output)
paul@1 | 347 | |
def getPagingQueryString(querydict, page_from):

    """
    Return a query string made from the given 'querydict' after setting its
    "from" entry to 'page_from'. Note that 'querydict' is modified in place.
    """

    querydict["from"] = page_from
    query_string = wikiutil.makeQueryString(querydict)
    return query_string
paul@1 | 351 | |
def int_or_none(x):

    """
    Return 'x' converted to an integer, or None if 'x' is None.
    """

    if x is not None:
        return int(x)

    return None
paul@0 | 357 | |
paul@0 | 358 | # vim: tabstop=4 expandtab shiftwidth=4 |