ImprovedMoinSearch (annotate ImprovedMoinSearch.py in a19b22ba78ce)

ImprovedMoinSearch

Annotated ImprovedMoinSearch.py

6:a19b22ba78ce

2010-10-03

Paul Boddie

Introduced specific linking of formatted data.

paul@0	1	# -- coding: iso-8859-1 --
paul@0	2	"""
paul@0	3	MoinMoin - ImprovedMoinSearch library
paul@0	4
paul@0	5	@copyright: 2010 Paul Boddie <paul@boddie.org.uk>
paul@0	6	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@0	7	"""
paul@0	8
paul@0	9	from MoinMoin.search import searchPages
paul@0	10	from MoinMoin.Page import Page
paul@1	11	from MoinMoin import wikiutil
paul@0	12	import re
paul@0	13
# Matches a heading line such as "== Title ==": the same number of "="
# characters must open and close the line. The heading text between the
# markers is captured verbatim, including any surrounding spaces.
heading_regexp = re.compile(r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$", re.UNICODE | re.MULTILINE)

# Matches a paragraph: a run of consecutive lines, none of which starts with
# "#", "=" or whitespace. Each line is terminated either by a newline or by
# the very end of the text, so that a final paragraph lacking a trailing
# newline is still found.
paragraph_regexp = re.compile(r"(?P<paragraph>(?:^[^#=\s].*$(?:\n|\Z))+)", re.UNICODE | re.MULTILINE)
paul@3	16
def range_groups(min_name, max_name):

    """
    Return a regular expression fragment matching an optional number followed
    by an optional " - number" part. The numbers, which may be negative, are
    captured in groups named 'min_name' and 'max_name' respectively.
    """

    return r"(?P<%s>-?\d+)?(?:\s-\s(?P<%s>-?\d+))?" % (min_name, max_name)

# Format options recognised within a format string, each optionally enclosed
# in square brackets to request that the result be shown as a link.
#
# The "name" alternative is tried before the "paragraph" alternative because
# the "p" abbreviation would otherwise match the first letter of "page",
# preventing an unbracketed "page" from ever selecting the page name.

format_options_regexp = re.compile(
    r"(?P<link>\[)?"
    r"("
    r"(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + ")"
    r"|(?P<name>(name|page)\s*" + range_groups("first", "last") + ")"
    r"|(?P<paragraph>(paragraph|para|p)\s*(?P<paragraph_number>\d+)?)"
    r")"
    r"(?(link)\]|)",
    re.UNICODE)
paul@0	29
def convert_index(i, length):

    """
    Translate the 1-based index 'i' into a 0-based index for a sequence having
    the given 'length'. Positive values are shifted down by one, negative
    values are counted back from the end of the sequence, and None or zero is
    returned unchanged.
    """

    if i is None or i == 0:
        return i
    if i < 0:
        return length + i
    return i - 1
paul@5	45
def getSearchResultPages(request, query, **kw):

    """
    Perform a search for 'query' using the given 'request', passing any
    additional keyword arguments on to searchPages, and return the 'hits'
    attribute of the search results.
    """

    return searchPages(request, query, **kw).hits
paul@0	55
def getFirstPageHeading(request, page, start=0, min_level=None, max_level=None):

    """
    Using the given 'request', return the first heading in the given 'page'
    from the given 'start' point (optional, defaulting to the start of the page)
    having a heading level of at least 'min_level' (which is undefined if not
    specified) and at most 'max_level' (which is undefined if not specified).

    A tuple containing the heading and the span (the start offset and the end
    offset as a tuple) is returned for a successful retrieval. The span is
    expressed in terms of offsets into the complete page body, so that the end
    offset can be used to derive the 'start' of a subsequent search. Otherwise,
    None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()

    # The body is sliced (instead of giving a start position to the regular
    # expression) so that the 'start' point itself counts as a line start.

    if start != 0:
        body = body[start:]

    for match in heading_regexp.finditer(body):
        level = len(match.group("level"))

        if (min_level is None or level >= min_level) and \
            (max_level is None or level <= max_level):

            # Match offsets are relative to the sliced body: shift them back
            # so that they refer to positions in the complete body.

            match_start, match_end = match.span()
            return match.group("heading"), (start + match_start, start + match_end)

    return None
paul@0	83
def getParagraph(request, page, start=0, number=None):

    """
    Using the given 'request', return from the given 'page', starting from the
    optional 'start' offset (or the beginning, if no such offset is specified),
    the first paragraph or, if the optional 'number' is given, the paragraph
    whose position corresponds to that number, with a number of 1 being the
    first paragraph found, 2 being the second, and so on. A 'number' less than
    1 also selects the first paragraph.

    A tuple containing the paragraph text and the span (the start offset and
    the end offset as a tuple) is returned for a successful retrieval. The span
    is expressed in terms of offsets into the complete page body. If no
    suitable paragraph is found, None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()

    # The body is sliced (instead of giving a start position to the regular
    # expression) so that the 'start' point itself counts as a line start.

    if start != 0:
        body = body[start:]

    # Position, counting from zero, of the paragraph to be returned.

    if number is None:
        wanted = 0
    else:
        wanted = max(0, number - 1)

    for i, match in enumerate(paragraph_regexp.finditer(body)):
        if i == wanted:

            # Match offsets are relative to the sliced body: shift them back
            # so that they refer to positions in the complete body.

            match_start, match_end = match.span()
            return match.group("paragraph"), (start + match_start, start + match_end)

    return None
paul@3	104
def getPageName(request, page, start=0, first=None, last=None):

    """
    Using the given 'request', return the name of the given 'page'. The optional
    'start' offset refers to the body of the page and is returned as the start
    and end of the result span if specified.

    If the optional 'first' or 'last' parameters are specified, only the
    specified span of parts extracted from the page name will be returned, where
    the parts of the name are obtained by splitting the full name where the
    slash ("/") character is found. The first part has an index of 1, and the
    last part can be referred to using an index of -1.

    A negative 'first' reaching back beyond the first part selects from the
    first part onwards, whereas a negative 'last' reaching back beyond the
    first part selects no parts at all, producing an empty name.

    A tuple containing the (possibly shortened) name and the span is returned.
    """

    parts = page.page_name.split("/")

    first = convert_index(first, len(parts))
    last = convert_index(last, len(parts))

    # A converted index remains negative where the original index reached back
    # beyond the first part. Such values are clamped here because slicing would
    # otherwise reinterpret them as positions counted from the end.

    if first is None:
        begin = 0
    else:
        begin = max(0, first)

    if last is None:
        end = len(parts)
    else:
        end = max(0, last + 1)

    return "/".join(parts[begin:end]), (start, start)
paul@3	136
def formatResultPages(request, formatter, pages, paging, format, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the result 'pages', providing paging controls when 'paging' is set to a true
    value, and providing page details according to the given 'format'.

    The 'format' is a string containing options such as "heading", "paragraph"
    or "name", each optionally enclosed in square brackets to have the result
    shown as a link to the page. Where no 'format' is given, or where it
    contains no recognised options, each page is shown as a link bearing the
    page name.

    If the optional 'page_from' parameter is set, the result pages from the
    given result (specified within a range from 0 to the length of the 'pages'
    collection) will be shown.
    """

    # Each action is a (function, arguments, as_link) tuple. The arguments are
    # kept in real tuples since they are used again for every result page.

    actions = []

    if format:
        for match in format_options_regexp.finditer(format):
            as_link = match.group("link")

            if match.group("heading"):
                action = getFirstPageHeading
                values = match.group("min_heading"), match.group("max_heading")
            elif match.group("paragraph"):
                action = getParagraph
                values = (match.group("paragraph_number"),)
            elif match.group("name"):
                action = getPageName
                values = match.group("first"), match.group("last")
            else:
                continue

            actions.append((action, tuple([int_or_none(value) for value in values]), as_link))

    # Show linked page names by default.

    if not actions:
        actions.append((getPageName, (), True))

    # Use paging only when there are enough results.

    results_per_page = request.cfg.search_results_per_page
    paging = paging and len(pages) > results_per_page

    if paging:
        pages_to_show = pages[page_from:page_from + results_per_page]
        first_number = page_from + 1
    else:
        # All results are shown, so the numbering starts from the beginning.
        pages_to_show = pages
        first_number = 1

    # Prepare the output.

    output = []
    output.append(formatter.number_list(on=1, start=first_number))

    for page in pages_to_show:
        output.append(formatter.listitem(on=1))

        start = 0
        first = True

        for action, args, as_link in actions:
            result = action(request, page, start, *args)

            if result is None:
                continue

            text, span = result

            # Separate the results of successive actions.

            if not first:
                output.append(" ")

            if as_link:
                output.append(formatter.pagelink(on=1, pagename=page.page_name))

            output.append(formatter.text(text))

            if as_link:
                output.append(formatter.pagelink(on=0))

            # Position the search for the next action immediately after this
            # result. No character is skipped: doing so would cut the first
            # character from the line following a paragraph, and would hide a
            # heading found at the very start of the page after a page name.

            _start, _end = span
            start = _end

            first = False

        output.append(formatter.listitem(on=0))

    output.append(formatter.number_list(on=0))

    # Show paging navigation.

    if paging:
        output.append(formatPagingNavigation(request, formatter, pages, page_from))

    return "".join(output)
paul@0	215
def formatPagingNavigation(request, formatter, pages, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the paging navigation for the result 'pages', according to the 'page_from'
    indicator which provides the current position in the result set.

    Every page of results is shown as a numbered link, except for the current
    page whose number is shown as plain text.
    """

    page = formatter.page
    pagename = page.page_name
    _ = request.getText

    output = []

    results_per_page = request.cfg.search_results_per_page
    number_of_results = len(pages)

    # Round up so that a partially filled last page is counted, but so that no
    # empty page is offered when the results fill the last page exactly.

    pages_total = (number_of_results + results_per_page - 1) // results_per_page
    pages_before = page_from // results_per_page

    querydict = wikiutil.parseQueryString(request.query_string)

    output.append(formatter.paragraph(on=1))
    output.append(formatter.text(_("Result pages:")))
    output.append(formatter.text(" "))

    # The current page is always shown, even if 'page_from' lies beyond the
    # available results.

    for n in range(max(pages_total, pages_before + 1)):
        if n == pages_before:
            output.append(formatter.text(str(n + 1)))
        else:
            output.append(formatter.pagelink(on=1, pagename=pagename, querystr=getPagingQueryString(querydict, n * results_per_page)))
            output.append(formatter.text(str(n + 1)))
            output.append(formatter.pagelink(on=0))

        output.append(formatter.text(" "))

    output.append(formatter.paragraph(on=0))

    return "".join(output)
paul@1	265
def getPagingQueryString(querydict, page_from):

    """
    Store 'page_from' under the "from" key of the given 'querydict', modifying
    that dictionary in place, and return the query string produced from it by
    wikiutil.makeQueryString.
    """

    querydict["from"] = page_from
    query_string = wikiutil.makeQueryString(querydict)
    return query_string
paul@1	269
def int_or_none(x):

    """
    Return None if 'x' is None; otherwise, return 'x' converted to an integer.
    """

    if x is not None:
        return int(x)
    return None
paul@0	275
paul@0	276	# vim: tabstop=4 expandtab shiftwidth=4