# HG changeset patch # User paulb # Date 1127844309 0 # Node ID bcf32e883b5e40a5167c03d1ff5c44ecbf75a770 # Parent 73be19436b6c765c148301596de78c280dd59170 [project @ 2005-09-27 18:05:09 by paulb] Added channel closure after every window has been processed. Changed the Indexer and Searcher to record where words were found. diff -r 73be19436b6c -r bcf32e883b5e tests/Dict.py --- a/tests/Dict.py Tue Sep 27 18:04:03 2005 +0000 +++ b/tests/Dict.py Tue Sep 27 18:05:09 2005 +0000 @@ -43,14 +43,19 @@ "Add the given word 'entry' (token, dict_location) to the index." token, dict_location = entry + + if not token: + return + slot = self.index for c in token: if not slot.has_key(c): - slot[c] = {}, [] + slot[c] = {}, {} slot, words = slot[c] - if token not in words: - words.append(token) + if not words.has_key(token): + words[token] = [] + words[token].append(dict_location) class Searcher: def __init__(self, index): @@ -68,22 +73,22 @@ return [] slot, words = slot[c] - results = [] - results += words - results += self.get_all_words(slot) + results = {} + results.update(words) + results.update(self.get_all_words(slot)) return results def get_all_words(self, slot): "Get all words under the given index 'slot'." - all_words = [] + all_words = {} keys = slot.keys() keys.sort() for c in keys: this_slot, words = slot[c] - all_words += words - all_words += self.get_all_words(this_slot) + all_words.update(words) + all_words.update(self.get_all_words(this_slot)) return all_words # vim: tabstop=4 expandtab shiftwidth=4 diff -r 73be19436b6c -r bcf32e883b5e tests/start_indexer.py --- a/tests/start_indexer.py Tue Sep 27 18:04:03 2005 +0000 +++ b/tests/start_indexer.py Tue Sep 27 18:05:09 2005 +0000 @@ -27,17 +27,33 @@ entry = channel.receive() master_index.add_entry(entry) + # Tidy up as we go. + + for channel in channels: + channel.close() + # Provide a search interface. return Searcher(master_index.get_index()) if __name__ == "__main__": import sys, os + + # Get the parameters. + window_size, directory = int(sys.argv[1]), sys.argv[2] + + # Build a list of filenames. + filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)] filenames = [filename for filename in filenames if os.path.isfile(filename)] + # Get a searcher using an index built in parallel. + searcher = get_searcher(filenames, window_size) + + # Present a user interface. + while 1: print "Pattern:", pattern = raw_input()