1.1 --- a/tests/Dict.py Tue Sep 27 18:04:03 2005 +0000
1.2 +++ b/tests/Dict.py Tue Sep 27 18:05:09 2005 +0000
1.3 @@ -43,14 +43,19 @@
1.4 "Add the given word 'entry' (token, dict_location) to the index."
1.5
1.6 token, dict_location = entry
1.7 +
1.8 + if not token:
1.9 + return
1.10 +
1.11 slot = self.index
1.12 for c in token:
1.13 if not slot.has_key(c):
1.14 - slot[c] = {}, []
1.15 + slot[c] = {}, {}
1.16 slot, words = slot[c]
1.17
1.18 - if token not in words:
1.19 - words.append(token)
1.20 + if not words.has_key(token):
1.21 + words[token] = []
1.22 + words[token].append(dict_location)
1.23
1.24 class Searcher:
1.25 def __init__(self, index):
1.26 @@ -68,22 +73,22 @@
1.27 return []
1.28 slot, words = slot[c]
1.29
1.30 - results = []
1.31 - results += words
1.32 - results += self.get_all_words(slot)
1.33 + results = {}
1.34 + results.update(words)
1.35 + results.update(self.get_all_words(slot))
1.36 return results
1.37
1.38 def get_all_words(self, slot):
1.39
1.40 "Get all words under the given index 'slot'."
1.41
1.42 - all_words = []
1.43 + all_words = {}
1.44 keys = slot.keys()
1.45 keys.sort()
1.46 for c in keys:
1.47 this_slot, words = slot[c]
1.48 - all_words += words
1.49 - all_words += self.get_all_words(this_slot)
1.50 + all_words.update(words)
1.51 + all_words.update(self.get_all_words(this_slot))
1.52 return all_words
1.53
1.54 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/tests/start_indexer.py Tue Sep 27 18:04:03 2005 +0000
2.2 +++ b/tests/start_indexer.py Tue Sep 27 18:05:09 2005 +0000
2.3 @@ -27,17 +27,33 @@
2.4 entry = channel.receive()
2.5 master_index.add_entry(entry)
2.6
2.7 + # Tidy up as we go.
2.8 +
2.9 + for channel in channels:
2.10 + channel.close()
2.11 +
2.12 # Provide a search interface.
2.13
2.14 return Searcher(master_index.get_index())
2.15
2.16 if __name__ == "__main__":
2.17 import sys, os
2.18 +
2.19 + # Get the parameters.
2.20 +
2.21 window_size, directory = int(sys.argv[1]), sys.argv[2]
2.22 +
2.23 + # Build a list of filenames.
2.24 +
2.25 filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)]
2.26 filenames = [filename for filename in filenames if os.path.isfile(filename)]
2.27
2.28 + # Get a searcher using an index built in parallel.
2.29 +
2.30 searcher = get_searcher(filenames, window_size)
2.31 +
2.32 + # Present a user interface.
2.33 +
2.34 while 1:
2.35 print "Pattern:",
2.36 pattern = raw_input()