1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/tests/Dict.py Mon Sep 26 21:59:16 2005 +0000
1.3 @@ -0,0 +1,91 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"A simple file indexer."
1.7 +
1.8 +import codecs
1.9 +
class Indexer:

    """Build a character-by-character prefix index of the words in a file.

    The index is a nested dictionary.  Each key is a single character and
    each value is a ``(children, words)`` tuple: ``children`` is the
    dictionary for the following character, and ``words`` lists the tokens
    whose final character is reached at that node.
    """

    def __init__(self, dict_location, encoding=None):

        """Remember the file location and build the index straight away.

        'dict_location' is the path of the file to index.  If 'encoding' is
        not None the file is opened with codecs.open using that encoding;
        otherwise the built-in open is used.
        """

        self.dict_location = dict_location
        self.encoding = encoding

        # Initialisation.

        self.index = self.make_index()

    def get_index(self):

        "Return the index built when this object was created."

        return self.index

    def make_index(self):

        "Return a dictionary containing an index structure for the dict."

        if self.encoding is None:
            f = open(self.dict_location)
        else:
            f = codecs.open(self.dict_location, encoding=self.encoding)

        # Close the file even if reading or decoding raises.

        try:
            s = f.read()
        finally:
            f.close()

        index = {}

        # split() with no arguments never yields empty tokens, so the inner
        # loop always runs at least once and 'words' is always bound below.

        for token in s.split():

            # Walk the path for this token, creating missing nodes.

            slot = index
            for c in token:
                if c not in slot:
                    slot[c] = {}, []
                slot, words = slot[c]

            # 'words' is now the list at the node of the token's last
            # character; record each distinct token only once.

            if token not in words:
                words.append(token)

        return index
1.47 +
class Searcher:

    """Look up words by prefix in a nested index dictionary.

    The index maps a single character to a ``(children, words)`` tuple, where
    ``children`` is another such dictionary and ``words`` is a list.
    """

    def __init__(self, index):

        "Use the given nested 'index' dictionary for all searches."

        self.index = index

    def find(self, pattern):

        "Find words beginning with the given 'pattern'."

        slot = self.index
        words = []

        # Follow the pattern one character at a time; a missing character
        # means nothing in the index starts with the pattern.

        for c in pattern:
            if c not in slot:
                return []
            slot, words = slot[c]

        # Return a fresh list: the words stored at the pattern's own node
        # first, then everything stored beneath it.  An empty pattern
        # therefore yields every word in the index.

        return list(words) + self.get_all_words(slot)

    def get_all_words(self, slot):

        "Get all words under the given index 'slot'."

        all_words = []

        # Visit the characters in sorted order.  sorted() accepts a plain
        # dictionary on both Python 2 and Python 3, unlike keys().sort().

        for c in sorted(slot):
            this_slot, words = slot[c]
            all_words += words
            all_words += self.get_all_words(this_slot)
        return all_words
1.81 +
def update(index1, index2):

    """Merge the index 'index2' into 'index1', modifying 'index1' in place.

    Both arguments are nested index dictionaries mapping a character to a
    ``(children, words)`` tuple.  Nothing is returned.

    Note that a subtree present only in 'index2' is adopted by reference, not
    copied, so 'index1' shares those nodes with 'index2' afterwards.
    """

    for key in index2:
        if key not in index1:

            # Nothing to merge with: take over the whole subtree as it is.

            index1[key] = index2[key]
        else:
            slot1, words1 = index1[key]
            slot2, words2 = index2[key]

            # Add only the words not already recorded at this node.

            for word in words2:
                if word not in words1:
                    words1.append(word)

            # Merge the child nodes in the same way.

            update(slot1, slot2)
1.93 +
1.94 +# vim: tabstop=4 expandtab shiftwidth=4
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/tests/start_indexer.py Mon Sep 26 21:59:16 2005 +0000
2.3 @@ -0,0 +1,43 @@
2.4 +#!/usr/bin/env python
2.5 +
2.6 +from parallel import start, Exchange
2.7 +from Dict import Indexer, Searcher, update
2.8 +
def apply_indexer(channel, filename):

    """Index the file 'filename' and send the resulting index on 'channel'.

    This is the callable passed to start() by get_searcher, which later
    collects the index by calling receive() on the channel that start()
    returns.  The file is indexed without an explicit encoding.
    """

    # A single parenthesised argument prints identically on Python 2 and 3;
    # the one-element tuple keeps the formatting safe for any filename value.

    print("Indexing %s" % (filename,))
    indexer = Indexer(filename)
    channel.send(indexer.get_index())
2.13 +
def get_searcher(filenames):

    """Index all of 'filenames' and return a Searcher over the merged index.

    Each file is handed to start(apply_indexer, filename), which returns a
    channel.  The indexes received from those channels are merged into a
    single dictionary with update().  With no filenames the returned
    Searcher wraps an empty index.
    """

    master_index = {}

    # Start indexing.

    channels = [start(apply_indexer, filename) for filename in filenames]

    # Start listening for responses.

    exchange = Exchange(channels)
    while channels:
        print("Waiting for %d channels..." % len(channels))

        # Each channel reported by ready() is read once and then dropped
        # from the list of outstanding channels.

        for channel in exchange.ready():
            index = channel.receive()
            update(master_index, index)
            channels.remove(channel)

    # Provide a search interface.

    return Searcher(master_index)
2.36 +
if __name__ == "__main__":
    import sys

    # raw_input exists only on Python 2; Python 3 calls the same thing input.

    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Every command line argument is a file to index.

    filenames = sys.argv[1:]
    searcher = get_searcher(filenames)

    # Prompt for patterns until input ends or the user interrupts, instead
    # of terminating with a traceback.

    while 1:
        try:
            pattern = read_line("Pattern: ")
        except (EOFError, KeyboardInterrupt):
            print("")
            break
        print(searcher.find(pattern))
2.45 +
2.46 +# vim: tabstop=4 expandtab shiftwidth=4