1.1 --- a/tests/Dict.py Tue Sep 27 17:26:44 2005 +0000
1.2 +++ b/tests/Dict.py Tue Sep 27 17:28:25 2005 +0000
1.3 @@ -4,43 +4,53 @@
1.4
1.5 import codecs
1.6
1.7 -class Indexer:
1.8 +class Parser:
1.9 def __init__(self, dict_location, encoding=None):
1.10 self.dict_location = dict_location
1.11 self.encoding = encoding
1.12
1.13 - # Initialisation.
1.14 -
1.15 - self.index = self.make_index()
1.16 -
1.17 - def get_index(self):
1.18 - return self.index
1.19 -
1.20 - def make_index(self):
1.21 -
1.22 - "Return a dictionary containing an index structure for the dict."
1.23 -
1.24 + def _get_file_content(self):
1.25 if self.encoding is None:
1.26 f = open(self.dict_location)
1.27 else:
1.28 f = codecs.open(self.dict_location, encoding=self.encoding)
1.29 s = f.read()
1.30 f.close()
1.31 + return s
1.32
1.33 - tokens = s.split()
1.34 + def send_entries(self, channel):
1.35 +
1.36 + "Send word entries from the file."
1.37 +
1.38 + tokens = self._get_file_content().split()
1.39 index = {}
1.40
1.41 + words = []
1.42 for token in tokens:
1.43 - slot = index
1.44 - for c in token:
1.45 - if not slot.has_key(c):
1.46 - slot[c] = {}, []
1.47 - slot, words = slot[c]
1.48 -
1.49 if token not in words:
1.50 + channel.send((token, self.dict_location))
1.51 words.append(token)
1.52
1.53 - return index
1.54 +class Indexer:
1.55 + def __init__(self):
1.56 + self.index = {}
1.57 +
1.58 + def get_index(self):
1.59 + return self.index
1.60 +
1.61 + def add_entry(self, entry):
1.62 +
1.63 + "Add the given word 'entry' (token, dict_location) to the index."
1.64 +
1.65 + token, dict_location = entry
1.66 + slot = self.index
1.67 + for c in token:
1.68 + if not slot.has_key(c):
1.69 + slot[c] = {}, []
1.70 + slot, words = slot[c]
1.71 +
1.72 + if token not in words:
1.73 + words.append(token)
1.74
1.75 class Searcher:
1.76 def __init__(self, index):
1.77 @@ -76,16 +86,4 @@
1.78 all_words += self.get_all_words(this_slot)
1.79 return all_words
1.80
1.81 -def update(index1, index2):
1.82 - for key in index2.keys():
1.83 - if not index1.has_key(key):
1.84 - index1[key] = index2[key]
1.85 - else:
1.86 - slot1, words1 = index1[key]
1.87 - slot2, words2 = index2[key]
1.88 - for word in words2:
1.89 - if not word in words1:
1.90 - words1.append(word)
1.91 - update(slot1, slot2)
1.92 -
1.93 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/tests/start_indexer.py Tue Sep 27 17:26:44 2005 +0000
2.2 +++ b/tests/start_indexer.py Tue Sep 27 17:28:25 2005 +0000
2.3 @@ -1,22 +1,22 @@
2.4 #!/usr/bin/env python
2.5
2.6 from parallel import start, Exchange
2.7 -from Dict import Indexer, Searcher, update
2.8 +from Dict import Indexer, Searcher, Parser
2.9
2.10 -def apply_indexer(channel, filename):
2.11 - print "Indexing", filename
2.12 - indexer = Indexer(filename)
2.13 - channel.send(indexer.get_index())
2.14 +def apply_parser(channel, filename):
2.15 + print "Parsing", filename
2.16 + parser = Parser(filename)
2.17 + parser.send_entries(channel)
2.18
2.19 def get_searcher(filenames, window_size):
2.20 - master_index = {}
2.21 + master_index = Indexer()
2.22
2.23 # Start indexing.
2.24
2.25 for i in range(0, len(filenames), window_size):
2.26 channels = []
2.27 for filename in filenames[i:i + window_size]:
2.28 - channels.append(start(apply_indexer, filename))
2.29 + channels.append(start(apply_parser, filename))
2.30
2.31 # Start listening for responses.
2.32
2.33 @@ -24,19 +24,18 @@
2.34 while exchange.active():
2.35 print "Waiting for %d channels..." % exchange.active()
2.36 for channel in exchange.ready():
2.37 - index = channel.receive()
2.38 - update(master_index, index)
2.39 - exchange.remove(channel)
2.40 - channel.close()
2.41 + entry = channel.receive()
2.42 + master_index.add_entry(entry)
2.43
2.44 # Provide a search interface.
2.45
2.46 - return Searcher(master_index)
2.47 + return Searcher(master_index.get_index())
2.48
2.49 if __name__ == "__main__":
2.50 import sys, os
2.51 window_size, directory = int(sys.argv[1]), sys.argv[2]
2.52 filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)]
2.53 + filenames = [filename for filename in filenames if os.path.isfile(filename)]
2.54
2.55 searcher = get_searcher(filenames, window_size)
2.56 while 1: