#!/usr/bin/env python

"A simple file indexer."

import codecs


class Parser:
    """Read a whitespace-separated word file and emit its distinct words.

    'dict_location' is the path of the file to read.  'encoding', when given,
    is passed to codecs.open() to decode the file; when None the file is
    opened with the built-in open() and its defaults.
    """

    def __init__(self, dict_location, encoding=None):
        self.dict_location = dict_location
        self.encoding = encoding

    def _get_file_content(self):
        """Return the complete content of the file at 'dict_location'."""

        if self.encoding is None:
            f = open(self.dict_location)
        else:
            f = codecs.open(self.dict_location, encoding=self.encoding)

        # The context manager closes the file even if read() raises.
        with f:
            return f.read()

    def send_entries(self, channel):
        """Send word entries from the file.

        Each distinct whitespace-separated token is sent exactly once, in
        order of first appearance, as channel.send((token, dict_location)).
        """

        # A set gives constant-time duplicate checks; iterating the tokens in
        # file order keeps the first-occurrence ordering of the sent entries.
        seen = set()
        for token in self._get_file_content().split():
            if token in seen:
                continue
            seen.add(token)
            channel.send((token, self.dict_location))


class Indexer:
    """Build a character trie of words.

    The index is a dictionary mapping a character to a
    (child_slots, words) tuple, where 'child_slots' is a dictionary of the
    same shape for the next character and 'words' is the list of tokens
    ending exactly at that position.
    """

    def __init__(self):
        self.index = {}

    def get_index(self):
        """Return the index dictionary (the live object, not a copy)."""

        return self.index

    def add_entry(self, entry):
        """Add the given word 'entry' (token, dict_location) to the index.

        Only the token is stored; the location is unpacked but not recorded.
        An empty token has no position in the trie and is ignored.
        """

        token, _dict_location = entry
        if not token:
            return

        slot = self.index
        words = None
        for c in token:
            if c not in slot:
                slot[c] = {}, []
            slot, words = slot[c]

        if token not in words:
            words.append(token)


class Searcher:
    """Look up words by prefix in an index of the shape built by Indexer."""

    def __init__(self, index):
        self.index = index

    def find(self, pattern):
        """Find words beginning with the given 'pattern'.

        Return a new list holding the word equal to 'pattern' (if indexed)
        followed by all longer words, ordered by sorted character at each
        level.  An empty 'pattern' yields every indexed word; a pattern
        leading outside the index yields an empty list.
        """

        slot = self.index
        words = []

        for c in pattern:
            if c not in slot:
                return []
            slot, words = slot[c]

        # Copy 'words' so callers cannot mutate the index through the result.
        results = list(words)
        results.extend(self.get_all_words(slot))
        return results

    def get_all_words(self, slot):
        """Get all words under the given index 'slot'.

        Words are collected depth-first, visiting the characters of each
        level in sorted order.
        """

        all_words = []
        for c in sorted(slot):
            this_slot, words = slot[c]
            all_words.extend(words)
            all_words.extend(self.get_all_words(this_slot))
        return all_words

# vim: tabstop=4 expandtab shiftwidth=4