#!/usr/bin/env python

"A simple file indexer."

import codecs


class Dict:

    """
    A prefix index over the whitespace-separated words of a text file.

    The index is a nested dictionary (a character trie).  Each level maps a
    single character to a ``(children, words)`` tuple, where ``children`` is
    the dictionary for the next character and ``words`` is the list of
    tokens that end exactly at that position.
    """

    def __init__(self, dict_location, encoding=None):

        """
        Read the file at 'dict_location' and build the index for it.

        If 'encoding' is None the file is opened with the built-in open();
        otherwise it is opened through codecs.open() using that encoding.
        """

        self.dict_location = dict_location
        self.encoding = encoding

        # Initialisation: the file is read once, at construction time.

        self.index = self.get_index()

    def get_index(self):

        "Return a dictionary containing an index structure for the dict."

        if self.encoding is None:
            opened = open(self.dict_location)
        else:
            opened = codecs.open(self.dict_location, encoding=self.encoding)

        # The context manager closes the file even if reading fails.

        with opened as f:
            text = f.read()

        index = {}

        for token in text.split():

            # Walk (and extend where necessary) one trie level per character.
            # split() never yields empty tokens, so 'words' is always bound
            # by the time the inner loop finishes.

            slot = index
            for c in token:
                if c not in slot:
                    slot[c] = {}, []
                slot, words = slot[c]

            # Record each distinct token only once.

            if token not in words:
                words.append(token)

        return index

    def find(self, pattern):

        """
        Find words beginning with the given 'pattern'.

        Return a list holding the word equal to 'pattern' (if indexed)
        followed by all longer words sharing that prefix, ordered by
        character.  An empty 'pattern' yields every indexed word; a prefix
        not present in the index yields an empty list.
        """

        slot = self.index
        words = []

        for c in pattern:
            if c not in slot:
                return []
            slot, words = slot[c]

        # Copy 'words' so that callers cannot mutate the list in the index.

        results = list(words)
        results += self.get_all_words(slot)
        return results

    def get_all_words(self, slot):

        """
        Get all words under the given index 'slot'.

        Characters are visited in sorted order at every level, and the words
        ending at a node are listed before the words below it.
        """

        all_words = []
        for c in sorted(slot):
            this_slot, words = slot[c]
            all_words += words
            all_words += self.get_all_words(this_slot)
        return all_words

# vim: tabstop=4 expandtab shiftwidth=4