#!/usr/bin/env python

"""A simple file indexer.

The index is a character trie built from nested dictionaries. Each level maps
a single character to a ``(children, words)`` pair, where ``children`` is the
dictionary for the next character and ``words`` is the list of tokens that end
exactly at that node.
"""

import codecs


class Indexer:
    """Build a prefix index from the whitespace-separated tokens of a file."""

    def __init__(self, dict_location, encoding=None):
        """Read 'dict_location' and index it immediately.

        'encoding', when given, is the text encoding used to decode the file;
        when None the file is opened with the built-in open() defaults.
        """
        self.dict_location = dict_location
        self.encoding = encoding

        # Initialisation: the index is built eagerly, so constructing an
        # Indexer performs file I/O.
        self.index = self.make_index()

    def get_index(self):
        """Return the index built at construction time."""
        return self.index

    def make_index(self):
        """Return a dictionary containing an index structure for the dict."""
        if self.encoding is None:
            f = open(self.dict_location)
        else:
            f = codecs.open(self.dict_location, encoding=self.encoding)

        # The context manager closes the file even if read() raises.
        with f:
            s = f.read()

        index = {}

        # str.split() with no arguments never yields an empty token, so the
        # inner loop always runs at least once and 'words' is always bound.
        for token in s.split():
            slot = index
            for c in token:
                if c not in slot:
                    slot[c] = {}, []
                slot, words = slot[c]

            # 'words' belongs to the node reached by the last character.
            if token not in words:
                words.append(token)

        return index


class Searcher:
    """Look up words in an index produced by Indexer."""

    def __init__(self, index):
        """Remember the 'index' (a nested dictionary) to search in."""
        self.index = index

    def find(self, pattern):
        """Find words beginning with the given 'pattern'.

        Returns a list: the words ending exactly at the pattern's node first,
        followed by all longer words below it. Returns an empty list when no
        indexed word starts with 'pattern'. An empty 'pattern' returns every
        word in the index.
        """
        slot = self.index
        words = []

        for c in pattern:
            if c not in slot:
                return []
            slot, words = slot[c]

        # Copy 'words' so that callers cannot mutate the index through the
        # returned list.
        results = list(words)
        results += self.get_all_words(slot)
        return results

    def get_all_words(self, slot):
        """Get all words under the given index 'slot'.

        Children are visited in sorted key order, and a node's own words are
        listed before the words of its descendants.
        """
        all_words = []
        # sorted() works on both Python 2 lists and Python 3 key views.
        for c in sorted(slot):
            this_slot, words = slot[c]
            all_words += words
            all_words += self.get_all_words(this_slot)
        return all_words


def update(index1, index2):
    """Merge the contents of 'index2' into 'index1' in place.

    Keys missing from 'index1' are linked to the corresponding 'index2'
    entries (not copied), so the two indexes share those sub-structures
    afterwards. Returns None.
    """
    for key in index2:
        if key not in index1:
            index1[key] = index2[key]
        else:
            slot1, words1 = index1[key]
            slot2, words2 = index2[key]
            for word in words2:
                if word not in words1:
                    words1.append(word)
            update(slot1, slot2)

# vim: tabstop=4 expandtab shiftwidth=4