pprocess

tests/Dict.py

23:82154960021b
2005-09-28 paulb [project @ 2005-09-28 17:09:07 by paulb] Added copyright and licence information.
     1 #!/usr/bin/env python     2      3 "A simple file indexer."     4      5 import codecs     6      7 class Parser:     8     def __init__(self, dict_location, encoding=None):     9         self.dict_location = dict_location    10         self.encoding = encoding    11     12     def _get_file_content(self):    13         if self.encoding is None:    14             f = open(self.dict_location)    15         else:    16             f = codecs.open(self.dict_location, encoding=self.encoding)    17         s = f.read()    18         f.close()    19         return s    20     21     def send_entries(self, channel):    22     23         "Send word entries from the file."    24     25         tokens = self._get_file_content().split()    26         index = {}    27     28         words = []    29         for token in tokens:    30             token = self._strip(token)    31             if token not in words:    32                 channel.send((token, self.dict_location))    33                 words.append(token)    34     35     def _strip(self, token):    36     37         "Return the token stripped of non-alphanumeric symbols at each end."    38     39         characters = []    40         in_alphanum = 0    41         for c in token:    42             if not c.isalpha() and not c.isdigit():    43                 if in_alphanum:    44                     break    45             else:    46                 in_alphanum = 1    47                 characters.append(c)    48         return "".join(characters)    49     50 class Indexer:    51     def __init__(self):    52         self.index = {}    53     54     def get_index(self):    55         return self.index    56     57     def add_entry(self, entry):    58     59         "Add the given word 'entry' (token, dict_location) to the index."    60     61         token, dict_location = entry    62     63         if not token:    64             return    65     66         slot = self.index    67         for c in token:    68             if not slot.has_key(c):    69                 slot[c] = {}, {}    70             slot, words = slot[c]    71     72         if not words.has_key(token):    73             words[token] = []    74         words[token].append(dict_location)    75     76 class Searcher:    77     def __init__(self, index):    78         self.index = index    79     80     def find(self, pattern):    81     82         "Find words beginning with the given 'pattern'."    83     84         slot = self.index    85         words = []    86     87         for c in pattern:    88             if not slot.has_key(c):    89                 return []    90             slot, words = slot[c]    91     92         results = {}    93         results.update(words)    94         results.update(self.get_all_words(slot))    95         return results    96     97     def get_all_words(self, slot):    98     99         "Get all words under the given index 'slot'."   100    101         all_words = {}   102         keys = slot.keys()   103         keys.sort()   104         for c in keys:   105             this_slot, words = slot[c]   106             all_words.update(words)   107             all_words.update(self.get_all_words(this_slot))   108         return all_words   109    110 # vim: tabstop=4 expandtab shiftwidth=4