pprocess

tests/start_indexer.py

34:db417142f29d
2005-09-29 paulb [project @ 2005-09-29 16:21:30 by paulb] Introduced a window filling mechanism so that the exchange always has an optimal number of channels to monitor. Removed the waitall calls since the exchange should now tidy up created processes.
     1 #!/usr/bin/env python     2      3 from parallel import start, Exchange     4 from Dict import Indexer, Searcher, Parser     5 import os     6      7 def apply_parser(channel, filename):     8     print "Parsing", filename     9     parser = Parser(filename)    10     parser.send_entries(channel)    11     print "Done", filename    12     13 def fill_window(filenames, i, window_size, exchange):    14     limit = min(len(filenames), i + window_size - len(exchange.active()))    15     while i < limit:    16         channel = start(apply_parser, filenames[i])    17         exchange.add(channel)    18         i = i + 1    19     return i    20     21 def get_searcher(filenames, window_size):    22     master_index = Indexer()    23     24     # Start indexing by filling a window with channels.    25     26     exchange = Exchange()    27     i = fill_window(filenames, 0, window_size, exchange)    28     29     # Start listening for responses.    30     31     while exchange.active():    32         print "Waiting for %d channels..." % len(exchange.active())    33         for channel in exchange.ready():    34             entry = channel.receive()    35             master_index.add_entry(entry)    36     37         i = fill_window(filenames, i, window_size, exchange)    38     39     # Provide a search interface.    40     41     return Searcher(master_index.get_index())    42     43 if __name__ == "__main__":    44     import sys, os    45     46     # Get the parameters.    47     48     window_size, directory = int(sys.argv[1]), sys.argv[2]    49     50     # Build a list of filenames.    51     52     filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)]    53     filenames = [filename for filename in filenames if os.path.isfile(filename)]    54     55     # Get a searcher using an index built in parallel.    56     57     searcher = get_searcher(filenames, window_size)    58     59     # Present a user interface.    60     61     if "--noprompt" not in sys.argv:    62         while 1:    63             print "Pattern:",    64             pattern = raw_input()    65             print searcher.find(pattern)    66     67 # vim: tabstop=4 expandtab shiftwidth=4