#!/usr/bin/env python

"""
Build a search index from the files in a directory, parsing the files in
parallel, and then offer an interactive pattern prompt over the result.

Usage: <program> <window_size> <directory> [--noprompt]

window_size is the maximum number of parser channels kept active at once;
directory is scanned (non-recursively) for regular files to index.
"""

from parallel import start, Exchange
from Dict import Indexer, Searcher, Parser
import os
import sys

# Line-reading function for the prompt: raw_input where it exists (Python 2),
# otherwise the built-in input.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

def apply_parser(channel, filename):

    """
    Parse 'filename' and hand the resulting entries to 'channel'.

    This is the callable given to start() in fill_window; the channel argument
    is supplied by start() (presumably the link back to the creating process -
    confirm against the parallel module).
    """

    print("Parsing %s" % (filename,))
    parser = Parser(filename)
    parser.send_entries(channel)
    print("Done %s" % (filename,))

def fill_window(filenames, i, window_size, exchange):

    """
    Start parsers for 'filenames' from position 'i' onwards until either the
    filenames are exhausted or the free capacity of the window has been used.

    The free capacity is 'window_size' minus the number of channels active in
    'exchange' at the time of the call; it is measured once, before any new
    channel is added. Each new channel is added to 'exchange'.

    Return the position of the first filename not yet started.
    """

    free_slots = window_size - len(exchange.active())
    limit = min(len(filenames), i + free_slots)

    for position in range(i, limit):
        exchange.add(start(apply_parser, filenames[position]))

    # When nothing could be started (limit <= i) the position is unchanged.

    return max(i, limit)

def get_searcher(filenames, window_size):

    """
    Index 'filenames' using at most 'window_size' concurrently active parser
    channels and return a Searcher over the combined index.
    """

    master_index = Indexer()

    # Start indexing by filling a window with channels.

    exchange = Exchange()
    i = fill_window(filenames, 0, window_size, exchange)

    # Listen for responses for as long as any channel remains active.
    # NOTE(review): exchange.ready() presumably waits until at least one
    # channel has data to read - confirm against the parallel module.

    while exchange.active():
        print("Waiting for %d channels..." % len(exchange.active()))
        for channel in exchange.ready():
            entry = channel.receive()
            master_index.add_entry(entry)

        # Replace any channels which have finished with new parsers.

        i = fill_window(filenames, i, window_size, exchange)

    # Provide a search interface.

    return Searcher(master_index.get_index())

def _usage(program, message):

    "Write 'message' and a usage summary for 'program' to standard error."

    sys.stderr.write("%s\n" % message)
    sys.stderr.write("Usage: %s <window_size> <directory> [--noprompt]\n" % program)

def main(argv=None):

    """
    Run the indexer as a program using 'argv' (sys.argv when omitted).

    Return 0 on success or 2 when the arguments are unusable.
    """

    if argv is None:
        argv = sys.argv

    # Get the parameters.

    if len(argv) < 3:
        _usage(argv[0], "Missing arguments.")
        return 2

    try:
        window_size = int(argv[1])
    except ValueError:
        _usage(argv[0], "Window size must be an integer: %r" % (argv[1],))
        return 2

    # A window smaller than one would never start a parser, silently giving
    # an empty index.

    if window_size < 1:
        _usage(argv[0], "Window size must be at least 1.")
        return 2

    directory = argv[2]

    # Build a list of filenames, keeping only regular files.

    filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)]
    filenames = [filename for filename in filenames if os.path.isfile(filename)]

    # Get a searcher using an index built in parallel.

    searcher = get_searcher(filenames, window_size)

    # Present a user interface.

    if "--noprompt" not in argv:
        while 1:

            # Write the prompt directly to standard output so that it appears
            # there regardless of how the line is subsequently read.

            sys.stdout.write("Pattern: ")
            sys.stdout.flush()

            # End of input or an interrupt finishes the session without a
            # traceback.

            try:
                pattern = _read_line()
            except (EOFError, KeyboardInterrupt):
                sys.stdout.write("\n")
                break

            print(searcher.find(pattern))

    return 0

if __name__ == "__main__":
    sys.exit(main())

# vim: tabstop=4 expandtab shiftwidth=4