#!/usr/bin/env python

"""Index the files of a directory using parallel parsers, then search the index.

Usage: <script> <window_size> <directory> [--noprompt]

At most <window_size> parser channels are kept active at a time. Unless
"--noprompt" is given, the script then prompts repeatedly for patterns and
prints what the searcher finds for each one.
"""

import os
import sys

from parallel import start, Exchange
from Dict import Indexer, Searcher, Parser

# The script was written for Python 2 (raw_input); fall back to input() on
# interpreters where raw_input does not exist.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def apply_parser(channel, filename):
    """Parse 'filename' and send its entries over 'channel'.

    This is the callable handed to start() by fill_window; presumably
    start() supplies the channel argument - confirm against the parallel
    module.
    """
    Parser(filename).send_entries(channel)


def fill_window(filenames, i, window_size, exchange):
    """Start parsers for 'filenames' from index 'i' until the window is full.

    The number of channels to start is 'window_size' minus the number of
    channels the exchange reports as active, capped by the number of
    filenames remaining. Each started channel is added to 'exchange'.

    Returns the index of the first filename that has not been started yet.
    """
    # The active count is sampled once, before any new channel is added.
    limit = min(len(filenames), i + window_size - len(exchange.active()))
    while i < limit:
        exchange.add(start(apply_parser, filenames[i]))
        i += 1
    return i


def get_searcher(filenames, window_size):
    """Build an index from 'filenames' and return a Searcher over it.

    Keeps up to 'window_size' parser channels active, collecting the entries
    they send into a single Indexer and starting further parsers as the
    active count drops.
    """
    master_index = Indexer()

    # Start indexing by filling a window with channels.
    exchange = Exchange()
    i = fill_window(filenames, 0, window_size, exchange)

    # Listen for responses, topping the window up after each round.
    while exchange.active():
        for channel in exchange.ready():
            master_index.add_entry(channel.receive())
        i = fill_window(filenames, i, window_size, exchange)

    # Provide a search interface.
    return Searcher(master_index.get_index())


def main(argv):
    """Run the command-line interface; return the process exit status."""
    # Get the parameters, reporting bad usage instead of raising.
    if len(argv) < 3:
        sys.stderr.write(
            "Usage: %s <window_size> <directory> [--noprompt]\n" % argv[0])
        return 1
    try:
        window_size = int(argv[1])
    except ValueError:
        sys.stderr.write("window_size must be an integer: %r\n" % argv[1])
        return 1
    directory = argv[2]

    # Build a list of filenames, keeping regular files only.
    candidates = [os.path.join(directory, name) for name in os.listdir(directory)]
    filenames = [path for path in candidates if os.path.isfile(path)]

    # Get a searcher using an index built in parallel.
    searcher = get_searcher(filenames, window_size)

    # Present a user interface.
    if "--noprompt" not in argv:
        while True:
            try:
                pattern = _read_line("Pattern: ")
            except (EOFError, KeyboardInterrupt):
                # End of input or Ctrl-C: finish the prompt line and stop.
                sys.stdout.write("\n")
                break
            print(searcher.find(pattern))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

# vim: tabstop=4 expandtab shiftwidth=4