1 #!/usr/bin/env python 2 3 from parallel import start, Exchange 4 from Dict import Indexer, Searcher, Parser 5 6 def apply_parser(channel, filename): 7 print "Parsing", filename 8 parser = Parser(filename) 9 parser.send_entries(channel) 10 print "Done", filename 11 12 def get_searcher(filenames, window_size): 13 master_index = Indexer() 14 15 # Start indexing. 16 17 for i in range(0, len(filenames), window_size): 18 channels = [] 19 for filename in filenames[i:i + window_size]: 20 channels.append(start(apply_parser, filename)) 21 22 # Start listening for responses. 23 24 exchange = Exchange(channels) 25 while exchange.active(): 26 print "Waiting for %d channels..." % exchange.active() 27 for channel in exchange.ready(): 28 entry = channel.receive() 29 master_index.add_entry(entry) 30 31 # Tidy up as we go. 32 33 for channel in channels: 34 channel.close() 35 36 # Provide a search interface. 37 38 return Searcher(master_index.get_index()) 39 40 if __name__ == "__main__": 41 import sys, os 42 43 # Get the parameters. 44 45 window_size, directory = int(sys.argv[1]), sys.argv[2] 46 47 # Build a list of filenames. 48 49 filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)] 50 filenames = [filename for filename in filenames if os.path.isfile(filename)] 51 52 # Get a searcher using an index built in parallel. 53 54 searcher = get_searcher(filenames, window_size) 55 56 # Wait for all created processes to terminate. 57 58 try: 59 while 1: 60 os.wait() 61 except OSError: 62 pass 63 64 # Present a user interface. 65 66 while 1: 67 print "Pattern:", 68 pattern = raw_input() 69 print searcher.find(pattern) 70 71 # vim: tabstop=4 expandtab shiftwidth=4