1 #!/usr/bin/env python 2 3 from parallel import start, Exchange 4 from Dict import Indexer, Searcher, Parser 5 6 def apply_parser(channel, filename): 7 print "Parsing", filename 8 parser = Parser(filename) 9 parser.send_entries(channel) 10 11 def get_searcher(filenames, window_size): 12 master_index = Indexer() 13 14 # Start indexing. 15 16 for i in range(0, len(filenames), window_size): 17 channels = [] 18 for filename in filenames[i:i + window_size]: 19 channels.append(start(apply_parser, filename)) 20 21 # Start listening for responses. 22 23 exchange = Exchange(channels) 24 while exchange.active(): 25 print "Waiting for %d channels..." % exchange.active() 26 for channel in exchange.ready(): 27 entry = channel.receive() 28 master_index.add_entry(entry) 29 30 # Tidy up as we go. 31 32 for channel in channels: 33 channel.close() 34 35 # Provide a search interface. 36 37 return Searcher(master_index.get_index()) 38 39 if __name__ == "__main__": 40 import sys, os 41 42 # Get the parameters. 43 44 window_size, directory = int(sys.argv[1]), sys.argv[2] 45 46 # Build a list of filenames. 47 48 filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)] 49 filenames = [filename for filename in filenames if os.path.isfile(filename)] 50 51 # Get a searcher using an index built in parallel. 52 53 searcher = get_searcher(filenames, window_size) 54 55 # Wait for all created processes to terminate. 56 57 try: 58 while 1: 59 os.wait() 60 except OSError: 61 pass 62 63 # Present a user interface. 64 65 while 1: 66 print "Pattern:", 67 pattern = raw_input() 68 print searcher.find(pattern) 69 70 # vim: tabstop=4 expandtab shiftwidth=4