paulb@4 | 1 | #!/usr/bin/env python |
paulb@4 | 2 | |
paulb@34 | 3 | from parallel import start, Exchange |
paulb@10 | 4 | from Dict import Indexer, Searcher, Parser |
paulb@34 | 5 | import os |
paulb@4 | 6 | |
def apply_parser(channel, filename):
    """
    Parse the file called 'filename' and send its entries over 'channel'.

    A Parser is created for the file and asked to send its entries through
    the given channel. Nothing is returned.
    """

    Parser(filename).send_entries(channel)
paulb@4 | 12 | |
def fill_window(filenames, i, window_size, exchange):
    """
    Start parsers for files from position 'i' in 'filenames' until the
    'exchange' holds 'window_size' active channels or the files run out.

    Each started channel runs apply_parser on one filename and is added to
    the exchange. The position of the next file still to be started is
    returned; it equals 'i' when nothing could be started.
    """

    total = len(filenames)
    free_slots = window_size - len(exchange.active())
    stop = min(total, i + free_slots)

    for position in range(i, stop):
        exchange.add(start(apply_parser, filenames[position]))

    # When the window is already full (or over-full), 'stop' does not exceed
    # 'i' and the position must be handed back unchanged.

    return max(i, stop)
paulb@34 | 20 | |
def get_searcher(filenames, window_size):
    """
    Index the files named in 'filenames' and return a Searcher for the
    resulting index.

    At most 'window_size' channels are kept active in an Exchange at any
    time; fill_window starts a new apply_parser channel for the next file
    whenever there is room. Entries received from ready channels are added
    to a single Indexer, whose index is handed to the Searcher.

    Raises ValueError if there are files to index but 'window_size' is less
    than 1. Such a window can never admit a channel, so every file would be
    skipped silently and an empty searcher returned.
    """

    if filenames and window_size < 1:
        raise ValueError(
            "window_size must be at least 1 to index files, got %r" % (window_size,)
            )

    master_index = Indexer()

    # Start indexing by filling a window with channels.

    exchange = Exchange()
    i = fill_window(filenames, 0, window_size, exchange)

    # Collect entries until no channel remains active.

    while exchange.active():
        for channel in exchange.ready():
            master_index.add_entry(channel.receive())

        # Replace any finished channels with parsers for the next files.

        i = fill_window(filenames, i, window_size, exchange)

    # Provide a search interface.

    return Searcher(master_index.get_index())
paulb@4 | 43 | |
if __name__ == "__main__":
    import sys

    # Get the parameters: a window size of at least 1, then a directory.
    # Missing, malformed or non-positive values are reported as a usage
    # error rather than an IndexError/ValueError traceback.

    try:
        window_size, directory = int(sys.argv[1]), sys.argv[2]
    except (IndexError, ValueError):
        window_size = 0

    if window_size < 1:
        sys.stderr.write(
            "Usage: %s <window size (>= 1)> <directory> [--noprompt]\n" % sys.argv[0]
            )
        sys.exit(1)

    # Build a list of filenames, keeping only regular files.

    filenames = [os.path.join(directory, filename) for filename in os.listdir(directory)]
    filenames = [filename for filename in filenames if os.path.isfile(filename)]

    # Get a searcher using an index built in parallel.

    searcher = get_searcher(filenames, window_size)

    # Present a user interface unless --noprompt was given.

    if "--noprompt" not in sys.argv:
        while 1:
            try:
                pattern = raw_input("Pattern: ")
            except (EOFError, KeyboardInterrupt):

                # End of input or an interrupt finishes the session quietly
                # instead of with a traceback.

                print("")
                break

            # A parenthesised single value prints the same under the
            # Python 2 print statement.

            print(searcher.find(pattern))
paulb@4 | 67 | |
paulb@4 | 68 | # vim: tabstop=4 expandtab shiftwidth=4 |