#!/usr/bin/env python

"""
Look up terms in a delimited data file using a separately stored index file.

Usage: <program> <filename> <numeric> <index> <field>... -- <term value>...

Lines found for each term are written to standard output; progress and timing
messages are written to standard error.
"""

from simplex import *
import sys, time

def from_index_record(convert, record):

    """
    Parse a single tab-separated 'record' read from the index file and return
    a (key, pos) tuple.

    All values except the last are passed, as a list, to the 'convert'
    callable in order to produce the key. The last value is converted to an
    integer (presumably a position within the data file - confirm against the
    program that writes the index).

    A ValueError is raised if the last value is not an integer.
    """

    values = record.split("\t")
    key = convert(values[:-1])

    # int() tolerates the trailing newline left on lines read from a file.

    pos = int(values[-1])
    return key, pos

# Interpret the command line: three fixed arguments, then the field numbers,
# then "--", then the term values.

try:
    separator = sys.argv.index("--")
    filename, numeric, index_filename = sys.argv[1:4]
    fields = map(int, sys.argv[4:separator])

    # NOTE(review): groups comes from simplex; it is given the term values and
    # the number of fields, presumably yielding one group of values per term.

    terms = groups(sys.argv[separator+1:], len(fields))
    numeric = numeric == "true"

# A missing "--" or a non-integer field raises ValueError, as does having too
# few leading arguments to unpack.

except (IndexError, ValueError):
    print >>sys.stderr, "Usage: %s <filename> <numeric> <index> <field>... -- <term value>..." % sys.argv[0]
    sys.exit(1)

# Acquire the files inside the guarded section so that a failure to open the
# index file, or to set up the accessor, still closes whatever was opened.

f = None
fi = None

try:
    f = open(filename)
    fi = open(index_filename)
    accessor = DelimitedRecord(fields, numeric=numeric)

    # Read the complete index into memory, timing the operation.

    t = time.time()
    entries = [from_index_record(accessor.convert, record) for record in fi]
    print >>sys.stderr, "Read index (at %s seconds, with %d entries)." % (time.time() - t, len(entries))

    # Now use the index.

    index = IndexedFile(f, entries, accessor.get_key)

    for term in terms:

        # Convert the term to the appropriate type.

        term = accessor.convert(term)

        # Perform the search.

        t = time.time()
        lines = index.find(term)

        # Only report and emit output when the search produced a true result.

        if lines:
            print >>sys.stderr, "Found (at %s seconds)..." % (time.time() - t)
            for line in lines:
                sys.stdout.write(line)

finally:

    # Close only what was actually opened, data file first.

    if f is not None:
        f.close()
    if fi is not None:
        fi.close()

# vim: tabstop=4 expandtab shiftwidth=4