# HG changeset patch # User Paul Boddie # Date 1317594870 -7200 # Node ID e0ed0702667c771a1ae37d4f6771fd8a4cf0c7ca # Parent 0897d076edbb457d2e8f15a3bfa5ce7287eca097 Introduced an abstraction for an indexed file. diff -r 0897d076edbb -r e0ed0702667c simplex/__init__.py --- a/simplex/__init__.py Mon Oct 03 00:17:53 2011 +0200 +++ b/simplex/__init__.py Mon Oct 03 00:34:30 2011 +0200 @@ -32,6 +32,18 @@ from simplex.indexers import * import bisect +class IndexedFile: + + "An indexed file referring to a sorted file." + + def __init__(self, reader, index_reader, get_key): + self.reader = reader + self.index_reader = index_reader + self.get_key = get_key + + def find(self, term): + return find_with_index(self.reader, self.get_key, self.index_reader, term) + def find_with_index(reader, get_key, l, term): """ diff -r 0897d076edbb -r e0ed0702667c test_indexed.py --- a/test_indexed.py Mon Oct 03 00:17:53 2011 +0200 +++ b/test_indexed.py Mon Oct 03 00:34:30 2011 +0200 @@ -14,6 +14,7 @@ f = open(filename) accessor = DelimitedRecord(fields, numeric=(numeric == "true")) + try: t = time.time() l = make_index(f, accessor.get_key, int(interval)) @@ -21,9 +22,17 @@ # Now use the index. + index = IndexedFile(f, l, accessor.get_key) for term in terms: + + # Convert the term to the appropriate type. + + term = accessor.convert(term) + + # Perform the search. + t = time.time() - line = find_with_index(f, accessor.get_key, l, accessor.convert(term)) + line = index.find(term) if line: print "Found (at %s seconds)...\n%s" % (time.time() - t, line) diff -r 0897d076edbb -r e0ed0702667c test_read.py --- a/test_read.py Mon Oct 03 00:17:53 2011 +0200 +++ b/test_read.py Mon Oct 03 00:34:30 2011 +0200 @@ -30,9 +30,18 @@ # Now use the index. + index = IndexedFile(f, l, accessor.get_key) + for term in terms: + + # Convert the term to the appropriate type. + + term = accessor.convert(term) + + # Perform the search. + t = time.time() - line = find_with_index(f, accessor.get_key, l, accessor.convert(term)) + line = index.find(term) if line: print "Found (at %s seconds)...\n%s" % (time.time() - t, line) diff -r 0897d076edbb -r e0ed0702667c test_scan.py --- a/test_scan.py Mon Oct 03 00:17:53 2011 +0200 +++ b/test_scan.py Mon Oct 03 00:34:30 2011 +0200 @@ -14,12 +14,20 @@ f = open(filename) accessor = DelimitedRecord(fields, numeric=(numeric == "true")) + try: for term in terms: + + # Convert the term to the appropriate type. + + term = accessor.convert(term) + + # Perform the search. + f.seek(0) t = time.time() - line = find_in_file(f, accessor.get_key, accessor.convert(term)) + line = find_in_file(f, accessor.get_key, term) if line: print "Found (at %s seconds)...\n%s" % (time.time() - t, line) finally: