# HG changeset patch # User Paul Boddie # Date 1317480029 -7200 # Node ID 6e1d369de4b38dc9934e2a3548d4fdc6b4fdf6cf # Parent 48a194ecc68c42b32cb80f7b43a598c67582cfc0 Incorporated usage of accessors into readers so that they can provide records and keys directly. diff -r 48a194ecc68c -r 6e1d369de4b3 simplex/__init__.py --- a/simplex/__init__.py Sat Oct 01 16:01:22 2011 +0200 +++ b/simplex/__init__.py Sat Oct 01 16:40:29 2011 +0200 @@ -31,13 +31,12 @@ from simplex.readers import * import bisect -def make_index(reader, accessor, interval): +def make_index(reader, interval): """ - Index a resource whose 'reader' provides records and whose 'accessor' can - yield the key for such records, creating an index entry for a record after a - given number of records, defined by 'interval', have been read since the - last entry was produced. + Index a resource whose 'reader' provides records and keys, creating an index + entry for a record after a given number of records, defined by 'interval', + have been read since the last entry was produced. """ l = [] @@ -46,8 +45,7 @@ current_key = None start_pos = 0 - for i, record in enumerate(reader.get_records()): - key = accessor.get_key(record) + for i, (key, record) in enumerate(reader.get_records()): # Where duplicate keys are permitted, the first record employing the key # must be available as an index entry. Otherwise, records preceding the @@ -65,13 +63,12 @@ return l -def find_with_index(reader, accessor, l, term): +def find_with_index(reader, l, term): """ - Find in the resource whose 'reader' provides records and whose 'accessor' - can yield the key for such records, using the given index list 'l', the - given 'term', returning a record employing the term or None if no such - record was found. + In the resource whose 'reader' provides records and keys, using the given + index list 'l', find the given 'term', returning a record employing the term + or None if no such record was found. """ i = bisect.bisect_left(l, (term, None)) @@ -90,18 +87,17 @@ found, pos = l[i] reader.seek(pos) - return find_in_file(reader, accessor, term) + return find_in_file(reader, term) -def find_in_file(reader, accessor, term): +def find_in_file(reader, term): """ - Find in the resource whose 'reader' provides records and whose 'accessor' - can yield the key for such records, the given 'term', returning a record - employing the term or None if no such record was found. + In the resource whose 'reader' provides records and keys, find the given + 'term', returning a record employing the term or None if no such record was + found. """ - for record in reader.get_records(): - key = accessor.get_key(record) + for key, record in reader.get_records(): if term == key: return record diff -r 48a194ecc68c -r 6e1d369de4b3 simplex/readers.py --- a/simplex/readers.py Sat Oct 01 16:01:22 2011 +0200 +++ b/simplex/readers.py Sat Oct 01 16:40:29 2011 +0200 @@ -22,14 +22,34 @@ "A wrapper around text files." - def __init__(self, f): + def __init__(self, f, accessor): self.f = f + self.accessor = accessor def seek(self, pos): self.f.seek(pos) def get_records(self): - return self.f.xreadlines() + return FileIterator(self.f, self.accessor) + +class FileIterator: + + "An iterator over records employing record accessors." + + def __init__(self, resource, accessor): + self.resource = resource + self.accessor = accessor + self.iterator = None + + def __iter__(self): + self.iterator = iter(self.resource.xreadlines()) + return self + + def next(self): + if self.iterator is None: + iter(self) + record = self.iterator.next() + return self.accessor.get_key(record), record class DelimitedRecord: @@ -47,6 +67,9 @@ self.keys = keys or [0] self.delimiter = delimiter + + # Define a conversion method. + self.convert = numeric and self.convert_numeric or (lambda x: x) def convert_numeric(self, term): diff -r 48a194ecc68c -r 6e1d369de4b3 test_indexed.py --- a/test_indexed.py Sat Oct 01 16:01:22 2011 +0200 +++ b/test_indexed.py Sat Oct 01 16:40:29 2011 +0200 @@ -13,18 +13,18 @@ sys.exit(1) f = open(filename) -reader = TextFile(f) accessor = DelimitedRecord(keys, numeric=(numeric == "true")) +reader = TextFile(f, accessor) try: t = time.time() - l = make_index(reader, accessor, int(interval)) + l = make_index(reader, int(interval)) print "Indexed in %s seconds." % (time.time() - t) # Now use the index. for term in terms: t = time.time() - line = find_with_index(reader, accessor, l, accessor.convert(term)) + line = find_with_index(reader, l, accessor.convert(term)) if line: print "Found (at %s seconds)...\n%s" % (time.time() - t, line) diff -r 48a194ecc68c -r 6e1d369de4b3 test_scan.py --- a/test_scan.py Sat Oct 01 16:01:22 2011 +0200 +++ b/test_scan.py Sat Oct 01 16:40:29 2011 +0200 @@ -13,14 +13,14 @@ sys.exit(1) f = open(filename) -reader = TextFile(f) accessor = DelimitedRecord(keys, numeric=(numeric == "true")) +reader = TextFile(f, accessor) try: for term in terms: reader.seek(0) t = time.time() - line = find_in_file(reader, accessor, accessor.convert(term)) + line = find_in_file(reader, accessor.convert(term)) if line: print "Found (at %s seconds)...\n%s" % (time.time() - t, line) finally: