# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1317423376 -7200
# Node ID dc3d2ee38bae3135c0f26db8e00ec299f94f6c82
# Parent  b18d0bb03a08ff2a789b87bdfbffdde5a2f8dc58
Separated the reader and accessor concerns so that different objects can provide
records from resources and access to the details within records.

diff -r b18d0bb03a08 -r dc3d2ee38bae simplex.py
--- a/simplex.py	Sat Oct 01 00:40:38 2011 +0200
+++ b/simplex.py	Sat Oct 01 00:56:16 2011 +0200
@@ -26,13 +26,6 @@
 encouraging multiple seeks and reads are likely to waste time compared to just
 performing a single read operation, even if that operation involves a larger
 quantity of data, at least for storage with hard disk access characteristics.
-
-Potential Improvements
-----------------------
-
-Ideally, the acquisition of records should be done more generally than just
-reading lines, and the selection of matches should involve more than just
-selecting the first column.
 """
 
 import bisect
@@ -56,11 +49,13 @@
         values = record.split(self.delimiter)
         return [values[key] for key in self.keys]
 
-def index_file(f, interval):
+def make_index(reader, accessor, interval):
 
     """
-    Index a file 'f', creating an index entry for a record after a given number,
-    defined by 'interval', have been read since the last entry.
+    Index a resource whose 'reader' provides records and whose 'accessor' can
+    yield the key for such records, creating an index entry for a record after a
+    given number of records, defined by 'interval', have been read since the
+    last entry was produced.
     """
 
     l = []
@@ -69,8 +64,8 @@
     current_key = None
     start_pos = 0
 
-    for i, record in enumerate(f.get_records()):
-        key = f.get_key(record)
+    for i, record in enumerate(reader.get_records()):
+        key = accessor.get_key(record)
 
         # Where duplicate keys are permitted, the first record employing the key
         # must be available as an index entry. Otherwise, records preceding the
@@ -88,11 +83,13 @@
 
     return l
 
-def find_with_index(f, l, term):
+def find_with_index(reader, accessor, l, term):
 
     """
-    Find in file 'f', using the given index list 'l', the given 'term',
-    returning a record employing the term or None if no such record was found.
+    Find the given 'term' in the resource whose 'reader' provides records and
+    whose 'accessor' can yield the key for such records, using the given index
+    list 'l', and return a record employing the term or None if no such record
+    was found.
     """
 
     i = bisect.bisect_left(l, (term, None))
@@ -110,33 +107,23 @@
         i = max(0, i - 1)
         found, pos = l[i]
 
-    f.seek(pos)
-    return find_in_file(f, term)
+    reader.seek(pos)
+    return find_in_file(reader, accessor, term)
 
-def find_in_file(f, term):
+def find_in_file(reader, accessor, term):
 
     """
-    Find in file 'f' the given 'term', returning a record employing the term or
-    None if no such record was found.
+    Find the given 'term' in the resource whose 'reader' provides records and
+    whose 'accessor' can yield the key for such records, returning a record
+    employing the term or None if no such record was found.
     """
 
-    for record in f.get_records():
-        if term == f.get_key(record):
+    for record in reader.get_records():
+        if term == accessor.get_key(record):
             return record
 
     return None
 
-class Index:
-
-    "An index abstraction."
-
-    def __init__(self, entries, f):
-        self.entries = entries
-        self.f = f
-
-    def find(self, term):
-        return find_with_index(self.f, self.entries, term)
-
 def groups(l, length):
 
     "Split 'l' into groups of the given 'length'."
diff -r b18d0bb03a08 -r dc3d2ee38bae test_indexed.py
--- a/test_indexed.py	Sat Oct 01 00:40:38 2011 +0200
+++ b/test_indexed.py	Sat Oct 01 00:56:16 2011 +0200
@@ -13,17 +13,17 @@
     sys.exit(1)
 
 f = open(filename)
-tf = TextFile(f, keys)
+reader = TextFile(f, keys)
 try:
     t = time.time()
-    l = index_file(tf, int(interval))
+    l = make_index(reader, reader, int(interval))
     print "Indexed in %s seconds." % (time.time() - t)
 
     # Now use the index.
 
     for term in terms:
         t = time.time()
-        line = find_with_index(tf, l, term)
+        line = find_with_index(reader, reader, l, term)
         if line:
             print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
 
diff -r b18d0bb03a08 -r dc3d2ee38bae test_scan.py
--- a/test_scan.py	Sat Oct 01 00:40:38 2011 +0200
+++ b/test_scan.py	Sat Oct 01 00:56:16 2011 +0200
@@ -13,13 +13,13 @@
     sys.exit(1)
 
 f = open(filename)
-tf = TextFile(f, keys)
+reader = TextFile(f, keys)
 try:
     for term in terms:
-        tf.seek(0)
+        reader.seek(0)
 
         t = time.time()
-        line = find_in_file(tf, term)
+        line = find_in_file(reader, reader, term)
         if line:
             print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
 finally: