# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1317419187 -7200
# Node ID baee3f51e8f51bee6308dec3182d74f6a0e33b8a
# Parent  729f5f4a3fd2adf3ae4e4243e5ceca02f17ef686
Added a wrapper for text files and changed the basic functions to use the API
provided by the wrapper.
Fixed the copyright information.

diff -r 729f5f4a3fd2 -r baee3f51e8f5 simplex.py
--- a/simplex.py	Fri Sep 30 00:44:45 2011 +0200
+++ b/simplex.py	Fri Sep 30 23:46:27 2011 +0200
@@ -3,7 +3,7 @@
 """
 Simple indexing of sorted files.
 
-Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -37,29 +37,46 @@
 
 import bisect
 
-def index_by_lines(f, interval):
+class TextFile:
+
+    "A wrapper around text files."
+
+    def __init__(self, f, key=0, delimiter=None):
+        self.f = f
+        self.key = key
+        self.delimiter = delimiter
+
+    def seek(self, pos):
+        self.f.seek(pos)
+
+    def get_records(self):
+        return self.f.xreadlines()
+
+    def get_key(self, record):
+        return record.split(self.delimiter)[self.key]
+
+def index_file(f, interval):
 
     """
-    Index a file 'f', creating an index entry for a line after a given number,
-    defined by 'interval', has been read since the last entry.
+    Index a file 'f', creating an index entry for a record once a given number
+    of records, defined by 'interval', have been read since the last entry.
     """
 
     l = []
     pos = 0
 
-    for i, line in enumerate(f.xreadlines()):
-        columns = line.split("\t")
+    for i, record in enumerate(f.get_records()):
         if i % interval == 0:
-            l.append((columns[0], pos))
-        pos += len(line)
+            l.append((f.get_key(record), pos))
+        pos += len(record)
 
     return l
 
-def find_line_with_index(f, l, term):
+def find_with_index(f, l, term):
 
     """
     Find in file 'f', using the given index list 'l', the given 'term',
-    returning a line employing the term or None if no such line was found.
+    returning a record employing the term or None if no such record was found.
     """
 
     i = bisect.bisect_left(l, (term, None))
@@ -74,19 +91,18 @@
         found, pos = l[i]
 
     f.seek(pos)
-    return find_line_in_file(f, term)
+    return find_in_file(f, term)
 
-def find_line_in_file(f, term):
+def find_in_file(f, term):
 
     """
-    Find in file 'f' the given 'term', returning a line employing the term or
-    None if no such line was found.
+    Find in file 'f' the given 'term', returning a record employing the term or
+    None if no such record was found.
     """
 
-    for line in f.xreadlines():
-        columns = line.split("\t")
-        if term == columns[0]:
-            return line
+    for record in f.get_records():
+        if term == f.get_key(record):
+            return record
 
     return None
 
@@ -99,6 +115,6 @@
         self.f = f
 
     def find(self, term):
-        return find_line_with_index(self.f, self.entries, term)
+        return find_with_index(self.f, self.entries, term)
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 729f5f4a3fd2 -r baee3f51e8f5 test_indexed.py
--- a/test_indexed.py	Fri Sep 30 00:44:45 2011 +0200
+++ b/test_indexed.py	Fri Sep 30 23:46:27 2011 +0200
@@ -7,16 +7,17 @@
 terms = sys.argv[3:]
 
 f = open(filename)
+tf = TextFile(f)
 try:
     t = time.time()
-    l = index_by_lines(f, int(step))
+    l = index_file(tf, int(step))
     print "Indexed in %s seconds." % (time.time() - t)
 
     # Now use the index.
 
     for term in terms:
         t = time.time()
-        line = find_line_with_index(f, l, term)
+        line = find_with_index(tf, l, term)
         if line:
             print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
 
diff -r 729f5f4a3fd2 -r baee3f51e8f5 test_scan.py
--- a/test_scan.py	Fri Sep 30 00:44:45 2011 +0200
+++ b/test_scan.py	Fri Sep 30 23:46:27 2011 +0200
@@ -7,12 +7,13 @@
 terms = sys.argv[2:]
 
 f = open(filename)
+tf = TextFile(f)
 try:
     for term in terms:
-        f.seek(0)
+        tf.seek(0)
 
         t = time.time()
-        line = find_line_in_file(f, term)
+        line = find_in_file(tf, term)
         if line:
             print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
 finally: