# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1317474437 -7200
# Node ID c120a1af7f6c785940cf75d0dfbc9cf879bdd11b
# Parent  a78ee57c5c7d89d8b109f0f370f6cda9b27c1dba
Replaced the single module with a package.

diff -r a78ee57c5c7d -r c120a1af7f6c simplex.py
--- a/simplex.py	Sat Oct 01 01:14:13 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Simple indexing of sorted files.
-
-Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; either version 3 of the License, or (at your option) any later
-version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-PARTICULAR PURPOSE.  See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Motivations
------------
-
-When searching files, the cost to scan an entire file typically exceeds the
-cost to navigate to the appropriate region and to scan a smaller portion of the
-file, especially for large files. At the same time, exotic data structures
-encouraging multiple seeks and reads are likely to waste time compared to just
-performing a single read operation, even if that operation involves a larger
-quantity of data, at least for storage with hard disk access characteristics.
-"""
-
-import bisect
-
-class TextFile:
-
-    "A wrapper around text files."
-
-    def __init__(self, f):
-        self.f = f
-
-    def seek(self, pos):
-        self.f.seek(pos)
-
-    def get_records(self):
-        return self.f.xreadlines()
-
-class DelimitedRecord:
-
-    "An accessor using a delimiter to split a record."
-
-    def __init__(self, keys=None, delimiter=None):
-        self.keys = keys or [0]
-        self.delimiter = delimiter
-
-    def get_key(self, record):
-        values = record.split(self.delimiter)
-        return [values[key] for key in self.keys]
-
-def make_index(reader, accessor, interval):
-
-    """
-    Index a resource whose 'reader' provides records and whose 'accessor' can
-    yield the key for such records, creating an index entry for a record after a
-    given number of records, defined by 'interval', have been read since the
-    last entry was produced.
-    """
-
-    l = []
-    pos = 0
-
-    current_key = None
-    start_pos = 0
-
-    for i, record in enumerate(reader.get_records()):
-        key = accessor.get_key(record)
-
-        # Where duplicate keys are permitted, the first record employing the key
-        # must be available as an index entry. Otherwise, records preceding the
-        # one referenced by the entry may have the same key and be missed when
-        # seeking using the index.
-
-        if key != current_key:
-            current_key = key
-            start_pos = pos
-
-        if i % interval == 0:
-            l.append((current_key, start_pos))
-
-        pos += len(record)
-
-    return l
-
-def find_with_index(reader, accessor, l, term):
-
-    """
-    Find in the resource whose 'reader' provides records and whose 'accessor'
-    can yield the key for such records, using the given index list 'l', the
-    given 'term', returning a record employing the term or None if no such
-    record was found.
-    """
-
-    i = bisect.bisect_left(l, (term, None))
-
-    try:
-        found, pos = l[i]
-    except IndexError:
-        return None
-
-    # Since the index is more coarse than the underlying file, the bisect left
-    # operation will most likely point to an index entry for later records than
-    # the desired one.
-
-    if found > term:
-        i = max(0, i - 1)
-        found, pos = l[i]
-
-    reader.seek(pos)
-    return find_in_file(reader, accessor, term)
-
-def find_in_file(reader, accessor, term):
-
-    """
-    Find in the resource whose 'reader' provides records and whose 'accessor'
-    can yield the key for such records, the given 'term', returning a record
-    employing the term or None if no such record was found.
-    """
-
-    for record in reader.get_records():
-        if term == accessor.get_key(record):
-            return record
-
-    return None
-
-def groups(l, length):
-
-    "Split 'l' into groups of the given 'length'."
-
-    if length <= 0:
-        raise ValueError, "Groups must be greater than zero."
-
-    i = 0
-    g = []
-
-    while i < len(l):
-        g.append(l[i:i+length])
-        i += length
-
-    return g
-
-# vim: tabstop=4 expandtab shiftwidth=4
diff -r a78ee57c5c7d -r c120a1af7f6c simplex/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/simplex/__init__.py	Sat Oct 01 15:07:17 2011 +0200
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+
+"""
+Simple indexing of sorted files.
+
+Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Motivations
+-----------
+
+When searching files, the cost to scan an entire file typically exceeds the
+cost to navigate to the appropriate region and to scan a smaller portion of the
+file, especially for large files. At the same time, exotic data structures
+encouraging multiple seeks and reads are likely to waste time compared to just
+performing a single read operation, even if that operation involves a larger
+quantity of data, at least for storage with hard disk access characteristics.
+"""
+
+from simplex.readers import *
+import bisect
+
+def make_index(reader, accessor, interval):
+
+    """
+    Index a resource whose 'reader' provides records and whose 'accessor' can
+    yield the key for such records, creating an index entry for a record after a
+    given number of records, defined by 'interval', have been read since the
+    last entry was produced.
+    """
+
+    l = []
+    pos = 0
+
+    current_key = None
+    start_pos = 0
+
+    for i, record in enumerate(reader.get_records()):
+        key = accessor.get_key(record)
+
+        # Where duplicate keys are permitted, the first record employing the key
+        # must be available as an index entry. Otherwise, records preceding the
+        # one referenced by the entry may have the same key and be missed when
+        # seeking using the index.
+
+        if key != current_key:
+            current_key = key
+            start_pos = pos
+
+        if i % interval == 0:
+            l.append((current_key, start_pos))
+
+        pos += len(record)
+
+    return l
+
+def find_with_index(reader, accessor, l, term):
+
+    """
+    Find in the resource whose 'reader' provides records and whose 'accessor'
+    can yield the key for such records, using the given index list 'l', the
+    given 'term', returning a record employing the term or None if no such
+    record was found.
+    """
+
+    i = bisect.bisect_left(l, (term, None))
+
+    try:
+        found, pos = l[i]
+    except IndexError:
+        return None
+
+    # Since the index is more coarse than the underlying file, the bisect left
+    # operation will most likely point to an index entry for later records than
+    # the desired one.
+
+    if found > term:
+        i = max(0, i - 1)
+        found, pos = l[i]
+
+    reader.seek(pos)
+    return find_in_file(reader, accessor, term)
+
+def find_in_file(reader, accessor, term):
+
+    """
+    Find in the resource whose 'reader' provides records and whose 'accessor'
+    can yield the key for such records, the given 'term', returning a record
+    employing the term or None if no such record was found.
+    """
+
+    for record in reader.get_records():
+        if term == accessor.get_key(record):
+            return record
+
+    return None
+
+def groups(l, length):
+
+    "Split 'l' into groups of the given 'length'."
+
+    if length <= 0:
+        raise ValueError, "Groups must be greater than zero."
+
+    i = 0
+    g = []
+
+    while i < len(l):
+        g.append(l[i:i+length])
+        i += length
+
+    return g
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r a78ee57c5c7d -r c120a1af7f6c simplex/readers.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/simplex/readers.py	Sat Oct 01 15:07:17 2011 +0200
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+"""
+Reader and accessor classes for indexing.
+
+Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+class TextFile:
+
+    "A wrapper around text files."
+
+    def __init__(self, f):
+        self.f = f
+
+    def seek(self, pos):
+        self.f.seek(pos)
+
+    def get_records(self):
+        return self.f.xreadlines()
+
+class DelimitedRecord:
+
+    "An accessor using a delimiter to split a record."
+
+    def __init__(self, keys=None, delimiter=None):
+
+        """
+        Initialise the accessor using a sequence of 'keys' indicating the
+        columns in each record that provide the values in the eventual compound
+        key provided by each record, along with a 'delimiter' indicating how
+        such columns are identified.
+        """
+
+        self.keys = keys or [0]
+        self.delimiter = delimiter
+
+    def get_key(self, record):
+        values = record.split(self.delimiter)
+        return [values[key] for key in self.keys]
+
+# vim: tabstop=4 expandtab shiftwidth=4