1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/simplex/indexers.py Sun Oct 02 20:43:03 2011 +0200
1.3 @@ -0,0 +1,86 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +Indexing classes.
1.8 +
1.9 +Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>
1.10 +
1.11 +This program is free software; you can redistribute it and/or modify it under
1.12 +the terms of the GNU General Public License as published by the Free Software
1.13 +Foundation; either version 3 of the License, or (at your option) any later
1.14 +version.
1.15 +
1.16 +This program is distributed in the hope that it will be useful, but WITHOUT ANY
1.17 +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
1.18 +PARTICULAR PURPOSE. See the GNU General Public License for more details.
1.19 +
1.20 +You should have received a copy of the GNU General Public License along
1.21 +with this program. If not, see <http://www.gnu.org/licenses/>.
1.22 +"""
1.23 +
class Indexer:

    "An indexer which records an entry periodically."

    def __init__(self, output, get_key, interval):

        """
        Index a resource, recording entries in the given 'output' sequence,
        using a 'get_key' operation to yield the key for each record, creating
        an index entry for a record after a given number of records, defined by
        'interval', have been appended since the last entry was produced.

        The 'output' object need only provide an 'append' method; each entry
        appended to it is a (key, position) tuple.

        A ValueError is raised if 'interval' is less than one, since such a
        value cannot describe a number of records between entries.
        """

        # Reject unusable intervals here instead of failing with an obscure
        # ZeroDivisionError when the first record is eventually appended.

        if interval < 1:
            raise ValueError(
                "interval must be a positive number of records, not %r"
                % (interval,))

        self.output = output
        self.interval = interval
        self.get_key = get_key

        # The number of records presented so far and the position of the next
        # record, the latter being the sum of the lengths of earlier records.

        self.count = 0
        self.pos = 0

        # Information about the current group: the key shared by the most
        # recent run of records and the position of the first of them.

        self.start_pos = 0
        self.current_key = None

    def append(self, record):

        """
        Present the given 'record' to the indexer, recording it if appropriate.

        The record must support 'len', since its length is used to advance the
        position at which the next record is considered to start.
        """

        key = self.get_key(record)

        # Where duplicate keys are permitted, the first record employing the key
        # must be available as an index entry. Otherwise, records preceding the
        # one referenced by the entry may have the same key and be missed when
        # seeking using the index.

        if key != self.current_key:
            self.current_key = key
            self.start_pos = self.pos

        # Entries always refer to the start of the current group, not to the
        # record itself. A group spanning several intervals therefore yields
        # the same entry more than once.

        if self.count % self.interval == 0:
            self.output.append((self.current_key, self.start_pos))

        self.count += 1
        self.pos += len(record)
1.71 +
def make_index(reader, get_key, interval):

    """
    Build and return an index for the records obtained by iterating over the
    given 'reader'. Each record is presented in turn to an Indexer which uses
    the 'get_key' operation to obtain the record's key and which produces an
    entry once every 'interval' records.

    The result is the list of entries collected by the Indexer.
    """

    entries = []
    present = Indexer(entries, get_key, interval).append

    # Feed every record through the indexer in the order supplied.

    for record in reader:
        present(record)

    return entries
1.88 +
1.89 +# vim: tabstop=4 expandtab shiftwidth=4