#!/usr/bin/env python

"""
Indexing classes.

Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

class Indexer:

    """
    An indexer which records an entry periodically.

    Each entry is a (key, position) tuple appended to the 'output' sequence,
    where the position is the sum of the lengths of all records presented
    before the first record of the current run of equal keys.
    """

    def __init__(self, output, get_key, interval):

        """
        Index a resource, recording entries in the given 'output' sequence,
        using a 'get_key' operation to yield the key for each record, creating
        an index entry for a record after a given number of records, defined by
        'interval', have been appended since the last entry was produced.

        The 'output' object only needs to provide an 'append' method, and each
        record presented later must support 'len'.
        """

        self.output = output
        self.interval = interval
        self.get_key = get_key

        # The number of records seen so far and the accumulated length of those
        # records (the position of the next record to be presented).

        self.count = 0
        self.pos = 0

        # Information about the current group: the position of the first record
        # in the current run of records sharing the same key, and that key.

        self.start_pos = 0
        self.current_key = None

    def append(self, record):

        """
        Present the given 'record' to the indexer, recording it if appropriate.

        An entry is written for every record whose zero-based ordinal is a
        multiple of the interval, meaning that the very first record always
        produces an entry. The entry refers to the start of the current run of
        equal keys, not necessarily to the record itself.
        """

        key = self.get_key(record)

        # Where duplicate keys are permitted, the first record employing the key
        # must be available as an index entry. Otherwise, records preceding the
        # one referenced by the entry may have the same key and be missed when
        # seeking using the index.

        if key != self.current_key:
            self.current_key = key
            self.start_pos = self.pos

        # Emit an entry at each interval boundary. A run of equal keys spanning
        # several boundaries yields repeated, identical entries.

        if self.count % self.interval == 0:
            self.output.append((self.current_key, self.start_pos))

        self.count += 1
        self.pos += len(record)

def make_index(reader, get_key, interval, output=None):

    """
    Index a resource whose 'reader' provides records, using a 'get_key'
    operation to yield the key for such records, creating an index entry for a
    record after a given number of records, defined by 'interval', have been
    read since the last entry was produced.

    Either append index entries to the given 'output' sequence, or populate a
    new list.

    Return the sequence to which the entries were appended: the given 'output'
    if one was supplied (even if it was empty), or the new list otherwise.
    """

    # Test explicitly for None: an empty sequence supplied by the caller is
    # false in a boolean context but must still receive the entries.

    if output is None:
        entries = []
    else:
        entries = output

    indexer = Indexer(entries, get_key, interval)

    for record in reader:
        indexer.append(record)

    return entries

# vim: tabstop=4 expandtab shiftwidth=4