1.1 --- a/iixr/positions.py Tue Sep 15 00:32:56 2009 +0200
1.2 +++ b/iixr/positions.py Wed Sep 16 00:45:18 2009 +0200
1.3 @@ -214,11 +214,11 @@
1.4 FileReader.__init__(self, f)
1.5 IteratorBase.__init__(self, count)
1.6 self.seek(offset)
1.7 - self.section_count = 0
1.8
1.9 def reset(self):
1.10 self.last_docnum = 0
1.11 self.last_pos_offset = 0
1.12 + self.section_count = 0
1.13
1.14 def read_positions(self):
1.15
1.16 @@ -250,6 +250,7 @@
1.17 docnum, pos_offset, self.section_count = t = self.read_positions()
1.18 return t
1.19 else:
1.20 + assert self.read_documents == self.count
1.21 raise StopIteration
1.22
1.23 class PositionDictionaryWriter:
1.24 @@ -364,14 +365,34 @@
1.25
1.26 def __init__(self, position_opener, position_index_opener, offset, doc_frequency):
1.27 self.position_opener = position_opener
1.28 + self.position_index_opener = position_index_opener
1.29 self.doc_frequency = doc_frequency
1.30 - self.index_iterator = position_index_opener.read_term_positions(offset, doc_frequency)
1.31 +
1.32 + self.index_iterator = None
1.33 self.iterator = None
1.34
1.35 + # Initialise the iterators.
1.36 +
1.37 + self.reset(offset, doc_frequency)
1.38 +
1.39 + def reset(self, offset, doc_frequency):
1.40 +
1.41 # Remember the last values.
1.42
1.43 self.found_docnum, self.found_positions = None, None
1.44
1.45 + # Attempt to reuse the index iterator.
1.46 +
1.47 + if self.index_iterator is not None:
1.48 + self.index_iterator.replenish(doc_frequency)
1.49 + self.index_iterator.seek(offset)
1.50 + self.index_iterator.reset()
1.51 +
1.52 + # Or make a new index iterator.
1.53 +
1.54 + else:
1.55 + self.index_iterator = self.position_index_opener.read_term_positions(offset, doc_frequency)
1.56 +
1.57 # Maintain state for the next index entry, if read.
1.58
1.59 self.next_docnum, self.next_pos_offset, self.next_section_count = None, None, None
1.60 @@ -510,9 +531,17 @@
1.61
1.62 "Initialise the iterator for the section in the position file."
1.63
1.64 + # Attempt to reuse any correctly positioned iterator.
1.65 +
1.66 if self.iterator is not None:
1.67 - self.iterator.close()
1.68 - self.iterator = self.position_opener.read_term_positions(self.pos_offset, self.section_count)
1.69 + self.iterator.replenish(self.section_count)
1.70 + self.iterator.seek(self.pos_offset)
1.71 + self.iterator.reset()
1.72 +
1.73 + # Otherwise, obtain a new iterator.
1.74 +
1.75 + else:
1.76 + self.iterator = self.position_opener.read_term_positions(self.pos_offset, self.section_count)
1.77
1.78 def close(self):
1.79 if self.iterator is not None:
1.80 @@ -522,4 +551,20 @@
1.81 self.index_iterator.close()
1.82 self.index_iterator = None
1.83
1.84 +class ResetPositionDictionaryIterator:
1.85 +
1.86 + """
1.87 + A helper class which permits the reuse of an iterator, resetting it to the
1.88 + stored offset and document frequency only when iteration actually begins.
1.89 + """
1.90 +
1.91 + def __init__(self, iterator, offset, doc_frequency):
1.92 + self.iterator = iterator
1.93 + self.offset = offset
1.94 + self.doc_frequency = doc_frequency
1.95 +
1.96 + def __iter__(self):
1.97 + self.iterator.reset(self.offset, self.doc_frequency)
1.98 + return iter(self.iterator)
1.99 +
1.100 # vim: tabstop=4 expandtab shiftwidth=4