1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/iixr/phrases.py Tue Sep 22 01:08:13 2009 +0200
1.3 @@ -0,0 +1,56 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +Phrase iterators providing navigation over common positions for a number of
1.8 +different terms.
1.9 +
1.10 +Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
1.11 +
1.12 +This program is free software; you can redistribute it and/or modify it under
1.13 +the terms of the GNU General Public License as published by the Free Software
1.14 +Foundation; either version 3 of the License, or (at your option) any later
1.15 +version.
1.16 +
1.17 +This program is distributed in the hope that it will be useful, but WITHOUT ANY
1.18 +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
1.19 +PARTICULAR PURPOSE. See the GNU General Public License for more details.
1.20 +
1.21 +You should have received a copy of the GNU General Public License along
1.22 +with this program. If not, see <http://www.gnu.org/licenses/>.
1.23 +"""
1.24 +
1.25 +from itermerge import itermerge
1.26 +from bisect import insort_right
1.27 +
class PhraseIterator(itermerge):

    """
    Iteration over many terms, producing only those documents for which every
    term iterator is able to supply positions.
    """

    def __init__(self, sequences):

        """
        Initialise the iterator with the given 'sequences', which are passed
        unchanged to the itermerge initialiser.
        """

        itermerge.__init__(self, sequences)

    def _add_iter(self, iterator, i):

        """
        Store the details of the given 'iterator' at position 'i'.

        Entries are (length, position, iterator) tuples kept in sorted order,
        so the iterator reporting the smallest length is always found first in
        'self.iters'.
        """

        # NOTE(review): presumably invoked by itermerge.__init__ for each
        # sequence - confirm against itermerge.
        insort_right(self.iters, (len(iterator), i, iterator))

    def next(self):

        """
        Return the next (document, positions list) pair for a document found
        by all iterators.

        The first (shortest) iterator drives the search: each document it
        yields is looked up in every other iterator using 'from_document', and
        the document is skipped as soon as one of them returns None. The
        positions in the result are ordered by the position 'i' originally
        given for each iterator.

        StopIteration is raised if there are no iterators, and is propagated
        from the driving iterator when it is exhausted.
        """

        if not self.iters:
            raise StopIteration

        # The driving iterator needs its own names: the loop over the other
        # iterators must not rebind them, otherwise a rejected document would
        # cause the wrong iterator to be advanced on the next attempt.

        _freq, first_i, first_it = self.iters[0]
        others = self.iters[1:]

        while 1:
            doc, first_positions = first_it.next()
            values = [(first_i, first_positions)]

            for _freq, i, it in others:
                positions = it.from_document(doc)

                # This term has nothing for the document: try the next one.

                if positions is None:
                    break

                values.append((i, positions))

            else:
                # Every term matched: restore the original term order.

                values.sort()
                return doc, [found for (_index, found) in values]
1.58 +
1.59 +# vim: tabstop=4 expandtab shiftwidth=4