paul@60 | 1 | #!/usr/bin/env python |
paul@60 | 2 | |
paul@60 | 3 | """ |
paul@60 | 4 | Phrase iterators providing navigation over common positions for a number of |
paul@60 | 5 | different terms. |
paul@60 | 6 | |
paul@60 | 7 | Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk> |
paul@60 | 8 | |
paul@60 | 9 | This program is free software; you can redistribute it and/or modify it under |
paul@60 | 10 | the terms of the GNU General Public License as published by the Free Software |
paul@60 | 11 | Foundation; either version 3 of the License, or (at your option) any later |
paul@60 | 12 | version. |
paul@60 | 13 | |
paul@60 | 14 | This program is distributed in the hope that it will be useful, but WITHOUT ANY |
paul@60 | 15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A |
paul@60 | 16 | PARTICULAR PURPOSE. See the GNU General Public License for more details. |
paul@60 | 17 | |
paul@60 | 18 | You should have received a copy of the GNU General Public License along |
paul@60 | 19 | with this program. If not, see <http://www.gnu.org/licenses/>. |
paul@60 | 20 | """ |
paul@60 | 21 | |
paul@60 | 22 | from itermerge import itermerge |
paul@60 | 23 | from bisect import insort_right |
paul@60 | 24 | |
class PhraseIterator(itermerge):

    """
    Iteration over many terms, producing only those documents for which every
    term's iterator provides positions.
    """

    def __init__(self, sequences):

        "Initialise the iterator using the given 'sequences'."

        itermerge.__init__(self, sequences)

    def _add_seq(self, sequence, i):

        "Store the details of the given 'sequence' at position 'i'."

        # Entries are (length, position, iterator) tuples inserted in sorted
        # order. Provided entries are only ever added through this method, the
        # shortest sequence is found first, and that is the one which the next
        # method advances.

        insort_right(self.iters, (len(sequence), i, iter(sequence)))

    def next(self):

        """
        Return a tuple of the form (document, positions list) for the next
        document provided by the first stored iterator for which every other
        stored iterator also yields positions from its 'from_document' method.
        The positions list is ordered according to the position 'i' with which
        each sequence was stored.

        Raise StopIteration if no iterators are stored. Any exception raised
        by the first iterator's 'next' method (presumably StopIteration upon
        exhaustion) is propagated to the caller.
        """

        if not self.iters:
            raise StopIteration

        # Neither the leading entry nor the remaining entries change while
        # searching, so they are obtained once rather than on every pass.

        lead_freq, lead_i, lead_it = self.iters[0]
        others = self.iters[1:]

        while 1:
            doc, positions = lead_it.next()
            values = [(lead_i, positions)]

            for freq, i, it in others:
                positions = it.from_document(doc)

                # A term without positions in this document rules the document
                # out: abandon it and try the next one from the leading
                # iterator.

                if positions is None:
                    break

                # Keep the results ordered by the original sequence position.

                insort_right(values, (i, positions))

            # Only reached where every iterator supplied positions.

            else:
                return doc, [found for (index, found) in values]

    def close(self):

        "Close each stored iterator having a 'close' method and discard them all."

        for freq, i, it in self.iters:
            if hasattr(it, "close"):
                it.close()
        self.iters = []
paul@61 | 60 | |
paul@60 | 61 | # vim: tabstop=4 expandtab shiftwidth=4 |