1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/iixr/merging.py Tue Sep 15 00:15:11 2009 +0200
1.3 @@ -0,0 +1,74 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +Dictionary merging classes.
1.8 +
1.9 +Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
1.10 +
1.11 +This program is free software; you can redistribute it and/or modify it under
1.12 +the terms of the GNU General Public License as published by the Free Software
1.13 +Foundation; either version 3 of the License, or (at your option) any later
1.14 +version.
1.15 +
1.16 +This program is distributed in the hope that it will be useful, but WITHOUT ANY
1.17 +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
1.18 +PARTICULAR PURPOSE. See the GNU General Public License for more details.
1.19 +
1.20 +You should have received a copy of the GNU General Public License along
1.21 +with this program. If not, see <http://www.gnu.org/licenses/>.
1.22 +"""
1.23 +
1.24 +from itermerge import itermerge
1.25 +
class Merger:

    "Merge files."

    def __init__(self, writer, readers):

        """
        Initialise the merger with the given 'writer' and the collection of
        'readers'. Both are stored as given; nothing is opened or read here.
        """

        self.writer = writer
        self.readers = readers

    def close(self):

        """
        Close each of the readers in turn and then close the writer.

        The writer is closed even if closing a reader raises an exception, so
        that it is not left open. The exception still propagates to the caller,
        and any readers following the failing one are not closed.
        """

        try:
            for reader in self.readers:
                reader.close()
        finally:
            # Always release the writer, whatever happened to the readers.
            self.writer.close()
1.38 +
class TermDictionaryMerger(Merger):

    "Merge term and position files."

    def merge(self):

        """
        Combine the term entries obtained from the readers and pass them on to
        the writer.

        Entries are taken from 'itermerge' over the readers. The positions of
        consecutive entries having the same term are collected, and for each
        such run the writer receives the term together with an 'itermerge' over
        the collected positions. The frequency values in each entry are not
        used here.
        """

        pending_term = None
        pending_positions = []

        for entry in itermerge(self.readers):
            term, frequency, doc_frequency, positions = entry

            # Same term as the previous entry: extend the current run.

            if term == pending_term:
                pending_positions.append(positions)
                continue

            # A different term: emit the finished run, if any, and start anew.

            if pending_positions:
                self.writer.write_term_positions(
                    pending_term, itermerge(pending_positions)
                    )

            pending_term = term
            pending_positions = [positions]

        # Emit the final run once the merged entries are exhausted.

        if pending_positions:
            self.writer.write_term_positions(
                pending_term, itermerge(pending_positions)
                )
1.63 +
class FieldDictionaryMerger(Merger):

    "Merge field files."

    def merge(self):

        """
        Pass every (document number, fields) entry produced by 'itermerge' over
        the readers to the writer, in the order in which the entries are
        produced.
        """

        merged_entries = itermerge(self.readers)

        for entry in merged_entries:
            docnum, fields = entry
            self.writer.write_fields(docnum, fields)
1.76 +
1.77 +# vim: tabstop=4 expandtab shiftwidth=4