#!/usr/bin/env python

"""
Dictionary merging classes.

Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from itermerge import itermerge

class Merger:

    "Merge files."

    def __init__(self, writer, readers):

        """
        Initialise the merger with the given 'writer', which receives the merged
        output, and the collection of 'readers' which supply the input.
        """

        self.writer = writer
        self.readers = readers

    def close(self):

        """
        Close all readers and then the writer, if one is still set.

        The writer is closed even if closing a reader raises an exception, so
        that a failing reader cannot leave the output unclosed. In that case,
        the exception still propagates and the readers are retained.

        After a successful call, the merger holds no readers and no writer, so
        calling this method again has no further effect.
        """

        try:
            for reader in self.readers:
                reader.close()

            # Only forget the readers once every one of them has been closed.
            self.readers = []

        finally:
            if self.writer is not None:
                self.writer.close()
                self.writer = None

class TermDictionaryMerger(Merger):

    "Merge term and position files."

    def merge(self):

        """
        Merge terms and positions from the readers, sending them to the writer.

        Position sources belonging to the same term are collected and written
        with a single call per term. This relies on entries for equal terms
        arriving consecutively from itermerge (presumably because it yields
        entries in sorted order - to be confirmed against that module).

        Nothing is written if the readers yield no entries.
        """

        last_term = None
        current_readers = []

        # The frequency values are unpacked but not used: only the positions
        # are passed on to the writer.

        for term, _frequency, _doc_frequency, positions in itermerge(self.readers):

            # Another entry for the term being collected.

            if term == last_term:
                current_readers.append(positions)
                continue

            # A different term: write out whatever was collected for the
            # previous term before starting a new collection.

            if current_readers:
                self.writer.write_term_positions(last_term, itermerge(current_readers))

            last_term = term
            current_readers = [positions]

        # Write out the positions collected for the final term.

        if current_readers:
            self.writer.write_term_positions(last_term, itermerge(current_readers))

class FieldDictionaryMerger(Merger):

    "Merge field files."

    def merge(self):

        """
        Merge fields from the readers, sending them to the writer.

        Each document number and its fields are passed to the writer in the
        order in which itermerge yields them.
        """

        for docnum, fields in itermerge(self.readers):
            self.writer.write_fields(docnum, fields)

# vim: tabstop=4 expandtab shiftwidth=4