#!/usr/bin/env python

# Provenance: this module is the itermerge.py file introduced by the
# Mercurial changeset below (reconstructed from a whitespace-collapsed copy
# of the patch).
#
#   HG changeset patch
#   User Paul Boddie
#   Date 1252451884 -7200
#   Node ID dafcd23d5fa9b4db9f9c08bea35ce8d34d96fd92
#   Parent c1b98b5509c0d0e9f3e5ce01518df28049cd1fbb
#   Attempted to fix document position merging.
#
# The same changeset (diff -r c1b98b5509c0 -r dafcd23d5fa9) also changes
# iixr.py, the caller of this module.  Indentation inside the hunks below is
# approximate because the original whitespace was lost:
#
#   --- a/iixr.py Wed Sep 09 00:29:30 2009 +0200
#   +++ b/iixr.py Wed Sep 09 01:18:04 2009 +0200
#   @@ -26,6 +26,7 @@
#    from bisect import bisect_right # to find terms in the dictionary index
#    from bisect import insort_right # to maintain a sorted list of data for merging
#    import bz2, zlib # for field compression
#   +from itermerge import itermerge
#
#    try:
#        set
#   @@ -1311,11 +1312,9 @@
#    other_term, other_doc_positions, other_partition = entries[i]
#
#    # For such entries, merge the positions.
#   -# Since document positions should only appear in a single
#   -# partition, a simple update should be sufficient.
#
#    if other_term == term:
#   -    doc_positions.update(other_doc_positions)
#   +    doc_positions = itermerge(doc_positions, other_doc_positions)
#        to_update.append(other_partition)
#        i += 1
#    else:

# Unique marker meaning "this iterator has no more items".  A dedicated object
# is used instead of None so that None (or any other value) occurring in the
# input sequences is never mistaken for the end of the data.
_EXHAUSTED = object()

def itermerge(seq1, seq2):

    """
    Merge 'seq1' and 'seq2' to produce an ordered, combined list of results.

    Both arguments may be any iterables (lists, iterators, generators) and are
    each expected to be already sorted in ascending order; items are compared
    using '<' only. Every item from both inputs appears in the result,
    duplicates included. Where items from the two inputs compare equal, the
    item from 'seq2' is emitted before the item from 'seq1'.

    A new list is always returned; the inputs are consumed but not modified.
    """

    results = []

    iter1 = iter(seq1)
    iter2 = iter(seq2)

    # Hold one pending item from each input.

    t1 = _itermerge_next(iter1, _EXHAUSTED)
    t2 = _itermerge_next(iter2, _EXHAUSTED)

    # While both inputs have a pending item, emit the smaller one and replace
    # it with the next item from the same input.

    while t1 is not _EXHAUSTED and t2 is not _EXHAUSTED:
        if t1 < t2:
            results.append(t1)
            t1 = _itermerge_next(iter1, _EXHAUSTED)
        else:
            results.append(t2)
            t2 = _itermerge_next(iter2, _EXHAUSTED)

    # At most one input still has data: emit its pending item and then
    # everything remaining in it.

    if t1 is not _EXHAUSTED:
        results.append(t1)
        _itermerge_fill(iter1, results)
    elif t2 is not _EXHAUSTED:
        results.append(t2)
        _itermerge_fill(iter2, results)

    return results

def _itermerge_next(iter, default=None):

    """
    Return the next item from the iterator 'iter', or 'default' if the
    iterator is exhausted.

    'iter' must be an iterator (not merely an iterable), since each call is
    expected to advance it by exactly one item. Callers needing to tell a None
    item apart from exhaustion should pass their own unique 'default'.
    """

    # A 'for' loop that returns on the first pass advances the iterator by one
    # item without depending on the Python 2 'next' method or the Python 3
    # '__next__' method.

    for item in iter:
        return item
    return default

def _itermerge_fill(iter, results):

    "Append all remaining items from 'iter' to the 'results' list."

    results.extend(iter)

# vim: tabstop=4 expandtab shiftwidth=4