# HG changeset patch # User Paul Boddie # Date 1252263220 -7200 # Node ID dec7d2ac1cc916ae976db663d70b391ab15c0c7b # Parent 251e042dada73ea117a196d7db189f9766f23d3e Made separate methods for sorted and unsorted position data. diff -r 251e042dada7 -r dec7d2ac1cc9 iixr.py --- a/iixr.py Sun Sep 06 02:12:16 2009 +0200 +++ b/iixr.py Sun Sep 06 20:53:40 2009 +0200 @@ -203,7 +203,7 @@ def reset(self): self.last_docnum = 0 - def write_positions(self, docnum, positions): + def write_sorted_positions(self, docnum, positions): """ Write for the document 'docnum' the given 'positions'. @@ -225,10 +225,6 @@ self.write_number(len(positions)) - # Make sure that the positions are sorted. - - positions.sort() - # Write the position deltas. last = 0 @@ -241,6 +237,18 @@ return offset + def write_positions(self, docnum, positions): + + """ + Write for the document 'docnum' the given 'positions'. + Return the offset of the written record. + """ + + # Make sure that the positions are sorted. + + positions.sort() + return self.write_sorted_positions(docnum, positions) + class PositionReader(FileReader): "Reading position information from files." @@ -433,7 +441,7 @@ self.position_index_writer = position_index_writer self.interval = interval - def write_term_positions(self, doc_positions): + def write_sorted_term_positions(self, doc_positions): """ Write all 'doc_positions' - a collection of tuples of the form (document @@ -460,8 +468,6 @@ first_offset = None count = 0 - doc_positions.sort() - for docnum, positions in doc_positions: pos_offset = self.position_writer.write_positions(docnum, positions) @@ -506,6 +512,22 @@ return index_offset, frequency, len(doc_positions) + def write_term_positions(self, doc_positions): + + """ + Write all 'doc_positions' - a collection of tuples of the form (document + number, position list) - to the file. + + Add some records to the index, making dictionary entries. + + Return a tuple containing the offset of the written data, the frequency + (number of positions), and document frequency (number of documents) for + the term involved. + """ + + doc_positions.sort() + return self.write_sorted_term_positions(doc_positions) + def close(self): self.position_writer.close() self.position_index_writer.close()