#!/usr/bin/env python

"""
Round-trip checks for the iixr readers and writers.

Each section writes some data through an iixr writer, reads it back through
the matching reader and prints one line per comparison in the form
"<matches> <expected> <actual>". A leading "True" on every line means the
values read back equal the values written.

The script creates the files "test", "testI" and "testP" and whatever
iixr.Index("test_index") creates, all relative to the current directory.
"""

import iixr


def report(expected, actual):
    """Print whether actual equals expected, followed by both values."""
    # A single pre-formatted string keeps the output identical to the former
    # "print a, b, c" statement while remaining valid syntax on Python 3.
    print("%s %s %s" % (expected == actual, expected, actual))


def report_absent(result):
    """Print whether result is None, followed by the result itself."""
    print("%s %s" % (result is None, result))


# Plain numbers.

numbers = [12345678, 0, 1, 127, 128, 255, 256]

f = open("test", "wb")
w = iixr.FileWriter(f)
for number in numbers:
    w.write_number(number)
w.close()

f = open("test", "rb")
r = iixr.FileReader(f)
for number in numbers:
    report(number, r.read_number())
r.close()

# Positions written one document at a time.

all_doc_positions = [
    [
        (123, [1, 3, 5, 15, 25]),
        (124, [0, 100]),
    ],
    [
        (78, [9]),
        (196, [10, 11]),
    ],
]

f = open("test", "wb")
w = iixr.PositionWriter(f)
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        w.write_positions(docnum, positions)
    # NOTE(review): reset is assumed to happen once per document group (the
    # second group starts at a lower document number than the first ends
    # with) -- confirm against iixr.
    w.reset()
w.close()

f = open("test", "rb")
r = iixr.PositionReader(f)
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        d, p = r.read_positions()
        report(docnum, d)
        report(positions, p)
    r.reset()
r.close()

# Positions written a whole group at a time, then read back by offset in
# the opposite order to the one in which they were written.

f = open("test", "wb")
w = iixr.PositionWriter(f)
offsets = []
for doc_positions in all_doc_positions:
    offsets.append(w.write_all_positions(doc_positions))
w.close()

f = open("test", "rb")
r = iixr.PositionReader(f)
for offset, doc_positions in zip(reversed(offsets), reversed(all_doc_positions)):
    report(doc_positions, r.read_all_positions(offset))
r.close()

# Terms with offsets.

terms = [
    ("aardvark", 100000123),
    ("anteater", 100000456),
    ("badger", 100000789),
    ("bull", 1000001234),
    ("bulldog", 1000002345),
    ("cat", 1000003456),
]

f = open("test", "wb")
w = iixr.TermWriter(f)
for term, offset in terms:
    w.write_term(term, offset)
w.close()

f = open("test", "rb")
r = iixr.TermReader(f)
for term, offset in terms:
    t, o = r.read_term()
    report(term, t)
    report(offset, o)
r.close()

# Term index entries: term, offset and info offset.

indexed_terms = [
    ("aardvark", 100000123, 200000321),
    ("anteater", 100000456, 200000654),
    ("badger", 100000789, 200000987),
    ("bull", 1000001234, 200004321),
    ("bulldog", 1000002345, 200005432),
    ("cat", 1000003456, 200006543),
]

f = open("test", "wb")
w = iixr.TermIndexWriter(f)
for term, offset, info_offset in indexed_terms:
    w.write_term(term, offset, info_offset)
w.close()

f = open("test", "rb")
r = iixr.TermIndexReader(f)
for term, offset, info_offset in indexed_terms:
    t, o, i = r.read_term()
    report(term, t)
    report(offset, o)
    report(info_offset, i)
r.close()

# Term dictionary built from a term file, an index file and a positions
# file, queried in reverse order and then with terms that were never written.

f = open("test", "wb")
w = iixr.TermWriter(f)
f2 = open("testI", "wb")
w2 = iixr.TermIndexWriter(f2)
f3 = open("testP", "wb")
w3 = iixr.PositionWriter(f3)
# NOTE(review): the final argument (3) is presumably the indexing interval
# -- confirm against iixr.TermDictionaryWriter.
wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
for term, offset in terms:
    wd.write_term(term, offset)
wd.close()

f = open("test", "rb")
r = iixr.TermReader(f)
f2 = open("testI", "rb")
r2 = iixr.TermIndexReader(f2)
f3 = open("testP", "rb")
r3 = iixr.PositionReader(f3)
rd = iixr.TermDictionaryReader(r, r2, r3)
for term, offset in reversed(terms):
    report(offset, rd.find_term(term))
for term in ("dog", "dingo"):
    report_absent(rd.find_term(term))
rd.close()

# Term dictionary holding positions rather than bare offsets.

terms_with_positions = [
    ("aardvark", [(1, [2, 45, 96]), (20, [13])]),
    ("anteater", [(1, [43, 44])]),
    ("badger", [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]),
    ("bull", [(6, [128]), (16, [12])]),
    ("bulldog", [(43, [17, 19, 256, 512])]),
    ("cat", [(123, [12, 145, 196]), (1200, [113])]),
]

f = open("test", "wb")
w = iixr.TermWriter(f)
f2 = open("testI", "wb")
w2 = iixr.TermIndexWriter(f2)
f3 = open("testP", "wb")
w3 = iixr.PositionWriter(f3)
wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
for term, doc_positions in terms_with_positions:
    wd.write_term_positions(term, doc_positions)
wd.close()

f = open("test", "rb")
r = iixr.TermReader(f)
f2 = open("testI", "rb")
r2 = iixr.TermIndexReader(f2)
f3 = open("testP", "rb")
r3 = iixr.PositionReader(f3)
rd = iixr.TermDictionaryReader(r, r2, r3)
for term, doc_positions in reversed(terms_with_positions):
    report(doc_positions, rd.find_positions(term))
for term in ("dog", "dingo"):
    report_absent(rd.find_positions(term))
rd.close()

# High-level index: add every whitespace-separated word of each document
# with its position, then look up a few terms.

docs = [
    (1, "The cat sat on the mat"),
    (2, "Every good boy deserves football"),
    (13, "One good turn deserves another"),
    (14, "Every man for himself"),
    (25, "Red sky at night shepherd's delight"),
    (36, "She sells sea shells on the sea shore"),
]

doc_tests = [
    ("Every", [(2, [0]), (14, [0])]),
    ("good", [(2, [1]), (13, [1])]),
    ("deserves", [(2, [3]), (13, [3])]),
    ("sea", [(36, [2, 6])]),
]

index = iixr.Index("test_index")
wi = index.get_writer(3)
for docnum, text in docs:
    for position, term in enumerate(text.split()):
        wi.add_position(term, docnum, position)
wi.close()

rd = index.get_reader()
for term, doc_positions in doc_tests:
    report(doc_positions, rd.find_positions(term))
index.close()

# vim: tabstop=4 expandtab shiftwidth=4