#!/usr/bin/env python

"""
Round-trip checks for the iixr reader/writer classes.

Each section writes sample data with an iixr writer, reads it back with the
matching reader and prints one line per comparison in the form

    <matched?> <expected> <actual>

so a run is healthy when every line starts with "True". The script creates
the scratch files "test", "testF", "testI", "testP" and the index location
"test_index" in the current directory.

Results are printed via print(...) with a single pre-formatted string, which
produces the same output under Python 2 and is also valid Python 3 syntax.
NOTE(review): whether iixr itself runs under Python 3 is not visible from
this script.
"""

import iixr


def report(expected, actual):
    """Print whether 'actual' equals 'expected', followed by both values."""
    print("%s %s %s" % (expected == actual, expected, actual))


def report_missing(value):
    """Print whether 'value' is None (a failed lookup), followed by the value."""
    print("%s %s" % (value is None, value))


# Plain numbers: FileWriter / FileReader.

numbers = [12345678, 0, 1, 127, 128, 255, 256]

f = open("test", "wb")
w = iixr.FileWriter(f)
for number in numbers:
    w.write_number(number)
w.close()

f = open("test", "rb")
r = iixr.FileReader(f)
for number in numbers:
    n = r.read_number()
    report(number, n)
r.close()

# Document positions written one record at a time.
# Each inner list is a separate group of (docnum, positions) records.

all_doc_positions = [
    [
        (123, [1, 3, 5, 15, 25]),
        (124, [0, 100])
    ],
    [
        (78, [9]),
        (196, [10, 11])
    ]
]

f = open("test", "wb")
w = iixr.PositionWriter(f)
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        w.write_positions(docnum, positions)
    # Document numbers start over in the next group (78 follows 124), so the
    # writer is reset between groups.
    w.reset()
w.close()

f = open("test", "rb")
r = iixr.PositionReader(f)
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        d, p = r.read_positions()
        report(docnum, d)
        report(positions, p)
    # Mirror the writer: reset once per group.
    r.reset()
r.close()

# Document positions written a whole group at a time, then fetched by offset.

f = open("test", "wb")
w = iixr.PositionWriter(f)
offsets = []
for doc_positions in all_doc_positions:
    offsets.append(
        w.write_all_positions(doc_positions)
    )
w.close()

f = open("test", "rb")
r = iixr.PositionReader(f)
# Read the groups back in the opposite order from which they were written,
# so that each one has to be located through its recorded offset.
offsets.reverse()
all_doc_positions.reverse()
for offset, doc_positions in zip(offsets, all_doc_positions):
    dp = r.read_all_positions(offset)
    report(doc_positions, dp)
r.close()

# Stored document fields: FieldWriter / FieldReader.

doc_fields = [
    ["testing", "fields", "stored", "compressed"],
    ["fields", "for a second", "document"]
]

f = open("testF", "wb")
w = iixr.FieldWriter(f)
offsets = []
for fields in doc_fields:
    offsets.append(w.write_fields(fields))
w.close()

f = open("testF", "rb")
r = iixr.FieldReader(f)
# First pass: sequential reads in the order written.
for fields in doc_fields:
    df = r.read_fields()
    report(fields, df)
# Second pass: reads by offset, in the opposite order.
offsets.reverse()
doc_fields.reverse()
for offset, fields in zip(offsets, doc_fields):
    df = r.read_doc_fields(offset)
    report(fields, df)
r.close()

# Terms with offsets: TermWriter / TermReader.

terms = [
    ("aardvark", 100000123),
    ("anteater", 100000456),
    ("badger", 100000789),
    ("bull", 1000001234),
    ("bulldog", 1000002345),
    ("cat", 1000003456)
]

f = open("test", "wb")
w = iixr.TermWriter(f)
for term, offset in terms:
    w.write_term(term, offset)
w.close()

f = open("test", "rb")
r = iixr.TermReader(f)
for term, offset in terms:
    t, o = r.read_term()
    report(term, t)
    report(offset, o)
r.close()

# Terms with two offsets each: TermIndexWriter / TermIndexReader.

indexed_terms = [
    ("aardvark", 100000123, 200000321),
    ("anteater", 100000456, 200000654),
    ("badger", 100000789, 200000987),
    ("bull", 1000001234, 200004321),
    ("bulldog", 1000002345, 200005432),
    ("cat", 1000003456, 200006543)
]

f = open("test", "wb")
w = iixr.TermIndexWriter(f)
for term, offset, info_offset in indexed_terms:
    w.write_term(term, offset, info_offset)
w.close()

f = open("test", "rb")
r = iixr.TermIndexReader(f)
for term, offset, info_offset in indexed_terms:
    t, o, i = r.read_term()
    report(term, t)
    report(offset, o)
    report(info_offset, i)
r.close()

# Term dictionary built from a term file, a term index file and a position
# file: lookups of terms by name.

f = open("test", "wb")
w = iixr.TermWriter(f)
f2 = open("testI", "wb")
w2 = iixr.TermIndexWriter(f2)
f3 = open("testP", "wb")
w3 = iixr.PositionWriter(f3)
# NOTE(review): the final argument (3) is presumably the term index
# interval - confirm against iixr.TermDictionaryWriter.
wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
for term, offset in terms:
    wd.write_term(term, offset)
wd.close()

f = open("test", "rb")
r = iixr.TermReader(f)
f2 = open("testI", "rb")
r2 = iixr.TermIndexReader(f2)
f3 = open("testP", "rb")
r3 = iixr.PositionReader(f3)
rd = iixr.TermDictionaryReader(r, r2, r3)
# Look the terms up in the opposite order from which they were written.
terms_reversed = terms[:]
terms_reversed.reverse()
for term, offset in terms_reversed:
    o = rd.find_term(term)
    report(offset, o)
# Terms that were never written are expected to yield None.
for term in ("dog", "dingo"):
    o = rd.find_term(term)
    report_missing(o)
rd.close()

# Term dictionary again, this time storing and retrieving positions per term.

terms_with_positions = [
    ("aardvark", [(1, [2, 45, 96]), (20, [13])]),
    ("anteater", [(1, [43, 44])]),
    ("badger", [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]),
    ("bull", [(6, [128]), (16, [12])]),
    ("bulldog", [(43, [17, 19, 256, 512])]),
    ("cat", [(123, [12, 145, 196]), (1200, [113])])
]

f = open("test", "wb")
w = iixr.TermWriter(f)
f2 = open("testI", "wb")
w2 = iixr.TermIndexWriter(f2)
f3 = open("testP", "wb")
w3 = iixr.PositionWriter(f3)
wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
for term, doc_positions in terms_with_positions:
    wd.write_term_positions(term, doc_positions)
wd.close()

f = open("test", "rb")
r = iixr.TermReader(f)
f2 = open("testI", "rb")
r2 = iixr.TermIndexReader(f2)
f3 = open("testP", "rb")
r3 = iixr.PositionReader(f3)
rd = iixr.TermDictionaryReader(r, r2, r3)
terms_reversed = terms_with_positions[:]
terms_reversed.reverse()
for term, doc_positions in terms_reversed:
    dp = rd.find_positions(term)
    report(doc_positions, dp)
# Terms that were never written are expected to yield None.
for term in ("dog", "dingo"):
    dp = rd.find_positions(term)
    report_missing(dp)
rd.close()

# High-level index: add term positions per document, then query by term.

docs = [
    (1, "The cat sat on the mat"),
    (2, "Every good boy deserves football"),
    (13, "One good turn deserves another"),
    (14, "Every man for himself"),
    (25, "Red sky at night shepherd's delight"),
    (36, "She sells sea shells on the sea shore")
]

# Expected (docnum, positions) results for selected terms; positions are the
# zero-based word indexes produced by splitting each text on whitespace.
doc_tests = [
    ("Every", [(2, [0]), (14, [0])]),
    ("good", [(2, [1]), (13, [1])]),
    ("deserves", [(2, [3]), (13, [3])]),
    ("sea", [(36, [2, 6])])
]

index = iixr.Index("test_index")
wi = index.get_writer(3)
for docnum, text in docs:
    for position, term in enumerate(text.split()):
        wi.add_position(term, docnum, position)
wi.close()

rd = index.get_reader()
for term, doc_positions in doc_tests:
    dp = rd.find_positions(term)
    report(doc_positions, dp)
index.close()

# vim: tabstop=4 expandtab shiftwidth=4