#!/usr/bin/env python

"""
Round-trip checks for the iixr reader and writer classes.

Each section writes sample data to files in the current working directory
("test", "testI" and "testP"), reads it back, and prints one line per
comparison: True or False, followed by the expected and the actual value.
The files are not removed afterwards.
"""

# Keeps the output identical on Python 2.6+ while making the script parse on
# Python 3 as well.
from __future__ import print_function

import iixr


def _report(expected, actual):
    """Print whether the values are equal, followed by both values."""
    print(expected == actual, expected, actual)


def _report_missing(value):
    """Print whether value is None, followed by the value itself."""
    print(value is None, value)


# NOTE(review): the writers/readers are handed plain file objects and only
# their own close() is called; presumably that also closes the underlying
# file -- confirm against iixr.

# Plain numbers.

numbers = [12345678, 0, 1, 127, 128, 255, 256]

f = open("test", "wb")
w = iixr.FileWriter(f)
for number in numbers:
    w.write_number(number)
w.close()

f = open("test", "rb")
r = iixr.FileReader(f)
for number in numbers:
    _report(number, r.read_number())
r.close()

# Document positions, written and read one document at a time.

all_doc_positions = [
    [
        (123, [1, 3, 5, 15, 25]),
        (124, [0, 100]),
    ],
    [
        (78, [9]),
        (196, [10, 11]),
    ],
]

# NOTE(review): reset() is taken to run once per group of documents (the
# second group's document numbers start below the first group's) -- confirm
# the nesting against the original layout.
f = open("test", "wb")
w = iixr.PositionWriter(f)
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        w.write_positions(docnum, positions)
    w.reset()
w.close()

f = open("test", "rb")
r = iixr.PositionReader(f)
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        d, p = r.read_positions()
        _report(docnum, d)
        _report(positions, p)
    r.reset()
r.close()

# Document positions, written a group at a time; the value returned by each
# write is kept and later passed to read_all_positions().

f = open("test", "wb")
w = iixr.PositionWriter(f)
offsets = [
    w.write_all_positions(doc_positions)
    for doc_positions in all_doc_positions
]
w.close()

# Read the groups back in the opposite order to the one they were written in.
f = open("test", "rb")
r = iixr.PositionReader(f)
offsets.reverse()
all_doc_positions.reverse()
for offset, doc_positions in zip(offsets, all_doc_positions):
    _report(doc_positions, r.read_all_positions(offset))
r.close()

# Terms with offsets.

terms = [
    ("aardvark", 100000123),
    ("anteater", 100000456),
    ("badger", 100000789),
    ("bull", 1000001234),
    ("bulldog", 1000002345),
    ("cat", 1000003456),
]

f = open("test", "wb")
w = iixr.TermWriter(f)
for term, offset in terms:
    w.write_term(term, offset)
w.close()

f = open("test", "rb")
r = iixr.TermReader(f)
for term, offset in terms:
    t, o = r.read_term()
    _report(term, t)
    _report(offset, o)
r.close()

# Index entries: terms with an offset and an info offset.

indexed_terms = [
    ("aardvark", 100000123, 200000321),
    ("anteater", 100000456, 200000654),
    ("badger", 100000789, 200000987),
    ("bull", 1000001234, 200004321),
    ("bulldog", 1000002345, 200005432),
    ("cat", 1000003456, 200006543),
]

f = open("test", "wb")
w = iixr.TermIndexWriter(f)
for term, offset, info_offset in indexed_terms:
    w.write_term(term, offset, info_offset)
w.close()

f = open("test", "rb")
r = iixr.TermIndexReader(f)
for term, offset, info_offset in indexed_terms:
    t, o, i = r.read_term()
    _report(term, t)
    _report(offset, o)
    _report(info_offset, i)
r.close()

# Term dictionary built from a term writer, an index writer and a position
# writer.
# NOTE(review): the final argument (3) is presumably the indexing interval --
# confirm against iixr.TermDictionaryWriter.

f = open("test", "wb")
w = iixr.TermWriter(f)
f2 = open("testI", "wb")
w2 = iixr.TermIndexWriter(f2)
f3 = open("testP", "wb")
w3 = iixr.PositionWriter(f3)
wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
for term, offset in terms:
    wd.write_term(term, offset)
wd.close()

f = open("test", "rb")
r = iixr.TermReader(f)
f2 = open("testI", "rb")
r2 = iixr.TermIndexReader(f2)
f3 = open("testP", "rb")
r3 = iixr.PositionReader(f3)
rd = iixr.TermDictionaryReader(r, r2, r3)
# Look the terms up in the opposite order to the one they were written in.
terms_reversed = terms[:]
terms_reversed.reverse()
for term, offset in terms_reversed:
    _report(offset, rd.find(term))
# Terms that were never written are expected to yield None.
for term in ("dog", "dingo"):
    _report_missing(rd.find(term))
rd.close()

# Term dictionary with position information for each term.

terms_with_positions = [
    ("aardvark", [(1, [2, 45, 96]), (20, [13])]),
    ("anteater", [(1, [43, 44])]),
    ("badger", [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]),
    ("bull", [(6, [128]), (16, [12])]),
    ("bulldog", [(43, [17, 19, 256, 512])]),
    ("cat", [(123, [12, 145, 196]), (1200, [113])]),
]

f = open("test", "wb")
w = iixr.TermWriter(f)
f2 = open("testI", "wb")
w2 = iixr.TermIndexWriter(f2)
f3 = open("testP", "wb")
w3 = iixr.PositionWriter(f3)
wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
for term, doc_positions in terms_with_positions:
    wd.write_term_positions(term, doc_positions)
wd.close()

f = open("test", "rb")
r = iixr.TermReader(f)
f2 = open("testI", "rb")
r2 = iixr.TermIndexReader(f2)
f3 = open("testP", "rb")
r3 = iixr.PositionReader(f3)
rd = iixr.TermDictionaryReader(r, r2, r3)
terms_reversed = terms_with_positions[:]
terms_reversed.reverse()
for term, doc_positions in terms_reversed:
    _report(doc_positions, rd.find_positions(term))
# Terms that were never written are expected to yield None.
for term in ("dog", "dingo"):
    _report_missing(rd.find_positions(term))
rd.close()

# vim: tabstop=4 expandtab shiftwidth=4