#!/usr/bin/env python

"""
Generic file access.

Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from iixr.data import *
from array import array
import zlib

# Constants.

# Number of bytes buffered by writers before the cache is written out, and the
# minimum number of bytes requested by readers when refilling their cache.

CACHE_SIZE = 100000

# Classes.

class File:

    "A basic file abstraction."

    def __init__(self, f):

        "Initialise the abstraction with the underlying file object 'f'."

        self.f = f
        self.record = array('B')    # record buffer
        self.cache = array('B')     # bytes held between the records and the file
        self.data_start = 0         # offset used by 'rewind'

    def reset(self):

        "To be used to reset the state of the reader or writer between records."

        pass

    def seek(self, offset):

        "Move to the given 'offset' in the file and reset any record state."

        self.f.seek(offset)
        self.reset()

    def rewind(self):

        "Seek to the position given by the 'data_start' attribute."

        self.seek(self.data_start)

    def close(self):

        "Close the underlying file. Closing more than once is harmless."

        if self.f is not None:
            self.f.close()
            self.f = None

class FileWriter(File):

    "Writing basic data types to files."

    def __init__(self, f):

        "Initialise the writer with the underlying file object 'f'."

        File.__init__(self, f)

        # The number of bytes in completed records (length prefixes included),
        # whether or not they have left the cache yet.

        self.written = 0

    def tell(self):

        "Return the number of bytes occupied by all completed records."

        # NOTE: Will not be accurate within the current record.
        return self.written

    def begin_record(self):

        "Start a record. Nothing needs doing when writing."

        pass

    def end_record(self):

        """
        Finish the current record, appending its length and then its contents
        to the cache. An empty record is not written at all.
        """

        if self.record:
            length = len(self.record)

            # Measure the encoded length prefix by observing the cache growth.

            before = len(self.cache)
            vint_to_array(length, self.cache)
            length_size = len(self.cache) - before

            self.cache += self.record
            self.written += length_size + length
            self.record = array('B')

            # Only writes to the file once the cache exceeds CACHE_SIZE.

            self.flush_cache()

    def write_number(self, number):

        "Write 'number' to the file using a variable length encoding."

        vint_to_array(number, self.record)

    def write_numbers(self, numbers):

        "Write 'numbers' to the file using a variable length encoding."

        record = self.record
        for number in numbers:
            vint_to_array(number, record)

    def write_string(self, s, compress=0):

        """
        Write 's' to the file, recording its length and compressing the string
        if 'compress' is set to a true value.

        When 'compress' is true, a one character flag precedes the length: "z"
        if the compressed form was stored, "-" if the original was stored. No
        flag is written otherwise, so readers must use the matching setting.
        """

        # Convert Unicode objects to strings.

        if isinstance(s, unicode):
            s = s.encode("utf-8")

        # Compress the string if requested.

        if compress:
            cs = zlib.compress(s)

            # Take any shorter than the original.

            if len(cs) < len(s):
                flag = "z"
                s = cs
            else:
                flag = "-"

        else:
            flag = ""

        # Write the length of the data before the data itself.

        length = len(s)
        self.record.fromstring("".join([flag, vint(length), s]))

    def write_sequence_value(self, value, size):

        "Write the sequence 'value' having the given 'size' to the record."

        sequence_to_array(value, size, self.record)

    def write_sequence_values(self, values, size):

        """
        Write the number of 'values' followed by each value, with each value
        being a sequence of the given 'size'.
        """

        vint_to_array(len(values), self.record)
        for value in values:
            self.write_sequence_value(value, size)

    def write_delta_sequence(self, values, size):

        """
        Write 'values' after converting them using the subtractor obtained for
        the first value. An empty collection is written as a zero count.

        NOTE: The conversion appears to be done in place (the result of
        NOTE: convert_sequence is not used), so the caller's 'values' are
        NOTE: modified.
        """

        # Without a first value there is nothing to convert.

        if values:
            convert_sequence(values, get_subtractor(values[0]))
        self.write_sequence_values(values, size)

    def write_monotonic_sequence(self, values, size):

        """
        Write 'values' after converting them using the monotonic subtractor
        obtained for the first value. An empty collection is written as a zero
        count.

        NOTE: The conversion appears to be done in place (the result of
        NOTE: convert_sequence is not used), so the caller's 'values' are
        NOTE: modified.
        """

        # Without a first value there is nothing to convert.

        if values:
            convert_sequence(values, get_monotonic_subtractor(values[0]))
        self.write_sequence_values(values, size)

    def flush(self, force=0):

        """
        Finish the current record and write the cache to the file if it is
        large enough or if 'force' is set to a true value.
        """

        self.end_record()
        self.flush_cache(force)

    def flush_cache(self, force=0):

        """
        Write the cache to the file if it holds more than CACHE_SIZE bytes or
        if 'force' is set to a true value. Nothing happens once the file has
        been closed.
        """

        if self.f is not None:
            if force or len(self.cache) > CACHE_SIZE:
                self.cache.tofile(self.f)
                self.cache = array('B')

    def close(self):

        "Write out any pending data and close the file."

        # Release the file even if the final flush fails.

        try:
            self.flush(1)
        finally:
            File.close(self)

class FileReader(File):

    "Reading basic data types from files."

    def __init__(self, f):

        "Initialise the reader with the underlying file object 'f'."

        File.__init__(self, f)

        # The cache holds the file's bytes starting at offset 'cache_start'.
        # The most recently requested span within the cache is given by
        # 'record_start' and 'record_end', and 'start' is the position within
        # the current record.

        self.record_start = 0
        self.record_end = 0
        self.cache_start = 0

        # Defined before any record is read so that 'tell' always works.

        self.start = 0
        self.begin()

    def begin(self):

        "Initialise file-wide parameters."

        pass

    def begin_record(self):

        """
        Read the next record: a length followed by that many bytes. At the end
        of the file, the previous record is left in place.
        """

        self.start = 0
        try:
            size = self.read_number_from_file()
            self.record = self.from_cache(size)
        except EOFError:
            pass

    def end_record(self):

        "Finish a record. Nothing needs doing when reading."

        pass

    def seek(self, offset):

        """
        Move to the given 'offset' in the file, avoiding a seek on the
        underlying file if the offset lies within the cached data.
        """

        from_cache_start = offset - self.cache_start
        if 0 <= from_cache_start < len(self.cache):
            self.record_start = self.record_end = from_cache_start
        else:
            self.f.seek(offset)
            self.cache = array('B')
            self.cache_start = offset
            self.record_start = self.record_end = 0

        # Discard the position within the old record so that 'tell' reports
        # 'offset' immediately after seeking.

        self.start = 0
        self.reset()

    def tell(self):

        "Return the file offset of the current position within the record."

        return self.cache_start + self.record_start + self.start

    def ensure_cache(self, size):

        """
        Make the next 'size' bytes available in the cache, setting the
        'record_start' and 'record_end' attributes to delimit them. EOFError
        is raised if the file ends before that many bytes can be obtained.
        """

        if size > len(self.cache) - self.record_end:

            # Discard the consumed data and keep the positions consistent with
            # the shifted cache, even if the end of the file is then reached.

            self.cache = self.cache[self.record_end:]
            self.cache_start += self.record_end
            self.record_start = self.record_end = 0

            # Keep reading until the whole span is present: a record may be
            # larger than CACHE_SIZE, and a read may return less than asked.

            while len(self.cache) < size:
                s = self.f.read(max(CACHE_SIZE, size - len(self.cache)))
                if not s:
                    raise EOFError
                self.cache.fromstring(s)
        else:
            self.record_start = self.record_end

        self.record_end = self.record_start + size

    def from_cache(self, size):

        "Return the next 'size' bytes as a new array, consuming them."

        self.ensure_cache(size)
        return self.cache[self.record_start:self.record_end]

    def read_number_from_file(self):

        "Read a number from the file."

        # Read each byte, adding it to the number.

        a = array('B')
        a += self.from_cache(1)
        csd = a[-1]

        # Values below 128 occupy a single byte and need no decoding.

        if csd < 128:
            return csd

        # Otherwise, collect bytes until one without the top bit set is found.

        while csd & 128:
            a += self.from_cache(1)
            csd = a[-1]
        return vint_from_array(a)

    def read_number(self):

        "Read a number from the current record."

        n, self.start = vint_from_array_start(self.record, self.start)
        return n

    def read_numbers(self, n):

        "Read 'n' numbers from the current record, returning them in a list."

        return [self.read_number() for _ in range(n)]

    def read_string(self, decompress=0):

        """
        Read a string from the current record, decompressing the stored data if
        'decompress' is set to a true value.

        The setting must match that used when writing, since a flag character
        is only expected before the length when 'decompress' is true. A Unicode
        object is returned.
        """

        # Decompress the data if requested.

        if decompress:
            flag = chr(self.record[self.start])
            self.start += 1
        else:
            flag = "-"

        length = self.read_number()
        start = self.start
        self.start += length
        s = self.record[start:self.start].tostring()

        # Perform decompression if applicable.

        if flag == "z":
            s = zlib.decompress(s)

        # Convert strings to Unicode objects.

        return unicode(s, "utf-8")

    def read_sequence_value(self, size):

        "Read a sequence of the given 'size' from the current record."

        value, self.start = sequence_from_array(self.record, size, self.start)
        return value

    def read_sequences(self, size):

        """
        Read a count followed by that many sequences of the given 'size' from
        the current record, returning the sequences in a list.
        """

        length = self.read_number()
        return [self.read_sequence_value(size) for _ in range(length)]

    def read_delta_sequence(self, size):

        """
        Read sequences of the given 'size', converting them using the adder
        obtained for the first one. An empty list is returned unchanged.
        """

        values = self.read_sequences(size)

        # Without a first value there is nothing to convert.

        if values:
            convert_sequence(values, get_adder(values[0]))
        return values

    def read_monotonic_sequence(self, size):

        """
        Read sequences of the given 'size', converting them using the monotonic
        adder obtained for the first one. An empty list is returned unchanged.
        """

        values = self.read_sequences(size)

        # Without a first value there is nothing to convert.

        if values:
            convert_sequence(values, get_monotonic_adder(values[0]))
        return values

# vim: tabstop=4 expandtab shiftwidth=4