1.1 --- a/iixr/files.py Fri Feb 11 00:03:22 2011 +0100
1.2 +++ b/iixr/files.py Fri Feb 11 01:46:08 2011 +0100
1.3 @@ -21,10 +21,13 @@
1.4 from iixr.data import *
1.5 from array import array
1.6 import zlib
1.7 -import sys
1.8
1.9 # Constants.
1.10
1.11 +CACHE_SIZE = 1000
1.12 +
1.13 +# Classes.
1.14 +
1.15 class File:
1.16
1.17 "A basic file abstraction."
1.18 @@ -32,6 +35,7 @@
1.19 def __init__(self, f):
1.20 self.f = f
1.21 self.record = array('B') # record buffer
1.22 + self.cache = array('B')
1.23 self.data_start = 0
1.24
1.25 def reset(self):
1.26 @@ -45,8 +49,7 @@
1.27 self.reset()
1.28
1.29 def rewind(self):
1.30 - self.f.seek(self.data_start)
1.31 - self.reset()
1.32 + self.seek(self.data_start)
1.33
1.34 def close(self):
1.35 if self.f is not None:
1.36 @@ -71,11 +74,13 @@
1.37 def end_record(self):
1.38 if self.record:
1.39 length = len(self.record)
1.40 - size = vint(length)
1.41 - self.f.write(size)
1.42 - self.record.tofile(self.f)
1.43 - self.written += len(size) + length
1.44 + before = len(self.cache)
1.45 + vint_to_array(length, self.cache)
1.46 + length_size = len(self.cache) - before
1.47 + self.cache += self.record
1.48 + self.written += length_size + length
1.49 self.record = array('B')
1.50 + self.flush()
1.51
1.52 def write_number(self, number):
1.53
1.54 @@ -139,12 +144,15 @@
1.55 convert_sequence(values, get_monotonic_subtractor(values[0]))
1.56 self.write_sequence_values(values, size)
1.57
1.58 - def flush(self):
1.59 + def flush(self, force=0):
1.60 if self.f is not None:
1.61 self.end_record()
1.62 + if force or len(self.cache) > CACHE_SIZE:
1.63 + self.cache.tofile(self.f)
1.64 + self.cache = array('B')
1.65
1.66 def close(self):
1.67 - self.flush()
1.68 + self.flush(1)
1.69 File.close(self)
1.70
1.71 class FileReader(File):
1.72 @@ -153,12 +161,11 @@
1.73
1.74 def __init__(self, f):
1.75 File.__init__(self, f)
1.76 + self.record_start = 0
1.77 + self.record_end = 0
1.78 + self.cache_start = 0
1.79 self.begin()
1.80
1.81 - def tell(self):
1.82 - # NOTE: Will not be accurate within the current record.
1.83 - return self.f.tell()
1.84 -
1.85 def begin(self):
1.86
1.87 "Initialise file-wide parameters."
1.88 @@ -166,34 +173,61 @@
1.89 pass
1.90
1.91 def begin_record(self):
1.92 - self.record = array('B')
1.93 self.start = 0
1.94 try:
1.95 size = self.read_number_from_file()
1.96 - self.record.fromfile(self.f, size)
1.97 + self.record = self.from_cache(size)
1.98 except EOFError:
1.99 pass
1.100
1.101 def end_record(self):
1.102 pass
1.103
1.104 + def seek(self, offset):
1.105 + if self.cache_start <= offset < self.cache_start + len(self.cache):
1.106 + self.cache = self.cache[offset - self.cache_start:]
1.107 + else:
1.108 + self.f.seek(offset)
1.109 + self.cache = array('B')
1.110 + self.cache_start = offset
1.111 + self.record_start = 0
1.112 + self.record_end = 0
1.113 + self.reset()
1.114 +
1.115 + def tell(self):
1.116 + return self.cache_start + self.record_start + self.start
1.117 +
1.118 + def ensure_cache(self, size):
1.119 + if size > len(self.cache) - self.record_end:
1.120 + self.cache = self.cache[self.record_end:]
1.121 + self.cache_start += self.record_end
1.122 + s = self.f.read(CACHE_SIZE)
1.123 + self.cache.fromstring(s)
1.124 + self.record_start = 0
1.125 + if not s:
1.126 + raise EOFError
1.127 + else:
1.128 + self.record_start = self.record_end
1.129 + self.record_end = self.record_start + size
1.130 +
1.131 + def from_cache(self, size):
1.132 + self.ensure_cache(size)
1.133 + return self.cache[self.record_start:self.record_end]
1.134 +
1.135 def read_number_from_file(self):
1.136
1.137 "Read a number from the file."
1.138
1.139 # Read each byte, adding it to the number.
1.140
1.141 - f = self.f
1.142 a = array('B')
1.143 - fromfile = a.fromfile
1.144 -
1.145 - fromfile(f, 1)
1.146 + a += self.from_cache(1)
1.147 csd = a[-1]
1.148 if csd < 128:
1.149 return csd
1.150 else:
1.151 while csd & 128:
1.152 - fromfile(f, 1)
1.153 + a += self.from_cache(1)
1.154 csd = a[-1]
1.155 return vint_from_array(a)
1.156