paul@6 | 1 | #!/usr/bin/env python |
paul@6 | 2 | |
paul@6 | 3 | """ |
paul@6 | 4 | String objects. |
paul@6 | 5 | |
paul@6 | 6 | Copyright (C) 2015, 2016 Paul Boddie <paul@boddie.org.uk> |
paul@6 | 7 | |
paul@6 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@6 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@6 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@6 | 11 | version. |
paul@6 | 12 | |
paul@6 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@6 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@6 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@6 | 16 | details. |
paul@6 | 17 | |
paul@6 | 18 | You should have received a copy of the GNU General Public License along with |
paul@6 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@6 | 20 | """ |
paul@6 | 21 | |
paul@390 | 22 | from __builtins__.int import maxint |
paul@303 | 23 | from __builtins__.operator import _negate |
paul@292 | 24 | from __builtins__.sequence import itemaccess |
paul@384 | 25 | from __builtins__.types import check_int |
paul@356 | 26 | from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \ |
paul@356 | 27 | str_substr |
paul@6 | 28 | |
class basestring(itemaccess):

    "The base class for all strings."

    # Parameters of the rolling hash computed by __hash__: '_a' is the
    # multiplier applied for each item and '_p' is the modulus bounding the
    # result.
    # NOTE(review): '_p' is presumably chosen so that an intermediate value
    # NOTE(review): of (result * _a + ord(item)) stays below maxint; confirm.

    _p = maxint / 32
    _a = 31

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        # Compare with None explicitly: testing the truth of 'other' would
        # treat an empty string as if it were absent and discard its data and
        # key instead of copying them.

        if other is not None:
            self.__data__ = other.__data__
            self.__key__ = other.__key__
        else:
            self.__data__ = None
            self.__key__ = None

    def __hash__(self):

        "Return a value for hashing purposes."

        # Accumulate each item's ordinal value in turn, scaling the running
        # total by '_a' and reducing it modulo '_p' at every step.

        result = 0
        length = self.__len__()
        i = 0

        while i < length:
            result = (result * self._a + ord(self.__get_single_item__(i))) % self._p
            i += 1

        return result

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data, with the
        # data of 'other' as the first operand.

        return op(other.__data__, self.__data__)

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    # Addition yields a new result rather than modifying this string, so the
    # same implementation serves both the plain and in-place operations.

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other)

    # Operations that are not yet implemented: each currently returns None.

    def __mul__(self, other): pass
    def __rmul__(self, other): pass
    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        # NOTE(review): __gt__ may return NotImplemented; _negate is relied
        # NOTE(review): upon to deal with that value. Confirm in operator.

        return _negate(self.__gt__(other))

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    __len__ = bytelength

    def __str__(self):

        "Return a string representation, this being the string itself."

        return self

    def __repr__(self):

        "Return a program representation."

        # NOTE: To be implemented with proper quoting. For now, the string is
        # NOTE: merely enclosed in double quotes without any escaping.

        b = buffer(['"', self, '"'])
        return str(b)

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def endswith(self, s):

        "Return whether this string ends with 's'."

        sublen = s.__len__()

        # Every string ends with the empty string. This needs testing
        # explicitly because a slice starting at -0 starts at the beginning,
        # selecting the whole string instead of nothing.

        if sublen == 0:
            return True

        # A longer string cannot be a suffix, and testing this here avoids
        # slicing beyond the start of this string.

        if sublen > self.__len__():
            return False

        return self[-sublen:] == s

    def _find_offset(self, pos, length):

        """
        Return 'pos' as a non-negative offset, with negative values being
        taken relative to the given 'length' and confined to zero.
        """

        if pos >= 0:
            return pos

        pos += length

        if pos < 0:
            return 0

        return pos

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), returning -1 if 'sub'
        is not present.

        Negative 'start' and 'end' values are taken relative to the end of the
        string, and an 'end' beyond the end of the string is confined to it.
        """

        length = self.__len__()
        sublen = sub.__len__()

        # Compare with None instead of testing truth so that an explicit 'end'
        # of zero is not mistaken for an omitted argument.

        i = 0
        if start is not None:
            i = self._find_offset(start, length)

        if end is None or end > length:
            end = length
        else:
            end = self._find_offset(end, length)

        # An empty 'sub' is found at the starting position provided that the
        # region being searched exists. No slicing is needed to establish this.

        if sublen == 0:
            if i <= end:
                return i
            return -1

        # The final candidate position is the one where 'sub' fits exactly
        # before 'end', and so it must be included in the search.

        last = end - sublen

        while i <= last:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)

        return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer, with this
        # string being added before every element except the first.

        b = buffer()
        first = True

        for s in l:
            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # Methods that are not yet implemented: each currently returns None.

    def lower(self): pass
    def lstrip(self, chars=None): pass
    def replace(self, old, new, count=None): pass
    def rfind(self, sub, start=None, end=None): pass
    def rsplit(self, sep=None, maxsplit=None): pass
    def rstrip(self, chars=None): pass
    def split(self, sep=None, maxsplit=None): pass
    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        sublen = s.__len__()

        # Every string starts with the empty string, and a longer string
        # cannot be a prefix. Testing these cases here avoids slicing an empty
        # string or slicing beyond the end of this string.

        if sublen == 0:
            return True

        if sublen > self.__len__():
            return False

        return self[:sublen] == s

    # Methods that are not yet implemented: each currently returns None.

    def strip(self, chars=None): pass
    def upper(self): pass
paul@6 | 273 | |
class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        """
        Return the single item found at the normalised (positive) 'index',
        having first checked that the index is acceptable.
        """

        self._check_index(index)

        # Request the region from 'index' up to the following position, using
        # a step of one.

        following = index + 1
        return str_substr(self.__data__, index, following, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return the items found from 'start' up to (but not including) 'end',
        selecting them at intervals of 'step'. A 'step' of zero causes a
        ValueError to be raised.
        """

        # Validate the arguments in order: the start, the end, then the step.

        self._check_index(start)
        self._check_end_index(end)
        check_int(step)

        if step == 0:
            raise ValueError(step)

        # Only consult the underlying data for a region that is not empty.

        if start != end:
            return str_substr(self.__data__, start, end, step)

        return ""
paul@292 | 305 | |
def str(obj):

    "Return the string representation provided by 'obj'."

    # Instances obtain __str__ from the attributes of their class, so the
    # object is simply asked to produce its own representation.

    representation = obj.__str__()
    return representation
paul@188 | 313 | |
paul@6 | 314 | # vim: tabstop=4 expandtab shiftwidth=4 |