paul@6 | 1 | #!/usr/bin/env python |
paul@6 | 2 | |
paul@6 | 3 | """ |
paul@6 | 4 | String objects. |
paul@6 | 5 | |
paul@514 | 6 | Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> |
paul@6 | 7 | |
paul@6 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@6 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@6 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@6 | 11 | version. |
paul@6 | 12 | |
paul@6 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@6 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@6 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@6 | 16 | details. |
paul@6 | 17 | |
paul@6 | 18 | You should have received a copy of the GNU General Public License along with |
paul@6 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@6 | 20 | """ |
paul@6 | 21 | |
paul@303 | 22 | from __builtins__.operator import _negate |
paul@459 | 23 | from __builtins__.sequence import hashable, itemaccess |
paul@384 | 24 | from __builtins__.types import check_int |
paul@356 | 25 | from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \ |
paul@356 | 26 | str_substr |
paul@6 | 27 | |
# The characters regarded as whitespace by basestring.split when it is called
# without an explicit separator.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")
paul@515 | 29 | |
class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        # NOTE(review): 'other' is tested for truth, so an empty string given
        # NOTE(review): as 'other' leaves __data__ as None - confirm that the
        # NOTE(review): native routines accept this.

        if other:
            self.__data__ = other.__data__
        else:
            self.__data__ = None

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        if other:
            self.__key__ = other.__key__
        else:
            self.__key__ = None

    def __hash__(self):

        "Return a value for hashing purposes."

        # The computation is delegated to _hashvalue, which is not defined in
        # this class (presumably provided by the hashable base class), with
        # ord supplied as the function applied to the string's items.

        return self._hashvalue(ord)

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on anything not having exactly this object's class,
        # thus excluding specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on anything not having exactly this object's class,
        # thus excluding specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data, with the
        # data of 'other' as the first operand.

        else:
            return op(other.__data__, self.__data__)

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    # Addition produces a new string in the same way as in-place addition.

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other)

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning a string containing this
        string repeated 'other' times. No repetitions are made if 'other' is
        not greater than zero.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    # Repetition does not depend on the order of the operands.

    __rmul__ = __mul__

    # NOTE: Formatting operations are not yet implemented.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    # In this class, the length of a string is its length in bytes.

    __len__ = bytelength

    def __str__(self):

        "Return a string representation."

        return self

    def __repr__(self):

        "Return a program representation."

        # NOTE: To be implemented with proper quoting.
        b = buffer(['"', self, '"'])
        return str(b)

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def endswith(self, s):

        "Return whether this string ends with 's'."

        n = s.__len__()

        # Every string ends with the empty string. Without this test, the
        # slice below would start at -0, which is the start of the string,
        # causing the whole string to be compared with 's'.

        if n == 0:
            return True

        return self[-n:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        i = start or 0

        if end is None:
            end = self.__len__()

        # An empty substring is found immediately at the starting position,
        # provided that this position is not beyond the end position. No
        # slicing is needed to establish this.

        if sublen == 0:
            if i <= end:
                return i
            else:
                return -1

        # The final candidate position is the one where 'sub' would finish
        # exactly at 'end', and so this position must also be tested.

        while i <= end - sublen:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:

            # This string is only inserted between elements, not before the
            # first one.

            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # NOTE: The following operations are not yet implemented.

    def lower(self): pass
    def lstrip(self, chars=None): pass
    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Start at the last position where 'sub' would finish exactly at 'end'
        # and work backwards towards 'start'.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    # NOTE: The following operations are not yet implemented.

    def rsplit(self, sep=None, maxsplit=None): pass
    def rstrip(self, chars=None): pass

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).

        Return a list of the strings found between separators. When splitting
        on whitespace, adjacent whitespace characters are treated as a single
        separator, and whitespace at the start or end of the string does not
        produce empty strings in the result.

        Raise ValueError if 'sep' is an empty string.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        end = self.__len__()
        seplen = sep and len(sep)
        splits = 0

        l = []
        i = last = 0

        while i < end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator, only testing positions where the
            # separator would fit before the end of the string.

            if sep and i + seplen <= end and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:

                # Only text preceding the whitespace constitutes a field and
                # counts as a split.

                if i > last:
                    l.append(self[last:i])
                    splits += 1

                # Test the position against the end of the string before
                # accessing each character, since the whitespace may extend
                # to the very end.

                i += 1
                while i < end and self[i] in WHITESPACE:
                    i += 1

                last = i

            # Check the next character.

            else:
                i += 1

        # With an explicit separator, the remaining text is always a field,
        # even if empty. With whitespace as separator, only remaining text
        # that is not empty is a field.

        if sep or last < end:
            l.append(self[last:])

        return l

    # NOTE: This operation is not yet implemented.

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    # NOTE: The following operations are not yet implemented.

    def strip(self, chars=None): pass
    def upper(self): pass
paul@6 | 351 | |
class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        "Return the item at the normalised (positive) 'index'."

        # Reject any index not accepted by the index check before consulting
        # the native routine.

        self._check_index(index)

        # A single item is obtained as the substring from 'index' up to the
        # following position, using a step of one.

        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals. A 'step' of zero causes ValueError to be raised.
        """

        # Validate all of the arguments before inspecting their values.

        self._check_index(start)
        self._check_end_index(end)
        check_int(step)

        # A zero step can never reach the end position.

        if step == 0:
            raise ValueError(step)

        # An empty region yields an empty string without any need to consult
        # the native routine.

        elif start == end:
            return ""

        # Otherwise, the native routine extracts the substring.

        else:
            return str_substr(self.__data__, start, end, step)
def str(obj):

    "Return the string representation of 'obj'."

    # The representation is obtained from the __str__ method, which is
    # provided as a class attribute of instances.

    representation = obj.__str__()
    return representation
paul@188 | 391 | |
paul@6 | 392 | # vim: tabstop=4 expandtab shiftwidth=4 |