paul@6 | 1 | #!/usr/bin/env python |
paul@6 | 2 | |
paul@6 | 3 | """ |
paul@6 | 4 | String objects. |
paul@6 | 5 | |
paul@514 | 6 | Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> |
paul@6 | 7 | |
paul@6 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@6 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@6 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@6 | 11 | version. |
paul@6 | 12 | |
paul@6 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@6 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@6 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@6 | 16 | details. |
paul@6 | 17 | |
paul@6 | 18 | You should have received a copy of the GNU General Public License along with |
paul@6 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@6 | 20 | """ |
paul@6 | 21 | |
paul@303 | 22 | from __builtins__.operator import _negate |
paul@459 | 23 | from __builtins__.sequence import hashable, itemaccess |
paul@384 | 24 | from __builtins__.types import check_int |
paul@356 | 25 | from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \ |
paul@356 | 26 | str_substr |
paul@6 | 27 | |
# The characters treated as whitespace by the stripping and splitting methods
# of the string classes defined in this module.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")
paul@515 | 29 | |
class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        # NOTE(review): 'other' is tested for truth, not for being None, so a
        # NOTE(review): falsy 'other' (presumably including an empty string)
        # NOTE(review): leaves both members as None - confirm this is intended.

        if other:
            self.__data__ = other.__data__
            self.__key__ = other.__key__
        else:
            self.__data__ = None
            self.__key__ = None

    def __hash__(self):

        "Return a value for hashing purposes."

        return self._hashvalue(ord)

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, returning
        NotImplemented if the operands are not of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, returning
        NotImplemented if the operands are not of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        return op(other.__data__, self.__data__)

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other)

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning a string containing 'other'
        copies of this string (or no copies if 'other' is not positive).
        """

        b = buffer()
        remaining = other

        while remaining > 0:
            b.append(self)
            remaining -= 1

        return str(b)

    __rmul__ = __mul__

    # NOTE: Formatting is not implemented: these methods do nothing.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    __len__ = bytelength

    def __str__(self):

        "Return a string representation."

        return self

    def __repr__(self):

        "Return a program representation."

        # NOTE: To be implemented with proper quoting.
        b = buffer(['"', self, '"'])
        return str(b)

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def endswith(self, s):

        "Return whether this string ends with 's'."

        sublen = s.__len__()

        # An empty suffix always matches. It must be handled separately since
        # a slice starting at -0 selects the whole string, not an empty one.

        if sublen == 0:
            return True

        return self[-sublen:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        if end is None:
            end = self.__len__()

        # The last position at which 'sub' could still fit before 'end'.

        end -= sublen

        i = start or 0

        while i <= end:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:
            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # NOTE: Not implemented: this method does nothing.

    def lower(self): pass

    def lstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the start of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty (but not omitted) collection of characters strips nothing.

        if chars is not None and not chars:
            return self

        # Choose the characters to be stripped once, outside the loop.

        stripped = chars or WHITESPACE

        i = 0
        end = self.__len__()

        while i < end and self[i] in stripped:
            i += 1

        return self[i:]

    # NOTE: Not implemented: this method does nothing.

    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # The last position at which 'sub' could still fit before 'end'.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    def rsplit(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the end of the string.

        Where 'sep' is omitted or None, runs of whitespace act as a single
        separator and whitespace at either end of the string produces no empty
        fields. Once 'maxsplit' is reached, the remaining start of the string is
        returned as the first field with only its trailing whitespace removed.

        A ValueError is raised if 'sep' is an empty string.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        l = []
        splits = 0

        # Split on whitespace.

        if not sep:

            # Without a limit, the fields are the same from either direction.

            if maxsplit is None:
                return self.split(sep, maxsplit)

            end = self.__len__()

            while end > 0:

                # Skip the whitespace following the next field.

                while end > 0 and self[end-1] in WHITESPACE:
                    end -= 1

                if end <= 0:
                    break

                # With the limit reached, keep the rest of the string intact.

                if splits >= maxsplit:
                    l.insert(0, self[:end])
                    break

                # Otherwise, find the start of the field and record it.

                first = end

                while first > 0 and self[first-1] not in WHITESPACE:
                    first -= 1

                l.insert(0, self[first:end])
                end = first
                splits += 1

            return l

        # Split on the specified separator.

        if maxsplit is not None and not maxsplit:
            return [self]

        # Scanning is always done from the end, even without a limit, since
        # overlapping separators would otherwise be matched differently.

        seplen = len(sep)
        start = seplen

        i = last = self.__len__()

        while i >= start and (maxsplit is None or splits < maxsplit):

            # Find the separator immediately before the current position.

            if self[i-seplen:i] == sep:
                l.insert(0, self[i:last])
                i -= seplen
                last = i
                splits += 1

            # Check the previous character.

            else:
                i -= 1

        l.insert(0, self[:last])
        return l

    def rstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the end of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty (but not omitted) collection of characters strips nothing.

        if chars is not None and not chars:
            return self

        # Choose the characters to be stripped once, outside the loop.

        stripped = chars or WHITESPACE

        i = self.__len__() - 1

        while i >= 0 and self[i] in stripped:
            i -= 1

        return self[:i+1]

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the start of the string.

        Where 'sep' is omitted or None, runs of whitespace act as a single
        separator and whitespace at either end of the string produces no empty
        fields. Once 'maxsplit' is reached, the remaining end of the string is
        returned as the last field with only its leading whitespace removed.

        A ValueError is raised if 'sep' is an empty string.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        l = []
        splits = 0
        length = self.__len__()

        # Split on whitespace.

        if not sep:
            i = 0

            while i < length:

                # Skip the whitespace preceding the next field.

                while i < length and self[i] in WHITESPACE:
                    i += 1

                if i >= length:
                    break

                # With the limit reached, keep the rest of the string intact.

                if maxsplit is not None and splits >= maxsplit:
                    l.append(self[i:])
                    break

                # Otherwise, find the end of the field and record it.

                first = i

                while i < length and self[i] not in WHITESPACE:
                    i += 1

                l.append(self[first:i])
                splits += 1

            return l

        # Split on the specified separator.

        if maxsplit is not None and not maxsplit:
            return [self]

        seplen = len(sep)

        # The last position at which the separator could still fit.

        end = length - seplen

        i = last = 0

        while i <= end and (maxsplit is None or splits < maxsplit):

            # Find the separator at the current position.

            if self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    # NOTE: Not implemented: this method does nothing.

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    def strip(self, chars=None):

        """
        Strip any of the given 'chars' from the start and end of the string, or
        strip whitespace characters if 'chars' is omitted or None.
        """

        return self.lstrip(chars).rstrip(chars)

    # NOTE: Not implemented: this method does nothing.

    def upper(self): pass
paul@6 | 453 | |
class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        """
        Return the single-element string found at the normalised (positive)
        'index', with the index being checked beforehand.
        """

        self._check_index(index)

        # Request the range covering only 'index', using a step of one.

        following = index + 1
        return str_substr(self.__data__, index, following, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return the items found from 'start' up to (but not including) 'end',
        taking every item at 'step' intervals. A ValueError is raised if 'step'
        is zero.
        """

        # Validate the arguments in order before doing anything else.

        self._check_index(start)
        self._check_end_index(end)
        check_int(step)

        if step == 0:
            raise ValueError(step)

        # Only a non-empty range requires the string data to be consulted.

        if start != end:
            return str_substr(self.__data__, start, end, step)

        return ""
paul@292 | 485 | |
def str(obj):

    "Return the string representation of 'obj' as given by its __str__ method."

    # The __str__ method is provided by the class attributes of instances.

    text = obj.__str__()
    return text
paul@188 | 493 | |
paul@6 | 494 | # vim: tabstop=4 expandtab shiftwidth=4 |