#!/usr/bin/env python

"""
String objects.

Copyright (C) 2015, 2016, 2017, 2021 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.operator import _negate
from __builtins__.sequence import hashable, itemaccess
from __builtins__.types import check_int
from native import isinstance as _isinstance, \
                   str_add, str_lt, str_gt, str_eq, str_ord, \
                   str_size, str_substr

# Characters treated as whitespace by the strip and split methods.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")

class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        if other:
            self.__data__ = other.__data__
            self.__key__ = other.__key__
            self.__size__ = other.__size__
        else:
            self.__data__ = None
            self.__key__ = None
            self.__size__ = None

    # Internal methods.

    def _binary_op(self, op, other, sizes=False):

        """
        Perform 'op' on this object and 'other' if appropriate, passing the
        operands' data and, if 'sizes' is set, their sizes as well. Return
        NotImplemented if the operands are not of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        elif sizes:
            return op(self.__data__, other.__data__, self.__size__, other.__size__)
        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other, sizes=False):

        """
        Perform 'op' on 'other' and this object if appropriate, passing the
        operands' data and, if 'sizes' is set, their sizes as well. Return
        NotImplemented if the operands are not of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        elif sizes:
            return op(other.__data__, self.__data__, other.__size__, self.__size__)
        else:
            return op(other.__data__, self.__data__)

    def _quote(self, quote):

        """
        Return a quoted representation of this string, delimited by 'quote',
        with occurrences of 'quote' and characters outside the range 32..127
        replaced by escape sequences.
        """

        b = buffer([quote])

        # Characters from 'last' up to 'i' are pending unquoted text.

        i = last = 0
        end = self.__len__()

        while i < end:
            c = self[i]

            # Handle quotes before anything else.

            if c == quote:

                # Emit pending unquoted text first: since 'last' is advanced
                # beyond the quote below, it would otherwise be lost.

                b.append(self[last:i])
                b.append("\\")
                b.append(quote)
                i += 1
                last = i
                continue

            # Extended unquoted text.
            # NOTE(review): a backslash falls within this range and is thus
            # NOTE(review): emitted unescaped - confirm whether this is wanted.

            n = ord(c)

            if 32 <= n < 128:
                i += 1
                continue

            # Before quoting, emit unquoted text.

            b.append(self[last:i])

            # Add quoted value.

            if c == "\t":
                b.append("\\t")
            elif c == "\n":
                b.append("\\n")
            elif c == "\r":
                b.append("\\r")
            else:
                self._quote_value(b, n)

            i += 1
            last = i

        # Emit remaining unquoted text.

        b.append(self[last:])
        b.append(quote)
        return str(b)

    def _quote_value(self, b, n):

        """
        Append to 'b' the quoted form of 'n': the text "\\x" followed by at
        least two hexadecimal digits.
        """

        # Negative values are shifted into the range of byte values.

        if n < 0:
            n += 256
        b.append("\\x")

        # Obtain the digits without any prefix, padding to two digits.

        x = hex(n, "")
        if len(x) < 2:
            b.append("0")
        b.append(x)

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_size(self.__size__)

    # General type methods.

    def __bool__(self):

        "Return whether the string provides any data."

        return str_size(self.__size__).__bool__()

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def __hash__(self):

        "Return a value for hashing purposes."

        return self._hashvalue(ord)

    # The length of a string of this class is its length in bytes.

    __len__ = bytelength

    def __repr__(self):

        "Return a program representation."

        return self._quote('"')

    def __str__(self):

        "Return a string representation."

        return self

    # Operator methods.

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other, True)

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other, True)

    def __mod__(self, other):

        """
        Format 'other' using this string. Where 'other' is not a tuple, it is
        treated as a single value. Only the %s and %r format codes are
        interpreted.
        """

        if not _isinstance(other, tuple):
            other = [other]

        # The index of the next value to be formatted.

        i = 0
        first = True
        b = buffer()

        for s in self.split("%"):

            # Text before the first % is emitted as it is.

            if first:
                b.append(s)
                first = False
                continue

            # Handle format codes.
            # NOTE: To be completed.

            # NOTE(review): since the string is split on %, no section can
            # NOTE(review): start with %, so this test cannot succeed and "%%"
            # NOTE(review): is reproduced as "%%" by the final case below.

            if s.startswith("%"):
                b.append(s)

            elif s.startswith("s"):
                b.append(str(other[i]))
                b.append(s[1:])
                i += 1

            elif s.startswith("r"):
                b.append(repr(other[i]))
                b.append(s[1:])
                i += 1

            # Unrecognised code: probably just a stray %.

            else:
                b.append("%")
                b.append(s)

        return str(b)

    # Not yet implemented.

    def __rmod__(self, other): pass

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning this string repeated that
        many times, or an empty string if 'other' is not positive.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    __rmul__ = __mul__

    # Comparison methods.

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    # String-specific methods.

    def __ord__(self):

        """
        Return the value of the string, if only a single character, raising
        ValueError otherwise.
        """

        if self.__len__() == 1:
            return str_ord(self.__data__)
        else:
            raise ValueError(self)

    def endswith(self, s):

        "Return whether this string ends with 's'."

        n = s.__len__()

        # An empty suffix is found at the end of any string. It must be tested
        # separately because self[-0:] selects the entire string.

        if n == 0:
            return True

        return self[-n:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        if end is None:
            end = self.__len__()

        # Make 'end' the last position at which 'sub' could start.

        end -= sublen

        i = start or 0

        while i <= end:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:
            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # Not yet implemented.

    def lower(self): pass

    def lstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the start of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters leaves the string unchanged.

        if chars is not None and not chars:
            return self

        i = 0
        end = self.__len__()

        while i < end and self[i] in (chars or WHITESPACE):
            i += 1

        return self[i:]

    # Not yet implemented.

    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Start at the last position at which 'sub' could occur.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    def rsplit(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the end of the string. ValueError is raised if 'sep' is empty.
        """

        # Without a limit, the direction of splitting is of no consequence.

        if not maxsplit:
            return self.split(sep, maxsplit)

        if sep is not None and not sep:
            raise ValueError(sep)

        seplen = sep and len(sep) or 1
        start = seplen
        splits = 0

        l = []

        # Positions denote the end of the text under consideration, with 'last'
        # being the end of the text not yet added to the result.

        i = last = self.__len__()

        while i >= start and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i-seplen:i] == sep:
                l.insert(0, self[i:last])
                i -= seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i-1] in WHITESPACE:
                l.insert(0, self[i:last])
                while i > start:
                    i -= 1
                    if self[i-1] not in WHITESPACE:
                        break

                # Only whitespace remains before this point: record that no
                # text remains, so that text already added is not added again
                # below.

                else:
                    last = 0
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i -= 1

        l.insert(0, self[:last])
        return l

    def rstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the end of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters leaves the string unchanged.

        if chars is not None and not chars:
            return self

        i = self.__len__() - 1

        while i >= 0 and self[i] in (chars or WHITESPACE):
            i -= 1

        return self[:i+1]

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the start of the string. ValueError is raised if 'sep' is empty.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        # A limit of zero prevents any splitting.

        if maxsplit is not None and not maxsplit:
            return [self]

        seplen = sep and len(sep) or 1

        # The last position at which a separator could start.

        end = self.__len__() - seplen
        splits = 0

        l = []

        # Text from 'last' onwards has not yet been added to the result.

        i = last = 0

        while i <= end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:
                l.append(self[last:i])
                while i < end:
                    i += 1
                    if self[i] not in WHITESPACE:
                        break

                # Only whitespace remains from this point: record that no text
                # remains, so that text already added is not added again below.

                else:
                    last = self.__len__()
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    # Not yet implemented.

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    def strip(self, chars=None):

        """
        Strip any of the given 'chars' from the start and end of the string, or
        strip whitespace characters if 'chars' is omitted or None.
        """

        return self.lstrip(chars).rstrip(chars)

    # Not yet implemented.

    def upper(self): pass

class str(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        "Return the item at the normalised (positive) 'index'."

        self._check_index(index)
        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals. ValueError is raised if 'step' is zero.
        """

        # An empty range yields an empty string without further checks.

        if start == end:
            return ""

        check_int(step)

        if step == 0:
            raise ValueError(step)

        # Obtain the items using the inherited method and combine them.

        l = get_using(basestring.__get_multiple_items__, self)(start, end, step)
        return "".join(l)

def new_str(obj):

    "Return the string representation of 'obj'."

    # Class attributes of instances provide __str__.

    return obj.__str__()

# vim: tabstop=4 expandtab shiftwidth=4