1 #!/usr/bin/env python 2 3 """ 4 String objects. 5 6 Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from __builtins__.operator import _negate 23 from __builtins__.sequence import hashable, itemaccess 24 from __builtins__.types import check_int 25 from native import str_add, str_lt, str_gt, str_eq, str_ord, \ 26 str_substr 27 28 WHITESPACE = (" ", "\f", "\n", "\r", "\t") 29 30 class basestring(hashable): 31 32 "The base class for all strings." 33 34 def __init__(self, other=None): 35 36 "Initialise the string, perhaps from 'other'." 37 38 # Note the __data__ member. Since strings are either initialised from 39 # literals or converted using routines defined for other types, no form 40 # of actual initialisation is performed here. 41 42 # Note the __key__ member. This is also initialised statically. Where 43 # a string is the same as an attribute name, the __key__ member contains 44 # attribute position and code details. 45 46 # NOTE: Cannot perform "other and other.__data__ or None" since the 47 # NOTE: __data__ attribute is not a normal attribute. 48 49 if other: 50 self.__data__ = other.__data__ 51 self.__key__ = other.__key__ 52 self.__size__ = other.__size__ 53 else: 54 self.__data__ = None 55 self.__key__ = None 56 self.__size__ = 0 57 58 # Internal methods. 59 60 def _binary_op(self, op, other, sizes=False): 61 62 "Perform 'op' on this object and 'other' if appropriate." 63 64 # Refuse to operate on specialisations of this class. 65 66 if self.__class__ is not other.__class__: 67 return NotImplemented 68 69 # Otherwise, perform the operation on the operands' data. 70 71 elif sizes: 72 return op(self.__data__, other.__data__, self.__size__, other.__size__) 73 else: 74 return op(self.__data__, other.__data__) 75 76 def _binary_op_rev(self, op, other, sizes=False): 77 78 "Perform 'op' on 'other' and this object if appropriate." 79 80 # Refuse to operate on specialisations of this class. 81 82 if self.__class__ is not other.__class__: 83 return NotImplemented 84 85 # Otherwise, perform the operation on the operands' data. 86 87 elif sizes: 88 return op(other.__data__, self.__data__, other.__size__, self.__size__) 89 else: 90 return op(other.__data__, self.__data__) 91 92 def _quote(self, quote): 93 94 "Return a quoted representation of this string." 95 96 b = buffer([quote]) 97 i = last = 0 98 end = self.__len__() 99 100 while i < end: 101 c = self[i] 102 103 # Handle quotes before anything else. 104 105 if c == quote: 106 b.append("\\") 107 b.append(quote) 108 i += 1 109 last = i 110 continue 111 112 # Extended unquoted text. 113 114 n = ord(c) 115 116 if 32 <= n < 128: 117 i += 1 118 continue 119 120 # Before quoting, emit unquoted text. 121 122 b.append(self[last:i]) 123 124 # Add quoted value. 125 126 if c == "\t": 127 b.append("\\t") 128 elif c == "\n": 129 b.append("\\n") 130 elif c == "\r": 131 b.append("\\r") 132 else: 133 self._quote_value(b, n) 134 135 i += 1 136 last = i 137 138 # Emit remaining unquoted text. 139 140 b.append(self[last:]) 141 b.append(quote) 142 return str(b) 143 144 def _quote_value(self, b, n): 145 146 "Append to 'b' the quoted form of 'n'." 147 148 if n < 0: 149 n += 256 150 b.append("\\x") 151 x = hex(n, "") 152 if len(x) < 2: 153 b.append("0") 154 b.append(x) 155 156 def bytelength(self): 157 158 "Return the number of bytes in this string." 159 160 return self.__size__ 161 162 # General type methods. 163 164 def __bool__(self): 165 166 "Return whether the string provides any data." 167 168 return self.__size__.__bool__() 169 170 def __contains__(self, value): 171 172 "Return whether this string contains 'value'." 173 174 return self.find(value) != -1 175 176 def __hash__(self): 177 178 "Return a value for hashing purposes." 179 180 return self._hashvalue(ord) 181 182 __len__ = bytelength 183 184 def __repr__(self): 185 186 "Return a program representation." 187 188 return self._quote('"') 189 190 def __str__(self): 191 192 "Return a string representation." 193 194 return self 195 196 # Operator methods. 197 198 def __iadd__(self, other): 199 200 "Return a string combining this string with 'other'." 201 202 return self._binary_op(str_add, other, True) 203 204 __add__ = __iadd__ 205 206 def __radd__(self, other): 207 208 "Return a string combining this string with 'other'." 209 210 return self._binary_op_rev(str_add, other, True) 211 212 def __mod__(self, other): pass 213 def __rmod__(self, other): pass 214 215 def __mul__(self, other): 216 217 "Multiply the string by 'other'." 218 219 b = buffer() 220 221 while other > 0: 222 b.append(self) 223 other -= 1 224 225 return str(b) 226 227 __rmul__ = __mul__ 228 229 # Comparison methods. 230 231 def __eq__(self, other): 232 233 "Return whether this string is equal to 'other'." 234 235 return self._binary_op(str_eq, other) 236 237 def __ge__(self, other): 238 239 "Return whether this string is greater than or equal to 'other'." 240 241 return _negate(self.__lt__(other)) 242 243 def __gt__(self, other): 244 245 "Return whether this string is greater than 'other'." 246 247 return self._binary_op(str_gt, other) 248 249 def __le__(self, other): 250 251 "Return whether this string is less than or equal to 'other'." 252 253 return _negate(self.__gt__(other)) 254 255 def __lt__(self, other): 256 257 "Return whether this string is less than 'other'." 258 259 return self._binary_op(str_lt, other) 260 261 def __ne__(self, other): 262 263 "Return whether this string is not equal to 'other'." 264 265 return _negate(self.__eq__(other)) 266 267 # String-specific methods. 268 269 def __ord__(self): 270 271 "Return the value of the string, if only a single character." 272 273 if self.__len__() == 1: 274 return str_ord(self.__data__) 275 else: 276 raise ValueError, self 277 278 def endswith(self, s): 279 280 "Return whether this string ends with 's'." 281 282 return self[-s.__len__():] == s 283 284 def find(self, sub, start=None, end=None): 285 286 """ 287 Find 'sub' in the string if it occurs from or after the 'start' position 288 (or 0, if omitted) and before the 'end' position (or the end of the 289 string, if omitted), returning the earliest occurrence or -1 if 'sub' is 290 not present. 291 """ 292 293 sublen = sub.__len__() 294 295 if end is None: 296 end = self.__len__() 297 298 end -= sublen 299 300 i = start or 0 301 302 while i <= end: 303 if sub == self[i:i+sublen]: 304 return i 305 i += 1 306 307 return -1 308 309 def index(self, sub, start=None, end=None): 310 311 """ 312 Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending 313 at 'end' (or the end of the string, if omitted), raising ValueError if 314 'sub' is not present. 315 """ 316 317 i = self.find(sub, start, end) 318 319 if i == -1: 320 raise ValueError(sub) 321 else: 322 return i 323 324 def join(self, l): 325 326 "Join the elements in 'l' with this string." 327 328 # Empty strings just cause the list elements to be concatenated. 329 330 if not self.__bool__(): 331 return str(buffer(l)) 332 333 # Non-empty strings join the elements together in a buffer. 334 335 b = buffer() 336 first = True 337 338 for s in l: 339 if first: 340 first = False 341 else: 342 b.append(self) 343 b.append(s) 344 345 return str(b) 346 347 def lower(self): pass 348 349 def lstrip(self, chars=None): 350 351 """ 352 Strip any of the given 'chars' from the start of the string, or strip 353 whitespace characters is 'chars' is omitted or None. 354 """ 355 356 if chars is not None and not chars: 357 return self 358 359 i = 0 360 end = self.__len__() 361 362 while i < end and self[i] in (chars or WHITESPACE): 363 i += 1 364 365 return self[i:] 366 367 def replace(self, old, new, count=None): pass 368 369 def rfind(self, sub, start=None, end=None): 370 371 """ 372 Find 'sub' in the string if it occurs from or after the 'start' position 373 (or 0, if omitted) and before the 'end' position (or the end of the 374 string, if omitted), returning the latest occurrence or -1 if 'sub' is 375 not present. 376 """ 377 378 sublen = sub.__len__() 379 380 start = start or 0 381 382 if end is None: 383 end = self.__len__() 384 385 i = end - sublen 386 387 while i >= start: 388 if sub == self[i:i+sublen]: 389 return i 390 i -= 1 391 392 return -1 393 394 def rsplit(self, sep=None, maxsplit=None): 395 396 """ 397 Split the string using the given 'sep' as separator (or any whitespace 398 character if omitted or specified as None), splitting at most 'maxsplit' 399 times (or as many times as is possible if omitted or specified as None). 400 Where 'maxsplit' is given, the number of split points is counted from 401 the end of the string. 402 """ 403 404 if not maxsplit: 405 return self.split(sep, maxsplit) 406 407 if sep is not None and not sep: 408 raise ValueError, sep 409 410 seplen = sep and len(sep) or 1 411 start = seplen 412 splits = 0 413 414 l = [] 415 i = last = self.__len__() 416 417 while i >= start and (maxsplit is None or splits < maxsplit): 418 419 # Find any specified separator. 420 421 if sep and self[i-seplen:i] == sep: 422 l.insert(0, self[i:last]) 423 i -= seplen 424 last = i 425 splits += 1 426 427 # Find any whitespace character and skip adjacent characters. 428 429 elif not sep and self[i-1] in WHITESPACE: 430 l.insert(0, self[i:last]) 431 while i > start: 432 i -= 1 433 if self[i-1] not in WHITESPACE: 434 break 435 else: 436 break 437 last = i 438 splits += 1 439 440 # Check the next character. 441 442 else: 443 i -= 1 444 445 l.insert(0, self[:last]) 446 return l 447 448 def rstrip(self, chars=None): 449 450 """ 451 Strip any of the given 'chars' from the end of the string, or strip 452 whitespace characters is 'chars' is omitted or None. 453 """ 454 455 if chars is not None and not chars: 456 return self 457 458 i = self.__len__() - 1 459 460 while i >= 0 and self[i] in (chars or WHITESPACE): 461 i -= 1 462 463 return self[:i+1] 464 465 def split(self, sep=None, maxsplit=None): 466 467 """ 468 Split the string using the given 'sep' as separator (or any whitespace 469 character if omitted or specified as None), splitting at most 'maxsplit' 470 times (or as many times as is possible if omitted or specified as None). 471 Where 'maxsplit' is given, the number of split points is counted from 472 the start of the string. 473 """ 474 475 if sep is not None and not sep: 476 raise ValueError, sep 477 478 if maxsplit is not None and not maxsplit: 479 return [self] 480 481 seplen = sep and len(sep) or 1 482 end = self.__len__() - seplen 483 splits = 0 484 485 l = [] 486 i = last = 0 487 488 while i <= end and (maxsplit is None or splits < maxsplit): 489 490 # Find any specified separator. 491 492 if sep and self[i:i+seplen] == sep: 493 l.append(self[last:i]) 494 i += seplen 495 last = i 496 splits += 1 497 498 # Find any whitespace character and skip adjacent characters. 499 500 elif not sep and self[i] in WHITESPACE: 501 l.append(self[last:i]) 502 while i < end: 503 i += 1 504 if self[i] not in WHITESPACE: 505 break 506 else: 507 break 508 last = i 509 splits += 1 510 511 # Check the next character. 512 513 else: 514 i += 1 515 516 l.append(self[last:]) 517 return l 518 519 def splitlines(self, keepends=False): pass 520 521 def startswith(self, s): 522 523 "Return whether this string starts with 's'." 524 525 return self[:s.__len__()] == s 526 527 def strip(self, chars=None): 528 529 """ 530 Strip any of the given 'chars' from the start and end of the string, or 531 strip whitespace characters is 'chars' is omitted or None. 532 """ 533 534 return self.lstrip(chars).rstrip(chars) 535 536 def upper(self): pass 537 538 class string(basestring): 539 540 "A plain string of bytes." 541 542 # Special implementation methods. 543 544 def __get_single_item__(self, index): 545 546 "Return the item at the normalised (positive) 'index'." 547 548 self._check_index(index) 549 return str_substr(self.__data__, index, index + 1, 1) 550 551 def __get_multiple_items__(self, start, end, step): 552 553 """ 554 Return items from 'start' until (but excluding) 'end', at 'step' 555 intervals. 556 """ 557 558 if start == end: 559 return "" 560 561 check_int(step) 562 563 if step == 0: 564 raise ValueError(step) 565 566 l = get_using(basestring.__get_multiple_items__, self)(start, end, step) 567 return "".join(l) 568 569 def str(obj): 570 571 "Return the string representation of 'obj'." 572 573 # Class attributes of instances provide __str__. 574 575 return obj.__str__() 576 577 # vim: tabstop=4 expandtab shiftwidth=4