#!/usr/bin/env python

"""
String objects.

Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.operator import _negate
from __builtins__.sequence import hashable, itemaccess
from __builtins__.types import check_int
from native import str_add, str_lt, str_gt, str_eq, str_len, str_ord, \
                   str_nonempty, str_substr

# Characters regarded as whitespace by the strip and split methods.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")

class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        # NOTE(review): 'other' is tested for truth, so a false value such as
        # NOTE(review): an empty string is treated like None here - confirm
        # NOTE(review): that this is intended.

        if other:
            self.__data__ = other.__data__
        else:
            self.__data__ = None

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        if other:
            self.__key__ = other.__key__
        else:
            self.__key__ = None

    # Internal methods.

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, returning the
        result of 'op' applied to the data of both operands, or NotImplemented
        if the operands are not instances of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, returning the
        result of 'op' applied to the data of both operands (with that of
        'other' first), or NotImplemented if the operands are not instances of
        exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(other.__data__, self.__data__)

    def _quote(self, quote):

        """
        Return a quoted representation of this string, delimited by the given
        'quote' character. Occurrences of 'quote' are preceded by a backslash,
        tab, newline and carriage return are written as \\t, \\n and \\r, and
        other characters with values outside the range 32 to 127 are written
        as \\x escapes.
        """

        b = buffer([quote])

        # 'last' marks the start of the pending run of text needing no quoting.

        i = last = 0
        end = self.__len__()

        while i < end:
            c = self[i]

            # Handle quotes before anything else.

            if c == quote:

                # Emit any pending unquoted text first: 'last' is about to be
                # moved past the quote and the text would otherwise be lost.

                b.append(self[last:i])
                b.append("\\")
                b.append(quote)
                i += 1
                last = i
                continue

            # Extended unquoted text.

            # NOTE(review): the backslash character falls in this range and is
            # NOTE(review): therefore emitted unescaped - confirm whether it
            # NOTE(review): should be quoted as well.

            n = ord(c)

            if 32 <= n < 128:
                i += 1
                continue

            # Before quoting, emit unquoted text.

            b.append(self[last:i])

            # Add quoted value.

            if c == "\t":
                b.append("\\t")
            elif c == "\n":
                b.append("\\n")
            elif c == "\r":
                b.append("\\r")
            else:
                self._quote_value(b, n)

            i += 1
            last = i

        # Emit remaining unquoted text.

        b.append(self[last:])
        b.append(quote)
        return str(b)

    def _quote_value(self, b, n):

        """
        Append to 'b' the quoted form of 'n': a \\x prefix followed by at least
        two hexadecimal digits.
        """

        # Negative values are brought into the positive range by adding 256.

        if n < 0:
            n += 256

        b.append("\\x")
        x = hex(n, "")

        # Pad single digit values to two digits.

        if len(x) < 2:
            b.append("0")

        b.append(x)

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    # General type methods.

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def __hash__(self):

        "Return a value for hashing purposes."

        return self._hashvalue(ord)

    # The length of a plain string is its length in bytes.

    __len__ = bytelength

    def __repr__(self):

        "Return a program representation: the string in double quotes."

        return self._quote('"')

    def __str__(self):

        "Return a string representation: the string itself."

        return self

    # Operator methods.

    def __iadd__(self, other):

        """
        Return a string combining this string with 'other', or NotImplemented
        if 'other' is not of the same class.
        """

        return self._binary_op(str_add, other)

    __add__ = __iadd__

    def __radd__(self, other):

        """
        Return a string combining 'other' with this string, or NotImplemented
        if 'other' is not of the same class.
        """

        return self._binary_op_rev(str_add, other)

    # Not implemented: placeholders only.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning this string repeated 'other'
        times. An empty buffer is converted if 'other' is not positive.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    __rmul__ = __mul__

    # Comparison methods.

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    # String-specific methods.

    def __ord__(self):

        """
        Return the value of the string, if only a single character, raising
        ValueError otherwise.
        """

        if self.__len__() == 1:
            return str_ord(self.__data__)
        else:
            raise ValueError(self)

    def endswith(self, s):

        "Return whether this string ends with 's'."

        n = s.__len__()

        # An empty suffix always matches. Without this test, the slice below
        # would start at -0 (the start of the string), select the whole string
        # and make the comparison fail for any non-empty string.

        if n == 0:
            return True

        return self[-n:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        if end is None:
            end = self.__len__()

        # Make 'end' the last position at which 'sub' could start.

        end -= sublen

        i = start or 0

        while i <= end:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:

            # Add this string before every element but the first.

            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # Not implemented: placeholder only.

    def lower(self): pass

    def lstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the start of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters means nothing can be stripped.

        if chars is not None and not chars:
            return self

        # The characters to strip do not change during the scan.

        strippable = chars or WHITESPACE

        i = 0
        end = self.__len__()

        while i < end and self[i] in strippable:
            i += 1

        return self[i:]

    # Not implemented: placeholder only.

    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Start from the last position at which 'sub' could occur.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    def rsplit(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the end of the string.

        A list of strings is returned. ValueError is raised if 'sep' is given
        but is empty.
        """

        # Without a limit on the number of splits, the direction of splitting
        # is of no consequence.

        if not maxsplit:
            return self.split(sep, maxsplit)

        if sep is not None and not sep:
            raise ValueError(sep)

        seplen = sep and len(sep) or 1
        start = seplen
        splits = 0

        l = []

        # 'i' is the position being examined, 'last' the start of the most
        # recently added element.

        i = last = self.__len__()

        while i >= start and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i-seplen:i] == sep:
                l.insert(0, self[i:last])
                i -= seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i-1] in WHITESPACE:
                l.insert(0, self[i:last])
                while i > start:
                    i -= 1
                    if self[i-1] not in WHITESPACE:
                        break

                # Whitespace extends to the start of the string.

                # NOTE(review): 'last' is not updated on this path, so the
                # NOTE(review): final insertion below repeats text already
                # NOTE(review): added - confirm the intended result.

                else:
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i -= 1

        l.insert(0, self[:last])
        return l

    def rstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the end of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters means nothing can be stripped.

        if chars is not None and not chars:
            return self

        # The characters to strip do not change during the scan.

        strippable = chars or WHITESPACE

        i = self.__len__() - 1

        while i >= 0 and self[i] in strippable:
            i -= 1

        return self[:i+1]

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the start of the string.

        A list of strings is returned. ValueError is raised if 'sep' is given
        but is empty.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        # A limit of zero splits yields the string itself as the only element.

        if maxsplit is not None and not maxsplit:
            return [self]

        seplen = sep and len(sep) or 1

        # The last position at which a separator could start.

        end = self.__len__() - seplen
        splits = 0

        l = []

        # 'i' is the position being examined, 'last' the start of the element
        # currently being collected.

        i = last = 0

        while i <= end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:
                l.append(self[last:i])
                while i < end:
                    i += 1
                    if self[i] not in WHITESPACE:
                        break

                # Whitespace extends to the end of the string.

                # NOTE(review): 'last' is not updated on this path, so the
                # NOTE(review): final append below repeats text already added
                # NOTE(review): - confirm the intended result.

                else:
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    # Not implemented: placeholder only.

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    def strip(self, chars=None):

        """
        Strip any of the given 'chars' from the start and end of the string, or
        strip whitespace characters if 'chars' is omitted or None.
        """

        return self.lstrip(chars).rstrip(chars)

    # Not implemented: placeholder only.

    def upper(self): pass

class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        """
        Return the item at the normalised (positive) 'index' as a string
        obtained from the underlying data.
        """

        self._check_index(index)
        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals, as a single string. ValueError is raised if 'step' is zero.
        """

        # An empty range yields an empty string before 'step' is considered.

        if start == end:
            return ""

        check_int(step)

        if step == 0:
            raise ValueError(step)

        # Obtain the individual items using the inherited implementation and
        # combine them into a string.

        l = get_using(basestring.__get_multiple_items__, self)(start, end, step)
        return "".join(l)

def str(obj):

    "Return the string representation of 'obj'."

    # Class attributes of instances provide __str__.

    return obj.__str__()

# vim: tabstop=4 expandtab shiftwidth=4