#!/usr/bin/env python

"""
String objects.

Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.operator import _negate
from __builtins__.sequence import hashable, itemaccess
from __builtins__.types import check_int
from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \
                   str_substr

# Characters regarded as whitespace by the stripping and splitting methods.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")

class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        if other:
            self.__data__ = other.__data__
        else:
            self.__data__ = None

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        if other:
            self.__key__ = other.__key__
        else:
            self.__key__ = None

    # Internal methods.

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(other.__data__, self.__data__)

    def _quote(self, quote):

        """
        Return a representation of this string enclosed in 'quote' characters.
        Occurrences of 'quote', tabs, newlines and carriage returns are
        replaced by backslash sequences, and any other character whose value
        lies outside the range 32 to 127 is written as a two-digit "\\x"
        sequence.
        """

        # NOTE(review): backslashes in the string are emitted unchanged -
        # NOTE(review): confirm whether they should also be escaped.

        b = buffer([quote])
        i = last = 0
        end = self.__len__()

        while i < end:
            c = self[i]
            n = ord(c)

            # Extended unquoted text. The quote character lies within this
            # range and must be excluded here, otherwise the quoting of it
            # below would never be reached.

            if 32 <= n < 128 and c != quote:
                i += 1
                continue

            # Before quoting, emit unquoted text.

            b.append(self[last:i])

            # Add quoted value.

            if c == quote:
                b.append("\\")
                b.append(quote)
            elif c == "\t":
                b.append("\\t")
            elif c == "\n":
                b.append("\\n")
            elif c == "\r":
                b.append("\\r")
            else:

                # Negative character values are mapped into the range 128 to
                # 255 before being formatted.

                if n < 0:
                    n += 256
                b.append("\\x")

                # NOTE(review): the second argument of hex is presumably the
                # NOTE(review): prefix to use (here, none) - confirm.

                x = hex(n, "")
                if len(x) < 2:
                    b.append("0")
                b.append(x)

            i += 1
            last = i

        # Emit remaining unquoted text.

        b.append(self[last:])
        b.append(quote)
        return str(b)

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    # General type methods.

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def __hash__(self):

        "Return a value for hashing purposes."

        # NOTE(review): _hashvalue is not defined here and is presumably
        # NOTE(review): provided by the hashable base class - confirm.

        return self._hashvalue(ord)

    # The length of a string is its length in bytes.

    __len__ = bytelength

    def __repr__(self):

        "Return a program representation employing double quotes."

        return self._quote('"')

    def __str__(self):

        "Return a string representation: the string itself."

        return self

    # Operator methods.

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other)

    # Formatting operations are not implemented.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning a string containing that many
        copies of this string, or an empty string if 'other' is not positive.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    __rmul__ = __mul__

    # Comparison methods.

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    # String-specific methods.

    def endswith(self, s):

        "Return whether this string ends with 's'."

        # An empty suffix needs special treatment: a slice starting at -0
        # selects the entire string and not an empty one.

        n = s.__len__()
        return n == 0 or self[-n:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        if end is None:
            end = self.__len__()

        # Obtain the last position at which 'sub' could start.

        end -= sublen

        i = start or 0

        while i <= end:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:

            # Add this string before every element except the first.

            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    def lower(self): pass

    def lstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the start of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters leaves the string unchanged.

        if chars is not None and not chars:
            return self

        i = 0
        end = self.__len__()

        while i < end and self[i] in (chars or WHITESPACE):
            i += 1

        return self[i:]

    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Start at the last position at which 'sub' could occur.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    def rsplit(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the end of the string.

        ValueError is raised if 'sep' is given as an empty string.
        """

        # Without any effective limit, the direction of splitting is not
        # significant and the work is delegated.

        if not maxsplit:
            return self.split(sep, maxsplit)

        if sep is not None and not sep:
            raise ValueError(sep)

        seplen = sep and len(sep) or 1
        start = seplen
        splits = 0

        l = []
        i = last = self.__len__()

        while i >= start and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i-seplen:i] == sep:
                l.insert(0, self[i:last])
                i -= seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i-1] in WHITESPACE:
                l.insert(0, self[i:last])
                while i > start:
                    i -= 1
                    if self[i-1] not in WHITESPACE:
                        break
                else:

                    # Only whitespace precedes this position, so the first
                    # item is empty. Without resetting the position, text
                    # already added to the list would be added again.

                    last = 0
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i -= 1

        l.insert(0, self[:last])
        return l

    def rstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the end of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters leaves the string unchanged.

        if chars is not None and not chars:
            return self

        i = self.__len__() - 1

        while i >= 0 and self[i] in (chars or WHITESPACE):
            i -= 1

        return self[:i+1]

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the start of the string.

        ValueError is raised if 'sep' is given as an empty string.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        # A limit of zero produces the string as the only item.

        if maxsplit is not None and not maxsplit:
            return [self]

        seplen = sep and len(sep) or 1
        end = self.__len__() - seplen
        splits = 0

        l = []
        i = last = 0

        while i <= end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:
                l.append(self[last:i])
                while i < end:
                    i += 1
                    if self[i] not in WHITESPACE:
                        break
                else:

                    # Only whitespace follows this position, so the last item
                    # is empty. Without resetting the position, text already
                    # added to the list would be added again.

                    last = self.__len__()
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    def strip(self, chars=None):

        """
        Strip any of the given 'chars' from the start and end of the string, or
        strip whitespace characters if 'chars' is omitted or None.
        """

        return self.lstrip(chars).rstrip(chars)

    def upper(self): pass

class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        "Return the item at the normalised (positive) 'index'."

        self._check_index(index)
        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals. ValueError is raised if 'step' is zero.
        """

        self._check_index(start)
        self._check_end_index(end)
        check_int(step)

        if step == 0:
            raise ValueError(step)

        # An empty range needs no involvement of the native function.

        if start == end:
            return ""

        return str_substr(self.__data__, start, end, step)

def str(obj):

    "Return the string representation of 'obj'."

    # Class attributes of instances provide __str__.

    return obj.__str__()

# vim: tabstop=4 expandtab shiftwidth=4