#!/usr/bin/env python

"""
String objects.

Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.operator import _negate
from __builtins__.sequence import hashable, itemaccess
from __builtins__.types import check_int
from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \
                   str_substr

# Characters treated as whitespace by the stripping and splitting methods when
# no explicit characters or separator are given.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")

class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        if other:
            self.__data__ = other.__data__
        else:
            self.__data__ = None

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        if other:
            self.__key__ = other.__key__
        else:
            self.__key__ = None

    def __hash__(self):

        "Return a value for hashing purposes."

        return self._hashvalue(ord)

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, with this
        object's data as the first operand. NotImplemented is returned if the
        operands are not instances of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, with the data
        from 'other' as the first operand. NotImplemented is returned if the
        operands are not instances of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(other.__data__, self.__data__)

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    # Addition produces a new string and so shares the in-place implementation.

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other)

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning a string containing 'other'
        copies of this string. Where 'other' is not greater than zero, the
        result contains no copies at all.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    # Repetition is commutative, so the reversed operation is the same.

    __rmul__ = __mul__

    # NOTE: Formatting operations are not yet implemented.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        # Defined as the negation of the "greater than" result.

        return _negate(self.__gt__(other))

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        # Defined as the negation of the "less than" result.

        return _negate(self.__lt__(other))

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        # Defined as the negation of the equality result.

        return _negate(self.__eq__(other))

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    # For this class, the length is the length in bytes.

    __len__ = bytelength

    def __str__(self):

        "Return a string representation."

        return self

    def __repr__(self):

        "Return a program representation."

        # NOTE: To be implemented with proper quoting.

        b = buffer(['"', self, '"'])
        return str(b)

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def endswith(self, s):

        """
        Return whether this string ends with 's'. An empty 's' is regarded as
        being found at the end of any string.
        """

        sublen = s.__len__()

        # An empty suffix needs special handling: a slice starting at -0 is a
        # slice starting at 0, which would compare the entire string with 's'
        # and wrongly report a mismatch for any non-empty string.

        if sublen == 0:
            return True

        return self[-sublen:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        if end is None:
            end = self.__len__()

        # The last position at which 'sub' could start and still fit.

        end -= sublen

        i = start or 0

        while i <= end:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:

            # Insert this string before every element except the first.

            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # NOTE: Not yet implemented.

    def lower(self): pass

    def lstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the start of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An explicitly empty collection of characters strips nothing.

        if chars is not None and not chars:
            return self

        i = 0
        end = self.__len__()

        while i < end and self[i] in (chars or WHITESPACE):
            i += 1

        return self[i:]

    # NOTE: Not yet implemented.

    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Start at the last position where 'sub' could fit and work backwards.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    def rsplit(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the end of the string.

        ValueError is raised if 'sep' is given as an empty string.
        """

        # Without a limit on the number of splits, the direction of splitting
        # does not matter and the work is delegated.

        if not maxsplit:
            return self.split(sep, maxsplit)

        if sep is not None and not sep:
            raise ValueError(sep)

        seplen = sep and len(sep) or 1
        start = seplen
        splits = 0

        l = []
        i = last = self.__len__()

        # Here, 'i' is the position just after the candidate separator and
        # 'last' is the end of the text not yet added to the result.

        while i >= start and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i-seplen:i] == sep:
                l.insert(0, self[i:last])
                i -= seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i-1] in WHITESPACE:
                l.insert(0, self[i:last])
                while i > start:
                    i -= 1
                    if self[i-1] not in WHITESPACE:
                        break
                else:

                    # Whitespace extends to the start of the string: nothing
                    # remains before it. Without updating 'last' here, the
                    # field just added would be added again together with the
                    # leading whitespace.

                    last = i - 1
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i -= 1

        l.insert(0, self[:last])
        return l

    def rstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the end of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An explicitly empty collection of characters strips nothing.

        if chars is not None and not chars:
            return self

        i = self.__len__() - 1

        while i >= 0 and self[i] in (chars or WHITESPACE):
            i -= 1

        return self[:i+1]

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the start of the string.

        ValueError is raised if 'sep' is given as an empty string.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        # A limit of zero splits yields the string itself as the only element.

        if maxsplit is not None and not maxsplit:
            return [self]

        seplen = sep and len(sep) or 1
        end = self.__len__() - seplen
        splits = 0

        l = []
        i = last = 0

        # Here, 'i' is the position of the candidate separator and 'last' is
        # the start of the text not yet added to the result.

        while i <= end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:
                l.append(self[last:i])
                while i < end:
                    i += 1
                    if self[i] not in WHITESPACE:
                        break
                else:

                    # Whitespace extends to the end of the string: nothing
                    # remains after it. Without updating 'last' here, the
                    # field just added would be added again together with the
                    # trailing whitespace.

                    last = i + 1
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    # NOTE: Not yet implemented.

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    def strip(self, chars=None):

        """
        Strip any of the given 'chars' from the start and end of the string, or
        strip whitespace characters if 'chars' is omitted or None.
        """

        return self.lstrip(chars).rstrip(chars)

    # NOTE: Not yet implemented.

    def upper(self): pass

class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        "Return the item at the normalised (positive) 'index'."

        self._check_index(index)
        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals. ValueError is raised if 'step' is zero.
        """

        self._check_index(start)
        self._check_end_index(end)
        check_int(step)

        if step == 0:
            raise ValueError(step)

        # An empty range needs no native substring operation.

        if start == end:
            return ""

        return str_substr(self.__data__, start, end, step)

def str(obj):

    "Return the string representation of 'obj'."

    # Class attributes of instances provide __str__.

    return obj.__str__()

# vim: tabstop=4 expandtab shiftwidth=4