Lichen

lib/__builtins__/str.py

938:799711337453
2021-06-29 Paul Boddie Renamed the string class to str, replacing the str function with the new_str function, this being invoked specially by the string instantiation function. As with the Unicode type renaming, a more general instantiation mechanism might permit the new_str function to be part of the functionality of the str or basestring classes.
     1 #!/usr/bin/env python     2      3 """     4 String objects.     5      6 Copyright (C) 2015, 2016, 2017, 2021 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from __builtins__.operator import _negate    23 from __builtins__.sequence import hashable, itemaccess    24 from __builtins__.types import check_int    25 from native import isinstance as _isinstance, \    26                    str_add, str_lt, str_gt, str_eq, str_ord, \    27                    str_substr    28     29 WHITESPACE = (" ", "\f", "\n", "\r", "\t")    30     31 class basestring(hashable):    32     33     "The base class for all strings."    34     35     def __init__(self, other=None):    36     37         "Initialise the string, perhaps from 'other'."    38     39         # Note the __data__ member. Since strings are either initialised from    40         # literals or converted using routines defined for other types, no form    41         # of actual initialisation is performed here.    42     43         # Note the __key__ member. This is also initialised statically. Where    44         # a string is the same as an attribute name, the __key__ member contains    45         # attribute position and code details.    46     47         # NOTE: Cannot perform "other and other.__data__ or None" since the    48         # NOTE: __data__ attribute is not a normal attribute.    49     50         if other:    51             self.__data__ = other.__data__    52             self.__key__ = other.__key__    53             self.__size__ = other.__size__    54         else:    55             self.__data__ = None    56             self.__key__ = None    57             self.__size__ = 0    58     59     # Internal methods.    60     61     def _binary_op(self, op, other, sizes=False):    62     63         "Perform 'op' on this object and 'other' if appropriate."    64     65         # Refuse to operate on specialisations of this class.    66     67         if self.__class__ is not other.__class__:    68             return NotImplemented    69     70         # Otherwise, perform the operation on the operands' data.    71     72         elif sizes:    73             return op(self.__data__, other.__data__, self.__size__, other.__size__)    74         else:    75             return op(self.__data__, other.__data__)    76     77     def _binary_op_rev(self, op, other, sizes=False):    78     79         "Perform 'op' on 'other' and this object if appropriate."    80     81         # Refuse to operate on specialisations of this class.    82     83         if self.__class__ is not other.__class__:    84             return NotImplemented    85     86         # Otherwise, perform the operation on the operands' data.    87     88         elif sizes:    89             return op(other.__data__, self.__data__, other.__size__, self.__size__)    90         else:    91             return op(other.__data__, self.__data__)    92     93     def _quote(self, quote):    94     95         "Return a quoted representation of this string."    96     97         b = buffer([quote])    98         i = last = 0    99         end = self.__len__()   100    101         while i < end:   102             c = self[i]   103    104             # Handle quotes before anything else.   105    106             if c == quote:   107                 b.append("\\")   108                 b.append(quote)   109                 i += 1   110                 last = i   111                 continue   112    113             # Extended unquoted text.   114    115             n = ord(c)   116    117             if 32 <= n < 128:   118                 i += 1   119                 continue   120    121             # Before quoting, emit unquoted text.   122    123             b.append(self[last:i])   124    125             # Add quoted value.   126    127             if c == "\t":   128                 b.append("\\t")   129             elif c == "\n":   130                 b.append("\\n")   131             elif c == "\r":   132                 b.append("\\r")   133             else:   134                 self._quote_value(b, n)   135    136             i += 1   137             last = i   138    139         # Emit remaining unquoted text.   140    141         b.append(self[last:])   142         b.append(quote)   143         return str(b)   144    145     def _quote_value(self, b, n):   146    147         "Append to 'b' the quoted form of 'n'."   148    149         if n < 0:   150             n += 256   151         b.append("\\x")   152         x = hex(n, "")   153         if len(x) < 2:   154             b.append("0")   155         b.append(x)   156    157     def bytelength(self):   158    159         "Return the number of bytes in this string."   160    161         return self.__size__   162    163     # General type methods.   164    165     def __bool__(self):   166    167         "Return whether the string provides any data."   168    169         return self.__size__.__bool__()   170    171     def __contains__(self, value):   172    173         "Return whether this string contains 'value'."   174    175         return self.find(value) != -1   176    177     def __hash__(self):   178    179         "Return a value for hashing purposes."   180    181         return self._hashvalue(ord)   182    183     __len__ = bytelength   184    185     def __repr__(self):   186    187         "Return a program representation."   188    189         return self._quote('"')   190    191     def __str__(self):   192    193         "Return a string representation."   194    195         return self   196    197     # Operator methods.   198    199     def __iadd__(self, other):   200    201         "Return a string combining this string with 'other'."   202    203         return self._binary_op(str_add, other, True)   204    205     __add__ = __iadd__   206    207     def __radd__(self, other):   208    209         "Return a string combining this string with 'other'."   210    211         return self._binary_op_rev(str_add, other, True)   212    213     def __mod__(self, other):   214    215         "Format 'other' using this string."   216    217         if not _isinstance(other, tuple):   218             other = [other]   219    220         i = 0   221         first = True   222         b = buffer()   223    224         for s in self.split("%"):   225             if first:   226                 b.append(s)   227                 first = False   228                 continue   229    230             # Handle format codes.   231             # NOTE: To be completed.   232    233             if s.startswith("%"):   234                 b.append(s)   235    236             elif s.startswith("s"):   237                 b.append(str(other[i]))   238                 b.append(s[1:])   239                 i += 1   240    241             elif s.startswith("r"):   242                 b.append(repr(other[i]))   243                 b.append(s[1:])   244                 i += 1   245    246             # Unrecognised code: probably just a stray %.   247    248             else:   249                 b.append("%")   250                 b.append(s)   251    252         return str(b)   253    254     def __rmod__(self, other): pass   255    256     def __mul__(self, other):   257    258         "Multiply the string by 'other'."   259    260         b = buffer()   261    262         while other > 0:   263             b.append(self)   264             other -= 1   265    266         return str(b)   267    268     __rmul__ = __mul__   269    270     # Comparison methods.   271    272     def __eq__(self, other):   273    274         "Return whether this string is equal to 'other'."   275    276         return self._binary_op(str_eq, other)   277    278     def __ge__(self, other):   279    280         "Return whether this string is greater than or equal to 'other'."   281    282         return _negate(self.__lt__(other))   283    284     def __gt__(self, other):   285    286         "Return whether this string is greater than 'other'."   287    288         return self._binary_op(str_gt, other)   289    290     def __le__(self, other):   291    292         "Return whether this string is less than or equal to 'other'."   293    294         return _negate(self.__gt__(other))   295    296     def __lt__(self, other):   297    298         "Return whether this string is less than 'other'."   299    300         return self._binary_op(str_lt, other)   301    302     def __ne__(self, other):   303    304         "Return whether this string is not equal to 'other'."   305    306         return _negate(self.__eq__(other))   307    308     # String-specific methods.   309    310     def __ord__(self):   311    312         "Return the value of the string, if only a single character."   313    314         if self.__len__() == 1:   315             return str_ord(self.__data__)   316         else:   317             raise ValueError, self   318    319     def endswith(self, s):   320    321         "Return whether this string ends with 's'."   322    323         return self[-s.__len__():] == s   324    325     def find(self, sub, start=None, end=None):   326    327         """   328         Find 'sub' in the string if it occurs from or after the 'start' position   329         (or 0, if omitted) and before the 'end' position (or the end of the   330         string, if omitted), returning the earliest occurrence or -1 if 'sub' is   331         not present.   332         """   333    334         sublen = sub.__len__()   335    336         if end is None:   337             end = self.__len__()   338    339         end -= sublen   340    341         i = start or 0   342    343         while i <= end:   344             if sub == self[i:i+sublen]:   345                 return i   346             i += 1   347    348         return -1   349    350     def index(self, sub, start=None, end=None):   351    352         """   353         Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending   354         at 'end' (or the end of the string, if omitted), raising ValueError if   355         'sub' is not present.   356         """   357    358         i = self.find(sub, start, end)   359    360         if i == -1:   361             raise ValueError(sub)   362         else:   363             return i   364    365     def join(self, l):   366    367         "Join the elements in 'l' with this string."   368    369         # Empty strings just cause the list elements to be concatenated.   370    371         if not self.__bool__():   372             return str(buffer(l))   373    374         # Non-empty strings join the elements together in a buffer.   375    376         b = buffer()   377         first = True   378    379         for s in l:   380             if first:   381                 first = False   382             else:   383                 b.append(self)   384             b.append(s)   385    386         return str(b)   387    388     def lower(self): pass   389    390     def lstrip(self, chars=None):   391    392         """   393         Strip any of the given 'chars' from the start of the string, or strip   394         whitespace characters is 'chars' is omitted or None.   395         """   396    397         if chars is not None and not chars:   398             return self   399    400         i = 0   401         end = self.__len__()   402    403         while i < end and self[i] in (chars or WHITESPACE):   404             i += 1   405    406         return self[i:]   407    408     def replace(self, old, new, count=None): pass   409    410     def rfind(self, sub, start=None, end=None):   411    412         """   413         Find 'sub' in the string if it occurs from or after the 'start' position   414         (or 0, if omitted) and before the 'end' position (or the end of the   415         string, if omitted), returning the latest occurrence or -1 if 'sub' is   416         not present.   417         """   418    419         sublen = sub.__len__()   420    421         start = start or 0   422    423         if end is None:   424             end = self.__len__()   425    426         i = end - sublen   427    428         while i >= start:   429             if sub == self[i:i+sublen]:   430                 return i   431             i -= 1   432    433         return -1   434    435     def rsplit(self, sep=None, maxsplit=None):   436    437         """   438         Split the string using the given 'sep' as separator (or any whitespace   439         character if omitted or specified as None), splitting at most 'maxsplit'   440         times (or as many times as is possible if omitted or specified as None).   441         Where 'maxsplit' is given, the number of split points is counted from   442         the end of the string.   443         """   444    445         if not maxsplit:   446             return self.split(sep, maxsplit)   447    448         if sep is not None and not sep:   449             raise ValueError, sep   450    451         seplen = sep and len(sep) or 1   452         start = seplen   453         splits = 0   454    455         l = []   456         i = last = self.__len__()   457    458         while i >= start and (maxsplit is None or splits < maxsplit):   459    460             # Find any specified separator.   461    462             if sep and self[i-seplen:i] == sep:   463                 l.insert(0, self[i:last])   464                 i -= seplen   465                 last = i   466                 splits += 1   467    468             # Find any whitespace character and skip adjacent characters.   469    470             elif not sep and self[i-1] in WHITESPACE:   471                 l.insert(0, self[i:last])   472                 while i > start:   473                     i -= 1   474                     if self[i-1] not in WHITESPACE:   475                         break   476                 else:   477                     break   478                 last = i   479                 splits += 1   480    481             # Check the next character.   482    483             else:   484                 i -= 1   485    486         l.insert(0, self[:last])   487         return l   488    489     def rstrip(self, chars=None):   490    491         """   492         Strip any of the given 'chars' from the end of the string, or strip   493         whitespace characters is 'chars' is omitted or None.   494         """   495    496         if chars is not None and not chars:   497             return self   498    499         i = self.__len__() - 1   500    501         while i >= 0 and self[i] in (chars or WHITESPACE):   502             i -= 1   503    504         return self[:i+1]   505    506     def split(self, sep=None, maxsplit=None):   507    508         """   509         Split the string using the given 'sep' as separator (or any whitespace   510         character if omitted or specified as None), splitting at most 'maxsplit'   511         times (or as many times as is possible if omitted or specified as None).   512         Where 'maxsplit' is given, the number of split points is counted from   513         the start of the string.   514         """   515    516         if sep is not None and not sep:   517             raise ValueError, sep   518    519         if maxsplit is not None and not maxsplit:   520             return [self]   521    522         seplen = sep and len(sep) or 1   523         end = self.__len__() - seplen   524         splits = 0   525    526         l = []   527         i = last = 0   528    529         while i <= end and (maxsplit is None or splits < maxsplit):   530    531             # Find any specified separator.   532    533             if sep and self[i:i+seplen] == sep:   534                 l.append(self[last:i])   535                 i += seplen   536                 last = i   537                 splits += 1   538    539             # Find any whitespace character and skip adjacent characters.   540    541             elif not sep and self[i] in WHITESPACE:   542                 l.append(self[last:i])   543                 while i < end:   544                     i += 1   545                     if self[i] not in WHITESPACE:   546                         break   547                 else:   548                     break   549                 last = i   550                 splits += 1   551    552             # Check the next character.   553    554             else:   555                 i += 1   556    557         l.append(self[last:])   558         return l   559    560     def splitlines(self, keepends=False): pass   561    562     def startswith(self, s):   563    564         "Return whether this string starts with 's'."   565    566         return self[:s.__len__()] == s   567    568     def strip(self, chars=None):   569    570         """   571         Strip any of the given 'chars' from the start and end of the string, or   572         strip whitespace characters is 'chars' is omitted or None.   573         """   574    575         return self.lstrip(chars).rstrip(chars)   576    577     def upper(self): pass   578    579 class str(basestring):   580    581     "A plain string of bytes."   582    583     # Special implementation methods.   584    585     def __get_single_item__(self, index):   586    587         "Return the item at the normalised (positive) 'index'."   588    589         self._check_index(index)   590         return str_substr(self.__data__, index, index + 1, 1)   591    592     def __get_multiple_items__(self, start, end, step):   593    594         """   595         Return items from 'start' until (but excluding) 'end', at 'step'   596         intervals.   597         """   598    599         if start == end:   600             return ""   601    602         check_int(step)   603    604         if step == 0:   605             raise ValueError(step)   606    607         l = get_using(basestring.__get_multiple_items__, self)(start, end, step)   608         return "".join(l)   609    610 def new_str(obj):   611    612     "Return the string representation of 'obj'."   613    614     # Class attributes of instances provide __str__.   615    616     return obj.__str__()   617    618 # vim: tabstop=4 expandtab shiftwidth=4