Lichen (file lib/__builtins__/str.py at 149a38af3384)

     1 #!/usr/bin/env python     2      3 """     4 String objects.     5      6 Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from __builtins__.operator import _negate    23 from __builtins__.sequence import hashable, itemaccess    24 from __builtins__.types import check_int    25 from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \    26                    str_substr    27     28 WHITESPACE = (" ", "\f", "\n", "\r", "\t")    29     30 class basestring(hashable):    31     32     "The base class for all strings."    33     34     def __init__(self, other=None):    35     36         "Initialise the string, perhaps from 'other'."    37     38         # Note the __data__ member. Since strings are either initialised from    39         # literals or converted using routines defined for other types, no form    40         # of actual initialisation is performed here.    41     42         # NOTE: Cannot perform "other and other.__data__ or None" since the    43         # NOTE: __data__ attribute is not a normal attribute.    44     45         if other:    46             self.__data__ = other.__data__    47         else:    48             self.__data__ = None    49     50         # Note the __key__ member. This is also initialised statically. Where    51         # a string is the same as an attribute name, the __key__ member contains    52         # attribute position and code details.    53     54         if other:    55             self.__key__ = other.__key__    56         else:    57             self.__key__ = None    58     59     def __hash__(self):    60     61         "Return a value for hashing purposes."    62     63         return self._hashvalue(ord)    64     65     def _binary_op(self, op, other):    66     67         "Perform 'op' on this object and 'other' if appropriate."    68     69         # Refuse to operate on specialisations of this class.    70     71         if self.__class__ is not other.__class__:    72             return NotImplemented    73     74         # Otherwise, perform the operation on the operands' data.    75     76         else:    77             return op(self.__data__, other.__data__)    78     79     def _binary_op_rev(self, op, other):    80     81         "Perform 'op' on 'other' and this object if appropriate."    82     83         # Refuse to operate on specialisations of this class.    84     85         if self.__class__ is not other.__class__:    86             return NotImplemented    87     88         # Otherwise, perform the operation on the operands' data.    89     90         else:    91             return op(other.__data__, self.__data__)    92     93     def __iadd__(self, other):    94     95         "Return a string combining this string with 'other'."    96     97         return self._binary_op(str_add, other)    98     99     __add__ = __iadd__   100    101     def __radd__(self, other):   102    103         "Return a string combining this string with 'other'."   104    105         return self._binary_op_rev(str_add, other)   106    107     def __mul__(self, other):   108    109         "Multiply the string by 'other'."   110    111         b = buffer()   112    113         while other > 0:   114             b.append(self)   115             other -= 1   116    117         return str(b)   118    119     __rmul__ = __mul__   120    121     def __mod__(self, other): pass   122     def __rmod__(self, other): pass   123    124     def __lt__(self, other):   125    126         "Return whether this string is less than 'other'."   127    128         return self._binary_op(str_lt, other)   129    130     def __gt__(self, other):   131    132         "Return whether this string is greater than 'other'."   133    134         return self._binary_op(str_gt, other)   135    136     def __le__(self, other):   137    138         "Return whether this string is less than or equal to 'other'."   139    140         return _negate(self.__gt__(other))   141    142     def __ge__(self, other):   143    144         "Return whether this string is greater than or equal to 'other'."   145    146         return _negate(self.__lt__(other))   147    148     def __eq__(self, other):   149    150         "Return whether this string is equal to 'other'."   151    152         return self._binary_op(str_eq, other)   153    154     def __ne__(self, other):   155    156         "Return whether this string is not equal to 'other'."   157    158         return _negate(self.__eq__(other))   159    160     def bytelength(self):   161    162         "Return the number of bytes in this string."   163    164         return str_len(self.__data__)   165    166     __len__ = bytelength   167    168     def __str__(self):   169    170         "Return a string representation."   171    172         return self   173    174     def __repr__(self):   175    176         "Return a program representation."   177    178         # NOTE: To be implemented with proper quoting.   179         b = buffer(['"', self, '"'])   180         return str(b)   181    182     def __bool__(self):   183    184         "Return whether the string provides any data."   185    186         return str_nonempty(self.__data__)   187    188     def __contains__(self, value):   189    190         "Return whether this string contains 'value'."   191    192         return self.find(value) != -1   193    194     def endswith(self, s):   195    196         "Return whether this string ends with 's'."   197    198         return self[-s.__len__():] == s   199    200     def find(self, sub, start=None, end=None):   201    202         """   203         Find 'sub' in the string if it occurs from or after the 'start' position   204         (or 0, if omitted) and before the 'end' position (or the end of the   205         string, if omitted), returning the earliest occurrence or -1 if 'sub' is   206         not present.   207         """   208    209         sublen = sub.__len__()   210    211         if end is None:   212             end = self.__len__()   213    214         end -= sublen   215    216         i = start or 0   217    218         while i <= end:   219             if sub == self[i:i+sublen]:   220                 return i   221             i += 1   222    223         return -1   224    225     def index(self, sub, start=None, end=None):   226    227         """   228         Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending   229         at 'end' (or the end of the string, if omitted), raising ValueError if   230         'sub' is not present.   231         """   232    233         i = self.find(sub, start, end)   234    235         if i == -1:   236             raise ValueError(sub)   237         else:   238             return i   239    240     def join(self, l):   241    242         "Join the elements in 'l' with this string."   243    244         # Empty strings just cause the list elements to be concatenated.   245    246         if not self.__bool__():   247             return str(buffer(l))   248    249         # Non-empty strings join the elements together in a buffer.   250    251         b = buffer()   252         first = True   253    254         for s in l:   255             if first:   256                 first = False   257             else:   258                 b.append(self)   259             b.append(s)   260    261         return str(b)   262    263     def lower(self): pass   264    265     def lstrip(self, chars=None):   266    267         """   268         Strip any of the given 'chars' from the start of the string, or strip   269         whitespace characters is 'chars' is omitted or None.   270         """   271    272         if chars is not None and not chars:   273             return self   274    275         i = 0   276         end = self.__len__()   277    278         while i < end and self[i] in (chars or WHITESPACE):   279             i += 1   280    281         return self[i:]   282    283     def replace(self, old, new, count=None): pass   284    285     def rfind(self, sub, start=None, end=None):   286    287         """   288         Find 'sub' in the string if it occurs from or after the 'start' position   289         (or 0, if omitted) and before the 'end' position (or the end of the   290         string, if omitted), returning the latest occurrence or -1 if 'sub' is   291         not present.   292         """   293    294         sublen = sub.__len__()   295    296         start = start or 0   297    298         if end is None:   299             end = self.__len__()   300    301         i = end - sublen   302    303         while i >= start:   304             if sub == self[i:i+sublen]:   305                 return i   306             i -= 1   307    308         return -1   309    310     def rsplit(self, sep=None, maxsplit=None):   311    312         """   313         Split the string using the given 'sep' as separator (or any whitespace   314         character if omitted or specified as None), splitting at most 'maxsplit'   315         times (or as many times as is possible if omitted or specified as None).   316         Where 'maxsplit' is given, the number of split points is counted from   317         the end of the string.   318         """   319    320         if not maxsplit:   321             return self.split(sep, maxsplit)   322    323         if sep is not None and not sep:   324             raise ValueError, sep   325    326         seplen = sep and len(sep) or 1   327         start = seplen   328         splits = 0   329    330         l = []   331         i = last = self.__len__()   332    333         while i >= start and (maxsplit is None or splits < maxsplit):   334    335             # Find any specified separator.   336    337             if sep and self[i-seplen:i] == sep:   338                 l.insert(0, self[i:last])   339                 i -= seplen   340                 last = i   341                 splits += 1   342    343             # Find any whitespace character and skip adjacent characters.   344    345             elif not sep and self[i-1] in WHITESPACE:   346                 l.insert(0, self[i:last])   347                 while i > start:   348                     i -= 1   349                     if self[i-1] not in WHITESPACE:   350                         break   351                 else:   352                     break   353                 last = i   354                 splits += 1   355    356             # Check the next character.   357    358             else:   359                 i -= 1   360    361         l.insert(0, self[:last])   362         return l   363    364     def rstrip(self, chars=None):   365    366         """   367         Strip any of the given 'chars' from the end of the string, or strip   368         whitespace characters is 'chars' is omitted or None.   369         """   370    371         if chars is not None and not chars:   372             return self   373    374         i = self.__len__() - 1   375    376         while i >= 0 and self[i] in (chars or WHITESPACE):   377             i -= 1   378    379         return self[:i+1]   380    381     def split(self, sep=None, maxsplit=None):   382    383         """   384         Split the string using the given 'sep' as separator (or any whitespace   385         character if omitted or specified as None), splitting at most 'maxsplit'   386         times (or as many times as is possible if omitted or specified as None).   387         Where 'maxsplit' is given, the number of split points is counted from   388         the start of the string.   389         """   390    391         if sep is not None and not sep:   392             raise ValueError, sep   393    394         if maxsplit is not None and not maxsplit:   395             return [self]   396    397         seplen = sep and len(sep) or 1   398         end = self.__len__() - seplen   399         splits = 0   400    401         l = []   402         i = last = 0   403    404         while i <= end and (maxsplit is None or splits < maxsplit):   405    406             # Find any specified separator.   407    408             if sep and self[i:i+seplen] == sep:   409                 l.append(self[last:i])   410                 i += seplen   411                 last = i   412                 splits += 1   413    414             # Find any whitespace character and skip adjacent characters.   415    416             elif not sep and self[i] in WHITESPACE:   417                 l.append(self[last:i])   418                 while i < end:   419                     i += 1   420                     if self[i] not in WHITESPACE:   421                         break   422                 else:   423                     break   424                 last = i   425                 splits += 1   426    427             # Check the next character.   428    429             else:   430                 i += 1   431    432         l.append(self[last:])   433         return l   434    435     def splitlines(self, keepends=False): pass   436    437     def startswith(self, s):   438    439         "Return whether this string starts with 's'."   440    441         return self[:s.__len__()] == s   442    443     def strip(self, chars=None):   444    445         """   446         Strip any of the given 'chars' from the start and end of the string, or   447         strip whitespace characters is 'chars' is omitted or None.   448         """   449    450         return self.lstrip(chars).rstrip(chars)   451    452     def upper(self): pass   453    454 class string(basestring):   455    456     "A plain string of bytes."   457    458     # Special implementation methods.   459    460     def __get_single_item__(self, index):   461    462         "Return the item at the normalised (positive) 'index'."   463    464         self._check_index(index)   465         return str_substr(self.__data__, index, index + 1, 1)   466    467     def __get_multiple_items__(self, start, end, step):   468    469         """   470         Return items from 'start' until (but excluding) 'end', at 'step'   471         intervals.   472         """   473    474         self._check_index(start)   475         self._check_end_index(end)   476         check_int(step)   477    478         if step == 0:   479             raise ValueError(step)   480    481         if start == end:   482             return ""   483    484         return str_substr(self.__data__, start, end, step)   485    486 def str(obj):   487    488     "Return the string representation of 'obj'."   489    490     # Class attributes of instances provide __str__.   491    492     return obj.__str__()   493    494 # vim: tabstop=4 expandtab shiftwidth=4