Lichen (file lib/__builtins__/str.py at c8ba74a474eb)

     1 #!/usr/bin/env python     2      3 """     4 String objects.     5      6 Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from __builtins__.operator import _negate    23 from __builtins__.sequence import hashable, itemaccess    24 from __builtins__.types import check_int    25 from native import str_add, str_lt, str_gt, str_eq, str_ord, \    26                    str_substr    27     28 WHITESPACE = (" ", "\f", "\n", "\r", "\t")    29     30 class basestring(hashable):    31     32     "The base class for all strings."    33     34     def __init__(self, other=None):    35     36         "Initialise the string, perhaps from 'other'."    37     38         # Note the __data__ member. Since strings are either initialised from    39         # literals or converted using routines defined for other types, no form    40         # of actual initialisation is performed here.    41     42         # Note the __key__ member. This is also initialised statically. Where    43         # a string is the same as an attribute name, the __key__ member contains    44         # attribute position and code details.    45     46         # NOTE: Cannot perform "other and other.__data__ or None" since the    47         # NOTE: __data__ attribute is not a normal attribute.    48     49         if other:    50             self.__data__ = other.__data__    51             self.__key__ = other.__key__    52             self.__size__ = other.__size__    53         else:    54             self.__data__ = None    55             self.__key__ = None    56             self.__size__ = 0    57     58     # Internal methods.    59     60     def _binary_op(self, op, other, sizes=False):    61     62         "Perform 'op' on this object and 'other' if appropriate."    63     64         # Refuse to operate on specialisations of this class.    65     66         if self.__class__ is not other.__class__:    67             return NotImplemented    68     69         # Otherwise, perform the operation on the operands' data.    70     71         elif sizes:    72             return op(self.__data__, other.__data__, self.__size__, other.__size__)    73         else:    74             return op(self.__data__, other.__data__)    75     76     def _binary_op_rev(self, op, other, sizes=False):    77     78         "Perform 'op' on 'other' and this object if appropriate."    79     80         # Refuse to operate on specialisations of this class.    81     82         if self.__class__ is not other.__class__:    83             return NotImplemented    84     85         # Otherwise, perform the operation on the operands' data.    86     87         elif sizes:    88             return op(other.__data__, self.__data__, other.__size__, self.__size__)    89         else:    90             return op(other.__data__, self.__data__)    91     92     def _quote(self, quote):    93     94         "Return a quoted representation of this string."    95     96         b = buffer([quote])    97         i = last = 0    98         end = self.__len__()    99    100         while i < end:   101             c = self[i]   102    103             # Handle quotes before anything else.   104    105             if c == quote:   106                 b.append("\\")   107                 b.append(quote)   108                 i += 1   109                 last = i   110                 continue   111    112             # Extended unquoted text.   113    114             n = ord(c)   115    116             if 32 <= n < 128:   117                 i += 1   118                 continue   119    120             # Before quoting, emit unquoted text.   121    122             b.append(self[last:i])   123    124             # Add quoted value.   125    126             if c == "\t":   127                 b.append("\\t")   128             elif c == "\n":   129                 b.append("\\n")   130             elif c == "\r":   131                 b.append("\\r")   132             else:   133                 self._quote_value(b, n)   134    135             i += 1   136             last = i   137    138         # Emit remaining unquoted text.   139    140         b.append(self[last:])   141         b.append(quote)   142         return str(b)   143    144     def _quote_value(self, b, n):   145    146         "Append to 'b' the quoted form of 'n'."   147    148         if n < 0:   149             n += 256   150         b.append("\\x")   151         x = hex(n, "")   152         if len(x) < 2:   153             b.append("0")   154         b.append(x)   155    156     def bytelength(self):   157    158         "Return the number of bytes in this string."   159    160         return self.__size__   161    162     # General type methods.   163    164     def __bool__(self):   165    166         "Return whether the string provides any data."   167    168         return self.__size__.__bool__()   169    170     def __contains__(self, value):   171    172         "Return whether this string contains 'value'."   173    174         return self.find(value) != -1   175    176     def __hash__(self):   177    178         "Return a value for hashing purposes."   179    180         return self._hashvalue(ord)   181    182     __len__ = bytelength   183    184     def __repr__(self):   185    186         "Return a program representation."   187    188         return self._quote('"')   189    190     def __str__(self):   191    192         "Return a string representation."   193    194         return self   195    196     # Operator methods.   197    198     def __iadd__(self, other):   199    200         "Return a string combining this string with 'other'."   201    202         return self._binary_op(str_add, other, True)   203    204     __add__ = __iadd__   205    206     def __radd__(self, other):   207    208         "Return a string combining this string with 'other'."   209    210         return self._binary_op_rev(str_add, other, True)   211    212     def __mod__(self, other): pass   213     def __rmod__(self, other): pass   214    215     def __mul__(self, other):   216    217         "Multiply the string by 'other'."   218    219         b = buffer()   220    221         while other > 0:   222             b.append(self)   223             other -= 1   224    225         return str(b)   226    227     __rmul__ = __mul__   228    229     # Comparison methods.   230    231     def __eq__(self, other):   232    233         "Return whether this string is equal to 'other'."   234    235         return self._binary_op(str_eq, other)   236    237     def __ge__(self, other):   238    239         "Return whether this string is greater than or equal to 'other'."   240    241         return _negate(self.__lt__(other))   242    243     def __gt__(self, other):   244    245         "Return whether this string is greater than 'other'."   246    247         return self._binary_op(str_gt, other)   248    249     def __le__(self, other):   250    251         "Return whether this string is less than or equal to 'other'."   252    253         return _negate(self.__gt__(other))   254    255     def __lt__(self, other):   256    257         "Return whether this string is less than 'other'."   258    259         return self._binary_op(str_lt, other)   260    261     def __ne__(self, other):   262    263         "Return whether this string is not equal to 'other'."   264    265         return _negate(self.__eq__(other))   266    267     # String-specific methods.   268    269     def __ord__(self):   270    271         "Return the value of the string, if only a single character."   272    273         if self.__len__() == 1:   274             return str_ord(self.__data__)   275         else:   276             raise ValueError, self   277    278     def endswith(self, s):   279    280         "Return whether this string ends with 's'."   281    282         return self[-s.__len__():] == s   283    284     def find(self, sub, start=None, end=None):   285    286         """   287         Find 'sub' in the string if it occurs from or after the 'start' position   288         (or 0, if omitted) and before the 'end' position (or the end of the   289         string, if omitted), returning the earliest occurrence or -1 if 'sub' is   290         not present.   291         """   292    293         sublen = sub.__len__()   294    295         if end is None:   296             end = self.__len__()   297    298         end -= sublen   299    300         i = start or 0   301    302         while i <= end:   303             if sub == self[i:i+sublen]:   304                 return i   305             i += 1   306    307         return -1   308    309     def index(self, sub, start=None, end=None):   310    311         """   312         Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending   313         at 'end' (or the end of the string, if omitted), raising ValueError if   314         'sub' is not present.   315         """   316    317         i = self.find(sub, start, end)   318    319         if i == -1:   320             raise ValueError(sub)   321         else:   322             return i   323    324     def join(self, l):   325    326         "Join the elements in 'l' with this string."   327    328         # Empty strings just cause the list elements to be concatenated.   329    330         if not self.__bool__():   331             return str(buffer(l))   332    333         # Non-empty strings join the elements together in a buffer.   334    335         b = buffer()   336         first = True   337    338         for s in l:   339             if first:   340                 first = False   341             else:   342                 b.append(self)   343             b.append(s)   344    345         return str(b)   346    347     def lower(self): pass   348    349     def lstrip(self, chars=None):   350    351         """   352         Strip any of the given 'chars' from the start of the string, or strip   353         whitespace characters is 'chars' is omitted or None.   354         """   355    356         if chars is not None and not chars:   357             return self   358    359         i = 0   360         end = self.__len__()   361    362         while i < end and self[i] in (chars or WHITESPACE):   363             i += 1   364    365         return self[i:]   366    367     def replace(self, old, new, count=None): pass   368    369     def rfind(self, sub, start=None, end=None):   370    371         """   372         Find 'sub' in the string if it occurs from or after the 'start' position   373         (or 0, if omitted) and before the 'end' position (or the end of the   374         string, if omitted), returning the latest occurrence or -1 if 'sub' is   375         not present.   376         """   377    378         sublen = sub.__len__()   379    380         start = start or 0   381    382         if end is None:   383             end = self.__len__()   384    385         i = end - sublen   386    387         while i >= start:   388             if sub == self[i:i+sublen]:   389                 return i   390             i -= 1   391    392         return -1   393    394     def rsplit(self, sep=None, maxsplit=None):   395    396         """   397         Split the string using the given 'sep' as separator (or any whitespace   398         character if omitted or specified as None), splitting at most 'maxsplit'   399         times (or as many times as is possible if omitted or specified as None).   400         Where 'maxsplit' is given, the number of split points is counted from   401         the end of the string.   402         """   403    404         if not maxsplit:   405             return self.split(sep, maxsplit)   406    407         if sep is not None and not sep:   408             raise ValueError, sep   409    410         seplen = sep and len(sep) or 1   411         start = seplen   412         splits = 0   413    414         l = []   415         i = last = self.__len__()   416    417         while i >= start and (maxsplit is None or splits < maxsplit):   418    419             # Find any specified separator.   420    421             if sep and self[i-seplen:i] == sep:   422                 l.insert(0, self[i:last])   423                 i -= seplen   424                 last = i   425                 splits += 1   426    427             # Find any whitespace character and skip adjacent characters.   428    429             elif not sep and self[i-1] in WHITESPACE:   430                 l.insert(0, self[i:last])   431                 while i > start:   432                     i -= 1   433                     if self[i-1] not in WHITESPACE:   434                         break   435                 else:   436                     break   437                 last = i   438                 splits += 1   439    440             # Check the next character.   441    442             else:   443                 i -= 1   444    445         l.insert(0, self[:last])   446         return l   447    448     def rstrip(self, chars=None):   449    450         """   451         Strip any of the given 'chars' from the end of the string, or strip   452         whitespace characters is 'chars' is omitted or None.   453         """   454    455         if chars is not None and not chars:   456             return self   457    458         i = self.__len__() - 1   459    460         while i >= 0 and self[i] in (chars or WHITESPACE):   461             i -= 1   462    463         return self[:i+1]   464    465     def split(self, sep=None, maxsplit=None):   466    467         """   468         Split the string using the given 'sep' as separator (or any whitespace   469         character if omitted or specified as None), splitting at most 'maxsplit'   470         times (or as many times as is possible if omitted or specified as None).   471         Where 'maxsplit' is given, the number of split points is counted from   472         the start of the string.   473         """   474    475         if sep is not None and not sep:   476             raise ValueError, sep   477    478         if maxsplit is not None and not maxsplit:   479             return [self]   480    481         seplen = sep and len(sep) or 1   482         end = self.__len__() - seplen   483         splits = 0   484    485         l = []   486         i = last = 0   487    488         while i <= end and (maxsplit is None or splits < maxsplit):   489    490             # Find any specified separator.   491    492             if sep and self[i:i+seplen] == sep:   493                 l.append(self[last:i])   494                 i += seplen   495                 last = i   496                 splits += 1   497    498             # Find any whitespace character and skip adjacent characters.   499    500             elif not sep and self[i] in WHITESPACE:   501                 l.append(self[last:i])   502                 while i < end:   503                     i += 1   504                     if self[i] not in WHITESPACE:   505                         break   506                 else:   507                     break   508                 last = i   509                 splits += 1   510    511             # Check the next character.   512    513             else:   514                 i += 1   515    516         l.append(self[last:])   517         return l   518    519     def splitlines(self, keepends=False): pass   520    521     def startswith(self, s):   522    523         "Return whether this string starts with 's'."   524    525         return self[:s.__len__()] == s   526    527     def strip(self, chars=None):   528    529         """   530         Strip any of the given 'chars' from the start and end of the string, or   531         strip whitespace characters is 'chars' is omitted or None.   532         """   533    534         return self.lstrip(chars).rstrip(chars)   535    536     def upper(self): pass   537    538 class string(basestring):   539    540     "A plain string of bytes."   541    542     # Special implementation methods.   543    544     def __get_single_item__(self, index):   545    546         "Return the item at the normalised (positive) 'index'."   547    548         self._check_index(index)   549         return str_substr(self.__data__, index, index + 1, 1)   550    551     def __get_multiple_items__(self, start, end, step):   552    553         """   554         Return items from 'start' until (but excluding) 'end', at 'step'   555         intervals.   556         """   557    558         if start == end:   559             return ""   560    561         check_int(step)   562    563         if step == 0:   564             raise ValueError(step)   565    566         l = get_using(basestring.__get_multiple_items__, self)(start, end, step)   567         return "".join(l)   568    569 def str(obj):   570    571     "Return the string representation of 'obj'."   572    573     # Class attributes of instances provide __str__.   574    575     return obj.__str__()   576    577 # vim: tabstop=4 expandtab shiftwidth=4