Lichen (file lib/__builtins__/str.py at 0495cc21f241)

     1 #!/usr/bin/env python     2      3 """     4 String objects.     5      6 Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from __builtins__.operator import _negate    23 from __builtins__.sequence import hashable, itemaccess    24 from __builtins__.types import check_int    25 from native import str_add, str_lt, str_gt, str_eq, str_len, str_ord, \    26                    str_nonempty, str_substr    27     28 WHITESPACE = (" ", "\f", "\n", "\r", "\t")    29     30 class basestring(hashable):    31     32     "The base class for all strings."    33     34     def __init__(self, other=None):    35     36         "Initialise the string, perhaps from 'other'."    37     38         # Note the __data__ member. Since strings are either initialised from    39         # literals or converted using routines defined for other types, no form    40         # of actual initialisation is performed here.    41     42         # NOTE: Cannot perform "other and other.__data__ or None" since the    43         # NOTE: __data__ attribute is not a normal attribute.    44     45         if other:    46             self.__data__ = other.__data__    47         else:    48             self.__data__ = None    49     50         # Note the __key__ member. This is also initialised statically. Where    51         # a string is the same as an attribute name, the __key__ member contains    52         # attribute position and code details.    53     54         if other:    55             self.__key__ = other.__key__    56         else:    57             self.__key__ = None    58     59     # Internal methods.    60     61     def _binary_op(self, op, other):    62     63         "Perform 'op' on this object and 'other' if appropriate."    64     65         # Refuse to operate on specialisations of this class.    66     67         if self.__class__ is not other.__class__:    68             return NotImplemented    69     70         # Otherwise, perform the operation on the operands' data.    71     72         else:    73             return op(self.__data__, other.__data__)    74     75     def _binary_op_rev(self, op, other):    76     77         "Perform 'op' on 'other' and this object if appropriate."    78     79         # Refuse to operate on specialisations of this class.    80     81         if self.__class__ is not other.__class__:    82             return NotImplemented    83     84         # Otherwise, perform the operation on the operands' data.    85     86         else:    87             return op(other.__data__, self.__data__)    88     89     def _quote(self, quote):    90     91         "Return a quoted representation of this string."    92     93         b = buffer([quote])    94         i = last = 0    95         end = self.__len__()    96     97         while i < end:    98             c = self[i]    99    100             # Handle quotes before anything else.   101    102             if c == quote:   103                 b.append("\\")   104                 b.append(quote)   105                 i += 1   106                 last = i   107                 continue   108    109             # Extended unquoted text.   110    111             n = ord(c)   112    113             if 32 <= n < 128:   114                 i += 1   115                 continue   116    117             # Before quoting, emit unquoted text.   118    119             b.append(self[last:i])   120    121             # Add quoted value.   122    123             if c == "\t":   124                 b.append("\\t")   125             elif c == "\n":   126                 b.append("\\n")   127             elif c == "\r":   128                 b.append("\\r")   129             else:   130                 self._quote_value(b, n)   131    132             i += 1   133             last = i   134    135         # Emit remaining unquoted text.   136    137         b.append(self[last:])   138         b.append(quote)   139         return str(b)   140    141     def _quote_value(self, b, n):   142    143         "Append to 'b' the quoted form of 'n'."   144    145         if n < 0:   146             n += 256   147         b.append("\\x")   148         x = hex(n, "")   149         if len(x) < 2:   150             b.append("0")   151         b.append(x)   152    153     def bytelength(self):   154    155         "Return the number of bytes in this string."   156    157         return str_len(self.__data__)   158    159     # General type methods.   160    161     def __bool__(self):   162    163         "Return whether the string provides any data."   164    165         return str_nonempty(self.__data__)   166    167     def __contains__(self, value):   168    169         "Return whether this string contains 'value'."   170    171         return self.find(value) != -1   172    173     def __hash__(self):   174    175         "Return a value for hashing purposes."   176    177         return self._hashvalue(ord)   178    179     __len__ = bytelength   180    181     def __repr__(self):   182    183         "Return a program representation."   184    185         return self._quote('"')   186    187     def __str__(self):   188    189         "Return a string representation."   190    191         return self   192    193     # Operator methods.   194    195     def __iadd__(self, other):   196    197         "Return a string combining this string with 'other'."   198    199         return self._binary_op(str_add, other)   200    201     __add__ = __iadd__   202    203     def __radd__(self, other):   204    205         "Return a string combining this string with 'other'."   206    207         return self._binary_op_rev(str_add, other)   208    209     def __mod__(self, other): pass   210     def __rmod__(self, other): pass   211    212     def __mul__(self, other):   213    214         "Multiply the string by 'other'."   215    216         b = buffer()   217    218         while other > 0:   219             b.append(self)   220             other -= 1   221    222         return str(b)   223    224     __rmul__ = __mul__   225    226     # Comparison methods.   227    228     def __eq__(self, other):   229    230         "Return whether this string is equal to 'other'."   231    232         return self._binary_op(str_eq, other)   233    234     def __ge__(self, other):   235    236         "Return whether this string is greater than or equal to 'other'."   237    238         return _negate(self.__lt__(other))   239    240     def __gt__(self, other):   241    242         "Return whether this string is greater than 'other'."   243    244         return self._binary_op(str_gt, other)   245    246     def __le__(self, other):   247    248         "Return whether this string is less than or equal to 'other'."   249    250         return _negate(self.__gt__(other))   251    252     def __lt__(self, other):   253    254         "Return whether this string is less than 'other'."   255    256         return self._binary_op(str_lt, other)   257    258     def __ne__(self, other):   259    260         "Return whether this string is not equal to 'other'."   261    262         return _negate(self.__eq__(other))   263    264     # String-specific methods.   265    266     def __ord__(self):   267    268         "Return the value of the string, if only a single character."   269    270         if self.__len__() == 1:   271             return str_ord(self.__data__)   272         else:   273             raise ValueError, self   274    275     def endswith(self, s):   276    277         "Return whether this string ends with 's'."   278    279         return self[-s.__len__():] == s   280    281     def find(self, sub, start=None, end=None):   282    283         """   284         Find 'sub' in the string if it occurs from or after the 'start' position   285         (or 0, if omitted) and before the 'end' position (or the end of the   286         string, if omitted), returning the earliest occurrence or -1 if 'sub' is   287         not present.   288         """   289    290         sublen = sub.__len__()   291    292         if end is None:   293             end = self.__len__()   294    295         end -= sublen   296    297         i = start or 0   298    299         while i <= end:   300             if sub == self[i:i+sublen]:   301                 return i   302             i += 1   303    304         return -1   305    306     def index(self, sub, start=None, end=None):   307    308         """   309         Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending   310         at 'end' (or the end of the string, if omitted), raising ValueError if   311         'sub' is not present.   312         """   313    314         i = self.find(sub, start, end)   315    316         if i == -1:   317             raise ValueError(sub)   318         else:   319             return i   320    321     def join(self, l):   322    323         "Join the elements in 'l' with this string."   324    325         # Empty strings just cause the list elements to be concatenated.   326    327         if not self.__bool__():   328             return str(buffer(l))   329    330         # Non-empty strings join the elements together in a buffer.   331    332         b = buffer()   333         first = True   334    335         for s in l:   336             if first:   337                 first = False   338             else:   339                 b.append(self)   340             b.append(s)   341    342         return str(b)   343    344     def lower(self): pass   345    346     def lstrip(self, chars=None):   347    348         """   349         Strip any of the given 'chars' from the start of the string, or strip   350         whitespace characters is 'chars' is omitted or None.   351         """   352    353         if chars is not None and not chars:   354             return self   355    356         i = 0   357         end = self.__len__()   358    359         while i < end and self[i] in (chars or WHITESPACE):   360             i += 1   361    362         return self[i:]   363    364     def replace(self, old, new, count=None): pass   365    366     def rfind(self, sub, start=None, end=None):   367    368         """   369         Find 'sub' in the string if it occurs from or after the 'start' position   370         (or 0, if omitted) and before the 'end' position (or the end of the   371         string, if omitted), returning the latest occurrence or -1 if 'sub' is   372         not present.   373         """   374    375         sublen = sub.__len__()   376    377         start = start or 0   378    379         if end is None:   380             end = self.__len__()   381    382         i = end - sublen   383    384         while i >= start:   385             if sub == self[i:i+sublen]:   386                 return i   387             i -= 1   388    389         return -1   390    391     def rsplit(self, sep=None, maxsplit=None):   392    393         """   394         Split the string using the given 'sep' as separator (or any whitespace   395         character if omitted or specified as None), splitting at most 'maxsplit'   396         times (or as many times as is possible if omitted or specified as None).   397         Where 'maxsplit' is given, the number of split points is counted from   398         the end of the string.   399         """   400    401         if not maxsplit:   402             return self.split(sep, maxsplit)   403    404         if sep is not None and not sep:   405             raise ValueError, sep   406    407         seplen = sep and len(sep) or 1   408         start = seplen   409         splits = 0   410    411         l = []   412         i = last = self.__len__()   413    414         while i >= start and (maxsplit is None or splits < maxsplit):   415    416             # Find any specified separator.   417    418             if sep and self[i-seplen:i] == sep:   419                 l.insert(0, self[i:last])   420                 i -= seplen   421                 last = i   422                 splits += 1   423    424             # Find any whitespace character and skip adjacent characters.   425    426             elif not sep and self[i-1] in WHITESPACE:   427                 l.insert(0, self[i:last])   428                 while i > start:   429                     i -= 1   430                     if self[i-1] not in WHITESPACE:   431                         break   432                 else:   433                     break   434                 last = i   435                 splits += 1   436    437             # Check the next character.   438    439             else:   440                 i -= 1   441    442         l.insert(0, self[:last])   443         return l   444    445     def rstrip(self, chars=None):   446    447         """   448         Strip any of the given 'chars' from the end of the string, or strip   449         whitespace characters is 'chars' is omitted or None.   450         """   451    452         if chars is not None and not chars:   453             return self   454    455         i = self.__len__() - 1   456    457         while i >= 0 and self[i] in (chars or WHITESPACE):   458             i -= 1   459    460         return self[:i+1]   461    462     def split(self, sep=None, maxsplit=None):   463    464         """   465         Split the string using the given 'sep' as separator (or any whitespace   466         character if omitted or specified as None), splitting at most 'maxsplit'   467         times (or as many times as is possible if omitted or specified as None).   468         Where 'maxsplit' is given, the number of split points is counted from   469         the start of the string.   470         """   471    472         if sep is not None and not sep:   473             raise ValueError, sep   474    475         if maxsplit is not None and not maxsplit:   476             return [self]   477    478         seplen = sep and len(sep) or 1   479         end = self.__len__() - seplen   480         splits = 0   481    482         l = []   483         i = last = 0   484    485         while i <= end and (maxsplit is None or splits < maxsplit):   486    487             # Find any specified separator.   488    489             if sep and self[i:i+seplen] == sep:   490                 l.append(self[last:i])   491                 i += seplen   492                 last = i   493                 splits += 1   494    495             # Find any whitespace character and skip adjacent characters.   496    497             elif not sep and self[i] in WHITESPACE:   498                 l.append(self[last:i])   499                 while i < end:   500                     i += 1   501                     if self[i] not in WHITESPACE:   502                         break   503                 else:   504                     break   505                 last = i   506                 splits += 1   507    508             # Check the next character.   509    510             else:   511                 i += 1   512    513         l.append(self[last:])   514         return l   515    516     def splitlines(self, keepends=False): pass   517    518     def startswith(self, s):   519    520         "Return whether this string starts with 's'."   521    522         return self[:s.__len__()] == s   523    524     def strip(self, chars=None):   525    526         """   527         Strip any of the given 'chars' from the start and end of the string, or   528         strip whitespace characters is 'chars' is omitted or None.   529         """   530    531         return self.lstrip(chars).rstrip(chars)   532    533     def upper(self): pass   534    535 class string(basestring):   536    537     "A plain string of bytes."   538    539     # Special implementation methods.   540    541     def __get_single_item__(self, index):   542    543         "Return the item at the normalised (positive) 'index'."   544    545         self._check_index(index)   546         return str_substr(self.__data__, index, index + 1, 1)   547    548     def __get_multiple_items__(self, start, end, step):   549    550         """   551         Return items from 'start' until (but excluding) 'end', at 'step'   552         intervals.   553         """   554    555         if start == end:   556             return ""   557    558         check_int(step)   559    560         if step == 0:   561             raise ValueError(step)   562    563         l = get_using(basestring.__get_multiple_items__, self)(start, end, step)   564         return "".join(l)   565    566 def str(obj):   567    568     "Return the string representation of 'obj'."   569    570     # Class attributes of instances provide __str__.   571    572     return obj.__str__()   573    574 # vim: tabstop=4 expandtab shiftwidth=4