1.1 --- a/lib/__builtins__/__init__.py Mon Dec 12 22:30:09 2016 +0100
1.2 +++ b/lib/__builtins__/__init__.py Mon Dec 12 22:33:15 2016 +0100
1.3 @@ -60,7 +60,6 @@
1.4 ValueError
1.5 )
1.6
1.7 -
1.8 # Classes.
1.9
1.10 from __builtins__.boolean import bool, False, True
1.11 @@ -77,8 +76,9 @@
1.12 from __builtins__.none import None, NoneType
1.13 from __builtins__.notimplemented import NotImplemented, NotImplementedType
1.14 from __builtins__.set import frozenset, set
1.15 -from __builtins__.str import basestring, str, string, unicode
1.16 +from __builtins__.str import basestring, str, string
1.17 from __builtins__.tuple import tuple
1.18 +from __builtins__.unicode import unicode, utf8string
1.19
1.20 # Functions.
1.21
2.1 --- a/lib/__builtins__/str.py Mon Dec 12 22:30:09 2016 +0100
2.2 +++ b/lib/__builtins__/str.py Mon Dec 12 22:33:15 2016 +0100
2.3 @@ -19,7 +19,7 @@
2.4 this program. If not, see <http://www.gnu.org/licenses/>.
2.5 """
2.6
2.7 -from __builtins__.int import maxint, minint
2.8 +from __builtins__.int import maxint
2.9 from __builtins__.operator import _negate
2.10 from __builtins__.sequence import itemaccess
2.11 from __builtins__.types import check_int
2.12 @@ -33,21 +33,30 @@
2.13 _p = maxint / 32
2.14 _a = 31
2.15
2.16 - def __init__(self):
2.17 + def __init__(self, other=None):
2.18
2.19 - "Initialise the string."
2.20 + "Initialise the string, copying __data__ and __key__ from 'other' if it is given and tests true."
2.21
2.22 # Note the __data__ member. Since strings are either initialised from
2.23 # literals or converted using routines defined for other types, no form
2.24 # of actual initialisation is performed here.
2.25
2.26 - self.__data__ = None
2.27 + # NOTE: An explicit test is used because "other and other.__data__ or None"
2.28 + # NOTE: cannot be performed: __data__ is not a normal attribute.
2.29 +
2.30 + if other:
2.31 + self.__data__ = other.__data__
2.32 + else:
2.33 + self.__data__ = None
2.34
2.35 # Note the __key__ member. This is also initialised statically. Where
2.36 # a string is the same as an attribute name, the __key__ member contains
2.37 # attribute position and code details.
2.38
2.39 - self.__key__ = None
2.40 + if other:
2.41 + self.__key__ = other.__key__
2.42 + else:
2.43 + self.__key__ = None
2.44
2.45 def __hash__(self):
2.46
2.47 @@ -212,10 +221,10 @@
2.48 return str_substr(self.__data__, start, end, step)
2.49
2.50 class string(basestring):
2.51 - pass
2.52
2.53 -class unicode(basestring):
2.54 - def encode(self, encoding): pass
2.55 + "A plain string of bytes, adding nothing of its own to basestring."
2.56 +
2.57 + pass
2.58
2.59 def str(obj):
2.60
3.1 --- a/lib/__builtins__/types.py Mon Dec 12 22:30:09 2016 +0100
3.2 +++ b/lib/__builtins__/types.py Mon Dec 12 22:33:15 2016 +0100
3.3 @@ -32,7 +32,7 @@
3.4
3.5 "Check the given string 's'."
3.6
3.7 - if not _isinstance(s, string):
3.8 + if not _isinstance(s, basestring):
3.9 raise ValueError(s)
3.10
3.11 # vim: tabstop=4 expandtab shiftwidth=4
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/lib/__builtins__/unicode.py Mon Dec 12 22:33:15 2016 +0100
4.3 @@ -0,0 +1,60 @@
4.4 +#!/usr/bin/env python
4.5 +
4.6 +"""
4.7 +Unicode objects.
4.8 +
4.9 +Copyright (C) 2015, 2016 Paul Boddie <paul@boddie.org.uk>
4.10 +
4.11 +This program is free software; you can redistribute it and/or modify it under
4.12 +the terms of the GNU General Public License as published by the Free Software
4.13 +Foundation; either version 3 of the License, or (at your option) any later
4.14 +version.
4.15 +
4.16 +This program is distributed in the hope that it will be useful, but WITHOUT
4.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
4.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
4.19 +details.
4.20 +
4.21 +You should have received a copy of the GNU General Public License along with
4.22 +this program. If not, see <http://www.gnu.org/licenses/>.
4.23 +"""
4.24 +
4.25 +from __builtins__.str import basestring
4.26 +from posix.iconv import Converter
4.27 +
4.28 +class utf8string(basestring):
4.29 +
4.30 + "A character string representation based on UTF-8 encoded data."
4.31 +
4.32 + def encode(self, encoding):
4.33 +
4.34 + "Return the result of feeding this string to a converter from UTF-8 to the given 'encoding'."
4.35 +
4.36 + from_utf8 = Converter("UTF-8", encoding)
4.37 + try:
4.38 + from_utf8.feed(self)
4.39 + return str(from_utf8)
4.40 + finally:
4.41 + from_utf8.close()
4.42 +
4.43 +def unicode(s, encoding):
4.44 +
4.45 + "Return 's' as a utf8string, interpreting 's' as using 'encoding'; a utf8string is returned unchanged."
4.46 +
4.47 + if isinstance(s, utf8string):
4.48 + return s
4.49 +
4.50 + # Obtain a string representation of any other kind of object.
4.51 +
4.52 + s = s.__str__()
4.53 +
4.54 + # Convert the string from 'encoding' to UTF-8, always closing the converter.
4.55 +
4.56 + to_utf8 = Converter(encoding, "UTF-8")
4.57 + try:
4.58 + to_utf8.feed(s)
4.59 + return utf8string(str(to_utf8))
4.60 + finally:
4.61 + to_utf8.close()
4.62 +
4.63 +# vim: tabstop=4 expandtab shiftwidth=4