#!/usr/bin/env python

"""
Unicode objects.

Copyright (C) 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.str import basestring
from posix.iconv import Converter
from native import str_add, isinstance as _isinstance

class utf8string(basestring):

    "Text held as UTF-8 data, remembering any encoding it was obtained from."

    def __init__(self, other=None, encoding=None):

        """
        Set up the string using 'other' as the source of its content, if
        given, and record 'encoding' as the encoding the content originally
        employed.
        """

        get_using(basestring.__init__, self)(other)
        self.encoding = encoding

    def _binary_op(self, op, other):

        """
        Apply 'op' with this object as the left operand and 'other' as the
        right operand, returning NotImplemented if 'other' is not a string.
        """

        # Only strings may take part.

        if not _isinstance(other, basestring):
            return NotImplemented

        # Two Unicode operands are combined directly using their data.

        if _isinstance(other, utf8string):
            return op(self.__data__, other.__data__)

        # A plain byte string is combined with the encoded form of this text.

        encoded = self.encode()
        return op(encoded.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Apply 'op' with 'other' as the left operand and this object as the
        right operand, returning NotImplemented if 'other' is not a string.
        """

        # Only strings may take part.

        if not _isinstance(other, basestring):
            return NotImplemented

        # Two Unicode operands are combined directly using their data.

        if _isinstance(other, utf8string):
            return op(other.__data__, self.__data__)

        # A plain byte string is combined with the encoded form of this text.

        encoded = self.encode()
        return op(other.__data__, encoded.__data__)

    def _convert(self, result, other):

        """
        Return 'result' wrapped as a Unicode object carrying this object's
        encoding if 'other' is a Unicode object, or 'result' unchanged
        otherwise.
        """

        if not _isinstance(other, utf8string):
            return result

        return utf8string(result, self.encoding)

    def __iadd__(self, other):

        "Return the concatenation of this string followed by 'other'."

        combined = self._binary_op(str_add, other)
        return self._convert(combined, other)

    __add__ = __iadd__

    def __radd__(self, other):

        "Return the concatenation of 'other' followed by this string."

        combined = self._binary_op_rev(str_add, other)
        return self._convert(combined, other)

    def encode(self, encoding=None):

        """
        Return this string encoded using 'encoding', falling back on the
        original encoding recorded for the string if 'encoding' is omitted.
        Where neither is available, this object itself is returned.
        """

        target = encoding or self.encoding

        # Without a target encoding there is nothing to convert to.

        if not target:
            return self

        converter = Converter("UTF-8", target)

        # Always release the converter, even if the conversion fails.

        try:
            converter.feed(self)
            return str(converter)

        finally:
            converter.close()

def unicode(s, encoding):

    """
    Return a Unicode object for 's', treating the string form of 's' as being
    encoded using 'encoding'. Existing Unicode objects are returned as they
    are.
    """

    if isinstance(s, utf8string):
        return s

    # Work with the string representation of the object.

    text = s.__str__()

    # Transcode the representation from the stated encoding to UTF-8.

    converter = Converter(encoding, "UTF-8")

    # Always release the converter, even if the conversion fails.

    try:
        converter.feed(text)
        return utf8string(str(converter), encoding)

    finally:
        converter.close()

# vim: tabstop=4 expandtab shiftwidth=4