# HG changeset patch # User Paul Boddie # Date 1481587132 -3600 # Node ID 6d66508a5758afc010bc0963cc4338d842bca39d # Parent fd5cc95c936a833ebbe750318bd266582b909a66 Added encoding attributes and text encoding/decoding support to file streams. Added a test of Unicode object creation and output. diff -r fd5cc95c936a -r 6d66508a5758 lib/__builtins__/file.py --- a/lib/__builtins__/file.py Mon Dec 12 23:33:28 2016 +0100 +++ b/lib/__builtins__/file.py Tue Dec 13 00:58:52 2016 +0100 @@ -20,16 +20,17 @@ """ from __builtins__.types import check_int, check_string -from native import fclose, fopen, fread, fwrite +from native import isinstance as _isinstance, fclose, fopen, fread, fwrite class filestream: "Generic file-oriented stream functionality." - def __init__(self, bufsize=1024): + def __init__(self, encoding=None, bufsize=1024): - "Initialise the stream with the given 'bufsize'." + "Initialise the stream with the given 'encoding' and 'bufsize'." + self.encoding = encoding self.bufsize = bufsize self.__data__ = None @@ -42,7 +43,7 @@ # Read any indicated number of bytes. if n > 0: - return fread(self.__data__, n) + s = fread(self.__data__, n) # Read all remaining bytes. @@ -60,13 +61,26 @@ except EOFError: pass - return "".join(l) + s = "".join(l) + + # Convert bytes to text if necessary. + + if self.encoding: + return unicode(s, self.encoding) + else: + return s def write(self, s): "Write string 's' to the stream." check_string(s) + + # Encode text as bytes if necessary. + + if self.encoding and _isinstance(s, utf8string): + s = s.encode(self.encoding) + fwrite(self.__data__, s) def close(self): @@ -79,11 +93,14 @@ "A file abstraction." - def __init__(self, filename, mode="r", bufsize=1024): + def __init__(self, filename, mode="r", encoding=None, bufsize=1024): - "Open the file with the given 'filename' using the given access 'mode'." + """ + Open the file with the given 'filename' using the given access 'mode', + any specified 'encoding', and the given 'bufsize'. + """ - get_using(filestream.__init__, self)(bufsize) + get_using(filestream.__init__, self)(encoding, bufsize) self.__data__ = fopen(filename, mode) def readline(self, size=None): pass diff -r fd5cc95c936a -r 6d66508a5758 lib/__builtins__/unicode.py --- a/lib/__builtins__/unicode.py Mon Dec 12 23:33:28 2016 +0100 +++ b/lib/__builtins__/unicode.py Tue Dec 13 00:58:52 2016 +0100 @@ -31,9 +31,11 @@ "Encode the string to the given 'encoding'." from_utf8 = Converter("UTF-8", encoding) + try: from_utf8.feed(self) return str(from_utf8) + finally: from_utf8.close() @@ -51,9 +53,11 @@ # Convert the string to UTF-8. to_utf8 = Converter(encoding, "UTF-8") + try: to_utf8.feed(s) return utf8string(str(to_utf8)) + finally: to_utf8.close() diff -r fd5cc95c936a -r 6d66508a5758 lib/posix/io.py --- a/lib/posix/io.py Mon Dec 12 23:33:28 2016 +0100 +++ b/lib/posix/io.py Tue Dec 13 00:58:52 2016 +0100 @@ -62,11 +62,14 @@ "A system-level stream object." - def __init__(self, fd, mode="r", bufsize=1024): + def __init__(self, fd, mode="r", encoding=None, bufsize=1024): - "Initialise the stream with the given 'fd' and 'mode'." + """ + Initialise the stream with the given 'fd', 'mode', 'encoding' and + 'bufsize'. + """ - get_using(filestream.__init__, self)(bufsize) + get_using(filestream.__init__, self)(encoding, bufsize) self.__data__ = fdopen(fd, mode) # Standard streams. diff -r fd5cc95c936a -r 6d66508a5758 tests/unicode.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/unicode.py Tue Dec 13 00:58:52 2016 +0100 @@ -0,0 +1,20 @@ +# -*- coding: ISO-8859-1 -*- + +import sys + +# Print bytes. + +s = "æøå" +print s # æøå + +# Obtain text and print it. + +u = unicode(s, "ISO-8859-1") +print u # æøå +print u.encode("ISO-8859-1") # æøå + +# Inspect and update the encoding of stdout. + +print sys.stdout.encoding # None +sys.stdout.encoding = "ISO-8859-1" +print u # æøå