1.1 --- a/lib/native/__init__.py Mon Dec 12 18:00:17 2016 +0100
1.2 +++ b/lib/native/__init__.py Mon Dec 12 18:30:40 2016 +0100
1.3 @@ -29,7 +29,7 @@
1.4
1.5 from native.introspection import object_getattr, isinstance, issubclass
1.6
1.7 -from native.iconv import iconv, iconv_close, iconv_open
1.8 +from native.iconv import iconv, iconv_close, iconv_open, iconv_reset
1.9
1.10 from native.io import fclose, fopen, fdopen, close, read, write, fread, fwrite
1.11
2.1 --- a/lib/native/iconv.py Mon Dec 12 18:00:17 2016 +0100
2.2 +++ b/lib/native/iconv.py Mon Dec 12 18:30:40 2016 +0100
2.3 @@ -24,13 +24,15 @@
2.4 this program. If not, see <http://www.gnu.org/licenses/>.
2.5 """
2.6
2.7 -def iconv_open(tocode, fromcode):
2.8 +def iconv(cd, state):
2.9 OSError
2.10
2.11 def iconv_close(cd):
2.12 OSError
2.13
2.14 -def iconv(cd, instr, state):
2.15 +def iconv_open(tocode, fromcode):
2.16 OSError
2.17
2.18 +def iconv_reset(cd): pass
2.19 +
2.20 # vim: tabstop=4 expandtab shiftwidth=4
3.1 --- a/lib/posix/iconv.py Mon Dec 12 18:00:17 2016 +0100
3.2 +++ b/lib/posix/iconv.py Mon Dec 12 18:30:40 2016 +0100
3.3 @@ -20,7 +20,12 @@
3.4 """
3.5
3.6 from __builtins__.types import check_int, check_string
3.7 -from native import iconv_close, iconv_open, iconv
3.8 +from native import iconv, iconv_close, iconv_open, iconv_reset
3.9 +
3.10 +# Errors produced by iconv.
3.11 +
3.12 +EINVAL = 22
3.13 +EILSEQ = 84
3.14
3.15 class ConverterError(Exception):
3.16
3.17 @@ -28,10 +33,6 @@
3.18
3.19 pass
3.20
3.21 -E2BIG = 7
3.22 -EINVAL = 22
3.23 -EILSEQ = 84
3.24 -
3.25 class Converter:
3.26
3.27 "A character set converter."
3.28 @@ -43,6 +44,15 @@
3.29 check_string(from_encoding)
3.30 check_string(to_encoding)
3.31 self.__data__ = iconv_open(to_encoding, from_encoding)
3.32 + self.reset()
3.33 +
3.34 + def reset(self):
3.35 +
3.36 + "Reset the state of the converter."
3.37 +
3.38 + self.state = ["", 0, 0]
3.39 + self.result = []
3.40 + iconv_reset(self.__data__)
3.41
3.42 def close(self):
3.43
3.44 @@ -51,29 +61,53 @@
3.45 iconv_close(self.__data__)
3.46 self.__data__ = None
3.47
3.48 - def convert(self, s):
3.49 + def feed(self, s):
3.50
3.51 - "Convert 's' between the converter's encodings."
3.52 + "Feed 's' to the converter."
3.53
3.54 if self.__data__ is None:
3.55 raise ConverterError
3.56
3.57 check_string(s)
3.58
3.59 - result = []
3.60 - state = [0, len(s)]
3.61 + _s, start, remaining = self.state
3.62 +
3.63 + if _s:
3.64 + self.state = [_s + s, start, remaining + len(s)]
3.65 + else:
3.66 + self.state = [s, 0, len(s)]
3.67
3.68 while True:
3.69
3.70 # Obtain converted text and update the state.
3.71
3.72 - out = iconv(self.__data__, s, state)
3.73 - result.append(out)
3.74 + try:
3.75 + out = iconv(self.__data__, self.state)
3.76 +
3.77 + # Incomplete input does not cause an exception.
3.78 +
3.79 + except OSError, exc:
3.80 + if exc.value == EINVAL:
3.81 + self.result.append(exc.arg)
3.82 + return
3.83 + else:
3.84 + raise
3.85 +
3.86 + # Add any returned text to the result.
3.87 +
3.88 + self.result.append(out)
3.89
3.90 # Test for the end of the conversion.
3.91
3.92 - start, remaining = state
3.93 + _s, start, remaining = self.state
3.94 +
3.95 if not remaining:
3.96 - return "".join(result)
3.97 + return
3.98 +
3.99 + def __str__(self):
3.100 +
3.101 + "Return the value of the converted string."
3.102 +
3.103 + return "".join(self.result)
3.104
3.105 # vim: tabstop=4 expandtab shiftwidth=4
4.1 --- a/templates/native/iconv.c Mon Dec 12 18:00:17 2016 +0100
4.2 +++ b/templates/native/iconv.c Mon Dec 12 18:30:40 2016 +0100
4.3 @@ -35,19 +35,17 @@
4.4 __attr __fn_native_iconv_iconv(__attr __args[])
4.5 {
4.6 __attr * const cd = &__args[1];
4.7 - __attr * const instr = &__args[2];
4.8 - __attr * const state = &__args[3];
4.9 + __attr * const state = &__args[2];
4.10 /* cd interpreted as iconv_t */
4.11 iconv_t c = (iconv_t) cd->datavalue;
4.12 - /* instr.__data__ interpreted as string */
4.13 - char *inbuf = __load_via_object(instr->value, __pos___data__).strvalue;
4.14 /* state.__data__ interpreted as list */
4.15 __fragment *f = __load_via_object(state->value, __pos___data__).seqvalue;
4.16
4.17 - /* Obtain the start position from the state. */
4.18 + /* Obtain the string, start position, and remaining bytes from the state. */
4.19
4.20 - int start = __load_via_object(f->attrs[0].value, __pos___data__).intvalue;
4.21 - int remaining = __load_via_object(f->attrs[1].value, __pos___data__).intvalue;
4.22 + char *inbuf = __load_via_object(f->attrs[0].value, __pos___data__).strvalue;
4.23 + int start = __load_via_object(f->attrs[1].value, __pos___data__).intvalue;
4.24 + int remaining = __load_via_object(f->attrs[2].value, __pos___data__).intvalue;
4.25
4.26 /* Allocate a string for the output buffer using the remaining input size
4.27 as a guide. */
4.28 @@ -69,7 +67,7 @@
4.29
4.30 /* Return any string. */
4.31
4.32 - if ((result != -1) || (errno == E2BIG))
4.33 + if ((result != -1) || (errno == E2BIG) || (errno == EINVAL))
4.34 {
4.35 outbytestotal = outbufsize - outbytesleft;
4.36 resultbuf = __ALLOCATE(outbytestotal + 1, sizeof(char));
4.37 @@ -77,8 +75,14 @@
4.38
4.39 /* Mutate the state to indicate the next input buffer position. */
4.40
4.41 - f->attrs[0] = __new_int(start + remaining - inbytesleft);
4.42 - f->attrs[1] = __new_int(inbytesleft);
4.43 + f->attrs[1] = __new_int(start + remaining - inbytesleft);
4.44 + f->attrs[2] = __new_int(inbytesleft);
4.45 +
4.46 + /* Incomplete sequence: raise the string in an OSError instead. */
4.47 +
4.48 + if (errno == EINVAL)
4.49 + __raise_os_error(__new_int(errno), __new_str(resultbuf, outbytestotal));
4.50 +
4.51 return __new_str(resultbuf, outbytestotal);
4.52 }
4.53
4.54 @@ -91,15 +95,6 @@
4.55 __raise_os_error(__new_int(errno), __new_str(resultbuf, inbytesleft));
4.56 }
4.57
4.58 - /* Incomplete sequence. */
4.59 -
4.60 - else if (errno == EINVAL)
4.61 - {
4.62 - resultbuf = __ALLOCATE(inbytesleft + 1, sizeof(char));
4.63 - memcpy(resultbuf, inbuf, inbytesleft);
4.64 - __raise_os_error(__new_int(errno), __new_str(resultbuf, inbytesleft));
4.65 - }
4.66 -
4.67 /* General failure. */
4.68
4.69 else
4.70 @@ -144,6 +139,16 @@
4.71 return attr;
4.72 }
4.73
4.74 +__attr __fn_native_iconv_iconv_reset(__attr __args[])
4.75 +{
4.76 + __attr * const cd = &__args[1];
4.77 + /* cd interpreted as iconv_t */
4.78 + iconv_t c = (iconv_t) cd->datavalue;
4.79 +
4.80 + iconv(c, NULL, NULL, NULL, NULL);
4.81 + return __builtins___none_None;
4.82 +}
4.83 +
4.84 /* Module initialisation. */
4.85
4.86 void __main_native_iconv()
5.1 --- a/templates/native/iconv.h Mon Dec 12 18:00:17 2016 +0100
5.2 +++ b/templates/native/iconv.h Mon Dec 12 18:30:40 2016 +0100
5.3 @@ -26,6 +26,7 @@
5.4 __attr __fn_native_iconv_iconv(__attr __args[]);
5.5 __attr __fn_native_iconv_iconv_close(__attr __args[]);
5.6 __attr __fn_native_iconv_iconv_open(__attr __args[]);
5.7 +__attr __fn_native_iconv_iconv_reset(__attr __args[]);
5.8
5.9 /* Module initialisation. */
5.10
6.1 --- a/tests/iconv.py Mon Dec 12 18:00:17 2016 +0100
6.2 +++ b/tests/iconv.py Mon Dec 12 18:30:40 2016 +0100
6.3 @@ -8,22 +8,44 @@
6.4 from_utf16 = Converter("UTF-16", "ISO-8859-1")
6.5
6.6 try:
6.7 - try:
6.8 - iso = "æøå"
6.9 - print iso # æøå
6.10 - utf = to_utf8.convert(iso)
6.11 - print utf # æøå
6.12 - print from_utf8.convert(utf) # æøå
6.13 - utf = to_utf16.convert(iso)
6.14 - print utf # ...
6.15 - print from_utf16.convert(utf) # æøå
6.16 - except OSError, exc:
6.17 - if exc.value == EINVAL:
6.18 - print "Incomplete input", exc.arg
6.19 - elif exc.value == EILSEQ:
6.20 - print "Invalid input", exc.arg
6.21 - else:
6.22 - print exc.value, exc.arg
6.23 + iso = "æøå"
6.24 + print iso # æøå
6.25 + to_utf8.feed(iso)
6.26 + utf8 = str(to_utf8)
6.27 + print utf8 # æøå
6.28 + from_utf8.feed(utf8)
6.29 + print str(from_utf8) # æøå
6.30 + to_utf16.feed(iso)
6.31 + utf16 = str(to_utf16)
6.32 + print utf16 # ...
6.33 + from_utf16.feed(utf16)
6.34 + print str(from_utf16) # æøå
6.35 +
6.36 + # Convert part of a UTF-16 sequence, then convert the remainder, then obtain
6.37 + # the result.
6.38 +
6.39 + first = utf16[:3]
6.40 + second = utf16[3:]
6.41 +
6.42 + from_utf16.reset()
6.43 + print "first:", first # ...
6.44 + from_utf16.feed(first) # should have handled an incomplete input
6.45 + print "second:", second # ...
6.46 + from_utf16.feed(second) # should have handled the complete input
6.47 + print str(from_utf16) # æøå
6.48 +
6.49 + # Convert part of a UTF-8 sequence, then the remainder, then get the result.
6.50 +
6.51 + first = utf8[:3]
6.52 + second = utf8[3:]
6.53 +
6.54 + from_utf8.reset()
6.55 + print "first:", first # æÃ
6.56 + from_utf8.feed(first) # should have handled an incomplete input
6.57 + print "second:", second # ¸Ã¥
6.58 + from_utf8.feed(second) # should have handled the complete input
6.59 + print str(from_utf8) # æøå
6.60 +
6.61 finally:
6.62 to_utf8.close()
6.63 to_utf16.close()