1.1 --- a/lib/native/__init__.py Mon Dec 12 00:40:23 2016 +0100
1.2 +++ b/lib/native/__init__.py Mon Dec 12 00:40:54 2016 +0100
1.3 @@ -29,6 +29,8 @@
1.4
1.5 from native.introspection import object_getattr, isinstance, issubclass
1.6
1.7 +from native.iconv import iconv, iconv_close, iconv_open
1.8 +
1.9 from native.io import fclose, fopen, fdopen, close, read, write, fread, fwrite
1.10
1.11 from native.limits import get_maxint, get_minint
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/lib/native/iconv.py Mon Dec 12 00:40:54 2016 +0100
2.3 @@ -0,0 +1,36 @@
2.4 +#!/usr/bin/env python
2.5 +
2.6 +"""
2.7 +Native library functions for character set conversion.
2.8 +
2.9 +None of these are actually defined here. Instead, native implementations are
2.10 +substituted when each program is built. It is, however, important to declare
2.11 +non-core exceptions used by the native functions because they need to be
2.12 +identified as being needed by the program.
2.13 +
2.14 +Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
2.15 +
2.16 +This program is free software; you can redistribute it and/or modify it under
2.17 +the terms of the GNU General Public License as published by the Free Software
2.18 +Foundation; either version 3 of the License, or (at your option) any later
2.19 +version.
2.20 +
2.21 +This program is distributed in the hope that it will be useful, but WITHOUT
2.22 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2.23 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
2.24 +details.
2.25 +
2.26 +You should have received a copy of the GNU General Public License along with
2.27 +this program. If not, see <http://www.gnu.org/licenses/>.
2.28 +"""
2.29 +
def iconv_open(tocode, fromcode):

    """
    Placeholder for the native function opening a conversion descriptor that
    converts from the 'fromcode' encoding to the 'tocode' encoding.

    The body is not a real implementation: a native implementation is
    substituted when a program is built. OSError is mentioned only so that it
    is identified as being needed by the program.
    """

    OSError
2.32 +
def iconv_close(cd):

    """
    Placeholder for the native function closing the conversion descriptor
    'cd'.

    The body is not a real implementation: a native implementation is
    substituted when a program is built. OSError is mentioned only so that it
    is identified as being needed by the program.
    """

    OSError
2.35 +
def iconv(cd, instr, state):

    """
    Placeholder for the native function converting text from 'instr' using
    the conversion descriptor 'cd', with 'state' describing the progress of
    the conversion.

    The body is not a real implementation: a native implementation is
    substituted when a program is built. OSError is mentioned only so that it
    is identified as being needed by the program.
    """

    OSError
2.38 +
2.39 +# vim: tabstop=4 expandtab shiftwidth=4
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/lib/posix/iconv.py Mon Dec 12 00:40:54 2016 +0100
3.3 @@ -0,0 +1,79 @@
3.4 +#!/usr/bin/env python
3.5 +
3.6 +"""
3.7 +POSIX character set conversion functions.
3.8 +
3.9 +Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
3.10 +
3.11 +This program is free software; you can redistribute it and/or modify it under
3.12 +the terms of the GNU General Public License as published by the Free Software
3.13 +Foundation; either version 3 of the License, or (at your option) any later
3.14 +version.
3.15 +
3.16 +This program is distributed in the hope that it will be useful, but WITHOUT
3.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
3.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
3.19 +details.
3.20 +
3.21 +You should have received a copy of the GNU General Public License along with
3.22 +this program. If not, see <http://www.gnu.org/licenses/>.
3.23 +"""
3.24 +
3.25 +from __builtins__.types import check_int, check_string
3.26 +from native import iconv_close, iconv_open, iconv
3.27 +
class ConverterError(Exception):

    "An error signalling a failure involving a character set converter."
3.33 +
# Error numbers relevant to character set conversion, intended for comparison
# with the value carried by a raised OSError.
# NOTE(review): these are hard-coded and look like Linux errno values; confirm
# that they match the target platform's <errno.h>.

E2BIG = 7       # presumably: insufficient room in the output buffer
EINVAL = 22     # presumably: incomplete input sequence
EILSEQ = 84     # presumably: invalid input sequence
3.37 +
class Converter:

    "A character set converter."

    def __init__(self, from_encoding, to_encoding):

        """
        Initialise conversion between 'from_encoding' and 'to_encoding'.

        Both names are checked to be strings before a conversion descriptor is
        opened. Note that the native iconv_open function accepts the target
        encoding first.
        """

        check_string(from_encoding)
        check_string(to_encoding)
        self.__data__ = iconv_open(to_encoding, from_encoding)

    def close(self):

        """
        Close this converter.

        Closing an already-closed converter does nothing, so that None is
        never passed to the native iconv_close function as a descriptor.
        """

        if self.__data__ is None:
            return

        iconv_close(self.__data__)
        self.__data__ = None

    def convert(self, s):

        """
        Convert 's' between the converter's encodings, returning the converted
        text as a single string.

        A ConverterError is raised if the converter has been closed.
        """

        if self.__data__ is None:
            raise ConverterError

        check_string(s)

        result = []

        # The state holds the next input position and the amount of input
        # remaining. It is passed to the native function on every call so that
        # conversion can resume where the previous call stopped.

        state = [0, len(s)]

        while True:

            # Obtain converted text and update the state.

            result.append(iconv(self.__data__, s, state))

            # Test for the end of the conversion: no input remains.

            if not state[1]:
                return "".join(result)
3.81 +
3.82 +# vim: tabstop=4 expandtab shiftwidth=4
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/templates/native/iconv.c Mon Dec 12 00:40:54 2016 +0100
4.3 @@ -0,0 +1,151 @@
4.4 +/* Native functions for character set conversion.
4.5 +
4.6 +Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
4.7 +
4.8 +This program is free software; you can redistribute it and/or modify it under
4.9 +the terms of the GNU General Public License as published by the Free Software
4.10 +Foundation; either version 3 of the License, or (at your option) any later
4.11 +version.
4.12 +
4.13 +This program is distributed in the hope that it will be useful, but WITHOUT
4.14 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
4.15 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
4.16 +details.
4.17 +
4.18 +You should have received a copy of the GNU General Public License along with
4.19 +this program. If not, see <http://www.gnu.org/licenses/>.
4.20 +*/
4.21 +
4.22 +#include <iconv.h> /* iconv, iconv_close, iconv_open */
4.23 +#include <string.h> /* memcpy */
4.24 +#include <errno.h> /* errno */
4.25 +#include "native/common.h"
4.26 +#include "types.h"
4.27 +#include "exceptions.h"
4.28 +#include "ops.h"
4.29 +#include "progconsts.h"
4.30 +#include "progops.h"
4.31 +#include "progtypes.h"
4.32 +#include "main.h"
4.33 +
4.34 +static const size_t OUTBUFSIZE_MIN = 16;
4.35 +
4.36 +/* Character set conversion. */
4.37 +
4.38 +__attr __fn_native_iconv_iconv(__attr __args[])
4.39 +{
4.40 + __attr * const cd = &__args[1];
4.41 + __attr * const instr = &__args[2];
4.42 + __attr * const state = &__args[3];
4.43 + /* cd interpreted as iconv_t */
4.44 + iconv_t c = (iconv_t) cd->datavalue;
4.45 + /* instr.__data__ interpreted as string */
4.46 + char *inbuf = __load_via_object(instr->value, __pos___data__).strvalue;
4.47 + /* state.__data__ interpreted as list */
4.48 + __fragment *f = __load_via_object(state->value, __pos___data__).seqvalue;
4.49 +
4.50 + /* Obtain the start position from the state. */
4.51 +
4.52 + int start = __load_via_object(f->attrs[0].value, __pos___data__).intvalue;
4.53 + int remaining = __load_via_object(f->attrs[1].value, __pos___data__).intvalue;
4.54 +
4.55 + /* Allocate a string for the output buffer using the remaining input size
4.56 + as a guide. */
4.57 +
4.58 + size_t outbufsize = remaining < OUTBUFSIZE_MIN ? OUTBUFSIZE_MIN : remaining;
4.59 + size_t outbytesleft = outbufsize;
4.60 + size_t inbytesleft = remaining;
4.61 +
4.62 + char buf[outbytesleft];
4.63 + char *outbuf = buf, *outbufstart = outbuf, *resultbuf;
4.64 + size_t result, outbytestotal;
4.65 +
4.66 + /* Convert from the start point. */
4.67 +
4.68 + inbuf += start;
4.69 +
4.70 + errno = 0;
4.71 + result = iconv(c, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
4.72 +
4.73 + /* Return any string. */
4.74 +
4.75 + if ((result != -1) || (errno == E2BIG))
4.76 + {
4.77 + outbytestotal = outbufsize - outbytesleft;
4.78 + resultbuf = __ALLOCATE(outbytestotal + 1, sizeof(char));
4.79 + memcpy(resultbuf, outbufstart, outbytestotal);
4.80 +
4.81 + /* Mutate the state to indicate the next input buffer position. */
4.82 +
4.83 + f->attrs[0] = __new_int(start + remaining - inbytesleft);
4.84 + f->attrs[1] = __new_int(inbytesleft);
4.85 + return __new_str(resultbuf, outbytestotal);
4.86 + }
4.87 +
4.88 + /* Invalid sequence. */
4.89 +
4.90 + if (errno == EILSEQ)
4.91 + {
4.92 + resultbuf = __ALLOCATE(inbytesleft + 1, sizeof(char));
4.93 + memcpy(resultbuf, inbuf, inbytesleft);
4.94 + __raise_os_error(__new_int(errno), __new_str(resultbuf, inbytesleft));
4.95 + }
4.96 +
4.97 + /* Incomplete sequence. */
4.98 +
4.99 + else if (errno == EINVAL)
4.100 + {
4.101 + resultbuf = __ALLOCATE(inbytesleft + 1, sizeof(char));
4.102 + memcpy(resultbuf, inbuf, inbytesleft);
4.103 + __raise_os_error(__new_int(errno), __new_str(resultbuf, inbytesleft));
4.104 + }
4.105 +
4.106 + /* General failure. */
4.107 +
4.108 + else
4.109 + __raise_os_error(__new_int(errno), __builtins___none_None);
4.110 +}
4.111 +
4.112 +__attr __fn_native_iconv_iconv_close(__attr __args[])
4.113 +{
4.114 + __attr * const cd = &__args[1];
4.115 + /* cd interpreted as iconv_t */
4.116 + iconv_t c = (iconv_t) cd->datavalue;
4.117 +
4.118 + errno = 0;
4.119 +
4.120 + if (iconv_close(c) == -1)
4.121 + __raise_os_error(__new_int(errno), __builtins___none_None);
4.122 +
4.123 + return __builtins___none_None;
4.124 +}
4.125 +
4.126 +__attr __fn_native_iconv_iconv_open(__attr __args[])
4.127 +{
4.128 + __attr * const tocode = &__args[1];
4.129 + __attr * const fromcode = &__args[2];
4.130 + /* tocode.__data__ interpreted as string */
4.131 + char *t = __load_via_object(tocode->value, __pos___data__).strvalue;
4.132 + /* fromcode.__data__ interpreted as string */
4.133 + char *f = __load_via_object(fromcode->value, __pos___data__).strvalue;
4.134 + iconv_t result;
4.135 + __attr attr;
4.136 +
4.137 + errno = 0;
4.138 + result = iconv_open(t, f);
4.139 +
4.140 + if (result == (iconv_t) -1)
4.141 + __raise_os_error(__new_int(errno), __builtins___none_None);
4.142 +
4.143 + /* Return the descriptor as an opaque value. */
4.144 +
4.145 + attr.context = 0;
4.146 + attr.datavalue = (void *) result;
4.147 + return attr;
4.148 +}
4.149 +
4.150 +/* Module initialisation. */
4.151 +
void __main_native_iconv()
{
    /* This module has nothing to initialise. */
}
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
5.2 +++ b/templates/native/iconv.h Mon Dec 12 00:40:54 2016 +0100
5.3 @@ -0,0 +1,34 @@
5.4 +/* Native functions for character set conversion.
5.5 +
5.6 +Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
5.7 +
5.8 +This program is free software; you can redistribute it and/or modify it under
5.9 +the terms of the GNU General Public License as published by the Free Software
5.10 +Foundation; either version 3 of the License, or (at your option) any later
5.11 +version.
5.12 +
5.13 +This program is distributed in the hope that it will be useful, but WITHOUT
5.14 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
5.15 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
5.16 +details.
5.17 +
5.18 +You should have received a copy of the GNU General Public License along with
5.19 +this program. If not, see <http://www.gnu.org/licenses/>.
5.20 +*/
5.21 +
#ifndef __NATIVE_ICONV_H__
#define __NATIVE_ICONV_H__

#include "types.h"

/* Character set conversion. */

__attr __fn_native_iconv_iconv(__attr __args[]);
__attr __fn_native_iconv_iconv_close(__attr __args[]);
__attr __fn_native_iconv_iconv_open(__attr __args[]);

/* Module initialisation. */

void __main_native_iconv();

#endif /* __NATIVE_ICONV_H__ */
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/tests/iconv.py Mon Dec 12 00:40:54 2016 +0100
6.3 @@ -0,0 +1,36 @@
6.4 +# -*- coding: ISO-8859-1 -*-
6.5 +
6.6 +from posix.iconv import Converter, EINVAL, EILSEQ
6.7 +
6.8 +to_utf8 = Converter("ISO-8859-1", "UTF-8")
6.9 +to_utf16 = Converter("ISO-8859-1", "UTF-16")
6.10 +from_utf8 = Converter("UTF-8", "ISO-8859-1")
6.11 +from_utf16 = Converter("UTF-16", "ISO-8859-1")
6.12 +
6.13 +try:
6.14 + iso = "æøå"
6.15 + print iso # æøå
6.16 + utf = to_utf8.convert(iso)
6.17 + print utf # æøå
6.18 + print from_utf8.convert(utf) # æøå
6.19 + utf = to_utf16.convert(iso)
6.20 + print utf # ...
6.21 + try:
6.22 + print from_utf16.convert(utf) # æøå
6.23 + except OSError, exc:
6.24 + if exc.value == EINVAL:
6.25 + print "Incomplete input", exc.arg
6.26 + elif exc.value == EILSEQ:
6.27 + print "Invalid input", exc.arg
6.28 + else:
6.29 + print exc.value, exc.arg
6.30 +finally:
6.31 + to_utf8.close()
6.32 + to_utf16.close()
6.33 + from_utf8.close()
6.34 + from_utf16.close()
6.35 +
6.36 +try:
6.37 + Converter("horses", "giraffes")
6.38 +except OSError, exc:
6.39 + print 'Converter("horses", "giraffes"): not valid encodings; error is', exc.value