1.1 --- a/templates/native/unicode.c Fri Feb 03 23:24:15 2017 +0100
1.2 +++ b/templates/native/unicode.c Fri Feb 03 23:25:00 2017 +0100
1.3 @@ -30,6 +30,15 @@
1.4 return ((c & 0xc0) == 0xc0) || !(c & 0x80);
1.5 }
1.6
1.7 +static inline int boundary_value(char c) /* Value bits of a byte that starts a UTF-8 character. */
1.8 +{
1.9 +    if ((c & 0x80) == 0) return c;           /* 0xxxxxxx: single byte, the value is the byte itself */
1.10 +    if ((c & 0xe0) == 0xc0) return c & 0x1f; /* 110xxxxx: two-byte lead, five value bits */
1.11 +    if ((c & 0xf0) == 0xe0) return c & 0x0f; /* 1110xxxx: three-byte lead, four value bits */
1.12 +    if ((c & 0xf8) == 0xf0) return c & 0x07; /* 11110xxx: four-byte lead, three value bits */
1.13 +    return 0;                                /* 10xxxxxx or 11111xxx: nothing to extract */
1.14 +}
1.15 +
1.16 static unsigned int nextpos(char *s, unsigned int size, unsigned int bytestart)
1.17 {
1.18 unsigned int i = bytestart;
1.19 @@ -75,6 +84,39 @@
1.20 return __new_int(c);
1.21 }
1.22
1.23 +__attr __fn_native_unicode_unicode_ord(__attr __args[])
1.24 +{
1.25 +    __attr * const _data = &__args[1];
1.26 +    /* _data interpreted as string */
1.27 +    char *s = _data->strvalue;
1.28 +    unsigned int i, c = 0;
1.29 +
1.30 +    /* Decode the first UTF-8 sequence of the string into a code point; c
1.31 +       stays zero when the string is empty. */
1.32 +
1.33 +    for (i = 0; i < _data->size; i++)
1.34 +    {
1.35 +        /* Every byte except a 10xxxxxx continuation starts a character. The
1.36 +           byte itself is tested, not its extracted value, because a NUL byte
1.37 +           and the lead bytes 0xe0 and 0xf0 extract to zero yet are boundaries. */
1.38 +
1.39 +        if ((s[i] & 0xc0) != 0x80)
1.40 +        {
1.41 +            /* Boundary with characters read: stop reading. */
1.42 +            if (i)
1.43 +                break;
1.44 +
1.45 +            /* First boundary: initialise with the extracted value. */
1.46 +            c = boundary_value(s[i]);
1.47 +        }
1.48 +        else
1.49 +            c = (c << 6) | (s[i] & 0x3f); /* continuation: append its six value bits */
1.50 +    }
1.51 +
1.52 +    /* Return the new integer. */
1.53 +    return __new_int(c);
1.54 +}
1.55 +
1.56 __attr __fn_native_unicode_unicode_substr(__attr __args[])
1.57 {
1.58 __attr * const _data = &__args[1];