1.1 --- a/lib/__builtins__/character.py Fri Feb 03 23:24:15 2017 +0100
1.2 +++ b/lib/__builtins__/character.py Fri Feb 03 23:25:00 2017 +0100
1.3 @@ -20,7 +20,7 @@
1.4 """
1.5
1.6 from __builtins__.types import check_int, check_string
1.7 -from native import str_chr, str_ord
1.8 +from native import str_chr
1.9
1.10 def chr(i):
1.11
1.12 @@ -85,12 +85,7 @@
1.13
1.14 "Return the value of the given character 'c'."
1.15
1.16 - check_string(c)
1.17 -
1.18 - if c.__len__() == 1:
1.19 - return str_ord(c.__data__)
1.20 - else:
1.21 - raise ValueError, c
1.22 + return c.__ord__()
1.23
1.24 def unichr(i): pass
1.25
2.1 --- a/lib/__builtins__/str.py Fri Feb 03 23:24:15 2017 +0100
2.2 +++ b/lib/__builtins__/str.py Fri Feb 03 23:25:00 2017 +0100
2.3 @@ -22,8 +22,8 @@
2.4 from __builtins__.operator import _negate
2.5 from __builtins__.sequence import hashable, itemaccess
2.6 from __builtins__.types import check_int
2.7 -from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \
2.8 - str_substr
2.9 +from native import str_add, str_lt, str_gt, str_eq, str_len, str_ord, \
2.10 + str_nonempty, str_substr
2.11
2.12 WHITESPACE = (" ", "\f", "\n", "\r", "\t")
2.13
2.14 @@ -257,6 +257,15 @@
2.15
2.16 # String-specific methods.
2.17
2.18 + def __ord__(self):
2.19 +
2.20 + "Return the character value of this string if its length is exactly one; raise ValueError otherwise."
2.21 +
2.22 + if self.__len__() == 1:
2.23 + return str_ord(self.__data__)
2.24 + else:
2.25 + raise ValueError, self # the string itself is supplied as the exception argument
2.26 +
2.27 def endswith(self, s):
2.28
2.29 "Return whether this string ends with 's'."
3.1 --- a/lib/__builtins__/unicode.py Fri Feb 03 23:24:15 2017 +0100
3.2 +++ b/lib/__builtins__/unicode.py Fri Feb 03 23:25:00 2017 +0100
3.3 @@ -22,7 +22,7 @@
3.4 from __builtins__.str import basestring
3.5 from __builtins__.types import check_int
3.6 from posix.iconv import Converter
3.7 -from native import str_add, unicode_len, unicode_substr, \
3.8 +from native import str_add, unicode_len, unicode_ord, unicode_substr, \
3.9 isinstance as _isinstance
3.10
3.11 class utf8string(basestring):
3.12 @@ -112,6 +112,15 @@
3.13
3.14 return self.length
3.15
3.16 + def __ord__(self):
3.17 +
3.18 + "Return the character value of this string if self.__len__() is exactly one; raise ValueError otherwise."
3.19 +
3.20 + if self.__len__() == 1:
3.21 + return unicode_ord(self.__data__)
3.22 + else:
3.23 + raise ValueError, self # the string itself is supplied as the exception argument
3.24 +
3.25 def encode(self, encoding=None):
3.26
3.27 """
4.1 --- a/lib/native/__init__.py Fri Feb 03 23:24:15 2017 +0100
4.2 +++ b/lib/native/__init__.py Fri Feb 03 23:25:00 2017 +0100
4.3 @@ -47,6 +47,6 @@
4.4
4.5 from native.system import exit, get_argv, get_path
4.6
4.7 -from native.unicode import unicode_len, unicode_substr
4.8 +from native.unicode import unicode_len, unicode_ord, unicode_substr
4.9
4.10 # vim: tabstop=4 expandtab shiftwidth=4
5.1 --- a/lib/native/unicode.py Fri Feb 03 23:24:15 2017 +0100
5.2 +++ b/lib/native/unicode.py Fri Feb 03 23:25:00 2017 +0100
5.3 @@ -8,7 +8,7 @@
5.4 non-core exceptions used by the native functions because they need to be
5.5 identified as being needed by the program.
5.6
5.7 -Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
5.8 +Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>
5.9
5.10 This program is free software; you can redistribute it and/or modify it under
5.11 the terms of the GNU General Public License as published by the Free Software
5.12 @@ -27,6 +27,7 @@
5.13 # Unicode string operations.
5.14
5.15 def unicode_len(data): pass
5.16 +def unicode_ord(data): pass
5.17 def unicode_substr(data, start, end, step): pass
5.18
5.19 # vim: tabstop=4 expandtab shiftwidth=4
6.1 --- a/templates/native/unicode.c Fri Feb 03 23:24:15 2017 +0100
6.2 +++ b/templates/native/unicode.c Fri Feb 03 23:25:00 2017 +0100
6.3 @@ -30,6 +30,15 @@
6.4 return ((c & 0xc0) == 0xc0) || !(c & 0x80);
6.5 }
6.6
6.7 +static inline int boundary_value(char c) /* payload bits of a byte that starts a character */
6.8 +{
6.9 + if (!(c & 0x80)) return c; /* 0xxxxxxx: the byte is the value itself */
6.10 + else if ((c & 0xf8) == 0xf0) return c & 0x07; /* 11110xxx: low three bits */
6.11 + else if ((c & 0xf0) == 0xe0) return c & 0x0f; /* 1110xxxx: low four bits */
6.12 + else if ((c & 0xe0) == 0xc0) return c & 0x1f; /* 110xxxxx: low five bits */
6.13 + else return 0; /* any other byte; NOTE: 0x00, 0xe0 and 0xf0 also yield 0 above, so 0 is ambiguous */
6.14 +}
6.15 +
6.16 static unsigned int nextpos(char *s, unsigned int size, unsigned int bytestart)
6.17 {
6.18 unsigned int i = bytestart;
6.19 @@ -75,6 +84,39 @@
6.20 return __new_int(c);
6.21 }
6.22
6.23 +__attr __fn_native_unicode_unicode_ord(__attr __args[])
6.24 +{
6.25 +    __attr * const _data = &__args[1];
6.26 +    /* _data interpreted as string */
6.27 +    char *s = _data->strvalue;
6.28 +    unsigned int i, c = 0;
6.29 +
6.30 +    /* Decode the first UTF-8 sequence in the data into its value. */
6.31 +
6.32 +    for (i = 0; i < _data->size; i++)
6.33 +    {
6.34 +        /* Continuation byte (10xxxxxx): shift and combine its six bits. */
6.35 +
6.36 +        if ((s[i] & 0xc0) == 0x80)
6.37 +            c = (c << 6) | (s[i] & 0x3f);
6.38 +
6.39 +        /* Boundary with characters read: stop reading. */
6.40 +
6.41 +        else if (i)
6.42 +            break;
6.43 +
6.44 +        /* First boundary: initialise with the lead byte's payload bits.
6.45 +           These are zero for 0xe0 and 0xf0 lead bytes, so the byte
6.46 +           pattern, not the payload, must identify the boundary. */
6.47 +
6.48 +        else
6.49 +            c = boundary_value(s[i]);
6.50 +    }
6.51 +
6.52 +    /* Return the new integer. */
6.53 +    return __new_int(c);
6.54 +}
6.55 +
6.56 __attr __fn_native_unicode_unicode_substr(__attr __args[])
6.57 {
6.58 __attr * const _data = &__args[1];
7.1 --- a/templates/native/unicode.h Fri Feb 03 23:24:15 2017 +0100
7.2 +++ b/templates/native/unicode.h Fri Feb 03 23:25:00 2017 +0100
7.3 @@ -1,6 +1,6 @@
7.4 /* Native functions for Unicode operations.
7.5
7.6 -Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
7.7 +Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>
7.8
7.9 This program is free software; you can redistribute it and/or modify it under
7.10 the terms of the GNU General Public License as published by the Free Software
7.11 @@ -22,6 +22,7 @@
7.12 /* Unicode operations. */
7.13
7.14 __attr __fn_native_unicode_unicode_len(__attr __args[]);
7.15 +__attr __fn_native_unicode_unicode_ord(__attr __args[]);
7.16 __attr __fn_native_unicode_unicode_substr(__attr __args[]);
7.17
7.18 /* Module initialisation. */
8.1 --- a/tests/unicode.py Fri Feb 03 23:24:15 2017 +0100
8.2 +++ b/tests/unicode.py Fri Feb 03 23:25:00 2017 +0100
8.3 @@ -191,3 +191,12 @@
8.4 print len(u[:2]) # 2
8.5 print u[-1::-1] # ורז
8.6 print len(u[-1::-1]) # 3
8.7 +
8.8 +# Test character values: ord of a single character, then of a longer string.
8.9 +
8.10 +print ord(u[0]) # 230
8.11 +
8.12 +try:
8.13 + print ord(u) # should raise ValueError: u is longer than one character
8.14 +except ValueError, exc:
8.15 + print "ord(u): value is not appropriate", repr(exc.value)