# HG changeset patch # User Paul Boddie # Date 1486160700 -3600 # Node ID 9ec67eea98b56125b0e5fe828b0344ae1a000136 # Parent baf7acd90a11ffb80d9d9e37514bbffaac2473aa Made ord a generic function, introducing __ord__ special methods and Unicode support for obtaining character values. diff -r baf7acd90a11 -r 9ec67eea98b5 lib/__builtins__/character.py --- a/lib/__builtins__/character.py Fri Feb 03 23:24:15 2017 +0100 +++ b/lib/__builtins__/character.py Fri Feb 03 23:25:00 2017 +0100 @@ -20,7 +20,7 @@ """ from __builtins__.types import check_int, check_string -from native import str_chr, str_ord +from native import str_chr def chr(i): @@ -85,12 +85,7 @@ "Return the value of the given character 'c'." - check_string(c) - - if c.__len__() == 1: - return str_ord(c.__data__) - else: - raise ValueError, c + return c.__ord__() def unichr(i): pass diff -r baf7acd90a11 -r 9ec67eea98b5 lib/__builtins__/str.py --- a/lib/__builtins__/str.py Fri Feb 03 23:24:15 2017 +0100 +++ b/lib/__builtins__/str.py Fri Feb 03 23:25:00 2017 +0100 @@ -22,8 +22,8 @@ from __builtins__.operator import _negate from __builtins__.sequence import hashable, itemaccess from __builtins__.types import check_int -from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \ - str_substr +from native import str_add, str_lt, str_gt, str_eq, str_len, str_ord, \ + str_nonempty, str_substr WHITESPACE = (" ", "\f", "\n", "\r", "\t") @@ -257,6 +257,15 @@ # String-specific methods. + def __ord__(self): + + "Return the value of the string, if only a single character." + + if self.__len__() == 1: + return str_ord(self.__data__) + else: + raise ValueError, self + def endswith(self, s): "Return whether this string ends with 's'." 
diff -r baf7acd90a11 -r 9ec67eea98b5 lib/__builtins__/unicode.py --- a/lib/__builtins__/unicode.py Fri Feb 03 23:24:15 2017 +0100 +++ b/lib/__builtins__/unicode.py Fri Feb 03 23:25:00 2017 +0100 @@ -22,7 +22,7 @@ from __builtins__.str import basestring from __builtins__.types import check_int from posix.iconv import Converter -from native import str_add, unicode_len, unicode_substr, \ +from native import str_add, unicode_len, unicode_ord, unicode_substr, \ isinstance as _isinstance class utf8string(basestring): @@ -112,6 +112,15 @@ return self.length + def __ord__(self): + + "Return the value of the string, if only a single character." + + if self.__len__() == 1: + return unicode_ord(self.__data__) + else: + raise ValueError, self + def encode(self, encoding=None): """ diff -r baf7acd90a11 -r 9ec67eea98b5 lib/native/__init__.py --- a/lib/native/__init__.py Fri Feb 03 23:24:15 2017 +0100 +++ b/lib/native/__init__.py Fri Feb 03 23:25:00 2017 +0100 @@ -47,6 +47,6 @@ from native.system import exit, get_argv, get_path -from native.unicode import unicode_len, unicode_substr +from native.unicode import unicode_len, unicode_ord, unicode_substr # vim: tabstop=4 expandtab shiftwidth=4 diff -r baf7acd90a11 -r 9ec67eea98b5 lib/native/unicode.py --- a/lib/native/unicode.py Fri Feb 03 23:24:15 2017 +0100 +++ b/lib/native/unicode.py Fri Feb 03 23:25:00 2017 +0100 @@ -8,7 +8,7 @@ non-core exceptions used by the native functions because they need to be identified as being needed by the program. -Copyright (C) 2016 Paul Boddie +Copyright (C) 2016, 2017 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -27,6 +27,7 @@ # Unicode string operations. 
def unicode_len(data): pass +def unicode_ord(data): pass def unicode_substr(data, start, end, step): pass # vim: tabstop=4 expandtab shiftwidth=4 diff -r baf7acd90a11 -r 9ec67eea98b5 templates/native/unicode.c --- a/templates/native/unicode.c Fri Feb 03 23:24:15 2017 +0100 +++ b/templates/native/unicode.c Fri Feb 03 23:25:00 2017 +0100 @@ -30,6 +30,15 @@ return ((c & 0xc0) == 0xc0) || !(c & 0x80); } +static inline int boundary_value(char c) +{ + if (!(c & 0x80)) return c; + else if ((c & 0xf8) == 0xf0) return c & 0x07; + else if ((c & 0xf0) == 0xe0) return c & 0x0f; + else if ((c & 0xe0) == 0xc0) return c & 0x1f; + else return 0; +} + static unsigned int nextpos(char *s, unsigned int size, unsigned int bytestart) { unsigned int i = bytestart; @@ -75,6 +84,39 @@ return __new_int(c); } +__attr __fn_native_unicode_unicode_ord(__attr __args[]) +{ + __attr * const _data = &__args[1]; + /* _data interpreted as string */ + char *s = _data->strvalue; + unsigned int i, c = 0, v; + + for (i = 0; i < _data->size; i++) + { + /* Evaluate the current character as a boundary. */ + + v = boundary_value(s[i]); + + /* Boundary with characters read: stop reading. */ + + if (v && i) + break; + + /* Boundary: initialise with the extracted value. */ + + else if (v) + c = v; + + /* Not a boundary: shift and combine with the continuation value. */ + + else + c = (c << 6) | (s[i] & 0x3f); + } + + /* Return the new integer. */ + return __new_int(c); +} + __attr __fn_native_unicode_unicode_substr(__attr __args[]) { __attr * const _data = &__args[1]; diff -r baf7acd90a11 -r 9ec67eea98b5 templates/native/unicode.h --- a/templates/native/unicode.h Fri Feb 03 23:24:15 2017 +0100 +++ b/templates/native/unicode.h Fri Feb 03 23:25:00 2017 +0100 @@ -1,6 +1,6 @@ /* Native functions for Unicode operations. 
-Copyright (C) 2016 Paul Boddie +Copyright (C) 2016, 2017 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -22,6 +22,7 @@ /* Unicode operations. */ __attr __fn_native_unicode_unicode_len(__attr __args[]); +__attr __fn_native_unicode_unicode_ord(__attr __args[]); __attr __fn_native_unicode_unicode_substr(__attr __args[]); /* Module initialisation. */ diff -r baf7acd90a11 -r 9ec67eea98b5 tests/unicode.py --- a/tests/unicode.py Fri Feb 03 23:24:15 2017 +0100 +++ b/tests/unicode.py Fri Feb 03 23:25:00 2017 +0100 @@ -191,3 +191,12 @@ print len(u[:2]) # 2 print u[-1::-1] # åøæ print len(u[-1::-1]) # 3 + +# Test character values. + +print ord(u[0]) # 230 + +try: + print ord(u) # should raise an exception +except ValueError, exc: + print "ord(u): value is not appropriate", repr(exc.value)