paul@403 | 1 | /* Native functions for Unicode operations. |
paul@403 | 2 | |
paul@403 | 3 | Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk> |
paul@403 | 4 | |
paul@403 | 5 | This program is free software; you can redistribute it and/or modify it under |
paul@403 | 6 | the terms of the GNU General Public License as published by the Free Software |
paul@403 | 7 | Foundation; either version 3 of the License, or (at your option) any later |
paul@403 | 8 | version. |
paul@403 | 9 | |
paul@403 | 10 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@403 | 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@403 | 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@403 | 13 | details. |
paul@403 | 14 | |
paul@403 | 15 | You should have received a copy of the GNU General Public License along with |
paul@403 | 16 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@403 | 17 | */ |
paul@403 | 18 | |
paul@403 | 19 | #include "native/common.h" |
paul@403 | 20 | #include "types.h" |
paul@403 | 21 | #include "exceptions.h" |
paul@403 | 22 | #include "ops.h" |
paul@403 | 23 | #include "progconsts.h" |
paul@403 | 24 | #include "progops.h" |
paul@403 | 25 | #include "progtypes.h" |
paul@403 | 26 | #include "main.h" |
paul@403 | 27 | |
/* Report whether a byte starts a character in UTF-8 encoded data.

   Returns 1 for single-byte values (top bit clear) and for lead bytes of
   multi-byte sequences (top two bits set); returns 0 for continuation bytes,
   which have the bit pattern 10xxxxxx. */

static inline int boundary(char c)
{
    /* Only the top two bits matter: anything other than "10" is a boundary. */
    return (c & 0xc0) != 0x80;
}
paul@431 | 32 | |
/* Find the byte position of the character following the one at bytestart.

   s         - the byte data being scanned
   size      - the number of bytes in s
   bytestart - the byte position to search from

   Returns the position of the next byte for which boundary() holds, or size
   if no such byte exists before the end of the data. If bytestart is not
   less than size, bytestart is returned unchanged. */

static unsigned int nextpos(const char *s, unsigned int size, unsigned int bytestart)
{
    unsigned int i = bytestart;

    while (i < size)
    {
        i++;

        /* Test the position against size before examining the byte so that
           s[size], which lies outside the stated length, is never read. */
        if (i == size || boundary(s[i]))
            break;
    }

    return i;
}
paul@431 | 46 | |
/* Find the byte position of the character preceding the one at bytestart.

   s         - the byte data being scanned
   bytestart - the byte position to search backwards from

   Returns the position of the closest earlier byte for which boundary()
   holds, or 0 if the start of the data is reached first. A bytestart of 0 is
   returned unchanged. */

static unsigned int prevpos(char *s, unsigned int bytestart)
{
    unsigned int pos;

    for (pos = bytestart; pos > 0; )
    {
        /* Step back one byte and stop as soon as a character start is found. */
        pos--;

        if (boundary(s[pos]))
            return pos;
    }

    return pos;
}
paul@431 | 60 | |
paul@403 | 61 | /* Unicode operations. */ |
paul@403 | 62 | |
paul@403 | 63 | __attr __fn_native_unicode_unicode_len(__attr __args[]) |
paul@403 | 64 | { |
paul@403 | 65 | __attr * const _data = &__args[1]; |
paul@403 | 66 | /* _data interpreted as string */ |
paul@403 | 67 | char *s = _data->strvalue; |
paul@431 | 68 | unsigned int i, c = 0; |
paul@403 | 69 | |
paul@403 | 70 | for (i = 0; i < _data->size; i++) |
paul@431 | 71 | if (boundary(s[i])) |
paul@403 | 72 | c++; |
paul@403 | 73 | |
paul@403 | 74 | /* Return the new integer. */ |
paul@403 | 75 | return __new_int(c); |
paul@403 | 76 | } |
paul@403 | 77 | |
paul@431 | 78 | __attr __fn_native_unicode_unicode_substr(__attr __args[]) |
paul@431 | 79 | { |
paul@431 | 80 | __attr * const _data = &__args[1]; |
paul@431 | 81 | __attr * const start = &__args[2]; |
paul@431 | 82 | __attr * const end = &__args[3]; |
paul@431 | 83 | __attr * const step = &__args[4]; |
paul@431 | 84 | /* _data interpreted as string */ |
paul@431 | 85 | char *s = _data->strvalue, *sub; |
paul@431 | 86 | /* start.__data__ interpreted as int */ |
paul@431 | 87 | int istart = __load_via_object(start->value, __pos___data__).intvalue; |
paul@431 | 88 | /* end.__data__ interpreted as int */ |
paul@431 | 89 | int iend = __load_via_object(end->value, __pos___data__).intvalue; |
paul@431 | 90 | /* step.__data__ interpreted as int */ |
paul@431 | 91 | int istep = __load_via_object(step->value, __pos___data__).intvalue; |
paul@431 | 92 | |
paul@431 | 93 | /* Calculate the number of characters. */ |
paul@431 | 94 | size_t nchar = ((iend - istart - (istep > 0 ? 1 : -1)) / istep) + 1; |
paul@431 | 95 | unsigned int indexes[nchar]; |
paul@431 | 96 | |
paul@431 | 97 | unsigned int c, d, i, to, from, lastbyte = 0; |
paul@431 | 98 | size_t resultsize = 0; |
paul@431 | 99 | |
paul@431 | 100 | /* Find the indexes of the characters. */ |
paul@431 | 101 | if (istep > 0) |
paul@431 | 102 | { |
paul@431 | 103 | /* Get the first byte position. */ |
paul@431 | 104 | for (c = 0; c < istart; c++) |
paul@431 | 105 | lastbyte = nextpos(s, _data->size, lastbyte); |
paul@431 | 106 | |
paul@431 | 107 | /* Get each subsequent byte position. */ |
paul@431 | 108 | for (c = istart, i = 0; i < nchar; c += istep, i++) |
paul@431 | 109 | { |
paul@431 | 110 | indexes[i] = lastbyte; |
paul@431 | 111 | |
paul@431 | 112 | /* Add the character size to the result size. */ |
paul@431 | 113 | resultsize += nextpos(s, _data->size, lastbyte) - lastbyte; |
paul@431 | 114 | |
paul@431 | 115 | for (d = c; d < c + istep; d++) |
paul@431 | 116 | lastbyte = nextpos(s, _data->size, lastbyte); |
paul@431 | 117 | } |
paul@431 | 118 | } |
paul@431 | 119 | else |
paul@431 | 120 | { |
paul@431 | 121 | /* Get the first byte position. */ |
paul@431 | 122 | for (c = 0; c < istart; c++) |
paul@431 | 123 | lastbyte = nextpos(s, _data->size, lastbyte); |
paul@431 | 124 | |
paul@431 | 125 | /* Get each subsequent byte position. */ |
paul@431 | 126 | for (c = istart, i = 0; i < nchar; c += istep, i++) |
paul@431 | 127 | { |
paul@431 | 128 | indexes[i] = lastbyte; |
paul@431 | 129 | |
paul@431 | 130 | /* Add the character size to the result size. */ |
paul@431 | 131 | resultsize += nextpos(s, _data->size, lastbyte) - lastbyte; |
paul@431 | 132 | |
paul@431 | 133 | for (d = c; d > c + istep; d--) |
paul@431 | 134 | lastbyte = prevpos(s, lastbyte); |
paul@431 | 135 | } |
paul@431 | 136 | } |
paul@431 | 137 | |
paul@431 | 138 | /* Reserve space for a new string. */ |
paul@431 | 139 | sub = (char *) __ALLOCATE(resultsize + 1, sizeof(char)); |
paul@431 | 140 | |
paul@431 | 141 | /* Does not null terminate but final byte should be zero. */ |
paul@431 | 142 | for (i = 0, to = 0; i < nchar; i++) |
paul@431 | 143 | { |
paul@431 | 144 | from = indexes[i]; |
paul@431 | 145 | do |
paul@431 | 146 | { |
paul@431 | 147 | sub[to++] = s[from++]; |
paul@431 | 148 | } while (!boundary(s[from])); |
paul@431 | 149 | } |
paul@431 | 150 | |
paul@431 | 151 | return __new_str(sub, resultsize); |
paul@431 | 152 | } |
paul@431 | 153 | |
paul@403 | 154 | /* Module initialisation. */ |
paul@403 | 155 | |
/* Initialisation entry point for this module. No initialisation is
   performed: the function returns immediately. */

void __main_native_unicode()
{
    return;
}