1 /* Native functions for Unicode operations. 2 3 Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk> 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License as published by the Free Software 7 Foundation; either version 3 of the License, or (at your option) any later 8 version. 9 10 This program is distributed in the hope that it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 13 details. 14 15 You should have received a copy of the GNU General Public License along with 16 this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "native/common.h" 20 #include "types.h" 21 #include "exceptions.h" 22 #include "ops.h" 23 #include "progconsts.h" 24 #include "progops.h" 25 #include "progtypes.h" 26 #include "main.h" 27 28 static inline int boundary(char c) 29 { 30 return ((c & 0xc0) == 0xc0) || !(c & 0x80); 31 } 32 33 static unsigned int nextpos(char *s, unsigned int size, unsigned int bytestart) 34 { 35 unsigned int i = bytestart; 36 37 while (i < size) 38 { 39 i++; 40 if (boundary(s[i])) 41 break; 42 } 43 44 return i; 45 } 46 47 static unsigned int prevpos(char *s, unsigned int bytestart) 48 { 49 unsigned int i = bytestart; 50 51 while (i > 0) 52 { 53 i--; 54 if (boundary(s[i])) 55 break; 56 } 57 58 return i; 59 } 60 61 /* Unicode operations. */ 62 63 __attr __fn_native_unicode_unicode_len(__attr __args[]) 64 { 65 __attr * const _data = &__args[1]; 66 /* _data interpreted as string */ 67 char *s = _data->strvalue; 68 unsigned int i, c = 0; 69 70 for (i = 0; i < _data->size; i++) 71 if (boundary(s[i])) 72 c++; 73 74 /* Return the new integer. */ 75 return __new_int(c); 76 } 77 78 __attr __fn_native_unicode_unicode_substr(__attr __args[]) 79 { 80 __attr * const _data = &__args[1]; 81 __attr * const start = &__args[2]; 82 __attr * const end = &__args[3]; 83 __attr * const step = &__args[4]; 84 /* _data interpreted as string */ 85 char *s = _data->strvalue, *sub; 86 /* start.__data__ interpreted as int */ 87 int istart = __load_via_object(start->value, __pos___data__).intvalue; 88 /* end.__data__ interpreted as int */ 89 int iend = __load_via_object(end->value, __pos___data__).intvalue; 90 /* step.__data__ interpreted as int */ 91 int istep = __load_via_object(step->value, __pos___data__).intvalue; 92 93 /* Calculate the number of characters. */ 94 size_t nchar = ((iend - istart - (istep > 0 ? 1 : -1)) / istep) + 1; 95 unsigned int indexes[nchar]; 96 97 unsigned int c, d, i, to, from, lastbyte = 0; 98 size_t resultsize = 0; 99 100 /* Find the indexes of the characters. */ 101 if (istep > 0) 102 { 103 /* Get the first byte position. */ 104 for (c = 0; c < istart; c++) 105 lastbyte = nextpos(s, _data->size, lastbyte); 106 107 /* Get each subsequent byte position. */ 108 for (c = istart, i = 0; i < nchar; c += istep, i++) 109 { 110 indexes[i] = lastbyte; 111 112 /* Add the character size to the result size. */ 113 resultsize += nextpos(s, _data->size, lastbyte) - lastbyte; 114 115 for (d = c; d < c + istep; d++) 116 lastbyte = nextpos(s, _data->size, lastbyte); 117 } 118 } 119 else 120 { 121 /* Get the first byte position. */ 122 for (c = 0; c < istart; c++) 123 lastbyte = nextpos(s, _data->size, lastbyte); 124 125 /* Get each subsequent byte position. */ 126 for (c = istart, i = 0; i < nchar; c += istep, i++) 127 { 128 indexes[i] = lastbyte; 129 130 /* Add the character size to the result size. */ 131 resultsize += nextpos(s, _data->size, lastbyte) - lastbyte; 132 133 for (d = c; d > c + istep; d--) 134 lastbyte = prevpos(s, lastbyte); 135 } 136 } 137 138 /* Reserve space for a new string. */ 139 sub = (char *) __ALLOCATE(resultsize + 1, sizeof(char)); 140 141 /* Does not null terminate but final byte should be zero. */ 142 for (i = 0, to = 0; i < nchar; i++) 143 { 144 from = indexes[i]; 145 do 146 { 147 sub[to++] = s[from++]; 148 } while (!boundary(s[from])); 149 } 150 151 return __new_str(sub, resultsize); 152 } 153 154 /* Module initialisation. */ 155 156 void __main_native_unicode() 157 { 158 }