# HG changeset patch # User Paul Boddie # Date 1487008499 -3600 # Node ID aed28d04304df1e594e435b81abd8b242258536a # Parent d7769551b05117c79acb90286edadbb5eda41c0a Re-added size information to string instances as the __size__ attribute. This fixes problems introduced when using strlen on data likely to contain embedded nulls, which was the reason for having size information explicitly stored in the first place. diff -r d7769551b051 -r aed28d04304d generator.py --- a/generator.py Sun Feb 12 23:24:42 2017 +0100 +++ b/generator.py Mon Feb 13 18:54:59 2017 +0100 @@ -500,6 +500,11 @@ else: attrs["__key__"] = None + # Initialise the size, if a string. + + if attrs.has_key("__size__"): + attrs["__size__"] = len(data) + # Define Unicode constant encoding details. if cls == self.unicode_type: @@ -903,6 +908,12 @@ encode_literal_constant_value(attr))) continue + # Special internal size member. + + elif attrname == "__size__": + structure.append("{.intvalue=%d}" % attr) + continue + # Special internal key member. elif attrname == "__key__": diff -r d7769551b051 -r aed28d04304d lib/__builtins__/str.py --- a/lib/__builtins__/str.py Sun Feb 12 23:24:42 2017 +0100 +++ b/lib/__builtins__/str.py Mon Feb 13 18:54:59 2017 +0100 @@ -22,8 +22,8 @@ from __builtins__.operator import _negate from __builtins__.sequence import hashable, itemaccess from __builtins__.types import check_int -from native import str_add, str_lt, str_gt, str_eq, str_len, str_ord, \ - str_nonempty, str_substr +from native import int_new, str_add, str_lt, str_gt, str_eq, str_ord, \ + str_substr WHITESPACE = (" ", "\f", "\n", "\r", "\t") @@ -39,26 +39,25 @@ # literals or converted using routines defined for other types, no form # of actual initialisation is performed here. + # Note the __key__ member. This is also initialised statically. Where + # a string is the same as an attribute name, the __key__ member contains + # attribute position and code details. 
+ # NOTE: Cannot perform "other and other.__data__ or None" since the # NOTE: __data__ attribute is not a normal attribute. if other: self.__data__ = other.__data__ + self.__key__ = other.__key__ + self.__size__ = other.__size__ else: self.__data__ = None - - # Note the __key__ member. This is also initialised statically. Where - # a string is the same as an attribute name, the __key__ member contains - # attribute position and code details. - - if other: - self.__key__ = other.__key__ - else: self.__key__ = None + self.__size__ = None # Internal methods. - def _binary_op(self, op, other): + def _binary_op(self, op, other, sizes=False): "Perform 'op' on this object and 'other' if appropriate." @@ -69,10 +68,12 @@ # Otherwise, perform the operation on the operands' data. + elif sizes: + return op(self.__data__, other.__data__, self.__size__, other.__size__) else: return op(self.__data__, other.__data__) - def _binary_op_rev(self, op, other): + def _binary_op_rev(self, op, other, sizes=False): "Perform 'op' on 'other' and this object if appropriate." @@ -83,6 +84,8 @@ # Otherwise, perform the operation on the operands' data. + elif sizes: + return op(other.__data__, self.__data__, other.__size__, self.__size__) else: return op(other.__data__, self.__data__) @@ -154,7 +157,7 @@ "Return the number of bytes in this string." - return str_len(self.__data__) + return int_new(self.__size__) # General type methods. @@ -162,7 +165,7 @@ "Return whether the string provides any data." - return str_nonempty(self.__data__) + return int_new(self.__size__).__bool__() def __contains__(self, value): @@ -196,7 +199,7 @@ "Return a string combining this string with 'other'." - return self._binary_op(str_add, other) + return self._binary_op(str_add, other, True) __add__ = __iadd__ @@ -204,7 +207,7 @@ "Return a string combining this string with 'other'." 
- return self._binary_op_rev(str_add, other) + return self._binary_op_rev(str_add, other, True) def __mod__(self, other): pass def __rmod__(self, other): pass diff -r d7769551b051 -r aed28d04304d lib/__builtins__/unicode.py --- a/lib/__builtins__/unicode.py Sun Feb 12 23:24:42 2017 +0100 +++ b/lib/__builtins__/unicode.py Mon Feb 13 18:54:59 2017 +0100 @@ -133,7 +133,7 @@ "Return the length of this string in characters." if self.length is None: - self.length = unicode_len(self.__data__) + self.length = unicode_len(self.__data__, self.__size__) return self.length @@ -142,7 +142,7 @@ "Return the value of the string, if only a single character." if self.__len__() == 1: - return unicode_ord(self.__data__) + return unicode_ord(self.__data__, self.__size__) else: raise ValueError, self @@ -204,7 +204,7 @@ "Return the item at the normalised (positive) 'index'." self._check_index(index) - return utf8string(unicode_substr(self.__data__, index, index + 1, 1), self.encoding) + return utf8string(unicode_substr(self.__data__, self.__size__, index, index + 1, 1), self.encoding) def __get_multiple_items__(self, start, end, step): diff -r d7769551b051 -r aed28d04304d lib/native/__init__.py --- a/lib/native/__init__.py Sun Feb 12 23:24:42 2017 +0100 +++ b/lib/native/__init__.py Mon Feb 13 18:54:59 2017 +0100 @@ -23,7 +23,8 @@ from native.identity import is_, is_not -from native.int import int_add, int_div, int_mod, int_mul, int_neg, int_pow, \ +from native.int import int_new, \ + int_add, int_div, int_mod, int_mul, int_neg, int_pow, \ int_sub, int_and, int_not, int_or, int_xor, int_lt, \ int_gt, int_eq, int_ne, int_str @@ -42,8 +43,8 @@ from native.program import get_using -from native.str import str_add, str_chr, str_eq, str_gt, str_lt, str_len, \ - str_nonempty, str_ord, str_substr +from native.str import str_add, str_chr, str_eq, str_gt, str_lt, \ + str_ord, str_substr from native.system import exit, get_argv, get_path diff -r d7769551b051 -r aed28d04304d lib/native/int.py --- 
a/lib/native/int.py Sun Feb 12 23:24:42 2017 +0100 +++ b/lib/native/int.py Mon Feb 13 18:54:59 2017 +0100 @@ -8,7 +8,7 @@ non-core exceptions used by the native functions because they need to be identified as being needed by the program. -Copyright (C) 2011, 2015, 2016 Paul Boddie +Copyright (C) 2011, 2015, 2016, 2017 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,6 +24,8 @@ this program. If not, see <http://www.gnu.org/licenses/>. """ +def int_new(data): pass + def int_add(self, other): pass def int_div(self, other): pass def int_mod(self, other): pass diff -r d7769551b051 -r aed28d04304d lib/native/str.py --- a/lib/native/str.py Sun Feb 12 23:24:42 2017 +0100 +++ b/lib/native/str.py Mon Feb 13 18:54:59 2017 +0100 @@ -26,13 +26,11 @@ # String operations. -def str_add(data, other_data): pass +def str_add(data, other_data, size, other_size): pass def str_chr(data): pass def str_eq(data, other_data): pass def str_gt(data, other_data): pass def str_lt(data, other_data): pass -def str_len(data): pass -def str_nonempty(data): pass def str_ord(data): pass def str_substr(data, start, end, step): pass diff -r d7769551b051 -r aed28d04304d lib/native/unicode.py --- a/lib/native/unicode.py Sun Feb 12 23:24:42 2017 +0100 +++ b/lib/native/unicode.py Mon Feb 13 18:54:59 2017 +0100 @@ -26,8 +26,8 @@ # Unicode string operations. -def unicode_len(data): pass -def unicode_ord(data): pass -def unicode_substr(data, start, end, step): pass +def unicode_len(data, size): pass +def unicode_ord(data, size): pass +def unicode_substr(data, size, start, end, step): pass # vim: tabstop=4 expandtab shiftwidth=4 diff -r d7769551b051 -r aed28d04304d templates/native/buffer.c --- a/templates/native/buffer.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/buffer.c Mon Feb 13 18:54:59 2017 +0100 @@ -37,7 +37,7 @@ /* Calculate the size of the string.
*/ for (i = 0; i < data->size; i++) - size += strlen(__load_via_object(data->attrs[i].value, __pos___data__).strvalue); + size += __load_via_object(data->attrs[i].value, __pos___size__).intvalue; /* Reserve space for a new string. */ s = (char *) __ALLOCATE(size + 1, sizeof(char)); @@ -46,13 +46,13 @@ for (i = 0, j = 0; i < data->size; i++) { o = __load_via_object(data->attrs[i].value, __pos___data__); - n = strlen(o.strvalue); + n = __load_via_object(data->attrs[i].value, __pos___size__).intvalue; memcpy(s + j, o.strvalue, n); /* does not null terminate but final byte should be zero */ j += n; } /* Return a new string. */ - return __new_str(s); + return __new_str(s, size); } /* Module initialisation. */ diff -r d7769551b051 -r aed28d04304d templates/native/common.c --- a/templates/native/common.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/common.c Mon Feb 13 18:54:59 2017 +0100 @@ -34,11 +34,12 @@ return attr; } -__attr __new_str(char *s) +__attr __new_str(char *s, int size) { - /* Create a new string and mutate the __data__ and __key__ attributes. */ + /* Create a new string and mutate the __data__, __size__ and __key__ attributes. */ __attr attr = __NEWINSTANCE(__builtins___str_string); attr.value->attrs[__pos___data__].strvalue = s; + attr.value->attrs[__pos___size__].intvalue = size; attr.value->attrs[__pos___key__] = __NULL; return attr; } diff -r d7769551b051 -r aed28d04304d templates/native/common.h --- a/templates/native/common.h Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/common.h Mon Feb 13 18:54:59 2017 +0100 @@ -24,7 +24,7 @@ /* Utility functions. 
*/ __attr __new_int(int i); -__attr __new_str(char *s); +__attr __new_str(char *s, int size); __attr __new_list(__fragment *f); __fragment *__fragment_append(__fragment *data, __attr * const value); diff -r d7769551b051 -r aed28d04304d templates/native/iconv.c --- a/templates/native/iconv.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/iconv.c Mon Feb 13 18:54:59 2017 +0100 @@ -99,9 +99,9 @@ /* Incomplete sequence: raise the string in an OSError instead. */ if (errno == EINVAL) - __raise_incomplete_sequence_error(__new_int(errno), __new_str(resultbuf)); + __raise_incomplete_sequence_error(__new_int(errno), __new_str(resultbuf, outbytestotal)); - return __new_str(resultbuf); + return __new_str(resultbuf, outbytestotal); } /* Invalid sequence. */ @@ -110,7 +110,7 @@ { resultbuf = __ALLOCATE(inbytesleft + 1, sizeof(char)); memcpy(resultbuf, inbuf, inbytesleft); - __raise_invalid_sequence_error(__new_int(errno), __new_str(resultbuf)); + __raise_invalid_sequence_error(__new_int(errno), __new_str(resultbuf, inbytesleft)); } /* General failure. */ diff -r d7769551b051 -r aed28d04304d templates/native/int.c --- a/templates/native/int.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/int.c Mon Feb 13 18:54:59 2017 +0100 @@ -18,8 +18,9 @@ #include <limits.h> /* INT_MAX, INT_MIN */ #include <math.h> /* ceil, log10, pow */ -#include <stdio.h> /* fdopen, snprintf, strlen */ +#include <stdio.h> /* fdopen, snprintf */ #include <errno.h> /* errno */ +#include <string.h> /* strlen */ #include "native/common.h" #include "types.h" #include "exceptions.h" @@ -31,6 +32,13 @@ /* Integer operations. */ +__attr __fn_native_int_int_new(__attr __args[]) +{ + __attr * const _data = &__args[1]; + + return __new_int(_data->intvalue); +} + __attr __fn_native_int_int_add(__attr __args[]) { __attr * const _data = &__args[1]; @@ -268,7 +276,7 @@ snprintf(s, n, "%d", i); /* Return a new string. */ - return __new_str(s); + return __new_str(s, strlen(s)); } /* Module initialisation.
*/ diff -r d7769551b051 -r aed28d04304d templates/native/int.h --- a/templates/native/int.h Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/int.h Mon Feb 13 18:54:59 2017 +0100 @@ -1,6 +1,6 @@ /* Native functions for integer operations. -Copyright (C) 2016 Paul Boddie +Copyright (C) 2016, 2017 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -23,6 +23,8 @@ /* Integer operations. */ +__attr __fn_native_int_int_new(__attr __args[]); + __attr __fn_native_int_int_add(__attr __args[]); __attr __fn_native_int_int_div(__attr __args[]); __attr __fn_native_int_int_mod(__attr __args[]); diff -r d7769551b051 -r aed28d04304d templates/native/io.c --- a/templates/native/io.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/io.c Mon Feb 13 18:54:59 2017 +0100 @@ -150,7 +150,7 @@ s = __ALLOCATE(have_read + 1, sizeof(char)); memcpy(s, (char *) buf, have_read); /* does not null terminate but final byte should be zero */ - return __new_str(s); + return __new_str(s, have_read); } __attr __fn_native_io_fwrite(__attr __args[]) @@ -160,9 +160,9 @@ /* fp interpreted as FILE reference */ FILE *f = (FILE *) fp->datavalue; /* str.__data__ interpreted as string */ - __attr sa = __load_via_object(str->value, __pos___data__); - char *s = sa.strvalue; - size_t to_write = strlen(sa.strvalue); + char *s = __load_via_object(str->value, __pos___data__).strvalue; + /* str.__size__ interpreted as int */ + int to_write = __load_via_object(str->value, __pos___size__).intvalue; size_t have_written = fwrite(s, sizeof(char), to_write, f); int error; @@ -212,7 +212,7 @@ s = __ALLOCATE(have_read + 1, 1); memcpy(s, (char *) buf, have_read); /* does not null terminate but final byte should be zero */ - return __new_str(s); + return __new_str(s, have_read); } __attr __fn_native_io_write(__attr __args[]) @@ -222,12 +222,13 @@ /* fd.__data__ interpreted as int */ int i = 
__load_via_object(fd->value, __pos___data__).intvalue; /* str.__data__ interpreted as string */ - __attr sa = __load_via_object(str->value, __pos___data__); - char *s = sa.strvalue; + char *s = __load_via_object(str->value, __pos___data__).strvalue; + /* str.__size__ interpreted as int */ + int size = __load_via_object(str->value, __pos___size__).intvalue; ssize_t have_written; errno = 0; - have_written = write(i, s, sizeof(char) * strlen(sa.strvalue)); + have_written = write(i, s, sizeof(char) * size); if (have_written == -1) __raise_io_error(__new_int(errno)); diff -r d7769551b051 -r aed28d04304d templates/native/locale.c --- a/templates/native/locale.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/locale.c Mon Feb 13 18:54:59 2017 +0100 @@ -46,7 +46,7 @@ out = __ALLOCATE(length + 1, sizeof(char)); strncpy(out, result, length); - return __new_str(result); + return __new_str(result, length); } __attr __fn_native_locale_setlocale(__attr __args[]) @@ -69,7 +69,7 @@ out = __ALLOCATE(length + 1, sizeof(char)); strncpy(out, result, length); - return __new_str(result); + return __new_str(result, length); } /* Module initialisation. */ diff -r d7769551b051 -r aed28d04304d templates/native/str.c --- a/templates/native/str.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/str.c Mon Feb 13 18:54:59 2017 +0100 @@ -32,10 +32,12 @@ { __attr * const _data = &__args[1]; __attr * const other = &__args[2]; + __attr * const _size = &__args[3]; + __attr * const othersize = &__args[4]; /* _data, other interpreted as string */ char *s = _data->strvalue; char *o = other->strvalue; - size_t ss = strlen(_data->strvalue), os = strlen(other->strvalue); + int ss = _size->intvalue, os = othersize->intvalue; int n = ss + os; char *r = (char *) __ALLOCATE(n + 1, sizeof(char)); @@ -43,7 +45,7 @@ memcpy(r + ss, o, os); /* Return a new string. 
*/ - return __new_str(r); + return __new_str(r, n); } __attr __fn_native_str_str_chr(__attr __args[]) @@ -54,7 +56,7 @@ char *s = (char *) __ALLOCATE(2, sizeof(char)); s[0] = (char) n; - return __new_str(s); + return __new_str(s, 1); } __attr __fn_native_str_str_lt(__attr __args[]) @@ -93,21 +95,6 @@ return strcmp(s, o) == 0 ? __builtins___boolean_True : __builtins___boolean_False; } -__attr __fn_native_str_str_len(__attr __args[]) -{ - __attr * const _data = &__args[1]; - - /* Return the new integer. */ - return __new_int(strlen(_data->strvalue)); -} - -__attr __fn_native_str_str_nonempty(__attr __args[]) -{ - __attr * const _data = &__args[1]; - - return _data->strvalue[0] ? __builtins___boolean_True : __builtins___boolean_False; -} - __attr __fn_native_str_str_ord(__attr __args[]) { __attr * const _data = &__args[1]; @@ -147,7 +134,7 @@ for (from = istart, to = 0; from > iend; from += istep, to++) sub[to] = s[from]; - return __new_str(sub); + return __new_str(sub, resultsize); } /* Module initialisation. 
*/ diff -r d7769551b051 -r aed28d04304d templates/native/str.h --- a/templates/native/str.h Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/str.h Mon Feb 13 18:54:59 2017 +0100 @@ -26,8 +26,6 @@ __attr __fn_native_str_str_lt(__attr __args[]); __attr __fn_native_str_str_gt(__attr __args[]); __attr __fn_native_str_str_eq(__attr __args[]); -__attr __fn_native_str_str_len(__attr __args[]); -__attr __fn_native_str_str_nonempty(__attr __args[]); __attr __fn_native_str_str_ord(__attr __args[]); __attr __fn_native_str_str_substr(__attr __args[]); diff -r d7769551b051 -r aed28d04304d templates/native/unicode.c --- a/templates/native/unicode.c Sun Feb 12 23:24:42 2017 +0100 +++ b/templates/native/unicode.c Mon Feb 13 18:54:59 2017 +0100 @@ -72,11 +72,14 @@ __attr __fn_native_unicode_unicode_len(__attr __args[]) { __attr * const _data = &__args[1]; + __attr * const _size = &__args[2]; /* _data interpreted as string */ char *s = _data->strvalue; + /* _size interpreted as int */ + int size = _size->intvalue; unsigned int i, c = 0; - for (i = 0; s[i] != 0; i++) + for (i = 0; i < size; i++) if (boundary(s[i])) c++; @@ -87,11 +90,14 @@ __attr __fn_native_unicode_unicode_ord(__attr __args[]) { __attr * const _data = &__args[1]; + __attr * const _size = &__args[2]; /* _data interpreted as string */ char *s = _data->strvalue; + /* _size interpreted as int */ + int size = _size->intvalue; unsigned int i, c = 0, v; - for (i = 0; s[i] != 0; i++) + for (i = 0; i < size; i++) { /* Evaluate the current character as a boundary. 
*/ @@ -120,11 +126,14 @@ __attr __fn_native_unicode_unicode_substr(__attr __args[]) { __attr * const _data = &__args[1]; - __attr * const start = &__args[2]; - __attr * const end = &__args[3]; - __attr * const step = &__args[4]; + __attr * const _size = &__args[2]; + __attr * const start = &__args[3]; + __attr * const end = &__args[4]; + __attr * const step = &__args[5]; /* _data interpreted as string */ char *s = _data->strvalue, *sub; + /* _size interpreted as int */ + int ss = _size->intvalue; /* start.__data__ interpreted as int */ int istart = __load_via_object(start->value, __pos___data__).intvalue; /* end.__data__ interpreted as int */ @@ -137,7 +146,7 @@ unsigned int indexes[nchar]; unsigned int c, d, i, to, from, lastbyte = 0; - size_t resultsize = 0, ss = strlen(_data->strvalue); + int resultsize = 0; /* Find the indexes of the characters. */ if (istep > 0) @@ -190,7 +199,7 @@ } while (!boundary(s[from])); } - return __new_str(sub); + return __new_str(sub, resultsize); } /* Module initialisation. */