# HG changeset patch # User Paul Boddie # Date 1481499448 -3600 # Node ID 15348c678a8351c789b60e591d424c65d1bd946d # Parent 4a9521f666e96808dfc05507f9d6c18cd2510f45 Added an explicit size member to __attr for strings, eliminated strlen usage, replaced strncpy usage with memcpy, permitting strings with zero bytes to be stored and manipulated. diff -r 4a9521f666e9 -r 15348c678a83 encoders.py --- a/encoders.py Mon Dec 12 00:33:33 2016 +0100 +++ b/encoders.py Mon Dec 12 00:37:28 2016 +0100 @@ -306,6 +306,15 @@ return "__const%d" % n +def encode_literal_constant_size(value): + + "Encode a size for the literal constant with the given 'value'." + + if isinstance(value, basestring): + return len(value) + else: + return 0 + def encode_literal_constant_member(value): "Encode the member name for the 'value' in the final program." diff -r 4a9521f666e9 -r 15348c678a83 generator.py --- a/generator.py Mon Dec 12 00:33:33 2016 +0100 +++ b/generator.py Mon Dec 12 00:37:28 2016 +0100 @@ -23,7 +23,7 @@ from encoders import encode_bound_reference, encode_function_pointer, \ encode_instantiator_pointer, \ encode_literal_constant, encode_literal_constant_member, \ - encode_literal_constant_value, \ + encode_literal_constant_size, encode_literal_constant_value, \ encode_literal_data_initialiser, \ encode_literal_instantiator, encode_literal_reference, \ encode_path, \ @@ -873,8 +873,10 @@ # Special internal data member. elif attrname == "__data__": - structure.append("{0, .%s=%s}" % (encode_literal_constant_member(attr), - encode_literal_constant_value(attr))) + structure.append("{.size=%d, .%s=%s}" % ( + encode_literal_constant_size(attr), + encode_literal_constant_member(attr), + encode_literal_constant_value(attr))) continue # Special internal key member. diff -r 4a9521f666e9 -r 15348c678a83 templates/native/buffer.c --- a/templates/native/buffer.c Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/native/buffer.c Mon Dec 12 00:37:28 2016 +0100 @@ -16,7 +16,7 @@ this program. 
If not, see <http://www.gnu.org/licenses/>. */ -#include <string.h> /* strcmp, strncpy, strlen */ +#include <string.h> /* strcmp, memcpy */ #include "native/common.h" #include "types.h" #include "exceptions.h" @@ -32,11 +32,12 @@ /* _data interpreted as buffer */ __fragment *data = _data->seqvalue; unsigned int size = 0, i, j, n; - char *s, *o; + char *s; + __attr o; /* Calculate the size of the string. */ for (i = 0; i < data->size; i++) - size += strlen(__load_via_object(data->attrs[i].value, __pos___data__).strvalue); + size += __load_via_object(data->attrs[i].value, __pos___data__).size; /* Reserve space for a new string. */ s = (char *) __ALLOCATE(size + 1, sizeof(char)); @@ -44,14 +45,14 @@ /* Build a single string from the buffer contents. */ for (i = 0, j = 0; i < data->size; i++) { - o = __load_via_object(data->attrs[i].value, __pos___data__).strvalue; - n = strlen(o); - strncpy(s + j, o, n); /* does not null terminate but final byte should be zero */ + o = __load_via_object(data->attrs[i].value, __pos___data__); + n = o.size; + memcpy(s + j, o.strvalue, n); /* does not null terminate but final byte should be zero */ j += n; } /* Return a new string. */ - return __new_str(s); + return __new_str(s, size); } /* Module initialisation. */ diff -r 4a9521f666e9 -r 15348c678a83 templates/native/common.c --- a/templates/native/common.c Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/native/common.c Mon Dec 12 00:37:28 2016 +0100 @@ -34,10 +34,11 @@ return attr; } -__attr __new_str(char *s) +__attr __new_str(char *s, size_t size) { /* Create a new string and mutate the __data__ and __key__ attributes. 
*/ __attr attr = __new(&__InstanceTable___builtins___str_string, &__builtins___str_string, sizeof(__obj___builtins___str_string)); + attr.value->attrs[__pos___data__].size = size; attr.value->attrs[__pos___data__].strvalue = s; attr.value->attrs[__pos___key__] = (__attr) {0, 0}; return attr; diff -r 4a9521f666e9 -r 15348c678a83 templates/native/common.h --- a/templates/native/common.h Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/native/common.h Mon Dec 12 00:37:28 2016 +0100 @@ -24,7 +24,7 @@ /* Utility functions. */ __attr __new_int(int i); -__attr __new_str(char *s); +__attr __new_str(char *s, size_t size); __attr __new_list(__fragment *f); __fragment *__fragment_append(__fragment *data, __attr * const value); diff -r 4a9521f666e9 -r 15348c678a83 templates/native/int.c --- a/templates/native/int.c Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/native/int.c Mon Dec 12 00:37:28 2016 +0100 @@ -268,7 +268,7 @@ snprintf(s, n, "%d", i); /* Return a new string. */ - return __new_str(s); + return __new_str(s, n); } /* Module initialisation. */ diff -r 4a9521f666e9 -r 15348c678a83 templates/native/io.c --- a/templates/native/io.c Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/native/io.c Mon Dec 12 00:37:28 2016 +0100 @@ -17,7 +17,7 @@ */ #include <unistd.h> /* read, write */ -#include <string.h> /* strcmp, strncpy, strlen */ +#include <string.h> /* strcmp, memcpy */ #include <stdio.h> /* fdopen, snprintf */ #include <errno.h> /* errno */ #include "native/common.h" @@ -128,8 +128,8 @@ /* Reserve space for a new string. 
*/ s = __ALLOCATE(have_read + 1, sizeof(char)); - strncpy(s, (char *) buf, have_read); /* does not null terminate but final byte should be zero */ - return __new_str(s); + memcpy(s, (char *) buf, have_read); /* does not null terminate but final byte should be zero */ + return __new_str(s, have_read); } __attr __fn_native_io_fwrite(__attr __args[]) @@ -139,8 +139,9 @@ /* fp interpreted as FILE reference */ FILE *f = (FILE *) fp->datavalue; /* str.__data__ interpreted as string */ - char *s = __load_via_object(str->value, __pos___data__).strvalue; - size_t to_write = strlen(s); + __attr sa = __load_via_object(str->value, __pos___data__); + char *s = sa.strvalue; + size_t to_write = sa.size; size_t have_written = fwrite(s, sizeof(char), to_write, f); int error; @@ -189,8 +190,8 @@ /* Reserve space for a new string. */ s = __ALLOCATE(have_read + 1, 1); - strncpy(s, (char *) buf, have_read); /* does not null terminate but final byte should be zero */ - return __new_str(s); + memcpy(s, (char *) buf, have_read); /* does not null terminate but final byte should be zero */ + return __new_str(s, have_read); } __attr __fn_native_io_write(__attr __args[]) @@ -200,11 +201,12 @@ /* fd.__data__ interpreted as int */ int i = __load_via_object(fd->value, __pos___data__).intvalue; /* str.__data__ interpreted as string */ - char *s = __load_via_object(str->value, __pos___data__).strvalue; + __attr sa = __load_via_object(str->value, __pos___data__); + char *s = sa.strvalue; ssize_t have_written; errno = 0; - have_written = write(i, s, sizeof(char) * strlen(s)); + have_written = write(i, s, sizeof(char) * sa.size); if (have_written == -1) __raise_io_error(__new_int(errno)); diff -r 4a9521f666e9 -r 15348c678a83 templates/native/locale.c --- a/templates/native/locale.c Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/native/locale.c Mon Dec 12 00:37:28 2016 +0100 @@ -46,7 +46,7 @@ out = __ALLOCATE(length + 1, sizeof(char)); strncpy(out, result, length); - return __new_str(result); + return 
__new_str(result, length); } __attr __fn_native_locale_setlocale(__attr __args[]) @@ -69,7 +69,7 @@ out = __ALLOCATE(length + 1, sizeof(char)); strncpy(out, result, length); - return __new_str(result); + return __new_str(result, length); } /* Module initialisation. */ diff -r 4a9521f666e9 -r 15348c678a83 templates/native/str.c --- a/templates/native/str.c Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/native/str.c Mon Dec 12 00:37:28 2016 +0100 @@ -16,7 +16,7 @@ this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <string.h> /* strcmp, strncpy, strlen */ +#include <string.h> /* strcmp, memcpy */ #include "native/common.h" #include "types.h" #include "exceptions.h" @@ -35,14 +35,14 @@ /* _data, other interpreted as string */ char *s = _data->strvalue; char *o = other->strvalue; - int n = strlen(s) + strlen(o) + 1; - char *r = (char *) __ALLOCATE(n, sizeof(char)); + int n = _data->size + other->size; + char *r = (char *) __ALLOCATE(n + 1, sizeof(char)); - strncpy(r, s, n); - strncpy(r + strlen(s), o, n - strlen(s)); /* should null terminate */ + memcpy(r, s, _data->size); + memcpy(r + _data->size, o, other->size); /* Return a new string. */ - return __new_str(r); + return __new_str(r, n); } __attr __fn_native_str_str_lt(__attr __args[]) @@ -88,7 +88,7 @@ char *s = _data->strvalue; /* Return the new integer. */ - return __new_int(strlen(s)); + return __new_int(_data->size); } __attr __fn_native_str_str_nonempty(__attr __args[]) @@ -97,7 +97,7 @@ /* _data interpreted as string */ char *s = _data->strvalue; - return strlen(s) ? __builtins___boolean_True : __builtins___boolean_False; + return _data->size ? __builtins___boolean_True : __builtins___boolean_False; } __attr __fn_native_str_str_ord(__attr __args[]) @@ -123,8 +123,8 @@ /* Reserve space for a new string. 
*/ sub = (char *) __ALLOCATE(l + 1, sizeof(char)); - strncpy(sub, s + i, l); /* does not null terminate but final byte should be zero */ - return __new_str(sub); + memcpy(sub, s + i, l); /* does not null terminate but final byte should be zero */ + return __new_str(sub, l); } /* Module initialisation. */ diff -r 4a9521f666e9 -r 15348c678a83 templates/types.h --- a/templates/types.h Mon Dec 12 00:33:33 2016 +0100 +++ b/templates/types.h Mon Dec 12 00:37:28 2016 +0100 @@ -19,6 +19,8 @@ #ifndef __TYPES_H__ #define __TYPES_H__ +#include <stddef.h> /* size_t */ + /* Attribute tables are lists of codes confirming the presence of attributes. */ typedef struct __table @@ -55,9 +57,11 @@ union { __obj * context; /* attribute context */ + __obj * b; /* bound callable object */ unsigned int min; /* minimum number of parameters */ - __obj * b; /* bound callable object */ unsigned int code; /* parameter table code for key */ + + size_t size; /* size of value */ }; /* One of... */ diff -r 4a9521f666e9 -r 15348c678a83 tests/string.py --- a/tests/string.py Mon Dec 12 00:33:33 2016 +0100 +++ b/tests/string.py Mon Dec 12 00:37:28 2016 +0100 @@ -1,9 +1,11 @@ s = "Hello" s += " world!" print s # Hello world! +print len(s) # 12 s2 = "Hello worlds!" print s2 # Hello worlds! +print len(s2) # 13 print s < s2 # True print s <= s2 # True print s == s2 # False @@ -24,11 +26,18 @@ l = ["Hello", "world!"] s3 = " ".join(l) print s3 # Hello world! +print len(s3) # 12 s4 = "".join(l) print s4 # Helloworld! +print len(s4) # 11 + +s5 = "--".join(l) +print s5 # Hello--world! +print len(s5) # 13 print hash(s) print hash(s2) print hash(s3) print hash(s4) +print hash(s5)