* string.c: Fix memory corruption when using UTF-16/32 strings.

[Bug #12536] [ruby-dev:49699]

* string.c (TERM_LEN_MAX): Macro for the longest TERM_FILL length,
  the same as the largest value of rb_enc_mbminlen(enc) among encodings.

* string.c (str_new, rb_str_buf_new, str_shared_replace): Allocate
  +TERM_LEN_MAX bytes instead of +1. This change may increase memory
  usage.

* string.c (rb_str_new_with_class): Use TERM_LEN of the "obj".

* string.c (rb_str_plus, rb_str_justify): Use str_new0 which is aware
  of termlen.

* string.c (str_shared_replace): Copy +termlen bytes instead of +1.

* string.c (rb_str_times): termlen should not be included in capa.

* string.c (RESIZE_CAPA_TERM): When comparing against
  RSTRING_EMBED_LEN_MAX, termlen must be taken into account because
  embedded strings are also processed by TERM_FILL.

* string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto.

* string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55547 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ngoto 2016-06-30 10:20:23 +00:00
Родитель 893bb61bcb
Коммит f2ee22371b
2 изменённых файлов: 53 добавлений и 17 удалений

Просмотреть файл

@ -1,3 +1,32 @@
Thu Jun 30 19:15:13 2016 Naohisa Goto <ngotogenome@gmail.com>
* string.c: Fix memory corruptions when using UTF-16/32 strings.
[Bug #12536] [ruby-dev:49699]
* string.c (TERM_LEN_MAX): Macro for the longest TERM_FILL length,
the same as largest value of rb_enc_mbminlen(enc) among encodings.
* string.c (str_new, rb_str_buf_new, str_shared_replace): Allocate
+TERM_LEN_MAX bytes instead of +1. This change may increase memory
usage.
* string.c (rb_str_new_with_class): Use TERM_LEN of the "obj".
* string.c (rb_str_plus, rb_str_justify): Use str_new0 which is aware
of termlen.
* string.c (str_shared_replace): Copy +termlen bytes instead of +1.
* string.c (rb_str_times): termlen should not be included in capa.
* string.c (RESIZE_CAPA_TERM): When using RSTRING_EMBED_LEN_MAX,
termlen should be counted with it because embedded strings are
also processed by TERM_FILL.
* string.c (rb_str_capacity, str_shared_replace, str_buf_cat): ditto.
* string.c (rb_str_drop_bytes, rb_str_setbyte, str_byte_substr): ditto.
Wed Jun 29 22:24:37 2016 SHIBATA Hiroshi <hsbt@ruby-lang.org>
* ext/psych/lib/psych_jars.rb: removed needless file required to JRuby.

Просмотреть файл

@ -120,6 +120,7 @@ VALUE rb_cSymbol;
if (UNLIKELY(term_fill_len > 1))\
memset(term_fill_ptr, 0, term_fill_len);\
} while (0)
#define TERM_LEN_MAX 4 /* UTF-32LE, UTF-32BE */
#define RESIZE_CAPA(str,capacity) do {\
const int termlen = TERM_LEN(str);\
@ -127,7 +128,7 @@ VALUE rb_cSymbol;
} while (0)
#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
if (STR_EMBED_P(str)) {\
if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
if ((capacity) > RSTRING_EMBED_LEN_MAX + 1 - (termlen)) {\
char *const tmp = ALLOC_N(char, (capacity)+termlen);\
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
@ -650,7 +651,7 @@ size_t
rb_str_capacity(VALUE str)
{
if (STR_EMBED_P(str)) {
return RSTRING_EMBED_LEN_MAX;
return RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str);
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
return RSTRING(str)->as.heap.len;
@ -694,7 +695,7 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
RUBY_DTRACE_CREATE_HOOK(STRING, len);
str = str_alloc(klass);
if (len > RSTRING_EMBED_LEN_MAX) {
if (len > RSTRING_EMBED_LEN_MAX + 1 - termlen) {
RSTRING(str)->as.heap.aux.capa = len;
RSTRING(str)->as.heap.ptr = ALLOC_N(char, len + termlen);
STR_SET_NOEMBED(str);
@ -713,7 +714,7 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
static VALUE
str_new(VALUE klass, const char *ptr, long len)
{
return str_new0(klass, ptr, len, 1);
return str_new0(klass, ptr, len, TERM_LEN_MAX);
}
VALUE
@ -1160,7 +1161,7 @@ str_new_frozen(VALUE klass, VALUE orig)
VALUE
rb_str_new_with_class(VALUE obj, const char *ptr, long len)
{
return str_new(rb_obj_class(obj), ptr, len);
return str_new0(rb_obj_class(obj), ptr, len, TERM_LEN(obj));
}
static VALUE
@ -1184,7 +1185,7 @@ rb_str_buf_new(long capa)
}
FL_SET(str, STR_NOEMBED);
RSTRING(str)->as.heap.aux.capa = capa;
RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa + TERM_LEN_MAX);
RSTRING(str)->as.heap.ptr[0] = '\0';
return str;
@ -1252,16 +1253,18 @@ str_shared_replace(VALUE str, VALUE str2)
{
rb_encoding *enc;
int cr;
int termlen;
ASSUME(str2 != str);
enc = STR_ENC_GET(str2);
cr = ENC_CODERANGE(str2);
str_discard(str);
OBJ_INFECT(str, str2);
termlen = rb_enc_mbminlen(enc);
if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX + 1 - termlen) {
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+termlen);
STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
rb_enc_associate(str, enc);
ENC_CODERANGE_SET(str, cr);
@ -1730,16 +1733,18 @@ rb_str_plus(VALUE str1, VALUE str2)
rb_encoding *enc;
char *ptr1, *ptr2, *ptr3;
long len1, len2;
int termlen;
StringValue(str2);
enc = rb_enc_check_str(str1, str2);
RSTRING_GETMEM(str1, ptr1, len1);
RSTRING_GETMEM(str2, ptr2, len2);
str3 = rb_str_new(0, len1+len2);
termlen = rb_enc_mbminlen(enc);
str3 = str_new0(rb_cString, 0, len1+len2, termlen);
ptr3 = RSTRING_PTR(str3);
memcpy(ptr3, ptr1, len1);
memcpy(ptr3+len1, ptr2, len2);
TERM_FILL(&ptr3[len1+len2], rb_enc_mbminlen(enc));
TERM_FILL(&ptr3[len1+len2], termlen);
FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2));
ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
@ -1787,7 +1792,7 @@ rb_str_times(VALUE str, VALUE times)
len *= RSTRING_LEN(str);
termlen = TERM_LEN(str);
str2 = rb_str_new_with_class(str, 0, (len + termlen - 1));
str2 = str_new0(rb_obj_class(str), 0, len, termlen);
ptr2 = RSTRING_PTR(str2);
if (len) {
n = RSTRING_LEN(str);
@ -2518,7 +2523,7 @@ str_buf_cat(VALUE str, const char *ptr, long len)
rb_str_modify(str);
if (len == 0) return 0;
if (STR_EMBED_P(str)) {
capa = RSTRING_EMBED_LEN_MAX;
capa = RSTRING_EMBED_LEN_MAX + termlen - 1;
sptr = RSTRING(str)->as.ary;
olen = RSTRING_EMBED_LEN(str);
}
@ -4141,7 +4146,7 @@ rb_str_drop_bytes(VALUE str, long len)
str_modifiable(str);
if (len > olen) len = olen;
nlen = olen - len;
if (nlen <= RSTRING_EMBED_LEN_MAX) {
if (nlen <= RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str)) {
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
@ -4992,7 +4997,7 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
enc = STR_ENC_GET(str);
head = RSTRING_PTR(str);
ptr = &head[pos];
if (len > RSTRING_EMBED_LEN_MAX) {
if (len > RSTRING_EMBED_LEN_MAX + 1 - rb_enc_mbminlen(enc)) {
cr = ENC_CODERANGE(str);
switch (cr) {
case ENC_CODERANGE_7BIT:
@ -5046,7 +5051,7 @@ str_byte_substr(VALUE str, long beg, long len, int empty)
else
p = s + beg;
if (len > RSTRING_EMBED_LEN_MAX && SHARABLE_SUBSTRING_P(beg, len, n)) {
if (len > RSTRING_EMBED_LEN_MAX + 1 - TERM_LEN(str) && SHARABLE_SUBSTRING_P(beg, len, n)) {
str2 = rb_str_new_frozen(str);
str2 = str_new_shared(rb_obj_class(str2), str2);
RSTRING(str2)->as.heap.ptr += beg;
@ -8480,9 +8485,11 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
VALUE pad;
int singlebyte = 1, cr;
int termlen;
rb_scan_args(argc, argv, "11", &w, &pad);
enc = STR_ENC_GET(str);
termlen = rb_enc_mbminlen(enc);
width = NUM2LONG(w);
if (argc == 2) {
StringValue(pad);
@ -8512,7 +8519,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
rb_raise(rb_eArgError, "argument too big");
}
len += size;
res = rb_str_new_with_class(str, 0, len);
res = str_new0(rb_obj_class(str), 0, len, termlen);
p = RSTRING_PTR(res);
if (flen <= 1) {
memset(p, *f, llen);
@ -8546,7 +8553,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
p += rlen2;
}
}
TERM_FILL(p, rb_enc_mbminlen(enc));
TERM_FILL(p, termlen);
STR_SET_LEN(res, p-RSTRING_PTR(res));
OBJ_INFECT_RAW(res, str);
if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad);