string.c: setbyte silently ignores upper bits

The behaviour of String#setbyte has depended on the width of int,
which is not portable.  The value's range must be checked explicitly.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65804 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
shyouhei 2018-11-19 09:52:46 +00:00
Родитель d13724306e
Коммит 7213568733
2 изменённых файлов: 19 добавлений и 3 удалений

Просмотреть файл

@ -5413,7 +5413,8 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
long pos = NUM2LONG(index);
int byte = NUM2INT(value);
long len = RSTRING_LEN(str);
char *head, *ptr, *left = 0;
char *head, *left = 0;
unsigned char *ptr;
rb_encoding *enc;
int cr = ENC_CODERANGE_UNKNOWN, width, nlen;
@ -5421,17 +5422,21 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
rb_raise(rb_eIndexError, "index %ld out of string", pos);
if (pos < 0)
pos += len;
if (byte < 0)
rb_raise(rb_eRangeError, "integer %d too small to convert into `unsigned char'", byte);
if (UCHAR_MAX < byte)
rb_raise(rb_eRangeError, "integer %d too big to convert into `unsigned char'", byte);
if (!str_independent(str))
str_make_independent(str);
enc = STR_ENC_GET(str);
head = RSTRING_PTR(str);
ptr = &head[pos];
ptr = (unsigned char *)&head[pos];
if (!STR_EMBED_P(str)) {
cr = ENC_CODERANGE(str);
switch (cr) {
case ENC_CODERANGE_7BIT:
left = ptr;
left = (char *)ptr;
*ptr = byte;
if (ISASCII(byte)) goto end;
nlen = rb_enc_precise_mbclen(left, head+len, enc);

Просмотреть файл

@ -1524,6 +1524,17 @@ class TestM17N < Test::Unit::TestCase
}
end
# String#setbyte must accept every value in 0x00..0xFF and raise
# RangeError for anything outside that range, including integers far
# wider than a C int.
def test_setbyte_range
  str = u("\xE3\x81\x82\xE3\x81\x84")

  # Both helpers always target byte index 0 of the same string.
  rejected = lambda do |value|
    assert_raise(RangeError) { str.setbyte(0, value) }
  end
  accepted = lambda do |value|
    assert_nothing_raised { str.setbyte(0, value) }
  end

  # Below the valid range.
  rejected.call(-1)

  # Lower bound, both sides of the ASCII / non-ASCII boundary, upper bound.
  [0x00, 0x7F, 0x80, 0xff].each(&accepted)

  # Just past the upper bound, then a 17-byte integer.
  [0x100, 0x4f7574206f6620636861722072616e6765].each(&rejected)
end
def test_compatible
assert_nil Encoding.compatible?("",0)
assert_equal(Encoding::UTF_8, Encoding.compatible?(u(""), ua("abc")))