string.c: setbyte silently ignores upper bits

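The behaviour of String#setbyte has depended on the width of int, which is not portable: the upper bits of the given value were silently ignored. Check the range explicitly and raise RangeError when the value is negative or greater than UCHAR_MAX. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65804 b2dd03c8-39d4-4d8f-98ff-823fe69b080e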
2018-11-19 09:52:46 +00:00 · 2018-11-19 09:52:46 +00:00 · 7213568733
--- a/string.c
+++ b/string.c
@ -5413,7 +5413,8 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
    long pos = NUM2LONG(index);
    int byte = NUM2INT(value);
    long len = RSTRING_LEN(str);
-    char *head, *ptr, *left = 0;
+    char *head, *left = 0;
+    unsigned char *ptr;
    rb_encoding *enc;
    int cr = ENC_CODERANGE_UNKNOWN, width, nlen;

@ -5421,17 +5422,21 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
        rb_raise(rb_eIndexError, "index %ld out of string", pos);
    if (pos < 0)
        pos += len;
+    if (byte < 0)
+        rb_raise(rb_eRangeError, "integer %d too small to convert into `unsigned char'", byte);
+    if (UCHAR_MAX < byte)
+        rb_raise(rb_eRangeError, "integer %d too big to convert into `unsigned char'", byte);

    if (!str_independent(str))
 	str_make_independent(str);
    enc = STR_ENC_GET(str);
    head = RSTRING_PTR(str);
-    ptr = &head[pos];
+    ptr = (unsigned char *)&head[pos];
    if (!STR_EMBED_P(str)) {
 	cr = ENC_CODERANGE(str);
 	switch (cr) {
 	  case ENC_CODERANGE_7BIT:
-	    left = ptr;
+            left = (char *)ptr;
 	    *ptr = byte;
 	    if (ISASCII(byte)) goto end;
 	    nlen = rb_enc_precise_mbclen(left, head+len, enc);
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@ -1524,6 +1524,17 @@ class TestM17N < Test::Unit::TestCase
    }
  end

+  def test_setbyte_range
+    s = u("\xE3\x81\x82\xE3\x81\x84")
+    assert_raise(RangeError) { s.setbyte(0, -1) }
+    assert_nothing_raised    { s.setbyte(0, 0x00) }
+    assert_nothing_raised    { s.setbyte(0, 0x7F) }
+    assert_nothing_raised    { s.setbyte(0, 0x80) }
+    assert_nothing_raised    { s.setbyte(0, 0xff) }
+    assert_raise(RangeError) { s.setbyte(0, 0x100) }
+    assert_raise(RangeError) { s.setbyte(0, 0x4f7574206f6620636861722072616e6765) }
+  end
+
  def test_compatible
    assert_nil Encoding.compatible?("",0)
    assert_equal(Encoding::UTF_8, Encoding.compatible?(u(""), ua("abc")))