Make String#rstrip{,!} raise Encoding::CompatibilityError for broken coderange

It's questionable whether we want to allow rstrip to work for strings
where the broken coderange occurs before the trailing whitespace and
not after, but this approach is probably simpler, and I don't think
users should expect string operations like rstrip to work on broken
strings.

In some cases, this changes rstrip to raise
Encoding::CompatibilityError instead of ArgumentError.  However, as
the problem is related to an encoding issue in the receiver, and due
not due to an issue with an argument, I think
Encoding::CompatibilityError is the more appropriate error.

Fixes [Bug #18931]
This commit is contained in:
Jeremy Evans 2022-08-24 10:31:17 -07:00
Родитель c6330cd32b
Коммит 571d21fd4a
4 изменённых файлов: 30 добавлений и 8 удалений

Просмотреть файл

@ -69,13 +69,27 @@ describe "String#rstrip!" do
-> { "".freeze.rstrip! }.should raise_error(FrozenError)
end
it "raises an ArgumentError if the last non-space codepoint is invalid" do
s = "abc\xDF".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
ruby_version_is "3.2" do
it "raises an Encoding::CompatibilityError if the last non-space codepoint is invalid" do
s = "abc\xDF".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(Encoding::CompatibilityError)
s = "abc\xDF ".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
s = "abc\xDF ".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(Encoding::CompatibilityError)
end
end
ruby_version_is ""..."3.2" do
it "raises an ArgumentError if the last non-space codepoint is invalid" do
s = "abc\xDF".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
s = "abc\xDF ".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
end
end
end

Просмотреть файл

@ -9720,6 +9720,9 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
const char *t;
rb_str_check_dummy_enc(enc);
if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncCompatError, "invalid byte sequence in %s", rb_enc_name(enc));
}
if (!s || s >= e) return 0;
t = e;

Просмотреть файл

@ -28,7 +28,7 @@ class HTTPHeaderTest < Test::Unit::TestCase
assert_raise(NoMethodError){ @c.initialize_http_header("foo"=>[]) }
assert_raise(ArgumentError){ @c.initialize_http_header("foo"=>"a\nb") }
assert_raise(ArgumentError){ @c.initialize_http_header("foo"=>"a\rb") }
assert_raise(ArgumentError){ @c.initialize_http_header("foo"=>"a\xff") }
assert_raise(Encoding::CompatibilityError){ @c.initialize_http_header("foo"=>"a\xff") }
end
def test_initialize_with_symbol

Просмотреть файл

@ -2817,6 +2817,11 @@ CODE
assert_equal("\u3042", s5)
assert_raise(Encoding::CompatibilityError) { S("\u3042".encode("ISO-2022-JP")).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("abc \x80 ".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("abc\x80 ".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("abc \x80".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("\x80".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S(" \x80 ".force_encoding('UTF-8')).rstrip! }
end
def test_lstrip