Add a hint of `ASCII-8BIT` being `BINARY`

[Feature #18576]

Since outright renaming `ASCII-8BIT` is deemed too backward incompatible,
the next best thing would be to only change its `#inspect`, particularly
in exception messages.
This commit is contained in:
Jean Boussier 2024-02-19 13:35:48 +01:00 коммит произвёл Jean Boussier
Родитель b3c59370ca
Коммит 3a7846b1aa
7 изменённых файлов: 40 добавлений и 15 удалений

Просмотреть файл

@ -1015,13 +1015,22 @@ rb_enc_get(VALUE obj)
return rb_enc_from_index(rb_enc_get_index(obj));
}
/*
 * Returns the name to display for `enc` in human-facing output.
 *
 * When `enc` is `global_enc_ascii`, the fixed string "BINARY (ASCII-8BIT)"
 * is returned instead of the encoding's own name; every other encoding
 * yields `enc->name` unchanged.
 *
 * NOTE(review): `global_enc_ascii` is presumably the ASCII-8BIT encoding
 * object -- confirm against its definition elsewhere in this file.
 */
const char *
rb_enc_inspect_name(rb_encoding *enc)
{
/* Pointer-identity comparison, not a comparison of the name strings. */
if (enc == global_enc_ascii) {
return "BINARY (ASCII-8BIT)";
}
/* Any other encoding is displayed under its own name. */
return enc->name;
}
static rb_encoding*
rb_encoding_check(rb_encoding* enc, VALUE str1, VALUE str2)
{
if (!enc)
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
rb_enc_name(rb_enc_get(str1)),
rb_enc_name(rb_enc_get(str2)));
rb_enc_inspect_name(rb_enc_get(str1)),
rb_enc_inspect_name(rb_enc_get(str2)));
return enc;
}
@ -1263,9 +1272,10 @@ enc_inspect(VALUE self)
if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
rb_raise(rb_eTypeError, "broken Encoding");
}
return rb_enc_sprintf(rb_usascii_encoding(),
"#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
rb_enc_name(enc),
rb_enc_inspect_name(enc),
(ENC_DUMMY_P(enc) ? " (dummy)" : ""),
rb_enc_autoload_p(enc) ? " (autoload)" : "");
}

Просмотреть файл

@ -18,6 +18,7 @@
/* encoding.c */
ID rb_id_encoding(void);
const char * rb_enc_inspect_name(rb_encoding *enc);
rb_encoding *rb_enc_get_from_index(int index);
rb_encoding *rb_enc_check_str(VALUE str1, VALUE str2);
int rb_encdb_replicate(const char *alias, const char *orig);

4
re.c
Просмотреть файл

@ -1530,8 +1530,8 @@ reg_enc_error(VALUE re, VALUE str)
{
rb_raise(rb_eEncCompatError,
"incompatible encoding regexp match (%s regexp with %s string)",
rb_enc_name(rb_enc_get(re)),
rb_enc_name(rb_enc_get(str)));
rb_enc_inspect_name(rb_enc_get(re)),
rb_enc_inspect_name(rb_enc_get(str)));
}
static inline int

Просмотреть файл

@ -5,9 +5,23 @@ describe "Encoding#inspect" do
Encoding::UTF_8.inspect.should be_an_instance_of(String)
end
it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do
Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc|
enc.inspect.should =~ /#<Encoding:#{enc.name}>/
ruby_version_is ""..."3.4" do
it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do
Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc|
enc.inspect.should =~ /#<Encoding:#{enc.name}>/
end
end
end
ruby_version_is "3.4" do
it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do
Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc|
if enc.name == "ASCII-8BIT"
enc.inspect.should == "#<Encoding:BINARY (ASCII-8BIT)>"
else
enc.inspect.should =~ /#<Encoding:#{enc.name}>/
end
end
end
end

Просмотреть файл

@ -3374,7 +3374,7 @@ rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
incompatible:
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
rb_enc_name(str_enc), rb_enc_name(ptr_enc));
rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
UNREACHABLE_RETURN(Qundef);
}
@ -5854,8 +5854,8 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
rb_enc_name(str_enc),
rb_enc_name(STR_ENC_GET(repl)));
rb_enc_inspect_name(str_enc),
rb_enc_inspect_name(STR_ENC_GET(repl)));
}
enc = STR_ENC_GET(repl);
}
@ -11120,7 +11120,7 @@ str_compat_and_valid(VALUE str, rb_encoding *enc)
rb_encoding *e = STR_ENC_GET(str);
if (cr == ENC_CODERANGE_7BIT ? rb_enc_mbminlen(enc) != 1 : enc != e) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
rb_enc_name(enc), rb_enc_name(e));
rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
}
}
return str;

Просмотреть файл

@ -1091,7 +1091,7 @@ class TestM17N < Test::Unit::TestCase
assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3")))
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
a_with_e = /EUC-JP and ASCII-8BIT/
a_with_e = /EUC-JP and BINARY \(ASCII-8BIT\)/
assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do
s.index(a("\xb1\xa3"))
end
@ -1099,7 +1099,7 @@ class TestM17N < Test::Unit::TestCase
s.rindex(a("\xb1\xa3"))
end
a_with_e = /ASCII-8BIT regexp with EUC-JP string/
a_with_e = /BINARY \(ASCII-8BIT\) regexp with EUC-JP string/
assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do
s.index(Regexp.new(a("\xb1\xa3")))
end

Просмотреть файл

@ -1458,7 +1458,7 @@ class TestYJIT < Test::Unit::TestCase
end
def test_str_concat_encoding_mismatch
assert_compiles(<<~'RUBY', result: "incompatible character encodings: ASCII-8BIT and EUC-JP")
assert_compiles(<<~'RUBY', result: "incompatible character encodings: BINARY (ASCII-8BIT) and EUC-JP")
def bar(a, b)
a << b
rescue => e