зеркало из https://github.com/github/ruby.git
* transcode.c (output_replacement_character):
rename from _get_replacement_character.
* transcode.c (output_replacement_character): fix replacement on UTF-32{BE,LE}. [ruby-dev:35705]
* transcode.c (transcode_loop): ditto.
* test/ruby/test_transcode.rb (test_invalid_replace): add for above.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18300 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
f3730b8260
Коммит
00aef398d0
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
|||
Fri Aug 1 05:31:08 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* transcode.c (output_replacement_character):
|
||||
rename from _get_replacement_character.
|
||||
|
||||
* transcode.c (output_replacement_character):
|
||||
fix replacement on UTF-32{BE,LE}. [ruby-dev:35705]
|
||||
|
||||
* transcode.c (transcode_loop): ditto.
|
||||
|
||||
* test/ruby/test_transcode.rb (test_invalid_replace):
|
||||
add for above.
|
||||
|
||||
Fri Aug 1 01:01:49 2008 Yusuke Endoh <mame@tsg.ne.jp>
|
||||
|
||||
* proc.c (rb_proc_call_with_block): reduce comparison.
|
||||
|
|
|
@ -254,6 +254,21 @@ class TestTranscode < Test::Unit::TestCase
|
|||
"\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
|
||||
end
|
||||
|
||||
def test_invalid_replace
|
||||
# arguments only
|
||||
assert_nothing_raised { 'abc'.encode('UTF-8', invalid: :replace) }
|
||||
assert_equal("\xEF\xBF\xBD".force_encoding("UTF-8"),
|
||||
"\x80".encode("UTF-8", "UTF-16BE", invalid: :replace))
|
||||
assert_equal("\xFF\xFD".force_encoding("UTF-16BE"),
|
||||
"\x80".encode("UTF-16BE", "UTF-8", invalid: :replace))
|
||||
assert_equal("\xFD\xFF".force_encoding("UTF-16LE"),
|
||||
"\x80".encode("UTF-16LE", "UTF-8", invalid: :replace))
|
||||
assert_equal("\x00\x00\xFF\xFD".force_encoding("UTF-32BE"),
|
||||
"\x80".encode("UTF-32BE", "UTF-8", invalid: :replace))
|
||||
assert_equal("\xFD\xFF\x00\x00".force_encoding("UTF-32LE"),
|
||||
"\x80".encode("UTF-32LE", "UTF-8", invalid: :replace))
|
||||
end
|
||||
|
||||
def test_shift_jis
|
||||
check_both_ways("\u3000", "\x81\x40", 'shift_jis') # full-width space
|
||||
check_both_ways("\u00D7", "\x81\x7E", 'shift_jis') # × (multiplication sign)
|
||||
|
|
43
transcode.c
43
transcode.c
|
@ -91,7 +91,7 @@ rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
|
|||
#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
|
||||
|
||||
static const rb_transcoder *
|
||||
transcode_dispatch(const char* from_encoding, const char* to_encoding)
|
||||
transcode_dispatch(const char *from_encoding, const char *to_encoding)
|
||||
{
|
||||
char *const key = transcoder_key(from_encoding, to_encoding);
|
||||
st_data_t k, val = 0;
|
||||
|
@ -122,9 +122,10 @@ transcode_dispatch(const char* from_encoding, const char* to_encoding)
|
|||
return (rb_transcoder *)val;
|
||||
}
|
||||
|
||||
static const char*
|
||||
get_replacement_character(rb_encoding *enc)
|
||||
static void
|
||||
output_replacement_character(unsigned char **out_pp, rb_encoding *enc)
|
||||
{
|
||||
unsigned char *out_p = *out_pp;
|
||||
static rb_encoding *utf16be_encoding, *utf16le_encoding;
|
||||
static rb_encoding *utf32be_encoding, *utf32le_encoding;
|
||||
if (!utf16be_encoding) {
|
||||
|
@ -133,24 +134,36 @@ get_replacement_character(rb_encoding *enc)
|
|||
utf32be_encoding = rb_enc_find("UTF-32BE");
|
||||
utf32le_encoding = rb_enc_find("UTF-32LE");
|
||||
}
|
||||
if (rb_enc_asciicompat(enc)) {
|
||||
return "?";
|
||||
if (rb_utf8_encoding() == enc) {
|
||||
*out_p++ = 0xEF;
|
||||
*out_p++ = 0xBF;
|
||||
*out_p++ = 0xBD;
|
||||
}
|
||||
else if (utf16be_encoding == enc) {
|
||||
return "\xFF\xFD";
|
||||
*out_p++ = 0xFF;
|
||||
*out_p++ = 0xFD;
|
||||
}
|
||||
else if (utf16le_encoding == enc) {
|
||||
return "\xFD\xFF";
|
||||
*out_p++ = 0xFD;
|
||||
*out_p++ = 0xFF;
|
||||
}
|
||||
else if (utf32be_encoding == enc) {
|
||||
return "\x00\x00\xFF\xFD";
|
||||
*out_p++ = 0x00;
|
||||
*out_p++ = 0x00;
|
||||
*out_p++ = 0xFF;
|
||||
*out_p++ = 0xFD;
|
||||
}
|
||||
else if (utf32le_encoding == enc) {
|
||||
return "\xFD\xFF\x00\x00";
|
||||
*out_p++ = 0xFD;
|
||||
*out_p++ = 0xFF;
|
||||
*out_p++ = 0x00;
|
||||
*out_p++ = 0x00;
|
||||
}
|
||||
else {
|
||||
return "?";
|
||||
*out_p++ = '?';
|
||||
}
|
||||
*out_pp = out_p;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -255,10 +268,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
continue;
|
||||
}
|
||||
else if (opt&INVALID_REPLACE) {
|
||||
const char *rep = get_replacement_character(to_encoding);
|
||||
do {
|
||||
*out_p++ = *rep++;
|
||||
} while (*rep);
|
||||
output_replacement_character(&out_p, to_encoding);
|
||||
continue;
|
||||
}
|
||||
rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
|
||||
|
@ -271,10 +281,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
continue;
|
||||
}
|
||||
else if (opt&UNDEF_REPLACE) {
|
||||
const char *rep = get_replacement_character(to_encoding);
|
||||
do {
|
||||
*out_p++ = *rep++;
|
||||
} while (*rep);
|
||||
output_replacement_character(&out_p, to_encoding);
|
||||
continue;
|
||||
}
|
||||
rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
|
|
Загрузка…
Ссылка в новой задаче