зеркало из https://github.com/github/ruby.git
* transcode.c (get_replacement_character): use U+FFFD as the replacement
character when converting to Unicode. * test/ruby/test_transcode.rb (test_unicode_public_review_issue_121): rename from test_public_review_issue_121. * test/ruby/test_transcode.rb (test_unicode_public_review_issue_121): enable option2. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18294 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
97fdfbacec
Коммит
931ba3f3b7
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
||||||
|
Thu Jul 31 19:54:57 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* transcode.c (get_replacement_character): use U+FFFD as replacement
|
||||||
|
character when converting to Unicode.
|
||||||
|
|
||||||
|
* test/ruby/test_transcode.rb (test_unicode_public_review_issue_121):
|
||||||
|
rename from test_public_review_issue_121.
|
||||||
|
|
||||||
|
* test/ruby/test_transcode.rb (test_unicode_public_review_issue_121):
|
||||||
|
enable option2.
|
||||||
|
|
||||||
Thu Jul 31 17:00:10 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
Thu Jul 31 17:00:10 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
* transcode.c (get_replacement_character): fix: invalid byte sequence
|
* transcode.c (get_replacement_character): fix: invalid byte sequence
|
||||||
|
|
|
@ -312,16 +312,13 @@ class TestTranscode < Test::Unit::TestCase
|
||||||
# check_both_ways("\u9299", "\x1b$(Dd!\x1b(B", "iso-2022-jp-1") # JIS X 0212 区68 点01 銙
|
# check_both_ways("\u9299", "\x1b$(Dd!\x1b(B", "iso-2022-jp-1") # JIS X 0212 区68 点01 銙
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_public_review_issue_121 # see http://www.unicode.org/review/pr-121.html
|
def test_unicode_public_review_issue_121 # see http://www.unicode.org/review/pr-121.html
|
||||||
# assert_equal("\x00\x61\x00?\x00\x62".force_encoding('UTF-16BE'),
|
# assert_equal("\x00\x61\x00?\x00\x62".force_encoding('UTF-16BE'),
|
||||||
# "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 1
|
# "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 1
|
||||||
assert_equal("\x00\x61\x00?\x00?\x00?\x00\x62".force_encoding('UTF-16BE'),
|
assert_equal("\x00\x61\xFF\xFD\xFF\xFD\xFF\xFD\x00\x62".force_encoding('UTF-16BE'),
|
||||||
"\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 2
|
"\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 2
|
||||||
# The next test doesn't work because of a bug in the implementation
|
assert_equal("\x61\x00\xFD\xFF\xFD\xFF\xFD\xFF\x62\x00".force_encoding('UTF-16LE'),
|
||||||
# but we currently don't plan to fix that bug because we'll rewrite
|
"\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16LE', 'UTF-8', invalid: :replace)) # option 2
|
||||||
# this stuff a bit anyway.
|
|
||||||
# assert_equal("\x61\x00?\x00?\x00?\x00\x62\x00".force_encoding('UTF-16LE'),
|
|
||||||
# "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16LE', 'UTF-8', invalid: :replace)) # option 2
|
|
||||||
# assert_equal("\x00\x61\x00?\x00?\x00?\x00?\x00?\x00?\x00\x62".force_encoding('UTF-16BE'),
|
# assert_equal("\x00\x61\x00?\x00?\x00?\x00?\x00?\x00?\x00\x62".force_encoding('UTF-16BE'),
|
||||||
# "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 3
|
# "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 3
|
||||||
end
|
end
|
||||||
|
|
|
@ -137,16 +137,16 @@ get_replacement_character(rb_encoding *enc)
|
||||||
return "?";
|
return "?";
|
||||||
}
|
}
|
||||||
else if (utf16be_encoding == enc) {
|
else if (utf16be_encoding == enc) {
|
||||||
return "\x00?";
|
return "\xFF\xFD";
|
||||||
}
|
}
|
||||||
else if (utf16le_encoding == enc) {
|
else if (utf16le_encoding == enc) {
|
||||||
return "?\x00";
|
return "\xFD\xFF";
|
||||||
}
|
}
|
||||||
else if (utf32be_encoding == enc) {
|
else if (utf32be_encoding == enc) {
|
||||||
return "\x00\x00\x00?";
|
return "\x00\x00\xFF\xFD";
|
||||||
}
|
}
|
||||||
else if (utf32le_encoding == enc) {
|
else if (utf32le_encoding == enc) {
|
||||||
return "?\x00\x00\x00";
|
return "\xFD\xFF\x00\x00";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return "?";
|
return "?";
|
||||||
|
|
Загрузка…
Ссылка в новой задаче