зеркало из https://github.com/github/ruby.git
* enc/trans/gbk.trans, gbk-tbl.rb:
new Chinese GBK transcoding (from Yoshihiro Kambayashi)
* test/ruby/test_transcode.rb: added tests for the above (from Yoshihiro Kambayashi)
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@21315 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
fecce9e5e5
Коммит
deeade6f3e
10
ChangeLog
10
ChangeLog
|
@ -1,4 +1,12 @@
|
|||
Sun Jan 4 17:39:39 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
Sun Jan 4 18:10:10 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/trans/gbk.trans, gbk-tbl.rb:
|
||||
new Chinese GBK transcoding (from Yoshihiro Kambayashi)
|
||||
|
||||
* test/ruby/test_transcode.rb: added tests for the above
|
||||
(from Yoshihiro Kambayashi)
|
||||
|
||||
Sun Jan 4 17:55:55 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* test/ruby/test_transcode.rb: added tests for GB2312
|
||||
(from Yoshihiro Kambayashi)
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,16 @@
|
|||
/* Transcoder template for GBK: the erb section below builds the GBK/UTF-8
 * conversion tables, and the generated code is emitted after it. */
#include "transcode_data.h"
|
||||
|
||||
<%
|
||||
# Load the table file; GBK_TO_UCS_TBL used below is presumably defined there
# (confirm in gbk-tbl.rb).
require "gbk-tbl"
|
||||
|
||||
# GBK -> UTF-8: the {00-7f} byte range is tagged :nomap (presumably passed
# through unchanged -- confirm in the table generator), followed by every
# entry of GBK_TO_UCS_TBL.
transcode_tblgen "GBK", "UTF-8", [["{00-7f}", :nomap], *GBK_TO_UCS_TBL]
|
||||
# UTF-8 -> GBK: same {00-7f} entry, then the same table with the two
# elements of each pair swapped.
transcode_tblgen "UTF-8", "GBK", [["{00-7f}", :nomap], *GBK_TO_UCS_TBL.map {|a,b| [b,a] }]
|
||||
%>
|
||||
|
||||
/* Inserts the value of transcode_generated_code (helper defined outside this
 * file; presumably the code produced by the transcode_tblgen calls). */
<%= transcode_generated_code %>
|
||||
|
||||
/* Init_gbk: takes no arguments and returns nothing. Its entire body is the
 * text inserted by the erb expression transcode_register_code (helper defined
 * outside this file; presumably it registers the GBK transcoders -- confirm). */
void
|
||||
Init_gbk(void)
|
||||
{
|
||||
<%= transcode_register_code %>
|
||||
}
|
|
@ -1448,6 +1448,120 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC7\xE0\xC9\xBD\xD1\xA7\xD4\xBA\xB4\xF3\xD1\xA7", 'GB2312') # 青山学院大学
|
||||
end
|
||||
|
||||
# Tests the GBK transcoder.
#
# Each check_both_ways call pairs a Unicode string (written with \u escapes)
# with the GBK byte sequence expected for it; the helper is defined elsewhere
# in this test file and presumably verifies the conversion in both directions.
# Each assert_raise line expects Encoding::UndefinedConversionError when the
# given two-byte sequence is encoded from GBK to UTF-8.
#
# For many lead bytes the samples sit at trail bytes 0x40, 0x7E, 0x80 and 0xFE
# (presumably the edges of GBK's trail-byte ranges). The trailing comment on
# each line shows the character named by the \u escape.
def test_gbk
|
||||
check_both_ways("\u4E02", "\x81\x40", 'GBK') # 丂
|
||||
check_both_ways("\u4E8A", "\x81\x7E", 'GBK') # 亊
|
||||
check_both_ways("\u4E90", "\x81\x80", 'GBK') # 亐
|
||||
check_both_ways("\u4FA2", "\x81\xFE", 'GBK') # 侢
|
||||
check_both_ways("\u5EC6", "\x8F\x40", 'GBK') # 廆
|
||||
check_both_ways("\u5F24", "\x8F\x7E", 'GBK') # 弤
|
||||
check_both_ways("\u5F28", "\x8F\x80", 'GBK') # 弨
|
||||
check_both_ways("\u6007", "\x8F\xFE", 'GBK') # 怇
|
||||
check_both_ways("\u6008", "\x90\x40", 'GBK') # 怈
|
||||
check_both_ways("\u6080", "\x90\x7E", 'GBK') # 悀
|
||||
check_both_ways("\u6081", "\x90\x80", 'GBK') # 悁
|
||||
check_both_ways("\u6146", "\x90\xFE", 'GBK') # 慆
|
||||
check_both_ways("\u70DC", "\x9F\x40", 'GBK') # 烜
|
||||
check_both_ways("\u7134", "\x9F\x7E", 'GBK') # 焴
|
||||
check_both_ways("\u7135", "\x9F\x80", 'GBK') # 焵
|
||||
check_both_ways("\u71D3", "\x9F\xFE", 'GBK') # 燓
|
||||
check_both_ways("\u71D6", "\xA0\x40", 'GBK') # 燖
|
||||
check_both_ways("\u721A", "\xA0\x7E", 'GBK') # 爚
|
||||
check_both_ways("\u721B", "\xA0\x80", 'GBK') # 爛
|
||||
check_both_ways("\u72DB", "\xA0\xFE", 'GBK') # 狛
|
||||
check_both_ways("\u3000", "\xA1\xA1", 'GBK') # full-width space
|
||||
check_both_ways("\u3001", "\xA1\xA2", 'GBK') # 、
|
||||
check_both_ways("\u3013", "\xA1\xFE", 'GBK') # 〓
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA2\xA0".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u2170", "\xA2\xA1", 'GBK') # ⅰ
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB0".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u2488", "\xA2\xB1", 'GBK') # ⒈
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA2\xE4".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u3220", "\xA2\xE5", 'GBK') # ㈠
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF0".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u2160", "\xA2\xF1", 'GBK') # Ⅰ
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA3\xA0".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\uFF01", "\xA3\xA1", 'GBK') # ！ (fullwidth exclamation mark, U+FF01)
|
||||
check_both_ways("\uFFE3", "\xA3\xFE", 'GBK') # ￣ (fullwidth macron, U+FFE3)
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA4\xA0".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u3041", "\xA4\xA1", 'GBK') # ぁ
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA5\xA0".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u30A1", "\xA5\xA1", 'GBK') # ァ
|
||||
check_both_ways("\u0391", "\xA6\xA1", 'GBK') # Α
|
||||
check_both_ways("\u03B1", "\xA6\xC1", 'GBK') # α
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA6\xED".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\uFE3B", "\xA6\xEE", 'GBK') # ︻
|
||||
check_both_ways("\u0410", "\xA7\xA1", 'GBK') # А
|
||||
check_both_ways("\u0430", "\xA7\xD1", 'GBK') # а
|
||||
check_both_ways("\u02CA", "\xA8\x40", 'GBK') # ˊ
|
||||
check_both_ways("\u2587", "\xA8\x7E", 'GBK') # ▇
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA8\x96".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u0101", "\xA8\xA1", 'GBK') # ā
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBC".encode("utf-8", 'GBK') }
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBF".encode("utf-8", 'GBK') }
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC4".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u3105", "\xA8\xC5", 'GBK') # ㄅ
|
||||
check_both_ways("\u3021", "\xA9\x40", 'GBK') # 〡
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA9\x58".encode("utf-8", 'GBK') }
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5B".encode("utf-8", 'GBK') }
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5D".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u3007", "\xA9\x96", 'GBK') # 〇
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA9\xA3".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u2500", "\xA9\xA4", 'GBK') # ─
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA9\xF0".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u7588", "\xAF\x40", 'GBK') # 疈
|
||||
check_both_ways("\u7607", "\xAF\x7E", 'GBK') # 瘇
|
||||
check_both_ways("\u7608", "\xAF\x80", 'GBK') # 瘈
|
||||
check_both_ways("\u7644", "\xAF\xA0", 'GBK') # 癄
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xAF\xA1".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u7645", "\xB0\x40", 'GBK') # 癅
|
||||
check_both_ways("\u769B", "\xB0\x7E", 'GBK') # 皛
|
||||
check_both_ways("\u769C", "\xB0\x80", 'GBK') # 皜
|
||||
check_both_ways("\u5265", "\xB0\xFE", 'GBK') # 剥
|
||||
check_both_ways("\u7DFB", "\xBF\x40", 'GBK') # 緻
|
||||
check_both_ways("\u7E39", "\xBF\x7E", 'GBK') # 縹
|
||||
check_both_ways("\u7E3A", "\xBF\x80", 'GBK') # 縺
|
||||
check_both_ways("\u5080", "\xBF\xFE", 'GBK') # 傀
|
||||
check_both_ways("\u7E5E", "\xC0\x40", 'GBK') # 繞
|
||||
check_both_ways("\u7E9E", "\xC0\x7E", 'GBK') # 纞
|
||||
check_both_ways("\u7EAE", "\xC0\x80", 'GBK') # 纮
|
||||
check_both_ways("\u4FD0", "\xC0\xFE", 'GBK') # 俐
|
||||
check_both_ways("\u87A5", "\xCF\x40", 'GBK') # 螥
|
||||
check_both_ways("\u87F8", "\xCF\x7E", 'GBK') # 蟸
|
||||
check_both_ways("\u87FA", "\xCF\x80", 'GBK') # 蟺
|
||||
check_both_ways("\u6653", "\xCF\xFE", 'GBK') # 晓
|
||||
check_both_ways("\u8824", "\xD0\x40", 'GBK') # 蠤
|
||||
check_both_ways("\u887A", "\xD0\x7E", 'GBK') # 衺
|
||||
check_both_ways("\u887B", "\xD0\x80", 'GBK') # 衻
|
||||
check_both_ways("\u7384", "\xD0\xFE", 'GBK') # 玄
|
||||
check_both_ways("\u9019", "\xDF\x40", 'GBK') # 這
|
||||
check_both_ways("\u9081", "\xDF\x7E", 'GBK') # 邁
|
||||
check_both_ways("\u9084", "\xDF\x80", 'GBK') # 還
|
||||
check_both_ways("\u553C", "\xDF\xFE", 'GBK') # 唼
|
||||
check_both_ways("\u90C2", "\xE0\x40", 'GBK') # 郂
|
||||
check_both_ways("\u911C", "\xE0\x7E", 'GBK') # 鄜
|
||||
check_both_ways("\u911D", "\xE0\x80", 'GBK') # 鄝
|
||||
check_both_ways("\u5E3C", "\xE0\xFE", 'GBK') # 帼
|
||||
check_both_ways("\u986F", "\xEF\x40", 'GBK') # 顯
|
||||
check_both_ways("\u98E4", "\xEF\x7E", 'GBK') # 飤
|
||||
check_both_ways("\u98E5", "\xEF\x80", 'GBK') # 飥
|
||||
check_both_ways("\u7A14", "\xEF\xFE", 'GBK') # 稔
|
||||
check_both_ways("\u9908", "\xF0\x40", 'GBK') # 餈
|
||||
check_both_ways("\u9949", "\xF0\x7E", 'GBK') # 饉
|
||||
check_both_ways("\u994A", "\xF0\x80", 'GBK') # 饊
|
||||
check_both_ways("\u7619", "\xF0\xFE", 'GBK') # 瘙
|
||||
check_both_ways("\u9F32", "\xFD\x40", 'GBK') # 鼲
|
||||
check_both_ways("\u9F78", "\xFD\x7E", 'GBK') # 齸
|
||||
check_both_ways("\u9F79", "\xFD\x80", 'GBK') # 齹
|
||||
check_both_ways("\uF9F1", "\xFD\xA0", 'GBK') # CJK compatibility ideograph U+F9F1 (compatibility form of 隣, U+96A3)
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xFD\xA1".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\uFA0C", "\xFE\x40", 'GBK') # CJK compatibility ideograph U+FA0C (compatibility form of 兀, U+5140)
|
||||
check_both_ways("\uFA29", "\xFE\x4F", 'GBK') # 﨩
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xFE\x50".encode("utf-8", 'GBK') }
|
||||
check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC7\xE0\xC9\xBD\xD1\xA7\xD4\xBA\xB4\xF3\xD1\xA7", 'GBK') # 青山学院大学
|
||||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xC9\xF1\xC1\xD6\xC1\x78\xB2\xA9", 'GBK') # 神林義博
|
||||
end
|
||||
|
||||
def test_Big5
|
||||
check_both_ways("\u3000", "\xA1\x40", 'Big5') # full-width space
|
||||
check_both_ways("\uFE5A", "\xA1\x7E", 'Big5') # ﹚
|
||||
|
|
Загрузка…
Ссылка в новой задаче