зеркало из https://github.com/github/ruby.git
* enc/euc_jp.c: added EUC-JP-2004 and its alias EUC-JISX0213.
[ruby-dev:45571] [Feature #6349] Requested by Kyouhei Yanagita <yanagi@shakenbu.org>. * enc/trans/japanese_euc.trans: ditto. * enc/trans/JIS/JISX0213-[12]%UCS@{BMP,SIP}.src: JIS X 0213:2004 -> Unicode mapping table from NetBSD. * enc/trans/JIS/UCS@{BMP,SIP}%JISX0213-[12].src: Unicode -> JIS X 0213:2004 mapping table from NetBSD. * tool/transcode-tblgen.rb: added SIP support. * test/ruby/test_transcode.rb: tests of above changes. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@35460 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
66d247bcb5
Коммит
756ffef448
18
ChangeLog
18
ChangeLog
|
@ -1,3 +1,21 @@
|
|||
Tue Apr 24 19:59:31 2012 NAKAMURA Usaku <usa@ruby-lang.org>
|
||||
|
||||
* enc/euc_jp.c: added EUC-JP-2004 and its alias EUC-JISX0213.
|
||||
[ruby-dev:45571] [Feature #6349]
|
||||
Requested by Kyouhei Yanagita <yanagi@shakenbu.org>.
|
||||
|
||||
* enc/trans/japanese_euc.trans: ditto.
|
||||
|
||||
* enc/trans/JIS/JISX0213-[12]%UCS@{BMP,SIP}.src: JIS X 0213:2004 ->
|
||||
Unicode mapping table from NetBSD.
|
||||
|
||||
* enc/trans/JIS/UCS@{BMP,SIP}%JISX0213-[12].src: Unicode -> JIS X
|
||||
0213:2004 mapping table from NetBSD.
|
||||
|
||||
* tool/transcode-tblgen.rb: added SIP support.
|
||||
|
||||
* test/ruby/test_transcode.rb: tests of above changes.
|
||||
|
||||
Tue Apr 24 18:12:13 2012 Koichi Sasada <ko1@atdot.net>
|
||||
|
||||
* compile.c: fix to output warning when the same literals
|
||||
|
|
|
@ -635,3 +635,10 @@ ENC_ALIAS("euc-jp-ms", "eucJP-ms")
|
|||
* Link: http://msyk.at.webry.info/200511/article_2.html
|
||||
*/
|
||||
ENC_REPLICATE("CP51932", "EUC-JP")
|
||||
|
||||
/*
|
||||
* Name: EUC-JP-2004
|
||||
* Link: http://ja.wikipedia.org/wiki/EUC-JP-2004
|
||||
*/
|
||||
ENC_REPLICATE("EUC-JP-2004", "EUC-JP") /* defined at JIS X 0213:2004 */
|
||||
ENC_ALIAS("EUC-JISX0213", "EUC-JP-2004") /* defined at JIS X 0213:2000, and obsolete at JIS X 0213:2004 */
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,60 @@
|
|||
# $NetBSD: JISX0213-1%UCS@SIP.src,v 1.1 2007/03/05 16:58:33 tnozaki Exp $
|
||||
|
||||
TYPE ROWCOL
|
||||
NAME "JISX0213-1/UCS:SIP"
|
||||
SRC_ZONE 0x21-0x7E / 0x21-0x7E / 8
|
||||
OOB_MODE INVALID
|
||||
DST_INVALID 0xFFFE
|
||||
DST_UNIT_BITS 16
|
||||
|
||||
BEGIN_MAP
|
||||
## JIS X 0213:2004 vs Unicode mapping table
|
||||
##
|
||||
## Date: 22 May 2006
|
||||
## License:
|
||||
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
|
||||
## Copyright (C) 2001 I'O, All Rights Reserved.
|
||||
## Copyright (C) 2006 Project X0213, All Rights Reserved.
|
||||
## You can use, modify, distribute this table freely.
|
||||
## Note:
|
||||
## 3-XXXX JIS X 0213:2004 plane 1 (GL encoding)
|
||||
## 4-XXXX JIS X 0213:2000 plane 2 (GL encoding)
|
||||
## [1983] JIS codepoint defined by JIS X 0208-1983
|
||||
## [1990] JIS codepoint defined by JIS X 0208-1990
|
||||
## [2000] JIS codepoint defined by JIS X 0213:2000
|
||||
## [2004] JIS codepoint defined by JIS X 0213:2004
|
||||
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
|
||||
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
|
||||
## Fullwidth UCS fullwidth form (U+Fxxx)
|
||||
## Windows Windows (CP932) mapping
|
||||
## Some 0213 characters can't be represented by one UCS character.
|
||||
## In this table, such characters are described as 'U+xxxx+xxxx'.
|
||||
##
|
||||
## JIS Unicode Name Note
|
||||
0x2E22 = 0x000B
|
||||
0x2F42 = 0x123D
|
||||
0x2F4C = 0x131B
|
||||
0x2F60 = 0x146E
|
||||
0x2F7B = 0x18BD
|
||||
0x4F54 = 0x0B9F
|
||||
0x4F63 = 0x16B4
|
||||
0x4F6E = 0x1E34
|
||||
0x753A = 0x31C4
|
||||
0x7572 = 0x35C4
|
||||
0x7629 = 0x373F
|
||||
0x7632 = 0x3763
|
||||
0x7660 = 0x3CFE
|
||||
0x776C = 0x47F1
|
||||
0x787E = 0x548E
|
||||
0x7929 = 0x550E
|
||||
0x7947 = 0x5771
|
||||
0x7954 = 0x59C4
|
||||
0x796E = 0x5DA1
|
||||
0x7A5D = 0x6AFF
|
||||
0x7B33 = 0x6E40
|
||||
0x7B49 = 0x70F4
|
||||
0x7B6C = 0x7684
|
||||
0x7C49 = 0x8277
|
||||
0x7C51 = 0x83CD
|
||||
0x7E66 = 0xA190
|
||||
END_MAP
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,311 @@
|
|||
# $NetBSD: JISX0213-2%UCS@SIP.src,v 1.1 2007/03/05 16:58:33 tnozaki Exp $
|
||||
|
||||
TYPE ROWCOL
|
||||
NAME "JISX0213-2/UCS:SIP"
|
||||
SRC_ZONE 0x21-0x7E / 0x21-0x7E / 8
|
||||
OOB_MODE INVALID
|
||||
DST_INVALID 0xFFFE
|
||||
DST_UNIT_BITS 16
|
||||
|
||||
BEGIN_MAP
|
||||
## JIS X 0213:2004 vs Unicode mapping table
|
||||
##
|
||||
## Date: 22 May 2006
|
||||
## License:
|
||||
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
|
||||
## Copyright (C) 2001 I'O, All Rights Reserved.
|
||||
## Copyright (C) 2006 Project X0213, All Rights Reserved.
|
||||
## You can use, modify, distribute this table freely.
|
||||
## Note:
|
||||
## 3-XXXX JIS X 0213:2004 plane 1 (GL encoding)
|
||||
## 4-XXXX JIS X 0213:2000 plane 2 (GL encoding)
|
||||
## [1983] JIS codepoint defined by JIS X 0208-1983
|
||||
## [1990] JIS codepoint defined by JIS X 0208-1990
|
||||
## [2000] JIS codepoint defined by JIS X 0213:2000
|
||||
## [2004] JIS codepoint defined by JIS X 0213:2004
|
||||
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
|
||||
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
|
||||
## Fullwidth UCS fullwidth form (U+Fxxx)
|
||||
## Windows Windows (CP932) mapping
|
||||
## Some 0213 characters can't be represented by one UCS character.
|
||||
## In this table, such characters are described as 'U+xxxx+xxxx'.
|
||||
##
|
||||
## JIS Unicode Name Note
|
||||
0x2121 = 0x0089
|
||||
0x212B = 0x00A2
|
||||
0x212E = 0x00A4
|
||||
0x2136 = 0x01A2
|
||||
0x2146 = 0x0213
|
||||
0x2170 = 0x032B
|
||||
0x2177 = 0x0381
|
||||
0x2179 = 0x0371
|
||||
0x2322 = 0x03F9
|
||||
0x2325 = 0x044A
|
||||
0x2327 = 0x0509
|
||||
0x2331 = 0x05D6
|
||||
0x2332 = 0x0628
|
||||
0x2338 = 0x074F
|
||||
0x233F = 0x0807
|
||||
0x2341 = 0x083A
|
||||
0x234A = 0x08B9
|
||||
0x2352 = 0x097C
|
||||
0x2353 = 0x099D
|
||||
0x2359 = 0x0AD3
|
||||
0x235C = 0x0B1D
|
||||
0x2377 = 0x0D45
|
||||
0x242A = 0x0DE1
|
||||
0x2431 = 0x0E95
|
||||
0x2432 = 0x0E6D
|
||||
0x243A = 0x0E64
|
||||
0x243D = 0x0F5F
|
||||
0x2459 = 0x1201
|
||||
0x245C = 0x1255
|
||||
0x245E = 0x127B
|
||||
0x2463 = 0x1274
|
||||
0x246A = 0x12E4
|
||||
0x246B = 0x12D7
|
||||
0x2472 = 0x12FD
|
||||
0x2474 = 0x1336
|
||||
0x2475 = 0x1344
|
||||
0x2525 = 0x13C4
|
||||
0x2532 = 0x146D
|
||||
0x253E = 0x15D7
|
||||
0x2544 = 0x6C29
|
||||
0x2547 = 0x1647
|
||||
0x2555 = 0x1706
|
||||
0x2556 = 0x1742
|
||||
0x257E = 0x19C3
|
||||
0x2830 = 0x1C56
|
||||
0x2837 = 0x1D2D
|
||||
0x2838 = 0x1D45
|
||||
0x283A = 0x1D78
|
||||
0x283B = 0x1D62
|
||||
0x283F = 0x1DA1
|
||||
0x2840 = 0x1D9C
|
||||
0x2845 = 0x1D92
|
||||
0x2848 = 0x1DB7
|
||||
0x284A = 0x1DE0
|
||||
0x284B = 0x1E33
|
||||
0x285B = 0x1F1E
|
||||
0x2866 = 0x1F76
|
||||
0x286C = 0x1FFA
|
||||
0x2C22 = 0x217B
|
||||
0x2C2B = 0x231E
|
||||
0x2C30 = 0x23AD
|
||||
0x2C50 = 0x26F3
|
||||
0x2C65 = 0x285B
|
||||
0x2C6D = 0x28AB
|
||||
0x2C72 = 0x298F
|
||||
0x2D24 = 0x2AB8
|
||||
0x2D29 = 0x2B4F
|
||||
0x2D2A = 0x2B50
|
||||
0x2D32 = 0x2B46
|
||||
0x2D34 = 0x2C1D
|
||||
0x2D35 = 0x2BA6
|
||||
0x2D39 = 0x2C24
|
||||
0x2D56 = 0x2DE1
|
||||
0x2D7D = 0x31C3
|
||||
0x2E23 = 0x31F5
|
||||
0x2E24 = 0x31B6
|
||||
0x2E3A = 0x3372
|
||||
0x2E3C = 0x33D3
|
||||
0x2E3D = 0x33D2
|
||||
0x2E42 = 0x33D0
|
||||
0x2E43 = 0x33E4
|
||||
0x2E44 = 0x33D5
|
||||
0x2E47 = 0x33DA
|
||||
0x2E49 = 0x33DF
|
||||
0x2E55 = 0x344A
|
||||
0x2E56 = 0x3451
|
||||
0x2E57 = 0x344B
|
||||
0x2E5B = 0x3465
|
||||
0x2E77 = 0x34E4
|
||||
0x2E78 = 0x355A
|
||||
0x2F2A = 0x3594
|
||||
0x2F3F = 0x3639
|
||||
0x2F40 = 0x3647
|
||||
0x2F42 = 0x3638
|
||||
0x2F43 = 0x363A
|
||||
0x2F4E = 0x371C
|
||||
0x2F59 = 0x370C
|
||||
0x2F61 = 0x3764
|
||||
0x2F69 = 0x37FF
|
||||
0x2F6A = 0x37E7
|
||||
0x2F70 = 0x3824
|
||||
0x2F75 = 0x383D
|
||||
0x6E23 = 0x3A98
|
||||
0x6E34 = 0x3C7F
|
||||
0x6E49 = 0x3D00
|
||||
0x6E5C = 0x3D40
|
||||
0x6E5E = 0x3DFA
|
||||
0x6E5F = 0x3DF9
|
||||
0x6E60 = 0x3DD3
|
||||
0x6F32 = 0x3F7E
|
||||
0x6F47 = 0x4096
|
||||
0x6F4D = 0x4103
|
||||
0x6F61 = 0x41C6
|
||||
0x6F64 = 0x41FE
|
||||
0x7022 = 0x43BC
|
||||
0x7033 = 0x4629
|
||||
0x7039 = 0x46A5
|
||||
0x7053 = 0x4896
|
||||
0x707B = 0x4A4D
|
||||
0x712E = 0x4B56
|
||||
0x7130 = 0x4B6F
|
||||
0x7135 = 0x4C16
|
||||
0x7144 = 0x4D14
|
||||
0x715D = 0x4E0E
|
||||
0x7161 = 0x4E37
|
||||
0x7166 = 0x4E6A
|
||||
0x7169 = 0x4E8B
|
||||
0x7175 = 0x504A
|
||||
0x7177 = 0x5055
|
||||
0x717A = 0x5122
|
||||
0x7221 = 0x51A9
|
||||
0x7223 = 0x51E5
|
||||
0x7224 = 0x51CD
|
||||
0x7228 = 0x521E
|
||||
0x722C = 0x524C
|
||||
0x723D = 0x542E
|
||||
0x7248 = 0x54D9
|
||||
0x725B = 0x55A7
|
||||
0x7275 = 0x57A9
|
||||
0x7276 = 0x57B4
|
||||
0x7332 = 0x59D4
|
||||
0x733D = 0x5AE4
|
||||
0x733E = 0x5AE3
|
||||
0x7340 = 0x5AF1
|
||||
0x7352 = 0x5BB2
|
||||
0x735D = 0x5C4B
|
||||
0x735E = 0x5C64
|
||||
0x7373 = 0x5E2E
|
||||
0x7374 = 0x5E56
|
||||
0x7375 = 0x5E65
|
||||
0x7377 = 0x5E62
|
||||
0x737B = 0x5ED8
|
||||
0x737D = 0x5EC2
|
||||
0x7422 = 0x5EE8
|
||||
0x7424 = 0x5F23
|
||||
0x7427 = 0x5F5C
|
||||
0x742E = 0x5FE0
|
||||
0x742F = 0x5FD4
|
||||
0x7434 = 0x600C
|
||||
0x7435 = 0x5FFB
|
||||
0x743D = 0x6017
|
||||
0x7442 = 0x6060
|
||||
0x744F = 0x60ED
|
||||
0x7469 = 0x6270
|
||||
0x746B = 0x6286
|
||||
0x7472 = 0x634C
|
||||
0x7475 = 0x3D0E
|
||||
0x7479 = 0x6402
|
||||
0x7535 = 0x667E
|
||||
0x753A = 0x66B0
|
||||
0x7546 = 0x671D
|
||||
0x7556 = 0x68DD
|
||||
0x7558 = 0x68EA
|
||||
0x755A = 0x6951
|
||||
0x755D = 0x696F
|
||||
0x755F = 0x69DD
|
||||
0x7563 = 0x6A1E
|
||||
0x756A = 0x6A58
|
||||
0x7570 = 0x6A8C
|
||||
0x7573 = 0x6AB7
|
||||
0x7644 = 0x6C73
|
||||
0x764E = 0x6CDD
|
||||
0x765D = 0x6E65
|
||||
0x7675 = 0x6F94
|
||||
0x767E = 0x6FF8
|
||||
0x7721 = 0x6FF6
|
||||
0x7722 = 0x6FF7
|
||||
0x7733 = 0x710D
|
||||
0x7736 = 0x7139
|
||||
0x7764 = 0x73DB
|
||||
0x7765 = 0x73DA
|
||||
0x776B = 0x73FE
|
||||
0x776E = 0x7410
|
||||
0x7773 = 0x7449
|
||||
0x7829 = 0x7615
|
||||
0x782A = 0x7614
|
||||
0x782C = 0x7631
|
||||
0x7834 = 0x7693
|
||||
0x783C = 0x770E
|
||||
0x783E = 0x7723
|
||||
0x7842 = 0x7752
|
||||
0x7856 = 0x7985
|
||||
0x7863 = 0x7A84
|
||||
0x7877 = 0x7BB3
|
||||
0x7879 = 0x7BBE
|
||||
0x787A = 0x7BC7
|
||||
0x7925 = 0x7CB8
|
||||
0x792F = 0x7DA0
|
||||
0x7932 = 0x7E10
|
||||
0x7939 = 0x7FB7
|
||||
0x7942 = 0x808A
|
||||
0x7948 = 0x80BB
|
||||
0x7959 = 0x8282
|
||||
0x795E = 0x82F3
|
||||
0x7966 = 0x840C
|
||||
0x796B = 0x8455
|
||||
0x797A = 0x856B
|
||||
0x797E = 0x85C8
|
||||
0x7A21 = 0x85C9
|
||||
0x7A2C = 0x86D7
|
||||
0x7A2F = 0x86FA
|
||||
0x7A4F = 0x8949
|
||||
0x7A50 = 0x8946
|
||||
0x7A57 = 0x896B
|
||||
0x7A65 = 0x8987
|
||||
0x7A66 = 0x8988
|
||||
0x7A71 = 0x89BA
|
||||
0x7A72 = 0x89BB
|
||||
0x7A7E = 0x8A1E
|
||||
0x7B21 = 0x8A29
|
||||
0x7B2C = 0x8A71
|
||||
0x7B2D = 0x8A43
|
||||
0x7B36 = 0x8A99
|
||||
0x7B37 = 0x8ACD
|
||||
0x7B3D = 0x8AE4
|
||||
0x7B3E = 0x8ADD
|
||||
0x7B4E = 0x8BC1
|
||||
0x7B4F = 0x8BEF
|
||||
0x7B57 = 0x8D10
|
||||
0x7B5A = 0x8D71
|
||||
0x7B5C = 0x8DFB
|
||||
0x7B5D = 0x8E1F
|
||||
0x7B61 = 0x8E36
|
||||
0x7B65 = 0x8E89
|
||||
0x7B67 = 0x8EEB
|
||||
0x7B69 = 0x8F32
|
||||
0x7B71 = 0x8FF8
|
||||
0x7C22 = 0x92A0
|
||||
0x7C23 = 0x92B1
|
||||
0x7C38 = 0x9490
|
||||
0x7C42 = 0x95CF
|
||||
0x7C4C = 0x967F
|
||||
0x7C56 = 0x96F0
|
||||
0x7C59 = 0x9719
|
||||
0x7C5D = 0x9750
|
||||
0x7C76 = 0x98C6
|
||||
0x7D2C = 0x9A72
|
||||
0x7D4B = 0x9DDB
|
||||
0x7D4C = 0x9E3D
|
||||
0x7D59 = 0x9E15
|
||||
0x7D5B = 0x9E8A
|
||||
0x7D5D = 0x9E49
|
||||
0x7D67 = 0x9EC4
|
||||
0x7D6D = 0x9EE9
|
||||
0x7D70 = 0x9EDB
|
||||
0x7E25 = 0x9FCE
|
||||
0x7E29 = 0xA02F
|
||||
0x7E2B = 0xA01A
|
||||
0x7E32 = 0xA0F9
|
||||
0x7E35 = 0xA082
|
||||
0x7E53 = 0x2218
|
||||
0x7E58 = 0xA38C
|
||||
0x7E5A = 0xA437
|
||||
0x7E6E = 0xA5F1
|
||||
0x7E70 = 0xA602
|
||||
0x7E72 = 0xA61A
|
||||
0x7E76 = 0xA6B2
|
||||
END_MAP
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,56 @@
|
|||
# $NetBSD: UCS@SIP%JISX0213-1.src,v 1.1 2007/03/05 16:58:34 tnozaki Exp $
|
||||
|
||||
TYPE ROWCOL
|
||||
NAME "UCS:SIP/JISX0213-1"
|
||||
SRC_ZONE 0x000B - 0xA190
|
||||
OOB_MODE INVALID
|
||||
DST_INVALID 0xFFFF
|
||||
DST_UNIT_BITS 16
|
||||
|
||||
BEGIN_MAP
|
||||
## Shift_JIS-2004 (JIS X 0213:2004) vs Unicode mapping table
|
||||
##
|
||||
## Date: 12 Feb 2005 10:15:00 GMT
|
||||
## License:
|
||||
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
|
||||
## Copyright (C) 2001 I'O, All Rights Reserved.
|
||||
## You can use, modify, distribute this table freely.
|
||||
## Note:
|
||||
## [1983] JIS codepoint defined by JIS X 0208-1983
|
||||
## [1990] JIS codepoint defined by JIS X 0208-1990
|
||||
## [2000] JIS codepoint defined by JIS X 0213:2000
|
||||
## [2004] JIS codepoint defined by JIS X 0213:2004
|
||||
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
|
||||
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
|
||||
## Fullwidth UCS fullwidth form (U+Fxxx)
|
||||
## Windows Windows (CP932) mapping
|
||||
## Some 0213 characters can't be represented by one UCS character.
|
||||
## In this table, such characters are described as 'U+xxxx+xxxx'.
|
||||
##
|
||||
0x000B = 0x2E22
|
||||
0x0B9F = 0x4F54
|
||||
0x123D = 0x2F42
|
||||
0x131B = 0x2F4C
|
||||
0x146E = 0x2F60
|
||||
0x16B4 = 0x4F63
|
||||
0x18BD = 0x2F7B
|
||||
0x1E34 = 0x4F6E
|
||||
0x31C4 = 0x753A
|
||||
0x35C4 = 0x7572
|
||||
0x373F = 0x7629
|
||||
0x3763 = 0x7632
|
||||
0x3CFE = 0x7660
|
||||
0x47F1 = 0x776C
|
||||
0x548E = 0x787E
|
||||
0x550E = 0x7929
|
||||
0x5771 = 0x7947
|
||||
0x59C4 = 0x7954
|
||||
0x5DA1 = 0x796E
|
||||
0x6AFF = 0x7A5D
|
||||
0x6E40 = 0x7B33
|
||||
0x70F4 = 0x7B49
|
||||
0x7684 = 0x7B6C
|
||||
0x8277 = 0x7C49
|
||||
0x83CD = 0x7C51
|
||||
0xA190 = 0x7E66
|
||||
END_MAP
|
|
@ -0,0 +1,307 @@
|
|||
# $NetBSD: UCS@SIP%JISX0213-2.src,v 1.1 2007/03/05 16:58:34 tnozaki Exp $
|
||||
|
||||
TYPE ROWCOL
|
||||
NAME "UCS:SIP/JISX0213-2"
|
||||
SRC_ZONE 0x0089 - 0xA6B2
|
||||
OOB_MODE INVALID
|
||||
DST_INVALID 0xFFFF
|
||||
DST_UNIT_BITS 16
|
||||
|
||||
BEGIN_MAP
|
||||
## Shift_JIS-2004 (JIS X 0213:2004) vs Unicode mapping table
|
||||
##
|
||||
## Date: 12 Feb 2005 10:15:00 GMT
|
||||
## License:
|
||||
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
|
||||
## Copyright (C) 2001 I'O, All Rights Reserved.
|
||||
## You can use, modify, distribute this table freely.
|
||||
## Note:
|
||||
## [1983] JIS codepoint defined by JIS X 0208-1983
|
||||
## [1990] JIS codepoint defined by JIS X 0208-1990
|
||||
## [2000] JIS codepoint defined by JIS X 0213:2000
|
||||
## [2004] JIS codepoint defined by JIS X 0213:2004
|
||||
## [Unicode3.1] UCS codepoint defined by Unicode 3.1
|
||||
## [Unicode3.2] UCS codepoint defined by Unicode 3.2
|
||||
## Fullwidth UCS fullwidth form (U+Fxxx)
|
||||
## Windows Windows (CP932) mapping
|
||||
## Some 0213 characters can't be represented by one UCS character.
|
||||
## In this table, such characters are described as 'U+xxxx+xxxx'.
|
||||
##
|
||||
0x0089 = 0x2121
|
||||
0x00A2 = 0x212B
|
||||
0x00A4 = 0x212E
|
||||
0x01A2 = 0x2136
|
||||
0x0213 = 0x2146
|
||||
0x032B = 0x2170
|
||||
0x0371 = 0x2179
|
||||
0x0381 = 0x2177
|
||||
0x03F9 = 0x2322
|
||||
0x044A = 0x2325
|
||||
0x0509 = 0x2327
|
||||
0x05D6 = 0x2331
|
||||
0x0628 = 0x2332
|
||||
0x074F = 0x2338
|
||||
0x0807 = 0x233F
|
||||
0x083A = 0x2341
|
||||
0x08B9 = 0x234A
|
||||
0x097C = 0x2352
|
||||
0x099D = 0x2353
|
||||
0x0AD3 = 0x2359
|
||||
0x0B1D = 0x235C
|
||||
0x0D45 = 0x2377
|
||||
0x0DE1 = 0x242A
|
||||
0x0E64 = 0x243A
|
||||
0x0E6D = 0x2432
|
||||
0x0E95 = 0x2431
|
||||
0x0F5F = 0x243D
|
||||
0x1201 = 0x2459
|
||||
0x1255 = 0x245C
|
||||
0x1274 = 0x2463
|
||||
0x127B = 0x245E
|
||||
0x12D7 = 0x246B
|
||||
0x12E4 = 0x246A
|
||||
0x12FD = 0x2472
|
||||
0x1336 = 0x2474
|
||||
0x1344 = 0x2475
|
||||
0x13C4 = 0x2525
|
||||
0x146D = 0x2532
|
||||
0x15D7 = 0x253E
|
||||
0x1647 = 0x2547
|
||||
0x1706 = 0x2555
|
||||
0x1742 = 0x2556
|
||||
0x19C3 = 0x257E
|
||||
0x1C56 = 0x2830
|
||||
0x1D2D = 0x2837
|
||||
0x1D45 = 0x2838
|
||||
0x1D62 = 0x283B
|
||||
0x1D78 = 0x283A
|
||||
0x1D92 = 0x2845
|
||||
0x1D9C = 0x2840
|
||||
0x1DA1 = 0x283F
|
||||
0x1DB7 = 0x2848
|
||||
0x1DE0 = 0x284A
|
||||
0x1E33 = 0x284B
|
||||
0x1F1E = 0x285B
|
||||
0x1F76 = 0x2866
|
||||
0x1FFA = 0x286C
|
||||
0x217B = 0x2C22
|
||||
0x2218 = 0x7E53
|
||||
0x231E = 0x2C2B
|
||||
0x23AD = 0x2C30
|
||||
0x26F3 = 0x2C50
|
||||
0x285B = 0x2C65
|
||||
0x28AB = 0x2C6D
|
||||
0x298F = 0x2C72
|
||||
0x2AB8 = 0x2D24
|
||||
0x2B46 = 0x2D32
|
||||
0x2B4F = 0x2D29
|
||||
0x2B50 = 0x2D2A
|
||||
0x2BA6 = 0x2D35
|
||||
0x2C1D = 0x2D34
|
||||
0x2C24 = 0x2D39
|
||||
0x2DE1 = 0x2D56
|
||||
0x31B6 = 0x2E24
|
||||
0x31C3 = 0x2D7D
|
||||
0x31F5 = 0x2E23
|
||||
0x3372 = 0x2E3A
|
||||
0x33D0 = 0x2E42
|
||||
0x33D2 = 0x2E3D
|
||||
0x33D3 = 0x2E3C
|
||||
0x33D5 = 0x2E44
|
||||
0x33DA = 0x2E47
|
||||
0x33DF = 0x2E49
|
||||
0x33E4 = 0x2E43
|
||||
0x344A = 0x2E55
|
||||
0x344B = 0x2E57
|
||||
0x3451 = 0x2E56
|
||||
0x3465 = 0x2E5B
|
||||
0x34E4 = 0x2E77
|
||||
0x355A = 0x2E78
|
||||
0x3594 = 0x2F2A
|
||||
0x3638 = 0x2F42
|
||||
0x3639 = 0x2F3F
|
||||
0x363A = 0x2F43
|
||||
0x3647 = 0x2F40
|
||||
0x370C = 0x2F59
|
||||
0x371C = 0x2F4E
|
||||
0x3764 = 0x2F61
|
||||
0x37E7 = 0x2F6A
|
||||
0x37FF = 0x2F69
|
||||
0x3824 = 0x2F70
|
||||
0x383D = 0x2F75
|
||||
0x3A98 = 0x6E23
|
||||
0x3C7F = 0x6E34
|
||||
0x3D00 = 0x6E49
|
||||
0x3D0E = 0x7475
|
||||
0x3D40 = 0x6E5C
|
||||
0x3DD3 = 0x6E60
|
||||
0x3DF9 = 0x6E5F
|
||||
0x3DFA = 0x6E5E
|
||||
0x3F7E = 0x6F32
|
||||
0x4096 = 0x6F47
|
||||
0x4103 = 0x6F4D
|
||||
0x41C6 = 0x6F61
|
||||
0x41FE = 0x6F64
|
||||
0x43BC = 0x7022
|
||||
0x4629 = 0x7033
|
||||
0x46A5 = 0x7039
|
||||
0x4896 = 0x7053
|
||||
0x4A4D = 0x707B
|
||||
0x4B56 = 0x712E
|
||||
0x4B6F = 0x7130
|
||||
0x4C16 = 0x7135
|
||||
0x4D14 = 0x7144
|
||||
0x4E0E = 0x715D
|
||||
0x4E37 = 0x7161
|
||||
0x4E6A = 0x7166
|
||||
0x4E8B = 0x7169
|
||||
0x504A = 0x7175
|
||||
0x5055 = 0x7177
|
||||
0x5122 = 0x717A
|
||||
0x51A9 = 0x7221
|
||||
0x51CD = 0x7224
|
||||
0x51E5 = 0x7223
|
||||
0x521E = 0x7228
|
||||
0x524C = 0x722C
|
||||
0x542E = 0x723D
|
||||
0x54D9 = 0x7248
|
||||
0x55A7 = 0x725B
|
||||
0x57A9 = 0x7275
|
||||
0x57B4 = 0x7276
|
||||
0x59D4 = 0x7332
|
||||
0x5AE3 = 0x733E
|
||||
0x5AE4 = 0x733D
|
||||
0x5AF1 = 0x7340
|
||||
0x5BB2 = 0x7352
|
||||
0x5C4B = 0x735D
|
||||
0x5C64 = 0x735E
|
||||
0x5E2E = 0x7373
|
||||
0x5E56 = 0x7374
|
||||
0x5E62 = 0x7377
|
||||
0x5E65 = 0x7375
|
||||
0x5EC2 = 0x737D
|
||||
0x5ED8 = 0x737B
|
||||
0x5EE8 = 0x7422
|
||||
0x5F23 = 0x7424
|
||||
0x5F5C = 0x7427
|
||||
0x5FD4 = 0x742F
|
||||
0x5FE0 = 0x742E
|
||||
0x5FFB = 0x7435
|
||||
0x600C = 0x7434
|
||||
0x6017 = 0x743D
|
||||
0x6060 = 0x7442
|
||||
0x60ED = 0x744F
|
||||
0x6270 = 0x7469
|
||||
0x6286 = 0x746B
|
||||
0x634C = 0x7472
|
||||
0x6402 = 0x7479
|
||||
0x667E = 0x7535
|
||||
0x66B0 = 0x753A
|
||||
0x671D = 0x7546
|
||||
0x68DD = 0x7556
|
||||
0x68EA = 0x7558
|
||||
0x6951 = 0x755A
|
||||
0x696F = 0x755D
|
||||
0x69DD = 0x755F
|
||||
0x6A1E = 0x7563
|
||||
0x6A58 = 0x756A
|
||||
0x6A8C = 0x7570
|
||||
0x6AB7 = 0x7573
|
||||
0x6C29 = 0x2544
|
||||
0x6C73 = 0x7644
|
||||
0x6CDD = 0x764E
|
||||
0x6E65 = 0x765D
|
||||
0x6F94 = 0x7675
|
||||
0x6FF6 = 0x7721
|
||||
0x6FF7 = 0x7722
|
||||
0x6FF8 = 0x767E
|
||||
0x710D = 0x7733
|
||||
0x7139 = 0x7736
|
||||
0x73DA = 0x7765
|
||||
0x73DB = 0x7764
|
||||
0x73FE = 0x776B
|
||||
0x7410 = 0x776E
|
||||
0x7449 = 0x7773
|
||||
0x7614 = 0x782A
|
||||
0x7615 = 0x7829
|
||||
0x7631 = 0x782C
|
||||
0x7693 = 0x7834
|
||||
0x770E = 0x783C
|
||||
0x7723 = 0x783E
|
||||
0x7752 = 0x7842
|
||||
0x7985 = 0x7856
|
||||
0x7A84 = 0x7863
|
||||
0x7BB3 = 0x7877
|
||||
0x7BBE = 0x7879
|
||||
0x7BC7 = 0x787A
|
||||
0x7CB8 = 0x7925
|
||||
0x7DA0 = 0x792F
|
||||
0x7E10 = 0x7932
|
||||
0x7FB7 = 0x7939
|
||||
0x808A = 0x7942
|
||||
0x80BB = 0x7948
|
||||
0x8282 = 0x7959
|
||||
0x82F3 = 0x795E
|
||||
0x840C = 0x7966
|
||||
0x8455 = 0x796B
|
||||
0x856B = 0x797A
|
||||
0x85C8 = 0x797E
|
||||
0x85C9 = 0x7A21
|
||||
0x86D7 = 0x7A2C
|
||||
0x86FA = 0x7A2F
|
||||
0x8946 = 0x7A50
|
||||
0x8949 = 0x7A4F
|
||||
0x896B = 0x7A57
|
||||
0x8987 = 0x7A65
|
||||
0x8988 = 0x7A66
|
||||
0x89BA = 0x7A71
|
||||
0x89BB = 0x7A72
|
||||
0x8A1E = 0x7A7E
|
||||
0x8A29 = 0x7B21
|
||||
0x8A43 = 0x7B2D
|
||||
0x8A71 = 0x7B2C
|
||||
0x8A99 = 0x7B36
|
||||
0x8ACD = 0x7B37
|
||||
0x8ADD = 0x7B3E
|
||||
0x8AE4 = 0x7B3D
|
||||
0x8BC1 = 0x7B4E
|
||||
0x8BEF = 0x7B4F
|
||||
0x8D10 = 0x7B57
|
||||
0x8D71 = 0x7B5A
|
||||
0x8DFB = 0x7B5C
|
||||
0x8E1F = 0x7B5D
|
||||
0x8E36 = 0x7B61
|
||||
0x8E89 = 0x7B65
|
||||
0x8EEB = 0x7B67
|
||||
0x8F32 = 0x7B69
|
||||
0x8FF8 = 0x7B71
|
||||
0x92A0 = 0x7C22
|
||||
0x92B1 = 0x7C23
|
||||
0x9490 = 0x7C38
|
||||
0x95CF = 0x7C42
|
||||
0x967F = 0x7C4C
|
||||
0x96F0 = 0x7C56
|
||||
0x9719 = 0x7C59
|
||||
0x9750 = 0x7C5D
|
||||
0x98C6 = 0x7C76
|
||||
0x9A72 = 0x7D2C
|
||||
0x9DDB = 0x7D4B
|
||||
0x9E15 = 0x7D59
|
||||
0x9E3D = 0x7D4C
|
||||
0x9E49 = 0x7D5D
|
||||
0x9E8A = 0x7D5B
|
||||
0x9EC4 = 0x7D67
|
||||
0x9EDB = 0x7D70
|
||||
0x9EE9 = 0x7D6D
|
||||
0x9FCE = 0x7E25
|
||||
0xA01A = 0x7E2B
|
||||
0xA02F = 0x7E29
|
||||
0xA082 = 0x7E35
|
||||
0xA0F9 = 0x7E32
|
||||
0xA38C = 0x7E58
|
||||
0xA437 = 0x7E5A
|
||||
0xA5F1 = 0x7E6E
|
||||
0xA602 = 0x7E70
|
||||
0xA61A = 0x7E72
|
||||
0xA6B2 = 0x7E76
|
||||
END_MAP
|
|
@ -18,6 +18,12 @@
|
|||
citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,JISX0208:MS/UCS") +
|
||||
citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS")
|
||||
|
||||
transcode_tblgen "EUC-JP-2004", "UTF-8",
|
||||
[["{00-7f}", :nomap]] +
|
||||
citrus_decode_mapsrc("euc", 0x8080, "JISX0208:1990/UCS,JISX0213-1/UCS@BMP,JISX0213-1/UCS@SIP") +
|
||||
citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS") +
|
||||
citrus_decode_mapsrc("euc", 0x8000, "JISX0213-2/UCS@BMP,JISX0213-2/UCS@SIP")
|
||||
|
||||
|
||||
transcode_tblgen "UTF-8", "EUC-JP",
|
||||
[["{00-7f}", :nomap]] +
|
||||
|
@ -35,6 +41,12 @@
|
|||
[["{00-7f}", :nomap]] +
|
||||
citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:MS,UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM") +
|
||||
citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA")
|
||||
|
||||
transcode_tblgen "UTF-8", "EUC-JP-2004",
|
||||
[["{00-7f}", :nomap]] +
|
||||
citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:1990,UCS@BMP/JISX0213-1,UCS@SIP/JISX0213-1") +
|
||||
citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA") +
|
||||
citrus_decode_mapsrc("euc", 0x8000, "UCS@BMP/JISX0213-2,UCS@SIP/JISX0213-2")
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
|
|
@ -65,8 +65,11 @@ class TestTranscode < Test::Unit::TestCase
|
|||
"\x82\xdc\x82\xc2\x82\xe0\x82\xc6 \x82\xe4\x82\xab\x82\xd0\x82\xeb", 'shift_jis') # まつもと ゆきひろ
|
||||
check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D",
|
||||
"\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jp')
|
||||
check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D",
|
||||
"\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jp-2004')
|
||||
check_both_ways("\u677E\u672C\u884C\u5F18", "\x8f\xbc\x96\x7b\x8d\x73\x8d\x4f", 'shift_jis') # 松本行弘
|
||||
check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jp')
|
||||
check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jp-2004')
|
||||
check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-1') # Dürst
|
||||
check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-2')
|
||||
check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-3')
|
||||
|
@ -83,6 +86,7 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u0643\u062A\u0628", "\xE3\xCA\xC8", 'iso-8859-6') # كتب
|
||||
check_both_ways("\u65E5\u8A18", "\x93\xFA\x8BL", 'shift_jis') # 日記
|
||||
check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jp')
|
||||
check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jp-2004')
|
||||
check_both_ways("\uC560\uC778\uAD6C\uD568\u0020\u6734\uC9C0\uC778",
|
||||
"\xBE\xD6\xC0\xCE\xB1\xB8\xC7\xD4\x20\xDA\xD3\xC1\xF6\xC0\xCE", 'euc-kr') # 애인구함 朴지인
|
||||
check_both_ways("\uC544\uD58F\uD58F\u0020\uB620\uBC29\uD6BD\uB2D8\u0020\uC0AC\uB791\uD716",
|
||||
|
@ -1154,10 +1158,16 @@ class TestTranscode < Test::Unit::TestCase
|
|||
|
||||
assert_equal("\uFFFD!",
|
||||
"\xff!".encode("utf-8", "euc-jp", :invalid=>:replace))
|
||||
assert_equal("\uFFFD!",
|
||||
"\xff!".encode("utf-8", "euc-jp-2004", :invalid=>:replace))
|
||||
assert_equal("\uFFFD!",
|
||||
"\xa1!".encode("utf-8", "euc-jp", :invalid=>:replace))
|
||||
assert_equal("\uFFFD!",
|
||||
"\xa1!".encode("utf-8", "euc-jp-2004", :invalid=>:replace))
|
||||
assert_equal("\uFFFD!",
|
||||
"\x8f\xa1!".encode("utf-8", "euc-jp", :invalid=>:replace))
|
||||
assert_equal("\uFFFD!",
|
||||
"\x8f\xa1!".encode("utf-8", "euc-jp-2004", :invalid=>:replace))
|
||||
|
||||
assert_equal("?",
|
||||
"\xdc\x00".encode("EUC-JP", "UTF-16BE", :invalid=>:replace), "[ruby-dev:35776]")
|
||||
|
@ -1174,6 +1184,7 @@ class TestTranscode < Test::Unit::TestCase
|
|||
|
||||
def test_invalid_replace_string
|
||||
assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jp", :invalid=>:replace, :replace=>"<x>"))
|
||||
assert_equal("a<x>A", "a\x80A".encode("us-ascii", "euc-jp-2004", :invalid=>:replace, :replace=>"<x>"))
|
||||
end
|
||||
|
||||
def test_undef_replace
|
||||
|
@ -1288,6 +1299,64 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xBF\xC0\xCE\xD3\xB5\xC1\xC7\xEE", 'euc-jp') # 神林義博
|
||||
end
|
||||
|
||||
def test_euc_jp_2004
|
||||
check_both_ways("\u3000", "\xA1\xA1", 'euc-jp-2004') # full-width space
|
||||
check_both_ways("\u00D7", "\xA1\xDF", 'euc-jp-2004') # ×
|
||||
check_both_ways("\u00F7", "\xA1\xE0", 'euc-jp-2004') # ÷
|
||||
check_both_ways("\u25C7", "\xA1\xFE", 'euc-jp-2004') # ◇
|
||||
check_both_ways("\u25C6", "\xA2\xA1", 'euc-jp-2004') # ◆
|
||||
check_both_ways("\uFF07", "\xA2\xAF", 'euc-jp-2004') # '
|
||||
check_both_ways("\u309F", "\xA2\xB9", 'euc-jp-2004') # ゟ
|
||||
check_both_ways("\u2284", "\xA2\xC2", 'euc-jp-2004') # ⊄
|
||||
check_both_ways("\u2306", "\xA2\xC9", 'euc-jp-2004') # ⌆
|
||||
check_both_ways("\u2295", "\xA2\xD1", 'euc-jp-2004') # ⊕
|
||||
check_both_ways("\u3017", "\xA2\xDB", 'euc-jp-2004') # 〗
|
||||
check_both_ways("\u2262", "\xA2\xEB", 'euc-jp-2004') # ≢
|
||||
check_both_ways("\u2194", "\xA2\xF1", 'euc-jp-2004') # ↔
|
||||
check_both_ways("\u266E", "\xA2\xFA", 'euc-jp-2004') # ♮
|
||||
check_both_ways("\u2669", "\xA2\xFD", 'euc-jp-2004') # ♩
|
||||
check_both_ways("\u25EF", "\xA2\xFE", 'euc-jp-2004') # ◯
|
||||
check_both_ways("\u2935", "\xA3\xAF", 'euc-jp-2004') # ⤵
|
||||
check_both_ways("\u29BF", "\xA3\xBA", 'euc-jp-2004') # ⦿
|
||||
check_both_ways("\u2022", "\xA3\xC0", 'euc-jp-2004') # •
|
||||
check_both_ways("\u2213", "\xA3\xDB", 'euc-jp-2004') # ∓
|
||||
check_both_ways("\u2127", "\xA3\xE0", 'euc-jp-2004') # ℧
|
||||
check_both_ways("\u30A0", "\xA3\xFB", 'euc-jp-2004') # ゠
|
||||
check_both_ways("\uFF54", "\xA3\xF4", 'euc-jp-2004') # t
|
||||
assert_raise(Encoding::UndefinedConversionError) { "\xA5\xF7".encode("utf-8", 'euc-jp-2004') }
|
||||
check_both_ways("\u2664", "\xA6\xB9", 'euc-jp-2004') # ♤
|
||||
check_both_ways("\u2663", "\xA6\xC0", 'euc-jp-2004') # ♣
|
||||
check_both_ways("\u03C2", "\xA6\xD9", 'euc-jp-2004') # ς
|
||||
check_both_ways("\u23BE", "\xA7\xC2", 'euc-jp-2004') # ⎾
|
||||
check_both_ways("\u23CC", "\xA7\xD0", 'euc-jp-2004') # ⏌
|
||||
check_both_ways("\u30F7", "\xA7\xF2", 'euc-jp-2004') # ヷ
|
||||
check_both_ways("\u3251", "\xA8\xC1", 'euc-jp-2004') # ㉑
|
||||
check_both_ways("\u{20B9F}", "\xCF\xD4", 'euc-jp-2004') # 𠮑
|
||||
check_both_ways("\u541E", "\xCF\xFE", 'euc-jp-2004') # 吞
|
||||
check_both_ways("\u6A97", "\xDD\xA1", 'euc-jp-2004') # 檗
|
||||
check_both_ways("\u6BEF", "\xDD\xDF", 'euc-jp-2004') # 毯
|
||||
check_both_ways("\u9EBE", "\xDD\xE0", 'euc-jp-2004') # 麾
|
||||
check_both_ways("\u6CBE", "\xDD\xFE", 'euc-jp-2004') # 沾
|
||||
check_both_ways("\u6CBA", "\xDE\xA1", 'euc-jp-2004') # 沺
|
||||
check_both_ways("\u6ECC", "\xDE\xFE", 'euc-jp-2004') # 滌
|
||||
check_both_ways("\u6F3E", "\xDF\xA1", 'euc-jp-2004') # 漾
|
||||
check_both_ways("\u70DD", "\xDF\xDF", 'euc-jp-2004') # 烝
|
||||
check_both_ways("\u70D9", "\xDF\xE0", 'euc-jp-2004') # 烙
|
||||
check_both_ways("\u71FC", "\xDF\xFE", 'euc-jp-2004') # 燼
|
||||
check_both_ways("\u71F9", "\xE0\xA1", 'euc-jp-2004') # 燹
|
||||
check_both_ways("\u73F1", "\xE0\xFE", 'euc-jp-2004') # 珱
|
||||
check_both_ways("\u5653", "\xF4\xA7", 'euc-jp-2004') # 噓
|
||||
#check_both_ways("\u9ADC", "\xFC\xE3", 'euc-jp') # 髜 (IBM extended)
|
||||
|
||||
check_both_ways("\u9DD7", "\xFE\xE5", 'euc-jp-2004') # 鷗
|
||||
check_both_ways("\u{2000B}", "\xAE\xA2", 'euc-jp-2004') # 𠀋
|
||||
check_both_ways("\u{2A6B2}", "\x8F\xFE\xF6", 'euc-jp-2004') # 𪚲
|
||||
|
||||
check_both_ways("\u677E\u672C\u884C\u5F18", "\xBE\xBE\xCB\xDC\xB9\xD4\xB9\xB0", 'euc-jp-2004') # 松本行弘
|
||||
check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC0\xC4\xBB\xB3\xB3\xD8\xB1\xA1\xC2\xE7\xB3\xD8", 'euc-jp-2004') # 青山学院大学
|
||||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xBF\xC0\xCE\xD3\xB5\xC1\xC7\xEE", 'euc-jp-2004') # 神林義博
|
||||
end
|
||||
|
||||
def test_eucjp_ms
|
||||
check_both_ways("\u2116", "\xAD\xE2", 'eucJP-ms') # NUMERO SIGN
|
||||
check_both_ways("\u221A", "\xA2\xE5", 'eucJP-ms') # SQUARE ROOT
|
||||
|
|
|
@ -704,14 +704,20 @@ def citrus_decode_mapsrc(ces, csid, mapsrcs)
|
|||
mapsrcs.split(',').each do |mapsrc|
|
||||
path = [$srcdir]
|
||||
mode = nil
|
||||
if mapsrc.rindex('UCS', 0)
|
||||
if mapsrc.rindex(/UCS(?:@[A-Z]+)?/, 0)
|
||||
mode = :from_ucs
|
||||
from = mapsrc[4..-1]
|
||||
from = mapsrc[$&.size+1..-1]
|
||||
path << SUBDIR.find{|x| from.rindex(x, 0) }
|
||||
else
|
||||
mode = :to_ucs
|
||||
path << SUBDIR.find{|x| mapsrc.rindex(x, 0) }
|
||||
end
|
||||
if /\bUCS@(BMP|SMP|SIP|TIP|SSP)\b/ =~ mapsrc
|
||||
plane = {"BMP"=>0, "SMP"=>1, "SIP"=>2, "TIP"=>3, "SSP"=>14}[$1]
|
||||
else
|
||||
plane = 0
|
||||
end
|
||||
plane <<= 16
|
||||
path << mapsrc.gsub(':', '@')
|
||||
path = File.join(*path)
|
||||
path << ".src"
|
||||
|
@ -730,14 +736,14 @@ def citrus_decode_mapsrc(ces, csid, mapsrcs)
|
|||
when /0x(\w+)\s*-\s*0x(\w+)\s*=\s*INVALID/
|
||||
# Citrus OOB_MODE
|
||||
when /(0x\w+)\s*=\s*(0x\w+)/
|
||||
table.push << [$1.hex, citrus_cstomb(ces, csid, $2.hex)]
|
||||
table.push << [plane | $1.hex, citrus_cstomb(ces, csid, $2.hex)]
|
||||
else
|
||||
raise "unknown notation '%s'"% l
|
||||
end
|
||||
when :to_ucs
|
||||
case l
|
||||
when /(0x\w+)\s*=\s*(0x\w+)/
|
||||
table.push << [citrus_cstomb(ces, csid, $1.hex), $2.hex]
|
||||
table.push << [citrus_cstomb(ces, csid, $1.hex), plane | $2.hex]
|
||||
else
|
||||
raise "unknown notation '%s'"% l
|
||||
end
|
||||
|
@ -919,6 +925,10 @@ ValidEncoding = {
|
|||
'CP51932' => '{00-7f}
|
||||
{a1-fe}{a1-fe}
|
||||
8e{a1-fe}',
|
||||
'EUC-JP-2004' => '{00-7f}
|
||||
{a1-fe}{a1-fe}
|
||||
8e{a1-fe}
|
||||
8f{a1-fe}{a1-fe}',
|
||||
'Shift_JIS' => '{00-7f}
|
||||
{81-9f,e0-fc}{40-7e,80-fc}
|
||||
{a1-df}',
|
||||
|
|
Загрузка…
Ссылка в новой задаче