зеркало из https://github.com/github/ruby.git
* enc/trans/single_byte.trans: added windows-1252
* enc/trans/windows-1252-tbl.rb: new file (contributed by Yoshihiro Kambayashi) * tool/transcode-tblgen.rb: listed windows-1252 as '1byte' * test/ruby/test_transcode.rb: added test_windows_1252 (contributed by Yoshihiro Kambayashi) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19778 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
9639bf54ec
Коммит
48a303c027
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
|||
Tue Oct 14 13:30:30 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* enc/trans/single_byte.trans: added windows-1252
|
||||
|
||||
* enc/trans/windows-1252-tbl.rb: new file
|
||||
(contributed by Yoshihiro Kambayashi)
|
||||
|
||||
* tool/transcode-tblgen.rb: listed windows-1252 as '1byte'
|
||||
|
||||
* test/ruby/test_transcode.rb: added test_windows_1252
|
||||
(contributed by Yoshihiro Kambayashi)
|
||||
|
||||
Tue Oct 14 12:22:32 2008 Kazuhiro NISHIYAMA <zn@mbf.nifty.com>
|
||||
|
||||
* test/ruby/test_variable.rb (TestVariable#test_variable): add
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
require 'iso-8859-13-tbl'
|
||||
require 'iso-8859-14-tbl'
|
||||
require 'iso-8859-15-tbl'
|
||||
require 'windows-1252-tbl'
|
||||
|
||||
transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map
|
||||
transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map
|
||||
|
@ -35,6 +36,15 @@
|
|||
code
|
||||
end
|
||||
|
||||
# Generates transcoder source for a Windows single-byte code page, covering
# both directions between +name+ and UTF-8.
#
# name::       encoding name handed to transcode_tblgen (e.g. "WINDOWS-1252").
# tbl_to_ucs:: array of [byte_hex_string, code_point] pairs.
#
# Returns the two fragments produced by transcode_tblgen joined by a newline:
# first name -> UTF-8, then UTF-8 -> name. Bytes 00-7f are registered with the
# :nomap action in both directions; any byte that is neither in that range nor
# listed in +tbl_to_ucs+ gets no entry at all.
def transcode_tblgen_windows(name, tbl_to_ucs)
  code = ''
  code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs])
  code << "\n"
  # The reverse direction reuses the same pairs with source and target swapped.
  code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
  code
end
|
||||
|
||||
transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL)
|
||||
|
@ -49,6 +59,7 @@
|
|||
transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL)
|
||||
transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL)
|
||||
transcode_tblgen_windows("WINDOWS-1252", WINDOWS_1252_TO_UCS_TBL)
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
# Mapping from windows-1252 bytes (two-digit uppercase hex strings) to Unicode
# code points, for every assigned byte above 0x7F.
#
# Bytes 81, 8D, 8F, 90 and 9D have no entry. The array is frozen so that the
# shared table cannot be modified by accident.
WINDOWS_1252_TO_UCS_TBL = [
  # A0-FF: each byte maps to the code point with the same value.
  *(0xA0..0xFF).map {|byte| ["%02X" % byte, byte] },

  # 80-9F: the remaining assigned bytes, ordered by code point.
  ["8C",0x152],  # LATIN CAPITAL LIGATURE OE
  ["9C",0x153],  # LATIN SMALL LIGATURE OE
  ["8A",0x160],  # LATIN CAPITAL LETTER S WITH CARON
  ["9A",0x161],  # LATIN SMALL LETTER S WITH CARON
  ["9F",0x178],  # LATIN CAPITAL LETTER Y WITH DIAERESIS
  ["8E",0x17D],  # LATIN CAPITAL LETTER Z WITH CARON
  ["9E",0x17E],  # LATIN SMALL LETTER Z WITH CARON
  ["83",0x192],  # LATIN SMALL LETTER F WITH HOOK
  ["88",0x2C6],  # MODIFIER LETTER CIRCUMFLEX ACCENT
  ["98",0x2DC],  # SMALL TILDE
  ["96",0x2013], # EN DASH
  ["97",0x2014], # EM DASH
  ["91",0x2018], # LEFT SINGLE QUOTATION MARK
  ["92",0x2019], # RIGHT SINGLE QUOTATION MARK
  ["82",0x201A], # SINGLE LOW-9 QUOTATION MARK
  ["93",0x201C], # LEFT DOUBLE QUOTATION MARK
  ["94",0x201D], # RIGHT DOUBLE QUOTATION MARK
  ["84",0x201E], # DOUBLE LOW-9 QUOTATION MARK
  ["86",0x2020], # DAGGER
  ["87",0x2021], # DOUBLE DAGGER
  ["95",0x2022], # BULLET
  ["85",0x2026], # HORIZONTAL ELLIPSIS
  ["89",0x2030], # PER MILLE SIGN
  ["8B",0x2039], # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  ["9B",0x203A], # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  ["80",0x20AC], # EURO SIGN
  ["99",0x2122], # TRADE MARK SIGN
].freeze
|
|
@ -133,6 +133,33 @@ class TestTranscode < Test::Unit::TestCase
|
|||
end
|
||||
end
|
||||
|
||||
# Round-trips a sample of windows-1252 characters through UTF-8 and checks
# that decoding the bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D raises
# Encoding::UndefinedConversionError.
def test_windows_1252
  # [utf8, raw] pairs, listed in the order they are checked. A nil utf8 marks
  # a byte whose conversion to UTF-8 must raise.
  [
    ["\u20AC", "\x80"], # €
    [nil,      "\x81"],
    ["\u201A", "\x82"], # ‚
    ["\u0152", "\x8C"], # Œ
    [nil,      "\x8D"],
    ["\u017D", "\x8E"], # Ž
    [nil,      "\x8F"],
    [nil,      "\x90"],
    ["\u2018", "\x91"], # ‘
    ["\u0153", "\x9C"], # œ
    [nil,      "\x9D"],
    ["\u017E", "\x9E"], # ž
    ["\u00A0", "\xA0"], # non-breaking space
    ["\u00AF", "\xAF"], # ¯
    ["\u00B0", "\xB0"], # °
    ["\u00BF", "\xBF"], # ¿
    ["\u00C0", "\xC0"], # À
    ["\u00CF", "\xCF"], # Ï
    ["\u00D0", "\xD0"], # Ð
    ["\u00DF", "\xDF"], # ß
    ["\u00E0", "\xE0"], # à
    ["\u00EF", "\xEF"], # ï
    ["\u00F0", "\xF0"], # ð
    ["\u00FF", "\xFF"], # ÿ
  ].each do |utf8, raw|
    if utf8.nil?
      assert_raise(Encoding::UndefinedConversionError) { raw.encode("utf-8", 'windows-1252') }
    else
      check_both_ways(utf8, raw, 'windows-1252')
    end
  end
end
|
||||
|
||||
def check_utf_16_both_ways(utf8, raw)
|
||||
copy = raw.dup
|
||||
0.step(copy.length-1, 2) { |i| copy[i+1], copy[i] = copy[i], copy[i+1] }
|
||||
|
|
|
@ -742,6 +742,7 @@ ValidEncoding = {
|
|||
'ISO-8859-13' => '1byte',
|
||||
'ISO-8859-14' => '1byte',
|
||||
'ISO-8859-15' => '1byte',
|
||||
'WINDOWS-1252' => '1byte',
|
||||
'Windows-31J' => 'Shift_JIS',
|
||||
'eucJP-ms' => 'EUC-JP'
|
||||
}.each {|k, v|
|
||||
|
|
Загрузка…
Ссылка в новой задаче