зеркало из https://github.com/github/ruby.git
* enc/trans/japanese.trans: new file.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19086 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
8108417c5e
Коммит
4406629bd6
|
@ -1,3 +1,7 @@
|
|||
Wed Sep 3 20:08:35 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* enc/trans/japanese.trans: new file.
|
||||
|
||||
Wed Sep 3 20:04:33 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* tool/transcode-tblgen.rb (ActionMap#gen_array_code): extracted from
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
#include "transcode_data.h"
|
||||
|
||||
<%
|
||||
map = {}
|
||||
map["{00-7f}"] = :nomap
|
||||
map["{a1-fe}{a1-fe}"] = :func_so
|
||||
map["8e{a1-df}"] = :func_so
|
||||
map["8e{e0-fe}"] = :undef
|
||||
map["8f{a1-fe}{a1-fe}"] = :undef
|
||||
transcode_generate_node(ActionMap.parse(map), "eucjp2sjis")
|
||||
|
||||
map = {}
|
||||
map["{00-7f}"] = :nomap
|
||||
map["{81-9f,e0-ef}{40-7e,80-fc}"] = :func_so
|
||||
map["{f0-fc}{40-7e,80-fc}"] = :undef
|
||||
map["{a1-df}"] = :func_so
|
||||
transcode_generate_node(ActionMap.parse(map), "sjis2eucjp")
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
||||
static int
|
||||
fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o)
|
||||
{
|
||||
if (s[0] == 0x8e) {
|
||||
o[0] = s[1];
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
int h, m, l;
|
||||
m = s[0] & 1;
|
||||
h = (s[0]+m) >> 1;
|
||||
h += s[0] < 0xdf ? 0x30 : 0x70;
|
||||
l = s[1] - m * 94 - 3;
|
||||
if (0x7f <= l)
|
||||
l++;
|
||||
o[0] = h;
|
||||
o[1] = l;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o)
|
||||
{
|
||||
if (l == 1) {
|
||||
o[0] = '\x8e';
|
||||
o[1] = s[0];
|
||||
return 2;
|
||||
}
|
||||
else {
|
||||
int h, m, l;
|
||||
h = s[0];
|
||||
l = s[1];
|
||||
if (0xe0 <= h)
|
||||
h -= 64;
|
||||
l += l < 0x80 ? 0x61 : 0x60;
|
||||
h = h * 2 - 0x61;
|
||||
if (0xfe < l) {
|
||||
l -= 94;
|
||||
h += 1;
|
||||
}
|
||||
o[0] = h;
|
||||
o[1] = l;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Transcoder descriptor for the EUC-JP -> Shift_JIS direction; registered by Init_japanese(). */
static const rb_transcoder
|
||||
rb_eucjp2sjis = {
|
||||
"EUC-JP", "Shift_JIS", eucjp2sjis, /* encoding names and the table generated under the name "eucjp2sjis" */
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
3, /* max_input: the longest mapped pattern is three bytes (8f + two bytes) */
|
||||
2, /* max_output: fun_so_eucjp2sjis stores at most two bytes */
|
||||
stateless_converter, /* stateful_type */
|
||||
NULL, NULL, NULL, fun_so_eucjp2sjis /* NOTE(review): last slot is presumably the :func_so callback -- confirm in transcode_data.h */
|
||||
};
|
||||
|
||||
/* Transcoder descriptor for the Shift_JIS -> EUC-JP direction; registered by Init_japanese(). */
static const rb_transcoder
|
||||
rb_sjis2eucjp = {
|
||||
"Shift_JIS", "EUC-JP", sjis2eucjp, /* encoding names and the table generated under the name "sjis2eucjp" */
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
2, /* max_input: the longest mapped pattern is two bytes */
|
||||
2, /* max_output: fun_so_sjis2eucjp always stores two bytes */
|
||||
stateless_converter, /* stateful_type */
|
||||
NULL, NULL, NULL, fun_so_sjis2eucjp /* NOTE(review): last slot is presumably the :func_so callback -- confirm in transcode_data.h */
|
||||
};
|
||||
|
||||
/*
 * Registers the two converters defined in this file (EUC-JP -> Shift_JIS
 * and Shift_JIS -> EUC-JP) through rb_register_transcoder().
 * NOTE(review): presumably called when this transcoder library is loaded -- confirm.
 */
void
|
||||
Init_japanese(void)
|
||||
{
|
||||
rb_register_transcoder(&rb_eucjp2sjis);
|
||||
rb_register_transcoder(&rb_sjis2eucjp);
|
||||
}
|
|
@ -385,6 +385,12 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
# A lone 0xFF byte fed to the direct EUC-JP -> Shift_JIS converter must be
# reported as an invalid byte sequence.
def test_errinfo_invalid_euc_jp
  converter = Encoding::Converter.new("EUC-JP", "Shift_JIS")
  input = "\xff"
  output = ""
  converter.primitive_convert(input, output, nil, 10)
  assert_errinfo(:invalid_byte_sequence, "EUC-JP", "Shift_JIS", "\xFF", "", converter)
end
|
||||
|
||||
# Same invalid input, but with ISO-8859-1 as the requested destination.
# The expected errinfo names "UTF-8" as the destination.
# NOTE(review): presumably because this conversion is routed through UTF-8
# and the failure happens in the EUC-JP -> UTF-8 step -- confirm.
def test_errinfo_invalid_euc_jp2
  converter = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
  input = "\xff"
  output = ""
  converter.primitive_convert(input, output, nil, 10)
  assert_errinfo(:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", "", converter)
end
|
||||
|
||||
|
|
|
@ -57,6 +57,11 @@ class TestTranscode < Test::Unit::TestCase
|
|||
assert_equal(raw.force_encoding(encoding), utf8.encode(encoding, 'utf-8'))
|
||||
end
|
||||
|
||||
# Asserts that str1 (bytes in enc1) and str2 (bytes in enc2) convert into
# each other in both directions.
#
# Note: both strings are re-tagged in place via String#force_encoding.
def check_both_ways2(str1, enc1, str2, enc2)
  # enc2 -> enc1
  expected_in_enc1 = str1.force_encoding(enc1)
  actual_in_enc1 = str2.encode(enc1, enc2)
  assert_equal(expected_in_enc1, actual_in_enc1)

  # enc1 -> enc2
  expected_in_enc2 = str2.force_encoding(enc2)
  actual_in_enc2 = str1.encode(enc2, enc1)
  assert_equal(expected_in_enc2, actual_in_enc2)
end
|
||||
|
||||
def test_encodings
|
||||
check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D",
|
||||
"\x82\xdc\x82\xc2\x82\xe0\x82\xc6 \x82\xe4\x82\xab\x82\xd0\x82\xeb", 'shift_jis') # まつもと ゆきひろ
|
||||
|
@ -434,6 +439,67 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\uFF5E", "\xA1\xC1", 'eucJP-ms') # WAVE DASH
|
||||
end
|
||||
|
||||
# Round-trips boundary characters between EUC-JP and Shift_JIS.
# Each row is [EUC-JP bytes, Shift_JIS bytes]; rows are checked in order.
def test_eucjp_sjis
  [
    ["\xa1\xa1", "\x81\x40"],
    ["\xa1\xdf", "\x81\x7e"],
    ["\xa1\xe0", "\x81\x80"],
    ["\xa1\xfe", "\x81\x9e"],
    ["\xa2\xa1", "\x81\x9f"],
    ["\xa2\xfe", "\x81\xfc"],

    ["\xdd\xa1", "\x9f\x40"],
    ["\xdd\xdf", "\x9f\x7e"],
    ["\xdd\xe0", "\x9f\x80"],
    ["\xdd\xfe", "\x9f\x9e"],
    ["\xde\xa1", "\x9f\x9f"],
    ["\xde\xfe", "\x9f\xfc"],

    ["\xdf\xa1", "\xe0\x40"],
    ["\xdf\xdf", "\xe0\x7e"],
    ["\xdf\xe0", "\xe0\x80"],
    ["\xdf\xfe", "\xe0\x9e"],
    ["\xe0\xa1", "\xe0\x9f"],
    ["\xe0\xfe", "\xe0\xfc"],

    ["\xf4\xa1", "\xea\x9f"],
    ["\xf4\xa2", "\xea\xa0"],
    ["\xf4\xa3", "\xea\xa1"],
    ["\xf4\xa4", "\xea\xa2"], # end of JIS X 0208 1983
    ["\xf4\xa5", "\xea\xa3"],
    ["\xf4\xa6", "\xea\xa4"], # end of JIS X 0208 1990

    # 0x8e-prefixed EUC-JP sequences map to a single Shift_JIS byte
    ["\x8e\xa1", "\xa1"],
    ["\x8e\xdf", "\xdf"],
  ].each do |eucjp, sjis|
    check_both_ways2(eucjp, "EUC-JP", sjis, "Shift_JIS")
  end
end
|
||||
|
||||
# Round-trips EUC-JP lead bytes 0xfd/0xfe against Shift_JIS lead byte 0xef.
# These pairs are converted arithmetically like any other two-byte sequence.
# Each row is [EUC-JP bytes, Shift_JIS bytes].
#
# The original listed the first pair twice; the duplicate assertion added
# no coverage and has been dropped.
def test_eucjp_sjis_unassigned
  [
    ["\xfd\xa1", "\xef\x40"],
    ["\xfd\xdf", "\xef\x7e"],
    ["\xfd\xe0", "\xef\x80"],
    ["\xfd\xfe", "\xef\x9e"],
    ["\xfe\xa1", "\xef\x9f"],
    ["\xfe\xfe", "\xef\xfc"],
  ].each do |eucjp, sjis|
    check_both_ways2(eucjp, "EUC-JP", sjis, "Shift_JIS")
  end
end
|
||||
|
||||
# Sequences that are well-formed in the source encoding but have no
# counterpart in the destination must raise Encoding::ConversionUndefined.
def test_eucjp_sjis_undef
  # EUC-JP side: 0x8e followed by e0-fe, and every 0x8f-prefixed
  # three-byte sequence.
  [
    "\x8e\xe0",
    "\x8e\xfe",
    "\x8f\xa1\xa1",
    "\x8f\xa1\xfe",
    "\x8f\xfe\xa1",
    "\x8f\xfe\xfe",
  ].each do |eucjp|
    assert_raise(Encoding::ConversionUndefined) { eucjp.encode("Shift_JIS", "EUC-JP") }
  end

  # Shift_JIS side: lead bytes f0-fc.
  [
    "\xf0\x40",
    "\xf0\x7e",
    "\xf0\x80",
    "\xf0\xfc",
    "\xfc\x40",
    "\xfc\x7e",
    "\xfc\x80",
    "\xfc\xfc",
  ].each do |sjis|
    assert_raise(Encoding::ConversionUndefined) { sjis.encode("EUC-JP", "Shift_JIS") }
  end
end
|
||||
|
||||
def test_iso_2022_jp
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x1b(A".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x1b$(A".encode("utf-8", "iso-2022-jp") }
|
||||
|
|
Загрузка…
Ссылка в новой задаче