зеркало из https://github.com/github/ruby.git
* enc/trans/utf_16_32.trans: add the UTF-32 converter.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29895 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
bcf80a5140
Коммит
38b482be8c
|
@ -1,3 +1,9 @@
|
|||
Wed Nov 24 06:35:32 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* enc/trans/utf_16_32.trans: add the UTF-32 converter.
|
||||
|
||||
Wed Nov 24 05:40:33 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
Wed Nov 24 06:13:32 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* win32/win32.c (filecp, wstr_to_mbstr, mbstr_to_wstr):
|
||||
|
@ -5,7 +11,7 @@ Wed Nov 24 06:13:32 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
|||
|
||||
Wed Nov 24 05:40:33 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* enc/trans/utf_16_32.trans: add a convert from UTF-8 to UTF-16.
|
||||
* enc/trans/utf_16_32.trans: add a converter from UTF-8 to UTF-16.
|
||||
|
||||
Wed Nov 24 03:21:35 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
|
|
|
@ -25,6 +25,10 @@
|
|||
map["{00-ff}{00-ff}"] = :func_si
|
||||
transcode_generate_node(ActionMap.parse(map), "from_UTF_16")
|
||||
|
||||
map = {}
|
||||
map["{00-ff}{00-ff}{00-ff}{00-ff}"] = :func_si
|
||||
transcode_generate_node(ActionMap.parse(map), "from_UTF_32")
|
||||
|
||||
map = {}
|
||||
map["{00-7f}"] = :func_so
|
||||
map["{c2-df}{80-bf}"] = :func_so
|
||||
|
@ -321,6 +325,48 @@ fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char
|
|||
return 0;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
switch (*sp) {
|
||||
case 0:
|
||||
if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) {
|
||||
*sp = BE;
|
||||
return ZERObt;
|
||||
}
|
||||
else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) {
|
||||
*sp = LE;
|
||||
return ZERObt;
|
||||
}
|
||||
break;
|
||||
case BE:
|
||||
if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) ||
|
||||
(s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2]))))
|
||||
return (VALUE)FUNso;
|
||||
break;
|
||||
case LE:
|
||||
if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) ||
|
||||
(s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1]))))
|
||||
return (VALUE)FUNso;
|
||||
break;
|
||||
}
|
||||
return (VALUE)INVALID;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
switch (*sp) {
|
||||
case BE:
|
||||
return fun_so_from_utf_32be(statep, s, l, o, osize);
|
||||
case LE:
|
||||
return fun_so_from_utf_32le(statep, s, l, o, osize);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
|
||||
{
|
||||
|
@ -334,6 +380,21 @@ fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *
|
|||
return fun_so_to_utf_16be(statep, s, l, o, osize);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
if (*sp == 0) {
|
||||
*o++ = 0x00;
|
||||
*o++ = 0x00;
|
||||
*o++ = 0xFE;
|
||||
*o++ = 0xFF;
|
||||
*sp = 1;
|
||||
return 4 + fun_so_to_utf_32be(statep, s, l, o, osize);
|
||||
}
|
||||
return fun_so_to_utf_32be(statep, s, l, o, osize);
|
||||
}
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_16BE = {
|
||||
"UTF-16BE", "UTF-8", from_UTF_16BE,
|
||||
|
@ -442,6 +503,18 @@ rb_from_UTF_16 = {
|
|||
NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_from_UTF_32 = {
|
||||
"UTF-32", "UTF-8", from_UTF_32,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
4, /* input_unit_length */
|
||||
4, /* max_input */
|
||||
4, /* max_output */
|
||||
asciicompat_decoder, /* asciicompat_type */
|
||||
1, state_init, NULL, /* state_size, state_init, state_fini */
|
||||
NULL, fun_si_from_utf_32, NULL, fun_so_from_utf_32
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_16 = {
|
||||
"UTF-8", "UTF-16", from_UTF_8,
|
||||
|
@ -454,6 +527,18 @@ rb_to_UTF_16 = {
|
|||
NULL, NULL, NULL, fun_so_to_utf_16
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_to_UTF_32 = {
|
||||
"UTF-8", "UTF-32", from_UTF_8,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
4, /* max_input */
|
||||
4, /* max_output */
|
||||
asciicompat_encoder, /* asciicompat_type */
|
||||
1, state_init, NULL, /* state_size, state_init, state_fini */
|
||||
NULL, NULL, NULL, fun_so_to_utf_32
|
||||
};
|
||||
|
||||
void
|
||||
Init_utf_16_32(void)
|
||||
{
|
||||
|
@ -467,4 +552,6 @@ Init_utf_16_32(void)
|
|||
rb_register_transcoder(&rb_to_UTF_32LE);
|
||||
rb_register_transcoder(&rb_from_UTF_16);
|
||||
rb_register_transcoder(&rb_to_UTF_16);
|
||||
rb_register_transcoder(&rb_from_UTF_32);
|
||||
rb_register_transcoder(&rb_to_UTF_32);
|
||||
}
|
||||
|
|
|
@ -1027,6 +1027,13 @@ class TestTranscode < Test::Unit::TestCase
|
|||
assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16")}
|
||||
end
|
||||
|
||||
# Checks UTF-32 conversion with a byte-order mark: little-endian input is
# decoded, big-endian data converts in both directions, and a code point
# above U+10FFFF is rejected.
def test_utf_32_bom
  utf8 = "\u{3042}\u{3044}\u{20bb7}"
  little_endian = ["fffe00004230000044300000b70b0200"].pack("H*")
  big_endian = ["0000feff000030420000304400020bb7"].pack("H*")
  # BOM followed by 0x00110000, which lies beyond the last Unicode plane.
  out_of_range = ["0000feff00110000"].pack("H*")

  assert_equal(utf8, little_endian.encode("UTF-8", "UTF-32"))
  check_both_ways(utf8, big_endian, "UTF-32")
  assert_raise(Encoding::InvalidByteSequenceError) do
    out_of_range.encode("UTF-8", "UTF-32")
  end
end
|
||||
|
||||
def check_utf_32_both_ways(utf8, raw)
|
||||
copy = raw.dup
|
||||
0.step(copy.length-1, 4) do |i|
|
||||
|
|
Загрузка…
Ссылка в новой задаче