From 7f38397b6c01b65219deb622c027e177bdc23ede Mon Sep 17 00:00:00 2001 From: naruse Date: Tue, 23 Nov 2010 20:49:56 +0000 Subject: [PATCH] * enc/trans/utf_16_32.trans: add a convert from UTF-8 to UTF-16. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 ++++ enc/trans/utf_16_32.trans | 30 ++++++++++++++++++++++++++++-- test/ruby/test_transcode.rb | 6 +++--- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2f778111b3..c0b870e469 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Wed Nov 24 05:40:33 2010 NARUSE, Yui + + * enc/trans/utf_16_32.trans: add a convert from UTF-8 to UTF-16. + Wed Nov 24 03:21:35 2010 NARUSE, Yui * enc/trans/utf_16_32.trans: raise error on unpaired upper diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans index 2d7005993e..01caffe02c 100644 --- a/enc/trans/utf_16_32.trans +++ b/enc/trans/utf_16_32.trans @@ -289,7 +289,7 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l) } break; case BE: - if (s[0] < 0xD8 && 0xDF < s[0]) { + if (s[0] < 0xD8 || 0xDF < s[0]) { return (VALUE)FUNso; } else if (s[0] <= 0xDB) { @@ -297,7 +297,7 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l) } break; case LE: - if (s[1] < 0xD8 && 0xDF < s[1]) { + if (s[1] < 0xD8 || 0xDF < s[1]) { return (VALUE)FUNso; } else if (s[1] <= 0xDB) { @@ -321,6 +321,19 @@ fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char return 0; } +static ssize_t +fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + if (*sp == 0) { + *o++ = 0xFE; + *o++ = 0xFF; + *sp = 1; + return 2 + fun_so_to_utf_16be(statep, s, l, o, osize); + } + return fun_so_to_utf_16be(statep, s, l, o, osize); +} + static const rb_transcoder rb_from_UTF_16BE = { "UTF-16BE", "UTF-8", from_UTF_16BE, @@ -429,6 +442,18 @@ rb_from_UTF_16 = { NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16 }; +static const rb_transcoder +rb_to_UTF_16 = { + "UTF-8", "UTF-16", from_UTF_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 1, state_init, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_to_utf_16 +}; + void Init_utf_16_32(void) { @@ -441,4 +466,5 @@ Init_utf_16_32(void) rb_register_transcoder(&rb_from_UTF_32LE); rb_register_transcoder(&rb_to_UTF_32LE); rb_register_transcoder(&rb_from_UTF_16); + rb_register_transcoder(&rb_to_UTF_16); } diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index bf4ca2bf6d..cca7aabac9 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -1022,9 +1022,9 @@ class TestTranscode < Test::Unit::TestCase def test_utf_16_bom expected = "\u{3042}\u{3044}\u{20bb7}" assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16")) - assert_equal(expected, %w/feff30423044d842dfb7/.pack("H*").encode("UTF-8","UTF-16")) - assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16")) - assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16")) + check_both_ways(expected, %w/feff30423044d842dfb7/.pack("H*"), "UTF-16") + assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16")} + assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16")} end def check_utf_32_both_ways(utf8, raw)