зеркало из https://github.com/github/ruby.git
* enc/utf_16be.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
(UTF16_IS_SURROGATE_SECOND): ditto. (UTF16_IS_SURROGATE): defined. (utf16be_mbc_enc_len): validation implemented. * enc/utf_16le.c (UTF16_IS_SURROGATE_FIRST): avoid branch. (UTF16_IS_SURROGATE_SECOND): ditto. (UTF16_IS_SURROGATE): defined. (utf16le_mbc_enc_len): validation implemented. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15338 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
8e36fa69fd
Коммит
44cfd58dc5
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
||||||
|
Wed Jan 30 12:26:59 2008 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* enc/utf_16be.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
|
||||||
|
(UTF16_IS_SURROGATE_SECOND): ditto.
|
||||||
|
(UTF16_IS_SURROGATE): defined.
|
||||||
|
(utf16be_mbc_enc_len): validation implemented.
|
||||||
|
|
||||||
|
* enc/utf_16le.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
|
||||||
|
(UTF16_IS_SURROGATE_SECOND): ditto.
|
||||||
|
(UTF16_IS_SURROGATE): defined.
|
||||||
|
(utf16le_mbc_enc_len): validation implemented.
|
||||||
|
|
||||||
Wed Jan 30 12:06:43 2008 Tadayoshi Funaba <tadf@dotrb.org>
|
Wed Jan 30 12:06:43 2008 Tadayoshi Funaba <tadf@dotrb.org>
|
||||||
|
|
||||||
* bignum.c (rb_cstr_to_inum): '0_2' is a valid representation.
|
* bignum.c (rb_cstr_to_inum): '0_2' is a valid representation.
|
||||||
|
|
|
@ -29,8 +29,9 @@
|
||||||
|
|
||||||
#include "regenc.h"
|
#include "regenc.h"
|
||||||
|
|
||||||
#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
|
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||||
#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
|
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||||
|
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
|
||||||
|
|
||||||
static const int EncLen_UTF16[] = {
|
static const int EncLen_UTF16[] = {
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
@ -55,7 +56,28 @@ static int
|
||||||
utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
|
utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
|
||||||
OnigEncoding enc ARG_UNUSED)
|
OnigEncoding enc ARG_UNUSED)
|
||||||
{
|
{
|
||||||
return EncLen_UTF16[*p];
|
int byte = p[0];
|
||||||
|
if (!UTF16_IS_SURROGATE(byte)) {
|
||||||
|
if (2 <= e-p)
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2);
|
||||||
|
else
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
|
||||||
|
}
|
||||||
|
if (UTF16_IS_SURROGATE_FIRST(byte)) {
|
||||||
|
switch (e-p) {
|
||||||
|
case 1: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(3);
|
||||||
|
case 2: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2);
|
||||||
|
case 3:
|
||||||
|
if (UTF16_IS_SURROGATE_SECOND(p[2]))
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (UTF16_IS_SURROGATE_SECOND(p[2]))
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
@ -29,8 +29,9 @@
|
||||||
|
|
||||||
#include "regenc.h"
|
#include "regenc.h"
|
||||||
|
|
||||||
#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
|
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||||
#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
|
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||||
|
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
|
||||||
|
|
||||||
static const int EncLen_UTF16[] = {
|
static const int EncLen_UTF16[] = {
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
@ -52,10 +53,23 @@ static const int EncLen_UTF16[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
utf16le_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
|
utf16le_mbc_enc_len(const UChar* p, const OnigUChar* e,
|
||||||
OnigEncoding enc ARG_UNUSED)
|
OnigEncoding enc ARG_UNUSED)
|
||||||
{
|
{
|
||||||
return EncLen_UTF16[*(p+1)];
|
int len = e-p, byte;
|
||||||
|
if (len < 2)
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
|
||||||
|
byte = p[1];
|
||||||
|
if (!UTF16_IS_SURROGATE(byte)) {
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2);
|
||||||
|
}
|
||||||
|
if (UTF16_IS_SURROGATE_FIRST(byte)) {
|
||||||
|
if (len < 4)
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-len);
|
||||||
|
if (UTF16_IS_SURROGATE_SECOND(p[3]))
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
|
||||||
|
}
|
||||||
|
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
@ -48,8 +48,59 @@ EOT
|
||||||
# tests start
|
# tests start
|
||||||
|
|
||||||
def test_utf16be_valid_encoding
|
def test_utf16be_valid_encoding
|
||||||
s = "\xd8\x00\xd8\x00".force_encoding("utf-16be")
|
[
|
||||||
assert_equal(false, s.valid_encoding?, "#{encdump s}.valid_encoding?")
|
"\x00\x00",
|
||||||
|
"\xd7\xff",
|
||||||
|
"\xd8\x00\xdc\x00",
|
||||||
|
"\xdb\xff\xdf\xff",
|
||||||
|
"\xe0\x00",
|
||||||
|
"\xff\xff",
|
||||||
|
].each {|s|
|
||||||
|
s.force_encoding("utf-16be")
|
||||||
|
assert_equal(true, s.valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||||
|
}
|
||||||
|
[
|
||||||
|
"\x00",
|
||||||
|
"\xd7",
|
||||||
|
"\xd8\x00",
|
||||||
|
"\xd8\x00\xd8\x00",
|
||||||
|
"\xdc\x00",
|
||||||
|
"\xdc\x00\xd8\x00",
|
||||||
|
"\xdc\x00\xdc\x00",
|
||||||
|
"\xe0",
|
||||||
|
"\xff",
|
||||||
|
].each {|s|
|
||||||
|
s.force_encoding("utf-16be")
|
||||||
|
assert_equal(false, s.valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_utf16le_valid_encoding
|
||||||
|
[
|
||||||
|
"\x00\x00",
|
||||||
|
"\xff\xd7",
|
||||||
|
"\x00\xd8\x00\xdc",
|
||||||
|
"\xff\xdb\xff\xdf",
|
||||||
|
"\x00\xe0",
|
||||||
|
"\xff\xff",
|
||||||
|
].each {|s|
|
||||||
|
s.force_encoding("utf-16le")
|
||||||
|
assert_equal(true, s.valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||||
|
}
|
||||||
|
[
|
||||||
|
"\x00",
|
||||||
|
"\xd7",
|
||||||
|
"\x00\xd8",
|
||||||
|
"\x00\xd8\x00\xd8",
|
||||||
|
"\x00\xdc",
|
||||||
|
"\x00\xdc\x00\xd8",
|
||||||
|
"\x00\xdc\x00\xdc",
|
||||||
|
"\xe0",
|
||||||
|
"\xff",
|
||||||
|
].each {|s|
|
||||||
|
s.force_encoding("utf-16le")
|
||||||
|
assert_equal(false, s.valid_encoding?, "#{encdump s}.valid_encoding?")
|
||||||
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_strftime
|
def test_strftime
|
||||||
|
|
Загрузка…
Ссылка в новой задаче