зеркало из https://github.com/github/ruby.git
* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
validation. * include/ruby/encoding.h (rb_enc_precise_mbclen): declared. (MBCLEN_CHARFOUND): new macro. (MBCLEN_INVALID): new macro. (MBCLEN_NEEDMORE): new macro. * include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len by precise_mbc_enc_len. (ONIGENC_PRECISE_MBC_ENC_LEN): new macro. (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro. (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro. (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro. (ONIGENC_MBCLEN_CHARFOUND): new macro. (ONIGENC_MBCLEN_INVALID): new macro. (ONIGENC_MBCLEN_NEEDMORE): new macro. (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN. * enc/euc_jp.c: validation implemented. * enc/sjis.c: ditto. * enc/utf8.c: ditto. * string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid encoding. (rb_str_valid_encoding_p): new method String#valid_encoding?. * io.c (rb_io_getc): use rb_enc_precise_mbclen. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
de4ec68991
Коммит
69406aad50
33
ChangeLog
33
ChangeLog
|
@ -1,3 +1,36 @@
|
||||||
|
Thu Dec 6 18:22:11 2007 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
|
||||||
|
validation.
|
||||||
|
|
||||||
|
* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
|
||||||
|
(MBCLEN_CHARFOUND): new macro.
|
||||||
|
(MBCLEN_INVALID): new macro.
|
||||||
|
(MBCLEN_NEEDMORE): new macro.
|
||||||
|
|
||||||
|
* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
|
||||||
|
by precise_mbc_enc_len.
|
||||||
|
(ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
|
||||||
|
(ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
|
||||||
|
(ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
|
||||||
|
(ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
|
||||||
|
(ONIGENC_MBCLEN_CHARFOUND): new macro.
|
||||||
|
(ONIGENC_MBCLEN_INVALID): new macro.
|
||||||
|
(ONIGENC_MBCLEN_NEEDMORE): new macro.
|
||||||
|
(ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.
|
||||||
|
|
||||||
|
* enc/euc_jp.c: validation implemented.
|
||||||
|
|
||||||
|
* enc/sjis.c: ditto.
|
||||||
|
|
||||||
|
* enc/utf8.c: ditto.
|
||||||
|
|
||||||
|
* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
|
||||||
|
encoding.
|
||||||
|
(rb_str_valid_encoding_p): new method String#valid_encoding?.
|
||||||
|
|
||||||
|
* io.c (rb_io_getc): use rb_enc_precise_mbclen.
|
||||||
|
|
||||||
Thu Dec 6 01:37:23 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Thu Dec 6 01:37:23 2007 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* regparse.c (i_apply_case_fold): fix for negative character class. a
|
* regparse.c (i_apply_case_fold): fix for negative character class. a
|
||||||
|
|
77
enc/euc_jp.c
77
enc/euc_jp.c
|
@ -50,10 +50,85 @@ static const int EncLen_EUCJP[] = {
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2 } state_t;
|
||||||
|
#define A ACCEPT
|
||||||
|
#define F FAILURE
|
||||||
|
static const signed char trans[][0x100] = {
|
||||||
|
{ /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 1, 2,
|
||||||
|
/* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F
|
||||||
|
},
|
||||||
|
{ /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F
|
||||||
|
},
|
||||||
|
{ /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F
|
||||||
|
},
|
||||||
|
|
||||||
|
};
|
||||||
|
#undef A
|
||||||
|
#undef F
|
||||||
|
|
||||||
static int
|
static int
|
||||||
mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
|
mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
|
||||||
{
|
{
|
||||||
return EncLen_EUCJP[*p];
|
int firstbyte = *p++;
|
||||||
|
state_t s;
|
||||||
|
s = trans[0][firstbyte];
|
||||||
|
if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
|
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-1);
|
||||||
|
s = trans[s][*p++];
|
||||||
|
if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
|
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-2);
|
||||||
|
s = trans[s][*p++];
|
||||||
|
return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
}
|
}
|
||||||
|
|
||||||
static OnigCodePoint
|
static OnigCodePoint
|
||||||
|
|
54
enc/sjis.c
54
enc/sjis.c
|
@ -70,10 +70,62 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
|
||||||
#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)
|
#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)
|
||||||
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
|
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
|
||||||
|
|
||||||
|
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
|
||||||
|
#define A ACCEPT
|
||||||
|
#define F FAILURE
|
||||||
|
static const signed char trans[][0x100] = {
|
||||||
|
{ /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
|
||||||
|
/* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, F, F, F
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#undef A
|
||||||
|
#undef F
|
||||||
|
|
||||||
static int
|
static int
|
||||||
mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
|
mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
|
||||||
{
|
{
|
||||||
return EncLen_SJIS[*p];
|
int firstbyte = *p++;
|
||||||
|
state_t s;
|
||||||
|
s = trans[0][firstbyte];
|
||||||
|
if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
|
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_SJIS[firstbyte]-1);
|
||||||
|
s = trans[s][*p++];
|
||||||
|
return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
180
enc/utf8.c
180
enc/utf8.c
|
@ -56,13 +56,189 @@ static const int EncLen_UTF8[] = {
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
|
4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
FAILURE = -2,
|
||||||
|
ACCEPT,
|
||||||
|
S0, S1, S2, S3,
|
||||||
|
S4, S5, S6, S7
|
||||||
|
} state_t;
|
||||||
|
#define A ACCEPT
|
||||||
|
#define F FAILURE
|
||||||
|
static const signed char trans[][0x100] = {
|
||||||
|
{ /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
|
||||||
|
/* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
|
||||||
|
/* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S4 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
/* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S5 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S6 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
{ /* S7 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||||
|
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
/* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
|
||||||
|
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
|
||||||
|
},
|
||||||
|
};
|
||||||
|
#undef A
|
||||||
|
#undef F
|
||||||
|
|
||||||
static int
|
static int
|
||||||
utf8_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
|
utf8_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
|
||||||
{
|
{
|
||||||
return EncLen_UTF8[*p];
|
int firstbyte = *p++;
|
||||||
|
state_t s;
|
||||||
|
s = trans[0][firstbyte];
|
||||||
|
if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
|
|
||||||
|
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-1);
|
||||||
|
s = trans[s][*p++];
|
||||||
|
if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
|
|
||||||
|
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-2);
|
||||||
|
s = trans[s][*p++];
|
||||||
|
if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
|
|
||||||
|
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3);
|
||||||
|
s = trans[s][*p++];
|
||||||
|
return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
|
||||||
|
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
@ -494,6 +494,12 @@ rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
|
||||||
|
{
|
||||||
|
return ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
rb_enc_codelen(int c, rb_encoding *enc)
|
rb_enc_codelen(int c, rb_encoding *enc)
|
||||||
{
|
{
|
||||||
|
|
|
@ -68,9 +68,15 @@ rb_encoding * rb_enc_find(const char *name);
|
||||||
#define rb_enc_mbminlen(enc) (enc)->min_enc_len
|
#define rb_enc_mbminlen(enc) (enc)->min_enc_len
|
||||||
#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
|
#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
|
||||||
|
|
||||||
/* ptr,encoding -> mbclen */
|
/* ptr,endptr,encoding -> mbclen */
|
||||||
int rb_enc_mbclen(const char*, const char *, rb_encoding*);
|
int rb_enc_mbclen(const char*, const char *, rb_encoding*);
|
||||||
|
|
||||||
|
/* ptr,endptr,encoding -> chlen, invalid or needmore */
|
||||||
|
int rb_enc_precise_mbclen(const char*, const char *, rb_encoding*);
|
||||||
|
#define MBCLEN_CHARFOUND(ret) ONIGENC_MBCLEN_CHARFOUND(ret)
|
||||||
|
#define MBCLEN_INVALID(ret) ONIGENC_MBCLEN_INVALID(ret)
|
||||||
|
#define MBCLEN_NEEDMORE(ret) ONIGENC_MBCLEN_NEEDMORE(ret)
|
||||||
|
|
||||||
/* code,encoding -> codelen */
|
/* code,encoding -> codelen */
|
||||||
int rb_enc_codelen(int, rb_encoding*);
|
int rb_enc_codelen(int, rb_encoding*);
|
||||||
|
|
||||||
|
|
|
@ -144,7 +144,7 @@ typedef struct {
|
||||||
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
|
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
|
||||||
|
|
||||||
typedef struct OnigEncodingTypeST {
|
typedef struct OnigEncodingTypeST {
|
||||||
int (*mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
|
int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
|
||||||
const char* name;
|
const char* name;
|
||||||
int max_enc_len;
|
int max_enc_len;
|
||||||
int min_enc_len;
|
int min_enc_len;
|
||||||
|
@ -282,7 +282,32 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
||||||
#define ONIGENC_STEP_BACK(enc,start,s,n) \
|
#define ONIGENC_STEP_BACK(enc,start,s,n) \
|
||||||
onigenc_step_back((enc),(start),(s),(n))
|
onigenc_step_back((enc),(start),(s),(n))
|
||||||
|
|
||||||
#define ONIGENC_MBC_ENC_LEN(enc,p,e) (enc)->mbc_enc_len(p,e,enc)
|
|
||||||
|
#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
|
||||||
|
#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
|
||||||
|
/*
 * Build the "need n more bytes" result code: the value -1-n, i.e. -2 for
 * one missing byte, -3 for two, and so on (-1 itself is the INVALID code).
 * The argument must be parenthesized because callers pass expressions such
 * as `EncLen_EUCJP[firstbyte]-1`; without the parentheses that expands to
 * -1-len-1 instead of -1-(len-1) and the encoded count is wrong.
 */
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
|
||||||
|
|
||||||
|
/*
 * Decode a precise-mbclen result code: a positive code is returned
 * unchanged; zero and every negative code yield 0.
 */
static inline int onigenc_mbclen_charfound(int r)
{
    if (r > 0) {
        return r;
    }
    return 0;
}
|
||||||
|
/*
 * Decode a precise-mbclen result code: codes below -1 yield -1 - r
 * (so -2 gives 1, -3 gives 2, ...); -1 and every non-negative code yield 0.
 */
static inline int onigenc_mbclen_needmore(int r)
{
    if (r >= -1) {
        return 0;
    }
    return -1 - r;
}
|
||||||
|
#define ONIGENC_MBCLEN_CHARFOUND(r) onigenc_mbclen_charfound(r)
|
||||||
|
#define ONIGENC_MBCLEN_INVALID(r) ((r) == -1)
|
||||||
|
#define ONIGENC_MBCLEN_NEEDMORE(r) onigenc_mbclen_needmore(r)
|
||||||
|
|
||||||
|
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
|
||||||
|
|
||||||
|
static inline int onigenc_mbclen_recover(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
|
||||||
|
{
|
||||||
|
int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
|
||||||
|
int r;
|
||||||
|
if (ONIGENC_MBCLEN_INVALID(ret))
|
||||||
|
return 1;
|
||||||
|
else if ((r = ONIGENC_MBCLEN_NEEDMORE(ret)))
|
||||||
|
return e-p+r;
|
||||||
|
else
|
||||||
|
return ONIGENC_MBCLEN_CHARFOUND(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_recover(p,e,enc)
|
||||||
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
||||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
||||||
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
|
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
|
||||||
|
|
32
io.c
32
io.c
|
@ -2127,7 +2127,7 @@ rb_io_getc(VALUE io)
|
||||||
{
|
{
|
||||||
rb_encoding *enc;
|
rb_encoding *enc;
|
||||||
rb_io_t *fptr;
|
rb_io_t *fptr;
|
||||||
int n, left;
|
int r, n;
|
||||||
VALUE str;
|
VALUE str;
|
||||||
|
|
||||||
GetOpenFile(io, fptr);
|
GetOpenFile(io, fptr);
|
||||||
|
@ -2138,22 +2138,30 @@ rb_io_getc(VALUE io)
|
||||||
if (io_fillbuf(fptr) < 0) {
|
if (io_fillbuf(fptr) < 0) {
|
||||||
return Qnil;
|
return Qnil;
|
||||||
}
|
}
|
||||||
n = rb_enc_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_len, enc);
|
r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
|
||||||
if (n < fptr->rbuf_len) {
|
if ((n = MBCLEN_CHARFOUND(r)) != 0 && n <= fptr->rbuf_len) {
|
||||||
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
|
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
|
||||||
fptr->rbuf_off += n;
|
fptr->rbuf_off += n;
|
||||||
fptr->rbuf_len -= n;
|
fptr->rbuf_len -= n;
|
||||||
}
|
}
|
||||||
|
else if (MBCLEN_NEEDMORE(r)) {
|
||||||
|
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len);
|
||||||
|
fptr->rbuf_len = 0;
|
||||||
|
getc_needmore:
|
||||||
|
if (io_fillbuf(fptr) != -1) {
|
||||||
|
rb_str_cat(str, fptr->rbuf+fptr->rbuf_off, 1);
|
||||||
|
fptr->rbuf_off++;
|
||||||
|
fptr->rbuf_len--;
|
||||||
|
r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc);
|
||||||
|
if (MBCLEN_NEEDMORE(r)) {
|
||||||
|
goto getc_needmore;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
str = rb_str_new(0, n);
|
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1);
|
||||||
left = fptr->rbuf_len;
|
fptr->rbuf_off++;
|
||||||
MEMCPY(RSTRING_PTR(str), fptr->rbuf+fptr->rbuf_off, char, left);
|
fptr->rbuf_len--;
|
||||||
if (io_fillbuf(fptr) < 0) {
|
|
||||||
return Qnil;
|
|
||||||
}
|
|
||||||
MEMCPY(RSTRING_PTR(str)+left, fptr->rbuf, char, n-left);
|
|
||||||
fptr->rbuf_off += left;
|
|
||||||
fptr->rbuf_len -= left;
|
|
||||||
}
|
}
|
||||||
rb_enc_associate(str, enc);
|
rb_enc_associate(str, enc);
|
||||||
|
|
||||||
|
|
54
string.c
54
string.c
|
@ -2919,10 +2919,20 @@ rb_str_inspect(VALUE str)
|
||||||
str_cat_char(result, '"', enc);
|
str_cat_char(result, '"', enc);
|
||||||
p = RSTRING_PTR(str); pend = RSTRING_END(str);
|
p = RSTRING_PTR(str); pend = RSTRING_END(str);
|
||||||
while (p < pend) {
|
while (p < pend) {
|
||||||
int c = rb_enc_codepoint(p, pend, enc);
|
int c;
|
||||||
int n = rb_enc_codelen(c, enc);
|
int n;
|
||||||
int cc;
|
int cc;
|
||||||
|
|
||||||
|
n = rb_enc_precise_mbclen(p, pend, enc);
|
||||||
|
if (!MBCLEN_CHARFOUND(n)) {
|
||||||
|
p++;
|
||||||
|
n = 1;
|
||||||
|
goto escape_codepoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
c = rb_enc_codepoint(p, pend, enc);
|
||||||
|
n = rb_enc_codelen(c, enc);
|
||||||
|
|
||||||
p += n;
|
p += n;
|
||||||
if (c == '"'|| c == '\\' ||
|
if (c == '"'|| c == '\\' ||
|
||||||
(c == '#' && (cc = rb_enc_codepoint(p,pend,enc),
|
(c == '#' && (cc = rb_enc_codepoint(p,pend,enc),
|
||||||
|
@ -2954,19 +2964,21 @@ rb_str_inspect(VALUE str)
|
||||||
prefix_escape(result, 'e', enc);
|
prefix_escape(result, 'e', enc);
|
||||||
}
|
}
|
||||||
else if (rb_enc_isprint(c, enc)) {
|
else if (rb_enc_isprint(c, enc)) {
|
||||||
char buf[5];
|
rb_str_buf_cat(result, p-n, n);
|
||||||
|
|
||||||
rb_enc_mbcput(c, buf, enc);
|
|
||||||
rb_str_buf_cat(result, buf, n);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
char buf[5];
|
char buf[5];
|
||||||
char *s = buf;
|
char *s;
|
||||||
|
char *q;
|
||||||
|
|
||||||
sprintf(buf, "\\%03o", c & 0377);
|
escape_codepoint:
|
||||||
while (*s) {
|
for (q = p-n; q < p; q++) {
|
||||||
str_cat_char(result, *s++, enc);
|
s = buf;
|
||||||
}
|
sprintf(buf, "\\%03o", *q & 0377);
|
||||||
|
while (*s) {
|
||||||
|
str_cat_char(result, *s++, enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
str_cat_char(result, '"', enc);
|
str_cat_char(result, '"', enc);
|
||||||
|
@ -5232,6 +5244,25 @@ rb_str_force_encoding(VALUE str, VALUE enc)
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
rb_str_valid_encoding_p(VALUE str)
|
||||||
|
{
|
||||||
|
char *p = RSTRING_PTR(str);
|
||||||
|
char *pend = RSTRING_END(str);
|
||||||
|
rb_encoding *enc = rb_enc_get(str);
|
||||||
|
|
||||||
|
while (p < pend) {
|
||||||
|
int n;
|
||||||
|
|
||||||
|
n = rb_enc_precise_mbclen(p, pend, enc);
|
||||||
|
if (!MBCLEN_CHARFOUND(n)) {
|
||||||
|
return Qfalse;
|
||||||
|
}
|
||||||
|
p += n;
|
||||||
|
}
|
||||||
|
return Qtrue;
|
||||||
|
}
|
||||||
|
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
* Document-class: Symbol
|
* Document-class: Symbol
|
||||||
*
|
*
|
||||||
|
@ -5644,6 +5675,7 @@ Init_String(void)
|
||||||
|
|
||||||
rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
|
rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
|
||||||
rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
|
rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
|
||||||
|
rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
|
||||||
|
|
||||||
id_to_s = rb_intern("to_s");
|
id_to_s = rb_intern("to_s");
|
||||||
|
|
||||||
|
|
|
@ -26,14 +26,46 @@ class TestM17N < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_string_mixed_unicode
|
def test_string_mixed_unicode
|
||||||
assert_raise(SyntaxError) { eval(a(%{"\xc0\xa0\\u{6666}"})) }
|
assert_raise(SyntaxError) { eval(a(%{"\xc2\xa0\\u{6666}"})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{"\xc0\xa0\\u{6666}"})) }
|
assert_raise(SyntaxError) { eval(e(%{"\xc2\xa0\\u{6666}"})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{"\xc0\xa0\\u{6666}"})) }
|
assert_raise(SyntaxError) { eval(s(%{"\xc2\xa0\\u{6666}"})) }
|
||||||
assert_nothing_raised { eval(u(%{"\xc0\xa0\\u{6666}"})) }
|
assert_nothing_raised { eval(u(%{"\xc2\xa0\\u{6666}"})) }
|
||||||
assert_raise(SyntaxError) { eval(a(%{"\\u{6666}\xc0\xa0"})) }
|
assert_raise(SyntaxError) { eval(a(%{"\\u{6666}\xc2\xa0"})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{"\\u{6666}\xc0\xa0"})) }
|
assert_raise(SyntaxError) { eval(e(%{"\\u{6666}\xc2\xa0"})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{"\\u{6666}\xc0\xa0"})) }
|
assert_raise(SyntaxError) { eval(s(%{"\\u{6666}\xc2\xa0"})) }
|
||||||
assert_nothing_raised { eval(u(%{"\\u{6666}\xc0\xa0"})) }
|
assert_nothing_raised { eval(u(%{"\\u{6666}\xc2\xa0"})) }
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_string_inspect
|
||||||
|
assert_equal('"\376"', e("\xfe").inspect)
|
||||||
|
assert_equal('"\216"', e("\x8e").inspect)
|
||||||
|
assert_equal('"\217"', e("\x8f").inspect)
|
||||||
|
assert_equal('"\217\241"', e("\x8f\xa1").inspect)
|
||||||
|
assert_equal('"\357"', s("\xef").inspect)
|
||||||
|
assert_equal('"\302"', u("\xc2").inspect)
|
||||||
|
assert_equal('"\340\200"', u("\xe0\x80").inspect)
|
||||||
|
assert_equal('"\360\200\200"', u("\xf0\x80\x80").inspect)
|
||||||
|
assert_equal('"\370\200\200\200"', u("\xf8\x80\x80\x80").inspect)
|
||||||
|
assert_equal('"\374\200\200\200\200"', u("\xfc\x80\x80\x80\x80").inspect)
|
||||||
|
|
||||||
|
assert_equal('"\376 "', e("\xfe ").inspect)
|
||||||
|
assert_equal('"\216 "', e("\x8e ").inspect)
|
||||||
|
assert_equal('"\217 "', e("\x8f ").inspect)
|
||||||
|
assert_equal('"\217\241 "', e("\x8f\xa1 ").inspect)
|
||||||
|
assert_equal('"\357 "', s("\xef ").inspect)
|
||||||
|
assert_equal('"\302 "', u("\xc2 ").inspect)
|
||||||
|
assert_equal('"\340\200 "', u("\xe0\x80 ").inspect)
|
||||||
|
assert_equal('"\360\200\200 "', u("\xf0\x80\x80 ").inspect)
|
||||||
|
assert_equal('"\370\200\200\200 "', u("\xf8\x80\x80\x80 ").inspect)
|
||||||
|
assert_equal('"\374\200\200\200\200 "', u("\xfc\x80\x80\x80\x80 ").inspect)
|
||||||
|
|
||||||
|
|
||||||
|
assert_equal(e("\"\\241\x8f\xa1\xa1\""), e("\xa1\x8f\xa1\xa1").inspect)
|
||||||
|
|
||||||
|
assert_equal('"\201."', s("\x81.").inspect)
|
||||||
|
assert_equal(s("\"\x81@\""), s("\x81@").inspect)
|
||||||
|
|
||||||
|
assert_equal('"\374"', u("\xfc").inspect)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_regexp_too_short_multibyte_character
|
def test_regexp_too_short_multibyte_character
|
||||||
|
@ -42,27 +74,27 @@ class TestM17N < Test::Unit::TestCase
|
||||||
assert_raise(SyntaxError) { eval('/\x8f/e') }
|
assert_raise(SyntaxError) { eval('/\x8f/e') }
|
||||||
assert_raise(SyntaxError) { eval('/\x8f\xa1/e') }
|
assert_raise(SyntaxError) { eval('/\x8f\xa1/e') }
|
||||||
assert_raise(SyntaxError) { eval('/\xef/s') }
|
assert_raise(SyntaxError) { eval('/\xef/s') }
|
||||||
assert_raise(SyntaxError) { eval('/\xc0/u') }
|
assert_raise(SyntaxError) { eval('/\xc2/u') }
|
||||||
assert_raise(SyntaxError) { eval('/\xe0\x80/u') }
|
assert_raise(SyntaxError) { eval('/\xe0\x80/u') }
|
||||||
assert_raise(SyntaxError) { eval('/\xf0\x80\x80/u') }
|
assert_raise(SyntaxError) { eval('/\xf0\x80\x80/u') }
|
||||||
assert_raise(SyntaxError) { eval('/\xf8\x80\x80\x80/u') }
|
#assert_raise(SyntaxError) { eval('/\xf8\x80\x80\x80/u') }
|
||||||
assert_raise(SyntaxError) { eval('/\xfc\x80\x80\x80\x80/u') }
|
#assert_raise(SyntaxError) { eval('/\xfc\x80\x80\x80\x80/u') }
|
||||||
|
|
||||||
# raw 8bit
|
# raw 8bit
|
||||||
assert_raise(SyntaxError) { eval("/\xfe/e") }
|
assert_raise(SyntaxError) { eval("/\xfe/e") }
|
||||||
assert_raise(SyntaxError) { eval("/\xc0/u") }
|
assert_raise(SyntaxError) { eval("/\xc2/u") }
|
||||||
|
|
||||||
# invalid suffix
|
# invalid suffix
|
||||||
assert_raise(SyntaxError) { eval('/\xc0\xff/u') }
|
assert_raise(SyntaxError) { eval('/\xc2\xff/u') }
|
||||||
assert_raise(SyntaxError) { eval('/\xc0 /u') }
|
assert_raise(SyntaxError) { eval('/\xc2 /u') }
|
||||||
#assert_raise(SyntaxError) { eval('/\xc0\x20/u') }
|
#assert_raise(SyntaxError) { eval('/\xc2\x20/u') }
|
||||||
end
|
end
|
||||||
|
|
||||||
def assert_regexp_generic_encoding(r)
|
def assert_regexp_generic_encoding(r)
|
||||||
assert(!r.fixed_encoding?)
|
assert(!r.fixed_encoding?)
|
||||||
%w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
|
%w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
|
||||||
# "\xc0\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Shift_JIS and UTF-8.
|
# "\xc2\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Shift_JIS and UTF-8.
|
||||||
assert_nothing_raised { r =~ "\xc0\xa1".force_encoding(ename) }
|
assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(ename) }
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -71,9 +103,9 @@ class TestM17N < Test::Unit::TestCase
|
||||||
%w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
|
%w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
|
||||||
enc = Encoding.find(ename)
|
enc = Encoding.find(ename)
|
||||||
if enc == r.encoding
|
if enc == r.encoding
|
||||||
assert_nothing_raised { r =~ "\xc0\xa1".force_encoding(enc) }
|
assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(enc) }
|
||||||
else
|
else
|
||||||
assert_raise(ArgumentError) { r =~ "\xc0\xa1".force_encoding(enc) }
|
assert_raise(ArgumentError) { r =~ "\xc2\xa1".force_encoding(enc) }
|
||||||
end
|
end
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
@ -115,77 +147,77 @@ class TestM17N < Test::Unit::TestCase
|
||||||
assert_equal(0, r =~ e("a"))
|
assert_equal(0, r =~ e("a"))
|
||||||
assert_equal(0, r =~ s("a"))
|
assert_equal(0, r =~ s("a"))
|
||||||
assert_equal(0, r =~ u("a"))
|
assert_equal(0, r =~ u("a"))
|
||||||
assert_equal(nil, r =~ a("\xc0\xa1"))
|
assert_equal(nil, r =~ a("\xc2\xa1"))
|
||||||
assert_equal(nil, r =~ e("\xc0\xa1"))
|
assert_equal(nil, r =~ e("\xc2\xa1"))
|
||||||
assert_equal(nil, r =~ s("\xc0\xa1"))
|
assert_equal(nil, r =~ s("\xc2\xa1"))
|
||||||
assert_equal(nil, r =~ u("\xc0\xa1"))
|
assert_equal(nil, r =~ u("\xc2\xa1"))
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_regexp_ascii
|
def test_regexp_ascii
|
||||||
assert_regexp_fixed_ascii8bit(/a/n)
|
assert_regexp_fixed_ascii8bit(/a/n)
|
||||||
assert_regexp_fixed_ascii8bit(/\xc0\xa1/n)
|
assert_regexp_fixed_ascii8bit(/\xc2\xa1/n)
|
||||||
assert_regexp_fixed_ascii8bit(eval(a(%{/\xc0\xa1/})))
|
assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/})))
|
||||||
assert_regexp_fixed_ascii8bit(eval(a(%{/\xc0\xa1/n})))
|
assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/n})))
|
||||||
assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc0\xa1/})))
|
assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc2\xa1/})))
|
||||||
|
|
||||||
[/a/n].each {|r|
|
[/a/n].each {|r|
|
||||||
assert_equal(0, r =~ a("a"))
|
assert_equal(0, r =~ a("a"))
|
||||||
assert_equal(0, r =~ e("a"))
|
assert_equal(0, r =~ e("a"))
|
||||||
assert_equal(0, r =~ s("a"))
|
assert_equal(0, r =~ s("a"))
|
||||||
assert_equal(0, r =~ u("a"))
|
assert_equal(0, r =~ u("a"))
|
||||||
assert_equal(nil, r =~ a("\xc0\xa1"))
|
assert_equal(nil, r =~ a("\xc2\xa1"))
|
||||||
assert_raise(ArgumentError) { r =~ e("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ e("\xc2\xa1") }
|
||||||
assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
|
||||||
assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
|
||||||
}
|
}
|
||||||
|
|
||||||
[/\xc0\xa1/n, eval(a(%{/\xc0\xa1/})), eval(a(%{/\xc0\xa1/n}))].each {|r|
|
[/\xc2\xa1/n, eval(a(%{/\xc2\xa1/})), eval(a(%{/\xc2\xa1/n}))].each {|r|
|
||||||
assert_equal(nil, r =~ a("a"))
|
assert_equal(nil, r =~ a("a"))
|
||||||
assert_equal(nil, r =~ e("a"))
|
assert_equal(nil, r =~ e("a"))
|
||||||
assert_equal(nil, r =~ s("a"))
|
assert_equal(nil, r =~ s("a"))
|
||||||
assert_equal(nil, r =~ u("a"))
|
assert_equal(nil, r =~ u("a"))
|
||||||
assert_equal(0, r =~ a("\xc0\xa1"))
|
assert_equal(0, r =~ a("\xc2\xa1"))
|
||||||
assert_raise(ArgumentError) { r =~ e("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ e("\xc2\xa1") }
|
||||||
assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
|
||||||
assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_regexp_euc
|
def test_regexp_euc
|
||||||
assert_regexp_fixed_eucjp(/a/e)
|
assert_regexp_fixed_eucjp(/a/e)
|
||||||
assert_regexp_fixed_eucjp(/\xc0\xa1/e)
|
assert_regexp_fixed_eucjp(/\xc2\xa1/e)
|
||||||
assert_regexp_fixed_eucjp(eval(e(%{/\xc0\xa1/})))
|
assert_regexp_fixed_eucjp(eval(e(%{/\xc2\xa1/})))
|
||||||
assert_regexp_fixed_eucjp(eval(e(%q{/\xc0\xa1/})))
|
assert_regexp_fixed_eucjp(eval(e(%q{/\xc2\xa1/})))
|
||||||
|
|
||||||
[/a/e].each {|r|
|
[/a/e].each {|r|
|
||||||
assert_equal(0, r =~ a("a"))
|
assert_equal(0, r =~ a("a"))
|
||||||
assert_equal(0, r =~ e("a"))
|
assert_equal(0, r =~ e("a"))
|
||||||
assert_equal(0, r =~ s("a"))
|
assert_equal(0, r =~ s("a"))
|
||||||
assert_equal(0, r =~ u("a"))
|
assert_equal(0, r =~ u("a"))
|
||||||
assert_raise(ArgumentError) { r =~ a("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ a("\xc2\xa1") }
|
||||||
assert_equal(nil, r =~ e("\xc0\xa1"))
|
assert_equal(nil, r =~ e("\xc2\xa1"))
|
||||||
assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
|
||||||
assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
|
||||||
}
|
}
|
||||||
|
|
||||||
[/\xc0\xa1/e, eval(e(%{/\xc0\xa1/})), eval(e(%q{/\xc0\xa1/}))].each {|r|
|
[/\xc2\xa1/e, eval(e(%{/\xc2\xa1/})), eval(e(%q{/\xc2\xa1/}))].each {|r|
|
||||||
assert_equal(nil, r =~ a("a"))
|
assert_equal(nil, r =~ a("a"))
|
||||||
assert_equal(nil, r =~ e("a"))
|
assert_equal(nil, r =~ e("a"))
|
||||||
assert_equal(nil, r =~ s("a"))
|
assert_equal(nil, r =~ s("a"))
|
||||||
assert_equal(nil, r =~ u("a"))
|
assert_equal(nil, r =~ u("a"))
|
||||||
assert_raise(ArgumentError) { r =~ a("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ a("\xc2\xa1") }
|
||||||
assert_equal(0, r =~ e("\xc0\xa1"))
|
assert_equal(0, r =~ e("\xc2\xa1"))
|
||||||
assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
|
||||||
assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
|
assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_regexp_sjis
|
def test_regexp_sjis
|
||||||
assert_regexp_fixed_sjis(/a/s)
|
assert_regexp_fixed_sjis(/a/s)
|
||||||
assert_regexp_fixed_sjis(/\xc0\xa1/s)
|
assert_regexp_fixed_sjis(/\xc2\xa1/s)
|
||||||
assert_regexp_fixed_sjis(eval(s(%{/\xc0\xa1/})))
|
assert_regexp_fixed_sjis(eval(s(%{/\xc2\xa1/})))
|
||||||
assert_regexp_fixed_sjis(eval(s(%q{/\xc0\xa1/})))
|
assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/})))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_begin_end_offset
|
def test_begin_end_offset
|
||||||
|
@ -223,10 +255,10 @@ class TestM17N < Test::Unit::TestCase
|
||||||
assert_encoding("ASCII-8BIT", Regexp.quote(s("a")).encoding)
|
assert_encoding("ASCII-8BIT", Regexp.quote(s("a")).encoding)
|
||||||
assert_encoding("ASCII-8BIT", Regexp.quote(u("a")).encoding)
|
assert_encoding("ASCII-8BIT", Regexp.quote(u("a")).encoding)
|
||||||
|
|
||||||
assert_encoding("ASCII-8BIT", Regexp.quote(a("\xc0\xa1")).encoding)
|
assert_encoding("ASCII-8BIT", Regexp.quote(a("\xc2\xa1")).encoding)
|
||||||
assert_encoding("EUC-JP", Regexp.quote(e("\xc0\xa1")).encoding)
|
assert_encoding("EUC-JP", Regexp.quote(e("\xc2\xa1")).encoding)
|
||||||
assert_encoding("Shift_JIS", Regexp.quote(s("\xc0\xa1")).encoding)
|
assert_encoding("Shift_JIS", Regexp.quote(s("\xc2\xa1")).encoding)
|
||||||
assert_encoding("UTF-8", Regexp.quote(u("\xc0\xa1")).encoding)
|
assert_encoding("UTF-8", Regexp.quote(u("\xc2\xa1")).encoding)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_union_0
|
def test_union_0
|
||||||
|
@ -254,10 +286,10 @@ class TestM17N < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_union_1_nonascii_string
|
def test_union_1_nonascii_string
|
||||||
assert_regexp_fixed_ascii8bit(Regexp.union(a("\xc0\xa1")))
|
assert_regexp_fixed_ascii8bit(Regexp.union(a("\xc2\xa1")))
|
||||||
assert_regexp_fixed_eucjp(Regexp.union(e("\xc0\xa1")))
|
assert_regexp_fixed_eucjp(Regexp.union(e("\xc2\xa1")))
|
||||||
assert_regexp_fixed_sjis(Regexp.union(s("\xc0\xa1")))
|
assert_regexp_fixed_sjis(Regexp.union(s("\xc2\xa1")))
|
||||||
assert_regexp_fixed_utf8(Regexp.union(u("\xc0\xa1")))
|
assert_regexp_fixed_utf8(Regexp.union(u("\xc2\xa1")))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_union_1_regexp
|
def test_union_1_regexp
|
||||||
|
@ -271,7 +303,7 @@ class TestM17N < Test::Unit::TestCase
|
||||||
def test_union_2
|
def test_union_2
|
||||||
ary = [
|
ary = [
|
||||||
a(""), e(""), s(""), u(""),
|
a(""), e(""), s(""), u(""),
|
||||||
a("\xc0\xa1"), e("\xc0\xa1"), s("\xc0\xa1"), u("\xc0\xa1")
|
a("\xc2\xa1"), e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")
|
||||||
]
|
]
|
||||||
ary.each {|s1|
|
ary.each {|s1|
|
||||||
ary.each {|s2|
|
ary.each {|s2|
|
||||||
|
@ -304,26 +336,26 @@ class TestM17N < Test::Unit::TestCase
|
||||||
|
|
||||||
def test_dynamic_ascii_regexp
|
def test_dynamic_ascii_regexp
|
||||||
assert_regexp_fixed_ascii8bit(/#{}/n)
|
assert_regexp_fixed_ascii8bit(/#{}/n)
|
||||||
assert_regexp_fixed_ascii8bit(/#{}\xc0\xa1/n)
|
assert_regexp_fixed_ascii8bit(/#{}\xc2\xa1/n)
|
||||||
assert_regexp_fixed_ascii8bit(/\xc0\xa1#{}/n)
|
assert_regexp_fixed_ascii8bit(/\xc2\xa1#{}/n)
|
||||||
#assert_raise(SyntaxError) { eval('/\xc0#{}\xa1/s') }
|
#assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/s') }
|
||||||
#assert_raise(SyntaxError) { s1, s2 = s('\xc0'), s('\xa1'); /#{s1}#{s2}/ }
|
#assert_raise(SyntaxError) { s1, s2 = s('\xc2'), s('\xa1'); /#{s1}#{s2}/ }
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_dynamic_eucjp_regexp
|
def test_dynamic_eucjp_regexp
|
||||||
assert_regexp_fixed_eucjp(/#{}/e)
|
assert_regexp_fixed_eucjp(/#{}/e)
|
||||||
assert_regexp_fixed_eucjp(/#{}\xc0\xa1/e)
|
assert_regexp_fixed_eucjp(/#{}\xc2\xa1/e)
|
||||||
assert_regexp_fixed_eucjp(/\xc0\xa1#{}/e)
|
assert_regexp_fixed_eucjp(/\xc2\xa1#{}/e)
|
||||||
assert_raise(RegexpError) { eval('/\xc0#{}/e') }
|
assert_raise(RegexpError) { eval('/\xc2#{}/e') }
|
||||||
assert_raise(RegexpError) { eval('/#{}\xc0/e') }
|
assert_raise(RegexpError) { eval('/#{}\xc2/e') }
|
||||||
#assert_raise(SyntaxError) { eval('/\xc0#{}\xa1/e') }
|
#assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/e') }
|
||||||
#assert_raise(SyntaxError) { s1, s2 = e('\xc0'), e('\xa1'); /#{s1}#{s2}/ }
|
#assert_raise(SyntaxError) { s1, s2 = e('\xc2'), e('\xa1'); /#{s1}#{s2}/ }
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_dynamic_sjis_regexp
|
def test_dynamic_sjis_regexp
|
||||||
assert_regexp_fixed_sjis(/#{}/s)
|
assert_regexp_fixed_sjis(/#{}/s)
|
||||||
assert_regexp_fixed_sjis(/#{}\xc0\xa1/s)
|
assert_regexp_fixed_sjis(/#{}\xc2\xa1/s)
|
||||||
assert_regexp_fixed_sjis(/\xc0\xa1#{}/s)
|
assert_regexp_fixed_sjis(/\xc2\xa1#{}/s)
|
||||||
assert_raise(RegexpError) { eval('/\x81#{}/s') }
|
assert_raise(RegexpError) { eval('/\x81#{}/s') }
|
||||||
assert_raise(RegexpError) { eval('/#{}\x81/s') }
|
assert_raise(RegexpError) { eval('/#{}\x81/s') }
|
||||||
#assert_raise(SyntaxError) { eval('/\x81#{}\xa1/s') }
|
#assert_raise(SyntaxError) { eval('/\x81#{}\xa1/s') }
|
||||||
|
@ -332,49 +364,49 @@ class TestM17N < Test::Unit::TestCase
|
||||||
|
|
||||||
def test_dynamic_utf8_regexp
|
def test_dynamic_utf8_regexp
|
||||||
assert_regexp_fixed_utf8(/#{}/u)
|
assert_regexp_fixed_utf8(/#{}/u)
|
||||||
assert_regexp_fixed_utf8(/#{}\xc0\xa1/u)
|
assert_regexp_fixed_utf8(/#{}\xc2\xa1/u)
|
||||||
assert_regexp_fixed_utf8(/\xc0\xa1#{}/u)
|
assert_regexp_fixed_utf8(/\xc2\xa1#{}/u)
|
||||||
assert_raise(RegexpError) { eval('/\xc0#{}/u') }
|
assert_raise(RegexpError) { eval('/\xc2#{}/u') }
|
||||||
assert_raise(RegexpError) { eval('/#{}\xc0/u') }
|
assert_raise(RegexpError) { eval('/#{}\xc2/u') }
|
||||||
#assert_raise(SyntaxError) { eval('/\xc0#{}\xa1/u') }
|
#assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/u') }
|
||||||
#assert_raise(SyntaxError) { s1, s2 = u('\xc0'), u('\xa1'); /#{s1}#{s2}/ }
|
#assert_raise(SyntaxError) { s1, s2 = u('\xc2'), u('\xa1'); /#{s1}#{s2}/ }
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_regexp_mixed_unicode
|
def test_regexp_mixed_unicode
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\xc0\xa0\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\xc2\xa0\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\xc0\xa0\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\xc2\xa0\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\xc0\xa0\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\xc2\xa0\\u{6666}/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\xc0\xa0\\u{6666}/})) }
|
assert_nothing_raised { eval(u(%{/\xc2\xa0\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\xc0\xa0/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\xc2\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\xc0\xa0/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\xc2\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\xc0\xa0/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\xc2\xa0/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\\u{6666}\xc0\xa0/})) }
|
assert_nothing_raised { eval(u(%{/\\u{6666}\xc2\xa0/})) }
|
||||||
|
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\\xc0\\xa0\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa0\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\\xc0\\xa0\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa0\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\\xc0\\xa0\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa0\\u{6666}/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\\xc0\\xa0\\u{6666}/})) }
|
assert_nothing_raised { eval(u(%{/\\xc2\\xa0\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\\xc0\\xa0/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\\xc2\\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\\xc0\\xa0/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\\xc2\\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\\xc0\\xa0/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\\xc2\\xa0/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\\u{6666}\\xc0\\xa0/})) }
|
assert_nothing_raised { eval(u(%{/\\u{6666}\\xc2\\xa0/})) }
|
||||||
|
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\xc0\xa0#{}\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\xc2\xa0#{}\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\xc0\xa0#{}\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\xc2\xa0#{}\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\xc0\xa0#{}\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\xc2\xa0#{}\\u{6666}/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\xc0\xa0#{}\\u{6666}/})) }
|
assert_nothing_raised { eval(u(%{/\xc2\xa0#{}\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\xc0\xa0/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\xc2\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\xc0\xa0/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\xc2\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\xc0\xa0/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\xc2\xa0/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\\u{6666}#{}\xc0\xa0/})) }
|
assert_nothing_raised { eval(u(%{/\\u{6666}#{}\xc2\xa0/})) }
|
||||||
|
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\\xc0\\xa0#{}\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa0#{}\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\\xc0\\xa0#{}\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa0#{}\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\\xc0\\xa0#{}\\u{6666}/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa0#{}\\u{6666}/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\\xc0\\xa0#{}\\u{6666}/})) }
|
assert_nothing_raised { eval(u(%{/\\xc2\\xa0#{}\\u{6666}/})) }
|
||||||
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\\xc0\\xa0/})) }
|
assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\\xc2\\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\\xc0\\xa0/})) }
|
assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\\xc2\\xa0/})) }
|
||||||
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\\xc0\\xa0/})) }
|
assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\\xc2\\xa0/})) }
|
||||||
assert_nothing_raised { eval(u(%{/\\u{6666}#{}\\xc0\\xa0/})) }
|
assert_nothing_raised { eval(u(%{/\\u{6666}#{}\\xc2\\xa0/})) }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Загрузка…
Ссылка в новой задаче