зеркало из https://github.com/github/ruby.git
* enc/trans/make_transdb.rb: add for make transdb.h.
* dmytranscode.c: add for miniruby.
* enc/gbk.c (gbk_left_adjust_char_head, gbk_is_allowed_reverse_match): fix odd regexp match. [ruby-dev:33502]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15321 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
7a8c02cd47
Коммит
b9821b02a0
|
@ -1,3 +1,12 @@
|
|||
Tue Jan 29 20:37:36 2008 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* enc/trans/make_transdb.rb: add for make transdb.h.
|
||||
|
||||
* dmytranscode.c: add for miniruby.
|
||||
|
||||
* enc/gbk.c (gbk_left_adjust_char_head, gbk_is_allowed_reverse_match):
|
||||
fix odd regexp match. [ruby-dev:33502]
|
||||
|
||||
Tue Jan 29 20:17:06 2008 NAKAMURA Usaku <usa@ruby-lang.org>
|
||||
|
||||
* {bcc32,win32}/Makefile.sub (MINIOBJS): add dmytranscode.$(OBJEXT).
|
||||
|
|
47
enc/gbk.c
47
enc/gbk.c
|
@ -29,7 +29,7 @@
|
|||
|
||||
#include "regenc.h"
|
||||
|
||||
static const int EncLen_gbk[] = {
|
||||
static const int EncLen_GBK[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
|
@ -48,6 +48,28 @@ static const int EncLen_gbk[] = {
|
|||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
|
||||
};
|
||||
|
||||
static const char GBK_CAN_BE_TRAIL_TABLE[256] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
|
||||
};
|
||||
|
||||
#define GBK_ISMB_FIRST(byte) (EncLen_GBK[byte] > 1)
|
||||
#define GBK_ISMB_TRAIL(byte) GBK_CAN_BE_TRAIL_TABLE[(byte)]
|
||||
|
||||
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
|
||||
#define A ACCEPT
|
||||
#define F FAILURE
|
||||
|
@ -101,7 +123,7 @@ gbk_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
|
|||
return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
|
||||
ONIGENC_CONSTRUCT_MBCLEN_INVALID()
|
||||
if (s < 0) RETURN(1);
|
||||
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_gbk[firstbyte]-1);
|
||||
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_GBK[firstbyte]-1);
|
||||
s = trans[s][*p++];
|
||||
RETURN(2);
|
||||
#undef RETURN
|
||||
|
@ -142,21 +164,23 @@ gbk_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
|
|||
return onigenc_mb2_is_code_ctype(enc, code, ctype);
|
||||
}
|
||||
|
||||
#define gbk_islead(c) ((c) < 0xa1 || (c) == 0xff)
|
||||
|
||||
static UChar*
|
||||
gbk_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
|
||||
{
|
||||
/* Assumed in this encoding,
|
||||
mb-trail bytes don't mix with single bytes.
|
||||
*/
|
||||
const UChar *p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
p = s;
|
||||
|
||||
while (!gbk_islead(*p) && p > start) p--;
|
||||
if (GBK_ISMB_TRAIL(*p)) {
|
||||
while (p > start) {
|
||||
if (! GBK_ISMB_FIRST(*--p)) {
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
len = enclen(enc, p, s);
|
||||
if (p + len > s) return (UChar* )p;
|
||||
p += len;
|
||||
|
@ -167,13 +191,12 @@ static int
|
|||
gbk_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
const UChar c = *s;
|
||||
if (c <= 0x7e) return TRUE;
|
||||
else return FALSE;
|
||||
return (GBK_ISMB_TRAIL(c) ? FALSE : TRUE);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(gbk, gbk) = {
|
||||
OnigEncodingDefine(gbk, GBK) = {
|
||||
gbk_mbc_enc_len,
|
||||
"GBK", /* name */
|
||||
"GBK", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
|
|
Загрузка…
Ссылка в новой задаче