* include/ruby/oniguruma.h: precise mbclen API redesigned to avoid
  inline functions.
  (onigenc_mbclen_charfound): removed.
  (onigenc_mbclen_needmore): removed.
  (onigenc_mbclen_recover): removed.
  (ONIGENC_MBCLEN_CHARFOUND): removed.
  (ONIGENC_MBCLEN_CHARFOUND_P): defined.
  (ONIGENC_MBCLEN_CHARFOUND_LEN): defined.
  (ONIGENC_MBCLEN_INVALID): removed.
  (ONIGENC_MBCLEN_INVALID_P): defined.
  (ONIGENC_MBCLEN_NEEDMORE): removed.
  (ONIGENC_MBCLEN_NEEDMORE_P): defined.
  (ONIGENC_MBCLEN_NEEDMORE_LEN): defined.
  (ONIGENC_MBC_ENC_LEN): use onigenc_mbclen_approximate.

* regenc.c (onigenc_mbclen_approximate): defined.

* include/ruby/encoding.h (MBCLEN_CHARFOUND): removed.
  (MBCLEN_INVALID): removed.
  (MBCLEN_NEEDMORE): removed.
  (MBCLEN_CHARFOUND_P): defined.
  (MBCLEN_INVALID_P): defined.
  (MBCLEN_NEEDMORE_P): defined.
  (MBCLEN_CHARFOUND_LEN): defined.
  (MBCLEN_NEEDMORE_LEN): defined.

* encoding.c: use new API.

* re.c: ditto.

* string.c: ditto.

* parse.y: ditto.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15280 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-01-27 14:27:07 +00:00
Родитель 526ab1f0d1
Коммит fc208c1bd5
9 изменённых файлов: 91 добавлений и 49 удалений

Просмотреть файл

@ -1,3 +1,39 @@
Sun Jan 27 23:20:54 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/oniguruma.h: precise mbclen API redesigned to avoid
inline functions.
(onigenc_mbclen_charfound): removed.
(onigenc_mbclen_needmore): removed.
(onigenc_mbclen_recover): removed.
(ONIGENC_MBCLEN_CHARFOUND): removed.
(ONIGENC_MBCLEN_CHARFOUND_P): defined.
(ONIGENC_MBCLEN_CHARFOUND_LEN): defined.
(ONIGENC_MBCLEN_INVALID): removed.
(ONIGENC_MBCLEN_INVALID_P): defined.
(ONIGENC_MBCLEN_NEEDMORE): removed.
(ONIGENC_MBCLEN_NEEDMORE_P): defined.
(ONIGENC_MBCLEN_NEEDMORE_LEN): defined.
(ONIGENC_MBC_ENC_LEN): use onigenc_mbclen_approximate.
* regenc.c (onigenc_mbclen_approximate): defined.
* include/ruby/encoding.h (MBCLEN_CHARFOUND): removed.
(MBCLEN_INVALID): removed.
(MBCLEN_NEEDMORE): removed.
(MBCLEN_CHARFOUND_P): defined.
(MBCLEN_INVALID_P): defined.
(MBCLEN_NEEDMORE_P): defined.
(MBCLEN_CHARFOUND_LEN): defined.
(MBCLEN_NEEDMORE_LEN): defined.
* encoding.c: use new API.
* re.c: ditto.
* string.c: ditto.
* parse.y: ditto.
Sun Jan 27 22:55:27 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* parse.y (value_expr_gen): reverted r12880. [ruby-dev:33388]

Просмотреть файл

@ -749,9 +749,8 @@ int
rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
{
int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
n = MBCLEN_CHARFOUND(n);
if (0 < n && n <= e-p)
return n;
if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
return MBCLEN_CHARFOUND_LEN(n);
else
return 1;
}
@ -782,7 +781,7 @@ rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
return c;
}
l = rb_enc_precise_mbclen(p, e, enc);
if (!MBCLEN_CHARFOUND(l))
if (!MBCLEN_CHARFOUND_P(l))
return -1;
c = rb_enc_codepoint(p, e, enc);
if (!rb_enc_isascii(c, enc))
@ -798,7 +797,7 @@ rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
if (e <= p)
rb_raise(rb_eArgError, "empty string");
r = rb_enc_precise_mbclen(p, e, enc);
if (MBCLEN_CHARFOUND(r))
if (MBCLEN_CHARFOUND_P(r))
return rb_enc_mbc_to_codepoint(p, e, enc);
else
rb_raise(rb_eArgError, "invalid mbstring sequence");

Просмотреть файл

@ -110,9 +110,11 @@ int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
/* -> chlen, invalid or needmore */
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc);
#define MBCLEN_CHARFOUND(ret) ONIGENC_MBCLEN_CHARFOUND(ret)
#define MBCLEN_INVALID(ret) ONIGENC_MBCLEN_INVALID(ret)
#define MBCLEN_NEEDMORE(ret) ONIGENC_MBCLEN_NEEDMORE(ret)
#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret)
#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret)
#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret)
#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret)
#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret)
/* -> 0x00..0x7f, -1 */
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);

Просмотреть файл

@ -229,32 +229,23 @@ ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
static inline int onigenc_mbclen_charfound(int r) { return 0 < r ? r : 0; }
static inline int onigenc_mbclen_needmore(int r) { return r < -1 ? -1 - r : 0; }
#define ONIGENC_MBCLEN_CHARFOUND(r) onigenc_mbclen_charfound(r)
#define ONIGENC_MBCLEN_INVALID(r) ((r) == -1)
#define ONIGENC_MBCLEN_NEEDMORE(r) onigenc_mbclen_needmore(r)
#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
static inline int onigenc_mbclen_recover(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
{
int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
int r;
if (ONIGENC_MBCLEN_INVALID(ret))
return 1;
else if ((r = ONIGENC_MBCLEN_NEEDMORE(ret)))
return e-p+r;
else
return ONIGENC_MBCLEN_CHARFOUND(ret);
}
ONIG_EXTERN
int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc));
#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_recover(p,e,enc)
#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)

7
io.c
Просмотреть файл

@ -2289,12 +2289,13 @@ rb_io_getc(VALUE io)
return Qnil;
}
r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
if ((n = MBCLEN_CHARFOUND(r)) != 0 && n <= fptr->rbuf_len) {
if (MBCLEN_CHARFOUND_P(r) &&
(n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
fptr->rbuf_off += n;
fptr->rbuf_len -= n;
}
else if (MBCLEN_NEEDMORE(r)) {
else if (MBCLEN_NEEDMORE_P(r)) {
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len);
fptr->rbuf_len = 0;
getc_needmore:
@ -2303,7 +2304,7 @@ getc_needmore:
fptr->rbuf_off++;
fptr->rbuf_len--;
r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc);
if (MBCLEN_NEEDMORE(r)) {
if (MBCLEN_NEEDMORE_P(r)) {
goto getc_needmore;
}
}

Просмотреть файл

@ -5299,7 +5299,7 @@ static int
parser_tokadd_mbchar(struct parser_params *parser, int c)
{
int len = parser_precise_mbclen();
if (!MBCLEN_CHARFOUND(len)) {
if (!MBCLEN_CHARFOUND_P(len)) {
compile_error(PARSER_ARG "invalid multibyte char");
return -1;
}

7
re.c
Просмотреть файл

@ -1673,7 +1673,7 @@ unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc,
chbuf[chlen++] = byte;
while (chlen < chmaxlen &&
MBCLEN_NEEDMORE(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
byte = read_escaped_byte(&p, end, err);
if (byte == -1) {
return -1;
@ -1682,7 +1682,7 @@ unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc,
}
l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
if (MBCLEN_INVALID(l)) {
if (MBCLEN_INVALID_P(l)) {
strcpy(err, "invalid multibyte escape");
return -1;
}
@ -1812,10 +1812,11 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
while (p < end) {
int chlen = rb_enc_precise_mbclen(p, end, enc);
if (!MBCLEN_CHARFOUND(chlen)) {
if (!MBCLEN_CHARFOUND_P(chlen)) {
strcpy(err, "invalid multibyte character");
return -1;
}
chlen = MBCLEN_CHARFOUND_LEN(chlen);
if (1 < chlen || (*p & 0x80)) {
rb_str_buf_cat(buf, p, chlen);
p += chlen;

Просмотреть файл

@ -50,6 +50,17 @@ onigenc_set_default_encoding(OnigEncoding enc)
return 0;
}
/*
 * Return an approximate byte length for the character starting at p,
 * where e is the end of the available bytes, in encoding enc.
 *
 * The result of the encoding's precise length function is mapped to a
 * plain length:
 *   - character found: the length reported for that character;
 *   - more bytes needed: the bytes available (e-p) plus the reported
 *     number of missing bytes;
 *   - anything else (including the invalid result): 1.
 */
extern int
onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
{
int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
/* A positive result is a complete character; its value is the length. */
if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
/* A result below -1 encodes how many more bytes are required. */
else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
return e-p+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
/* Fall back to a single byte for every other result. */
return 1;
}
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{

Просмотреть файл

@ -170,11 +170,10 @@ coderange_scan(const char *p, long len, rb_encoding *enc)
}
while (p < e) {
int ret = rb_enc_precise_mbclen(p, e, enc);
int len = MBCLEN_CHARFOUND(ret);
if (!len) {
if (!MBCLEN_CHARFOUND_P(ret)) {
return ENC_CODERANGE_BROKEN;
}
p += len;
p += MBCLEN_CHARFOUND_LEN(ret);
if (p < e) {
p = search_nonascii(p, e);
if (!p) {
@ -190,12 +189,11 @@ coderange_scan(const char *p, long len, rb_encoding *enc)
while (p < e) {
int ret = rb_enc_precise_mbclen(p, e, enc);
int len = MBCLEN_CHARFOUND(ret);
if (!len) {
if (!MBCLEN_CHARFOUND_P(ret)) {
return ENC_CODERANGE_BROKEN;
}
p += len;
p += MBCLEN_CHARFOUND_LEN(ret);
}
if (e < p) {
return ENC_CODERANGE_BROKEN;
@ -2017,7 +2015,8 @@ enc_succ_char(char *p, int len, rb_encoding *enc)
return NEIGHBOR_WRAPPED;
++((unsigned char*)p)[i];
l = rb_enc_precise_mbclen(p, p+len, enc);
if (MBCLEN_CHARFOUND(l)) {
if (MBCLEN_CHARFOUND_P(l)) {
l = MBCLEN_CHARFOUND_LEN(l);
if (l == len) {
return NEIGHBOR_FOUND;
}
@ -2025,11 +2024,11 @@ enc_succ_char(char *p, int len, rb_encoding *enc)
memset(p+l, 0xff, len-l);
}
}
if (MBCLEN_INVALID(l) && i < len-1) {
if (MBCLEN_INVALID_P(l) && i < len-1) {
int len2, l2;
for (len2 = len-1; 0 < len2; len2--) {
l2 = rb_enc_precise_mbclen(p, p+len2, enc);
if (!MBCLEN_INVALID(l2))
if (!MBCLEN_INVALID_P(l2))
break;
}
memset(p+len2+1, 0xff, len-(len2+1));
@ -2048,7 +2047,8 @@ enc_pred_char(char *p, int len, rb_encoding *enc)
return NEIGHBOR_WRAPPED;
--((unsigned char*)p)[i];
l = rb_enc_precise_mbclen(p, p+len, enc);
if (MBCLEN_CHARFOUND(l)) {
if (MBCLEN_CHARFOUND_P(l)) {
l = MBCLEN_CHARFOUND_LEN(l);
if (l == len) {
return NEIGHBOR_FOUND;
}
@ -2056,11 +2056,11 @@ enc_pred_char(char *p, int len, rb_encoding *enc)
memset(p+l, 0, len-l);
}
}
if (MBCLEN_INVALID(l) && i < len-1) {
if (MBCLEN_INVALID_P(l) && i < len-1) {
int len2, l2;
for (len2 = len-1; 0 < len2; len2--) {
l2 = rb_enc_precise_mbclen(p, p+len2, enc);
if (!MBCLEN_INVALID(l2))
if (!MBCLEN_INVALID_P(l2))
break;
}
memset(p+len2+1, 0, len-(len2+1));
@ -3300,11 +3300,12 @@ rb_str_inspect(VALUE str)
int cc;
n = rb_enc_precise_mbclen(p, pend, enc);
if (!MBCLEN_CHARFOUND(n)) {
if (!MBCLEN_CHARFOUND_P(n)) {
p++;
n = 1;
goto escape_codepoint;
}
n = MBCLEN_CHARFOUND_LEN(n);
c = rb_enc_codepoint(p, pend, enc);
n = rb_enc_codelen(c, enc);
@ -3313,7 +3314,7 @@ rb_str_inspect(VALUE str)
if (c == '"'|| c == '\\' ||
(c == '#' &&
p < pend &&
MBCLEN_CHARFOUND(rb_enc_precise_mbclen(p,pend,enc)) &&
MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
(cc = rb_enc_codepoint(p,pend,enc),
(cc == '$' || cc == '@' || cc == '{')))) {
prefix_escape(result, c, enc);