Summary of this patch (text before the first "diff --git" line is ignored by patch tools):
  * include/ruby/encoding.h: add the rb_enc_mbc_to_codepoint() macro, a wrapper for
    ONIGENC_MBC_TO_CODE(); parenthesize pointer arguments in the rb_enc_mbcput,
    rb_enc_prev_char, rb_enc_left_char_head and rb_enc_right_char_head macros;
    declare rb_enc_casefold() and rb_interned_id_p().
  * encoding.c: rb_enc_codepoint() now goes through rb_enc_mbc_to_codepoint().
  * string.c: both backward scans in rb_str_succ() skip a position when
    rb_enc_precise_mbclen() returns <= 0 instead of calling rb_enc_codepoint(),
    which raises ArgumentError on an invalid sequence.
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; hunk line counts match the @@ headers,
but verify leading whitespace against the target tree before applying.

diff --git a/ChangeLog b/ChangeLog
index 32c869c77c..d2a6dc56a7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Tue Dec 25 19:01:04 2007  Nobuyoshi Nakada
+
+	* encoding.h (rb_enc_mbc_to_codepoint): wrapper for
+	  ONIGENC_MBC_TO_CODE().
+
+	* string.c (rb_str_succ): deal with invalid sequence as binary.
+
 Tue Dec 25 18:40:46 2007  Koichi Sasada
 
 	* iseq.c: all methods need $SAFE < 1.
diff --git a/encoding.c b/encoding.c
index ee0fdadb39..ea00deb9e2 100644
--- a/encoding.c
+++ b/encoding.c
@@ -660,14 +660,15 @@ rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
     return c;
 }
 
-int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
+int
+rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
 {
     int r;
     if (e <= p)
 	rb_raise(rb_eArgError, "empty string");
     r = rb_enc_precise_mbclen(p, e, enc);
     if (MBCLEN_CHARFOUND(r))
-	return ONIGENC_MBC_TO_CODE(enc,(UChar*)p,(UChar*)e);
+	return rb_enc_mbc_to_codepoint(p, e, enc);
     else
 	rb_raise(rb_eArgError, "invalid mbstring sequence");
 }
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 49df3b22a8..1b1e3d7c59 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -85,18 +85,19 @@ int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
 
 /* -> code or raise exception */
 int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
+#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e))
 
 /* -> codelen or raise exception */
 int rb_enc_codelen(int code, rb_encoding *enc);
 
 /* code,ptr,encoding -> write buf */
-#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)buf)
+#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf))
 
 /* ptr, ptr, encoding -> prev_char */
-#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
+#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p))
 /* ptr, ptr, encoding -> next_char */
-#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)s,(UChar*)p)
-#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)s,(UChar*)p)
+#define rb_enc_left_char_head(s,p,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
+#define rb_enc_right_char_head(s,p,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p))
 
 #define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
 #define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
@@ -110,9 +111,11 @@ int rb_enc_codelen(int code, rb_encoding *enc);
 
 #define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1)
 
+int rb_enc_casefold(char *to, const char *p, const char *e, rb_encoding *enc);
 int rb_enc_toupper(int c, rb_encoding *enc);
 int rb_enc_tolower(int c, rb_encoding *enc);
 ID rb_intern3(const char*, long, rb_encoding*);
+ID rb_interned_id_p(const char *, long, rb_encoding *);
 int rb_enc_symname_p(const char*, rb_encoding*);
 int rb_enc_str_coderange(VALUE);
 int rb_enc_str_asciionly_p(VALUE);
diff --git a/string.c b/string.c
index aaf14468af..9dc6a13109 100644
--- a/string.c
+++ b/string.c
@@ -1876,7 +1876,8 @@ rb_str_succ(VALUE orig)
     s = e = sbeg + RSTRING_LEN(str);
 
     while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) {
-	cc = rb_enc_codepoint(s, e, enc);
+	if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
+	cc = rb_enc_mbc_to_codepoint(s, e, enc);
 	if (rb_enc_isalnum(cc, enc)) {
 	    if (rb_enc_isascii(cc, enc)) {
 		if ((c = succ_char(s)) == 0) break;
@@ -1892,7 +1893,8 @@ rb_str_succ(VALUE orig)
 	s = e;
 	while ((s = rb_enc_prev_char(sbeg, s, enc)) != 0) {
 	    int limit = 256;
-	    cc = rb_enc_codepoint(s, e, enc);
+	    if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
+	    cc = rb_enc_mbc_to_codepoint(s, e, enc);
 	    while ((l = rb_enc_mbcput(++cc, carry, enc)) < 0 && --limit);
 	    if (l > 0) {
 		if (l == (o = e - s)) goto overlay;