* encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
  and rb_enc_codelen() in one function to reduce calls.

* encoding.c (rb_enc_codepoint): compatibility function.

* sprintf.c (rb_str_format): use rb_enc_codepoint_len().

* string.c (rb_str_inspect, rb_str_upcase_bang,
  rb_str_downcase_bang, rb_str_capitalize_bang,
  rb_str_swapcase_bang, trnext, tr_trans, rb_str_delete_bang,
  rb_str_squeeze_bang, rb_str_count, rb_str_split_m,
  rb_str_each_line, rb_str_each_codepoint, rb_str_lstrip_bang,
  sym_printable): ditto.

* transcode.c (make_econv_exception): use rb_enc_mbc_to_codepoint()

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@23493 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2009-05-19 16:59:22 +00:00
Родитель 2510c468ff
Коммит 91e5ba1cb8
6 изменённых файлов: 81 добавлений и 46 удалений

Просмотреть файл

@ -1,3 +1,21 @@
Wed May 20 00:13:38 2009 Yukihiro Matsumoto <matz@ruby-lang.org>
* encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
and rb_enc_codelen() in one function to reduce calls.
* encoding.c (rb_enc_codepoint): compatibility function.
* sprintf.c (rb_str_format): use rb_enc_codepoint_len().
* string.c (rb_str_inspect, rb_str_upcase_bang,
rb_str_downcase_bang, rb_str_capitalize_bang,
rb_str_swapcase_bang, trnext, tr_trans, rb_str_delete_bang,
rb_str_squeeze_bang, rb_str_count, rb_str_split_m,
rb_str_each_line, rb_str_each_codepoint, rb_str_lstrip_bang,
sym_printable): ditto.
* transcode.c (make_econv_exception): use rb_enc_mbc_to_codepoint()
Wed May 20 00:05:52 2009 Yukihiro Matsumoto <matz@ruby-lang.org>
* vm_method.c (rb_attr): should preserve encoding info.

Просмотреть файл

@ -774,18 +774,27 @@ rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
} }
unsigned int unsigned int
rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
{ {
int r; int r;
if (e <= p) if (e <= p)
rb_raise(rb_eArgError, "empty string"); rb_raise(rb_eArgError, "empty string");
r = rb_enc_precise_mbclen(p, e, enc); r = rb_enc_precise_mbclen(p, e, enc);
if (MBCLEN_CHARFOUND_P(r)) if (MBCLEN_CHARFOUND_P(r)) {
if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
return rb_enc_mbc_to_codepoint(p, e, enc); return rb_enc_mbc_to_codepoint(p, e, enc);
}
else else
rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc)); rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
} }
/*
 * Compatibility wrapper kept as a real function.
 *
 * Forwards to rb_enc_codepoint_len() with a null length pointer, so only the
 * codepoint of the character starting at p (bounded by e, in encoding enc) is
 * returned and no byte length is reported.  Any ArgumentError raised by
 * rb_enc_codepoint_len() (empty range, invalid byte sequence) propagates
 * unchanged.
 *
 * A function-like macro with the same name is defined next to the prototype
 * (#define rb_enc_codepoint(p,e,enc) ...).  The #undef below removes it so
 * that the following lines define a function instead of being macro-expanded.
 */
#undef rb_enc_codepoint
unsigned int
rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
{
return rb_enc_codepoint_len(p, e, 0, enc);
}
int int
rb_enc_codelen(int c, rb_encoding *enc) rb_enc_codelen(int c, rb_encoding *enc)
{ {

Просмотреть файл

@ -123,8 +123,14 @@ int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc);
/* -> 0x00..0x7f, -1 */ /* -> 0x00..0x7f, -1 */
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
/* -> code or raise exception */
/* -> code (and len) or raise exception */
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc);
/* prototype for obsolete function */
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
/* overriding macro */
#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc))
#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e)) #define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e))
/* -> codelen>0 or raise exception */ /* -> codelen>0 or raise exception */

Просмотреть файл

@ -625,12 +625,12 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
rb_raise(rb_eArgError, "%%c requires a character"); rb_raise(rb_eArgError, "%%c requires a character");
} }
c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc); c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
} }
else { else {
c = NUM2INT(val); c = NUM2INT(val);
n = rb_enc_codelen(c, enc);
} }
n = rb_enc_codelen(c, enc);
if (n <= 0) { if (n <= 0) {
rb_raise(rb_eArgError, "invalid character"); rb_raise(rb_eArgError, "invalid character");
} }

Просмотреть файл

@ -4168,9 +4168,7 @@ rb_str_inspect(VALUE str)
} }
n = MBCLEN_CHARFOUND_LEN(n); n = MBCLEN_CHARFOUND_LEN(n);
c = rb_enc_codepoint(p, pend, enc); c = rb_enc_codepoint_len(p, pend, &n, enc);
n = rb_enc_codelen(c, enc);
p += n; p += n;
if (c == '"'|| c == '\\' || if (c == '"'|| c == '\\' ||
(c == '#' && (c == '#' &&
@ -4273,7 +4271,7 @@ rb_str_dump(VALUE str)
char buf[32]; char buf[32];
int n = rb_enc_precise_mbclen(p-1, pend, enc); int n = rb_enc_precise_mbclen(p-1, pend, enc);
if (MBCLEN_CHARFOUND_P(n)) { if (MBCLEN_CHARFOUND_P(n)) {
int cc = rb_enc_codepoint(p-1, pend, enc); int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
sprintf(buf, "%x", cc); sprintf(buf, "%x", cc);
len += strlen(buf)+4; len += strlen(buf)+4;
p += MBCLEN_CHARFOUND_LEN(n)-1; p += MBCLEN_CHARFOUND_LEN(n)-1;
@ -4346,7 +4344,7 @@ rb_str_dump(VALUE str)
if (u8) { if (u8) {
int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1; int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
if (MBCLEN_CHARFOUND_P(n)) { if (MBCLEN_CHARFOUND_P(n)) {
int cc = rb_enc_codepoint(p-1, pend, enc); int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
p += n; p += n;
snprintf(q, qend-q, "u{%x}", cc); snprintf(q, qend-q, "u{%x}", cc);
q += strlen(q); q += strlen(q);
@ -4395,6 +4393,7 @@ rb_str_upcase_bang(VALUE str)
rb_encoding *enc; rb_encoding *enc;
char *s, *send; char *s, *send;
int modify = 0; int modify = 0;
int n;
str_modify_keep_cr(str); str_modify_keep_cr(str);
enc = STR_ENC_GET(str); enc = STR_ENC_GET(str);
@ -4425,13 +4424,13 @@ rb_str_upcase_bang(VALUE str)
s++; s++;
} }
else { else {
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint_len(s, send, &n, enc);
if (rb_enc_islower(c, enc)) { if (rb_enc_islower(c, enc)) {
/* assuming toupper returns codepoint with same size */ /* assuming toupper returns codepoint with same size */
rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
modify = 1; modify = 1;
} }
s += rb_enc_codelen(c, enc); s += n;
} }
} }
} }
@ -4498,6 +4497,7 @@ rb_str_downcase_bang(VALUE str)
while (s < send) { while (s < send) {
unsigned int c; unsigned int c;
int n;
if (ascompat && (c = *(unsigned char*)s) < 0x80) { if (ascompat && (c = *(unsigned char*)s) < 0x80) {
if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
@ -4507,13 +4507,13 @@ rb_str_downcase_bang(VALUE str)
s++; s++;
} }
else { else {
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint_len(s, send, &n, enc);
if (rb_enc_isupper(c, enc)) { if (rb_enc_isupper(c, enc)) {
/* assuming toupper returns codepoint with same size */ /* assuming toupper returns codepoint with same size */
rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc); rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
modify = 1; modify = 1;
} }
s += rb_enc_codelen(c, enc); s += n;
} }
} }
} }
@ -4565,6 +4565,7 @@ rb_str_capitalize_bang(VALUE str)
char *s, *send; char *s, *send;
int modify = 0; int modify = 0;
unsigned int c; unsigned int c;
int n;
str_modify_keep_cr(str); str_modify_keep_cr(str);
enc = STR_ENC_GET(str); enc = STR_ENC_GET(str);
@ -4572,19 +4573,19 @@ rb_str_capitalize_bang(VALUE str)
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
s = RSTRING_PTR(str); send = RSTRING_END(str); s = RSTRING_PTR(str); send = RSTRING_END(str);
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint_len(s, send, &n, enc);
if (rb_enc_islower(c, enc)) { if (rb_enc_islower(c, enc)) {
rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
modify = 1; modify = 1;
} }
s += rb_enc_codelen(c, enc); s += n;
while (s < send) { while (s < send) {
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint_len(s, send, &n, enc);
if (rb_enc_isupper(c, enc)) { if (rb_enc_isupper(c, enc)) {
rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc); rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
modify = 1; modify = 1;
} }
s += rb_enc_codelen(c, enc); s += n;
} }
if (modify) return str; if (modify) return str;
@ -4629,13 +4630,14 @@ rb_str_swapcase_bang(VALUE str)
rb_encoding *enc; rb_encoding *enc;
char *s, *send; char *s, *send;
int modify = 0; int modify = 0;
int n;
str_modify_keep_cr(str); str_modify_keep_cr(str);
enc = STR_ENC_GET(str); enc = STR_ENC_GET(str);
rb_str_check_dummy_enc(enc); rb_str_check_dummy_enc(enc);
s = RSTRING_PTR(str); send = RSTRING_END(str); s = RSTRING_PTR(str); send = RSTRING_END(str);
while (s < send) { while (s < send) {
unsigned int c = rb_enc_codepoint(s, send, enc); unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
if (rb_enc_isupper(c, enc)) { if (rb_enc_isupper(c, enc)) {
/* assuming toupper returns codepoint with same size */ /* assuming toupper returns codepoint with same size */
@ -4647,7 +4649,7 @@ rb_str_swapcase_bang(VALUE str)
rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
modify = 1; modify = 1;
} }
s += rb_enc_mbclen(s, send, enc); s += n;
} }
if (modify) return str; if (modify) return str;
@ -4686,19 +4688,21 @@ struct tr {
static unsigned int static unsigned int
trnext(struct tr *t, rb_encoding *enc) trnext(struct tr *t, rb_encoding *enc)
{ {
int n;
for (;;) { for (;;) {
if (!t->gen) { if (!t->gen) {
if (t->p == t->pend) return -1; if (t->p == t->pend) return -1;
if (t->p < t->pend - 1 && *t->p == '\\') { if (t->p < t->pend - 1 && *t->p == '\\') {
t->p++; t->p++;
} }
t->now = rb_enc_codepoint(t->p, t->pend, enc); t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
t->p += rb_enc_codelen(t->now, enc); t->p += n;
if (t->p < t->pend - 1 && *t->p == '-') { if (t->p < t->pend - 1 && *t->p == '-') {
t->p++; t->p++;
if (t->p < t->pend) { if (t->p < t->pend) {
unsigned int c = rb_enc_codepoint(t->p, t->pend, enc); unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
t->p += rb_enc_codelen(c, enc); t->p += n;
if (t->now > c) continue; if (t->now > c) continue;
t->gen = 1; t->gen = 1;
t->max = c; t->max = c;
@ -4819,8 +4823,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
while (s < send) { while (s < send) {
int may_modify = 0; int may_modify = 0;
c0 = c = rb_enc_codepoint(s, send, e1);
clen = rb_enc_codelen(c, e1); c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
s += clen; s += clen;
@ -4897,8 +4901,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
while (s < send) { while (s < send) {
int may_modify = 0; int may_modify = 0;
c0 = c = rb_enc_codepoint(s, send, e1); c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
clen = rb_enc_codelen(c, e1);
tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
if (c < 256) { if (c < 256) {
@ -5125,8 +5128,7 @@ rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
s++; s++;
} }
else { else {
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint_len(s, send, &clen, enc);
clen = rb_enc_codelen(c, enc);
if (tr_find(c, squeez, del, nodel)) { if (tr_find(c, squeez, del, nodel)) {
modify = 1; modify = 1;
@ -5231,8 +5233,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
s++; s++;
} }
else { else {
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint_len(s, send, &clen, enc);
clen = rb_enc_codelen(c, enc);
if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) { if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
if (t != s) rb_enc_mbcput(c, t, enc); if (t != s) rb_enc_mbcput(c, t, enc);
@ -5371,8 +5372,7 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
s++; s++;
} }
else { else {
c = rb_enc_codepoint(s, send, enc); c = rb_enc_codepoint_len(s, send, &clen, enc);
clen = rb_enc_codelen(c, enc);
if (tr_find(c, table, del, nodel)) { if (tr_find(c, table, del, nodel)) {
i++; i++;
} }
@ -5542,8 +5542,10 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
} }
else { else {
while (ptr < eptr) { while (ptr < eptr) {
c = rb_enc_codepoint(ptr, eptr, enc); int n;
ptr += rb_enc_mbclen(ptr, eptr, enc);
c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
ptr += n;
if (skip) { if (skip) {
if (rb_enc_isspace(c, enc)) { if (rb_enc_isspace(c, enc)) {
beg = ptr - bptr; beg = ptr - bptr;
@ -5773,13 +5775,12 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
} }
while (p < pend) { while (p < pend) {
unsigned int c = rb_enc_codepoint(p, pend, enc); unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
again: again:
n = rb_enc_codelen(c, enc);
if (rslen == 0 && c == newline) { if (rslen == 0 && c == newline) {
p += n; p += n;
if (p < pend && (c = rb_enc_codepoint(p, pend, enc)) != newline) { if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
goto again; goto again;
} }
while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) { while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
@ -5940,8 +5941,7 @@ rb_str_each_codepoint(VALUE str)
end = RSTRING_END(str); end = RSTRING_END(str);
enc = STR_ENC_GET(str); enc = STR_ENC_GET(str);
while (ptr < end) { while (ptr < end) {
c = rb_enc_codepoint(ptr, end, enc); c = rb_enc_codepoint_len(ptr, end, &n, enc);
n = rb_enc_codelen(c, enc);
rb_yield(UINT2NUM(c)); rb_yield(UINT2NUM(c));
ptr += n; ptr += n;
} }
@ -6180,10 +6180,11 @@ rb_str_lstrip_bang(VALUE str)
e = t = RSTRING_END(str); e = t = RSTRING_END(str);
/* remove spaces at head */ /* remove spaces at head */
while (s < e) { while (s < e) {
unsigned int cc = rb_enc_codepoint(s, e, enc); int n;
unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
if (!rb_enc_isspace(cc, enc)) break; if (!rb_enc_isspace(cc, enc)) break;
s += rb_enc_codelen(cc, enc); s += n;
} }
if (s > RSTRING_PTR(str)) { if (s > RSTRING_PTR(str)) {
@ -7057,8 +7058,9 @@ static int
sym_printable(const char *s, const char *send, rb_encoding *enc) sym_printable(const char *s, const char *send, rb_encoding *enc)
{ {
while (s < send) { while (s < send) {
int c = rb_enc_codepoint(s, send, enc); int n;
int n = rb_enc_codelen(c, enc); int c = rb_enc_codepoint_len(s, send, &n, enc);
if (!rb_enc_isprint(c, enc)) return Qfalse; if (!rb_enc_isprint(c, enc)) return Qfalse;
s += n; s += n;
} }

Просмотреть файл

@ -2027,7 +2027,7 @@ make_econv_exception(rb_econv_t *ec)
n = rb_enc_precise_mbclen(start, end, utf8); n = rb_enc_precise_mbclen(start, end, utf8);
if (MBCLEN_CHARFOUND_P(n) && if (MBCLEN_CHARFOUND_P(n) &&
(size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) { (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) {
unsigned int cc = rb_enc_codepoint(start, end, utf8); unsigned int cc = rb_enc_mbc_to_codepoint(start, end, utf8);
dumped = rb_sprintf("U+%04X", cc); dumped = rb_sprintf("U+%04X", cc);
} }
} }