зеркало из https://github.com/github/ruby.git
* string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.
(str_strlen): UTF-8 character count is only applicable for valid UTF-8 string.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15504 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
c2459f35ac
Коммит
a47e8e776c
|
@@ -1,3 +1,9 @@
|
|||
Sat Feb 16 16:14:35 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.
|
||||
(str_strlen): UTF-8 character count is only applicable for valid
|
||||
UTF-8 string.
|
||||
|
||||
Sat Feb 16 13:16:49 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* string.c (rb_str_sub_bang): stringize replacing hash values.
|
||||
|
|
61
string.c
61
string.c
|
@@ -597,35 +597,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
|
|||
if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
|
||||
return (e - p) / rb_enc_mbminlen(enc);
|
||||
}
|
||||
#ifdef NONASCII_MASK
|
||||
else if (enc == rb_utf8_encoding()) {
|
||||
if (sizeof(long) * 2 < e - p) {
|
||||
const unsigned long *s, *t;
|
||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
||||
for (c=0; p<(const char *)s; p++) {
|
||||
if (((*p)&0xC0) != 0x80) c++;
|
||||
}
|
||||
while (s < t) {
|
||||
unsigned long d = *s;
|
||||
d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
|
||||
d = (d>>7) + (d>>15);
|
||||
d = d + (d>>16);
|
||||
#if NONASCII_MASK == 0x8080808080808080UL
|
||||
d = d + (d>>32);
|
||||
#endif
|
||||
c += (long)(d&0xF);
|
||||
s++;
|
||||
}
|
||||
p = (const char *)t;
|
||||
}
|
||||
for (; p<e; p++) {
|
||||
if (((*p)&0xC0) != 0x80) c++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
else if (rb_enc_asciicompat(enc)) {
|
||||
c = 0;
|
||||
while (p < e) {
|
||||
|
@@ -658,6 +630,37 @@ str_strlen(VALUE str, rb_encoding *enc)
|
|||
if (!enc) enc = STR_ENC_GET(str);
|
||||
p = RSTRING_PTR(str);
|
||||
e = RSTRING_END(str);
|
||||
#ifdef NONASCII_MASK
|
||||
if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
|
||||
enc == rb_utf8_encoding()) {
|
||||
len = 0;
|
||||
if (sizeof(long) * 2 < e - p) {
|
||||
const unsigned long *s, *t;
|
||||
const VALUE lowbits = sizeof(unsigned long) - 1;
|
||||
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
|
||||
t = (const unsigned long*)(~lowbits & (VALUE)e);
|
||||
for (len=0; p<(const char *)s; p++) {
|
||||
if (((*p)&0xC0) != 0x80) len++;
|
||||
}
|
||||
while (s < t) {
|
||||
unsigned long d = *s;
|
||||
d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
|
||||
d = (d>>7) + (d>>15);
|
||||
d = d + (d>>16);
|
||||
#if NONASCII_MASK == 0x8080808080808080UL
|
||||
d = d + (d>>32);
|
||||
#endif
|
||||
len += (long)(d&0xF);
|
||||
s++;
|
||||
}
|
||||
p = (const char *)t;
|
||||
}
|
||||
for (; p<e; p++) {
|
||||
if (((*p)&0xC0) != 0x80) len++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
len = rb_enc_strlen(p, e, enc);
|
||||
if (len < 0) {
|
||||
rb_raise(rb_eArgError, "invalid mbstring sequence");
|
||||
|
|
Загрузка…
Ссылка в новой задаче