зеркало из https://github.com/github/ruby.git
Use end of char boundary in start_with?
Previously we used the next character following the found prefix to determine if the match ended on a broken character. This caused surprising behaviour when a valid character was followed by a UTF-8 continuation byte. This commit changes the behaviour to instead look for the end of the last character in the prefix. [Bug #19784] Co-authored-by: ywenc <ywenc@github.com> Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
This commit is contained in:
Родитель
2ca0f01015
Коммит
d89b15cdce
|
@ -125,6 +125,15 @@ at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
|
|||
return rb_enc_left_char_head(s, p, e, enc) == p;
|
||||
}
|
||||
|
||||
/*
 * Report whether `p` is a "right" character boundary within the byte range
 * [s, e) interpreted in encoding `enc`.
 *
 * s   - start of the string buffer
 * p   - candidate position; must satisfy s <= p <= e (asserted below)
 * e   - end of the string buffer
 * enc - encoding used to locate character heads
 *
 * Returns true when rb_enc_right_char_head() maps `p` to itself, false
 * otherwise.
 *
 * NOTE(review): rb_enc_right_char_head() is defined elsewhere; presumably it
 * moves forward to the next character head when `p` falls inside a multibyte
 * character, which is what lets callers check that a prefix ends at the end
 * of its last character -- confirm against the encoding API.
 */
static inline bool
|
||||
at_char_right_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
|
||||
{
|
||||
/* Precondition: p must not precede the start of the buffer. */
RUBY_ASSERT(s <= p);
|
||||
/* Precondition: p must not run past the end of the buffer. */
RUBY_ASSERT(p <= e);
|
||||
|
||||
/* p is a boundary exactly when the right-adjusted head is p itself. */
return rb_enc_right_char_head(s, p, e, enc) == p;
|
||||
}
|
||||
|
||||
/* expect tail call optimization */
|
||||
// YJIT needs this function to never allocate and never raise
|
||||
static inline VALUE
|
||||
|
|
4
string.c
4
string.c
|
@ -10472,7 +10472,7 @@ rb_str_start_with(int argc, VALUE *argv, VALUE str)
|
|||
p = RSTRING_PTR(str);
|
||||
e = p + slen;
|
||||
s = p + tlen;
|
||||
if (!at_char_boundary(p, s, e, enc))
|
||||
if (!at_char_right_boundary(p, s, e, enc))
|
||||
continue;
|
||||
if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
|
||||
return Qtrue;
|
||||
|
@ -10554,7 +10554,7 @@ deleted_prefix_length(VALUE str, VALUE prefix)
|
|||
}
|
||||
const char *strend = strptr + olen;
|
||||
const char *after_prefix = strptr + prefixlen;
|
||||
if (!at_char_boundary(strptr, after_prefix, strend, enc)) {
|
||||
if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
|
||||
/* prefix does not end at char-boundary */
|
||||
return 0;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче