зеркало из https://github.com/github/ruby.git
parse.y: reject invalid codepoint
* parse.y (parser_tokadd_codepoint): reject invalid codepoints, i.e. those in the surrogate blocks (including surrogate pairs), as mruby does. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56956 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
df53b1421b
Коммит
74495cfa11
21
parse.y
21
parse.y
|
@ -5757,11 +5757,15 @@ parser_tok_hex(struct parser_params *parser, size_t *numlen)
|
|||
|
||||
#define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n))
|
||||
|
||||
static void
|
||||
static int
|
||||
parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
|
||||
int string_literal, int regexp_literal,
|
||||
int codepoint, int numlen)
|
||||
{
|
||||
if ((codepoint & 0xfffff800) == 0xd800) {
|
||||
yyerror("invalid Unicode codepoint");
|
||||
return FALSE;
|
||||
}
|
||||
lex_p += numlen;
|
||||
if (regexp_literal) {
|
||||
tokcopy(numlen);
|
||||
|
@ -5773,6 +5777,7 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
|
|||
else if (string_literal) {
|
||||
tokadd(codepoint);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* return value is for ?\u3042 */
|
||||
|
@ -5806,8 +5811,11 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
|
|||
yyerror("invalid Unicode codepoint (too large)");
|
||||
return 0;
|
||||
}
|
||||
parser_tokadd_codepoint(parser, encp,string_literal, regexp_literal,
|
||||
codepoint, (int)numlen);
|
||||
if (!parser_tokadd_codepoint(parser, encp,
|
||||
string_literal, regexp_literal,
|
||||
codepoint, (int)numlen)) {
|
||||
return 0;
|
||||
}
|
||||
if (ISSPACE(c = nextc())) last = c;
|
||||
} while (string_literal && c != close_brace);
|
||||
|
||||
|
@ -5824,8 +5832,11 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
|
|||
yyerror("invalid Unicode escape");
|
||||
return 0;
|
||||
}
|
||||
parser_tokadd_codepoint(parser, encp, string_literal, regexp_literal,
|
||||
codepoint, 4);
|
||||
if (!parser_tokadd_codepoint(parser, encp,
|
||||
string_literal, regexp_literal,
|
||||
codepoint, 4)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return codepoint;
|
||||
|
|
|
@ -264,12 +264,9 @@ EOS
|
|||
assert_raise(SyntaxError) { eval %q("\u{ 123 456}")} # extra space
|
||||
assert_raise(SyntaxError) { eval %q("\u{123 456}")} # extra space
|
||||
|
||||
# The utf-8 encoding object currently does not object to codepoints
|
||||
# in the surrogate blocks, so these do not raise an error.
|
||||
# assert_raise(SyntaxError) { "\uD800" } # surrogate block
|
||||
# assert_raise(SyntaxError) { "\uDCBA" } # surrogate block
|
||||
# assert_raise(SyntaxError) { "\uDFFF" } # surrogate block
|
||||
# assert_raise(SyntaxError) { "\uD847\uDD9A" } # surrogate pair
|
||||
|
||||
assert_raise(SyntaxError) { eval %q("\uD800") } # surrogate block
|
||||
assert_raise(SyntaxError) { eval %q("\uDCBA") } # surrogate block
|
||||
assert_raise(SyntaxError) { eval %q("\uDFFF") } # surrogate block
|
||||
assert_raise(SyntaxError) { eval %q("\uD847\uDD9A") } # surrogate pair
|
||||
end
|
||||
end
|
||||
|
|
Загрузка…
Ссылка в новой задаче