parse.y: reject invalid codepoint

* parse.y (parser_tokadd_codepoint): reject invalid codepoints,
  i.e. those in the surrogate blocks (including surrogate pairs),
  as mruby does.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56956 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2016-12-01 08:26:39 +00:00
Родитель df53b1421b
Коммит 74495cfa11
2 изменённых файлов: 20 добавлений и 12 удалений

21
parse.y
Просмотреть файл

@ -5757,11 +5757,15 @@ parser_tok_hex(struct parser_params *parser, size_t *numlen)
#define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n)) #define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n))
static void static int
parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp, parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
int string_literal, int regexp_literal, int string_literal, int regexp_literal,
int codepoint, int numlen) int codepoint, int numlen)
{ {
if ((codepoint & 0xfffff800) == 0xd800) {
yyerror("invalid Unicode codepoint");
return FALSE;
}
lex_p += numlen; lex_p += numlen;
if (regexp_literal) { if (regexp_literal) {
tokcopy(numlen); tokcopy(numlen);
@ -5773,6 +5777,7 @@ parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
else if (string_literal) { else if (string_literal) {
tokadd(codepoint); tokadd(codepoint);
} }
return TRUE;
} }
/* return value is for ?\u3042 */ /* return value is for ?\u3042 */
@ -5806,8 +5811,11 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
yyerror("invalid Unicode codepoint (too large)"); yyerror("invalid Unicode codepoint (too large)");
return 0; return 0;
} }
parser_tokadd_codepoint(parser, encp,string_literal, regexp_literal, if (!parser_tokadd_codepoint(parser, encp,
codepoint, (int)numlen); string_literal, regexp_literal,
codepoint, (int)numlen)) {
return 0;
}
if (ISSPACE(c = nextc())) last = c; if (ISSPACE(c = nextc())) last = c;
} while (string_literal && c != close_brace); } while (string_literal && c != close_brace);
@ -5824,8 +5832,11 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
yyerror("invalid Unicode escape"); yyerror("invalid Unicode escape");
return 0; return 0;
} }
parser_tokadd_codepoint(parser, encp, string_literal, regexp_literal, if (!parser_tokadd_codepoint(parser, encp,
codepoint, 4); string_literal, regexp_literal,
codepoint, 4)) {
return 0;
}
} }
return codepoint; return codepoint;

Просмотреть файл

@ -264,12 +264,9 @@ EOS
assert_raise(SyntaxError) { eval %q("\u{ 123 456}")} # extra space assert_raise(SyntaxError) { eval %q("\u{ 123 456}")} # extra space
assert_raise(SyntaxError) { eval %q("\u{123 456}")} # extra space assert_raise(SyntaxError) { eval %q("\u{123 456}")} # extra space
# The utf-8 encoding object currently does not object to codepoints assert_raise(SyntaxError) { eval %q("\uD800") } # surrogate block
# in the surrogate blocks, so these do not raise an error. assert_raise(SyntaxError) { eval %q("\uDCBA") } # surrogate block
# assert_raise(SyntaxError) { "\uD800" } # surrogate block assert_raise(SyntaxError) { eval %q("\uDFFF") } # surrogate block
# assert_raise(SyntaxError) { "\uDCBA" } # surrogate block assert_raise(SyntaxError) { eval %q("\uD847\uDD9A") } # surrogate pair
# assert_raise(SyntaxError) { "\uDFFF" } # surrogate block
# assert_raise(SyntaxError) { "\uD847\uDD9A" } # surrogate pair
end end
end end