From 2893550452f6f3cadb17c670da185813d7d0a835 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 24 May 2019 16:10:59 +0900 Subject: [PATCH] Mixed encoding error can continue to parse --- parse.y | 19 ++++++++----------- test/ruby/test_parse.rb | 3 +++ test/ruby/test_syntax.rb | 15 +++++++++++---- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/parse.y b/parse.y index 92e4d05113..2b34e8ac2f 100644 --- a/parse.y +++ b/parse.y @@ -6180,7 +6180,7 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp, } /* return value is for ?\u3042 */ -static int +static void parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp, int string_literal, int symbol_literal, int regexp_literal) { @@ -6214,7 +6214,7 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp, unterminated: literal_flush(p, p->lex.pcur); yyerror0("unterminated Unicode escape"); - return 0; + return; } if (regexp_literal) tokadd(p, close_brace); @@ -6222,11 +6222,11 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp, } else { /* handle \uxxxx form */ if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) { - return 0; + return; } } - return TRUE; + return; } #define ESCAPE_CONTROL 1 @@ -6568,11 +6568,9 @@ tokadd_string(struct parser_params *p, tokadd(p, '\\'); break; } - if (!parser_tokadd_utf8(p, enc, term, - func & STR_FUNC_SYMBOL, - func & STR_FUNC_REGEXP)) { - continue; - } + parser_tokadd_utf8(p, enc, term, + func & STR_FUNC_SYMBOL, + func & STR_FUNC_REGEXP); continue; default: @@ -8070,8 +8068,7 @@ parse_qmark(struct parser_params *p, int space_seen) if (peek(p, 'u')) { nextc(p); enc = rb_utf8_encoding(); - if (!parser_tokadd_utf8(p, &enc, -1, 0, 0)) - return 0; + parser_tokadd_utf8(p, &enc, -1, 0, 0); } else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) { nextc(p); diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb index dc4c143241..c59454f8f7 100644 --- a/test/ruby/test_parse.rb +++ b/test/ruby/test_parse.rb @@ -562,6 +562,9 @@ class TestParse < Test::Unit::TestCase assert_raise(SyntaxError) { eval(" ?a\x8a".force_encoding("utf-8")) } assert_equal("\u{1234}", eval("?\u{1234}")) assert_equal("\u{1234}", eval('?\u{1234}')) + assert_equal("\u{1234}", eval('?\u1234')) + e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape') + assert_not_match(/end-of-input/, e.message) end def test_percent diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index 7bf1e0e43c..e640262d90 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -775,32 +775,39 @@ eom end def test_heredoc_mixed_encoding - assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') #encoding: cp932 <<-TEXT \xe9\x9d\u1234 TEXT HEREDOC - assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + assert_not_match(/end-of-input/, e.message) + + e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') #encoding: cp932 <<-TEXT \xe9\x9d \u1234 TEXT HEREDOC - assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + assert_not_match(/end-of-input/, e.message) + + e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') #encoding: cp932 <<-TEXT \u1234\xe9\x9d TEXT HEREDOC - assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + assert_not_match(/end-of-input/, e.message) + + e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') #encoding: cp932 <<-TEXT \u1234 \xe9\x9d TEXT HEREDOC + assert_not_match(/end-of-input/, e.message) end def test_lineno_operation_brace_block