From d2c75bb9372cf4f2e63082ca493c9bdf74280c3d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 15 Sep 2023 11:36:06 -0400 Subject: [PATCH] [ruby/yarp] Handle missing terminators in parenthesized expression https://github.com/ruby/yarp/commit/a8b54e8ed0 --- test/yarp/errors_test.rb | 19 ++++++++++++++---- yarp/yarp.c | 43 +++++++++++++++++++++++++++++++--------- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/test/yarp/errors_test.rb b/test/yarp/errors_test.rb index d73e6c6ff2..d67b5e411d 100644 --- a/test/yarp/errors_test.rb +++ b/test/yarp/errors_test.rb @@ -167,11 +167,18 @@ module YARP def test_unterminated_parenthesized_expression assert_errors expression('(1 + 2'), '(1 + 2', [ + ["Expected a newline or semicolon after the statement", 6..6], ["Cannot parse the expression", 6..6], ["Expected a matching `)`", 6..6] ] end + def test_missing_terminator_in_parentheses + assert_error_messages "(0 0)", [ + "Expected a newline or semicolon after the statement" + ] + end + def test_unterminated_argument_expression assert_errors expression('a %'), 'a %', [ ["Invalid `%` token", 2..3], @@ -187,6 +194,7 @@ module YARP def test_1_2_3 assert_errors expression("(1, 2, 3)"), "(1, 2, 3)", [ + ["Expected a newline or semicolon after the statement", 2..2], ["Cannot parse the expression", 2..2], ["Expected a matching `)`", 2..2], ["Expected a newline or semicolon after the statement", 2..2], @@ -194,16 +202,17 @@ module YARP ["Expected a newline or semicolon after the statement", 5..5], ["Cannot parse the expression", 5..5], ["Expected a newline or semicolon after the statement", 8..8], - ["Cannot parse the expression", 8..8], + ["Cannot parse the expression", 8..8] ] end def test_return_1_2_3 assert_error_messages "return(1, 2, 3)", [ + "Expected a newline or semicolon after the statement", "Cannot parse the expression", "Expected a matching `)`", "Expected a newline or semicolon after the statement", - "Cannot parse the expression", + "Cannot parse the expression" ] end @@ -215,10 +224,11 @@ module YARP def test_next_1_2_3 assert_errors expression("next(1, 2, 3)"), "next(1, 2, 3)", [ + ["Expected a newline or semicolon after the statement", 6..6], ["Cannot parse the expression", 6..6], ["Expected a matching `)`", 6..6], ["Expected a newline or semicolon after the statement", 12..12], - ["Cannot parse the expression", 12..12], + ["Cannot parse the expression", 12..12] ] end @@ -230,10 +240,11 @@ module YARP def test_break_1_2_3 assert_errors expression("break(1, 2, 3)"), "break(1, 2, 3)", [ + ["Expected a newline or semicolon after the statement", 7..7], ["Cannot parse the expression", 7..7], ["Expected a matching `)`", 7..7], ["Expected a newline or semicolon after the statement", 13..13], - ["Cannot parse the expression", 13..13], + ["Cannot parse the expression", 13..13] ] end diff --git a/yarp/yarp.c b/yarp/yarp.c index 337eae3da1..242abdba82 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -11350,14 +11350,22 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { return (yp_node_t *) yp_parentheses_node_create(parser, &opening, NULL, &parser->previous); } - // Otherwise, we're going to parse the first statement in the list of - // statements within the parentheses. + // Otherwise, we're going to parse the first statement in the list + // of statements within the parentheses. yp_accepts_block_stack_push(parser, true); yp_node_t *statement = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION); - while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)); - // If we hit a right parenthesis, then we're done parsing the parentheses - // node, and we can check which kind of node we should return. + // Determine if this statement is followed by a terminator. In the + // case of a single statement, this is fine. But in the case of + // multiple statements it's required. + bool terminator_found = accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + if (terminator_found) { + while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)); + } + + // If we hit a right parenthesis, then we're done parsing the + // parentheses node, and we can check which kind of node we should + // return. if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) { lex_state_set(parser, YP_LEX_STATE_ENDARG); @@ -11404,10 +11412,14 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { yp_statements_node_t *statements = yp_statements_node_create(parser); yp_statements_node_body_append(statements, statement); - while (!match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { - // Ignore semicolon without statements before them - if (accept2(parser, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE)) continue; + // If we didn't find a terminator and we didn't find a right + // parenthesis, then this is a syntax error. + if (!terminator_found) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT); + } + // Parse each statement within the parentheses. + while (true) { yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION); yp_statements_node_body_append(statements, node); @@ -11420,7 +11432,20 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { break; } - if (!accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) break; + // If we couldn't parse an expression at all, then we need to + // bail out of the loop. + if (YP_NODE_TYPE_P(node, YP_MISSING_NODE)) break; + + // If we successfully parsed a statement, then we are going to + // need terminator to delimit them. + if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)); + if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) break; + } else if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + break; + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT); + } } context_pop(parser);