[ruby/yarp] Handle missing terminators in parenthesized expression

https://github.com/ruby/yarp/commit/a8b54e8ed0
2023-09-15 11:36:06 -04:00 · 2023-09-15 11:36:06 -04:00 · d2c75bb937
--- a/test/yarp/errors_test.rb
+++ b/test/yarp/errors_test.rb
@ -167,11 +167,18 @@ module YARP
    def test_unterminated_parenthesized_expression
      assert_errors expression('(1 + 2'), '(1 + 2', [
        ["Expected a newline or semicolon after the statement", 6..6],
        ["Cannot parse the expression", 6..6],
        ["Expected a matching `)`", 6..6]
      ]
    end
    def test_missing_terminator_in_parentheses
      assert_error_messages "(0 0)", [
        "Expected a newline or semicolon after the statement"
      ]
    end
    def test_unterminated_argument_expression
      assert_errors expression('a %'), 'a %', [
        ["Invalid `%` token", 2..3],
@ -187,6 +194,7 @@ module YARP
    def test_1_2_3
      assert_errors expression("(1, 2, 3)"), "(1, 2, 3)", [
        ["Expected a newline or semicolon after the statement", 2..2],
        ["Cannot parse the expression", 2..2],
        ["Expected a matching `)`", 2..2],
        ["Expected a newline or semicolon after the statement", 2..2],
@ -194,16 +202,17 @@ module YARP
        ["Expected a newline or semicolon after the statement", 5..5],
        ["Cannot parse the expression", 5..5],
        ["Expected a newline or semicolon after the statement", 8..8],
-        ["Cannot parse the expression", 8..8],
+        ["Cannot parse the expression", 8..8]
      ]
    end
    def test_return_1_2_3
      assert_error_messages "return(1, 2, 3)", [
        "Expected a newline or semicolon after the statement",
        "Cannot parse the expression",
        "Expected a matching `)`",
        "Expected a newline or semicolon after the statement",
-        "Cannot parse the expression",
+        "Cannot parse the expression"
      ]
    end
@ -215,10 +224,11 @@ module YARP
    def test_next_1_2_3
      assert_errors expression("next(1, 2, 3)"), "next(1, 2, 3)", [
        ["Expected a newline or semicolon after the statement", 6..6],
        ["Cannot parse the expression", 6..6],
        ["Expected a matching `)`", 6..6],
        ["Expected a newline or semicolon after the statement", 12..12],
-        ["Cannot parse the expression", 12..12],
+        ["Cannot parse the expression", 12..12]
      ]
    end
@ -230,10 +240,11 @@ module YARP
    def test_break_1_2_3
      assert_errors expression("break(1, 2, 3)"), "break(1, 2, 3)", [
        ["Expected a newline or semicolon after the statement", 7..7],
        ["Cannot parse the expression", 7..7],
        ["Expected a matching `)`", 7..7],
        ["Expected a newline or semicolon after the statement", 13..13],
-        ["Cannot parse the expression", 13..13],
+        ["Cannot parse the expression", 13..13]
      ]
    end
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@ -11350,14 +11350,22 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                return (yp_node_t *) yp_parentheses_node_create(parser, &opening, NULL, &parser->previous);
            }
-            // Otherwise, we're going to parse the first statement in the list of
+            // Otherwise, we're going to parse the first statement in the list
-            // statements within the parentheses.
+            // of statements within the parentheses.
            yp_accepts_block_stack_push(parser, true);
            yp_node_t *statement = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION);
            while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
-            // If we hit a right parenthesis, then we're done parsing the parentheses
+            // Determine if this statement is followed by a terminator. In the
-            // node, and we can check which kind of node we should return.
+            // case of a single statement, this is fine. But in the case of
            // multiple statements it's required.
            bool terminator_found = accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
            if (terminator_found) {
                while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
            }
            // If we hit a right parenthesis, then we're done parsing the
            // parentheses node, and we can check which kind of node we should
            // return.
            if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
                if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
                    lex_state_set(parser, YP_LEX_STATE_ENDARG);
@ -11404,10 +11412,14 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
            yp_statements_node_t *statements = yp_statements_node_create(parser);
            yp_statements_node_body_append(statements, statement);
-            while (!match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
+            // If we didn't find a terminator and we didn't find a right
-                // Ignore semicolon without statements before them
+            // parenthesis, then this is a syntax error.
-                if (accept2(parser, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE)) continue;
+            if (!terminator_found) {
                yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT);
            }
            // Parse each statement within the parentheses.
            while (true) {
                yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION);
                yp_statements_node_body_append(statements, node);
@ -11420,7 +11432,20 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                    break;
                }
-                if (!accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) break;
+                // If we couldn't parse an expression at all, then we need to
                // bail out of the loop.
                if (YP_NODE_TYPE_P(node, YP_MISSING_NODE)) break;
                // If we successfully parsed a statement, then we are going to
                // need terminator to delimit them.
                if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
                    while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
                    if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) break;
                } else if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
                    break;
                } else {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT);
                }
            }
            context_pop(parser);