From d2c75bb9372cf4f2e63082ca493c9bdf74280c3d Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Fri, 15 Sep 2023 11:36:06 -0400
Subject: [PATCH] [ruby/yarp] Handle missing terminators in parenthesized
 expression

https://github.com/ruby/yarp/commit/a8b54e8ed0
---
 test/yarp/errors_test.rb | 19 ++++++++++++++----
 yarp/yarp.c              | 43 +++++++++++++++++++++++++++++++---------
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/test/yarp/errors_test.rb b/test/yarp/errors_test.rb
index d73e6c6ff2..d67b5e411d 100644
--- a/test/yarp/errors_test.rb
+++ b/test/yarp/errors_test.rb
@@ -167,11 +167,18 @@ module YARP
 
     def test_unterminated_parenthesized_expression
       assert_errors expression('(1 + 2'), '(1 + 2', [
+        ["Expected a newline or semicolon after the statement", 6..6],
         ["Cannot parse the expression", 6..6],
         ["Expected a matching `)`", 6..6]
       ]
     end
 
+    def test_missing_terminator_in_parentheses
+      assert_error_messages "(0 0)", [
+        "Expected a newline or semicolon after the statement"
+      ]
+    end
+
     def test_unterminated_argument_expression
       assert_errors expression('a %'), 'a %', [
         ["Invalid `%` token", 2..3],
@@ -187,6 +194,7 @@ module YARP
 
     def test_1_2_3
       assert_errors expression("(1, 2, 3)"), "(1, 2, 3)", [
+        ["Expected a newline or semicolon after the statement", 2..2],
         ["Cannot parse the expression", 2..2],
         ["Expected a matching `)`", 2..2],
         ["Expected a newline or semicolon after the statement", 2..2],
@@ -194,16 +202,17 @@ module YARP
         ["Expected a newline or semicolon after the statement", 5..5],
         ["Cannot parse the expression", 5..5],
         ["Expected a newline or semicolon after the statement", 8..8],
-        ["Cannot parse the expression", 8..8],
+        ["Cannot parse the expression", 8..8]
       ]
     end
 
     def test_return_1_2_3
       assert_error_messages "return(1, 2, 3)", [
+        "Expected a newline or semicolon after the statement",
         "Cannot parse the expression",
         "Expected a matching `)`",
         "Expected a newline or semicolon after the statement",
-        "Cannot parse the expression",
+        "Cannot parse the expression"
       ]
     end
 
@@ -215,10 +224,11 @@ module YARP
 
     def test_next_1_2_3
       assert_errors expression("next(1, 2, 3)"), "next(1, 2, 3)", [
+        ["Expected a newline or semicolon after the statement", 6..6],
         ["Cannot parse the expression", 6..6],
         ["Expected a matching `)`", 6..6],
         ["Expected a newline or semicolon after the statement", 12..12],
-        ["Cannot parse the expression", 12..12],
+        ["Cannot parse the expression", 12..12]
       ]
     end
 
@@ -230,10 +240,11 @@ module YARP
 
     def test_break_1_2_3
       assert_errors expression("break(1, 2, 3)"), "break(1, 2, 3)", [
+        ["Expected a newline or semicolon after the statement", 7..7],
         ["Cannot parse the expression", 7..7],
         ["Expected a matching `)`", 7..7],
         ["Expected a newline or semicolon after the statement", 13..13],
-        ["Cannot parse the expression", 13..13],
+        ["Cannot parse the expression", 13..13]
       ]
     end
 
diff --git a/yarp/yarp.c b/yarp/yarp.c
index 337eae3da1..242abdba82 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -11350,14 +11350,22 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                 return (yp_node_t *) yp_parentheses_node_create(parser, &opening, NULL, &parser->previous);
             }
 
-            // Otherwise, we're going to parse the first statement in the list of
-            // statements within the parentheses.
+            // Otherwise, we're going to parse the first statement in the list
+            // of statements within the parentheses.
             yp_accepts_block_stack_push(parser, true);
             yp_node_t *statement = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION);
-            while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
 
-            // If we hit a right parenthesis, then we're done parsing the parentheses
-            // node, and we can check which kind of node we should return.
+            // Determine if this statement is followed by a terminator. In the
+            // case of a single statement, this is fine. But in the case of
+            // multiple statements it's required.
+            bool terminator_found = accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
+            if (terminator_found) {
+                while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
+            }
+
+            // If we hit a right parenthesis, then we're done parsing the
+            // parentheses node, and we can check which kind of node we should
+            // return.
             if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
                 if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
                     lex_state_set(parser, YP_LEX_STATE_ENDARG);
@@ -11404,10 +11412,14 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
             yp_statements_node_t *statements = yp_statements_node_create(parser);
             yp_statements_node_body_append(statements, statement);
 
-            while (!match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
-                // Ignore semicolon without statements before them
-                if (accept2(parser, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE)) continue;
+            // If we didn't find a terminator and we didn't find a right
+            // parenthesis, then this is a syntax error.
+            if (!terminator_found) {
+                yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT);
+            }
 
+            // Parse each statement within the parentheses.
+            while (true) {
                 yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION);
                 yp_statements_node_body_append(statements, node);
 
@@ -11420,7 +11432,20 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                     break;
                 }
 
-                if (!accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) break;
+                // If we couldn't parse an expression at all, then we need to
+                // bail out of the loop.
+                if (YP_NODE_TYPE_P(node, YP_MISSING_NODE)) break;
+
+                // If we successfully parsed a statement, then we are going to
+                // need terminator to delimit them.
+                if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
+                    while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
+                    if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) break;
+                } else if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
+                    break;
+                } else {
+                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT);
+                }
             }
 
             context_pop(parser);