[PRISM] Match unterminated error messages

2024-05-01 17:24:30 -04:00 · 2024-05-01 17:24:30 -04:00 · 4e8ae5d32a
--- a/prism/prism.c
+++ b/prism/prism.c
@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
 #define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)

+/**
+ * Add a PM_ERR_HEREDOC_TERM error spanning the heredoc identifier stored on
+ * the given lex mode; the identifier text is also passed as the message's
+ * format arguments. The location comes from the lex mode, not a node or token.
+ */
+static void
+pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
+    const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
+    size_t ident_length = lex_mode->as.heredoc.ident_length;
+
+    PM_PARSER_ERR_FORMAT(
+        parser,
+        ident_start,
+        ident_start + ident_length,
+        PM_ERR_HEREDOC_TERM,
+        (int) ident_length, // the message's `%.*s` takes an int length first
+        (const char *) ident_start
+    );
+}
+
 /******************************************************************************/
 /* Scope-related functions                                                    */
 /******************************************************************************/
@ -10836,7 +10856,7 @@ parser_lex(pm_parser_t *parser) {
                                        // this is not a valid heredoc declaration. In this case we
                                        // will add an error, but we will still return a heredoc
                                        // start.
-                                        pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
+                                        pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
                                        body_start = parser->end;
                                    } else {
                                        // Otherwise, we want to indicate that the body of the
@ -12163,7 +12183,7 @@ parser_lex(pm_parser_t *parser) {
            // terminator) but still continue parsing so that content after the
            // declaration of the heredoc can be parsed.
            if (parser->current.end >= parser->end) {
-                pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
+                pm_parser_err_heredoc_term(parser, lex_mode);
                parser->next_start = lex_mode->as.heredoc.next_start;
                parser->heredoc_end = parser->current.end;
                lex_state_set(parser, PM_LEX_STATE_END);
@ -17468,8 +17488,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
            if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
                // If we get here, then we have an empty heredoc. We'll create
                // an empty content token and return an empty string node.
-                lex_mode_pop(parser);
-                expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+                if (match1(parser, PM_TOKEN_HEREDOC_END)) {
+                    lex_mode_pop(parser);
+                    parser_lex(parser);
+                } else {
+                    pm_parser_err_heredoc_term(parser, lex_mode);
+                    lex_mode_pop(parser);
+                    parser->previous.start = parser->previous.end;
+                    parser->previous.type = PM_TOKEN_MISSING;
+                }
+
                pm_token_t content = parse_strings_empty_content(parser->previous.start);

                if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@ -17510,8 +17538,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                }

                node = (pm_node_t *) cast;
-                lex_mode_pop(parser);
-                expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+
+                if (match1(parser, PM_TOKEN_HEREDOC_END)) {
+                    lex_mode_pop(parser);
+                    parser_lex(parser);
+                } else {
+                    pm_parser_err_heredoc_term(parser, lex_mode);
+                    lex_mode_pop(parser);
+                    parser->previous.start = parser->previous.end;
+                    parser->previous.type = PM_TOKEN_MISSING;
+                }
            } else {
                // If we get here, then we have multiple parts in the heredoc,
                // so we'll need to create an interpolated string node to hold
@ -17533,8 +17569,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                    pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
                    cast->parts = parts;

-                    lex_mode_pop(parser);
-                    expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+                    if (match1(parser, PM_TOKEN_HEREDOC_END)) {
+                        lex_mode_pop(parser);
+                        parser_lex(parser);
+                    } else {
+                        pm_parser_err_heredoc_term(parser, lex_mode);
+                        lex_mode_pop(parser);
+                        parser->previous.start = parser->previous.end;
+                        parser->previous.type = PM_TOKEN_MISSING;
+                    }

                    pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
                    cast->base.location = cast->opening_loc;
@ -17543,8 +17586,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                    pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
                    pm_node_list_free(&parts);

-                    lex_mode_pop(parser);
-                    expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+                    if (match1(parser, PM_TOKEN_HEREDOC_END)) {
+                        lex_mode_pop(parser);
+                        parser_lex(parser);
+                    } else {
+                        pm_parser_err_heredoc_term(parser, lex_mode);
+                        lex_mode_pop(parser);
+                        parser->previous.start = parser->previous.end;
+                        parser->previous.type = PM_TOKEN_MISSING;
+                    }

                    pm_interpolated_string_node_closing_set(cast, &parser->previous);
                    cast->base.location = cast->opening_loc;
--- a/prism/templates/src/diagnostic.c.erb
+++ b/prism/templates/src/diagnostic.c.erb
@ -207,7 +207,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
    [PM_ERR_HASH_ROCKET]                        = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_HASH_TERM]                          = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_HASH_VALUE]                         = { "expected a value in the hash literal", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_HEREDOC_TERM]                       = { "could not find a terminator for the heredoc", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HEREDOC_TERM]                       = { "unterminated heredoc; can't find string \"%.*s\"", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_INCOMPLETE_QUESTION_MARK]           = { "incomplete expression at `?`", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3]      = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_INCOMPLETE_VARIABLE_CLASS]          = { "'%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
@ -243,13 +243,13 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
    [PM_ERR_LAMBDA_TERM_BRACE]                  = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_LAMBDA_TERM_END]                    = { "expected a lambda block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_LIST_I_LOWER_ELEMENT]               = { "expected a symbol in a `%i` list", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_LIST_I_LOWER_TERM]                  = { "expected a closing delimiter for the `%i` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_I_LOWER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%i`", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_LIST_I_UPPER_ELEMENT]               = { "expected a symbol in a `%I` list", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_LIST_I_UPPER_TERM]                  = { "expected a closing delimiter for the `%I` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_I_UPPER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%I`", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_LIST_W_LOWER_ELEMENT]               = { "expected a string in a `%w` list", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_LIST_W_LOWER_TERM]                  = { "expected a closing delimiter for the `%w` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_W_LOWER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%w`", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_LIST_W_UPPER_ELEMENT]               = { "expected a string in a `%W` list", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_LIST_W_UPPER_TERM]                  = { "expected a closing delimiter for the `%W` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_W_UPPER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%W`", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_MALLOC_FAILED]                      = { "failed to allocate memory", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_MIXED_ENCODING]                     = { "UTF-8 mixed within %s source", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_MODULE_IN_METHOD]                   = { "unexpected module definition in method body", PM_ERROR_LEVEL_SYNTAX },
@ -306,7 +306,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
    [PM_ERR_REGEXP_NON_ESCAPED_MBC]             = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_REGEXP_INVALID_UNICODE_RANGE]       = { "invalid Unicode range: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_REGEXP_UNKNOWN_OPTIONS]             = { "unknown regexp %s: %.*s", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_REGEXP_TERM]                        = { "expected a closing delimiter for the regular expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_TERM]                        = { "unterminated regular expression; expected a closing delimiter", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP]   = { "UTF-8 character in non UTF-8 regexp: /%s/", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_RESCUE_EXPRESSION]                  = { "expected a rescued expression", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_RESCUE_MODIFIER_VALUE]              = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX },
@ -323,7 +323,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
    [PM_ERR_STRING_LITERAL_EOF]                 = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_STRING_LITERAL_TERM]                = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_SYMBOL_INVALID]                     = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719
-    [PM_ERR_SYMBOL_TERM_DYNAMIC]                = { "expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_SYMBOL_TERM_DYNAMIC]                = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_SYMBOL_TERM_INTERPOLATED]           = { "expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_TERNARY_COLON]                      = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX },
    [PM_ERR_TERNARY_EXPRESSION_FALSE]           = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },