From ba06a8259a3f21c9cbee0f4f55b82c016a45a3b9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 29 Jan 2024 17:27:45 -0500 Subject: [PATCH] [ruby/prism] Better error messages for unexpected tokens in prefix https://github.com/ruby/prism/commit/a35b8e45ee --- prism/diagnostic.c | 5 +- prism/diagnostic.h | 12 +- prism/parser.h | 3 + prism/prism.c | 152 +++++++--- prism/prism.h | 10 +- prism/templates/ext/prism/api_node.c.erb | 2 +- prism/templates/src/token_type.c.erb | 357 ++++++++++++++++++++++- test/prism/errors_test.rb | 149 +++++----- test/prism/format_errors_test.rb | 4 +- 9 files changed, 572 insertions(+), 122 deletions(-) diff --git a/prism/diagnostic.c b/prism/diagnostic.c index 3ff4a933c6..bf89ca781a 100644 --- a/prism/diagnostic.c +++ b/prism/diagnostic.c @@ -71,6 +71,8 @@ typedef struct { * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`. */ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = { + [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL }, + // Errors [PM_ERR_ALIAS_ARGUMENT] = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_FATAL }, [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_FATAL }, @@ -106,7 +108,6 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = { [PM_ERR_BLOCK_PARAM_PIPE_TERM] = { "expected the block parameters to end with `|`", PM_ERROR_LEVEL_FATAL }, [PM_ERR_BLOCK_TERM_BRACE] = { "expected a block beginning with `{` to end with `}`", PM_ERROR_LEVEL_FATAL }, [PM_ERR_BLOCK_TERM_END] = { "expected a block beginning with `do` to end with `end`", PM_ERROR_LEVEL_FATAL }, - [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL }, [PM_ERR_CANNOT_PARSE_STRING_PART] = { "cannot parse the string part", PM_ERROR_LEVEL_FATAL }, [PM_ERR_CASE_EXPRESSION_AFTER_CASE] = { "expected an expression after `case`", PM_ERROR_LEVEL_FATAL }, [PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = { "expected an expression after `when`", PM_ERROR_LEVEL_FATAL }, @@ -277,6 +278,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = { [PM_ERR_UNARY_RECEIVER_BANG] = { "expected a receiver for unary `!`", PM_ERROR_LEVEL_FATAL }, [PM_ERR_UNARY_RECEIVER_MINUS] = { "expected a receiver for unary `-`", PM_ERROR_LEVEL_FATAL }, [PM_ERR_UNARY_RECEIVER_PLUS] = { "expected a receiver for unary `+`", PM_ERROR_LEVEL_FATAL }, + [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT] = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_FATAL }, + [PM_ERR_UNEXPECTED_TOKEN_IGNORE] = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_FATAL }, [PM_ERR_UNARY_RECEIVER_TILDE] = { "expected a receiver for unary `~`", PM_ERROR_LEVEL_FATAL }, [PM_ERR_UNTIL_TERM] = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_FATAL }, [PM_ERR_VOID_EXPRESSION] = { "unexpected void value expression", PM_ERROR_LEVEL_FATAL }, diff --git a/prism/diagnostic.h b/prism/diagnostic.h index 9b600208ae..33123262b5 100644 --- a/prism/diagnostic.h +++ b/prism/diagnostic.h @@ -66,6 +66,11 @@ typedef struct { * of errors between the parser and the user. */ typedef enum { + // This is a special error that we can potentially replace by others. For + // an example of how this is used, see parse_expression_prefix. + PM_ERR_CANNOT_PARSE_EXPRESSION, + + // These are the error codes. PM_ERR_ALIAS_ARGUMENT, PM_ERR_AMPAMPEQ_MULTI_ASSIGN, PM_ERR_ARGUMENT_AFTER_BLOCK, @@ -100,7 +105,6 @@ typedef enum { PM_ERR_BLOCK_PARAM_PIPE_TERM, PM_ERR_BLOCK_TERM_BRACE, PM_ERR_BLOCK_TERM_END, - PM_ERR_CANNOT_PARSE_EXPRESSION, PM_ERR_CANNOT_PARSE_STRING_PART, PM_ERR_CASE_EXPRESSION_AFTER_CASE, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, @@ -272,6 +276,8 @@ typedef enum { PM_ERR_UNARY_RECEIVER_MINUS, PM_ERR_UNARY_RECEIVER_PLUS, PM_ERR_UNARY_RECEIVER_TILDE, + PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, + PM_ERR_UNEXPECTED_TOKEN_IGNORE, PM_ERR_UNDEF_ARGUMENT, PM_ERR_UNTIL_TERM, PM_ERR_VOID_EXPRESSION, @@ -280,13 +286,15 @@ typedef enum { PM_ERR_WRITE_TARGET_READONLY, PM_ERR_WRITE_TARGET_UNEXPECTED, PM_ERR_XSTRING_TERM, + + // These are the warning codes. PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS, PM_WARN_AMBIGUOUS_PREFIX_STAR, PM_WARN_AMBIGUOUS_SLASH, PM_WARN_END_IN_METHOD, - /* This must be the last member. */ + // This is the number of diagnostic codes. PM_DIAGNOSTIC_ID_LEN, } pm_diagnostic_id_t; diff --git a/prism/parser.h b/prism/parser.h index c7ebb64b60..6ee215c76d 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -259,6 +259,9 @@ typedef struct pm_parser pm_parser_t; * token that is understood by a parent context but not by the current context. */ typedef enum { + /** a null context, used for returning a value from a function */ + PM_CONTEXT_NONE = 0, + /** a begin statement */ PM_CONTEXT_BEGIN, diff --git a/prism/prism.c b/prism/prism.c index 36699f5894..ea2723cfaf 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) { PRISM_ATTRIBUTE_UNUSED static void debug_token(pm_token_t * token) { - fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start); + fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start); } #endif @@ -6719,21 +6719,27 @@ context_terminator(pm_context_t context, pm_token_t *token) { return token->type == PM_TOKEN_BRACE_RIGHT; case PM_CONTEXT_PREDICATE: return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON; + case PM_CONTEXT_NONE: + return false; } return false; } -static bool -context_recoverable(pm_parser_t *parser, pm_token_t *token) { +/** + * Returns the context that the given token is found to be terminating, or + * returns PM_CONTEXT_NONE. + */ +static pm_context_t +context_recoverable(const pm_parser_t *parser, pm_token_t *token) { pm_context_node_t *context_node = parser->current_context; while (context_node != NULL) { - if (context_terminator(context_node->context, token)) return true; + if (context_terminator(context_node->context, token)) return context_node->context; context_node = context_node->prev; } - return false; + return PM_CONTEXT_NONE; } static bool @@ -6761,7 +6767,7 @@ context_pop(pm_parser_t *parser) { } static bool -context_p(pm_parser_t *parser, pm_context_t context) { +context_p(const pm_parser_t *parser, pm_context_t context) { pm_context_node_t *context_node = parser->current_context; while (context_node != NULL) { @@ -6773,7 +6779,7 @@ context_p(pm_parser_t *parser, pm_context_t context) { } static bool -context_def_p(pm_parser_t *parser) { +context_def_p(const pm_parser_t *parser) { pm_context_node_t *context_node = parser->current_context; while (context_node != NULL) { @@ -6796,6 +6802,55 @@ context_def_p(pm_parser_t *parser) { return false; } +/** + * Returns a human readable string for the given context, used in error + * messages. + */ +static const char * +context_human(pm_context_t context) { + switch (context) { + case PM_CONTEXT_NONE: + assert(false && "unreachable"); + return ""; + case PM_CONTEXT_BEGIN: return "begin statement"; + case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block"; + case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block"; + case PM_CONTEXT_CASE_WHEN: return "'when' clause"; + case PM_CONTEXT_CASE_IN: return "'in' clause"; + case PM_CONTEXT_CLASS: return "class definition"; + case PM_CONTEXT_DEF: return "method definition"; + case PM_CONTEXT_DEF_PARAMS: return "method parameters"; + case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value"; + case PM_CONTEXT_ELSE: return "'else' clause"; + case PM_CONTEXT_ELSIF: return "'elsif' clause"; + case PM_CONTEXT_EMBEXPR: return "embedded expression"; + case PM_CONTEXT_ENSURE: return "'ensure' clause"; + case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause"; + case PM_CONTEXT_FOR: return "for loop"; + case PM_CONTEXT_FOR_INDEX: return "for loop index"; + case PM_CONTEXT_IF: return "if statement"; + case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block"; + case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block"; + case PM_CONTEXT_MAIN: return "top level context"; + case PM_CONTEXT_MODULE: return "module definition"; + case PM_CONTEXT_PARENS: return "parentheses"; + case PM_CONTEXT_POSTEXE: return "'END' block"; + case PM_CONTEXT_PREDICATE: return "predicate"; + case PM_CONTEXT_PREEXE: return "'BEGIN' block"; + case PM_CONTEXT_RESCUE_ELSE: return "'else' clause"; + case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause"; + case PM_CONTEXT_RESCUE: return "'rescue' clause"; + case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause"; + case PM_CONTEXT_SCLASS: return "singleton class definition"; + case PM_CONTEXT_UNLESS: return "unless statement"; + case PM_CONTEXT_UNTIL: return "until statement"; + case PM_CONTEXT_WHILE: return "while statement"; + } + + assert(false && "unreachable"); + return ""; +} + /******************************************************************************/ /* Specific token lexers */ /******************************************************************************/ @@ -14177,7 +14232,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) { * Parse an expression that begins with the previous node that we just lexed. */ static inline pm_node_t * -parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) { +parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) { switch (parser->current.type) { case PM_TOKEN_BRACKET_LEFT_ARRAY: { parser_lex(parser); @@ -14595,30 +14650,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX); - } - else { + } else { // Check if `it` is not going to be assigned. switch (parser->current.type) { - case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: - case PM_TOKEN_AMPERSAND_EQUAL: - case PM_TOKEN_CARET_EQUAL: - case PM_TOKEN_EQUAL: - case PM_TOKEN_GREATER_GREATER_EQUAL: - case PM_TOKEN_LESS_LESS_EQUAL: - case PM_TOKEN_MINUS_EQUAL: - case PM_TOKEN_PARENTHESIS_RIGHT: - case PM_TOKEN_PERCENT_EQUAL: - case PM_TOKEN_PIPE_EQUAL: - case PM_TOKEN_PIPE_PIPE_EQUAL: - case PM_TOKEN_PLUS_EQUAL: - case PM_TOKEN_SLASH_EQUAL: - case PM_TOKEN_STAR_EQUAL: - case PM_TOKEN_STAR_STAR_EQUAL: - break; - default: - // Once we know it's neither a method call nor an assignment, - // we can finally create `it` default parameter. - node = pm_node_check_it(parser, node); + case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: + case PM_TOKEN_AMPERSAND_EQUAL: + case PM_TOKEN_CARET_EQUAL: + case PM_TOKEN_EQUAL: + case PM_TOKEN_GREATER_GREATER_EQUAL: + case PM_TOKEN_LESS_LESS_EQUAL: + case PM_TOKEN_MINUS_EQUAL: + case PM_TOKEN_PARENTHESIS_RIGHT: + case PM_TOKEN_PERCENT_EQUAL: + case PM_TOKEN_PIPE_EQUAL: + case PM_TOKEN_PIPE_PIPE_EQUAL: + case PM_TOKEN_PLUS_EQUAL: + case PM_TOKEN_SLASH_EQUAL: + case PM_TOKEN_STAR_EQUAL: + case PM_TOKEN_STAR_STAR_EQUAL: + break; + default: + // Once we know it's neither a method call nor an + // assignment, we can finally create `it` default + // parameter. + node = pm_node_check_it(parser, node); } } @@ -14656,6 +14711,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we get here, then we tried to find something in the // heredoc but couldn't actually parse anything, so we'll just // return a missing node. + // + // parse_string_part handles its own errors, so there is no need + // for us to add one here. node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { // If we get here, then the part that we parsed was plain string @@ -16301,6 +16359,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // context of a multiple assignment. We enforce that here. We'll // still lex past it though and create a missing node place. if (binding_power != PM_BINDING_POWER_STATEMENT) { + pm_parser_err_previous(parser, diag_id); return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); } @@ -16487,12 +16546,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END); } - default: - if (context_recoverable(parser, &parser->current)) { + default: { + pm_context_t recoverable = context_recoverable(parser, &parser->current); + + if (recoverable != PM_CONTEXT_NONE) { parser->recovering = true; + + // If the given error is not the generic one, then we'll add it + // here because it will provide more context in addition to the + // recoverable error that we will also add. + if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) { + pm_parser_err_previous(parser, diag_id); + } + + // If we get here, then we are assuming this token is closing a + // parent context, so we'll indicate that to the user so that + // they know how we behaved. + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable)); + } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) { + // We're going to make a special case here, because "cannot + // parse expression" is pretty generic, and we know here that we + // have an unexpected token. + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type)); + } else { + pm_parser_err_previous(parser, diag_id); } return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); + } } } @@ -17455,15 +17536,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t */ static pm_node_t * parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) { - pm_token_t recovery = parser->previous; - pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call); + pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id); switch (PM_NODE_TYPE(node)) { case PM_MISSING_NODE: // If we found a syntax error, then the type of node returned by - // parse_expression_prefix is going to be a missing node. In that - // case we need to add the error message to the parser's error list. - pm_parser_err(parser, recovery.end, recovery.end, diag_id); + // parse_expression_prefix is going to be a missing node. return node; case PM_PRE_EXECUTION_NODE: case PM_POST_EXECUTION_NODE: diff --git a/prism/prism.h b/prism/prism.h index 45bfff7a11..08d216cbb5 100644 --- a/prism/prism.h +++ b/prism/prism.h @@ -168,7 +168,15 @@ PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t si * @param token_type The token type to convert to a string. * @return A string representation of the given token type. */ -PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type); +PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type); + +/** + * Returns the human name of the given token type. + * + * @param token_type The token type to convert to a human name. + * @return The human name of the given token type. + */ +const char * pm_token_type_human(pm_token_type_t token_type); /** * Format the errors on the parser into the given buffer. diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb index 93f67f6551..20b3810715 100644 --- a/prism/templates/ext/prism/api_node.c.erb +++ b/prism/templates/ext/prism/api_node.c.erb @@ -19,7 +19,7 @@ pm_location_new(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, V VALUE pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source) { - ID type = rb_intern(pm_token_type_to_str(token->type)); + ID type = rb_intern(pm_token_type_name(token->type)); VALUE location = pm_location_new(parser, token->start, token->end, source); VALUE argv[] = { diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb index d3c1c3f1b8..99f5d1b254 100644 --- a/prism/templates/src/token_type.c.erb +++ b/prism/templates/src/token_type.c.erb @@ -6,15 +6,364 @@ * Returns a string representation of the given token type. */ PRISM_EXPORTED_FUNCTION const char * -pm_token_type_to_str(pm_token_type_t token_type) -{ +pm_token_type_name(pm_token_type_t token_type) { switch (token_type) { <%- tokens.each do |token| -%> case PM_TOKEN_<%= token.name %>: return "<%= token.name %>"; <%- end -%> case PM_TOKEN_MAXIMUM: - return "MAXIMUM"; + assert(false && "unreachable"); + return ""; } - return "\0"; + + // Provide a default, because some compilers can't determine that the above + // switch is exhaustive. + assert(false && "unreachable"); + return ""; +} + +/** + * Returns the human name of the given token type. + */ +const char * +pm_token_type_human(pm_token_type_t token_type) { + switch (token_type) { + case PM_TOKEN_EOF: + return "end of file"; + case PM_TOKEN_MISSING: + return "missing token"; + case PM_TOKEN_NOT_PROVIDED: + return "not provided token"; + case PM_TOKEN_AMPERSAND: + return "'&'"; + case PM_TOKEN_AMPERSAND_AMPERSAND: + return "'&&'"; + case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: + return "'&&='"; + case PM_TOKEN_AMPERSAND_DOT: + return "'&.'"; + case PM_TOKEN_AMPERSAND_EQUAL: + return "'&='"; + case PM_TOKEN_BACKTICK: + return "'`'"; + case PM_TOKEN_BACK_REFERENCE: + return "back reference"; + case PM_TOKEN_BANG: + return "'!'"; + case PM_TOKEN_BANG_EQUAL: + return "'!='"; + case PM_TOKEN_BANG_TILDE: + return "'!~'"; + case PM_TOKEN_BRACE_LEFT: + return "'{'"; + case PM_TOKEN_BRACE_RIGHT: + return "'}'"; + case PM_TOKEN_BRACKET_LEFT: + return "'['"; + case PM_TOKEN_BRACKET_LEFT_ARRAY: + return "'['"; + case PM_TOKEN_BRACKET_LEFT_RIGHT: + return "'[]'"; + case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: + return "'[]='"; + case PM_TOKEN_BRACKET_RIGHT: + return "']'"; + case PM_TOKEN_CARET: + return "'^'"; + case PM_TOKEN_CARET_EQUAL: + return "'^='"; + case PM_TOKEN_CHARACTER_LITERAL: + return "character literal"; + case PM_TOKEN_CLASS_VARIABLE: + return "class variable"; + case PM_TOKEN_COLON: + return "':'"; + case PM_TOKEN_COLON_COLON: + return "'::'"; + case PM_TOKEN_COMMA: + return "','"; + case PM_TOKEN_COMMENT: + return "comment"; + case PM_TOKEN_CONSTANT: + return "constant"; + case PM_TOKEN_DOT: + return "'.'"; + case PM_TOKEN_DOT_DOT: + return "'..'"; + case PM_TOKEN_DOT_DOT_DOT: + return "'...'"; + case PM_TOKEN_EMBDOC_BEGIN: + return "'=begin'"; + case PM_TOKEN_EMBDOC_END: + return "'=end'"; + case PM_TOKEN_EMBDOC_LINE: + return "embedded documentation line"; + case PM_TOKEN_EMBEXPR_BEGIN: + return "'#{'"; + case PM_TOKEN_EMBEXPR_END: + return "'}'"; + case PM_TOKEN_EMBVAR: + return "'#'"; + case PM_TOKEN_EQUAL: + return "'='"; + case PM_TOKEN_EQUAL_EQUAL: + return "'=='"; + case PM_TOKEN_EQUAL_EQUAL_EQUAL: + return "'==='"; + case PM_TOKEN_EQUAL_GREATER: + return "'=>'"; + case PM_TOKEN_EQUAL_TILDE: + return "'=~'"; + case PM_TOKEN_FLOAT: + return "float"; + case PM_TOKEN_FLOAT_IMAGINARY: + return "imaginary"; + case PM_TOKEN_FLOAT_RATIONAL: + return "rational"; + case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: + return "imaginary"; + case PM_TOKEN_GLOBAL_VARIABLE: + return "global variable"; + case PM_TOKEN_GREATER: + return "'>'"; + case PM_TOKEN_GREATER_EQUAL: + return "'>='"; + case PM_TOKEN_GREATER_GREATER: + return "'>>'"; + case PM_TOKEN_GREATER_GREATER_EQUAL: + return "'>>='"; + case PM_TOKEN_HEREDOC_END: + return "heredoc ending"; + case PM_TOKEN_HEREDOC_START: + return "heredoc beginning"; + case PM_TOKEN_IDENTIFIER: + return "local variable or method identifier"; + case PM_TOKEN_IGNORED_NEWLINE: + return "ignored newline"; + case PM_TOKEN_INSTANCE_VARIABLE: + return "instance variable"; + case PM_TOKEN_INTEGER: + return "integer"; + case PM_TOKEN_INTEGER_IMAGINARY: + return "imaginary"; + case PM_TOKEN_INTEGER_RATIONAL: + return "rational"; + case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: + return "imaginary"; + case PM_TOKEN_KEYWORD_ALIAS: + return "'alias'"; + case PM_TOKEN_KEYWORD_AND: + return "'and'"; + case PM_TOKEN_KEYWORD_BEGIN: + return "'begin'"; + case PM_TOKEN_KEYWORD_BEGIN_UPCASE: + return "'BEGIN'"; + case PM_TOKEN_KEYWORD_BREAK: + return "'break'"; + case PM_TOKEN_KEYWORD_CASE: + return "'case'"; + case PM_TOKEN_KEYWORD_CLASS: + return "'class'"; + case PM_TOKEN_KEYWORD_DEF: + return "'def'"; + case PM_TOKEN_KEYWORD_DEFINED: + return "'defined?'"; + case PM_TOKEN_KEYWORD_DO: + return "'do'"; + case PM_TOKEN_KEYWORD_DO_LOOP: + return "'do'"; + case PM_TOKEN_KEYWORD_ELSE: + return "'else'"; + case PM_TOKEN_KEYWORD_ELSIF: + return "'elsif'"; + case PM_TOKEN_KEYWORD_END: + return "'end'"; + case PM_TOKEN_KEYWORD_END_UPCASE: + return "'END'"; + case PM_TOKEN_KEYWORD_ENSURE: + return "'ensure'"; + case PM_TOKEN_KEYWORD_FALSE: + return "'false'"; + case PM_TOKEN_KEYWORD_FOR: + return "'for'"; + case PM_TOKEN_KEYWORD_IF: + return "'if'"; + case PM_TOKEN_KEYWORD_IF_MODIFIER: + return "'if'"; + case PM_TOKEN_KEYWORD_IN: + return "'in'"; + case PM_TOKEN_KEYWORD_MODULE: + return "'module'"; + case PM_TOKEN_KEYWORD_NEXT: + return "'next'"; + case PM_TOKEN_KEYWORD_NIL: + return "'nil'"; + case PM_TOKEN_KEYWORD_NOT: + return "'not'"; + case PM_TOKEN_KEYWORD_OR: + return "'or'"; + case PM_TOKEN_KEYWORD_REDO: + return "'redo'"; + case PM_TOKEN_KEYWORD_RESCUE: + return "'rescue'"; + case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: + return "'rescue'"; + case PM_TOKEN_KEYWORD_RETRY: + return "'retry'"; + case PM_TOKEN_KEYWORD_RETURN: + return "'return'"; + case PM_TOKEN_KEYWORD_SELF: + return "'self'"; + case PM_TOKEN_KEYWORD_SUPER: + return "'super'"; + case PM_TOKEN_KEYWORD_THEN: + return "'then'"; + case PM_TOKEN_KEYWORD_TRUE: + return "'true'"; + case PM_TOKEN_KEYWORD_UNDEF: + return "'undef'"; + case PM_TOKEN_KEYWORD_UNLESS: + return "'unless'"; + case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: + return "'unless'"; + case PM_TOKEN_KEYWORD_UNTIL: + return "'until'"; + case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: + return "'until'"; + case PM_TOKEN_KEYWORD_WHEN: + return "'when'"; + case PM_TOKEN_KEYWORD_WHILE: + return "'while'"; + case PM_TOKEN_KEYWORD_WHILE_MODIFIER: + return "'while'"; + case PM_TOKEN_KEYWORD_YIELD: + return "'yield'"; + case PM_TOKEN_KEYWORD___ENCODING__: + return "'__ENCODING__'"; + case PM_TOKEN_KEYWORD___FILE__: + return "'__FILE__'"; + case PM_TOKEN_KEYWORD___LINE__: + return "'__LINE__'"; + case PM_TOKEN_LABEL: + return "label"; + case PM_TOKEN_LABEL_END: + return "':'"; + case PM_TOKEN_LAMBDA_BEGIN: + return "'{'"; + case PM_TOKEN_LESS: + return "'<'"; + case PM_TOKEN_LESS_EQUAL: + return "'<='"; + case PM_TOKEN_LESS_EQUAL_GREATER: + return "'<=>'"; + case PM_TOKEN_LESS_LESS: + return "'<<'"; + case PM_TOKEN_LESS_LESS_EQUAL: + return "'<<='"; + case PM_TOKEN_METHOD_NAME: + return "method name"; + case PM_TOKEN_MINUS: + return "'-'"; + case PM_TOKEN_MINUS_EQUAL: + return "'-='"; + case PM_TOKEN_MINUS_GREATER: + return "'->'"; + case PM_TOKEN_NEWLINE: + return "newline"; + case PM_TOKEN_NUMBERED_REFERENCE: + return "numbered reference"; + case PM_TOKEN_PARENTHESIS_LEFT: + return "'('"; + case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: + return "'('"; + case PM_TOKEN_PARENTHESIS_RIGHT: + return "')'"; + case PM_TOKEN_PERCENT: + return "'%'"; + case PM_TOKEN_PERCENT_EQUAL: + return "'%='"; + case PM_TOKEN_PERCENT_LOWER_I: + return "'%i'"; + case PM_TOKEN_PERCENT_LOWER_W: + return "'%w'"; + case PM_TOKEN_PERCENT_LOWER_X: + return "'%x'"; + case PM_TOKEN_PERCENT_UPPER_I: + return "'%I'"; + case PM_TOKEN_PERCENT_UPPER_W: + return "'%W'"; + case PM_TOKEN_PIPE: + return "'|'"; + case PM_TOKEN_PIPE_EQUAL: + return "'|='"; + case PM_TOKEN_PIPE_PIPE: + return "'||'"; + case PM_TOKEN_PIPE_PIPE_EQUAL: + return "'||='"; + case PM_TOKEN_PLUS: + return "'+'"; + case PM_TOKEN_PLUS_EQUAL: + return "'+='"; + case PM_TOKEN_QUESTION_MARK: + return "'?'"; + case PM_TOKEN_REGEXP_BEGIN: + return "regular expression beginning"; + case PM_TOKEN_REGEXP_END: + return "regular expression ending"; + case PM_TOKEN_SEMICOLON: + return "';'"; + case PM_TOKEN_SLASH: + return "'/'"; + case PM_TOKEN_SLASH_EQUAL: + return "'/='"; + case PM_TOKEN_STAR: + return "'*'"; + case PM_TOKEN_STAR_EQUAL: + return "'*='"; + case PM_TOKEN_STAR_STAR: + return "'**'"; + case PM_TOKEN_STAR_STAR_EQUAL: + return "'**='"; + case PM_TOKEN_STRING_BEGIN: + return "string beginning"; + case PM_TOKEN_STRING_CONTENT: + return "string content"; + case PM_TOKEN_STRING_END: + return "string ending"; + case PM_TOKEN_SYMBOL_BEGIN: + return "symbol beginning"; + case PM_TOKEN_TILDE: + return "'~'"; + case PM_TOKEN_UAMPERSAND: + return "'&'"; + case PM_TOKEN_UCOLON_COLON: + return "'::'"; + case PM_TOKEN_UDOT_DOT: + return "'..'"; + case PM_TOKEN_UDOT_DOT_DOT: + return "'...'"; + case PM_TOKEN_UMINUS: + return "'-'"; + case PM_TOKEN_UMINUS_NUM: + return "'-'"; + case PM_TOKEN_UPLUS: + return "'+'"; + case PM_TOKEN_USTAR: + return "'*'"; + case PM_TOKEN_USTAR_STAR: + return "'**'"; + case PM_TOKEN_WORDS_SEP: + return "string separator"; + case PM_TOKEN___END__: + return "'__END__'"; + case PM_TOKEN_MAXIMUM: + assert(false && "unreachable"); + return ""; + } + + // Provide a default, because some compilers can't determine that the above + // switch is exhaustive. + assert(false && "unreachable"); + return ""; } diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index 2b45167bee..4518c8a65d 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -26,7 +26,8 @@ module Prism ) assert_errors expected, "module Parent module end", [ - ["expected a constant name after `module`", 20..20] + ["expected a constant name after `module`", 14..20], + ["unexpected 'end', assuming it is closing the parent module definition", 21..24] ] end @@ -98,7 +99,8 @@ module Prism ) assert_errors expected, "BEGIN { 1 + }", [ - ["expected an expression after the operator", 11..11] + ["expected an expression after the operator", 10..11], + ["unexpected '}', assuming it is closing the parent 'BEGIN' block", 12..13] ] end @@ -189,7 +191,7 @@ module Prism def test_unterminated_parenthesized_expression assert_errors expression('(1 + 2'), '(1 + 2', [ ["expected a newline or semicolon after the statement", 6..6], - ["cannot parse the expression", 6..6], + ["unexpected end of file, assuming it is closing the parent top level context", 6..6], ["expected a matching `)`", 6..6] ] end @@ -203,7 +205,8 @@ module Prism def test_unterminated_argument_expression assert_errors expression('a %'), 'a %', [ ["invalid `%` token", 2..3], - ["expected an expression after the operator", 3..3], + ["expected an expression after the operator", 2..3], + ["unexpected end of file, assuming it is closing the parent top level context", 3..3] ] end @@ -222,62 +225,62 @@ module Prism def test_1_2_3 assert_errors expression("(1, 2, 3)"), "(1, 2, 3)", [ ["expected a newline or semicolon after the statement", 2..2], - ["cannot parse the expression", 2..2], + ["unexpected ',', ignoring it", 2..3], ["expected a matching `)`", 2..2], ["expected a newline or semicolon after the statement", 2..2], - ["cannot parse the expression", 2..2], + ["unexpected ',', ignoring it", 2..3], ["expected a newline or semicolon after the statement", 5..5], - ["cannot parse the expression", 5..5], + ["unexpected ',', ignoring it", 5..6], ["expected a newline or semicolon after the statement", 8..8], - ["cannot parse the expression", 8..8] + ["unexpected ')', ignoring it", 8..9] ] end def test_return_1_2_3 assert_error_messages "return(1, 2, 3)", [ "expected a newline or semicolon after the statement", - "cannot parse the expression", + "unexpected ',', ignoring it", "expected a matching `)`", "expected a newline or semicolon after the statement", - "cannot parse the expression" + "unexpected ')', ignoring it" ] end def test_return_1 assert_errors expression("return 1,;"), "return 1,;", [ - ["expected an argument", 9..9] + ["expected an argument", 8..9] ] end def test_next_1_2_3 assert_errors expression("next(1, 2, 3)"), "next(1, 2, 3)", [ ["expected a newline or semicolon after the statement", 6..6], - ["cannot parse the expression", 6..6], + ["unexpected ',', ignoring it", 6..7], ["expected a matching `)`", 6..6], ["expected a newline or semicolon after the statement", 12..12], - ["cannot parse the expression", 12..12] + ["unexpected ')', ignoring it", 12..13] ] end def test_next_1 assert_errors expression("next 1,;"), "next 1,;", [ - ["expected an argument", 7..7] + ["expected an argument", 6..7] ] end def test_break_1_2_3 assert_errors expression("break(1, 2, 3)"), "break(1, 2, 3)", [ ["expected a newline or semicolon after the statement", 7..7], - ["cannot parse the expression", 7..7], + ["unexpected ',', ignoring it", 7..8], ["expected a matching `)`", 7..7], ["expected a newline or semicolon after the statement", 13..13], - ["cannot parse the expression", 13..13] + ["unexpected ')', ignoring it", 13..14] ] end def test_break_1 assert_errors expression("break 1,;"), "break 1,;", [ - ["expected an argument", 8..8] + ["expected an argument", 7..8] ] end @@ -338,22 +341,22 @@ module Prism ["expected a matching `)`", 8..8], ["expected a `.` or `::` after the receiver in a method definition", 8..8], ["expected a delimiter to close the parameters", 9..9], - ["cannot parse the expression", 9..9], - ["cannot parse the expression", 11..11] + ["unexpected ')', ignoring it", 10..11], + ["unexpected '.', ignoring it", 11..12] ] end def test_def_with_empty_expression_receiver assert_errors expression("def ().a; end"), "def ().a; end", [ - ["expected a receiver for the method definition", 5..5] + ["expected a receiver for the method definition", 4..5] ] end def test_block_beginning_with_brace_and_ending_with_end assert_error_messages "x.each { x end", [ "expected a newline or semicolon after the statement", - "cannot parse the expression", - "cannot parse the expression", + "unexpected 'end', ignoring it", + "unexpected end of file, assuming it is closing the parent top level context", "expected a block beginning with `{` to end with `}`" ] end @@ -401,7 +404,7 @@ module Prism assert_error_messages "foo(*bar and baz)", [ "expected a `)` to close the arguments", "expected a newline or semicolon after the statement", - "cannot parse the expression" + "unexpected ')', ignoring it" ] end @@ -1490,8 +1493,8 @@ module Prism assert_errors expression(source), source, [ ["expected a `do` keyword or a `{` to open the lambda block", 3..3], ["expected a newline or semicolon after the statement", 7..7], - ["cannot parse the expression", 7..7], - ["expected a lambda block beginning with `do` to end with `end`", 7..7], + ["unexpected end of file, assuming it is closing the parent top level context", 7..7], + ["expected a lambda block beginning with `do` to end with `end`", 7..7] ] end @@ -1546,10 +1549,11 @@ module Prism def test_while_endless_method source = "while def f = g do end" + assert_errors expression(source), source, [ - ['expected a predicate expression for the `while` statement', 22..22], - ['cannot parse the expression', 22..22], - ['expected an `end` to close the `while` statement', 22..22] + ["expected a predicate expression for the `while` statement", 22..22], + ["unexpected end of file, assuming it is closing the parent top level context", 22..22], + ["expected an `end` to close the `while` statement", 22..22] ] end @@ -1558,13 +1562,12 @@ module Prism a in b + c a => b + c RUBY - message1 = 'expected a newline or semicolon after the statement' - message2 = 'cannot parse the expression' + assert_errors expression(source), source, [ - [message1, 6..6], - [message2, 6..6], - [message1, 17..17], - [message2, 17..17], + ["expected a newline or semicolon after the statement", 6..6], + ["unexpected '+', ignoring it", 7..8], + ["expected a newline or semicolon after the statement", 17..17], + ["unexpected '+', ignoring it", 18..19] ] end @@ -1859,9 +1862,10 @@ module Prism def test_non_assoc_range source = '1....2' + assert_errors expression(source), source, [ - ['expected a newline or semicolon after the statement', 4..4], - ['cannot parse the expression', 4..4], + ["expected a newline or semicolon after the statement", 4..4], + ["unexpected '.', ignoring it", 4..5] ] end @@ -1892,25 +1896,24 @@ module Prism undef x + 1 undef x.z RUBY - message1 = 'expected a newline or semicolon after the statement' - message2 = 'cannot parse the expression' + assert_errors expression(source), source, [ - [message1, 9..9], - [message2, 9..9], - [message1, 23..23], - [message2, 23..23], - [message1, 39..39], - [message2, 39..39], - [message1, 57..57], - [message2, 57..57], - [message1, 71..71], - [message2, 71..71], - [message1, 87..87], - [message2, 87..87], - [message1, 97..97], - [message2, 97..97], - [message1, 109..109], - [message2, 109..109], + ["expected a newline or semicolon after the statement", 9..9], + ["unexpected '+', ignoring it", 10..11], + ["expected a newline or semicolon after the statement", 23..23], + ["unexpected '.', ignoring it", 23..24], + ["expected a newline or semicolon after the statement", 39..39], + ["unexpected '+', ignoring it", 40..41], + ["expected a newline or semicolon after the statement", 57..57], + ["unexpected '.', ignoring it", 57..58], + ["expected a newline or semicolon after the statement", 71..71], + ["unexpected '+', ignoring it", 72..73], + ["expected a newline or semicolon after the statement", 87..87], + ["unexpected '.', ignoring it", 87..88], + ["expected a newline or semicolon after the statement", 97..97], + ["unexpected '+', ignoring it", 98..99], + ["expected a newline or semicolon after the statement", 109..109], + ["unexpected '.', ignoring it", 109..110] ] end @@ -1934,13 +1937,12 @@ module Prism ..1.. ...1.. RUBY - message1 = 'expected a newline or semicolon after the statement' - message2 = 'cannot parse the expression' + assert_errors expression(source), source, [ - [message1, 3..3], - [message2, 3..3], - [message1, 10..10], - [message2, 10..10], + ["expected a newline or semicolon after the statement", 3..3], + ["unexpected '..', ignoring it", 3..5], + ["expected a newline or semicolon after the statement", 10..10], + ["unexpected '..', ignoring it", 10..12] ] end @@ -2047,21 +2049,20 @@ module Prism 1 !~ 2 !~ 3 1 <=> 2 <=> 3 RUBY - message1 = 'expected a newline or semicolon after the statement' - message2 = 'cannot parse the expression' + assert_errors expression(source), source, [ - [message1, 6..6], - [message2, 6..6], - [message1, 18..18], - [message2, 18..18], - [message1, 31..31], - [message2, 31..31], - [message1, 44..44], - [message2, 44..44], - [message1, 56..56], - [message2, 56..56], - [message1, 69..69], - [message2, 69..69], + ["expected a newline or semicolon after the statement", 6..6], + ["unexpected '==', ignoring it", 7..9], + ["expected a newline or semicolon after the statement", 18..18], + ["unexpected '!=', ignoring it", 19..21], + ["expected a newline or semicolon after the statement", 31..31], + ["unexpected '===', ignoring it", 32..35], + ["expected a newline or semicolon after the statement", 44..44], + ["unexpected '=~', ignoring it", 45..47], + ["expected a newline or semicolon after the statement", 56..56], + ["unexpected '!~', ignoring it", 57..59], + ["expected a newline or semicolon after the statement", 69..69], + ["unexpected '<=>', ignoring it", 70..73] ] end diff --git a/test/prism/format_errors_test.rb b/test/prism/format_errors_test.rb index 3533a73863..34d320a7b6 100644 --- a/test/prism/format_errors_test.rb +++ b/test/prism/format_errors_test.rb @@ -9,8 +9,8 @@ module Prism def test_format_errors assert_equal <<~ERROR, Debug.format_errors("<>", false) > 1 | <> - | ^ cannot parse the expression - | ^ cannot parse the expression + | ^ unexpected '<', ignoring it + | ^ unexpected '>', ignoring it ERROR end end