From 4c28a61e835645fefa238536acd6334451fb2dde Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 15 Sep 2023 12:19:56 -0400 Subject: [PATCH] [ruby/yarp] Ensure multi targets are only in valid locations https://github.com/ruby/yarp/commit/8bffb8a762 --- test/yarp/errors_test.rb | 28 ++++++++++++-- yarp/yarp.c | 82 +++++++++++++++++++++++++++++----------- 2 files changed, 85 insertions(+), 25 deletions(-) diff --git a/test/yarp/errors_test.rb b/test/yarp/errors_test.rb index 41cb6b7f47..57a2cab011 100644 --- a/test/yarp/errors_test.rb +++ b/test/yarp/errors_test.rb @@ -1190,10 +1190,32 @@ module YARP def test_invalid_global_variable_write assert_errors expression("$',"), "$',", [ - ["Immutable variable as a write target", 0..2] + ["Immutable variable as a write target", 0..2], + ["Unexpected write target", 0..3] ] end + def test_invalid_multi_target + error_messages = ["Unexpected write target"] + immutable = "Immutable variable as a write target" + + assert_error_messages "foo,", error_messages + assert_error_messages "foo = 1; foo,", error_messages + assert_error_messages "foo.bar,", error_messages + assert_error_messages "*foo,", error_messages + assert_error_messages "@foo,", error_messages + assert_error_messages "@@foo,", error_messages + assert_error_messages "$foo,", error_messages + assert_error_messages "$1,", [immutable, *error_messages] + assert_error_messages "$+,", [immutable, *error_messages] + assert_error_messages "Foo,", error_messages + assert_error_messages "::Foo,", error_messages + assert_error_messages "Foo::Foo,", error_messages + assert_error_messages "Foo::foo,", error_messages + assert_error_messages "foo[foo],", error_messages + assert_error_messages "(foo, bar)", error_messages + end + def test_call_with_block_and_write source = "foo {} &&= 1" assert_errors expression(source), source, [ @@ -1282,8 +1304,8 @@ module YARP assert_equal(errors, result.errors.map { |e| [e.message, e.location.start_offset..e.location.end_offset] }) end - def assert_error_messages(source, errors) - assert_nil Ripper.sexp_raw(source) + def assert_error_messages(source, errors, compare_ripper: RUBY_ENGINE == "ruby") + assert_nil Ripper.sexp_raw(source) if compare_ripper result = YARP.parse(source) assert_equal(errors, result.errors.map(&:message)) end diff --git a/yarp/yarp.c b/yarp/yarp.c index 7c4b347678..aeb015611b 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -8487,6 +8487,20 @@ parse_target(yp_parser_t *parser, yp_node_t *target) { } } +// Parse a write targets and validate that it is in a valid position for +// assignment. +static yp_node_t * +parse_target_validate(yp_parser_t *parser, yp_node_t *target) { + yp_node_t *result = parse_target(parser, target); + + // Ensure that we have either an = or a ) after the targets. + if (!match3(parser, YP_TOKEN_EQUAL, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_KEYWORD_IN)) { + yp_diagnostic_list_append(&parser->error_list, result->location.start, result->location.end, YP_ERR_WRITE_TARGET_UNEXPECTED); + } + + return result; +} + // Convert the given node into a valid write node. static yp_node_t * parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) { @@ -8658,12 +8672,10 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod // target node or a multi-target node. static yp_node_t * parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t binding_power) { - first_target = parse_target(parser, first_target); - if (!match1(parser, YP_TOKEN_COMMA)) return first_target; + bool has_splat = YP_NODE_TYPE_P(first_target, YP_SPLAT_NODE); yp_multi_target_node_t *result = yp_multi_target_node_create(parser); - yp_multi_target_node_targets_append(result, first_target); - bool has_splat = YP_NODE_TYPE_P(first_target, YP_SPLAT_NODE); + yp_multi_target_node_targets_append(result, parse_target(parser, first_target)); while (accept1(parser, YP_TOKEN_COMMA)) { if (accept1(parser, YP_TOKEN_USTAR)) { @@ -8703,6 +8715,20 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b return (yp_node_t *) result; } +// Parse a list of targets and validate that it is in a valid position for +// assignment. +static yp_node_t * +parse_targets_validate(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t binding_power) { + yp_node_t *result = parse_targets(parser, first_target, binding_power); + + // Ensure that we have either an = or a ) after the targets. + if (!match2(parser, YP_TOKEN_EQUAL, YP_TOKEN_PARENTHESIS_RIGHT)) { + yp_diagnostic_list_append(&parser->error_list, result->location.start, result->location.end, YP_ERR_WRITE_TARGET_UNEXPECTED); + } + + return result; +} + // Parse a list of statements separated by newlines or semicolons. static yp_statements_node_t * parse_statements(yp_parser_t *parser, yp_context_t context) { @@ -11450,7 +11476,11 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { multi_target->base.location.start = lparen_loc.start; multi_target->base.location.end = rparen_loc.end; - return parse_targets(parser, (yp_node_t *) multi_target, YP_BINDING_POWER_INDEX); + if (match1(parser, YP_TOKEN_COMMA)) { + return parse_targets_validate(parser, (yp_node_t *) multi_target, YP_BINDING_POWER_INDEX); + } else { + return parse_target_validate(parser, (yp_node_t *) multi_target); + } } // If we have a single statement and are ending on a right parenthesis @@ -11555,7 +11585,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { yp_node_t *node = (yp_node_t *) yp_class_variable_read_node_create(parser, &parser->previous); if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -11581,7 +11611,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { if ((binding_power == YP_BINDING_POWER_STATEMENT) && match1(parser, YP_TOKEN_COMMA)) { // If we get here, then we have a comma immediately following a // constant, so we're going to parse this as a multiple assignment. - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -11596,7 +11626,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { yp_node_t *node = (yp_node_t *)yp_constant_path_node_create(parser, NULL, &delimiter, constant); if ((binding_power == YP_BINDING_POWER_STATEMENT) && match1(parser, YP_TOKEN_COMMA)) { - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -11626,7 +11656,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { yp_node_t *node = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous); if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -11636,7 +11666,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { yp_node_t *node = (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous); if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -11646,7 +11676,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { yp_node_t *node = (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous); if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -11704,7 +11734,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { } if ((binding_power == YP_BINDING_POWER_STATEMENT) && match1(parser, YP_TOKEN_COMMA)) { - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -11821,7 +11851,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { yp_node_t *node = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous); if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { - node = parse_targets(parser, node, YP_BINDING_POWER_INDEX); + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); } return node; @@ -12522,7 +12552,6 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { if (token_begins_expression_p(parser->current.type)) { name = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_EXPECT_EXPRESSION_AFTER_STAR); - name = parse_target(parser, name); } index = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name); @@ -12534,7 +12563,11 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { } // Now, if there are multiple index expressions, parse them out. - index = parse_targets(parser, index, YP_BINDING_POWER_INDEX); + if (match1(parser, YP_TOKEN_COMMA)) { + index = parse_targets(parser, index, YP_BINDING_POWER_INDEX); + } else { + index = parse_target(parser, index); + } yp_do_loop_stack_push(parser, true); @@ -13189,8 +13222,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { parser_lex(parser); // * operators at the beginning of expressions are only valid in the - // context of a multiple assignment. We enforce that here. We'll still lex - // past it though and create a missing node place. + // context of a multiple assignment. We enforce that here. We'll + // still lex past it though and create a missing node place. if (binding_power != YP_BINDING_POWER_STATEMENT) { return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end); } @@ -13203,7 +13236,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { } yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &operator, name); - return parse_targets(parser, splat, YP_BINDING_POWER_INDEX); + + if (match1(parser, YP_TOKEN_COMMA)) { + return parse_targets_validate(parser, splat, YP_BINDING_POWER_INDEX); + } else { + return parse_target_validate(parser, splat); + } } case YP_TOKEN_BANG: { parser_lex(parser); @@ -13884,7 +13922,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t arguments.opening_loc.start == NULL && match1(parser, YP_TOKEN_COMMA) ) { - return parse_targets(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX); + return parse_targets_validate(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX); } else { return (yp_node_t *) call; } @@ -13987,7 +14025,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t // If this is followed by a comma then it is a multiple assignment. if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { - return parse_targets(parser, path, YP_BINDING_POWER_INDEX); + return parse_targets_validate(parser, path, YP_BINDING_POWER_INDEX); } return path; @@ -14006,7 +14044,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t // If this is followed by a comma then it is a multiple assignment. if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { - return parse_targets(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX); + return parse_targets_validate(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX); } return (yp_node_t *) call; @@ -14055,7 +14093,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t // assignment and we should parse the targets. if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { yp_call_node_t *aref = yp_call_node_aref_create(parser, node, &arguments); - return parse_targets(parser, (yp_node_t *) aref, YP_BINDING_POWER_INDEX); + return parse_targets_validate(parser, (yp_node_t *) aref, YP_BINDING_POWER_INDEX); } // If we're at the end of the arguments, we can now check if there is a