From 9afc6a981deae6e23d938cf5c2c4baadfeaafdb1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 13 Sep 2024 11:21:28 -0400 Subject: [PATCH] [PRISM] Only parse shebang on main script Fixes [Bug #20730] --- prism/extension.c | 4 ++++ prism/options.c | 8 ++++++++ prism/options.h | 15 +++++++++++++++ prism/prism.c | 27 ++++++++++++++++++++++----- ruby.c | 2 ++ test/prism/api/command_line_test.rb | 4 ++-- test/prism/result/warnings_test.rb | 23 ++++++++++++----------- 7 files changed, 65 insertions(+), 18 deletions(-) diff --git a/prism/extension.c b/prism/extension.c index 79761770f6..94fc5a104b 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -31,6 +31,7 @@ ID rb_id_option_encoding; ID rb_id_option_filepath; ID rb_id_option_frozen_string_literal; ID rb_id_option_line; +ID rb_id_option_main_script; ID rb_id_option_scopes; ID rb_id_option_version; ID rb_id_source_for; @@ -179,6 +180,8 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { pm_options_command_line_set(options, command_line); } + } else if (key_id == rb_id_option_main_script) { + if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value)); } else { rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key); } @@ -1165,6 +1168,7 @@ Init_prism(void) { rb_id_option_filepath = rb_intern_const("filepath"); rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal"); rb_id_option_line = rb_intern_const("line"); + rb_id_option_main_script = rb_intern_const("main_script"); rb_id_option_scopes = rb_intern_const("scopes"); rb_id_option_version = rb_intern_const("version"); rb_id_source_for = rb_intern("for"); diff --git a/prism/options.c b/prism/options.c index 643de9d95a..e0b4735e4a 100644 --- a/prism/options.c +++ b/prism/options.c @@ -100,6 +100,14 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length } } +/** + * Set the main script option on the given options struct. 
+ */ +PRISM_EXPORTED_FUNCTION void +pm_options_main_script_set(pm_options_t *options, bool main_script) { + options->main_script = main_script; +} + // For some reason, GCC analyzer thinks we're leaking allocated scopes and // locals here, even though we definitely aren't. This is a false positive. // Ideally we wouldn't need to suppress this. diff --git a/prism/options.h b/prism/options.h index 52b5380965..3cb7304951 100644 --- a/prism/options.h +++ b/prism/options.h @@ -139,6 +139,13 @@ typedef struct pm_options { * but ignore any encoding magic comments at the top of the file. */ bool encoding_locked; + + /** + * When the file being parsed is the main script, the shebang will be + * considered for command-line flags (or for implicit -x). The caller needs + * to pass this information to the parser so that it can behave correctly. + */ + bool main_script; } pm_options_t; /** @@ -248,6 +255,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, */ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length); +/** + * Set the main script option on the given options struct. + * + * @param options The options struct to set the main script value on. + * @param main_script True when the source is the main script (shebang is considered). + */ +PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script); + /** * Allocate and zero out the scopes array on the given options struct. * diff --git a/prism/prism.c b/prism/prism.c index 72261f2494..c1ef8ed3d4 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -21973,25 +21973,42 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // "ruby" and start parsing from there. bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser); - // If the first two bytes of the source are a shebang, then we'll indicate - // that the encoding comment is at the end of the shebang. 
+ // If the first two bytes of the source are a shebang, then we will do a bit + // of extra processing. + // + // First, we'll indicate that the encoding comment is at the end of the + // shebang. This means that when a shebang is present the encoding comment + // can begin on the second line. + // + // Second, we will check if the shebang includes "ruby". If it does, then we + // will start parsing from there. We will also potentially warn the + // user if there is a carriage return at the end of the shebang. We will + // also potentially call the shebang callback if this is the main script to + // allow the caller to parse the shebang and find any command-line options. + // If the shebang does not include "ruby" and this is the main script being + // parsed (never when options is NULL), we will start searching the file for + // a shebang that contains "ruby" as if -x were passed on the command line. const uint8_t *newline = next_newline(parser->start, parser->end - parser->start); size_t length = (size_t) ((newline != NULL ? 
newline : parser->end) - parser->start); if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') { const char *engine; + if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) { if (newline != NULL) { - pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1); parser->encoding_comment_start = newline + 1; + + if (options == NULL || options->main_script) { + pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1); + } } - if (options != NULL && options->shebang_callback != NULL) { + if (options != NULL && options->main_script && options->shebang_callback != NULL) { pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start))); } search_shebang = false; - } else if (!parser->parsing_eval) { + } else if (options != NULL && options->main_script && !parser->parsing_eval) { search_shebang = true; } } diff --git a/ruby.c b/ruby.c index 7bd894a1b6..093df77ba2 100644 --- a/ruby.c +++ b/ruby.c @@ -2155,6 +2155,8 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) pm_options_t *options = &result->options; pm_options_line_set(options, 1); + pm_options_main_script_set(options, true); + const bool read_stdin = (strcmp(opt->script, "-") == 0); if (read_stdin) { diff --git a/test/prism/api/command_line_test.rb b/test/prism/api/command_line_test.rb index a313845ead..a8c4355152 100644 --- a/test/prism/api/command_line_test.rb +++ b/test/prism/api/command_line_test.rb @@ -67,7 +67,7 @@ module Prism end def test_command_line_x_implicit - result = Prism.parse_statement(<<~RUBY) + result = Prism.parse_statement(<<~RUBY, main_script: true) #!/bin/bash exit 1 @@ -90,7 +90,7 @@ module Prism end def test_command_line_x_implicit_fail - result = Prism.parse(<<~RUBY) + result = Prism.parse(<<~RUBY, main_script: true) #!/bin/bash exit 1 RUBY diff --git a/test/prism/result/warnings_test.rb b/test/prism/result/warnings_test.rb index 
504458e178..fa87295898 100644 
--- a/test/prism/result/warnings_test.rb +++ b/test/prism/result/warnings_test.rb @@ -336,23 +336,24 @@ module Prism def test_shebang_ending_with_carriage_return msg = "shebang line ending with \\r may cause problems" - assert_warning(<<~RUBY, msg, compare: false) + assert_warning(<<~RUBY, msg, compare: false, main_script: true) #!ruby\r p(123) RUBY - assert_warning(<<~RUBY, msg, compare: false) + assert_warning(<<~RUBY, msg, compare: false, main_script: true) #!ruby \r p(123) RUBY - assert_warning(<<~RUBY, msg, compare: false) + assert_warning(<<~RUBY, msg, compare: false, main_script: true) #!ruby -Eutf-8\r p(123) RUBY - # Used with the `-x` object, to ignore the script up until the first shebang that mentioned "ruby". - assert_warning(<<~SCRIPT, msg, compare: false) + # Used with the `-x` option, to ignore the script up until the first + # shebang that mentions "ruby". + assert_warning(<<~SCRIPT, msg, compare: false, main_script: true) #!/usr/bin/env bash # Some initial shell script or other content # that Ruby should ignore @@ -364,11 +365,11 @@ module Prism puts "Hello from Ruby!" SCRIPT - refute_warning("#ruby not_a_shebang\r\n", compare: false) + refute_warning("#ruby not_a_shebang\r\n", compare: false, main_script: true) - # CRuby doesn't emit the warning if a malformed file only has `\r` and not `\n`. - # https://bugs.ruby-lang.org/issues/20700 - refute_warning("#!ruby\r", compare: false) + # CRuby doesn't emit the warning if a malformed file only has `\r` and + # not `\n`. https://bugs.ruby-lang.org/issues/20700. 
+ refute_warning("#!ruby\r", compare: false, main_script: true) end end @@ -384,8 +385,8 @@ module Prism private - def assert_warning(source, *messages, compare: true) - warnings = Prism.parse(source).warnings + def assert_warning(source, *messages, compare: true, **options) + warnings = Prism.parse(source, **options).warnings assert_equal messages.length, warnings.length, "Expected #{messages.length} warning(s) in #{source.inspect}, got #{warnings.map(&:message).inspect}" warnings.zip(messages).each do |warning, message|