[PRISM] Only parse shebang on main script

Fixes [Bug #20730]
This commit is contained in:
Kevin Newton 2024-09-13 11:21:28 -04:00
Родитель d42d19059d
Коммит 9afc6a981d
7 изменённых файлов: 65 добавлений и 18 удалений

Просмотреть файл

@ -31,6 +31,7 @@ ID rb_id_option_encoding;
ID rb_id_option_filepath;
ID rb_id_option_frozen_string_literal;
ID rb_id_option_line;
ID rb_id_option_main_script;
ID rb_id_option_scopes;
ID rb_id_option_version;
ID rb_id_source_for;
@ -179,6 +180,8 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
pm_options_command_line_set(options, command_line);
}
} else if (key_id == rb_id_option_main_script) {
if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value));
} else {
rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
}
@ -1165,6 +1168,7 @@ Init_prism(void) {
rb_id_option_filepath = rb_intern_const("filepath");
rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal");
rb_id_option_line = rb_intern_const("line");
rb_id_option_main_script = rb_intern_const("main_script");
rb_id_option_scopes = rb_intern_const("scopes");
rb_id_option_version = rb_intern_const("version");
rb_id_source_for = rb_intern("for");

Просмотреть файл

@ -100,6 +100,14 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
}
}
/**
* Set the main script option on the given options struct.
*
* @param options The options struct whose main_script field is written.
* @param main_script The value to store in the main_script field.
*/
PRISM_EXPORTED_FUNCTION void
pm_options_main_script_set(pm_options_t *options, bool main_script) {
options->main_script = main_script;
}
// For some reason, GCC analyzer thinks we're leaking allocated scopes and
// locals here, even though we definitely aren't. This is a false positive.
// Ideally we wouldn't need to suppress this.

Просмотреть файл

@ -139,6 +139,13 @@ typedef struct pm_options {
* but ignore any encoding magic comments at the top of the file.
*/
bool encoding_locked;
/**
* When the file being parsed is the main script, the shebang will be
* considered for command-line flags (or for implicit -x). The caller needs
* to pass this information to the parser so that it can behave correctly.
*/
bool main_script;
} pm_options_t;
/**
@ -248,6 +255,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
*/
PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
/**
* Set the main script option on the given options struct. When the file being
* parsed is the main script, the shebang will be considered for command-line
* flags (or for implicit -x), so the caller uses this to tell the parser.
*
* @param options The options struct to set the main script value on.
* @param main_script The main script value to set (true when the source being
*     parsed is the main script).
*/
PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
/**
* Allocate and zero out the scopes array on the given options struct.
*

Просмотреть файл

@ -21973,25 +21973,42 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
// "ruby" and start parsing from there.
bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
// If the first two bytes of the source are a shebang, then we'll indicate
// that the encoding comment is at the end of the shebang.
// If the first two bytes of the source are a shebang, then we will do a bit
// of extra processing.
//
// First, we'll indicate that the encoding comment is at the end of the
// shebang. This means that when a shebang is present the encoding comment
// can begin on the second line.
//
// Second, we will check if the shebang includes "ruby". If it does, then we
// will start parsing from there. We will also potentially warn the
// user if there is a carriage return at the end of the shebang. We will
// also potentially call the shebang callback if this is the main script to
// allow the caller to parse the shebang and find any command-line options.
// If the shebang does not include "ruby" and this is the main script being
// parsed, then we will start searching the file for a shebang that does
// contain "ruby" as if -x were passed on the command line.
const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
const char *engine;
if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
if (newline != NULL) {
pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
parser->encoding_comment_start = newline + 1;
if (options == NULL || options->main_script) {
pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
}
}
if (options != NULL && options->shebang_callback != NULL) {
if (options != NULL && options->main_script && options->shebang_callback != NULL) {
pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
}
search_shebang = false;
} else if (!parser->parsing_eval) {
} else if (options != NULL && options->main_script && !parser->parsing_eval) {
// options may be NULL (the sibling branches of this shebang check already
// guard for that), so it has to be checked before main_script is read.
search_shebang = true;
}
}

2
ruby.c
Просмотреть файл

@ -2155,6 +2155,8 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result)
pm_options_t *options = &result->options;
pm_options_line_set(options, 1);
pm_options_main_script_set(options, true);
const bool read_stdin = (strcmp(opt->script, "-") == 0);
if (read_stdin) {

Просмотреть файл

@ -67,7 +67,7 @@ module Prism
end
def test_command_line_x_implicit
result = Prism.parse_statement(<<~RUBY)
result = Prism.parse_statement(<<~RUBY, main_script: true)
#!/bin/bash
exit 1
@ -90,7 +90,7 @@ module Prism
end
def test_command_line_x_implicit_fail
result = Prism.parse(<<~RUBY)
result = Prism.parse(<<~RUBY, main_script: true)
#!/bin/bash
exit 1
RUBY

Просмотреть файл

@ -336,23 +336,24 @@ module Prism
def test_shebang_ending_with_carriage_return
msg = "shebang line ending with \\r may cause problems"
assert_warning(<<~RUBY, msg, compare: false)
assert_warning(<<~RUBY, msg, compare: false, main_script: true)
#!ruby\r
p(123)
RUBY
assert_warning(<<~RUBY, msg, compare: false)
assert_warning(<<~RUBY, msg, compare: false, main_script: true)
#!ruby \r
p(123)
RUBY
assert_warning(<<~RUBY, msg, compare: false)
assert_warning(<<~RUBY, msg, compare: false, main_script: true)
#!ruby -Eutf-8\r
p(123)
RUBY
# Used with the `-x` object, to ignore the script up until the first shebang that mentioned "ruby".
assert_warning(<<~SCRIPT, msg, compare: false)
# Used with the `-x` option, to ignore the script up until the first
# shebang that mentions "ruby".
assert_warning(<<~SCRIPT, msg, compare: false, main_script: true)
#!/usr/bin/env bash
# Some initial shell script or other content
# that Ruby should ignore
@ -364,11 +365,11 @@ module Prism
puts "Hello from Ruby!"
SCRIPT
refute_warning("#ruby not_a_shebang\r\n", compare: false)
refute_warning("#ruby not_a_shebang\r\n", compare: false, main_script: true)
# CRuby doesn't emit the warning if a malformed file only has `\r` and not `\n`.
# https://bugs.ruby-lang.org/issues/20700
refute_warning("#!ruby\r", compare: false)
# CRuby doesn't emit the warning if a malformed file only has `\r` and
# not `\n`. https://bugs.ruby-lang.org/issues/20700.
refute_warning("#!ruby\r", compare: false, main_script: true)
end
end
@ -384,8 +385,8 @@ module Prism
private
def assert_warning(source, *messages, compare: true)
warnings = Prism.parse(source).warnings
def assert_warning(source, *messages, compare: true, **options)
warnings = Prism.parse(source, **options).warnings
assert_equal messages.length, warnings.length, "Expected #{messages.length} warning(s) in #{source.inspect}, got #{warnings.map(&:message).inspect}"
warnings.zip(messages).each do |warning, message|