diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 0a064a5c94..1ca99db681 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -382,6 +382,9 @@ module Prism template << "l" values << options.fetch(:line, 1) + template << "L" + values << options.fetch(:offset, 0) + template << "L" if (encoding = options[:encoding]) name = encoding.name diff --git a/prism/extension.c b/prism/extension.c index 292e67891f..8c9d7944b5 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -23,13 +23,14 @@ VALUE rb_cPrismParseResult; VALUE rb_cPrismDebugEncoding; -ID rb_option_id_filepath; -ID rb_option_id_encoding; -ID rb_option_id_line; -ID rb_option_id_frozen_string_literal; -ID rb_option_id_version; -ID rb_option_id_scopes; ID rb_option_id_command_line; +ID rb_option_id_encoding; +ID rb_option_id_filepath; +ID rb_option_id_frozen_string_literal; +ID rb_option_id_line; +ID rb_option_id_offset; +ID rb_option_id_scopes; +ID rb_option_id_version; /******************************************************************************/ /* IO of Ruby code */ @@ -138,6 +139,8 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value))); } else if (key_id == rb_option_id_line) { if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value)); + } else if (key_id == rb_option_id_offset) { + if (!NIL_P(value)) pm_options_offset_set(options, NUM2UINT(value)); } else if (key_id == rb_option_id_frozen_string_literal) { if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue); } else if (key_id == rb_option_id_version) { @@ -1297,13 +1300,14 @@ Init_prism(void) { // Intern all of the options that we support so that we don't have to do it // every time we parse. - rb_option_id_filepath = rb_intern_const("filepath"); - rb_option_id_encoding = rb_intern_const("encoding"); - rb_option_id_line = rb_intern_const("line"); - rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal"); - rb_option_id_version = rb_intern_const("version"); - rb_option_id_scopes = rb_intern_const("scopes"); rb_option_id_command_line = rb_intern_const("command_line"); + rb_option_id_encoding = rb_intern_const("encoding"); + rb_option_id_filepath = rb_intern_const("filepath"); + rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal"); + rb_option_id_line = rb_intern_const("line"); + rb_option_id_offset = rb_intern_const("offset"); + rb_option_id_scopes = rb_intern_const("scopes"); + rb_option_id_version = rb_intern_const("version"); /** * The version of the prism library. diff --git a/prism/options.c b/prism/options.c index d94cfad550..cac6b588eb 100644 --- a/prism/options.c +++ b/prism/options.c @@ -24,6 +24,14 @@ pm_options_line_set(pm_options_t *options, int32_t line) { options->line = line; } +/** + * Set the offset option on the given options struct. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_offset_set(pm_options_t *options, uint32_t offset) { + options->offset = offset; +} + /** * Set the frozen string literal option on the given options struct. */ @@ -193,6 +201,9 @@ pm_options_read(pm_options_t *options, const char *data) { options->line = pm_options_read_s32(data); data += 4; + options->offset = pm_options_read_u32(data); + data += 4; + uint32_t encoding_length = pm_options_read_u32(data); data += 4; diff --git a/prism/options.h b/prism/options.h index ce979656a5..17ca2ff8a1 100644 --- a/prism/options.h +++ b/prism/options.h @@ -50,6 +50,12 @@ typedef struct { */ int32_t line; + /** + * The offset within the file that the parse starts on. This value is + * 0-indexed. + */ + uint32_t offset; + /** * The name of the encoding that the source file is in. Note that this must * correspond to a name that can be found with Encoding.find in Ruby. @@ -136,6 +142,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons */ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line); +/** + * Set the offset option on the given options struct. + * + * @param options The options struct to set the offset on. + * @param offset The offset to set. + */ +PRISM_EXPORTED_FUNCTION void pm_options_offset_set(pm_options_t *options, uint32_t offset); + /** * Set the encoding option on the given options struct. * @@ -231,6 +245,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); * | `4` | the length of the filepath | * | ... | the filepath bytes | * | `4` | the line number | + * | `4` | the offset | * | `4` | the length the encoding | * | ... | the encoding bytes | * | `1` | frozen string literal | diff --git a/prism/prism.c b/prism/prism.c index be1f73653a..b0c061a432 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -18781,6 +18781,22 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // line option parser->start_line = options->line; + // offset option + if (options->offset != 0) { + const uint8_t *cursor = parser->start; + const uint8_t *offset = cursor + options->offset; + + const uint8_t *newline = NULL; + while ((newline = next_newline(cursor, parser->end - cursor)) != NULL) { + if (newline > offset) break; + pm_newline_list_append(&parser->newline_list, newline); + cursor = newline + 1; + } + + parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = offset, .end = offset }; + parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = offset, .end = offset }; + } + // encoding option size_t encoding_length = pm_string_length(&options->encoding); if (encoding_length > 0) { diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb index ff69ef5417..80f7cb05d3 100644 --- a/test/prism/ruby_api_test.rb +++ b/test/prism/ruby_api_test.rb @@ -231,6 +231,21 @@ module Prism assert_equal 16, base[parse_expression("0x1")] end + def test_offset + source = <<~RUBY + #!/bin/sh + + echo "foo" + exit 0 + + #!/usr/bin/env ruby + + puts "bar" + RUBY + + assert Prism.parse_success?(source, offset: 30) + end + private def parse_expression(source)