diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index cf94232df2..92651cf766 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -8,14 +8,18 @@ module Prism # The source code that this source object represents. attr_reader :source + # The line number where this source starts. + attr_reader :start_line + # The list of newline byte offsets in the source code. attr_reader :offsets # Create a new source object with the given source code and newline byte # offsets. If no newline byte offsets are given, they will be computed from # the source code. - def initialize(source, offsets = compute_offsets(source)) + def initialize(source, start_line = 1, offsets = compute_offsets(source)) @source = source + @start_line = start_line @offsets = offsets end @@ -28,6 +32,25 @@ module Prism # Binary search through the offsets to find the line number for the given # byte offset. def line(value) + start_line + find_line(value) + end + + # Return the byte offset of the start of the line corresponding to the given + # byte offset. + def line_offset(value) + offsets[find_line(value)] + end + + # Return the column number for the given byte offset. + def column(value) + value - offsets[find_line(value)] + end + + private + + # Binary search through the offsets to find the line number for the given + # byte offset. + def find_line(value) left = 0 right = offsets.length - 1 @@ -45,19 +68,6 @@ module Prism left - 1 end - # Return the byte offset of the start of the line corresponding to the given - # byte offset. - def line_offset(value) - offsets[line(value)] - end - - # Return the column number for the given byte offset. - def column(value) - value - offsets[line(value)] - end - - private - # Find all of the newlines in the source code and return their byte offsets # from the start of the string an array. def compute_offsets(code) @@ -118,7 +128,7 @@ module Prism # The line number where this location starts. def start_line - source.line(start_offset) + 1 + source.line(start_offset) end # The content of the line where this location starts before this location. @@ -129,7 +139,7 @@ module Prism # The line number where this location ends. def end_line - source.line(end_offset) + 1 + source.line(end_offset) end # The column number in bytes where this location starts from the start of diff --git a/prism/extension.c b/prism/extension.c index 86221a7bc5..dfd8e76d5a 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -471,8 +471,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback); VALUE offsets = rb_ary_new(); - VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets }; - VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource); + VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets }; + VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource); parse_lex_data_t parse_lex_data = { .source = source, diff --git a/prism/parser.h b/prism/parser.h index a30e61c9bf..edefe70f25 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -639,6 +639,12 @@ struct pm_parser { */ pm_string_t current_string; + /** + * The line number at the start of the parse. This will be used to offset + * the line numbers of all of the locations. + */ + uint32_t start_line; + /** Whether or not we're at the beginning of a command. */ bool command_start; diff --git a/prism/prism.c b/prism/prism.c index 0e135db5cb..be8dd38ccf 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -16360,6 +16360,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .newline_list = { 0 }, .integer_base = 0, .current_string = PM_STRING_EMPTY, + .start_line = 1, .command_start = true, .recovering = false, .encoding_changed = false, @@ -16400,7 +16401,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // line option if (options->line > 0) { - pm_newline_list_force(&parser->newline_list, options->line); + parser->start_line = options->line; } // encoding option diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb index 1ffbf6c9f8..7bc52c1120 100644 --- a/prism/templates/ext/prism/api_node.c.erb +++ b/prism/templates/ext/prism/api_node.c.erb @@ -46,8 +46,8 @@ pm_source_new(pm_parser_t *parser, rb_encoding *encoding) { rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index])); } - VALUE source_argv[] = { source, offsets }; - return rb_class_new_instance(2, source_argv, rb_cPrismSource); + VALUE source_argv[] = { source, ULONG2NUM(parser->start_line), offsets }; + return rb_class_new_instance(3, source_argv, rb_cPrismSource); } typedef struct pm_node_stack_node { diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c index 978ebf3d0e..f27bb75b63 100644 --- a/prism/util/pm_newline_list.c +++ b/prism/util/pm_newline_list.c @@ -19,33 +19,6 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac return true; } -/** - * Set up the newline list such that it believes it is starting on a specific - * line in the source. Basically this entails pushing on pointers to the start - * of the string until we hit the desired line. - */ -bool -pm_newline_list_force(pm_newline_list_t *list, size_t count) { - size_t next_capacity = list->capacity == 0 ? 1 : list->capacity; - while (count > next_capacity) { - next_capacity *= 2; - } - - size_t *offsets = list->offsets; - list->offsets = (size_t *) calloc(next_capacity, sizeof(size_t)); - if (list->offsets == NULL) return false; - - if (offsets != NULL) { - memcpy(list->offsets, offsets, list->size * sizeof(size_t)); - free(offsets); - } - - memset(list->offsets + list->size, 0, count * sizeof(size_t)); - list->size += count; - - return true; -} - /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h index 93816b0656..a31051f4e0 100644 --- a/prism/util/pm_newline_list.h +++ b/prism/util/pm_newline_list.h @@ -61,18 +61,6 @@ typedef struct { */ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity); -/** - * Set up the newline list such that it believes it is starting on a specific - * line in the source. Basically this entails pushing on pointers to the start - * of the string until we hit the desired line. - * - * @param list The list to set up. - * @param count The number of lines to push onto the list. - * @return True if no reallocation was needed or the reallocation of the offsets - * succeeds (if one was necessary), otherwise false. - */ -bool pm_newline_list_force(pm_newline_list_t *list, size_t count); - /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb index 3a2892b970..c20a99a398 100644 --- a/test/prism/newline_test.rb +++ b/test/prism/newline_test.rb @@ -84,7 +84,7 @@ module Prism while node = queue.shift queue.concat(node.compact_child_nodes) - newlines << (result.source.line(node.location.start_offset) + 1) if node&.newline? + newlines << result.source.line(node.location.start_offset) if node&.newline? end newlines.sort diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb index 844a7796a3..a61282cca1 100644 --- a/test/prism/ruby_api_test.rb +++ b/test/prism/ruby_api_test.rb @@ -24,6 +24,9 @@ module Prism assert_equal "", Prism.parse("__FILE__").value.statements.body[0].filepath assert_equal "foo.rb", Prism.parse("__FILE__", filepath: "foo.rb").value.statements.body[0].filepath + assert_equal 1, Prism.parse("foo").value.statements.body[0].location.start_line + assert_equal 10, Prism.parse("foo", line: 10).value.statements.body[0].location.start_line + refute Prism.parse("\"foo\"").value.statements.body[0].frozen? assert Prism.parse("\"foo\"", frozen_string_literal: true).value.statements.body[0].frozen? refute Prism.parse("\"foo\"", frozen_string_literal: false).value.statements.body[0].frozen?