[ruby/prism] Properly support the start line option

https://github.com/ruby/prism/commit/33cc75a4b7
2023-11-02 15:06:50 -04:00 · 2023-11-02 15:06:50 -04:00 · d7d3243364
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@ -8,14 +8,18 @@ module Prism
    # The source code that this source object represents.
    attr_reader :source
    # The line number where this source starts.
    attr_reader :start_line
    # The list of newline byte offsets in the source code.
    attr_reader :offsets
    # Create a new source object with the given source code, starting line
    # number, and newline byte offsets. If no starting line is given, it
    # defaults to 1. If no newline byte offsets are given, they will be computed
    # from the source code.
-    def initialize(source, offsets = compute_offsets(source))
+    def initialize(source, start_line = 1, offsets = compute_offsets(source))
      @source = source
      @start_line = start_line
      @offsets = offsets
    end
@ -28,6 +32,25 @@ module Prism
    # Return the line number for the given byte offset: the index of the line
    # containing the offset, shifted by the line this source starts on.
    def line(value)
      line_index = find_line(value)
      start_line + line_index
    end
    # Look up which line contains the given byte offset and return the byte
    # offset at which that line begins.
    def line_offset(value)
      line_index = find_line(value)
      offsets[line_index]
    end
    # Return the column for the given byte offset, measured as the distance in
    # bytes from the start of the line that contains it.
    def column(value)
      line_start = offsets[find_line(value)]
      value - line_start
    end
    private
    # Binary search through the offsets to find the line number for the given
    # byte offset.
    def find_line(value)
      left = 0
      right = offsets.length - 1
@ -45,19 +68,6 @@ module Prism
      left - 1
    end
    # Look up which line contains the given byte offset and return the byte
    # offset at which that line begins.
    def line_offset(value)
      line_index = line(value)
      offsets[line_index]
    end
    # Return the column for the given byte offset, measured as the distance in
    # bytes from the start of the line that contains it.
    def column(value)
      line_start = offsets[line(value)]
      value - line_start
    end
    private
    # Find all of the newlines in the source code and return their byte offsets
    # from the start of the string as an array.
    def compute_offsets(code)
@ -118,7 +128,7 @@ module Prism
    # The line number where this location starts. Source#line already adds the
    # source's starting line number to the line index, so its result is
    # returned without any further adjustment.
    def start_line
-      source.line(start_offset) + 1
+      source.line(start_offset)
    end
    # The content of the line where this location starts before this location.
@ -129,7 +139,7 @@ module Prism
    # The line number where this location ends. Source#line already adds the
    # source's starting line number to the line index, so its result is
    # returned without any further adjustment.
    def end_line
-      source.line(end_offset) + 1
+      source.line(end_offset)
    end
    # The column number in bytes where this location starts from the start of
--- a/prism/extension.c
+++ b/prism/extension.c
@ -471,8 +471,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
    pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
    VALUE offsets = rb_ary_new();
-    VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets };
+    VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets };
-    VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource);
+    VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource);
    parse_lex_data_t parse_lex_data = {
        .source = source,
--- a/prism/parser.h
+++ b/prism/parser.h
@ -639,6 +639,12 @@ struct pm_parser {
     */
    pm_string_t current_string;
    /**
     * The line number at the start of the parse. This will be used to offset
     * the line numbers of all of the locations.
     */
    uint32_t start_line;
    /** Whether or not we're at the beginning of a command. */
    bool command_start;
--- a/prism/prism.c
+++ b/prism/prism.c
@ -16360,6 +16360,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
        .newline_list = { 0 },
        .integer_base = 0,
        .current_string = PM_STRING_EMPTY,
        .start_line = 1,
        .command_start = true,
        .recovering = false,
        .encoding_changed = false,
@ -16400,7 +16401,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
        // line option
        if (options->line > 0) {
-            pm_newline_list_force(&parser->newline_list, options->line);
+            parser->start_line = options->line;
        }
        // encoding option
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@ -46,8 +46,8 @@ pm_source_new(pm_parser_t *parser, rb_encoding *encoding) {
        rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index]));
    }
-    VALUE source_argv[] = { source, offsets };
+    VALUE source_argv[] = { source, ULONG2NUM(parser->start_line), offsets };
-    return rb_class_new_instance(2, source_argv, rb_cPrismSource);
+    return rb_class_new_instance(3, source_argv, rb_cPrismSource);
 }
 typedef struct pm_node_stack_node {
--- a/prism/util/pm_newline_list.c
+++ b/prism/util/pm_newline_list.c
@ -19,33 +19,6 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
    return true;
 }
 /**
 * Set up the newline list such that it believes it is starting on a specific
 * line in the source. Basically this entails pushing on pointers to the start
 * of the string until we hit the desired line.
 *
 * A fresh buffer large enough for the existing entries plus `count` new ones is
 * always allocated. The new entries are zero offsets, which calloc provides.
 * The list is left untouched if the allocation fails.
 *
 * @param list The list to set up.
 * @param count The number of zero offsets to append to the list.
 * @return True if the allocation of the new offsets buffer succeeds, otherwise
 *     false.
 */
 bool
 pm_newline_list_force(pm_newline_list_t *list, size_t count) {
    // The buffer must hold the entries already present as well as the new
    // ones, so size it against the total rather than against count alone.
    size_t required = list->size + count;
    size_t next_capacity = list->capacity == 0 ? 1 : list->capacity;
    while (required > next_capacity) {
        next_capacity *= 2;
    }
    // Allocate into a temporary so that a failure neither leaks the existing
    // buffer nor leaves the list pointing at NULL.
    size_t *offsets = (size_t *) calloc(next_capacity, sizeof(size_t));
    if (offsets == NULL) return false;
    if (list->offsets != NULL) {
        memcpy(offsets, list->offsets, list->size * sizeof(size_t));
        free(list->offsets);
    }
    // calloc has already zeroed the tail, so the forced entries need no
    // explicit memset. Record the new capacity so later appends see the real
    // buffer size.
    list->offsets = offsets;
    list->capacity = next_capacity;
    list->size = required;
    return true;
 }
 /**
 * Append a new offset to the newline list. Returns true if the reallocation of
 * the offsets succeeds (if one was necessary), otherwise returns false.
--- a/prism/util/pm_newline_list.h
+++ b/prism/util/pm_newline_list.h
@ -61,18 +61,6 @@ typedef struct {
 */
 bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
 /**
 * Set up the newline list such that it believes it is starting on a specific
 * line in the source. Basically this entails pushing on pointers to the start
 * of the string until we hit the desired line.
 *
 * @param list The list to set up.
 * @param count The number of zero offsets to push onto the list.
 * @return True if the allocation of the new offsets buffer succeeds, otherwise
 *     false.
 */
 bool pm_newline_list_force(pm_newline_list_t *list, size_t count);
 /**
 * Append a new offset to the newline list. Returns true if the reallocation of
 * the offsets succeeds (if one was necessary), otherwise returns false.
--- a/test/prism/newline_test.rb
+++ b/test/prism/newline_test.rb
@ -84,7 +84,7 @@ module Prism
      while node = queue.shift
        queue.concat(node.compact_child_nodes)
-        newlines << (result.source.line(node.location.start_offset) + 1) if node&.newline?
+        newlines << result.source.line(node.location.start_offset) if node&.newline?
      end
      newlines.sort
--- a/test/prism/ruby_api_test.rb
+++ b/test/prism/ruby_api_test.rb
@ -24,6 +24,9 @@ module Prism
      assert_equal "", Prism.parse("__FILE__").value.statements.body[0].filepath
      assert_equal "foo.rb", Prism.parse("__FILE__", filepath: "foo.rb").value.statements.body[0].filepath
      assert_equal 1, Prism.parse("foo").value.statements.body[0].location.start_line
      assert_equal 10, Prism.parse("foo", line: 10).value.statements.body[0].location.start_line
      refute Prism.parse("\"foo\"").value.statements.body[0].frozen?
      assert Prism.parse("\"foo\"", frozen_string_literal: true).value.statements.body[0].frozen?
      refute Prism.parse("\"foo\"", frozen_string_literal: false).value.statements.body[0].frozen?