[ruby/yarp] Fix behaviour of locations for comments

https://github.com/ruby/yarp/commit/b1ced67fba
2023-09-15 15:01:02 -04:00 · 2023-09-15 15:01:02 -04:00 · bbf9f11ce6
--- a/test/yarp/comments_test.rb
+++ b/test/yarp/comments_test.rb
@ -7,7 +7,7 @@ module YARP
    def test_comment_inline
      source = "# comment"

-      assert_comment source, :inline, 0..9
+      assert_comment source, :inline, [0, 9, 1, 1, 0, 9]
      assert_equal [0], Debug.newlines(source)
    end

@ -18,7 +18,7 @@ module YARP
      end
      RUBY

-      assert_comment source, :inline, 10..22
+      assert_comment source, :inline, [10, 21, 2, 2, 2, 13]
    end

    def test_comment___END__
@ -27,13 +27,13 @@ module YARP
        comment
      RUBY

-      assert_comment source, :__END__, 0..16
+      assert_comment source, :__END__, [0, 16, 1, 2, 0, 0]
    end

    def test_comment___END__crlf
      source = "__END__\r\ncomment\r\n"

-      assert_comment source, :__END__, 0..18
+      assert_comment source, :__END__, [0, 18, 1, 2, 0, 0]
    end

    def test_comment_embedded_document
@ -43,7 +43,7 @@ module YARP
        =end
      RUBY

-      assert_comment source, :embdoc, 0..20
+      assert_comment source, :embdoc, [0, 20, 1, 3, 0, 0]
    end

    def test_comment_embedded_document_with_content_on_same_line
@ -52,7 +52,7 @@ module YARP
        =end
      RUBY

-      assert_comment source, :embdoc, 0..24
+      assert_comment source, :embdoc, [0, 24, 1, 2, 0, 0]
    end

    def test_attaching_comments
@ -74,19 +74,40 @@ module YARP
      method_node = class_node.body.body.first
      call_node = method_node.body.body.first

-      assert_equal("# Foo class\n# Foo end\n", class_node.location.comments.map { |c| c.location.slice }.join)
-      assert_equal("# bar method\n# bar end\n", method_node.location.comments.map { |c| c.location.slice }.join)
-      assert_equal("# baz invocation\n", call_node.location.comments.map { |c| c.location.slice }.join)
+      assert_equal("# Foo class\n# Foo end", class_node.location.comments.map { |c| c.location.slice }.join("\n"))
+      assert_equal("# bar method\n# bar end", method_node.location.comments.map { |c| c.location.slice }.join("\n"))
+      assert_equal("# baz invocation", call_node.location.comments.map { |c| c.location.slice }.join("\n"))
    end

    private

-    def assert_comment(source, type, location)
+    def assert_comment(source, type, locations)
+      start_offset, end_offset, start_line, end_line, start_column, end_column = locations
+      expected = {
+        start_offset: start_offset,
+        end_offset: end_offset,
+        start_line: start_line,
+        end_line: end_line,
+        start_column: start_column,
+        end_column: end_column
+      }
+
      result = YARP.parse(source)
      assert result.errors.empty?, result.errors.map(&:message).join("\n")
-      assert_equal result.comments.first.type, type
-      assert_equal result.comments.first.location.start_offset, location.begin
-      assert_equal result.comments.first.location.end_offset, location.end
+      assert_equal type, result.comments.first.type
+
+      first_comment_location = result.comments.first.location
+
+      actual = {
+        start_offset: first_comment_location.start_offset,
+        end_offset: first_comment_location.end_offset,
+        start_line: first_comment_location.start_line,
+        end_line: first_comment_location.end_line,
+        start_column: first_comment_location.start_column,
+        end_column: first_comment_location.end_column
+      }
+
+      assert_equal expected, actual
    end
  end
 end
--- a/test/yarp/parse_test.rb
+++ b/test/yarp/parse_test.rb
@ -126,13 +126,6 @@ module YARP
        # Next, assert that the newlines are in the expected places.
        expected_newlines = [0]
        source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
-
-        # If there's a __END__, then we should trip out those newlines because we
-        # don't actually scan them during parsing (because we don't need to).
-        if found = result.comments.find { |comment| comment.type == :__END__ }
-          expected_newlines = expected_newlines[...found.location.start_line]
-        end
-
        assert_equal expected_newlines, Debug.newlines(source)

        if ripper_should_parse && ripper_should_match
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@ -6207,10 +6207,7 @@ parser_lex(yp_parser_t *parser) {

                case '#': { // comments
                    const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
-
-                    parser->current.end = ending == NULL ? parser->end : ending + 1;
-                    parser->current.type = YP_TOKEN_COMMENT;
-                    parser_lex_callback(parser);
+                    parser->current.end = ending == NULL ? parser->end : ending;

                    // If we found a comment while lexing, then we're going to
                    // add it to the list of comments in the file and keep
@ -6218,6 +6215,10 @@ parser_lex(yp_parser_t *parser) {
                    yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE);
                    yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);

+                    if (ending) parser->current.end++;
+                    parser->current.type = YP_TOKEN_COMMENT;
+                    parser_lex_callback(parser);
+
                    if (parser->current.start == parser->encoding_comment_start) {
                        parser_lex_encoding_comment(parser);
                    }
@ -7212,6 +7213,14 @@ parser_lex(yp_parser_t *parser) {
                        (parser->current.end == parser->end || match_eol(parser))
                        )
                    {
+                        // Since we know we're about to add an __END__ comment, we know we
+                        // need to add all of the newlines to get the correct column
+                        // information for it.
+                        const uint8_t *cursor = parser->current.end;
+                        while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
+                            yp_newline_list_append(&parser->newline_list, cursor++);
+                        }
+
                        parser->current.end = parser->end;
                        parser->current.type = YP_TOKEN___END__;
                        parser_lex_callback(parser);