From 8ed733f8f22cdc56a9d2694078871cfc401cb029 Mon Sep 17 00:00:00 2001 From: alpaca-tc Date: Sat, 4 Nov 2023 23:04:51 +0900 Subject: [PATCH] ast.rb: Fix source for code containing multibyte characters first_column and last_column return byte offsets, but the existing implementation sliced each line by character index, so the extracted source text was wrong when a line contained multibyte characters. Slice with byteslice instead. --- ast.rb | 4 ++-- test/ruby/test_ast.rb | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ast.rb b/ast.rb index fa9b69507c..51ee5b3d59 100644 --- a/ast.rb +++ b/ast.rb @@ -265,8 +265,8 @@ module RubyVM::AbstractSyntaxTree lines = script_lines if lines lines = lines[first_lineno - 1 .. last_lineno - 1] - lines[-1] = lines[-1][0...last_column] - lines[0] = lines[0][first_column..-1] + lines[-1] = lines[-1].byteslice(0...last_column) + lines[0] = lines[0].byteslice(first_column..-1) lines.join else nil diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index c8617d50f0..234c7af219 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -746,6 +746,14 @@ dummy assert_equal("def test_keep_script_lines_for_of\n", node_method.source.lines.first) end + def test_source_with_multibyte_characters + ast = RubyVM::AbstractSyntaxTree.parse(%{a("\u00a7");b("\u00a9")}, keep_script_lines: true) + a_fcall, b_fcall = ast.children[2].children + + assert_equal(%{a("\u00a7")}, a_fcall.source) + assert_equal(%{b("\u00a9")}, b_fcall.source) + end + def test_keep_tokens_for_parse node = RubyVM::AbstractSyntaxTree.parse(<<~END, keep_tokens: true) 1.times do