зеркало из https://github.com/github/ruby.git
[ruby/prism] Fix an AST and token incompatibility for `Prism::Translation::Parser`
This PR fixes an AST and token incompatibility between the Parser gem and `Prism::Translation::Parser` for a dstring literal:

```ruby
"foo
  #{bar}"
```

## Parser gem (Expected)

```console
$ bundle exec ruby -Ilib -rparser/ruby33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Parser::Ruby33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr, s(:str, "foo\n"), s(:str, "  "), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, ["  ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```

## `Prism::Translation::Parser` (Actual)

Previously, the AST and tokens returned by `Prism::Translation::Parser` differed from those returned by the Parser gem.
In this case, the `dstr` node should not be nested:

```console
$ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr, s(:dstr, s(:str, "foo\n"), s(:str, "  ")), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, ["  ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```

After this correction, the AST and tokens returned by `Prism::Translation::Parser` match those returned by the Parser gem:

```console
$ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \
'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)'
ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22]
[s(:dstr, s(:str, "foo\n"), s(:str, "  "), s(:begin, s(:send, nil, :bar))), [], [[:tSTRING_BEG, ["\"", #<Parser::Source::Range example.rb 0...1>]], [:tSTRING_CONTENT, ["foo\n", #<Parser::Source::Range example.rb 1...5>]], [:tSTRING_CONTENT, ["  ", #<Parser::Source::Range example.rb 5...7>]], [:tSTRING_DBEG, ["\#{", #<Parser::Source::Range example.rb 7...9>]], [:tIDENTIFIER, ["bar", #<Parser::Source::Range example.rb 9...12>]], [:tSTRING_DEND, ["}", #<Parser::Source::Range example.rb 12...13>]], [:tSTRING_END, ["\"", #<Parser::Source::Range example.rb 13...14>]], [:tNL, [nil, #<Parser::Source::Range example.rb 14...15>]]]]
```

https://github.com/ruby/prism/commit/c1652a9ee7
This commit is contained in:
Родитель
c45ad17fa1
Коммит
c0b8dee95a
|
@ -953,14 +953,35 @@ module Prism
|
|||
def visit_interpolated_string_node(node)
|
||||
if node.heredoc?
|
||||
children, closing = visit_heredoc(node)
|
||||
builder.string_compose(token(node.opening_loc), children, closing)
|
||||
else
|
||||
builder.string_compose(
|
||||
token(node.opening_loc),
|
||||
visit_all(node.parts),
|
||||
token(node.closing_loc)
|
||||
)
|
||||
|
||||
return builder.string_compose(token(node.opening_loc), children, closing)
|
||||
end
|
||||
|
||||
parts = if node.parts.one? { |part| part.type == :string_node }
|
||||
node.parts.flat_map do |node|
|
||||
if node.type == :string_node && node.unescaped.lines.count >= 2
|
||||
start_offset = node.content_loc.start_offset
|
||||
|
||||
node.unescaped.lines.map do |line|
|
||||
end_offset = start_offset + line.length
|
||||
offsets = srange_offsets(start_offset, end_offset)
|
||||
start_offset = end_offset
|
||||
|
||||
builder.string_internal([line, offsets])
|
||||
end
|
||||
else
|
||||
visit(node)
|
||||
end
|
||||
end
|
||||
else
|
||||
visit_all(node.parts)
|
||||
end
|
||||
|
||||
builder.string_compose(
|
||||
token(node.opening_loc),
|
||||
parts,
|
||||
token(node.closing_loc)
|
||||
)
|
||||
end
|
||||
|
||||
# :"foo #{bar}"
|
||||
|
@ -1492,17 +1513,17 @@ module Prism
|
|||
elsif node.opening == "?"
|
||||
builder.character([node.unescaped, srange(node.location)])
|
||||
else
|
||||
parts = if node.unescaped.lines.count <= 1
|
||||
parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
|
||||
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
||||
else
|
||||
start_offset = node.content_loc.start_offset
|
||||
|
||||
node.unescaped.lines.map do |line|
|
||||
end_offset = start_offset + line.length
|
||||
[node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
|
||||
end_offset = start_offset + content_line.length
|
||||
offsets = srange_offsets(start_offset, end_offset)
|
||||
start_offset = end_offset
|
||||
|
||||
builder.string_internal([line, offsets])
|
||||
builder.string_internal([unescaped_line, offsets])
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -295,8 +295,24 @@ module Prism
|
|||
unless (lines = token.value.lines).one?
|
||||
start_offset = offset_cache[token.location.start_offset]
|
||||
lines.map do |line|
|
||||
end_offset = start_offset + line.length
|
||||
tokens << [:tSTRING_CONTENT, [line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
|
||||
newline = line.end_with?("\r\n") ? "\r\n" : "\n"
|
||||
chomped_line = line.chomp
|
||||
if match = chomped_line.match(/(?<backslashes>\\+)\z/)
|
||||
adjustment = match[:backslashes].size / 2
|
||||
adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
|
||||
if match[:backslashes].size.odd?
|
||||
adjusted_line.delete_suffix!("\\")
|
||||
adjustment += 2
|
||||
else
|
||||
adjusted_line << newline
|
||||
end
|
||||
else
|
||||
adjusted_line = line
|
||||
adjustment = 0
|
||||
end
|
||||
|
||||
end_offset = start_offset + adjusted_line.length + adjustment
|
||||
tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
|
||||
start_offset = end_offset
|
||||
end
|
||||
next
|
||||
|
|
|
@ -1,2 +1,29 @@
|
|||
"foo
|
||||
bar"
|
||||
|
||||
"foo
|
||||
#{bar}"
|
||||
|
||||
"fo
|
||||
o" "ba
|
||||
r"
|
||||
|
||||
"
|
||||
foo\
|
||||
"
|
||||
|
||||
"
|
||||
foo\\
|
||||
"
|
||||
|
||||
"
|
||||
foo\\\
|
||||
"
|
||||
|
||||
"
|
||||
foo\\\\
|
||||
"
|
||||
|
||||
"
|
||||
foo\\\\\
|
||||
"
|
||||
|
|
|
@ -1,11 +1,83 @@
|
|||
@ ProgramNode (location: (1,0)-(2,6))
|
||||
@ ProgramNode (location: (1,0)-(29,1))
|
||||
├── locals: []
|
||||
└── statements:
|
||||
@ StatementsNode (location: (1,0)-(2,6))
|
||||
└── body: (length: 1)
|
||||
└── @ StringNode (location: (1,0)-(2,6))
|
||||
@ StatementsNode (location: (1,0)-(29,1))
|
||||
└── body: (length: 8)
|
||||
├── @ StringNode (location: (1,0)-(2,6))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (1,0)-(1,1) = "\""
|
||||
│ ├── content_loc: (1,1)-(2,5) = "foo\n bar"
|
||||
│ ├── closing_loc: (2,5)-(2,6) = "\""
|
||||
│ └── unescaped: "foo\n bar"
|
||||
├── @ InterpolatedStringNode (location: (4,0)-(5,9))
|
||||
│ ├── opening_loc: (4,0)-(4,1) = "\""
|
||||
│ ├── parts: (length: 2)
|
||||
│ │ ├── @ StringNode (location: (4,1)-(5,2))
|
||||
│ │ │ ├── flags: ∅
|
||||
│ │ │ ├── opening_loc: ∅
|
||||
│ │ │ ├── content_loc: (4,1)-(5,2) = "foo\n "
|
||||
│ │ │ ├── closing_loc: ∅
|
||||
│ │ │ └── unescaped: "foo\n "
|
||||
│ │ └── @ EmbeddedStatementsNode (location: (5,2)-(5,8))
|
||||
│ │ ├── opening_loc: (5,2)-(5,4) = "\#{"
|
||||
│ │ ├── statements:
|
||||
│ │ │ @ StatementsNode (location: (5,4)-(5,7))
|
||||
│ │ │ └── body: (length: 1)
|
||||
│ │ │ └── @ CallNode (location: (5,4)-(5,7))
|
||||
│ │ │ ├── flags: variable_call, ignore_visibility
|
||||
│ │ │ ├── receiver: ∅
|
||||
│ │ │ ├── call_operator_loc: ∅
|
||||
│ │ │ ├── name: :bar
|
||||
│ │ │ ├── message_loc: (5,4)-(5,7) = "bar"
|
||||
│ │ │ ├── opening_loc: ∅
|
||||
│ │ │ ├── arguments: ∅
|
||||
│ │ │ ├── closing_loc: ∅
|
||||
│ │ │ └── block: ∅
|
||||
│ │ └── closing_loc: (5,7)-(5,8) = "}"
|
||||
│ └── closing_loc: (5,8)-(5,9) = "\""
|
||||
├── @ InterpolatedStringNode (location: (7,0)-(9,2))
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── parts: (length: 2)
|
||||
│ │ ├── @ StringNode (location: (7,0)-(8,2))
|
||||
│ │ │ ├── flags: ∅
|
||||
│ │ │ ├── opening_loc: (7,0)-(7,1) = "\""
|
||||
│ │ │ ├── content_loc: (7,1)-(8,1) = "fo\no"
|
||||
│ │ │ ├── closing_loc: (8,1)-(8,2) = "\""
|
||||
│ │ │ └── unescaped: "fo\no"
|
||||
│ │ └── @ StringNode (location: (8,3)-(9,2))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (8,3)-(8,4) = "\""
|
||||
│ │ ├── content_loc: (8,4)-(9,1) = "ba\nr"
|
||||
│ │ ├── closing_loc: (9,1)-(9,2) = "\""
|
||||
│ │ └── unescaped: "ba\nr"
|
||||
│ └── closing_loc: ∅
|
||||
├── @ StringNode (location: (11,0)-(13,1))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (11,0)-(11,1) = "\""
|
||||
│ ├── content_loc: (11,1)-(13,0) = "\nfoo\\\n"
|
||||
│ ├── closing_loc: (13,0)-(13,1) = "\""
|
||||
│ └── unescaped: "\nfoo"
|
||||
├── @ StringNode (location: (15,0)-(17,1))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (15,0)-(15,1) = "\""
|
||||
│ ├── content_loc: (15,1)-(17,0) = "\nfoo\\\\\n"
|
||||
│ ├── closing_loc: (17,0)-(17,1) = "\""
|
||||
│ └── unescaped: "\nfoo\\\n"
|
||||
├── @ StringNode (location: (19,0)-(21,1))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (19,0)-(19,1) = "\""
|
||||
│ ├── content_loc: (19,1)-(21,0) = "\nfoo\\\\\\\n"
|
||||
│ ├── closing_loc: (21,0)-(21,1) = "\""
|
||||
│ └── unescaped: "\nfoo\\"
|
||||
├── @ StringNode (location: (23,0)-(25,1))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (23,0)-(23,1) = "\""
|
||||
│ ├── content_loc: (23,1)-(25,0) = "\nfoo\\\\\\\\\n"
|
||||
│ ├── closing_loc: (25,0)-(25,1) = "\""
|
||||
│ └── unescaped: "\nfoo\\\\\n"
|
||||
└── @ StringNode (location: (27,0)-(29,1))
|
||||
├── flags: ∅
|
||||
├── opening_loc: (1,0)-(1,1) = "\""
|
||||
├── content_loc: (1,1)-(2,5) = "foo\n bar"
|
||||
├── closing_loc: (2,5)-(2,6) = "\""
|
||||
└── unescaped: "foo\n bar"
|
||||
├── opening_loc: (27,0)-(27,1) = "\""
|
||||
├── content_loc: (27,1)-(29,0) = "\nfoo\\\\\\\\\\\n"
|
||||
├── closing_loc: (29,0)-(29,1) = "\""
|
||||
└── unescaped: "\nfoo\\\\"
|
||||
|
|
Загрузка…
Ссылка в новой задаче