зеркало из https://github.com/github/ruby.git
[ruby/prism] Fix token incompatibility for `Prism::Translation::Parser::Lexer`
This PR fixes token incompatibility for `Prism::Translation::Parser::Lexer` when using a backquoted heredoc identifier: ```ruby <<-` FOO` a b FOO ``` ## Parser gem (Expected) Returns `tXSTRING_BEG` as the first token: ```console $ bundle exec ruby -Ilib -rparser/ruby33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Parser::Ruby33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr, s(:str, "a\n"), s(:str, "b\n")), [], [[:tXSTRING_BEG, ["<<`", #<Parser::Source::Range example.rb 0...10>]], [:tSTRING_CONTENT, ["a\n", #<Parser::Source::Range example.rb 11...13>]], [:tSTRING_CONTENT, ["b\n", #<Parser::Source::Range example.rb 13...15>]], [:tSTRING_END, [" FOO", #<Parser::Source::Range example.rb 15...23>]], [:tNL, [nil, #<Parser::Source::Range example.rb 10...11>]]]] ``` ## `Prism::Translation::Parser` (Actual) Previously, the tokens differed from those returned by the Parser gem. The first token was `tSTRING_BEG` with an escaped double quote in its value instead of `tXSTRING_BEG` with a backquote, and the value of the `tSTRING_END` token did not match. 
```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr, s(:str, "a\n"), s(:str, "b\n")), [], [[:tSTRING_BEG, ["<<\"", #<Parser::Source::Range example.rb 0...10>]], [:tSTRING_CONTENT, ["a\n", #<Parser::Source::Range example.rb 11...13>]], [:tSTRING_CONTENT, ["b\n", #<Parser::Source::Range example.rb 13...15>]], [:tSTRING_END, ["` FOO`", #<Parser::Source::Range example.rb 15...23>]], [:tNL, [nil, #<Parser::Source::Range example.rb 10...11>]]]] ``` After this correction, the AST and tokens are the same as those returned by the Parser gem: ```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr, s(:str, "a\n"), s(:str, "b\n")), [], [[:tXSTRING_BEG, ["<<`", #<Parser::Source::Range example.rb 0...10>]], [:tSTRING_CONTENT, ["a\n", #<Parser::Source::Range example.rb 11...13>]], [:tSTRING_CONTENT, ["b\n", #<Parser::Source::Range example.rb 13...15>]], [:tSTRING_END, [" FOO", #<Parser::Source::Range example.rb 15...23>]], [:tNL, [nil, #<Parser::Source::Range example.rb 10...11>]]]] ``` https://github.com/ruby/prism/commit/308f8d85a1
This commit is contained in:
Родитель
815c7e197c
Коммит
3605d6076d
|
@ -278,7 +278,7 @@ module Prism
|
||||||
value = nil
|
value = nil
|
||||||
when :tSTRING_BEG
|
when :tSTRING_BEG
|
||||||
if token.type == :HEREDOC_START
|
if token.type == :HEREDOC_START
|
||||||
heredoc_identifier_stack.push(value.match(/<<[-~]?["']?(?<heredoc_identifier>.*?)["']?\z/)[:heredoc_identifier])
|
heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
|
||||||
end
|
end
|
||||||
if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
|
if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
|
||||||
next_location = token.location.join(next_token.location)
|
next_location = token.location.join(next_token.location)
|
||||||
|
@ -294,8 +294,13 @@ module Prism
|
||||||
index += 2
|
index += 2
|
||||||
elsif value.start_with?("<<")
|
elsif value.start_with?("<<")
|
||||||
quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
|
quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
|
||||||
|
if quote == "`"
|
||||||
|
type = :tXSTRING_BEG
|
||||||
|
value = "<<`"
|
||||||
|
else
|
||||||
value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
|
value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
|
||||||
end
|
end
|
||||||
|
end
|
||||||
when :tSTRING_CONTENT
|
when :tSTRING_CONTENT
|
||||||
unless (lines = token.value.lines).one?
|
unless (lines = token.value.lines).one?
|
||||||
start_offset = offset_cache[token.location.start_offset]
|
start_offset = offset_cache[token.location.start_offset]
|
||||||
|
|
|
@ -8,6 +8,11 @@ a
|
||||||
b
|
b
|
||||||
FOO
|
FOO
|
||||||
|
|
||||||
|
<<-` FOO`
|
||||||
|
a
|
||||||
|
b
|
||||||
|
FOO
|
||||||
|
|
||||||
<<-' FOO'
|
<<-' FOO'
|
||||||
a
|
a
|
||||||
b
|
b
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
@ ProgramNode (location: (1,0)-(21,10))
|
@ ProgramNode (location: (1,0)-(26,10))
|
||||||
├── locals: []
|
├── locals: []
|
||||||
└── statements:
|
└── statements:
|
||||||
@ StatementsNode (location: (1,0)-(21,10))
|
@ StatementsNode (location: (1,0)-(26,10))
|
||||||
└── body: (length: 5)
|
└── body: (length: 6)
|
||||||
├── @ StringNode (location: (1,0)-(1,10))
|
├── @ StringNode (location: (1,0)-(1,10))
|
||||||
│ ├── flags: ∅
|
│ ├── flags: ∅
|
||||||
│ ├── opening_loc: (1,0)-(1,10) = "<<-' FOO'"
|
│ ├── opening_loc: (1,0)-(1,10) = "<<-' FOO'"
|
||||||
|
@ -15,41 +15,47 @@
|
||||||
│ ├── content_loc: (7,0)-(9,0) = "a\nb\n"
|
│ ├── content_loc: (7,0)-(9,0) = "a\nb\n"
|
||||||
│ ├── closing_loc: (9,0)-(10,0) = " FOO\n"
|
│ ├── closing_loc: (9,0)-(10,0) = " FOO\n"
|
||||||
│ └── unescaped: "a\nb\n"
|
│ └── unescaped: "a\nb\n"
|
||||||
├── @ StringNode (location: (11,0)-(11,10))
|
├── @ XStringNode (location: (11,0)-(11,10))
|
||||||
│ ├── flags: ∅
|
│ ├── flags: ∅
|
||||||
│ ├── opening_loc: (11,0)-(11,10) = "<<-' FOO'"
|
│ ├── opening_loc: (11,0)-(11,10) = "<<-` FOO`"
|
||||||
│ ├── content_loc: (12,0)-(14,0) = "a\nb\n"
|
│ ├── content_loc: (12,0)-(14,0) = "a\nb\n"
|
||||||
│ ├── closing_loc: (14,0)-(15,0) = " FOO\n"
|
│ ├── closing_loc: (14,0)-(15,0) = " FOO\n"
|
||||||
│ └── unescaped: "a\nb\n"
|
│ └── unescaped: "a\nb\n"
|
||||||
├── @ InterpolatedStringNode (location: (16,0)-(16,10))
|
├── @ StringNode (location: (16,0)-(16,10))
|
||||||
│ ├── opening_loc: (16,0)-(16,10) = "<<~' FOO'"
|
│ ├── flags: ∅
|
||||||
|
│ ├── opening_loc: (16,0)-(16,10) = "<<-' FOO'"
|
||||||
|
│ ├── content_loc: (17,0)-(19,0) = "a\nb\n"
|
||||||
|
│ ├── closing_loc: (19,0)-(20,0) = " FOO\n"
|
||||||
|
│ └── unescaped: "a\nb\n"
|
||||||
|
├── @ InterpolatedStringNode (location: (21,0)-(21,10))
|
||||||
|
│ ├── opening_loc: (21,0)-(21,10) = "<<~' FOO'"
|
||||||
│ ├── parts: (length: 2)
|
│ ├── parts: (length: 2)
|
||||||
│ │ ├── @ StringNode (location: (17,0)-(18,0))
|
│ │ ├── @ StringNode (location: (22,0)-(23,0))
|
||||||
│ │ │ ├── flags: ∅
|
│ │ │ ├── flags: ∅
|
||||||
│ │ │ ├── opening_loc: ∅
|
│ │ │ ├── opening_loc: ∅
|
||||||
│ │ │ ├── content_loc: (17,0)-(18,0) = "a\n"
|
│ │ │ ├── content_loc: (22,0)-(23,0) = "a\n"
|
||||||
│ │ │ ├── closing_loc: ∅
|
│ │ │ ├── closing_loc: ∅
|
||||||
│ │ │ └── unescaped: "a\n"
|
│ │ │ └── unescaped: "a\n"
|
||||||
│ │ └── @ StringNode (location: (18,0)-(19,0))
|
│ │ └── @ StringNode (location: (23,0)-(24,0))
|
||||||
│ │ ├── flags: ∅
|
│ │ ├── flags: ∅
|
||||||
│ │ ├── opening_loc: ∅
|
│ │ ├── opening_loc: ∅
|
||||||
│ │ ├── content_loc: (18,0)-(19,0) = "b\n"
|
│ │ ├── content_loc: (23,0)-(24,0) = "b\n"
|
||||||
│ │ ├── closing_loc: ∅
|
│ │ ├── closing_loc: ∅
|
||||||
│ │ └── unescaped: "b\n"
|
│ │ └── unescaped: "b\n"
|
||||||
│ └── closing_loc: (19,0)-(20,0) = " FOO\n"
|
│ └── closing_loc: (24,0)-(25,0) = " FOO\n"
|
||||||
└── @ InterpolatedStringNode (location: (21,0)-(21,10))
|
└── @ InterpolatedStringNode (location: (26,0)-(26,10))
|
||||||
├── opening_loc: (21,0)-(21,10) = "<<~' FOO'"
|
├── opening_loc: (26,0)-(26,10) = "<<~' FOO'"
|
||||||
├── parts: (length: 2)
|
├── parts: (length: 2)
|
||||||
│ ├── @ StringNode (location: (22,0)-(23,0))
|
│ ├── @ StringNode (location: (27,0)-(28,0))
|
||||||
│ │ ├── flags: ∅
|
│ │ ├── flags: ∅
|
||||||
│ │ ├── opening_loc: ∅
|
│ │ ├── opening_loc: ∅
|
||||||
│ │ ├── content_loc: (22,0)-(23,0) = "a\n"
|
│ │ ├── content_loc: (27,0)-(28,0) = "a\n"
|
||||||
│ │ ├── closing_loc: ∅
|
│ │ ├── closing_loc: ∅
|
||||||
│ │ └── unescaped: "a\n"
|
│ │ └── unescaped: "a\n"
|
||||||
│ └── @ StringNode (location: (23,0)-(24,0))
|
│ └── @ StringNode (location: (28,0)-(29,0))
|
||||||
│ ├── flags: ∅
|
│ ├── flags: ∅
|
||||||
│ ├── opening_loc: ∅
|
│ ├── opening_loc: ∅
|
||||||
│ ├── content_loc: (23,0)-(24,0) = "b\n"
|
│ ├── content_loc: (28,0)-(29,0) = "b\n"
|
||||||
│ ├── closing_loc: ∅
|
│ ├── closing_loc: ∅
|
||||||
│ └── unescaped: "b\n"
|
│ └── unescaped: "b\n"
|
||||||
└── closing_loc: (24,0)-(25,0) = " FOO\n"
|
└── closing_loc: (29,0)-(30,0) = " FOO\n"
|
||||||
|
|
Загрузка…
Ссылка в новой задаче