https://github.com/ruby/prism/commit/6f886be0a4
This commit is contained in:
Kevin Newton 2024-05-29 10:12:51 -04:00
Родитель 1ab7c412d2
Коммит 72452f4387
60 изменённых файлов: 2198 добавлений и 1774 удалений

Просмотреть файл

@ -200,8 +200,8 @@ module Prism
class << self
# Mirror the Prism.dump API by using the serialization API.
def dump(code, **options)
LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) }
def dump(source, **options)
LibRubyParser::PrismString.with_string(source) { |string| dump_common(string, options) }
end
# Mirror the Prism.dump_file API by using the serialization API.
@ -302,6 +302,27 @@ module Prism
!parse_file_success?(filepath, **options)
end
# Mirror the Prism.profile API by using the serialization API. The source is
# parsed and serialized into a temporary buffer; the innermost block then
# evaluates to nil, so the serialized bytes are never handed back.
def profile(source, **options)
  LibRubyParser::PrismString.with_string(source) do |prism_string|
    LibRubyParser::PrismBuffer.with do |scratch|
      LibRubyParser.pm_serialize_parse(
        scratch.pointer,
        prism_string.pointer,
        prism_string.length,
        dump_options(options)
      )

      nil
    end
  end
end
# Mirror the Prism.profile_file API by using the serialization API. The file
# is loaded through PrismString.with_file, and the :filepath option is always
# set to the given path before the options are dumped. The innermost block
# evaluates to nil, so the serialized bytes are never handed back.
def profile_file(filepath, **options)
  LibRubyParser::PrismString.with_file(filepath) do |prism_string|
    LibRubyParser::PrismBuffer.with do |scratch|
      with_filepath = options.merge(filepath: filepath)

      LibRubyParser.pm_serialize_parse(
        scratch.pointer,
        prism_string.pointer,
        prism_string.length,
        dump_options(with_filepath)
      )

      nil
    end
  end
end
private
def dump_common(string, options) # :nodoc:

Просмотреть файл

@ -485,9 +485,9 @@ module Prism
def visit_constant_path_target_node(node)
inner =
if node.parent.nil?
s(node, :colon3, node.child.name)
s(node, :colon3, node.name)
else
s(node, :colon2, visit(node.parent), node.child.name)
s(node, :colon2, visit(node.parent), node.name)
end
s(node, :const, inner)

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class CommandLineTest < TestCase
@ -67,7 +67,7 @@ module Prism
end
def test_command_line_x_implicit
result = Prism.parse(<<~RUBY)
result = Prism.parse_statement(<<~RUBY)
#!/bin/bash
exit 1
@ -75,18 +75,18 @@ module Prism
1
RUBY
assert_kind_of IntegerNode, result.value.statements.body.first
assert_kind_of IntegerNode, result
end
def test_command_line_x_explicit
result = Prism.parse(<<~RUBY, command_line: "x")
result = Prism.parse_statement(<<~RUBY, command_line: "x")
exit 1
#!/usr/bin/env ruby
1
RUBY
assert_kind_of IntegerNode, result.value.statements.body.first
assert_kind_of IntegerNode, result
end
def test_command_line_x_implicit_fail

Просмотреть файл

@ -0,0 +1,56 @@
# frozen_string_literal: true
return if ENV["PRISM_BUILD_MINIMAL"]
require_relative "../test_helper"
module Prism
  # Exercises Prism.dump / Prism.dump_file and checks that a dumped tree can
  # be loaded back into nodes equal to the ones produced by a direct parse.
  class DumpTest < TestCase
    # Define one round-trip test per fixture.
    Fixture.each do |fixture|
      define_method(fixture.test_name) do
        assert_dump(fixture)
      end
    end

    # The string-based and file-based entry points must agree with each other
    # when they are pointed at the same content (this very file).
    def test_dump
      path = __FILE__
      contents = File.read(path, binmode: true, external_encoding: Encoding::UTF_8)

      assert_equal(Prism.lex(contents, filepath: path).value, Prism.lex_file(path).value)
      assert_equal(Prism.dump(contents, filepath: path), Prism.dump_file(path))

      bytes = Prism.dump(contents, filepath: path)
      loaded = Prism.load(contents, bytes).value
      parsed = Prism.parse(contents, filepath: path).value
      parsed_from_file = Prism.parse_file(path).value

      assert_equal_nodes(loaded, parsed)
      assert_equal_nodes(parsed, parsed_from_file)
    end

    # dump_file succeeds on a real file, raises Errno::ENOENT for a missing
    # one, and raises TypeError when given nil.
    def test_dump_file
      assert_nothing_raised { Prism.dump_file(__FILE__) }

      missing = assert_raise(Errno::ENOENT) { Prism.dump_file("idontexist.rb") }
      assert_equal("No such file or directory - idontexist.rb", missing.message)

      assert_raise(TypeError) { Prism.dump_file(nil) }
    end

    private

    # Parse the fixture directly, then dump and reload it, and require both
    # trees to be equal.
    def assert_dump(fixture)
      contents = fixture.read

      parsed = Prism.parse(contents, filepath: fixture.path)
      bytes = Prism.dump(contents, filepath: fixture.path)

      assert_equal_nodes(parsed.value, Prism.load(contents, bytes).value)
    end
  end
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class ParseCommentsTest < TestCase
@ -17,5 +17,17 @@ module Prism
assert_kind_of Array, comments
assert_equal 1, comments.length
end
# parse_file_comments raises Errno::ENOENT (with the standard message) for a
# missing file and TypeError when the path is nil.
def test_parse_file_comments_error
  missing = assert_raise(Errno::ENOENT) { Prism.parse_file_comments("idontexist.rb") }
  assert_equal("No such file or directory - idontexist.rb", missing.message)

  assert_raise(TypeError) { Prism.parse_file_comments(nil) }
end
end
end

Просмотреть файл

@ -1,7 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require "stringio"
require_relative "../test_helper"
module Prism
class ParseStreamTest < TestCase
@ -10,7 +9,7 @@ module Prism
result = Prism.parse_stream(io)
assert result.success?
assert_kind_of Prism::CallNode, result.value.statements.body.first
assert_kind_of Prism::CallNode, result.statement
end
def test_multi_line
@ -18,8 +17,8 @@ module Prism
result = Prism.parse_stream(io)
assert result.success?
assert_kind_of Prism::CallNode, result.value.statements.body.first
assert_kind_of Prism::CallNode, result.value.statements.body.last
assert_kind_of Prism::CallNode, result.statement
assert_kind_of Prism::CallNode, result.statement
end
def test_multi_read
@ -27,7 +26,7 @@ module Prism
result = Prism.parse_stream(io)
assert result.success?
assert_kind_of Prism::CallNode, result.value.statements.body.first
assert_kind_of Prism::CallNode, result.statement
end
def test___END__

Просмотреть файл

@ -0,0 +1,16 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  # Covers the boolean convenience predicates Prism.parse_success? and
  # Prism.parse_file_success?.
  class ParseSuccessTest < TestCase
    # A lone integer literal parses; "<>" does not.
    def test_parse_success?
      valid_source = "1"
      invalid_source = "<>"

      assert(Prism.parse_success?(valid_source))
      refute(Prism.parse_success?(invalid_source))
    end

    # This test file itself is used as the known-good input.
    def test_parse_file_success?
      assert(Prism.parse_file_success?(__FILE__))
    end
  end
end

Просмотреть файл

@ -0,0 +1,66 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  # Covers Prism.parse / Prism.parse_file and the :filepath and :line options.
  class ParseTest < TestCase
    # An empty source produces a program with no statements.
    def test_parse_empty_string
      parsed = Prism.parse("")
      assert_equal([], parsed.value.statements.body)
    end

    # The :filepath option is what __FILE__ nodes report.
    def test_parse_takes_file_path
      path = "filepath.rb"
      parsed = Prism.parse("def foo; __FILE__; end", filepath: path)

      assert_equal(path, find_source_file_node(parsed.value).filepath)
    end

    # A positive starting line offsets every reported location.
    def test_parse_takes_line
      assert_line_offset(4)
    end

    # Starting lines are allowed to be negative.
    def test_parse_takes_negative_lines
      assert_line_offset(-2)
    end

    # parse_file yields a ProgramNode, raises Errno::ENOENT for a missing file
    # and TypeError when the path is nil.
    def test_parse_file
      program = Prism.parse_file(__FILE__).value
      assert_kind_of(ProgramNode, program)

      missing = assert_raise(Errno::ENOENT) { Prism.parse_file("idontexist.rb") }
      assert_equal("No such file or directory - idontexist.rb", missing.message)

      assert_raise(TypeError) { Prism.parse_file(nil) }
    end

    private

    # Parse (and then parse_lex) a three-line method with the given starting
    # line. The root node must start on that line, the __FILE__ node on the
    # following one, and the first element of the parse_lex value on that
    # line as well.
    def assert_line_offset(line)
      parsed = Prism.parse("def foo\n __FILE__\nend", line: line)
      assert_equal(line, parsed.value.location.start_line)
      assert_equal(line + 1, find_source_file_node(parsed.value).location.start_line)

      lexed = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
      assert_equal(line, lexed.value.first.location.start_line)
    end

    # Breadth-first search for the first SourceFileNode beneath the given
    # node. Evaluates to nil when the tree contains none.
    def find_source_file_node(program)
      pending = [program]

      loop do
        candidate = pending.shift
        break unless candidate

        return candidate if candidate.is_a?(SourceFileNode)
        pending.concat(candidate.compact_child_nodes)
      end

      nil
    end
  end
end

Просмотреть файл

@ -2,7 +2,7 @@
# Don't bother checking this on these engines, this is such a specific Ripper
# test.
return if RUBY_ENGINE == "jruby" || RUBY_ENGINE == "truffleruby"
return if RUBY_ENGINE != "ruby"
require_relative "test_helper"

Просмотреть файл

@ -0,0 +1,101 @@
# frozen_string_literal: true
return if RUBY_ENGINE != "ruby"
require_relative "../test_helper"
module Prism
  # Compares how prism and the running Ruby classify every codepoint of each
  # encoding handed out by each_encoding.
  class EncodingsTest < TestCase
    # Evaluation context in which a missing constant evaluates to its own name.
    class ConstantContext < BasicObject
      class << self
        def const_missing(const)
          const
        end
      end
    end

    # Evaluation context in which any unknown method evaluates to its own name.
    class IdentifierContext < BasicObject
      def method_missing(name, *)
        name
      end
    end

    # These test that we're correctly parsing codepoints for each alias of each
    # encoding that prism supports. The pseudo-encoding aliases are skipped.
    each_encoding do |encoding, range|
      encoding_aliases = encoding.names - %w[external internal filesystem locale]

      encoding_aliases.each do |name|
        define_method(:"test_encoding_#{name}") { assert_encoding(encoding, name, range) }
      end
    end

    private

    # The character must evaluate to a constant name and prism must parse it
    # as a ConstantReadNode with that same name.
    def assert_encoding_constant(name, character)
      assert_encoding_name(ConstantContext.new, ConstantReadNode, name, character)
    end

    # The character must evaluate to a method name and prism must parse it as
    # a CallNode with that same name.
    def assert_encoding_identifier(name, character)
      assert_encoding_name(IdentifierContext.new, CallNode, name, character)
    end

    # Shared body of the two assertions above.
    #
    # NOTE: instance_eval with a string can see this method's local
    # variables, and the evaluated source is a one-character identifier or
    # "_" plus one character. Keep every local name here longer than that, or
    # the lookup would hit the local instead of the context object.
    def assert_encoding_name(context, node_class, name, character)
      source = "# encoding: #{name}\n#{character}"
      expected = context.instance_eval(source)

      result = Prism.parse(source)
      assert(result.success?)

      actual = result.value.statements.body.last
      assert_kind_of(node_class, actual)
      assert_equal(expected, actual.name)
    end

    # Check that we can properly parse every codepoint in the given encoding.
    def assert_encoding(encoding, name, range)
      # I'm not entirely sure, but I believe these codepoints are incorrect in
      # their parsing in CRuby. They all report as matching `[[:lower:]]` but
      # then they are parsed as constants. This is because CRuby determines if
      # an identifier is a constant or not by case folding it down to lowercase
      # and checking if there is a difference. And even though they report
      # themselves as lowercase, their case fold is different. I have reported
      # this bug upstream.
      excluded =
        case encoding
        when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8
          [
            0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b,
            0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b,
            0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab,
            0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc,
          ]
        when Encoding::Windows_1253
          [0xb5]
        end

      range = range.to_a - excluded if excluded

      range.each do |codepoint|
        character = codepoint.chr(encoding)

        if character.match?(/[[:alpha:]]/)
          # Letters are either constants (uppercase) or plain identifiers.
          checker = character.match?(/[[:upper:]]/) ? :assert_encoding_constant : :assert_encoding_identifier
          send(checker, name, character)
        elsif character.match?(/[[:alnum:]]/)
          # Other alphanumerics cannot start an identifier, so prefix them.
          assert_encoding_identifier(name, "_#{character}")
        elsif !["/", "{"].include?(character)
          # Everything else is placed in a regular expression comment group,
          # except the two characters that are deliberately skipped.
          source = "# encoding: #{name}\n/(?##{character})/\n"
          assert(Prism.parse_success?(source), "Expected #{source.inspect} to parse successfully.")
        end
      rescue RangeError
        # The codepoint could not be turned into a character in this
        # encoding, so prism is expected to fail on this source.
        source = "# encoding: #{name}\n\\x#{codepoint.to_s(16)}"
        assert(Prism.parse_failure?(source))
      end
    end
  end
end

Просмотреть файл

@ -0,0 +1,131 @@
# frozen_string_literal: true
return unless defined?(RubyVM::InstructionSequence)
return if RubyVM::InstructionSequence.compile("").to_a[4][:parser] == :prism
require_relative "../test_helper"
module Prism
  # Checks that the encoding prism derives for a regular expression literal
  # (from its flags) matches the encoding the running Ruby gives the same
  # literal, or that both report equivalent errors.
  class RegularExpressionEncodingTest < TestCase
    each_encoding do |encoding, _|
      define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do
        assert_regular_expression_encoding_flags(encoding, ["/a/", "/ą/", "//"])
      end

      # Every single escape plus every ordered pair of escapes.
      escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
      escapes = escapes.concat(escapes.product(escapes).map(&:join))

      define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do
        assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" })
      end

      ["n", "u", "e", "s"].each do |modifier|
        define_method(:"test_regular_expression_encoding_modifiers_/#{modifier}_#{encoding.name}") do
          regexp_sources = ["abc", "garçon", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}" ]

          # Each generated test covers only the modifier it is named after.
          # Previously an inner block parameter shadowed `modifier` and every
          # one of the four tests re-ran all four modifiers.
          assert_regular_expression_encoding_flags(
            encoding,
            regexp_sources.map { |regexp_source| "/#{regexp_source}/#{modifier}" }
          )
        end
      end
    end

    private

    # For each regexp source, evaluate it under the given source encoding and
    # compare the outcome with what prism reports. Sources whose evaluation
    # raises one of the skipped error classes are not compared at all.
    def assert_regular_expression_encoding_flags(encoding, regexps)
      non_ascii_prefix = "/.../n has a non escaped non ASCII character in non ASCII-8BIT script:"

      regexps.each do |regexp|
        regexp_modifier_used = regexp.end_with?("/u", "/e", "/s", "/n")
        source = "# encoding: #{encoding.name}\n#{regexp}"

        encoding_errors = ["invalid multibyte char", "escaped non ASCII character in UTF-8 regexp", "differs from source encoding"]
        skipped_errors = ["invalid multibyte escape", "incompatible character encoding", "UTF-8 character in non UTF-8 regexp", "invalid Unicode range", "invalid Unicode list"]

        # TODO (nirvdrum 21-Feb-2024): Prism currently does not handle Regexp validation unless modifiers are used. So, skip processing those errors for now: https://github.com/ruby/prism/issues/2104
        unless regexp_modifier_used
          skipped_errors += encoding_errors
          encoding_errors.clear
        end

        expected =
          begin
            eval(source).encoding
          rescue SyntaxError => error
            if encoding_errors.any? { |e| error.message.include?(e) }
              # Keep only the text after the first ": " on each message line.
              error.message.split("\n").map { |m| m[/: (.+?)$/, 1] }
            elsif skipped_errors.any? { |e| error.message.include?(e) }
              next
            else
              raise
            end
          end

        actual = prism_regular_expression_encoding(source, encoding)

        # TODO (nirvdrum 22-Feb-2024): Remove this workaround once Prism better maps CRuby's error messages.
        # This class of error message is tricky. The part not being compared is a representation of the regexp.
        # Depending on the source encoding and any encoding modifiers being used, CRuby alters how the regexp is represented.
        # Sometimes it's an MBC string. Other times it uses hexadecimal character escapes. And in other cases it uses
        # the long-form Unicode escape sequences. This short-circuit checks that the error message is mostly correct.
        if expected.is_a?(Array) && actual.is_a?(Array)
          # `expected.last` is nil when the message line had no ": " to
          # extract from, hence the safe navigation.
          if expected.last&.start_with?(non_ascii_prefix) && actual.last&.start_with?(non_ascii_prefix)
            expected[-1] = ""
            actual[-1] = ""
          end
        end

        assert_equal expected, actual
      end
    end

    # Parse the source with prism. On success, evaluates to the encoding
    # implied by the regular expression node's flags (falling back to the
    # source encoding). On failure, evaluates to the list of error messages,
    # or nil when the last error is a "UTF-8 mixed within" error.
    def prism_regular_expression_encoding(source, encoding)
      result = Prism.parse(source)

      unless result.success?
        errors = result.errors.map(&:message)
        return errors.last&.include?("UTF-8 mixed within") ? nil : errors
      end

      node = result.statement

      # Forced-encoding flags take priority over explicit modifiers.
      actual_encoding =
        if node.forced_utf8_encoding?
          Encoding::UTF_8
        elsif node.forced_binary_encoding?
          Encoding::ASCII_8BIT
        elsif node.forced_us_ascii_encoding?
          Encoding::US_ASCII
        elsif node.ascii_8bit?
          Encoding::ASCII_8BIT
        elsif node.utf_8?
          Encoding::UTF_8
        elsif node.euc_jp?
          Encoding::EUC_JP
        elsif node.windows_31j?
          Encoding::Windows_31J
        else
          encoding
        end

      # An explicit modifier must be reflected in the derived encoding.
      if node.utf_8? && actual_encoding != Encoding::UTF_8
        raise "expected regexp encoding to be UTF-8 due to '/u' modifier, but got #{actual_encoding.name}"
      elsif node.ascii_8bit? && (actual_encoding != Encoding::ASCII_8BIT && actual_encoding != Encoding::US_ASCII)
        raise "expected regexp encoding to be ASCII-8BIT or US-ASCII due to '/n' modifier, but got #{actual_encoding.name}"
      elsif node.euc_jp? && actual_encoding != Encoding::EUC_JP
        raise "expected regexp encoding to be EUC-JP due to '/e' modifier, but got #{actual_encoding.name}"
      elsif node.windows_31j? && actual_encoding != Encoding::Windows_31J
        raise "expected regexp encoding to be Windows-31J due to '/s' modifier, but got #{actual_encoding.name}"
      end

      # A modifier and the matching forced flag must not both be set.
      if node.utf_8? && node.forced_utf8_encoding?
        raise "the forced_utf8 flag should not be set when the UTF-8 modifier (/u) is used"
      elsif node.ascii_8bit? && node.forced_binary_encoding?
        raise "the forced_ascii_8bit flag should not be set when the ASCII-8BIT modifier (/n) is used"
      end

      actual_encoding
    end
  end
end

Просмотреть файл

@ -0,0 +1,136 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  # Checks the encoding prism assigns to string literals: magic comment
  # handling, a few multibyte edge cases, and the forced-encoding flags for
  # escape sequences under every encoding handed out by each_encoding.
  class StringEncodingTest < TestCase
    each_encoding do |encoding, _|
      define_method(:"test_#{encoding.name}") { assert_encoding(encoding) }
    end

    def test_coding
      assert_equal Encoding::UTF_8, string_literal_encoding("# coding: utf-8\n'string'")
    end

    def test_coding_with_whitespace
      actual = string_literal_encoding("# coding \t \r \v : \t \v \r ascii-8bit \n'string'")
      assert_equal Encoding::ASCII_8BIT, actual
    end

    def test_emacs_style
      assert_equal Encoding::UTF_8, string_literal_encoding("# -*- coding: utf-8 -*-\n'string'")
    end

    def test_utf_8_unix
      assert_equal Encoding::UTF_8, string_literal_encoding("# coding: utf-8-unix\n'string'")
    end

    def test_utf_8_dos
      assert_equal Encoding::UTF_8, string_literal_encoding("# coding: utf-8-dos\n'string'")
    end

    def test_utf_8_mac
      assert_equal Encoding::UTF_8, string_literal_encoding("# coding: utf-8-mac\n'string'")
    end

    def test_utf_8_star
      assert_equal Encoding::UTF_8, string_literal_encoding("# coding: utf-8-*\n'string'")
    end

    # The value of the very first lexed token carries the encoding named by
    # the magic comment.
    def test_first_lexed_token
      encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
      assert_equal Encoding::ASCII_8BIT, encoding
    end

    unless ENV["PRISM_BUILD_MINIMAL"]
      # This test may be a little confusing. Basically when we use our strpbrk,
      # it takes into account the encoding of the file.
      def test_strpbrk_multibyte
        result = Prism.parse(<<~RUBY)
          # encoding: Shift_JIS
          %w[\x81\x5c]
        RUBY

        assert(result.errors.empty?)
        assert_equal(
          (+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
          result.statement.elements.first.unescaped
        )
      end

      def test_slice_encoding
        slice = Prism.parse("# encoding: Shift_JIS\n").value.slice
        assert_equal((+"").force_encoding(Encoding::SHIFT_JIS), slice)
        assert_equal(Encoding::SHIFT_JIS, slice.encoding)
      end

      # A backslash followed by a multibyte Shift_JIS character must parse
      # inside every kind of literal listed below. Previously the delimiter
      # pairs were iterated but never used, so only the single-quoted form
      # was ever checked (six times over).
      def test_multibyte_escapes
        [
          ["'", "'"],
          ["\"", "\""],
          ["`", "`"],
          ["/", "/"],
          ["<<'HERE'\n", "\nHERE"],
          ["<<-HERE\n", "\nHERE"]
        ].each do |opening, closing|
          source = "# encoding: shift_jis\n#{opening}\\\x82\xA0#{closing}\n"
          assert Prism.parse_success?(source), "Expected #{source.inspect} to parse successfully."
        end
      end
    end

    private

    # Parse the source as a single statement and evaluate to the encoding of
    # its unescaped value.
    def string_literal_encoding(source)
      Prism.parse_statement(source).unescaped.encoding
    end

    # For every escape (and every ordered pair of escapes), the encoding
    # implied by prism's flags must equal the encoding of the evaluated
    # literal, or both sides must report the same "UTF-8 mixed within" error.
    def assert_encoding(encoding)
      escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
      escapes = escapes.concat(escapes.product(escapes).map(&:join))

      escapes.each do |escaped|
        source = "# encoding: #{encoding.name}\n\"#{escaped}\""

        expected =
          begin
            eval(source).encoding
          rescue SyntaxError => error
            raise unless error.message.include?("UTF-8 mixed within")
            error.message[/UTF-8 mixed within .+? source/]
          end

        assert_equal expected, prism_string_encoding(source, encoding)
      end
    end

    # Parse the source with prism. On success, evaluates to the encoding
    # implied by the string node's forced-encoding flags (falling back to the
    # source encoding). On failure, evaluates to the first error's message
    # when it mentions "mixed", and raises that message otherwise.
    def prism_string_encoding(source, encoding)
      result = Prism.parse(source)

      unless result.success?
        message = result.errors.first.message
        return message if message.include?("mixed")
        raise message
      end

      string = result.statement

      if string.forced_utf8_encoding?
        Encoding::UTF_8
      elsif string.forced_binary_encoding?
        Encoding::ASCII_8BIT
      else
        encoding
      end
    end
  end
end

Просмотреть файл

@ -0,0 +1,108 @@
# frozen_string_literal: true
return if RUBY_ENGINE != "ruby"
require_relative "../test_helper"
module Prism
  # Checks that the encoding implied by prism's symbol flags matches the
  # encoding of the evaluated symbol, for plain symbols and for symbols built
  # from escape sequences, under every encoding handed out by each_encoding.
  class SymbolEncodingTest < TestCase
    each_encoding do |encoding, _|
      define_method(:"test_symbols_#{encoding.name}") { assert_symbols(encoding) }
      define_method(:"test_escapes_#{encoding.name}") { assert_escapes(encoding) }
    end

    private

    # Encoding of the value obtained by evaluating the source.
    def expected_encoding(source)
      eval(source).encoding
    end

    # Encoding implied by the flags prism sets on the parsed symbol, falling
    # back to the source encoding. Raises SyntaxError carrying every prism
    # error message (one per line) when the parse fails.
    def actual_encoding(source, encoding)
      result = Prism.parse(source)
      raise SyntaxError, result.errors.map(&:message).join("\n") unless result.success?

      symbol = result.statement
      return Encoding::UTF_8 if symbol.forced_utf8_encoding?
      return Encoding::ASCII_8BIT if symbol.forced_binary_encoding?
      return Encoding::US_ASCII if symbol.forced_us_ascii_encoding?

      encoding
    end

    # Plain symbols: both sides must agree on the encoding, or both must fail
    # with an "invalid multibyte" syntax error.
    def assert_symbols(encoding)
      %i[a ą +].each do |symbol|
        source = "# encoding: #{encoding.name}\n#{symbol.inspect}"

        expected = collapse_invalid_multibyte { expected_encoding(source) }
        actual = collapse_invalid_multibyte { actual_encoding(source, encoding) }

        assert_equal expected, actual
      end
    end

    # Runs the block; an "invalid multibyte" SyntaxError is turned into that
    # marker string, any other SyntaxError is re-raised.
    def collapse_invalid_multibyte
      yield
    rescue SyntaxError => error
      raise unless error.message.include?("invalid multibyte")
      "invalid multibyte"
    end

    # Symbols made of escapes: both sides must agree on the encoding, or on
    # the "UTF-8 mixed within ... source" error text.
    def assert_escapes(encoding)
      escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
      escapes = escapes.concat(escapes.product(escapes).map(&:join))

      escapes.each do |escaped|
        source = "# encoding: #{encoding.name}\n:\"#{escaped}\""
        assert_equal expected_escape_outcome(source), actual_escape_outcome(source, encoding)
      end
    end

    # Evaluated encoding, or the "UTF-8 mixed within ... source" fragment of
    # the SyntaxError message when that is the failure.
    def expected_escape_outcome(source)
      expected_encoding(source)
    rescue SyntaxError => error
      raise unless error.message.include?("UTF-8 mixed within")
      error.message[/UTF-8 mixed within .+? source/]
    end

    # Prism's encoding, or the first line of its error output when that
    # output mentions "mixed".
    def actual_escape_outcome(source, encoding)
      actual_encoding(source, encoding)
    rescue SyntaxError => error
      raise unless error.message.include?("mixed")
      error.message.split("\n", 2).first
    end
  end
end

Просмотреть файл

@ -1,577 +0,0 @@
# frozen_string_literal: true
return if RUBY_ENGINE != "ruby"
require_relative "test_helper"
module Prism
class EncodingTest < TestCase
codepoints_1byte = 0...0x100
encodings = {
Encoding::ASCII_8BIT => codepoints_1byte,
Encoding::US_ASCII => codepoints_1byte
}
if !ENV["PRISM_BUILD_MINIMAL"]
encodings[Encoding::Windows_1253] = codepoints_1byte
end
# By default we don't test every codepoint in these encodings because it
# takes a very long time.
if ENV["PRISM_TEST_ALL_ENCODINGS"]
codepoints_2bytes = 0...0x10000
codepoints_unicode = (0...0x110000)
codepoints_eucjp = [
*(0...0x10000),
*(0...0x10000).map { |bytes| bytes | 0x8F0000 }
]
# Candidate codepoints for the Emacs-Mule style encodings, built up by the
# number of bytes packed into each integer (one through four).
codepoints_emacs_mule = [
  *(0...0x80),
  *((0x81...0x90).flat_map { |byte1| (0x90...0x100).map { |byte2| byte1 << 8 | byte2 } }),
  *((0x90...0x9C).flat_map { |byte1| (0xA0...0x100).flat_map { |byte2| (0xA0...0x100).map { |byte3| byte1 << 16 | byte2 << 8 | byte3 } } }),
  # Four-byte form: a fixed 0x9C lead byte followed by three varying bytes.
  # The second byte must come from byte2; it used to be filled from byte3,
  # which left byte2 unused and generated every value five times.
  *((0xF0...0xF5).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| (0xA0...0x100).map { |byte4| 0x9C << 24 | byte2 << 16 | byte3 << 8 | byte4 } } }),
]
codepoints_gb18030 = [
*(0...0x80),
*((0x81..0xFE).flat_map { |byte1| (0x40...0x100).map { |byte2| byte1 << 8 | byte2 } }),
*((0x81..0xFE).flat_map { |byte1| (0x30...0x40).flat_map { |byte2| (0x81..0xFE).flat_map { |byte3| (0x2F...0x41).map { |byte4| byte1 << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } }),
]
codepoints_euc_tw = [
*(0..0x7F),
*(0xA1..0xFF).flat_map { |byte1| (0xA1..0xFF).map { |byte2| (byte1 << 8) | byte2 } },
*(0xA1..0xB0).flat_map { |byte2| (0xA1..0xFF).flat_map { |byte3| (0xA1..0xFF).flat_map { |byte4| 0x8E << 24 | byte2 << 16 | byte3 << 8 | byte4 } } }
]
encodings.merge!(
Encoding::CP850 => codepoints_1byte,
Encoding::CP852 => codepoints_1byte,
Encoding::CP855 => codepoints_1byte,
Encoding::GB1988 => codepoints_1byte,
Encoding::IBM437 => codepoints_1byte,
Encoding::IBM720 => codepoints_1byte,
Encoding::IBM737 => codepoints_1byte,
Encoding::IBM775 => codepoints_1byte,
Encoding::IBM852 => codepoints_1byte,
Encoding::IBM855 => codepoints_1byte,
Encoding::IBM857 => codepoints_1byte,
Encoding::IBM860 => codepoints_1byte,
Encoding::IBM861 => codepoints_1byte,
Encoding::IBM862 => codepoints_1byte,
Encoding::IBM863 => codepoints_1byte,
Encoding::IBM864 => codepoints_1byte,
Encoding::IBM865 => codepoints_1byte,
Encoding::IBM866 => codepoints_1byte,
Encoding::IBM869 => codepoints_1byte,
Encoding::ISO_8859_1 => codepoints_1byte,
Encoding::ISO_8859_2 => codepoints_1byte,
Encoding::ISO_8859_3 => codepoints_1byte,
Encoding::ISO_8859_4 => codepoints_1byte,
Encoding::ISO_8859_5 => codepoints_1byte,
Encoding::ISO_8859_6 => codepoints_1byte,
Encoding::ISO_8859_7 => codepoints_1byte,
Encoding::ISO_8859_8 => codepoints_1byte,
Encoding::ISO_8859_9 => codepoints_1byte,
Encoding::ISO_8859_10 => codepoints_1byte,
Encoding::ISO_8859_11 => codepoints_1byte,
Encoding::ISO_8859_13 => codepoints_1byte,
Encoding::ISO_8859_14 => codepoints_1byte,
Encoding::ISO_8859_15 => codepoints_1byte,
Encoding::ISO_8859_16 => codepoints_1byte,
Encoding::KOI8_R => codepoints_1byte,
Encoding::KOI8_U => codepoints_1byte,
Encoding::MACCENTEURO => codepoints_1byte,
Encoding::MACCROATIAN => codepoints_1byte,
Encoding::MACCYRILLIC => codepoints_1byte,
Encoding::MACGREEK => codepoints_1byte,
Encoding::MACICELAND => codepoints_1byte,
Encoding::MACROMAN => codepoints_1byte,
Encoding::MACROMANIA => codepoints_1byte,
Encoding::MACTHAI => codepoints_1byte,
Encoding::MACTURKISH => codepoints_1byte,
Encoding::MACUKRAINE => codepoints_1byte,
Encoding::TIS_620 => codepoints_1byte,
Encoding::Windows_1250 => codepoints_1byte,
Encoding::Windows_1251 => codepoints_1byte,
Encoding::Windows_1252 => codepoints_1byte,
Encoding::Windows_1254 => codepoints_1byte,
Encoding::Windows_1255 => codepoints_1byte,
Encoding::Windows_1256 => codepoints_1byte,
Encoding::Windows_1257 => codepoints_1byte,
Encoding::Windows_1258 => codepoints_1byte,
Encoding::Windows_874 => codepoints_1byte,
Encoding::Big5 => codepoints_2bytes,
Encoding::Big5_HKSCS => codepoints_2bytes,
Encoding::Big5_UAO => codepoints_2bytes,
Encoding::CP949 => codepoints_2bytes,
Encoding::CP950 => codepoints_2bytes,
Encoding::CP951 => codepoints_2bytes,
Encoding::EUC_KR => codepoints_2bytes,
Encoding::GBK => codepoints_2bytes,
Encoding::GB12345 => codepoints_2bytes,
Encoding::GB2312 => codepoints_2bytes,
Encoding::MACJAPANESE => codepoints_2bytes,
Encoding::Shift_JIS => codepoints_2bytes,
Encoding::SJIS_DoCoMo => codepoints_2bytes,
Encoding::SJIS_KDDI => codepoints_2bytes,
Encoding::SJIS_SoftBank => codepoints_2bytes,
Encoding::Windows_31J => codepoints_2bytes,
Encoding::UTF_8 => codepoints_unicode,
Encoding::UTF8_MAC => codepoints_unicode,
Encoding::UTF8_DoCoMo => codepoints_unicode,
Encoding::UTF8_KDDI => codepoints_unicode,
Encoding::UTF8_SoftBank => codepoints_unicode,
Encoding::CESU_8 => codepoints_unicode,
Encoding::CP51932 => codepoints_eucjp,
Encoding::EUC_JP => codepoints_eucjp,
Encoding::EUCJP_MS => codepoints_eucjp,
Encoding::EUC_JIS_2004 => codepoints_eucjp,
Encoding::EMACS_MULE => codepoints_emacs_mule,
Encoding::STATELESS_ISO_2022_JP => codepoints_emacs_mule,
Encoding::STATELESS_ISO_2022_JP_KDDI => codepoints_emacs_mule,
Encoding::GB18030 => codepoints_gb18030,
Encoding::EUC_TW => codepoints_euc_tw
)
end
# These test that we're correctly parsing codepoints for each alias of each
# encoding that prism supports.
encodings.each do |encoding, range|
(encoding.names - %w[external internal filesystem locale]).each do |name|
define_method(:"test_encoding_#{name}") do
assert_encoding(encoding, name, range)
end
end
end
# These test that we're correctly setting the flags on strings for each
# encoding that prism supports.
escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
escapes = escapes.concat(escapes.product(escapes).map(&:join))
symbols = [:a, :ą, :+]
regexps = [/a/, /ą/, //]
encodings.each_key do |encoding|
define_method(:"test_encoding_flags_#{encoding.name}") do
assert_encoding_flags(encoding, escapes)
end
define_method(:"test_symbol_encoding_flags_#{encoding.name}") do
assert_symbol_encoding_flags(encoding, symbols)
end
define_method(:"test_symbol_character_escape_encoding_flags_#{encoding.name}") do
assert_symbol_character_escape_encoding_flags(encoding, escapes)
end
define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do
assert_regular_expression_encoding_flags(encoding, regexps.map(&:inspect))
end
define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do
assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" })
end
end
encoding_modifiers = { ascii_8bit: "n", utf_8: "u", euc_jp: "e", windows_31j: "s" }
regexp_sources = ["abc", "garçon", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}" ]
encoding_modifiers.each_value do |modifier|
encodings.each_key do |encoding|
define_method(:"test_regular_expression_encoding_modifiers_/#{modifier}_#{encoding.name}") do
assert_regular_expression_encoding_flags(
encoding,
regexp_sources.product(encoding_modifiers.values).map { |r, modifier| "/#{r}/#{modifier}" }
)
end
end
end
def test_coding
result = Prism.parse("# coding: utf-8\n'string'")
actual = result.value.statements.body.first.unescaped.encoding
assert_equal Encoding.find("utf-8"), actual
end
def test_coding_with_whitespace
result = Prism.parse("# coding \t \r \v : \t \v \r ascii-8bit \n'string'")
actual = result.value.statements.body.first.unescaped.encoding
assert_equal Encoding.find("ascii-8bit"), actual
end
def test_emacs_style
result = Prism.parse("# -*- coding: utf-8 -*-\n'string'")
actual = result.value.statements.body.first.unescaped.encoding
assert_equal Encoding.find("utf-8"), actual
end
def test_utf_8_variations
%w[
utf-8-unix
utf-8-dos
utf-8-mac
utf-8-*
].each do |encoding|
result = Prism.parse("# coding: #{encoding}\n'string'")
actual = result.value.statements.body.first.unescaped.encoding
assert_equal Encoding.find("utf-8"), actual
end
end
def test_first_lexed_token
encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
assert_equal Encoding.find("ascii-8bit"), encoding
end
if !ENV["PRISM_BUILD_MINIMAL"]
# This test may be a little confusing. Basically when we use our strpbrk,
# it takes into account the encoding of the file.
def test_strpbrk_multibyte
result = Prism.parse(<<~RUBY)
# encoding: Shift_JIS
%w[\x81\x5c]
RUBY
assert(result.errors.empty?)
assert_equal(
(+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
result.value.statements.body.first.elements.first.unescaped
)
end
def test_slice_encoding
slice = Prism.parse("# encoding: Shift_JIS\n").value.slice
assert_equal (+"").force_encoding(Encoding::SHIFT_JIS), slice
assert_equal Encoding::SHIFT_JIS, slice.encoding
end
def test_multibyte_escapes
[
["'", "'"],
["\"", "\""],
["`", "`"],
["/", "/"],
["<<'HERE'\n", "\nHERE"],
["<<-HERE\n", "\nHERE"]
].each do |opening, closing|
assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n")
end
end
end
private
class ConstantContext < BasicObject
def self.const_missing(const)
const
end
end
def constant_context
ConstantContext.new
end
class IdentifierContext < BasicObject
def method_missing(name, *)
name
end
end
def identifier_context
IdentifierContext.new
end
def assert_encoding_constant(name, character)
source = "# encoding: #{name}\n#{character}"
expected = constant_context.instance_eval(source)
result = Prism.parse(source)
assert result.success?
actual = result.value.statements.body.last
assert_kind_of ConstantReadNode, actual
assert_equal expected, actual.name
end
def assert_encoding_identifier(name, character)
source = "# encoding: #{name}\n#{character}"
expected = identifier_context.instance_eval(source)
result = Prism.parse(source)
assert result.success?
actual = result.value.statements.body.last
assert_kind_of CallNode, actual
assert_equal expected, actual.name
end
# Check that we can properly parse every codepoint in the given encoding.
def assert_encoding(encoding, name, range)
# I'm not entirely sure, but I believe these codepoints are incorrect in
# their parsing in CRuby. They all report as matching `[[:lower:]]` but
# then they are parsed as constants. This is because CRuby determines if
# an identifier is a constant or not by case folding it down to lowercase
# and checking if there is a difference. And even though they report
# themselves as lowercase, their case fold is different. I have reported
# this bug upstream.
case encoding
when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8
range = range.to_a - [
0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b,
0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b,
0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab,
0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc,
]
when Encoding::Windows_1253
range = range.to_a - [0xb5]
end
range.each do |codepoint|
character = codepoint.chr(encoding)
if character.match?(/[[:alpha:]]/)
if character.match?(/[[:upper:]]/)
assert_encoding_constant(name, character)
else
assert_encoding_identifier(name, character)
end
elsif character.match?(/[[:alnum:]]/)
assert_encoding_identifier(name, "_#{character}")
else
next if ["/", "{"].include?(character)
source = "# encoding: #{name}\n/(?##{character})/\n"
assert Prism.parse(source).success?, "Expected #{source.inspect} to parse successfully."
end
rescue RangeError
source = "# encoding: #{name}\n\\x#{codepoint.to_s(16)}"
refute Prism.parse(source).success?
end
end
# For each escape sequence, wraps it in a double-quoted string literal under a
# magic comment for +encoding+ and compares the result of evaluating that
# source with what prism reports.
#
# The expected value is either the encoding of the evaluated string or, when
# evaluation raises a "UTF-8 mixed within" SyntaxError, the extracted error
# text. The actual value is either the encoding implied by the parsed node's
# forced-encoding flags or the first prism error message containing "mixed".
def assert_encoding_flags(encoding, escapes)
  escapes.each do |escaped|
    source = "# encoding: #{encoding.name}\n\"#{escaped}\""

    expected =
      begin
        eval(source).encoding
      rescue SyntaxError => syntax_error
        # Any other kind of syntax error is unexpected and propagates.
        raise unless syntax_error.message.include?("UTF-8 mixed within")

        syntax_error.message[/: (.+?)\n/, 1]
      end

    result = Prism.parse(source)
    actual =
      if result.success?
        node = result.value.statements.body.first

        if node.forced_utf8_encoding?
          Encoding::UTF_8
        elsif node.forced_binary_encoding?
          Encoding::ASCII_8BIT
        else
          encoding
        end
      else
        message = result.errors.first.message
        raise message unless message.include?("mixed")

        message
      end

    assert_equal(expected, actual)
  end
end
# Test Symbol literals without any interpolation or escape sequences.
#
# Each symbol is written out with #inspect under a magic comment for
# +encoding+. The encoding of the evaluated symbol is compared with the
# encoding implied by the flags on the node prism produces. When evaluation
# fails with "invalid multibyte char" the expected value is nil, and when prism
# fails with an "invalid symbol" error the actual value is nil.
def assert_symbol_encoding_flags(encoding, symbols)
  symbols.each do |symbol|
    source = "# encoding: #{encoding.name}\n#{symbol.inspect}"

    expected =
      begin
        eval(source).encoding
      rescue SyntaxError => syntax_error
        raise unless syntax_error.message.include?("invalid multibyte char")

        nil
      end

    result = Prism.parse(source)
    actual =
      if result.success?
        node = result.value.statements.body.first

        if node.forced_utf8_encoding?
          Encoding::UTF_8
        elsif node.forced_binary_encoding?
          Encoding::ASCII_8BIT
        elsif node.forced_us_ascii_encoding?
          Encoding::US_ASCII
        else
          encoding
        end
      else
        message = result.errors.last.message
        raise message unless message.include?("invalid symbol")

        nil
      end

    assert_equal(expected, actual)
  end
end
# For each escape sequence, wraps it in a quoted symbol literal (:"...") under
# a magic comment for +encoding+ and compares the result of evaluating that
# source with what prism reports.
#
# The expected value is either the encoding of the evaluated symbol or, when
# evaluation raises a "UTF-8 mixed within" SyntaxError, the extracted error
# text. The actual value is either the encoding implied by the parsed node's
# forced-encoding flags or the first prism error message containing "mixed".
def assert_symbol_character_escape_encoding_flags(encoding, escapes)
  escapes.each do |escaped|
    source = "# encoding: #{encoding.name}\n:\"#{escaped}\""

    expected =
      begin
        eval(source).encoding
      rescue SyntaxError => syntax_error
        # Any other kind of syntax error is unexpected and propagates.
        raise unless syntax_error.message.include?("UTF-8 mixed within")

        syntax_error.message[/: (.+?)\n/, 1]
      end

    result = Prism.parse(source)
    actual =
      if result.success?
        node = result.value.statements.body.first

        if node.forced_utf8_encoding?
          Encoding::UTF_8
        elsif node.forced_binary_encoding?
          Encoding::ASCII_8BIT
        elsif node.forced_us_ascii_encoding?
          Encoding::US_ASCII
        else
          encoding
        end
      else
        message = result.errors.first.message
        raise message unless message.include?("mixed")

        message
      end

    assert_equal(expected, actual)
  end
end
# Compares, for every regular expression literal in +regexps+, the encoding
# obtained by evaluating the literal under a magic comment for +encoding+ with
# the encoding implied by the flags on the node prism produces.
#
# encoding - the source Encoding named in the magic comment.
# regexps  - regular expression literals given as source strings.
#
# When evaluation raises a SyntaxError listed in encoding_errors, the expected
# value is the list of extracted error messages and is compared with prism's
# error messages. SyntaxErrors listed in skipped_errors cause the literal to be
# skipped, and any other SyntaxError propagates.
def assert_regular_expression_encoding_flags(encoding, regexps)
  regexps.each do |regexp|
    regexp_modifier_used = regexp.end_with?("/u", "/e", "/s", "/n")
    source = "# encoding: #{encoding.name}\n#{regexp}"

    encoding_errors = ["invalid multibyte char", "escaped non ASCII character in UTF-8 regexp", "differs from source encoding"]
    skipped_errors = ["invalid multibyte escape", "incompatible character encoding", "UTF-8 character in non UTF-8 regexp", "invalid Unicode range", "invalid Unicode list"]

    # TODO (nirvdrum 21-Feb-2024): Prism currently does not handle Regexp validation unless modifiers are used. So, skip processing those errors for now: https://github.com/ruby/prism/issues/2104
    unless regexp_modifier_used
      skipped_errors += encoding_errors
      encoding_errors.clear
    end

    expected =
      begin
        eval(source).encoding
      rescue SyntaxError => error
        if encoding_errors.any? { |e| error.message.include?(e) }
          error.message.split("\n").map { |m| m[/: (.+?)$/, 1] }
        elsif skipped_errors.any? { |e| error.message.include?(e) }
          next
        else
          raise
        end
      end

    actual =
      Prism.parse(source).then do |result|
        if result.success?
          node = result.value.statements.body.first

          # The forced-encoding flags take precedence over the modifier flags.
          actual_encoding =
            if node.forced_utf8_encoding?
              Encoding::UTF_8
            elsif node.forced_binary_encoding?
              Encoding::ASCII_8BIT
            elsif node.forced_us_ascii_encoding?
              Encoding::US_ASCII
            elsif node.ascii_8bit?
              Encoding::ASCII_8BIT
            elsif node.utf_8?
              Encoding::UTF_8
            elsif node.euc_jp?
              Encoding::EUC_JP
            elsif node.windows_31j?
              Encoding::Windows_31J
            else
              encoding
            end

          # A modifier flag must agree with the encoding that was derived.
          if node.utf_8? && actual_encoding != Encoding::UTF_8
            raise "expected regexp encoding to be UTF-8 due to '/u' modifier, but got #{actual_encoding.name}"
          elsif node.ascii_8bit? && (actual_encoding != Encoding::ASCII_8BIT && actual_encoding != Encoding::US_ASCII)
            raise "expected regexp encoding to be ASCII-8BIT or US-ASCII due to '/n' modifier, but got #{actual_encoding.name}"
          elsif node.euc_jp? && actual_encoding != Encoding::EUC_JP
            raise "expected regexp encoding to be EUC-JP due to '/e' modifier, but got #{actual_encoding.name}"
          elsif node.windows_31j? && actual_encoding != Encoding::Windows_31J
            raise "expected regexp encoding to be Windows-31J due to '/s' modifier, but got #{actual_encoding.name}"
          end

          # A modifier flag and its matching forced flag must not both be set.
          if node.utf_8? && node.forced_utf8_encoding?
            raise "the forced_utf8 flag should not be set when the UTF-8 modifier (/u) is used"
          elsif node.ascii_8bit? && node.forced_binary_encoding?
            raise "the forced_ascii_8bit flag should not be set when the ASCII-8BIT modifier (/n) is used"
          end

          actual_encoding
        else
          errors = result.errors.map(&:message)

          if errors.last&.include?("UTF-8 mixed within")
            nil
          else
            errors
          end
        end
      end

    # TODO (nirvdrum 22-Feb-2024): Remove this workaround once Prism better maps CRuby's error messages.
    # This class of error message is tricky. The part not being compared is a representation of the regexp.
    # Depending on the source encoding and any encoding modifiers being used, CRuby alters how the regexp is represented.
    # Sometimes it's an MBC string. Other times it uses hexadecimal character escapes. And in other cases it uses
    # the long-form Unicode escape sequences. This short-circuit checks that the error message is mostly correct.
    if expected.is_a?(Array) && actual.is_a?(Array)
      prefix = "/.../n has a non escaped non ASCII character in non ASCII-8BIT script:"

      if expected.last.start_with?(prefix) && actual.last.start_with?(prefix)
        expected.last.clear
        actual.last.clear
      end
    end

    assert_equal expected, actual
  end
end
end
end

Просмотреть файл

@ -1246,8 +1246,7 @@ module Prism
end
def test_invalid_message_name
result = Prism.parse("+.@foo,+=foo")
assert_equal :"", result.value.statements.body.first.write_name
assert_equal :"", Prism.parse_statement("+.@foo,+=foo").write_name
end
def test_invalid_operator_write_fcall

Просмотреть файл

@ -0,0 +1,21 @@
# frozen_string_literal: true
return if RUBY_VERSION < "3.2.0"
require_relative "test_helper"
module Prism
  # Defines one test per fixture that passes the fixture's source to
  # assert_valid_syntax.
  class FixturesTest < TestCase
    # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
    # characters in the heredoc start.
    # Example: <<~' EOF' or <<-' EOF'
    # https://bugs.ruby-lang.org/issues/19539
    skipped = RUBY_VERSION < "3.3.0" ? ["heredocs_leading_whitespace.txt"] : []

    Fixture.each(except: skipped) do |fixture|
      define_method(fixture.test_name) do
        assert_valid_syntax(fixture.read)
      end
    end
  end
end

Просмотреть файл

@ -1,7 +1,5 @@
# frozen_string_literal: true
return if ENV["PRISM_BUILD_MINIMAL"]
require_relative "test_helper"
module Prism
@ -9,7 +7,7 @@ module Prism
# invalid memory access.
class FuzzerTest < TestCase
def self.snippet(name, source)
define_method(:"test_fuzzer_#{name}") { Prism.dump(source) }
define_method(:"test_fuzzer_#{name}") { Prism.profile(source) }
end
snippet "incomplete global variable", "$"
@ -39,29 +37,31 @@ module Prism
snippet "escaped unicode at end of file 8", '"\\u33'
snippet "escaped unicode at end of file 9", '"\\u333'
snippet "float suffix at end of file", "1e"
snippet "parameter name that is zero length", "a { |b;"
snippet "statements node with multiple heredocs", <<~EOF
for <<A + <<B
A
B
EOF
snippet "create a binary call node with arg before receiver", <<~EOF
<<-A.g/{/
A
/, ""\\
EOF
snippet "regular expression with start and end out of order", <<~RUBY
<<-A.g//,
A
/{/, ''\\
RUBY
snippet "interpolated regular expression with start and end out of order", <<~RUBY
<<-A.g/{/,
A
a
/{/, ''\\
RUBY
snippet "parameter name that is zero length", "a { |b;"
end
end

Просмотреть файл

@ -4,24 +4,131 @@ require_relative "test_helper"
module Prism
class HeredocDedentTest < TestCase
filepath = File.expand_path("fixtures/tilde_heredocs.txt", __dir__)
def test_content_dedented_interpolation_content
assert_heredoc_dedent(
" a\n" "1\n" " a\n",
"<<~EOF\n" " a\n" "\#{1}\n" " a\n" "EOF\n"
)
end
File.read(filepath).split(/(?=\n)\n(?=<)/).each_with_index do |heredoc, index|
# The first example in this file has incorrect dedent calculated by
# TruffleRuby so we skip it.
next if index == 0 && RUBY_ENGINE == "truffleruby"
def test_content
assert_heredoc_dedent(
"a\n",
"<<~EOF\n" " a\n" "EOF\n"
)
end
define_method "test_heredoc_#{index}" do
node = Prism.parse(heredoc).value.statements.body.first
def test_tabs_dedent_spaces
assert_heredoc_dedent(
"\ta\n" "b\n" "\t\tc\n",
"<<~EOF\n" "\ta\n" " b\n" "\t\tc\n" "EOF\n"
)
end
if node.is_a?(StringNode)
actual = node.unescaped
else
actual = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : "1" }.join
end
def test_interpolation_then_content
assert_heredoc_dedent(
"1 a\n",
"<<~EOF\n" " \#{1} a\n" "EOF\n"
)
end
assert_equal(eval(heredoc), actual, "Expected heredocs to match.")
def test_content_then_interpolation
assert_heredoc_dedent(
"a 1\n",
"<<~EOF\n" " a \#{1}\n" "EOF\n"
)
end
def test_content_dedented_interpolation
assert_heredoc_dedent(
" a\n" "1\n",
"<<~EOF\n" " a\n" " \#{1}\n" "EOF\n"
)
end
def test_content_interpolation
assert_heredoc_dedent(
"a\n" "1\n",
"<<~EOF\n" " a\n" " \#{1}\n" "EOF\n"
)
end
def test_content_content
assert_heredoc_dedent(
"a\n" "b\n",
"<<~EOF\n" " a\n" " b\n" "EOF\n"
)
end
def test_content_indented_content
assert_heredoc_dedent(
"a\n" " b\n",
"<<~EOF\n" " a\n" " b\n" "EOF\n"
)
end
def test_content_dedented_content
assert_heredoc_dedent(
"\ta\n" "b\n",
"<<~EOF\n" "\t\t\ta\n" "\t\tb\n" "EOF\n"
)
end
def test_single_quote
assert_heredoc_dedent(
"a \#{1}\n",
"<<~'EOF'\n" "a \#{1}\n" "EOF\n"
)
end
def test_mixed_indentation
assert_heredoc_dedent(
"a\n" " b\n",
"<<~EOF\n" "\ta\n" "\t b\n" "EOF\n"
)
end
def test_indented_content_content
assert_heredoc_dedent(
" a\n" "b\n",
"<<~EOF\n" "\t a\n" "\tb\n" "EOF\n"
)
end
def test_indent_size
assert_heredoc_dedent(
"a\n" " b\n",
"<<~EOF\n" "\ta\n" " b\n" "EOF\n"
)
end
def test_blank_lines
assert_heredoc_dedent(
"a\n" "\n" "b\n",
"<<~EOF\n" " a\n" "\n" " b\n" "EOF\n"
)
end
def test_many_blank_lines
assert_heredoc_dedent(
"a\n" "\n" "\n" "\n" "\n" "b\n",
"<<~EOF\n" " a\n" "\n" "\n" "\n" "\n" " b\n" "EOF\n"
)
end
private
# Parses +source+ as a single statement, rebuilds the heredoc's text from the
# resulting node and checks it against both +expected+ and the value produced
# by evaluating +source+. Parts that are not StringNodes are rendered as "1".
def assert_heredoc_dedent(expected, source)
  node = Prism.parse_statement(source)

  actual =
    case node
    when StringNode
      node.unescaped
    else
      pieces = node.parts.map do |part|
        part.is_a?(StringNode) ? part.unescaped : "1"
      end
      pieces.join
    end

  assert_equal(expected, actual)
  assert_equal(eval(source), actual)
end
end
end

90
test/prism/lex_test.rb Normal file
Просмотреть файл

@ -0,0 +1,90 @@
# frozen_string_literal: true
return if !(RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0")
require_relative "test_helper"
module Prism
  # Checks Prism.lex_compat against Prism.lex_ripper for every fixture and
  # exercises the lex_file, parse_lex and parse_lex_file entry points.
  class LexTest < TestCase
    except = [
      # It seems like there are some oddities with nested heredocs and ripper.
      # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
      "seattlerb/heredoc_nested.txt",
      "whitequark/dedenting_heredoc.txt",

      # Ripper seems to have a bug that the regex portions before and after
      # the heredoc are combined into a single token. See
      # https://bugs.ruby-lang.org/issues/19838.
      "spanning_heredoc.txt",
      "spanning_heredoc_newlines.txt"
    ]

    if RUBY_VERSION < "3.3.0"
      except.push(
        # This file has changed behavior in Ripper in Ruby 3.3, so we skip it
        # if we're on an earlier version.
        "seattlerb/pct_w_heredoc_interp_nested.txt",

        # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
        # characters in the heredoc start.
        # Example: <<~' EOF' or <<-' EOF'
        # https://bugs.ruby-lang.org/issues/19539
        "heredocs_leading_whitespace.txt"
      )
    end

    Fixture.each(except: except) do |fixture|
      define_method(fixture.test_name) do
        assert_lex(fixture)
      end
    end

    def test_lex_file
      assert_nothing_raised { Prism.lex_file(__FILE__) }

      error = assert_raise(Errno::ENOENT) { Prism.lex_file("idontexist.rb") }
      assert_equal("No such file or directory - idontexist.rb", error.message)

      assert_raise(TypeError) { Prism.lex_file(nil) }
    end

    def test_parse_lex
      node, tokens = Prism.parse_lex("def foo; end").value

      assert_kind_of(ProgramNode, node)
      assert_equal(5, tokens.length)
    end

    def test_parse_lex_file
      node, tokens = Prism.parse_lex_file(__FILE__).value

      assert_kind_of(ProgramNode, node)
      refute_empty(tokens)

      error = assert_raise(Errno::ENOENT) { Prism.parse_lex_file("idontexist.rb") }
      assert_equal("No such file or directory - idontexist.rb", error.message)

      assert_raise(TypeError) { Prism.parse_lex_file(nil) }
    end

    private

    # Lexes the fixture with Prism.lex_compat, requires an empty error list and
    # then compares the tokens pairwise with those from Prism.lex_ripper.
    def assert_lex(fixture)
      source = fixture.read

      lexed = Prism.lex_compat(source)
      assert_equal([], lexed.errors)

      pairs = Prism.lex_ripper(source).zip(lexed.value)
      pairs.each { |(ripper, prism)| assert_equal(ripper, prism) }
    end
  end
end

Просмотреть файл

@ -3,8 +3,6 @@
require_relative "test_helper"
return if RUBY_PLATFORM !~ /linux/
# TODO: determine why these symbols are incorrect on ppc64le
return if RUBY_PLATFORM =~ /powerpc64le/
module Prism

Просмотреть файл

@ -17,14 +17,14 @@ require_relative "test_helper"
module Prism
class LocalsTest < TestCase
base = File.join(__dir__, "fixtures")
Dir["**/*.txt", base: base].each do |relative|
except = [
# Skip this fixture because it has a different number of locals because
# CRuby is eliminating dead code.
next if relative == "whitequark/ruby_bug_10653.txt"
"whitequark/ruby_bug_10653.txt"
]
filepath = File.join(base, relative)
define_method("test_#{relative}") { assert_locals(filepath) }
Fixture.each(except: except) do |fixture|
define_method(fixture.test_name) { assert_locals(fixture) }
end
def setup
@ -38,8 +38,8 @@ module Prism
private
def assert_locals(filepath)
source = File.read(filepath)
def assert_locals(fixture)
source = fixture.read
expected = cruby_locals(source)
actual = prism_locals(source)
@ -47,14 +47,6 @@ module Prism
assert_equal(expected, actual)
end
def ignore_warnings
previous_verbosity = $VERBOSE
$VERBOSE = nil
yield
ensure
$VERBOSE = previous_verbosity
end
# A wrapper around a RubyVM::InstructionSequence that provides a more
# convenient interface for accessing parts of the iseq.
class ISeq
@ -104,35 +96,29 @@ module Prism
# For the given source, compiles with CRuby and returns a list of all of the
# sets of local variables that were encountered.
def cruby_locals(source)
verbose, $VERBOSE = $VERBOSE, nil
locals = [] #: Array[Array[Symbol | Integer]]
stack = [ISeq.new(ignore_warnings { RubyVM::InstructionSequence.compile(source) }.to_a)]
begin
locals = [] #: Array[Array[Symbol | Integer]]
stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)]
while (iseq = stack.pop)
names = [*iseq.local_table]
names.map!.with_index do |name, index|
# When an anonymous local variable is present in the iseq's local
# table, it is represented as the stack offset from the top.
# However, when these are dumped to binary and read back in, they
# are replaced with the symbol :#arg_rest. To consistently handle
# this, we replace them here with their index.
if name == :"#arg_rest"
names.length - index + 1
else
name
end
while (iseq = stack.pop)
names = [*iseq.local_table]
names.map!.with_index do |name, index|
# When an anonymous local variable is present in the iseq's local
# table, it is represented as the stack offset from the top.
# However, when these are dumped to binary and read back in, they
# are replaced with the symbol :#arg_rest. To consistently handle
# this, we replace them here with their index.
if name == :"#arg_rest"
names.length - index + 1
else
name
end
locals << names
iseq.each_child { |child| stack << child }
end
locals
ensure
$VERBOSE = verbose
locals << names
iseq.each_child { |child| stack << child }
end
locals
end
# For the given source, parses with prism and returns a list of all of the

Просмотреть файл

@ -2,32 +2,109 @@
require_relative "test_helper"
return if RUBY_ENGINE != "ruby"
module Prism
class MagicCommentTest < TestCase
examples = [
"# encoding: ascii",
"# coding: ascii",
"# eNcOdInG: ascii",
"# CoDiNg: ascii",
"# \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v",
"# -*- encoding: ascii -*-",
"# -*- coding: ascii -*-",
"# -*- eNcOdInG: ascii -*-",
"# -*- CoDiNg: ascii -*-",
"# -*- \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v -*-",
"# -*- foo: bar; encoding: ascii -*-",
"# coding \t \r \v : \t \v \r ascii-8bit",
"# vim: filetype=ruby, fileencoding=windows-31j, tabsize=3, shiftwidth=3"
]
if RUBY_ENGINE == "ruby"
class MagicCommentRipper < Ripper
attr_reader :magic_comments
examples.each.with_index(1) do |example, index|
define_method(:"test_magic_comment_#{index}") do
expected = RubyVM::InstructionSequence.compile(%Q{#{example}\n""}).eval.encoding
actual = Prism.parse(example).encoding
def initialize(*)
super
@magic_comments = []
end
def on_magic_comment(key, value)
@magic_comments << [key, value]
super
end
end
Fixture.each do |fixture|
define_method(fixture.test_name) { assert_magic_comments(fixture) }
end
end
def test_encoding
assert_magic_encoding(Encoding::US_ASCII, "# encoding: ascii")
end
def test_coding
assert_magic_encoding(Encoding::US_ASCII, "# coding: ascii")
end
def test_eNcOdInG
assert_magic_encoding(Encoding::US_ASCII, "# eNcOdInG: ascii")
end
def test_CoDiNg
assert_magic_encoding(Encoding::US_ASCII, "# CoDiNg: ascii")
end
def test_encoding_whitespace
assert_magic_encoding(Encoding::US_ASCII, "# \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v")
end
def test_emacs_encoding
assert_magic_encoding(Encoding::US_ASCII, "# -*- encoding: ascii -*-")
end
def test_emacs_coding
assert_magic_encoding(Encoding::US_ASCII, "# -*- coding: ascii -*-")
end
def test_emacs_eNcOdInG
assert_magic_encoding(Encoding::US_ASCII, "# -*- eNcOdInG: ascii -*-")
end
def test_emacs_CoDiNg
assert_magic_encoding(Encoding::US_ASCII, "# -*- CoDiNg: ascii -*-")
end
def test_emacs_whitespace
assert_magic_encoding(Encoding::US_ASCII, "# -*- \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v -*-")
end
def test_emacs_multiple
assert_magic_encoding(Encoding::US_ASCII, "# -*- foo: bar; encoding: ascii -*-")
end
def test_coding_whitespace
assert_magic_encoding(Encoding::ASCII_8BIT, "# coding \t \r \v : \t \v \r ascii-8bit")
end
def test_vim
assert_magic_encoding(Encoding::Windows_31J, "# vim: filetype=ruby, fileencoding=windows-31j, tabsize=3, shiftwidth=3")
end
private
# Builds a two-line source from the magic comment +line+ followed by an empty
# string literal and checks that prism reports +expected+ as its encoding. When
# RubyVM::InstructionSequence is defined, prism's encoding is also compared
# with the encoding of the string CRuby produces from the same source.
def assert_magic_encoding(expected, line)
  source = %Q{#{line}\n""}
  actual = Prism.parse(source).encoding

  # Compare against our expectation.
  assert_equal(expected, actual)

  # Compare against Ruby's expectation.
  return unless defined?(RubyVM::InstructionSequence)

  compiled = RubyVM::InstructionSequence.compile(source)
  assert_equal(compiled.eval.encoding, actual)
end
# Checks that prism finds the same magic comments, with the same keys and
# values in the same order, as MagicCommentRipper collects for the fixture.
def assert_magic_comments(fixture)
  source = fixture.read

  # Collect the [key, value] pairs that ripper reports while parsing.
  ripper = MagicCommentRipper.new(source)
  ripper.parse
  expected = ripper.magic_comments

  actual = Prism.parse(source).magic_comments
  assert_equal(expected.length, actual.length)

  expected.zip(actual).each do |pair, magic_comment|
    expected_key, expected_value = pair

    assert_equal(expected_key, magic_comment.key)
    assert_equal(expected_value, magic_comment.value)
  end
end
end
end

Просмотреть файл

@ -0,0 +1,22 @@
# frozen_string_literal: true
require_relative "test_helper"
module Prism
  # Defines one test per fixture checking the line start offsets that prism
  # records for the parsed source.
  class NewlineOffsetsTest < TestCase
    Fixture.each do |fixture|
      define_method(fixture.test_name) do
        assert_newline_offsets(fixture)
      end
    end

    private

    # The expected offsets are 0 plus the byte offset immediately following
    # every newline byte in the fixture.
    def assert_newline_offsets(fixture)
      source = fixture.read

      expected = [0]
      source.each_byte.with_index(1) do |byte, following|
        expected << following if byte == 0x0a
      end

      assert_equal(expected, Prism.parse(source).source.offsets)
    end
  end
end

Просмотреть файл

@ -6,20 +6,23 @@ return unless defined?(RubyVM::InstructionSequence)
module Prism
class NewlineTest < TestCase
base = File.expand_path("../", __FILE__)
filepaths = Dir["*.rb", base: base] - %w[
encoding_test.rb
skips = %w[
errors_test.rb
locals_test.rb
parser_test.rb
regexp_test.rb
static_literals_test.rb
test_helper.rb
unescape_test.rb
warnings_test.rb
encoding/regular_expression_encoding_test.rb
encoding/string_encoding_test.rb
result/static_literals_test.rb
result/warnings_test.rb
ruby/parser_test.rb
ruby/ruby_parser_test.rb
]
filepaths.each do |relative|
define_method("test_newline_flags_#{relative}") do
base = __dir__
(Dir["{,api/,encoding/,result/,ruby/}*.rb", base: base] - skips).each do |relative|
define_method(:"test_#{relative}") do
assert_newlines(base, relative)
end
end
@ -65,14 +68,6 @@ module Prism
assert_equal expected, actual
end
def ignore_warnings
previous_verbosity = $VERBOSE
$VERBOSE = nil
yield
ensure
$VERBOSE = previous_verbosity
end
def rubyvm_lines(source)
queue = [ignore_warnings { RubyVM::InstructionSequence.compile(source) }]
lines = []

Просмотреть файл

@ -1,371 +0,0 @@
# frozen_string_literal: true
require_relative "test_helper"
module Prism
class ParseTest < TestCase
# A Ripper subclass that records each magic comment it is notified about as a
# [key, value] pair, in the order the notifications arrive.
class MagicCommentRipper < Ripper
  attr_reader :magic_comments

  def initialize(*)
    super
    @magic_comments = []
  end

  # Remember the pair, then defer to Ripper's own handler.
  def on_magic_comment(key, value)
    @magic_comments.push([key, value])
    super
  end
end
# Snapshots are produced by pretty-printing trees, and methods such as
# Symbol#inspect take the external encoding into account. Machine settings
# (like LANG=C or -Eascii-8bit) can change that encoding and therefore the
# snapshot output, so every test runs with the default external encoding
# forced to UTF-8. The previous value is remembered so it can be restored.
def setup
  @previous_default_external = Encoding.default_external

  ignore_warnings do
    Encoding.default_external = Encoding::UTF_8
  end
end
# Puts back the default external encoding that setup remembered.
def teardown
  ignore_warnings do
    Encoding.default_external = @previous_default_external
  end
end
# Parsing an empty source yields a program with no statements.
def test_empty_string
  statements = Prism.parse("").value.statements
  assert_equal([], statements.body)
end
# The filepath: option is reflected by the SourceFileNode in the parsed tree.
def test_parse_takes_file_path
  filepath = "filepath.rb"
  program = Prism.parse("def foo; __FILE__; end", filepath: filepath).value

  assert_equal(filepath, find_source_file_node(program).filepath)
end
# The line: option sets the start line reported by both parse and parse_lex.
def test_parse_takes_line
  line = 4

  program = Prism.parse("def foo\n __FILE__\nend", line: line).value
  assert_equal(line, program.location.start_line)
  assert_equal(line + 1, find_source_file_node(program).location.start_line)

  lexed = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
  assert_equal(line, lexed.value.first.location.start_line)
end
# A negative line: option is honored by both parse and parse_lex.
def test_parse_takes_negative_lines
  line = -2

  program = Prism.parse("def foo\n __FILE__\nend", line: line).value
  assert_equal(line, program.location.start_line)
  assert_equal(line + 1, find_source_file_node(program).location.start_line)

  lexed = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
  assert_equal(line, lexed.value.first.location.start_line)
end
# parse_lex returns the program node together with the token list.
def test_parse_lex
  node, tokens = Prism.parse_lex("def foo; end").value

  assert_kind_of(ProgramNode, node)
  assert_equal(5, tokens.length)
end
# Only defined when the PRISM_BUILD_MINIMAL environment variable is not set.
unless ENV["PRISM_BUILD_MINIMAL"]
  # dump_file succeeds on an existing file, raises Errno::ENOENT for a missing
  # one and TypeError for a nil path.
  def test_dump_file
    assert_nothing_raised { Prism.dump_file(__FILE__) }

    error = assert_raise(Errno::ENOENT) { Prism.dump_file("idontexist.rb") }
    assert_equal("No such file or directory - idontexist.rb", error.message)

    assert_raise(TypeError) { Prism.dump_file(nil) }
  end
end
# lex_file succeeds on an existing file, raises Errno::ENOENT for a missing
# one and TypeError for a nil path.
def test_lex_file
  assert_nothing_raised { Prism.lex_file(__FILE__) }

  error = assert_raise(Errno::ENOENT) { Prism.lex_file("idontexist.rb") }
  assert_equal("No such file or directory - idontexist.rb", error.message)

  assert_raise(TypeError) { Prism.lex_file(nil) }
end
# parse_lex_file returns a program node and a non-empty token list for an
# existing file, raises Errno::ENOENT for a missing one and TypeError for a
# nil path.
def test_parse_lex_file
  node, tokens = Prism.parse_lex_file(__FILE__).value

  assert_kind_of(ProgramNode, node)
  refute_empty(tokens)

  error = assert_raise(Errno::ENOENT) { Prism.parse_lex_file("idontexist.rb") }
  assert_equal("No such file or directory - idontexist.rb", error.message)

  assert_raise(TypeError) { Prism.parse_lex_file(nil) }
end
# parse_file returns a program node for an existing file, raises
# Errno::ENOENT for a missing one and TypeError for a nil path.
def test_parse_file
  assert_kind_of(ProgramNode, Prism.parse_file(__FILE__).value)

  error = assert_raise(Errno::ENOENT) { Prism.parse_file("idontexist.rb") }
  assert_equal("No such file or directory - idontexist.rb", error.message)

  assert_raise(TypeError) { Prism.parse_file(nil) }
end
# Exercises Prism.parse_file_success?: it reports success for this file,
# raises Errno::ENOENT for a missing file and TypeError for a nil path.
#
# The previous body was a copy of test_parse_file_comments and never called
# parse_file_success? at all.
def test_parse_file_success
  assert Prism.parse_file_success?(__FILE__)

  error = assert_raise Errno::ENOENT do
    Prism.parse_file_success?("idontexist.rb")
  end

  assert_equal "No such file or directory - idontexist.rb", error.message

  assert_raise TypeError do
    Prism.parse_file_success?(nil)
  end
end
# parse_file_comments returns at least one comment for this file, raises
# Errno::ENOENT for a missing file and TypeError for a nil path.
def test_parse_file_comments
  assert_predicate(Prism.parse_file_comments(__FILE__), :any?)

  error = assert_raise(Errno::ENOENT) { Prism.parse_file_comments("idontexist.rb") }
  assert_equal("No such file or directory - idontexist.rb", error.message)

  assert_raise(TypeError) { Prism.parse_file_comments(nil) }
end
# To accurately compare against Ripper, we need to make sure that we're
# running on CRuby 3.2+.
ripper_enabled = RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0"
# The FOCUS environment variable allows you to specify one particular fixture
# to test, instead of all of them.
base = File.join(__dir__, "fixtures")
relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
relatives.each do |relative|
# These fail on TruffleRuby due to a difference in Symbol#inspect: :测试 vs :"测试"
next if RUBY_ENGINE == "truffleruby" and %w[emoji_method_calls.txt seattlerb/bug202.txt seattlerb/magic_encoding_comment.txt].include?(relative)
filepath = File.join(base, relative)
snapshot = File.expand_path(File.join("snapshots", relative), __dir__)
directory = File.dirname(snapshot)
FileUtils.mkdir_p(directory) unless File.directory?(directory)
ripper_should_match = ripper_enabled
check_valid_syntax = RUBY_VERSION >= "3.2.0"
case relative
when "seattlerb/pct_w_heredoc_interp_nested.txt"
# This file has changed behavior in Ripper in Ruby 3.3, so we skip it if
# we're on an earlier version.
ripper_should_match = false if RUBY_VERSION < "3.3.0"
when "seattlerb/heredoc_nested.txt", "whitequark/dedenting_heredoc.txt"
# It seems like there are some oddities with nested heredocs and ripper.
# Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
ripper_should_match = false
when "spanning_heredoc.txt", "spanning_heredoc_newlines.txt"
# Ripper seems to have a bug that the regex portions before and after
# the heredoc are combined into a single token. See
# https://bugs.ruby-lang.org/issues/19838.
ripper_should_match = false
when "heredocs_leading_whitespace.txt"
# Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
# characters in the heredoc start.
# Example: <<~' EOF' or <<-' EOF'
# https://bugs.ruby-lang.org/issues/19539
if RUBY_VERSION < "3.3.0"
ripper_should_match = false
check_valid_syntax = false
end
end
define_method "test_filepath_#{relative}" do
# First, read the source from the filepath. Use binmode to avoid
# converting CRLF on Windows, and explicitly set the external encoding
# to UTF-8 to override the binmode default.
source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
# Make sure that the given source is valid syntax, otherwise we have an
# invalid fixture.
assert_valid_syntax(source) if check_valid_syntax
# Next, assert that there were no errors during parsing.
result = Prism.parse(source, filepath: relative)
assert_empty result.errors
# Next, pretty print the source.
printed = PP.pp(result.value, +"", 79)
if File.exist?(snapshot)
saved = File.read(snapshot)
# If the snapshot file exists, but the printed value does not match the
# snapshot, then update the snapshot file.
if printed != saved
File.write(snapshot, printed)
warn("Updated snapshot at #{snapshot}.")
end
# If the snapshot file exists, then assert that the printed value
# matches the snapshot.
assert_equal(saved, printed)
else
# If the snapshot file does not yet exist, then write it out now.
File.write(snapshot, printed)
warn("Created snapshot at #{snapshot}.")
end
if !ENV["PRISM_BUILD_MINIMAL"]
# Next, assert that the value can be serialized and deserialized
# without changing the shape of the tree.
assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value)
end
# Next, check that the location ranges of each node in the tree are a
# superset of their respective child nodes.
assert_non_overlapping_locations(result.value)
# Next, assert that the newlines are in the expected places.
expected_newlines = [0]
source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
assert_equal expected_newlines, Prism.parse(source).source.offsets
if ripper_should_match
# Finally, assert that we can lex the source and get the same tokens as
# Ripper.
lex_result = Prism.lex_compat(source)
assert_equal [], lex_result.errors
tokens = lex_result.value
begin
Prism.lex_ripper(source).zip(tokens).each do |(ripper, prism)|
assert_equal ripper, prism
end
rescue SyntaxError
raise ArgumentError, "Test file has invalid syntax #{filepath}"
end
# Next, check that we get the correct number of magic comments when
# lexing with ripper.
expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments
actual = result.magic_comments
assert_equal expected.length, actual.length
expected.zip(actual).each do |(expected_key, expected_value), magic_comment|
assert_equal expected_key, magic_comment.key
assert_equal expected_value, magic_comment.value
end
end
end
end
Dir["*.txt", base: base].each do |relative|
  next if ["newline_terminated.txt", "spanning_heredoc_newlines.txt"].include?(relative)

  # We test every snippet (separated by \n\n) in isolation
  # to ensure the parser does not try to read bytes further than the end of each snippet
  define_method "test_individual_snippets_#{relative}" do
    # Read the source in binmode to avoid converting CRLF on Windows, and
    # explicitly set the external encoding to UTF-8 to override the binmode
    # default.
    file_contents = File.read(File.join(base, relative), binmode: true, external_encoding: Encoding::UTF_8)
    snippets = file_contents.split(/(?<=\S)\n\n(?=\S)/).map(&:rstrip)

    snippets.each do |snippet|
      result = Prism.parse(snippet, filepath: relative)
      assert_empty(result.errors)

      # The serialization round trip is skipped when PRISM_BUILD_MINIMAL is set.
      unless ENV["PRISM_BUILD_MINIMAL"]
        reloaded = Prism.load(snippet, Prism.dump(snippet, filepath: relative))
        assert_equal_nodes(result.value, reloaded.value)
      end
    end
  end
end
private
# Walks the tree breadth-first and asserts that every node's location range
# contains the location range of each of its children.
#
# String-like nodes whose opening starts with "<<" are exempt, because heredoc
# locations only cover the declaration of the heredoc.
def assert_non_overlapping_locations(node)
  pending = [node]

  while (current = pending.shift)
    string_like =
      current.is_a?(StringNode) ||
      current.is_a?(XStringNode) ||
      current.is_a?(InterpolatedStringNode) ||
      current.is_a?(InterpolatedXStringNode)

    heredoc = string_like && current.opening&.start_with?("<<")

    current.child_nodes.each do |child|
      # child_nodes can contain nil entries; there is nothing to visit there.
      next unless child

      pending << child
      next if heredoc

      assert_operator(current.location.start_offset, :<=, child.location.start_offset)
      assert_operator(current.location.end_offset, :>=, child.location.end_offset)
    end
  end
end
# Breadth-first search for the first SourceFileNode in the tree rooted at
# +program+. Returns nil when the tree contains none.
def find_source_file_node(program)
  pending = [program]

  while (candidate = pending.shift)
    if candidate.is_a?(SourceFileNode)
      return candidate
    else
      pending.concat(candidate.compact_child_nodes)
    end
  end
end
# Runs the block with $VERBOSE set to nil and returns the block's value. The
# previous $VERBOSE is restored afterwards, even when the block raises.
def ignore_warnings
  saved_verbosity = $VERBOSE

  begin
    $VERBOSE = nil
    yield
  ensure
    $VERBOSE = saved_verbosity
  end
end
end
end

Просмотреть файл

@ -1,186 +0,0 @@
# frozen_string_literal: true
require_relative "test_helper"
begin
verbose, $VERBOSE = $VERBOSE, nil
require "parser/ruby33"
require "prism/translation/parser33"
rescue LoadError
# In CRuby's CI, we're not going to test against the parser gem because we
# don't want to have to install it. So in this case we'll just skip this test.
return
ensure
$VERBOSE = verbose
end
# First, opt in to every AST feature.
Parser::Builders::Default.modernize

# Modify the source map == check so that it doesn't check against the node
# itself so we don't get into a recursive loop.
Parser::Source::Map.prepend(
  Module.new {
    # Two maps are equal when they have the same class and every instance
    # variable other than @node compares equal. all? stops at the first
    # mismatch and always produces a real boolean, including for a map that
    # has no instance variables left to compare.
    def ==(other)
      self.class == other.class &&
        (instance_variables - %i[@node]).all? { |ivar|
          instance_variable_get(ivar) == other.instance_variable_get(ivar)
        }
    end
  }
)

# Next, ensure that we're comparing the nodes and also comparing the source
# ranges so that we're getting all of the necessary information.
Parser::AST::Node.prepend(
  Module.new {
    # Nodes are equal only when the original comparison succeeds and their
    # locations match as well.
    def ==(other)
      super && (location == other.location)
    end
  }
)
module Prism
class ParserTest < TestCase
base = File.join(__dir__, "fixtures")
# These files are erroring because of the parser gem being wrong.
skip_incorrect = [
"embdoc_no_newline_at_end.txt"
]
# These files are either failing to parse or failing to translate, so we'll
# skip them for now.
skip_all = skip_incorrect | [
"dash_heredocs.txt",
"dos_endings.txt",
"heredocs_with_ignored_newlines.txt",
"regex.txt",
"regex_char_width.txt",
"spanning_heredoc.txt",
"spanning_heredoc_newlines.txt",
"unescaping.txt"
]
# Not sure why these files are failing on JRuby, but skipping them for now.
if RUBY_ENGINE == "jruby"
skip_all.push("emoji_method_calls.txt", "symbols.txt")
end
# These files are failing to translate their lexer output into the lexer
# output expected by the parser gem, so we'll skip them for now.
skip_tokens = [
"comments.txt",
"heredoc_with_comment.txt",
"indented_file_end.txt",
"methods.txt",
"strings.txt",
"tilde_heredocs.txt",
"xstring_with_backslash.txt"
]
# Define one test method per fixture file that is not on the skip list.
(Dir["*.txt", base: base] - skip_all).each do |name|
  define_method("test_#{name}") do
    # Token comparison is switched off for fixtures listed in skip_tokens.
    compare_tokens = !skip_tokens.include?(name)
    assert_equal_parses(File.join(base, name), compare_tokens: compare_tokens)
  end
end
private
# Tokenize the file at filepath with both the parser gem and prism's
# translation layer, then compare the ASTs, the comments and (unless
# compare_tokens is false) the tokens.
def assert_equal_parses(filepath, compare_tokens: true)
  buffer = Parser::Source::Buffer.new(filepath, 1)
  buffer.source = File.read(filepath)

  reference = Parser::Ruby33.new
  reference.diagnostics.consumer = ->(*) {}
  reference.diagnostics.all_errors_are_fatal = true

  # When the parser gem itself rejects the file there is nothing to compare
  # against, so the check ends here.
  begin
    expected = reference.tokenize(buffer)
  rescue ArgumentError, Parser::SyntaxError
    return
  end

  expected_ast, expected_comments, expected_tokens = expected
  actual_ast, actual_comments, actual_tokens =
    Prism::Translation::Parser33.new.tokenize(buffer)

  assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
  assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens
  assert_equal_comments(expected_comments, actual_comments)
end
# Build a failure message describing the first point at which the two ASTs
# diverge. Both trees are walked breadth-first in lockstep; the first pair of
# nodes that differs in type, location or (for :str nodes) first child decides
# the message. If no such pair is found, both trees are printed in full.
def assert_equal_asts_message(expected_ast, actual_ast)
  queue = [[expected_ast, actual_ast]]

  while (left, right = queue.shift)
    # The actual tree can be missing a child, or hold a non-node value, where
    # the expected tree has a node. Report that pair instead of letting the
    # attribute calls below raise NoMethodError and hide the real failure.
    unless left.is_a?(Parser::AST::Node) && right.is_a?(Parser::AST::Node)
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    if left.type != right.type
      return "expected: #{left.type}\nactual: #{right.type}"
    end

    if left.location != right.location
      return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
    end

    if left.type == :str && left.children[0] != right.children[0]
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    # Only node children are descended into; other children are not compared
    # individually here.
    left.children.zip(right.children).each do |left_child, right_child|
      queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
    end
  end

  "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
end
# Compare the token stream produced by the parser gem with the one produced
# by prism's translation layer. Nothing is checked when the two lists are
# already equal; otherwise the lists are walked with two separate indices so
# that known, tolerated differences can be skipped or normalized before each
# pair of tokens is asserted equal.
def assert_equal_tokens(expected_tokens, actual_tokens)
if expected_tokens != actual_tokens
expected_index = 0
actual_index = 0
# The walk is driven by the expected list only.
# NOTE(review): if actual_tokens runs out first, actual_token is nil below
# and actual_token[0] would raise — confirm whether that can happen.
while expected_index < expected_tokens.length
expected_token = expected_tokens[expected_index]
actual_token = actual_tokens[actual_index]
# Both indices advance before any comparison is made.
expected_index += 1
actual_index += 1
# The parser gem always has a space before a string end in list
# literals, but we don't. So we'll skip over the space.
if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END
# The second increment also steps past the expected token that follows the
# space, so that token and the actual :tSTRING_END are not compared.
expected_index += 1
next
end
# There are a lot of tokens that have very specific meaning according
# to the context of the parser. We don't expose that information in
# prism, so we need to normalize these tokens a bit.
# The normalization overwrites the type slot of the actual token in place.
case actual_token[0]
when :kDO
actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0])
when :tLPAREN
actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2
when :tPOW
actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR
end
# Now we can assert that the tokens are actually equal.
assert_equal expected_token, actual_token, -> {
"expected: #{expected_token.inspect}\n" \
"actual: #{actual_token.inspect}"
}
end
end
end
# Assert that both comment lists are equal, printing both lists in the
# failure message.
def assert_equal_comments(expected_comments, actual_comments)
  failure_message = lambda do
    "expected: #{expected_comments.inspect}\n" \
      "actual: #{actual_comments.inspect}"
  end

  assert_equal(expected_comments, actual_comments, failure_message)
end
end
end

Просмотреть файл

@ -223,12 +223,12 @@ module Prism
def test_last_encoding_option_wins
regex = "/foo/nu"
option = Prism.parse(regex).value.statements.body.first.options
option = Prism.parse_statement(regex).options
assert_equal Regexp::FIXEDENCODING, option
regex = "/foo/un"
option = Prism.parse(regex).value.statements.body.first.options
option = Prism.parse_statement(regex).options
assert_equal Regexp::NOENCODING, option
end
@ -246,7 +246,7 @@ module Prism
def options(flags)
options =
["/foo/#{flags}", "/foo\#{1}/#{flags}"].map do |source|
Prism.parse(source).value.statements.body.first.options
Prism.parse_statement(source).options
end
# Check that we get the same set of options from both regular expressions

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class AttributeWriteTest < TestCase
@ -41,18 +41,14 @@ module Prism
private
def parse(source)
Prism.parse(source).value.statements.body.first
end
def assert_attribute_write(source)
call = parse(source)
call = Prism.parse_statement(source)
assert(call.attribute_write?)
assert_equal(1, eval(source))
end
def refute_attribute_write(source)
call = parse(source)
call = Prism.parse_statement(source)
refute(call.attribute_write?)
refute_equal(1, eval(source))
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class CommentsTest < TestCase

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class ConstantPathNodeTest < TestCase
@ -11,7 +11,7 @@ module Prism
Qux
RUBY
constant_path = Prism.parse(source).value.statements.body.first
constant_path = Prism.parse_statement(source)
assert_equal("Foo::Bar::Baz::Qux", constant_path.full_name)
end
@ -22,7 +22,7 @@ module Prism
Qux
RUBY
constant_path = Prism.parse(source).value.statements.body.first
constant_path = Prism.parse_statement(source)
assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do
constant_path.full_name
end
@ -35,7 +35,7 @@ module Prism
Qux
RUBY
constant_path = Prism.parse(source).value.statements.body.first
constant_path = Prism.parse_statement(source)
assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do
constant_path.full_name
@ -49,7 +49,7 @@ module Prism
Qux, Something = [1, 2]
RUBY
node = Prism.parse(source).value.statements.body.first
node = Prism.parse_statement(source)
assert_equal("Foo::Bar::Baz::Qux", node.lefts.first.full_name)
end
@ -60,7 +60,7 @@ module Prism
Qux, Something = [1, 2]
RUBY
node = Prism.parse(source).value.statements.body.first
node = Prism.parse_statement(source)
assert_equal("::Foo::Bar::Baz::Qux", node.lefts.first.full_name)
end
@ -69,7 +69,7 @@ module Prism
self::Foo, Bar = [1, 2]
RUBY
constant_target = Prism.parse(source).value.statements.body.first
constant_target = Prism.parse_statement(source)
dynamic, static = constant_target.lefts
assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do
@ -84,7 +84,7 @@ module Prism
Bar
RUBY
constant = Prism.parse(source).value.statements.body.first
constant = Prism.parse_statement(source)
assert_equal("Bar", constant.full_name)
end
end

Просмотреть файл

@ -0,0 +1,22 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  # Checks the === comparison between nodes obtained from separate parses.
  class EqualityTest < TestCase
    def test_equality
      with_first_param = "class Something; @var = something.else { _1 }; end"
      with_second_param = "class Something; @var = something.else { _2 }; end"

      # The same source parsed twice must compare as equal, for a single
      # statement, for a whole program, and for a larger nested statement.
      assert_operator Prism.parse_statement("1"), :===, Prism.parse_statement("1")
      assert_operator Prism.parse("1").value, :===, Prism.parse("1").value
      assert_operator Prism.parse_statement(with_first_param), :===, Prism.parse_statement(with_first_param)

      # Different sources must not compare as equal.
      [
        ["1", "2"],
        ["1", "0x1"],
        [with_first_param, with_second_param]
      ].each do |left, right|
        refute_operator Prism.parse_statement(left), :===, Prism.parse_statement(right)
      end
    end
  end
end

Просмотреть файл

@ -0,0 +1,19 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  # Checks the heredoc? predicate on plain and interpolated strings and
  # xstrings.
  class HeredocTest < TestCase
    def test_heredoc?
      # Quoted and backtick literals are not heredocs.
      [
        "\"foo\"",
        "\"foo \#{1}\"",
        "`foo`",
        "`foo \#{1}`"
      ].each do |source|
        refute Prism.parse_statement(source).heredoc?
      end

      # The same four kinds of literal written as heredocs are.
      [
        "<<~HERE\nfoo\nHERE\n",
        "<<~HERE\nfoo \#{1}\nHERE\n",
        "<<~`HERE`\nfoo\nHERE\n",
        "<<~`HERE`\nfoo \#{1}\nHERE\n"
      ].each do |source|
        assert Prism.parse_statement(source).heredoc?
      end
    end
  end
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class IndexWriteTest < TestCase

Просмотреть файл

@ -0,0 +1,33 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  class IntegerBaseFlagsTest < TestCase
    # Through some bit hackery, we want to allow consumers to use the integer
    # base flags as the base itself. The current alignment of the flags has
    # the nice property of providing them in the correct order, so this test
    # pins that assumption down so that it doesn't change out from under us.
    #
    # In C, this would look something like:
    #
    #     ((flags & ~DECIMAL) << 1) || 10
    #
    # Ruby needs a little more work because 0 is truthy and ~ on an integer
    # doesn't have a fixed width.
    def test_flags
      { "0b1" => 2, "0o1" => 8, "0d1" => 10, "0x1" => 16 }.each do |source, expected|
        assert_equal expected, base(source)
      end
    end

    private

    # Derive the numeric base from the flags of the parsed integer literal.
    def base(source)
      flags = Prism.parse_statement(source).send(:flags)

      # Mask out the decimal flag within the low four bits, then shift. A
      # result of zero means no other base flag was set, which maps to 10.
      shifted = (flags & (0b1111 - IntegerBaseFlags::DECIMAL)) << 1
      shifted.zero? ? 10 : shifted
    end
  end
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class IntegerParseTest < TestCase
@ -35,7 +35,7 @@ module Prism
private
def assert_integer_parse(expected, source = expected.to_s)
assert_equal expected, Prism.parse(source).value.statements.body.first.value
assert_equal expected, Prism.parse_statement(source).value
end
end
end

Просмотреть файл

@ -0,0 +1,21 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  # Checks that the value reported for a numeric literal node equals the
  # value Ruby itself gives the same literal.
  class NumericValueTest < TestCase
    def test_numeric_value
      # Each row pairs the Ruby literal with the source text handed to prism.
      [
        [123, "123"],
        [3.14, "3.14"],
        [42i, "42i"],
        [42.1ri, "42.1ri"],
        [3.14i, "3.14i"],
        [42r, "42r"],
        [0.5r, "0.5r"],
        [42ri, "42ri"],
        [0.5ri, "0.5ri"],
        [0xFFr, "0xFFr"],
        [0xFFri, "0xFFri"]
      ].each do |expected, source|
        assert_equal expected, Prism.parse_statement(source).value
      end
    end
  end
end

Просмотреть файл

@ -0,0 +1,43 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  class OverlapTest < TestCase
    # One test per fixture.
    Fixture.each do |fixture|
      define_method(fixture.test_name) do
        assert_overlap(fixture)
      end
    end

    private

    # Parse the fixture and assert, breadth-first over the whole tree, that
    # every parent's location range fully contains the range of each child.
    def assert_overlap(fixture)
      pending = [Prism.parse_file(fixture.full_path).value]

      loop do
        parent = pending.shift
        break unless parent

        # Heredocs are exempt from the containment check because their
        # location only covers the heredoc declaration. Only the string-like
        # node types can be heredocs, so only those have their opening
        # inspected.
        heredoc =
          case parent
          when StringNode, XStringNode, InterpolatedStringNode, InterpolatedXStringNode
            parent.opening&.start_with?("<<")
          end

        parent.child_nodes.each do |child|
          # child_nodes can contain nil entries; skip them.
          next unless child

          # Every real child is visited later, even when its parent is exempt.
          pending << child
          next if heredoc

          assert_operator parent.location.start_offset, :<=, child.location.start_offset
          assert_operator parent.location.end_offset, :>=, child.location.end_offset
        end
      end
    end
  end
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class RedundantReturnTest < TestCase

Просмотреть файл

@ -0,0 +1,25 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  # NOTE(review): the class name mentions regular expression options, but the
  # assertions below cover parse options (filepath, line,
  # frozen_string_literal and scopes) — confirm the name is intended.
  class RegularExpressionOptionsTest < TestCase
    def test_options
      # filepath: empty by default, otherwise whatever was passed in.
      without_filepath = Prism.parse_statement("__FILE__")
      assert_equal "", without_filepath.filepath
      with_filepath = Prism.parse_statement("__FILE__", filepath: "foo.rb")
      assert_equal "foo.rb", with_filepath.filepath

      # line: the starting line is 1 unless overridden.
      default_line = Prism.parse_statement("foo")
      assert_equal 1, default_line.location.start_line
      offset_line = Prism.parse_statement("foo", line: 10)
      assert_equal 10, offset_line.location.start_line

      # frozen_string_literal: strings are frozen only when it is true.
      plain_string = Prism.parse_statement("\"foo\"")
      refute plain_string.frozen?
      frozen_string = Prism.parse_statement("\"foo\"", frozen_string_literal: true)
      assert frozen_string.frozen?
      unfrozen_string = Prism.parse_statement("\"foo\"", frozen_string_literal: false)
      refute unfrozen_string.frozen?

      # scopes: a name declared in a passed scope parses as a local variable
      # read instead of a call, and shows up in the program's locals.
      assert_kind_of CallNode, Prism.parse_statement("foo")
      assert_kind_of LocalVariableReadNode, Prism.parse_statement("foo", scopes: [[:foo]])
      outer_read = Prism.parse_statement("foo", scopes: [[:foo], []])
      assert_equal 1, outer_read.depth
      program = Prism.parse("foo", scopes: [[:foo]]).value
      assert_equal [:foo], program.locals
    end
  end
end

Просмотреть файл

@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class LocationTest < TestCase
class SourceLocationTest < TestCase
def test_AliasGlobalVariableNode
assert_location(AliasGlobalVariableNode, "alias $foo $bar")
end
@ -921,7 +921,7 @@ module Prism
def test_all_tested
expected = Prism.constants.grep(/.Node$/).sort - %i[MissingNode ProgramNode]
actual = LocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort
actual = SourceLocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort
assert_equal expected, actual
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class StaticInspectTest < TestCase

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class StaticLiteralsTest < TestCase

Просмотреть файл

@ -2,8 +2,7 @@
return if RUBY_VERSION < "3.1"
require_relative "test_helper"
require "stringio"
require_relative "../test_helper"
module Prism
class WarningsTest < TestCase

Просмотреть файл

@ -1,7 +1,7 @@
# frozen_string_literal: true
# typed: ignore
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class CompilerTest < TestCase

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class DesugarCompilerTest < TestCase

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class DispatcherTest < TestCase

Просмотреть файл

@ -0,0 +1,173 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
class LocationTest < TestCase
# Joining two locations from the same parse spans from the start of the
# first to the end of the second; joining in the reverse order, or joining
# locations from different parses, raises.
def test_join
  source = "1234 + 567"
  call = Prism.parse_statement(source)
  receiver = call.receiver
  argument = call.arguments.arguments.first

  joined = receiver.location.join(argument.location)
  assert_equal 0, joined.start_offset
  assert_equal 10, joined.length

  assert_raise(RuntimeError, "Incompatible locations") do
    argument.location.join(receiver.location)
  end

  # A node from a second parse of the same text cannot be joined with a node
  # from the first parse, in either direction.
  other_argument = Prism.parse_statement(source).arguments.arguments.first
  [[other_argument, receiver], [receiver, other_argument]].each do |left, right|
    assert_raise(RuntimeError, "Incompatible sources") do
      left.location.join(right.location)
    end
  end
end
# Character offsets and columns of four emoji tokens spread over two lines.
def test_character_offsets
  statements = Prism.parse("😀 + 😀\n😍 ||= 😍").value.statements.body

  # offsets and columns are both [start, end] pairs.
  verify = lambda do |location, offsets, columns|
    assert_equal offsets.first, location.start_character_offset
    assert_equal offsets.last, location.end_character_offset
    assert_equal columns.first, location.start_character_column
    assert_equal columns.last, location.end_character_column
  end

  # first 😀
  verify.call(statements.first.receiver.location, [0, 1], [0, 1])

  # second 😀
  verify.call(statements.first.arguments.arguments.first.location, [4, 5], [4, 5])

  # first 😍
  verify.call(statements.last.name_loc, [6, 7], [0, 1])

  # second 😍
  verify.call(statements.last.value.location, [12, 13], [6, 7])
end
# Code unit offsets and columns of four emoji tokens spread over two lines,
# measured in UTF-8, UTF-16LE and UTF-32LE.
def test_code_units
  statements = Prism.parse("😀 + 😀\n😍 ||= 😍").value.statements.body
  encodings = [Encoding::UTF_8, Encoding::UTF_16LE, Encoding::UTF_32LE]

  # Every row of an expectations table lists the expected values in the same
  # order as the encodings above.
  verify = lambda do |location, expectations|
    expectations.each do |reader, values|
      encodings.zip(values) do |encoding, value|
        assert_equal value, location.public_send(reader, encoding)
      end
    end
  end

  # first 😀
  verify.call(
    statements.first.receiver.location,
    {
      start_code_units_offset: [0, 0, 0],
      end_code_units_offset: [1, 2, 1],
      start_code_units_column: [0, 0, 0],
      end_code_units_column: [1, 2, 1]
    }
  )

  # second 😀
  verify.call(
    statements.first.arguments.arguments.first.location,
    {
      start_code_units_offset: [4, 5, 4],
      end_code_units_offset: [5, 7, 5],
      start_code_units_column: [4, 5, 4],
      end_code_units_column: [5, 7, 5]
    }
  )

  # first 😍
  verify.call(
    statements.last.name_loc,
    {
      start_code_units_offset: [6, 8, 6],
      end_code_units_offset: [7, 10, 7],
      start_code_units_column: [0, 0, 0],
      end_code_units_column: [1, 2, 1]
    }
  )

  # second 😍
  verify.call(
    statements.last.value.location,
    {
      start_code_units_offset: [12, 15, 12],
      end_code_units_offset: [13, 17, 13],
      start_code_units_column: [6, 7, 6],
      end_code_units_column: [7, 9, 7]
    }
  )
end
# Chopping shortens the slice by one character at a time and settles on an
# empty slice.
def test_chop
  whole = Prism.parse("foo").value.location

  assert_equal "fo", whole.chop.slice
  assert_equal "", whole.chop.chop.chop.slice

  # Check that we don't go negative when chopping more often than there are
  # characters.
  exhausted = 10.times.reduce(whole) { |location, _| location.chop }
  assert_equal "", exhausted.slice
end
# slice_lines on the method definition passed to `private` returns both of
# its source lines in full, including the text before the `def`.
def test_slice_lines
  source = "\nprivate def foo\nend\n"
  definition = Prism.parse_statement(source).arguments.arguments.first

  assert_equal "private def foo\nend\n", definition.slice_lines
end
# Adjoining "=" to the message location of an attribute write produces a
# new, different Location with the offsets asserted below.
def test_adjoin
  call = Prism.parse("foo.bar = 1").value.statements.body.first
  message_loc = call.message_loc
  widened = message_loc.adjoin("=")

  assert_kind_of Location, widened
  refute_equal message_loc, widened

  assert_equal 4, widened.start_offset
  assert_equal 9, widened.end_offset
end
end
end

Просмотреть файл

@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative "test_helper"
return if RUBY_VERSION < "3.2"
require_relative "../test_helper"
module Prism
class ParametersSignatureTest < TestCase
def test_req
@ -56,7 +56,6 @@ module Prism
def test_key_ordering
omit("TruffleRuby returns keys in order they were declared") if RUBY_ENGINE == "truffleruby"
assert_parameters([[:keyreq, :a], [:keyreq, :b], [:key, :c], [:key, :d]], "a:, c: 1, b:, d: 2")
end
@ -75,14 +74,13 @@ module Prism
private
def assert_parameters(expected, source)
eval("def self.m(#{source}); end")
# Compare against our expectation.
assert_equal(expected, signature(source))
begin
assert_equal(expected, method(:m).parameters)
assert_equal(expected, signature(source))
ensure
singleton_class.undef_method(:m)
end
# Compare against Ruby's expectation.
object = Object.new
eval("def object.m(#{source}); end")
assert_equal(expected, object.method(:m).parameters)
end
def signature(source)

Просмотреть файл

@ -0,0 +1,288 @@
# frozen_string_literal: true
require_relative "../test_helper"
# Try to load the parser gem and prism's translation layer, with warnings
# switched off for the duration of the requires.
begin
# Remember the current verbosity and disable warnings in a single step.
verbose, $VERBOSE = $VERBOSE, nil
require "parser/ruby33"
require "prism/translation/parser33"
rescue LoadError
# In CRuby's CI, we're not going to test against the parser gem because we
# don't want to have to install it. So in this case we'll just skip this test.
return
ensure
# Put the original verbosity back whether or not the requires succeeded.
$VERBOSE = verbose
end
# First, opt in to every AST feature.
Parser::Builders::Default.modernize

# Modify the source map == check so that it doesn't check against the node
# itself so we don't get into a recursive loop.
Parser::Source::Map.prepend(
  Module.new {
    # Two maps are equal when they have the same class and every instance
    # variable other than @node compares equal. all? stops at the first
    # mismatch and always produces a real boolean, including for a map that
    # has no instance variables left to compare.
    def ==(other)
      self.class == other.class &&
        (instance_variables - %i[@node]).all? { |ivar|
          instance_variable_get(ivar) == other.instance_variable_get(ivar)
        }
    end
  }
)

# Next, ensure that we're comparing the nodes and also comparing the source
# ranges so that we're getting all of the necessary information.
Parser::AST::Node.prepend(
  Module.new {
    # Nodes are equal only when the original comparison succeeds and their
    # locations match as well.
    def ==(other)
      super && (location == other.location)
    end
  }
)
module Prism
class ParserTest < TestCase
# These files are erroring because of the parser gem being wrong.
skip_incorrect = [
"embdoc_no_newline_at_end.txt"
]
# These files are either failing to parse or failing to translate, so we'll
# skip them for now.
skip_all = skip_incorrect | [
"dash_heredocs.txt",
"dos_endings.txt",
"heredocs_with_ignored_newlines.txt",
"regex.txt",
"regex_char_width.txt",
"spanning_heredoc.txt",
"spanning_heredoc_newlines.txt",
"unescaping.txt",
"seattlerb/backticks_interpolation_line.txt",
"seattlerb/block_decomp_anon_splat_arg.txt",
"seattlerb/block_decomp_arg_splat_arg.txt",
"seattlerb/block_decomp_arg_splat.txt",
"seattlerb/block_decomp_splat.txt",
"seattlerb/block_paren_splat.txt",
"seattlerb/bug190.txt",
"seattlerb/case_in_hash_pat_rest_solo.txt",
"seattlerb/case_in_hash_pat_rest.txt",
"seattlerb/case_in.txt",
"seattlerb/heredoc_nested.txt",
"seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt",
"seattlerb/heredoc_with_carriage_return_escapes_windows.txt",
"seattlerb/heredoc_with_carriage_return_escapes.txt",
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
"seattlerb/heredoc_with_only_carriage_returns_windows.txt",
"seattlerb/heredoc_with_only_carriage_returns.txt",
"seattlerb/masgn_double_paren.txt",
"seattlerb/parse_line_heredoc_hardnewline.txt",
"seattlerb/parse_pattern_044.txt",
"seattlerb/parse_pattern_058_2.txt",
"seattlerb/parse_pattern_058.txt",
"seattlerb/pct_nl.txt",
"seattlerb/pctW_lineno.txt",
"seattlerb/regexp_esc_C_slash.txt",
"seattlerb/TestRubyParserShared.txt",
"unparser/corpus/literal/assignment.txt",
"unparser/corpus/literal/block.txt",
"unparser/corpus/literal/def.txt",
"unparser/corpus/literal/dstr.txt",
"unparser/corpus/literal/literal.txt",
"unparser/corpus/literal/pattern.txt",
"unparser/corpus/semantic/dstr.txt",
"unparser/corpus/semantic/opasgn.txt",
"whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
"whitequark/masgn_nested.txt",
"whitequark/newline_in_hash_argument.txt",
"whitequark/parser_bug_640.txt",
"whitequark/parser_slash_slash_n_escaping_in_literals.txt",
"whitequark/ruby_bug_11989.txt",
"whitequark/slash_newline_in_heredocs.txt",
"whitequark/unary_num_pow_precedence.txt"
]
# Not sure why these files are failing on JRuby, but skipping them for now.
if RUBY_ENGINE == "jruby"
skip_all.push("emoji_method_calls.txt", "symbols.txt")
end
# These files are failing to translate their lexer output into the lexer
# output expected by the parser gem, so we'll skip them for now.
skip_tokens = [
"comments.txt",
"heredoc_with_comment.txt",
"indented_file_end.txt",
"methods.txt",
"strings.txt",
"tilde_heredocs.txt",
"xstring_with_backslash.txt",
"seattlerb/bug169.txt",
"seattlerb/class_comments.txt",
"seattlerb/difficult4__leading_dots2.txt",
"seattlerb/difficult6__7.txt",
"seattlerb/difficult6__8.txt",
"seattlerb/dsym_esc_to_sym.txt",
"seattlerb/heredoc__backslash_dos_format.txt",
"seattlerb/heredoc_backslash_nl.txt",
"seattlerb/heredoc_comma_arg.txt",
"seattlerb/heredoc_squiggly_blank_lines.txt",
"seattlerb/heredoc_squiggly_interp.txt",
"seattlerb/heredoc_squiggly_tabs_extra.txt",
"seattlerb/heredoc_squiggly_tabs.txt",
"seattlerb/heredoc_squiggly_visually_blank_lines.txt",
"seattlerb/heredoc_squiggly.txt",
"seattlerb/heredoc_unicode.txt",
"seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt",
"seattlerb/heredoc_with_interpolation_and_carriage_return_escapes.txt",
"seattlerb/interpolated_symbol_array_line_breaks.txt",
"seattlerb/interpolated_word_array_line_breaks.txt",
"seattlerb/label_vs_string.txt",
"seattlerb/module_comments.txt",
"seattlerb/non_interpolated_symbol_array_line_breaks.txt",
"seattlerb/non_interpolated_word_array_line_breaks.txt",
"seattlerb/parse_line_block_inline_comment_leading_newlines.txt",
"seattlerb/parse_line_block_inline_comment.txt",
"seattlerb/parse_line_block_inline_multiline_comment.txt",
"seattlerb/parse_line_dstr_escaped_newline.txt",
"seattlerb/parse_line_heredoc.txt",
"seattlerb/parse_line_multiline_str_literal_n.txt",
"seattlerb/parse_line_str_with_newline_escape.txt",
"seattlerb/pct_Q_backslash_nl.txt",
"seattlerb/pct_w_heredoc_interp_nested.txt",
"seattlerb/qsymbols_empty_space.txt",
"seattlerb/qw_escape_term.txt",
"seattlerb/qWords_space.txt",
"seattlerb/read_escape_unicode_curlies.txt",
"seattlerb/read_escape_unicode_h4.txt",
"seattlerb/required_kwarg_no_value.txt",
"seattlerb/slashy_newlines_within_string.txt",
"seattlerb/str_double_escaped_newline.txt",
"seattlerb/str_double_newline.txt",
"seattlerb/str_evstr_escape.txt",
"seattlerb/str_newline_hash_line_number.txt",
"seattlerb/str_single_newline.txt",
"seattlerb/symbol_empty.txt",
"seattlerb/symbols_empty_space.txt",
"whitequark/args.txt",
"whitequark/beginless_erange_after_newline.txt",
"whitequark/beginless_irange_after_newline.txt",
"whitequark/bug_ascii_8bit_in_literal.txt",
"whitequark/bug_def_no_paren_eql_begin.txt",
"whitequark/dedenting_heredoc.txt",
"whitequark/dedenting_non_interpolating_heredoc_line_continuation.txt",
"whitequark/forward_arg_with_open_args.txt",
"whitequark/interp_digit_var.txt",
"whitequark/lbrace_arg_after_command_args.txt",
"whitequark/multiple_pattern_matches.txt",
"whitequark/parser_drops_truncated_parts_of_squiggly_heredoc.txt",
"whitequark/ruby_bug_11990.txt",
"whitequark/ruby_bug_14690.txt",
"whitequark/ruby_bug_9669.txt",
"whitequark/space_args_arg_block.txt",
"whitequark/space_args_block.txt"
]
# Define one test method per fixture that is not on the skip list.
Fixture.each(except: skip_all) do |fixture|
  define_method(fixture.test_name) do
    # Token comparison is switched off for fixtures listed in skip_tokens.
    compare_tokens = !skip_tokens.include?(fixture.path)
    assert_equal_parses(fixture, compare_tokens: compare_tokens)
  end
end
private
# Tokenize the fixture with both the parser gem and prism's translation
# layer, then compare the ASTs, the comments and (unless compare_tokens is
# false) the tokens. Both tokenize calls run with warnings silenced.
def assert_equal_parses(fixture, compare_tokens: true)
  buffer = Parser::Source::Buffer.new(fixture.path, 1)
  buffer.source = fixture.read

  reference = Parser::Ruby33.new
  reference.diagnostics.consumer = ->(*) {}
  reference.diagnostics.all_errors_are_fatal = true

  # When the parser gem itself rejects the fixture there is nothing to
  # compare against, so the check ends here.
  begin
    expected = ignore_warnings { reference.tokenize(buffer) }
  rescue ArgumentError, Parser::SyntaxError
    return
  end

  expected_ast, expected_comments, expected_tokens = expected
  actual_ast, actual_comments, actual_tokens =
    ignore_warnings { Prism::Translation::Parser33.new.tokenize(buffer) }

  assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
  assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens
  assert_equal_comments(expected_comments, actual_comments)
end
# Build a failure message describing the first point at which the two ASTs
# diverge. Both trees are walked breadth-first in lockstep; the first pair of
# nodes that differs in type, location or (for :str nodes) first child decides
# the message. If no such pair is found, both trees are printed in full.
def assert_equal_asts_message(expected_ast, actual_ast)
  queue = [[expected_ast, actual_ast]]

  while (left, right = queue.shift)
    # The actual tree can be missing a child, or hold a non-node value, where
    # the expected tree has a node. Report that pair instead of letting the
    # attribute calls below raise NoMethodError and hide the real failure.
    unless left.is_a?(Parser::AST::Node) && right.is_a?(Parser::AST::Node)
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    if left.type != right.type
      return "expected: #{left.type}\nactual: #{right.type}"
    end

    if left.location != right.location
      return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
    end

    if left.type == :str && left.children[0] != right.children[0]
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    # Only node children are descended into; other children are not compared
    # individually here.
    left.children.zip(right.children).each do |left_child, right_child|
      queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
    end
  end

  "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
end
# Compare the token stream produced by the parser gem with the one produced
# by prism's translation layer. Nothing is checked when the two lists are
# already equal; otherwise the lists are walked with two separate indices so
# that known, tolerated differences can be skipped or normalized before each
# pair of tokens is asserted equal.
def assert_equal_tokens(expected_tokens, actual_tokens)
if expected_tokens != actual_tokens
expected_index = 0
actual_index = 0
# The walk is driven by the expected list only.
# NOTE(review): if actual_tokens runs out first, actual_token is nil below
# and actual_token[0] would raise — confirm whether that can happen.
while expected_index < expected_tokens.length
expected_token = expected_tokens[expected_index]
actual_token = actual_tokens[actual_index]
# Both indices advance before any comparison is made.
expected_index += 1
actual_index += 1
# The parser gem always has a space before a string end in list
# literals, but we don't. So we'll skip over the space.
if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END
# The second increment also steps past the expected token that follows the
# space, so that token and the actual :tSTRING_END are not compared.
expected_index += 1
next
end
# There are a lot of tokens that have very specific meaning according
# to the context of the parser. We don't expose that information in
# prism, so we need to normalize these tokens a bit.
# The normalization overwrites the type slot of the actual token in place.
case actual_token[0]
when :kDO
actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0])
when :tLPAREN
actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2
when :tPOW
actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR
end
# Now we can assert that the tokens are actually equal.
assert_equal expected_token, actual_token, -> {
"expected: #{expected_token.inspect}\n" \
"actual: #{actual_token.inspect}"
}
end
end
end
# Assert that both comment lists are equal, printing both lists in full when
# they are not.
def assert_equal_comments(expected_comments, actual_comments)
  failure_message = lambda do
    "expected: #{expected_comments.inspect}\n" \
      "actual: #{actual_comments.inspect}"
  end

  assert_equal(expected_comments, actual_comments, failure_message)
end
end
end

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class PatternTest < TestCase

Просмотреть файл

@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class ReflectionTest < TestCase

Просмотреть файл

@ -2,13 +2,11 @@
return if RUBY_VERSION < "3.3"
require_relative "test_helper"
require_relative "../test_helper"
module Prism
class RipperTest < TestCase
base = File.join(__dir__, "fixtures")
relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
# Skip these tests that Ripper is reporting the wrong results for.
incorrect = [
# Ripper incorrectly attributes the block to the keyword.
"seattlerb/block_break.txt",
@ -31,6 +29,7 @@ module Prism
"spanning_heredoc.txt"
]
# Skip these tests that we haven't implemented yet.
omitted = [
"dos_endings.txt",
"heredocs_with_ignored_newlines.txt",
@ -50,30 +49,8 @@ module Prism
"whitequark/slash_newline_in_heredocs.txt"
]
relatives.each do |relative|
# Skip the tests that Ripper is reporting the wrong results for.
next if incorrect.include?(relative)
# Skip the tests we haven't implemented yet.
next if omitted.include?(relative)
filepath = File.join(__dir__, "fixtures", relative)
define_method "test_ripper_#{relative}" do
source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
case relative
when /break|next|redo|if|unless|rescue|control|keywords|retry/
source = "-> do\nrescue\n#{source}\nend"
end
case source
when /^ *yield/
source = "def __invalid_yield__\n#{source}\nend"
end
assert_ripper(source)
end
Fixture.each(except: incorrect | omitted) do |fixture|
define_method(fixture.test_name) { assert_ripper(fixture.read) }
end
private

Просмотреть файл

@ -0,0 +1,127 @@
# frozen_string_literal: true
return if RUBY_ENGINE == "jruby"
require_relative "../test_helper"
begin
require "ruby_parser"
rescue LoadError
# In CRuby's CI, we're not going to test against the ruby_parser gem because
# we don't want to have to install it. So in this case we'll just skip this
# test.
return
end
# Equality on Sexp is extended so that two nodes only compare equal when
# their line and file match as well. This lets the tests verify that those
# fields are being set correctly.
position_aware_equality = Module.new do
  def ==(other)
    same_structure = super
    same_structure && line == other.line && file == other.file # && line_max == other.line_max
  end
end

Sexp.prepend(position_aware_equality)
module Prism
class RubyParserTest < TestCase
# Fixtures for which a mismatch against ruby_parser is currently tolerated.
# Tests are still generated for them, but they are passed to
# assert_ruby_parser as allowed failures: no assertion is made, and a notice
# is printed if the two results happen to match.
todos = [
"newline_terminated.txt",
"regex_char_width.txt",
"seattlerb/bug169.txt",
"seattlerb/masgn_colon3.txt",
"seattlerb/messy_op_asgn_lineno.txt",
"seattlerb/op_asgn_primary_colon_const_command_call.txt",
"seattlerb/regexp_esc_C_slash.txt",
"seattlerb/str_lit_concat_bad_encodings.txt",
"unescaping.txt",
"unparser/corpus/literal/kwbegin.txt",
"unparser/corpus/literal/send.txt",
"whitequark/masgn_const.txt",
"whitequark/ruby_bug_12402.txt",
"whitequark/ruby_bug_14690.txt",
"whitequark/space_args_block.txt"
]
# https://github.com/seattlerb/ruby_parser/issues/344
failures = [
"alias.txt",
"dos_endings.txt",
"heredocs_with_ignored_newlines.txt",
"method_calls.txt",
"methods.txt",
"multi_write.txt",
"not.txt",
"patterns.txt",
"regex.txt",
"seattlerb/and_multi.txt",
"seattlerb/heredoc__backslash_dos_format.txt",
"seattlerb/heredoc_bad_hex_escape.txt",
"seattlerb/heredoc_bad_oct_escape.txt",
"seattlerb/heredoc_with_extra_carriage_horrible_mix.txt",
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
"seattlerb/heredoc_with_only_carriage_returns_windows.txt",
"seattlerb/heredoc_with_only_carriage_returns.txt",
"spanning_heredoc_newlines.txt",
"spanning_heredoc.txt",
"tilde_heredocs.txt",
"unparser/corpus/literal/literal.txt",
"while.txt",
"whitequark/cond_eflipflop.txt",
"whitequark/cond_iflipflop.txt",
"whitequark/cond_match_current_line.txt",
"whitequark/dedenting_heredoc.txt",
"whitequark/lvar_injecting_match.txt",
"whitequark/not.txt",
"whitequark/numparam_ruby_bug_19025.txt",
"whitequark/op_asgn_cmd.txt",
"whitequark/parser_bug_640.txt",
"whitequark/parser_slash_slash_n_escaping_in_literals.txt",
"whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt",
"whitequark/pattern_matching_single_line.txt",
"whitequark/ruby_bug_11989.txt",
"whitequark/slash_newline_in_heredocs.txt"
]
# Define one test method per fixture, leaving out the fixtures listed in
# failures entirely. Fixtures listed in todos are flagged as allowed
# failures when handed to assert_ruby_parser.
Fixture.each(except: failures) do |fixture|
define_method(fixture.test_name) do
assert_ruby_parser(fixture, todos.include?(fixture.path))
end
end
private
# Parse the fixture with both ruby_parser and the prism translation layer
# and compare the results.
#
# fixture         - the fixture to read and parse.
# allowed_failure - when true no assertion is made; a notice is printed
#                   instead if the two results turn out to be equal.
def assert_ruby_parser(fixture, allowed_failure)
  source = fixture.read
  expected = ignore_warnings { ::RubyParser.new.parse(source, fixture.path) }
  actual = Prism::Translation::RubyParser.new.parse(source, fixture.path)

  if allowed_failure
    puts "#{name} now passes" if expected == actual
  else
    assert_equal(expected, actual, -> { message(expected, actual) })
  end
end
# Describe the first difference between two values, descending into Sexp
# nodes field by field.
#
# Returns nil when the values are equal (or when no differing field can be
# found), otherwise a String naming the expected and actual values.
def message(expected, actual)
  return if expected == actual

  unless expected.is_a?(Sexp) && actual.is_a?(Sexp)
    return "expected: #{expected.inspect}, actual: #{actual.inspect}"
  end

  if expected.line != actual.line
    "expected: (#{expected.inspect} line=#{expected.line}), actual: (#{actual.inspect} line=#{actual.line})"
  elsif expected.file != actual.file
    "expected: (#{expected.inspect} file=#{expected.file}), actual: (#{actual.inspect} file=#{actual.file})"
  elsif expected.length != actual.length
    "expected: (#{expected.inspect} length=#{expected.length}), actual: (#{actual.inspect} length=#{actual.length})"
  else
    # Same position and size, so the difference is in one of the fields.
    expected.zip(actual).each do |expected_field, actual_field|
      nested = message(expected_field, actual_field)
      return nested if nested
    end

    nil
  end
end
end
end

Просмотреть файл

@ -0,0 +1,26 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
  class TunnelTest < TestCase
    # Check that tunnelling to a line/column pair ends on the expected integer
    # literal, and that a position between nodes yields the expected chain of
    # enclosing node classes.
    def test_tunnel
      program = Prism.parse("foo(1) +\n bar(2, 3) +\n baz(3, 4, 5)").value

      # Each entry is [line, column, expected integer value].
      [[1, 4, 1], [2, 6, 2], [3, 9, 4]].each do |line, column, value|
        innermost = program.tunnel(line, column).last
        assert_kind_of IntegerNode, innermost
        assert_equal value, innermost.value
      end

      chain = program.tunnel(3, 8)
      assert_equal [ProgramNode, StatementsNode, CallNode, ArgumentsNode, CallNode, ArgumentsNode], chain.map(&:class)
    end
  end
end

Просмотреть файл

@ -1,307 +0,0 @@
# frozen_string_literal: true
require_relative "test_helper"
module Prism
class RubyAPITest < TestCase
unless ENV["PRISM_BUILD_MINIMAL"]
  # The string-based and file-based entry points should agree with each
  # other, and a dumped-then-loaded tree should match a directly parsed one.
  def test_ruby_api
    path = __FILE__
    contents = File.read(path, binmode: true, external_encoding: Encoding::UTF_8)

    assert_equal(Prism.lex(contents, filepath: path).value, Prism.lex_file(path).value)
    assert_equal(Prism.dump(contents, filepath: path), Prism.dump_file(path))

    dumped = Prism.dump(contents, filepath: path)
    from_dump = Prism.load(contents, dumped).value
    from_source = Prism.parse(contents, filepath: path).value
    from_file = Prism.parse_file(path).value

    assert_equal_nodes(from_dump, from_source)
    assert_equal_nodes(from_source, from_file)
  end
end
# parse_success? is expected to be truthy for "1" and falsy for "<>".
def test_parse_success?
  assert(Prism.parse_success?("1"))
  refute(Prism.parse_success?("<>"))
end
# This test file itself should parse successfully.
def test_parse_file_success?
  assert(Prism.parse_file_success?(__FILE__))
end
# Exercise the filepath, line, frozen_string_literal and scopes options and
# check their effect on the parsed tree.
def test_options
  # Parses the source with the given options and returns the first statement.
  first_statement = ->(source, **options) { Prism.parse(source, **options).value.statements.body[0] }

  assert_equal "", first_statement.("__FILE__").filepath
  assert_equal "foo.rb", first_statement.("__FILE__", filepath: "foo.rb").filepath

  assert_equal 1, first_statement.("foo").location.start_line
  assert_equal 10, first_statement.("foo", line: 10).location.start_line

  refute first_statement.("\"foo\"").frozen?
  assert first_statement.("\"foo\"", frozen_string_literal: true).frozen?
  refute first_statement.("\"foo\"", frozen_string_literal: false).frozen?

  assert_kind_of Prism::CallNode, first_statement.("foo")
  assert_kind_of Prism::LocalVariableReadNode, first_statement.("foo", scopes: [[:foo]])
  assert_equal 1, first_statement.("foo", scopes: [[:foo], []]).depth
  assert_equal [:foo], Prism.parse("foo", scopes: [[:foo]]).value.locals
end
# The #value of each numeric literal node should equal the Ruby value that
# the same literal evaluates to.
def test_literal_value_method
  [
    [123, "123"],
    [3.14, "3.14"],
    [42i, "42i"],
    [42.1ri, "42.1ri"],
    [3.14i, "3.14i"],
    [42r, "42r"],
    [0.5r, "0.5r"],
    [42ri, "42ri"],
    [0.5ri, "0.5ri"],
    [0xFFr, "0xFFr"],
    [0xFFri, "0xFFri"]
  ].each do |expected, source|
    assert_equal expected, parse_expression(source).value
  end
end
# Joining an earlier location to a later one from the same parse spans both;
# joining in the reverse order, or across two separate parses, is expected
# to raise a RuntimeError.
def test_location_join
  receiver, arguments_node = parse_expression("1234 + 567").child_nodes
  argument = arguments_node.arguments[0]

  joined = receiver.location.join(argument.location)
  assert_equal 0, joined.start_offset
  assert_equal 10, joined.length

  assert_raise(RuntimeError, "Incompatible locations") do
    argument.location.join(receiver.location)
  end

  # A node that comes from a different parse of the same text.
  foreign_argument = parse_expression("1234 + 567").arguments.arguments[0]

  [[foreign_argument, receiver], [receiver, foreign_argument]].each do |first, second|
    assert_raise(RuntimeError, "Incompatible sources") do
      first.location.join(second.location)
    end
  end
end
# Character offsets and columns should count each emoji as one character.
def test_location_character_offsets
  program = Prism.parse("😀 + 😀\n😍 ||= 😍").value

  # Checks, in order: start offset, end offset, start column, end column.
  verify = ->(location, start_offset, end_offset, start_column, end_column) do
    assert_equal start_offset, location.start_character_offset
    assert_equal end_offset, location.end_character_offset
    assert_equal start_column, location.start_character_column
    assert_equal end_column, location.end_character_column
  end

  first_statement = program.statements.body.first
  last_statement = program.statements.body.last

  # first 😀
  verify.(first_statement.receiver.location, 0, 1, 0, 1)

  # second 😀
  verify.(first_statement.arguments.arguments.first.location, 4, 5, 4, 5)

  # first 😍
  verify.(last_statement.name_loc, 6, 7, 0, 1)

  # second 😍
  verify.(last_statement.value.location, 12, 13, 6, 7)
end
# Code unit offsets and columns are checked under UTF-8, UTF-16LE and
# UTF-32LE for each of the four emoji in the source.
def test_location_code_units
  program = Prism.parse("😀 + 😀\n😍 ||= 😍").value
  encodings = [Encoding::UTF_8, Encoding::UTF_16LE, Encoding::UTF_32LE]

  # expectations maps a reader method name to the values expected for the
  # three encodings above, in that order.
  verify = ->(location, expectations) do
    expectations.each do |reader, values|
      encodings.zip(values) do |encoding, value|
        assert_equal value, location.public_send(reader, encoding)
      end
    end
  end

  first_statement = program.statements.body.first
  last_statement = program.statements.body.last

  # first 😀
  verify.(first_statement.receiver.location, {
    start_code_units_offset: [0, 0, 0],
    end_code_units_offset: [1, 2, 1],
    start_code_units_column: [0, 0, 0],
    end_code_units_column: [1, 2, 1]
  })

  # second 😀
  verify.(first_statement.arguments.arguments.first.location, {
    start_code_units_offset: [4, 5, 4],
    end_code_units_offset: [5, 7, 5],
    start_code_units_column: [4, 5, 4],
    end_code_units_column: [5, 7, 5]
  })

  # first 😍
  verify.(last_statement.name_loc, {
    start_code_units_offset: [6, 8, 6],
    end_code_units_offset: [7, 10, 7],
    start_code_units_column: [0, 0, 0],
    end_code_units_column: [1, 2, 1]
  })

  # second 😍
  verify.(last_statement.value.location, {
    start_code_units_offset: [12, 15, 12],
    end_code_units_offset: [13, 17, 13],
    start_code_units_column: [6, 7, 6],
    end_code_units_column: [7, 9, 7]
  })
end
# Chopping a location shortens its slice by one character from the end, and
# chopping repeatedly settles on the empty string.
def test_location_chop
  location = Prism.parse("foo").value.location

  assert_equal "fo", location.chop.slice
  assert_equal "", location.chop.chop.chop.slice

  # Check that we don't go negative.
  exhausted = 10.times.reduce(location) { |current, _| current.chop }
  assert_equal "", exhausted.slice
end
# slice_lines on the method definition should return the full source lines
# it sits on, including the leading "private " and the trailing newline.
def test_location_slice_lines
  program = Prism.parse("\nprivate def foo\nend\n").value
  definition = program.statements.body.first.arguments.arguments.first

  assert_equal "private def foo\nend\n", definition.slice_lines
end
# heredoc? should be false for quoted and backtick literals and true for the
# heredoc forms, with and without interpolation.
def test_heredoc?
  ["\"foo\"", "\"foo \#{1}\"", "`foo`", "`foo \#{1}`"].each do |source|
    refute parse_expression(source).heredoc?
  end

  [
    "<<~HERE\nfoo\nHERE\n",
    "<<~HERE\nfoo \#{1}\nHERE\n",
    "<<~`HERE`\nfoo\nHERE\n",
    "<<~`HERE`\nfoo \#{1}\nHERE\n"
  ].each do |source|
    assert parse_expression(source).heredoc?
  end
end
# Through some bit hackery, we want to allow consumers to use the integer
# base flags as the base itself. It has a nice property that the current
# alignment provides them in the correct order. So here we test that our
# assumption holds so that it doesn't change out from under us.
#
# In C, this would look something like:
#
# ((flags & ~DECIMAL) << 1) || 10
#
# We have to do some other work in Ruby because 0 is truthy and ~ on an
# integer doesn't have a fixed width.
def test_integer_base_flags
  base_of = lambda do |node|
    shifted = (node.send(:flags) & (0b1111 - IntegerBaseFlags::DECIMAL)) << 1
    shifted == 0 ? 10 : shifted
  end

  [[2, "0b1"], [8, "0o1"], [10, "0d1"], [16, "0x1"]].each do |expected_base, source|
    assert_equal expected_base, base_of[parse_expression(source)]
  end
end
# Nodes parsed from identical source should match with ===, and nodes parsed
# from differing source should not.
def test_node_equality
  assert_operator parse_expression("1"), :===, parse_expression("1")
  assert_operator Prism.parse("1").value, :===, Prism.parse("1").value

  complex_source = "class Something; @var = something.else { _1 }; end"
  assert_operator parse_expression(complex_source), :===, parse_expression(complex_source)

  [
    ["1", "2"],
    ["1", "0x1"],
    [
      "class Something; @var = something.else { _1 }; end",
      "class Something; @var = something.else { _2 }; end"
    ]
  ].each do |left_source, right_source|
    refute_operator parse_expression(left_source), :===, parse_expression(right_source)
  end
end
# Check that tunnelling to a line/column pair ends on the expected integer
# literal, and that a position between nodes yields the expected chain of
# enclosing node classes.
def test_node_tunnel
  program = Prism.parse("foo(1) +\n bar(2, 3) +\n baz(3, 4, 5)").value

  # Each entry is [line, column, expected integer value].
  [[1, 4, 1], [2, 6, 2], [3, 9, 4]].each do |line, column, value|
    innermost = program.tunnel(line, column).last
    assert_kind_of IntegerNode, innermost
    assert_equal value, innermost.value
  end

  chain = program.tunnel(3, 8)
  assert_equal [ProgramNode, StatementsNode, CallNode, ArgumentsNode, CallNode, ArgumentsNode], chain.map(&:class)
end
# Adjoining "=" to the message location of `foo.bar = 1` should produce a
# new Location covering offsets 4 through 9.
def test_location_adjoin
  call = Prism.parse("foo.bar = 1").value.statements.body.first
  message_location = call.message_loc
  extended = message_location.adjoin("=")

  assert_kind_of Location, extended
  refute_equal message_location, extended
  assert_equal 4, extended.start_offset
  assert_equal 9, extended.end_offset
end
private
# Parse the source and return the first statement of the resulting program.
def parse_expression(source)
  program = Prism.parse(source).value
  program.statements.body.first
end
end
end

Просмотреть файл

@ -1,135 +0,0 @@
# frozen_string_literal: true
return if RUBY_ENGINE == "jruby"
require_relative "test_helper"
begin
require "ruby_parser"
rescue LoadError
# In CRuby's CI, we're not going to test against the ruby_parser gem because
# we don't want to have to install it. So in this case we'll just skip this
# test.
return
end
# Equality on Sexp is extended so that two nodes only compare equal when
# their line, line_max and file match as well. This lets the tests verify
# that those fields are being set correctly.
position_aware_equality = Module.new do
  def ==(other)
    same_structure = super
    same_structure && line == other.line && line_max == other.line_max && file == other.file
  end
end

Sexp.prepend(position_aware_equality)
module Prism
class RubyParserTest < TestCase
base = File.join(__dir__, "fixtures")
todos = %w[
newline_terminated.txt
regex_char_width.txt
seattlerb/bug169.txt
seattlerb/masgn_colon3.txt
seattlerb/messy_op_asgn_lineno.txt
seattlerb/op_asgn_primary_colon_const_command_call.txt
seattlerb/regexp_esc_C_slash.txt
seattlerb/str_lit_concat_bad_encodings.txt
unescaping.txt
unparser/corpus/literal/kwbegin.txt
unparser/corpus/literal/send.txt
whitequark/masgn_const.txt
whitequark/ruby_bug_12402.txt
whitequark/ruby_bug_14690.txt
whitequark/space_args_block.txt
]
# https://github.com/seattlerb/ruby_parser/issues/344
failures = %w[
alias.txt
dos_endings.txt
heredocs_with_ignored_newlines.txt
method_calls.txt
methods.txt
multi_write.txt
not.txt
patterns.txt
regex.txt
seattlerb/and_multi.txt
seattlerb/heredoc__backslash_dos_format.txt
seattlerb/heredoc_bad_hex_escape.txt
seattlerb/heredoc_bad_oct_escape.txt
seattlerb/heredoc_with_extra_carriage_horrible_mix.txt
seattlerb/heredoc_with_extra_carriage_returns_windows.txt
seattlerb/heredoc_with_only_carriage_returns_windows.txt
seattlerb/heredoc_with_only_carriage_returns.txt
spanning_heredoc_newlines.txt
spanning_heredoc.txt
tilde_heredocs.txt
unparser/corpus/literal/literal.txt
while.txt
whitequark/cond_eflipflop.txt
whitequark/cond_iflipflop.txt
whitequark/cond_match_current_line.txt
whitequark/dedenting_heredoc.txt
whitequark/lvar_injecting_match.txt
whitequark/not.txt
whitequark/numparam_ruby_bug_19025.txt
whitequark/op_asgn_cmd.txt
whitequark/parser_bug_640.txt
whitequark/parser_slash_slash_n_escaping_in_literals.txt
whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt
whitequark/pattern_matching_single_line.txt
whitequark/ruby_bug_11989.txt
whitequark/slash_newline_in_heredocs.txt
]
# Define one test per fixture file, leaving out the known failures.
Dir["**/*.txt", base: base].reject { |name| failures.include?(name) }.each do |name|
  define_method("test_#{name}") do
    # Parsing with ruby parser tends to be noisy with warnings, so we're
    # turning those off.
    previous_verbose = $VERBOSE
    $VERBOSE = nil
    assert_parse_file(base, name, todos.include?(name))
  ensure
    $VERBOSE = previous_verbose
  end
end
private
# Parse the named fixture with both ruby_parser and the prism translation
# layer and compare the results.
#
# base            - directory containing the fixtures.
# name            - fixture path relative to base.
# allowed_failure - when true no assertion is made; a notice is printed
#                   instead if the two results turn out to be equal.
def assert_parse_file(base, name, allowed_failure)
  filepath = File.join(base, name)
  expected = ::RubyParser.new.parse(File.read(filepath), filepath)
  actual = Prism::Translation::RubyParser.parse_file(filepath)

  if allowed_failure
    puts "#{name} now passes" if expected == actual
  else
    assert_equal_nodes expected, actual
  end
end
# Assert that two values are equal, descending into Sexp nodes so that the
# failure points at the first differing line, file, length or field rather
# than at the whole tree.
def assert_equal_nodes(left, right)
  return if left == right
  return assert_equal(left, right) unless left.is_a?(Sexp) && right.is_a?(Sexp)

  if left.line != right.line
    assert_equal "(#{left.inspect} line=#{left.line})", "(#{right.inspect} line=#{right.line})"
  elsif left.file != right.file
    assert_equal "(#{left.inspect} file=#{left.file})", "(#{right.inspect} file=#{right.file})"
  elsif left.length != right.length
    assert_equal "(#{left.inspect} length=#{left.length})", "(#{right.inspect} length=#{right.length})"
  else
    left.zip(right).each do |left_field, right_field|
      assert_equal_nodes(left_field, right_field)
    end
  end
end
end
end

Просмотреть файл

@ -0,0 +1,73 @@
# frozen_string_literal: true
require_relative "test_helper"
module Prism
  class SnapshotsTest < TestCase
    # When we pretty-print the trees to compare against the snapshots, we want
    # to be certain that we print with the same external encoding. This is
    # because methods like Symbol#inspect take into account external encoding
    # and it could change how the snapshot is generated. On machines with
    # certain settings (like LANG=C or -Eascii-8bit) this could have been
    # changed. So here we're going to force it to be UTF-8 to keep the snapshots
    # consistent.
    def setup
      @previous_default_external = Encoding.default_external

      ignore_warnings do
        Encoding.default_external = Encoding::UTF_8
      end
    end

    # Put back the external encoding that was in effect before setup ran.
    def teardown
      ignore_warnings do
        Encoding.default_external = @previous_default_external
      end
    end

    # These fail on TruffleRuby due to a difference in Symbol#inspect:
    # :测试 vs :"测试"
    except =
      if RUBY_ENGINE == "truffleruby"
        [
          "emoji_method_calls.txt",
          "seattlerb/bug202.txt",
          "seattlerb/magic_encoding_comment.txt"
        ]
      else
        []
      end

    Fixture.each(except: except) do |fixture|
      define_method(fixture.test_name) { assert_snapshot(fixture) }
    end

    private

    # Parse the fixture, pretty-print the tree and compare it with the saved
    # snapshot. A missing snapshot is created; a stale one is overwritten
    # before the comparison is asserted against its previous contents.
    def assert_snapshot(fixture)
      source = fixture.read
      result = Prism.parse(source, filepath: fixture.path)
      assert result.success?

      printed = PP.pp(result.value, +"", 79)
      snapshot = fixture.snapshot_path

      unless File.exist?(snapshot)
        # If the snapshot file does not yet exist, then write it out now.
        directory = File.dirname(snapshot)
        FileUtils.mkdir_p(directory) unless File.directory?(directory)

        File.write(snapshot, printed)
        warn("Created snapshot at #{snapshot}.")
        return
      end

      saved = File.read(snapshot)

      # If the snapshot file exists, but the printed value does not match the
      # snapshot, then update the snapshot file.
      unless printed == saved
        File.write(snapshot, printed)
        warn("Updated snapshot at #{snapshot}.")
      end

      # If the snapshot file exists, then assert that the printed value
      # matches the snapshot.
      assert_equal(saved, printed)
    end
  end
end

Просмотреть файл

@ -0,0 +1,42 @@
# frozen_string_literal: true
require_relative "test_helper"
module Prism
  class SnippetsTest < TestCase
    # Fixtures that are not put through the snippet-by-snippet check.
    except = [
      "newline_terminated.txt",
      "seattlerb/begin_rescue_else_ensure_no_bodies.txt",
      "seattlerb/case_in.txt",
      "seattlerb/parse_line_defn_no_parens.txt",
      "seattlerb/pct_nl.txt",
      "seattlerb/str_heredoc_interp.txt",
      "spanning_heredoc_newlines.txt",
      "unparser/corpus/semantic/dstr.txt",
      "whitequark/dedenting_heredoc.txt",
      "whitequark/multiple_pattern_matches.txt"
    ]

    Fixture.each(except: except) do |fixture|
      define_method(fixture.test_name) { assert_snippets(fixture) }
    end

    private

    # We test every snippet (separated by \n\n) in isolation to ensure the
    # parser does not try to read bytes further than the end of each snippet.
    def assert_snippets(fixture)
      fixture.read.split(/(?<=\S)\n\n(?=\S)/).each do |raw_snippet|
        snippet = raw_snippet.rstrip

        result = Prism.parse(snippet, filepath: fixture.path)
        assert result.success?

        # The serialization round trip is not checked in minimal builds.
        next if ENV["PRISM_BUILD_MINIMAL"]

        dumped = Prism.dump(snippet, filepath: fixture.path)
        assert_equal_nodes(result.value, Prism.load(snippet, dumped).value)
      end
    end
  end
end

Просмотреть файл

@ -1,8 +1,9 @@
# frozen_string_literal: true
require "prism"
require "ripper"
require "pp"
require "ripper"
require "stringio"
require "test/unit"
require "tempfile"
@ -16,19 +17,202 @@ if defined?(Test::Unit::Assertions::AssertionMessage)
end
module Prism
# Parse the source string with the given options and return the first
# statement of the resulting program.
def self.parse_statement(source, **options)
  program = parse(source, **options).value
  program.statements.body.first
end
class ParseResult < Result
  # The first statement in the body of the parsed program.
  def statement
    statements = value.statements
    statements.body.first
  end
end
class TestCase < ::Test::Unit::TestCase
# Fixtures are .txt files under the fixtures directory next to this file, and
# they drive most of the parser tests. Test files usually call Fixture.each,
# which yields one Fixture per file, and define a test method for each one
# that asserts something about it.
class Fixture
  BASE = File.join(__dir__, "fixtures")

  # The fixture's path relative to BASE.
  attr_reader :path

  def initialize(path)
    @path = path
  end

  # The fixture's contents, read in binary mode and tagged as UTF-8.
  def read
    File.read(full_path, binmode: true, external_encoding: Encoding::UTF_8)
  end

  # The fixture's location on disk.
  def full_path
    File.join(BASE, path)
  end

  # Where the snapshot that corresponds to this fixture is stored.
  def snapshot_path
    File.join(__dir__, "snapshots", path)
  end

  # The name of the test method generated for this fixture.
  def test_name
    "test_#{path}".to_sym
  end

  # Yield a Fixture for every file matching the FOCUS environment variable
  # (or every .txt file when it is not set), leaving out the paths listed in
  # except.
  def self.each(except: [], &block)
    pattern = ENV.fetch("FOCUS") { File.join("**", "*.txt") }
    selected = Dir[pattern, base: BASE] - except

    selected.each do |path|
      yield Fixture.new(path)
    end
  end
end
# Yield each encoding that we want to test, along with a range of the
# codepoints that should be tested.
#
# Only ASCII-8BIT, US-ASCII and (unless PRISM_BUILD_MINIMAL is set)
# Windows-1253 are yielded by default. Setting PRISM_TEST_ALL_ENCODINGS
# yields every encoding listed below as well.
#
# Each codepoint is an Integer whose bytes, most significant first, are the
# bytes of the encoded character.
def self.each_encoding
  codepoints_1byte = 0...0x100

  yield Encoding::ASCII_8BIT, codepoints_1byte
  yield Encoding::US_ASCII, codepoints_1byte

  if !ENV["PRISM_BUILD_MINIMAL"]
    yield Encoding::Windows_1253, codepoints_1byte
  end

  # By default we don't test every codepoint in these encodings because it
  # takes a very long time.
  return unless ENV["PRISM_TEST_ALL_ENCODINGS"]

  yield Encoding::CP850, codepoints_1byte
  yield Encoding::CP852, codepoints_1byte
  yield Encoding::CP855, codepoints_1byte
  yield Encoding::GB1988, codepoints_1byte
  yield Encoding::IBM437, codepoints_1byte
  yield Encoding::IBM720, codepoints_1byte
  yield Encoding::IBM737, codepoints_1byte
  yield Encoding::IBM775, codepoints_1byte
  yield Encoding::IBM852, codepoints_1byte
  yield Encoding::IBM855, codepoints_1byte
  yield Encoding::IBM857, codepoints_1byte
  yield Encoding::IBM860, codepoints_1byte
  yield Encoding::IBM861, codepoints_1byte
  yield Encoding::IBM862, codepoints_1byte
  yield Encoding::IBM863, codepoints_1byte
  yield Encoding::IBM864, codepoints_1byte
  yield Encoding::IBM865, codepoints_1byte
  yield Encoding::IBM866, codepoints_1byte
  yield Encoding::IBM869, codepoints_1byte
  yield Encoding::ISO_8859_1, codepoints_1byte
  yield Encoding::ISO_8859_2, codepoints_1byte
  yield Encoding::ISO_8859_3, codepoints_1byte
  yield Encoding::ISO_8859_4, codepoints_1byte
  yield Encoding::ISO_8859_5, codepoints_1byte
  yield Encoding::ISO_8859_6, codepoints_1byte
  yield Encoding::ISO_8859_7, codepoints_1byte
  yield Encoding::ISO_8859_8, codepoints_1byte
  yield Encoding::ISO_8859_9, codepoints_1byte
  yield Encoding::ISO_8859_10, codepoints_1byte
  yield Encoding::ISO_8859_11, codepoints_1byte
  yield Encoding::ISO_8859_13, codepoints_1byte
  yield Encoding::ISO_8859_14, codepoints_1byte
  yield Encoding::ISO_8859_15, codepoints_1byte
  yield Encoding::ISO_8859_16, codepoints_1byte
  yield Encoding::KOI8_R, codepoints_1byte
  yield Encoding::KOI8_U, codepoints_1byte
  yield Encoding::MACCENTEURO, codepoints_1byte
  yield Encoding::MACCROATIAN, codepoints_1byte
  yield Encoding::MACCYRILLIC, codepoints_1byte
  yield Encoding::MACGREEK, codepoints_1byte
  yield Encoding::MACICELAND, codepoints_1byte
  yield Encoding::MACROMAN, codepoints_1byte
  yield Encoding::MACROMANIA, codepoints_1byte
  yield Encoding::MACTHAI, codepoints_1byte
  yield Encoding::MACTURKISH, codepoints_1byte
  yield Encoding::MACUKRAINE, codepoints_1byte
  yield Encoding::TIS_620, codepoints_1byte
  yield Encoding::Windows_1250, codepoints_1byte
  yield Encoding::Windows_1251, codepoints_1byte
  yield Encoding::Windows_1252, codepoints_1byte
  yield Encoding::Windows_1254, codepoints_1byte
  yield Encoding::Windows_1255, codepoints_1byte
  yield Encoding::Windows_1256, codepoints_1byte
  yield Encoding::Windows_1257, codepoints_1byte
  yield Encoding::Windows_1258, codepoints_1byte
  yield Encoding::Windows_874, codepoints_1byte

  codepoints_2bytes = 0...0x10000

  yield Encoding::Big5, codepoints_2bytes
  yield Encoding::Big5_HKSCS, codepoints_2bytes
  yield Encoding::Big5_UAO, codepoints_2bytes
  yield Encoding::CP949, codepoints_2bytes
  yield Encoding::CP950, codepoints_2bytes
  yield Encoding::CP951, codepoints_2bytes
  yield Encoding::EUC_KR, codepoints_2bytes
  yield Encoding::GBK, codepoints_2bytes
  yield Encoding::GB12345, codepoints_2bytes
  yield Encoding::GB2312, codepoints_2bytes
  yield Encoding::MACJAPANESE, codepoints_2bytes
  yield Encoding::Shift_JIS, codepoints_2bytes
  yield Encoding::SJIS_DoCoMo, codepoints_2bytes
  yield Encoding::SJIS_KDDI, codepoints_2bytes
  yield Encoding::SJIS_SoftBank, codepoints_2bytes
  yield Encoding::Windows_31J, codepoints_2bytes

  codepoints_unicode = (0...0x110000)

  yield Encoding::UTF_8, codepoints_unicode
  yield Encoding::UTF8_MAC, codepoints_unicode
  yield Encoding::UTF8_DoCoMo, codepoints_unicode
  yield Encoding::UTF8_KDDI, codepoints_unicode
  yield Encoding::UTF8_SoftBank, codepoints_unicode
  yield Encoding::CESU_8, codepoints_unicode

  codepoints_eucjp = [
    *(0...0x10000),
    *(0...0x10000).map { |bytes| bytes | 0x8F0000 }
  ]

  yield Encoding::CP51932, codepoints_eucjp
  yield Encoding::EUC_JP, codepoints_eucjp
  yield Encoding::EUCJP_MS, codepoints_eucjp
  yield Encoding::EUC_JIS_2004, codepoints_eucjp

  # One list per sequence length (1 to 4 bytes). The 4-byte form is a fixed
  # 0x9C lead byte followed by byte2, byte3 and byte4; every one of the three
  # must be shifted into place so that each combination is generated once.
  codepoints_emacs_mule = [
    *(0...0x80),
    *((0x81...0x90).flat_map { |byte1| (0x90...0x100).map { |byte2| byte1 << 8 | byte2 } }),
    *((0x90...0x9C).flat_map { |byte1| (0xA0...0x100).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| byte1 << 16 | byte2 << 8 | byte3 } } }),
    *((0xF0...0xF5).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| (0xA0...0x100).flat_map { |byte4| 0x9C << 24 | byte2 << 16 | byte3 << 8 | byte4 } } }),
  ]

  yield Encoding::EMACS_MULE, codepoints_emacs_mule
  yield Encoding::STATELESS_ISO_2022_JP, codepoints_emacs_mule
  yield Encoding::STATELESS_ISO_2022_JP_KDDI, codepoints_emacs_mule

  codepoints_gb18030 = [
    *(0...0x80),
    *((0x81..0xFE).flat_map { |byte1| (0x40...0x100).map { |byte2| byte1 << 8 | byte2 } }),
    *((0x81..0xFE).flat_map { |byte1| (0x30...0x40).flat_map { |byte2| (0x81..0xFE).flat_map { |byte3| (0x2F...0x41).map { |byte4| byte1 << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } }),
  ]

  yield Encoding::GB18030, codepoints_gb18030

  codepoints_euc_tw = [
    *(0..0x7F),
    *(0xA1..0xFF).flat_map { |byte1| (0xA1..0xFF).map { |byte2| (byte1 << 8) | byte2 } },
    *(0xA1..0xB0).flat_map { |byte2| (0xA1..0xFF).flat_map { |byte3| (0xA1..0xFF).flat_map { |byte4| 0x8E << 24 | byte2 << 16 | byte3 << 8 | byte4 } } }
  ]

  yield Encoding::EUC_TW, codepoints_euc_tw
end
private
if RUBY_ENGINE == "ruby"
# Check that the given source is valid syntax by compiling it with RubyVM.
# Warnings emitted during compilation are silenced through ignore_warnings.
#
# Returns the compiled instruction sequence; whatever error the compiler
# raises for invalid source propagates to the caller.
def check_syntax(source)
  ignore_warnings { RubyVM::InstructionSequence.compile(source) }
end
# Assert that the given source is valid Ruby syntax by attempting to
@ -51,6 +235,8 @@ module Prism
end
end
# CRuby defines this same method, so it is defined here as well to avoid
# accidentally breaking CRuby CI. Calling it always raises, pointing the
# caller at assert_raise.
def assert_raises(*args, &block)
  raise RuntimeError, "Use assert_raise instead"
end
@ -122,5 +308,16 @@ module Prism
assert_equal expected, actual
end
end
# Run the given block with $VERBOSE set to nil, restoring the previous value
# afterwards even if the block raises. Returns the block's result.
def ignore_warnings
  saved_verbose, $VERBOSE = $VERBOSE, nil
  yield
ensure
  $VERBOSE = saved_verbose
end
end
end

Просмотреть файл

@ -2,7 +2,7 @@
require_relative "test_helper"
return if RUBY_VERSION < "3.1.0" || Prism::BACKEND == :FFI
return if RUBY_VERSION < "3.1.0"
module Prism
class UnescapeTest < TestCase
@ -41,7 +41,7 @@ module Prism
result = Prism.parse(code(escape), encoding: "binary")
if result.success?
yield result.value.statements.body.first
yield result.statement
else
:error
end