[ruby/prism] Avoid extra String copies in the FFI backend

* For Prism.parse_file, the file contents were first read into a native
  string, then copied into a Ruby String, and then copied back into a
  native string for pm_serialize_parse().
* Refactor the logic to always use a pm_string for the source code and
  pass that to other native functions.

https://github.com/ruby/prism/commit/9002b3c47d
This commit is contained in:
Benoit Daloze 2024-02-14 20:57:51 +01:00 коммит произвёл git
Родитель be82755d4a
Коммит 8f17b3bd27
1 изменённых файлов: 101 добавлений и 75 удалений

Просмотреть файл

@@ -119,15 +119,12 @@ module Prism
# Initialize a new buffer and yield it to the block. The buffer will be # Initialize a new buffer and yield it to the block. The buffer will be
# automatically freed when the block returns. # automatically freed when the block returns.
def self.with(&block) def self.with
pointer = FFI::MemoryPointer.new(SIZEOF) FFI::MemoryPointer.new(SIZEOF) do |pointer|
begin
raise unless LibRubyParser.pm_buffer_init(pointer) raise unless LibRubyParser.pm_buffer_init(pointer)
yield new(pointer) return yield new(pointer)
ensure ensure
LibRubyParser.pm_buffer_free(pointer) LibRubyParser.pm_buffer_free(pointer)
pointer.free
end end
end end
end end
@@ -137,39 +134,47 @@ module Prism
class PrismString # :nodoc: class PrismString # :nodoc:
SIZEOF = LibRubyParser.pm_string_sizeof SIZEOF = LibRubyParser.pm_string_sizeof
attr_reader :pointer attr_reader :pointer, :length
def initialize(pointer) def initialize(pointer, length, from_string)
@pointer = pointer @pointer = pointer
end @length = length
@from_string = from_string
def source
LibRubyParser.pm_string_source(pointer)
end
def length
LibRubyParser.pm_string_length(pointer)
end end
def read def read
source.read_string(length) raise "should use the original String instead" if @from_string
@pointer.read_string(@length)
end end
# Yields a pm_string_t pointer to the given block. # Yields a pm_string_t pointer to the given block.
def self.with(filepath, &block) def self.with_string(string)
pointer = FFI::MemoryPointer.new(SIZEOF) raise TypeError unless string.is_a?(String)
begin length = string.bytesize
raise TypeError unless filepath.is_a?(String) # + 1 to never get an address of 0, which pm_parser_init() asserts
FFI::MemoryPointer.new(:char, length + 1, false) do |pointer|
pointer.write_string(string)
# since we have the extra byte we might as well \0-terminate
pointer.put_char(length, 0)
return yield new(pointer, length, true)
end
end
if LibRubyParser.pm_string_mapped_init(pointer, filepath) # Yields a pm_string_t pointer to the given block.
yield new(pointer) def self.with_file(filepath)
raise TypeError unless filepath.is_a?(String)
FFI::MemoryPointer.new(SIZEOF) do |pm_string|
if LibRubyParser.pm_string_mapped_init(pm_string, filepath)
pointer = LibRubyParser.pm_string_source(pm_string)
length = LibRubyParser.pm_string_length(pm_string)
return yield new(pointer, length, false)
else else
raise SystemCallError.new(filepath, FFI.errno) raise SystemCallError.new(filepath, FFI.errno)
end end
ensure ensure
LibRubyParser.pm_string_free(pointer) LibRubyParser.pm_string_free(pm_string)
pointer.free
end end
end end
end end
@@ -185,52 +190,100 @@ module Prism
class << self class << self
# Mirror the Prism.dump API by using the serialization API. # Mirror the Prism.dump API by using the serialization API.
def dump(code, **options) def dump(code, **options)
LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) }
LibRubyParser.pm_serialize_parse(buffer.pointer, code, code.bytesize, dump_options(options))
buffer.read
end
end end
# Mirror the Prism.dump_file API by using the serialization API. # Mirror the Prism.dump_file API by using the serialization API.
def dump_file(filepath, **options) def dump_file(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string| options[:filepath] = filepath
dump(string.read, **options, filepath: filepath) LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) }
end
end end
# Mirror the Prism.lex API by using the serialization API. # Mirror the Prism.lex API by using the serialization API.
def lex(code, **options) def lex(code, **options)
LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) }
LibRubyParser.pm_serialize_lex(buffer.pointer, code, code.bytesize, dump_options(options))
Serialize.load_tokens(Source.new(code), buffer.read)
end
end end
# Mirror the Prism.lex_file API by using the serialization API. # Mirror the Prism.lex_file API by using the serialization API.
def lex_file(filepath, **options) def lex_file(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string| options[:filepath] = filepath
lex(string.read, **options, filepath: filepath) LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) }
end
end end
# Mirror the Prism.parse API by using the serialization API. # Mirror the Prism.parse API by using the serialization API.
def parse(code, **options) def parse(code, **options)
Prism.load(code, dump(code, **options)) LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) }
end end
# Mirror the Prism.parse_file API by using the serialization API. This uses # Mirror the Prism.parse_file API by using the serialization API. This uses
# native strings instead of Ruby strings because it allows us to use mmap when # native strings instead of Ruby strings because it allows us to use mmap when
# it is available. # it is available.
def parse_file(filepath, **options) def parse_file(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string| options[:filepath] = filepath
parse(string.read, **options, filepath: filepath) LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) }
end
end end
# Mirror the Prism.parse_comments API by using the serialization API. # Mirror the Prism.parse_comments API by using the serialization API.
def parse_comments(code, **options) def parse_comments(code, **options)
LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) }
end
# Mirror the Prism.parse_file_comments API by using the serialization
# API. This uses native strings instead of Ruby strings because it allows us
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
end
# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) }
end
# Mirror the Prism.parse_lex_file API by using the serialization API.
def parse_lex_file(filepath, **options)
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
end
# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) }
end
# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) }
end
private
def dump_common(string, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_comments(buffer.pointer, code, code.bytesize, dump_options(options)) LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
end
end
def lex_common(string, code, options) # :nodoc:
serialized = LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
end
Serialize.load_tokens(Source.new(code), serialized)
end
def parse_common(string, code, options) # :nodoc:
serialized = dump_common(string, options)
Prism.load(code, serialized)
end
def parse_comments_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options))
source = Source.new(code) source = Source.new(code)
loader = Serialize::Loader.new(source, buffer.read) loader = Serialize::Loader.new(source, buffer.read)
@@ -242,19 +295,9 @@ module Prism
end end
end end
# Mirror the Prism.parse_file_comments API by using the serialization def parse_lex_common(string, code, options) # :nodoc:
# API. This uses native strings instead of Ruby strings because it allows us
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
parse_comments(string.read, **options, filepath: filepath)
end
end
# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_lex(buffer.pointer, code, code.bytesize, dump_options(options)) LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
source = Source.new(code) source = Source.new(code)
loader = Serialize::Loader.new(source, buffer.read) loader = Serialize::Loader.new(source, buffer.read)
@@ -267,27 +310,10 @@ module Prism
end end
end end
# Mirror the Prism.parse_lex_file API by using the serialization API. def parse_file_success_common(string, options) # :nodoc:
def parse_lex_file(filepath, **options) LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options))
LibRubyParser::PrismString.with(filepath) do |string|
parse_lex(string.read, **options, filepath: filepath)
end
end end
# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
LibRubyParser.pm_parse_success_p(code, code.bytesize, dump_options(options))
end
# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
parse_success?(string.read, **options, filepath: filepath)
end
end
private
# Convert the given options into a serialized options string. # Convert the given options into a serialized options string.
def dump_options(options) def dump_options(options)
template = +"" template = +""