From 8f17b3bd27a5c62c44b9de2376b8d54334997071 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 14 Feb 2024 20:57:51 +0100 Subject: [PATCH] [ruby/prism] Avoid extra String copies in the FFI backend * For Prism.parse_file the file contents would be read as native, then converted to a Ruby String, then converted to a native String for pm_serialize_parse(). * Refactor the logic to always use a pm_string for the source code and pass that to other native functions. https://github.com/ruby/prism/commit/9002b3c47d --- lib/prism/ffi.rb | 176 +++++++++++++++++++++++++++-------------------- 1 file changed, 101 insertions(+), 75 deletions(-) diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 3418d787da..d09198a25d 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -119,15 +119,12 @@ module Prism # Initialize a new buffer and yield it to the block. The buffer will be # automatically freed when the block returns. - def self.with(&block) - pointer = FFI::MemoryPointer.new(SIZEOF) - - begin + def self.with + FFI::MemoryPointer.new(SIZEOF) do |pointer| raise unless LibRubyParser.pm_buffer_init(pointer) - yield new(pointer) + return yield new(pointer) ensure LibRubyParser.pm_buffer_free(pointer) - pointer.free end end end @@ -137,39 +134,47 @@ module Prism class PrismString # :nodoc: SIZEOF = LibRubyParser.pm_string_sizeof - attr_reader :pointer + attr_reader :pointer, :length - def initialize(pointer) + def initialize(pointer, length, from_string) @pointer = pointer - end - - def source - LibRubyParser.pm_string_source(pointer) - end - - def length - LibRubyParser.pm_string_length(pointer) + @length = length + @from_string = from_string end def read - source.read_string(length) + raise "should use the original String instead" if @from_string + @pointer.read_string(@length) end # Yields a pm_string_t pointer to the given block. - def self.with(filepath, &block) - pointer = FFI::MemoryPointer.new(SIZEOF) + def self.with_string(string) + raise TypeError unless string.is_a?(String) - begin - raise TypeError unless filepath.is_a?(String) + length = string.bytesize + # + 1 to never get an address of 0, which pm_parser_init() asserts + FFI::MemoryPointer.new(:char, length + 1, false) do |pointer| + pointer.write_string(string) + # since we have the extra byte we might as well \0-terminate + pointer.put_char(length, 0) + return yield new(pointer, length, true) + end + end - if LibRubyParser.pm_string_mapped_init(pointer, filepath) - yield new(pointer) + # Yields a pm_string_t pointer to the given block. + def self.with_file(filepath) + raise TypeError unless filepath.is_a?(String) + + FFI::MemoryPointer.new(SIZEOF) do |pm_string| + if LibRubyParser.pm_string_mapped_init(pm_string, filepath) + pointer = LibRubyParser.pm_string_source(pm_string) + length = LibRubyParser.pm_string_length(pm_string) + return yield new(pointer, length, false) else raise SystemCallError.new(filepath, FFI.errno) end ensure - LibRubyParser.pm_string_free(pointer) - pointer.free + LibRubyParser.pm_string_free(pm_string) end end end @@ -185,52 +190,100 @@ module Prism class << self # Mirror the Prism.dump API by using the serialization API. def dump(code, **options) - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse(buffer.pointer, code, code.bytesize, dump_options(options)) - buffer.read - end + LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) } end # Mirror the Prism.dump_file API by using the serialization API. def dump_file(filepath, **options) - LibRubyParser::PrismString.with(filepath) do |string| - dump(string.read, **options, filepath: filepath) - end + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) } end # Mirror the Prism.lex API by using the serialization API. def lex(code, **options) - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_lex(buffer.pointer, code, code.bytesize, dump_options(options)) - Serialize.load_tokens(Source.new(code), buffer.read) - end + LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) } end # Mirror the Prism.lex_file API by using the serialization API. def lex_file(filepath, **options) - LibRubyParser::PrismString.with(filepath) do |string| - lex(string.read, **options, filepath: filepath) - end + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) } end # Mirror the Prism.parse API by using the serialization API. def parse(code, **options) - Prism.load(code, dump(code, **options)) + LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) } end # Mirror the Prism.parse_file API by using the serialization API. This uses # native strings instead of Ruby strings because it allows us to use mmap when # it is available. def parse_file(filepath, **options) - LibRubyParser::PrismString.with(filepath) do |string| - parse(string.read, **options, filepath: filepath) - end + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) } end # Mirror the Prism.parse_comments API by using the serialization API. def parse_comments(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) } + end + + # Mirror the Prism.parse_file_comments API by using the serialization + # API. This uses native strings instead of Ruby strings because it allows us + # to use mmap when it is available. + def parse_file_comments(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + end + + # Mirror the Prism.parse_lex API by using the serialization API. + def parse_lex(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) } + end + + # Mirror the Prism.parse_lex_file API by using the serialization API. + def parse_lex_file(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + end + + # Mirror the Prism.parse_success? API by using the serialization API. + def parse_success?(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) } + end + + # Mirror the Prism.parse_file_success? API by using the serialization API. + def parse_file_success?(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) } + end + + private + + def dump_common(string, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_comments(buffer.pointer, code, code.bytesize, dump_options(options)) + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + buffer.read + end + end + + def lex_common(string, code, options) # :nodoc: + serialized = LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + buffer.read + end + + Serialize.load_tokens(Source.new(code), serialized) + end + + def parse_common(string, code, options) # :nodoc: + serialized = dump_common(string, options) + Prism.load(code, serialized) + end + + def parse_comments_common(string, code, options) # :nodoc: + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) source = Source.new(code) loader = Serialize::Loader.new(source, buffer.read) @@ -242,19 +295,9 @@ module Prism end end - # Mirror the Prism.parse_file_comments API by using the serialization - # API. This uses native strings instead of Ruby strings because it allows us - # to use mmap when it is available. - def parse_file_comments(filepath, **options) - LibRubyParser::PrismString.with(filepath) do |string| - parse_comments(string.read, **options, filepath: filepath) - end - end - - # Mirror the Prism.parse_lex API by using the serialization API. - def parse_lex(code, **options) + def parse_lex_common(string, code, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_lex(buffer.pointer, code, code.bytesize, dump_options(options)) + LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) source = Source.new(code) loader = Serialize::Loader.new(source, buffer.read) @@ -267,27 +310,10 @@ module Prism end end - # Mirror the Prism.parse_lex_file API by using the serialization API. - def parse_lex_file(filepath, **options) - LibRubyParser::PrismString.with(filepath) do |string| - parse_lex(string.read, **options, filepath: filepath) - end + def parse_file_success_common(string, options) # :nodoc: + LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options)) end - # Mirror the Prism.parse_success? API by using the serialization API. - def parse_success?(code, **options) - LibRubyParser.pm_parse_success_p(code, code.bytesize, dump_options(options)) - end - - # Mirror the Prism.parse_file_success? API by using the serialization API. - def parse_file_success?(filepath, **options) - LibRubyParser::PrismString.with(filepath) do |string| - parse_success?(string.read, **options, filepath: filepath) - end - end - - private - # Convert the given options into a serialized options string. def dump_options(options) template = +""