From 55cacadf2e86cb3b3442e783d6b29755dc922624 Mon Sep 17 00:00:00 2001 From: Arthur Schreiber Date: Fri, 12 Aug 2016 11:40:01 +0200 Subject: [PATCH] Don't use a StringIO when encoding data. When data is encoded to BERT, each individual encoded piece of the result is stored inside an Array-based Buffer. At the end, each piece is sequentially written out to a StringIO object and the underlying String is returned. Unfortunately, this sequential writing to the StringIO forces the internal String object to grow repeatedly. By calling `#join` on the Buffer's internal Array instead, Ruby can allocate a single string large enough to hold the whole result in a single step. --- lib/bert/encode.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/bert/encode.rb b/lib/bert/encode.rb index 9302be6..db0c12e 100644 --- a/lib/bert/encode.rb +++ b/lib/bert/encode.rb @@ -62,6 +62,10 @@ module BERT @buf.each { |x| io.write x } end + def to_s + @buf.join("") + end + def bytesize @buf.map(&:bytesize).inject :+ end @@ -74,11 +78,7 @@ module BERT end def self.encode(data) - buf = encode_to_buffer data - io = StringIO.new - io.set_encoding('binary') if io.respond_to?(:set_encoding) - buf.write_to io - io.string + encode_to_buffer(data).to_s end def self.encode_data(data, io)