зеркало из https://github.com/github/bert.git
Make v4, using mochilo
mochilo is like msgpack, except that it deals with encodings in a sane(r?) way. mochilo doesn't have a custom type registry, so I've added one in the "custom-type-registry" branch, and I'm using that here.

This ended up being faster than the msgpack implementation, because encoding doesn't have to pop up to ruby to deal with strings.

                          user     system      total        real
BERT v2 tiny          0.010000   0.000000   0.010000 (  0.015289)
BERT v2 small         0.130000   0.000000   0.130000 (  0.126682)
BERT v2 large         0.580000   0.000000   0.580000 (  0.592555)
BERT v2 complex       2.880000   0.010000   2.890000 (  2.903082)
BERT v2 long array    4.200000   0.000000   4.200000 (  4.262165)
BERT v3 tiny          0.020000   0.000000   0.020000 (  0.020252)
BERT v3 small         0.020000   0.000000   0.020000 (  0.024444)
BERT v3 large         0.700000   0.010000   0.710000 (  0.703152)
BERT v3 complex       0.090000   0.000000   0.090000 (  0.100372)
BERT v3 long array    1.810000   0.010000   1.820000 (  1.827934)
BERT v4 tiny          0.010000   0.000000   0.010000 (  0.013301)
BERT v4 small         0.010000   0.000000   0.010000 (  0.012317)
BERT v4 large         0.230000   0.060000   0.290000 (  0.292852)
BERT v4 complex       0.070000   0.000000   0.070000 (  0.064578)
BERT v4 long array    0.110000   0.000000   0.110000 (  0.110842)
Msgpack tiny          0.000000   0.000000   0.000000 (  0.001644)
Msgpack small         0.010000   0.000000   0.010000 (  0.002568)
Msgpack large         0.180000   0.000000   0.180000 (  0.192238)
Msgpack complex       0.040000   0.000000   0.040000 (  0.041482)
Msgpack long array    0.070000   0.000000   0.070000 (  0.065986)
This commit is contained in:
Родитель
70d85c7dbc
Коммит
40e478c76b
1
Gemfile
1
Gemfile
|
@ -5,3 +5,4 @@ source "https://rubygems.org"
|
|||
gemspec
|
||||
|
||||
gem "msgpack", :git => "https://github.com/spraints/msgpack-ruby", :ref => "custom-string"
|
||||
gem "mochilo", :git => "https://github.com/spraints/mochilo", :ref => "custom-type-registry"
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
GIT
|
||||
remote: https://github.com/spraints/mochilo
|
||||
revision: 458fcb528b704bae7bff074d9aa27ee2ccb9449d
|
||||
ref: custom-type-registry
|
||||
specs:
|
||||
mochilo (2.0)
|
||||
|
||||
GIT
|
||||
remote: https://github.com/spraints/msgpack-ruby
|
||||
revision: 8a2f5bf2e83038fa3c5366d63f93d5c6275c3a65
|
||||
|
@ -32,6 +39,7 @@ DEPENDENCIES
|
|||
bert!
|
||||
byebug
|
||||
git
|
||||
mochilo!
|
||||
msgpack!
|
||||
rake
|
||||
rake-compiler (~> 0.9.0)
|
||||
|
|
|
@ -16,7 +16,7 @@ complex = [42, {:foo => 'bac' * 100}, t[(1..100).to_a]] * 10
|
|||
long_array = {:a => ["a"]*1000}
|
||||
|
||||
Benchmark.bm(30) do |bench|
|
||||
[:v1, :v2, :v3].each do |v|
|
||||
[:v1, :v2, :v3, :v4].each do |v|
|
||||
BERT::Encode.version = v
|
||||
bench.report("BERT #{v} tiny") {ITER.times {BERT.decode(BERT.encode(tiny))}}
|
||||
bench.report("BERT #{v} small") {ITER.times {BERT.decode(BERT.encode(small))}}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
require "bert/msgpack"
|
||||
require "bert/mochilo"
|
||||
|
||||
module BERT
|
||||
class Decode
|
||||
|
@ -14,6 +15,8 @@ module BERT
|
|||
io.set_encoding('binary') if io.respond_to?(:set_encoding)
|
||||
header = io.getbyte
|
||||
case header
|
||||
when VERSION_4
|
||||
V4.new(io).read_any
|
||||
when VERSION_3
|
||||
V3.new(io).read_any
|
||||
when MAGIC, VERSION_2
|
||||
|
@ -33,6 +36,16 @@ module BERT
|
|||
end
|
||||
end
|
||||
|
||||
# Decoder for version-4 payloads.
#
# Reads everything remaining on the wrapped input and hands it to
# BERT.mochilo for unpacking. The version header byte is expected to have
# been consumed by the caller before this object is used.
class V4
  # ins - an IO-like object responding to #read.
  def initialize(ins)
    @ins = ins
  end

  # Consumes the rest of the input and unpacks it.
  #
  # Returns whatever BERT.mochilo.unpack returns for the remaining bytes.
  def read_any
    BERT.mochilo.unpack(@ins.read)
  end
end
|
||||
|
||||
def initialize(ins)
|
||||
@in = ins
|
||||
@peeked = ""
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
require "bert/msgpack"
|
||||
require "bert/mochilo"
|
||||
|
||||
module BERT
|
||||
class Encode
|
||||
|
@ -61,6 +62,25 @@ module BERT
|
|||
end
|
||||
end
|
||||
|
||||
# Encoder for version-4 payloads.
#
# Writes the one-byte version header followed by the object packed with
# BERT.mochilo.
class V4
  # The IO-like object the encoded bytes are written to.
  attr_reader :out

  # out - an IO-like object responding to #write.
  def initialize(out)
    @out = out
  end

  # Writes the version header byte and then the packed form of obj.
  #
  # obj - the Ruby object to encode.
  #
  # Returns the result of the final #write call on the output.
  def write_any(obj)
    out.write(version_header.chr)
    out.write(BERT.mochilo.pack(obj))
  end

  private

  # Integer header value that marks a payload as version 4.
  def version_header
    BERT::Encode::VERSION_4
  end
end
|
||||
|
||||
class << self
|
||||
attr_accessor :version
|
||||
end
|
||||
|
@ -105,7 +125,9 @@ module BERT
|
|||
end
|
||||
|
||||
def self.encode_data(data, io)
|
||||
if version == :v3
|
||||
if version == :v4
|
||||
Encode::V4.new(io).write_any(data)
|
||||
elsif version == :v3
|
||||
Encode::V3.new(io).write_any(data)
|
||||
elsif version == :v2
|
||||
Encode::V2.new(io).write_any(data)
|
||||
|
|
|
@ -19,7 +19,7 @@ module BERT
|
|||
#
|
||||
# Returns the converted Ruby object
|
||||
def self.convert(item)
|
||||
return item if Encode.version == :v3
|
||||
return item if Encode.version == :v3 || Encode.version == :v4
|
||||
case item
|
||||
when Hash
|
||||
pairs = []
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
require "mochilo"
|
||||
|
||||
module BERT
  # Returns the shared MochiloFactory, building it on first use.
  #
  # The instance is cached in a module-level instance variable instead of a
  # class variable, so it cannot leak into anything that includes BERT.
  def self.mochilo
    @mochilo ||= MochiloFactory.new
  end

  # Packs and unpacks Ruby objects through Mochilo, passing along the
  # custom-type tables built from #customizations (Symbol, Regexp, Time).
  class MochiloFactory
    # obj - the Ruby object to serialize.
    #
    # Returns whatever Mochilo.pack returns for obj and the packer table.
    def pack(obj)
      Mochilo.pack(obj, custom_packers)
    end

    # str - the serialized payload.
    #
    # Returns whatever Mochilo.unpack returns for str and the unpacker table.
    def unpack(str)
      Mochilo.unpack(str, custom_unpackers)
    end

    private

    # Table of { unpacked class => [type id, pack callable] }.
    # Built once per factory; the customizations never change afterwards.
    def custom_packers
      @custom_packers ||= customizations.each_with_object({}) do |customization, table|
        table[customization.unpacked_class] = [customization.type_id, customization.method(:pack)]
      end
    end

    # Table of { type id => unpack callable }.
    # Built once per factory, for the same reason as #custom_packers.
    def custom_unpackers
      @custom_unpackers ||= customizations.each_with_object({}) do |customization, table|
        table[customization.type_id] = customization.method(:unpack)
      end
    end

    # One instance of every custom type handler, created lazily.
    def customizations
      @customizations ||= [
        MochiloSymbol,
        MochiloRegexp,
        MochiloTime,
      ].map(&:new)
    end
  end

  # Custom type 0x00: a Symbol, carried as its name.
  class MochiloSymbol
    def unpacked_class
      Symbol
    end

    def type_id
      0x00
    end

    # Returns the symbol's name as a String.
    def pack(symbol)
      symbol.to_s
    end

    # Returns the Symbol named by str.
    # NOTE(review): this interns whatever name the payload carries; confirm
    # that is acceptable if payloads can come from untrusted peers.
    def unpack(str)
      str.to_sym
    end
  end

  # Custom type 0x01: a Regexp, carried as a 32-bit option mask followed by
  # the base64-encoded source.
  class MochiloRegexp
    # "V" is a 32-bit unsigned little-endian integer and "m" is base64.
    # A fixed byte order keeps the wire format identical on every host;
    # the native-endian "L" directive would not.
    WIRE_FORMAT = "Vm".freeze

    def unpacked_class
      Regexp
    end

    def type_id
      0x01
    end

    # Returns the packed String for regexp (options, then source).
    def pack(regexp)
      [regexp.options, regexp.source].pack(WIRE_FORMAT)
    end

    # Returns a Regexp rebuilt from the packed options and source.
    def unpack(raw)
      options, source = raw.unpack(WIRE_FORMAT)
      Regexp.new(source, options)
    end
  end

  # Custom type 0x02: a Time, carried as seconds since the epoch in a
  # big-endian double ("G"). Precision is limited to what a Float holds,
  # and no zone or UTC offset is encoded.
  class MochiloTime
    def unpacked_class
      Time
    end

    def type_id
      0x02
    end

    # Returns the 8-byte packed String for time.
    def pack(time)
      [time.to_f].pack("G")
    end

    # Returns a Time built with Time.at from the packed seconds.
    def unpack(raw)
      Time.at(raw.unpack("G").first)
    end
  end
end
|
|
@ -18,6 +18,7 @@ module BERT
|
|||
MAGIC = 131
|
||||
VERSION_2 = 132
|
||||
VERSION_3 = 133
|
||||
VERSION_4 = 134
|
||||
MAX_INT = (1 << 27) -1
|
||||
MIN_INT = -(1 << 27)
|
||||
end
|
||||
|
|
|
@ -11,14 +11,16 @@ class BertTest < Test::Unit::TestCase
|
|||
@berts = {
|
||||
:v2 => "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04nameq\x00\x00\x00\x03TPWjl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexq\x00\x00\x00\x03catl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b,
|
||||
:v3 => "\x85\x99\xD6\x01user\x81\xD6\x01name\xD6\x04uTPW\x92\xC7\t\x02\x01\x00\x00\x00Y2F0\n\xCB@#\xCC\xCC\xCC\xCC\xCC\xCD\xD7\x03A\xD2\xB3Z\x90\xC0\x00\x00\xC0\xC3\xC2\xD6\x01true\xC7\x05\x01false".b,
|
||||
:v4 => "\x86\x99\xC7\x05\xFF\x00user\x81\xC7\x05\xFF\x00name\xA3TPW\x92\xC7\n\xFF\x01\x01\x00\x00\x00Y2F0\n\xCB@#\xCC\xCC\xCC\xCC\xCC\xCD\xC7\t\xFF\x02A\xD2\xB3Z\x90\xC0\x00\x00\xC0\xC3\xC2\xC7\x05\xFF\x00true\xC7\x06\xFF\x00false".b,
|
||||
}
|
||||
@ebins = {
|
||||
:v2 => "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,113,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,113,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>",
|
||||
:v3 => "<<133,153,214,1,117,115,101,114,129,214,1,110,97,109,101,214,4,117,84,80,87,146,199,9,2,1,0,0,0,89,50,70,48,10,203,64,35,204,204,204,204,204,205,215,3,65,210,179,90,144,192,0,0,192,195,194,214,1,116,114,117,101,199,5,1,102,97,108,115,101>>",
|
||||
:v4 => "<<134,153,199,5,255,0,117,115,101,114,129,199,5,255,0,110,97,109,101,163,84,80,87,146,199,10,255,1,1,0,0,0,89,50,70,48,10,203,64,35,204,204,204,204,204,205,199,9,255,2,65,210,179,90,144,192,0,0,192,195,194,199,5,255,0,116,114,117,101,199,6,255,0,102,97,108,115,101>>",
|
||||
}
|
||||
end
|
||||
|
||||
[:v2, :v3].each do |v|
|
||||
[:v2, :v3, :v4].each do |v|
|
||||
context "#{v} encoder" do
|
||||
setup do
|
||||
@old_version = BERT::Encode.version
|
||||
|
|
Загрузка…
Ссылка в новой задаче