зеркало из https://github.com/github/bert.git
Коммит
fd47af65eb
|
@ -1,4 +1,5 @@
|
|||
#include "ruby.h"
|
||||
#include "ruby/encoding.h"
|
||||
#include <stdint.h>
|
||||
#include <netinet/in.h>
|
||||
|
||||
|
@ -14,9 +15,17 @@
|
|||
#define ERL_BIN 109
|
||||
#define ERL_SMALL_BIGNUM 110
|
||||
#define ERL_LARGE_BIGNUM 111
|
||||
#define ERL_VERSION 131
|
||||
|
||||
#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM)
|
||||
/* These two types are specific to version 2 of the protocol. They diverge
|
||||
* from Erlang, but allow us to pass string encodings across the wire. */
|
||||
#define ERLEXT_ENC_STRING 112
|
||||
#define ERLEXT_UNICODE_STRING 113
|
||||
|
||||
/* Protocol version constants. */
|
||||
#define ERL_VERSION 131
|
||||
#define ERL_VERSION2 132
|
||||
|
||||
#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERLEXT_UNICODE_STRING)
|
||||
#define BERT_TYPE_OFFSET (ERL_SMALL_INT)
|
||||
|
||||
static VALUE rb_mBERT;
|
||||
|
@ -40,6 +49,8 @@ static VALUE bert_read_nil(struct bert_buf *buf);
|
|||
static VALUE bert_read_string(struct bert_buf *buf);
|
||||
static VALUE bert_read_list(struct bert_buf *buf);
|
||||
static VALUE bert_read_bin(struct bert_buf *buf);
|
||||
static VALUE bert_read_enc_string(struct bert_buf *buf);
|
||||
static VALUE bert_read_unicode_string(struct bert_buf *buf);
|
||||
static VALUE bert_read_sbignum(struct bert_buf *buf);
|
||||
static VALUE bert_read_lbignum(struct bert_buf *buf);
|
||||
|
||||
|
@ -59,7 +70,9 @@ static bert_ptr bert_callbacks[] = {
|
|||
&bert_read_list,
|
||||
&bert_read_bin,
|
||||
&bert_read_sbignum,
|
||||
&bert_read_lbignum
|
||||
&bert_read_lbignum,
|
||||
&bert_read_enc_string,
|
||||
&bert_read_unicode_string
|
||||
};
|
||||
|
||||
static inline uint8_t bert_buf_read8(struct bert_buf *buf)
|
||||
|
@ -293,6 +306,34 @@ static VALUE bert_read_bin(struct bert_buf *buf)
|
|||
return rb_bin;
|
||||
}
|
||||
|
||||
/*
 * Read a binary payload via bert_read_bin() and tag the resulting Ruby
 * string as UTF-8. The bytes themselves are not modified or validated here.
 */
static VALUE bert_read_unicode_string(struct bert_buf *buf)
{
	VALUE rb_utf8 = bert_read_bin(buf);

	/* Only the encoding label on the string object changes. */
	rb_enc_associate(rb_utf8, rb_utf8_encoding());
	return rb_utf8;
}
|
||||
|
||||
static VALUE bert_read_enc_string(struct bert_buf *buf)
|
||||
{
|
||||
uint8_t type;
|
||||
VALUE rb_bin, enc;
|
||||
|
||||
rb_bin = bert_read_bin(buf);
|
||||
|
||||
bert_buf_ensure(buf, 1);
|
||||
type = bert_buf_read8(buf);
|
||||
if (ERL_BIN != type)
|
||||
rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type);
|
||||
|
||||
enc = bert_read_bin(buf);
|
||||
rb_enc_associate(rb_bin, rb_find_encoding(enc));
|
||||
|
||||
return rb_bin;
|
||||
}
|
||||
|
||||
static VALUE bert_read_string(struct bert_buf *buf)
|
||||
{
|
||||
uint16_t i, length;
|
||||
|
@ -467,6 +508,7 @@ static VALUE bert_read_invalid(struct bert_buf *buf)
|
|||
static VALUE rb_bert_decode(VALUE klass, VALUE rb_string)
|
||||
{
|
||||
struct bert_buf buf;
|
||||
uint8_t proto_version;
|
||||
|
||||
Check_Type(rb_string, T_STRING);
|
||||
buf.data = (uint8_t *)RSTRING_PTR(rb_string);
|
||||
|
@ -474,10 +516,12 @@ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string)
|
|||
|
||||
bert_buf_ensure(&buf, 1);
|
||||
|
||||
if (bert_buf_read8(&buf) != ERL_VERSION)
|
||||
rb_raise(rb_eTypeError, "Invalid magic value for BERT string");
|
||||
|
||||
return bert_read(&buf);
|
||||
proto_version = bert_buf_read8(&buf);
|
||||
if (proto_version == ERL_VERSION || proto_version == ERL_VERSION2) {
|
||||
return bert_read(&buf);
|
||||
} else {
|
||||
rb_raise(rb_eTypeError, "Invalid magic value for BERT string");
|
||||
}
|
||||
}
|
||||
|
||||
static VALUE rb_bert_impl(VALUE klass)
|
||||
|
|
|
@ -22,4 +22,4 @@ require 'bert/decoder'
|
|||
# Global method for specifying that an array should be encoded as a tuple.
|
||||
def t
|
||||
BERT::Tuple
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,7 +10,13 @@ module BERT
|
|||
def self.decode(string)
|
||||
io = StringIO.new(string)
|
||||
io.set_encoding('binary') if io.respond_to?(:set_encoding)
|
||||
new(io).read_any
|
||||
header = io.getbyte
|
||||
case header
|
||||
when MAGIC, VERSION_2
|
||||
new(io).read_any
|
||||
else
|
||||
fail("Bad Magic")
|
||||
end
|
||||
end
|
||||
|
||||
def initialize(ins)
|
||||
|
@ -19,7 +25,6 @@ module BERT
|
|||
end
|
||||
|
||||
def read_any
|
||||
fail("Bad Magic") unless read_1 == MAGIC
|
||||
read_any_raw
|
||||
end
|
||||
|
||||
|
@ -37,6 +42,8 @@ module BERT
|
|||
when STRING then read_erl_string
|
||||
when LIST then read_list
|
||||
when BIN then read_bin
|
||||
when ENC_STRING then read_enc_string
|
||||
when UNICODE_STRING then read_unicode_string
|
||||
else
|
||||
fail("Unknown term tag: #{peek_1}")
|
||||
end
|
||||
|
@ -223,6 +230,14 @@ module BERT
|
|||
[]
|
||||
end
|
||||
|
||||
# Reads a UNICODE_STRING term: the tag byte, a 4-byte length, then that many
# bytes of payload. Returns the payload labelled as UTF-8.
def read_unicode_string
  tag = read_1
  fail("Invalid Type, not a unicode string") unless tag == UNICODE_STRING

  # force_encoding relabels the string in place and returns it.
  read_string(read_4).force_encoding("UTF-8")
end
|
||||
|
||||
def read_erl_string
|
||||
fail("Invalid Type, not an erlang string") unless read_1 == STRING
|
||||
length = read_2
|
||||
|
@ -246,5 +261,18 @@ module BERT
|
|||
def fail(str)
|
||||
raise str
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Reads an ENC_STRING term: a length-prefixed payload immediately followed
# by a BIN term holding the payload's encoding name. Returns the payload
# with that encoding applied.
def read_enc_string
  fail("Invalid Type, not an erlang binary") unless read_1 == ENC_STRING
  payload = read_string(read_4)

  # The encoding name travels as an ordinary binary right after the payload.
  fail("Invalid Type, not an erlang binary") unless read_1 == BIN
  encoding_name = read_string(read_4)

  payload.force_encoding(encoding_name)
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,6 +2,47 @@ module BERT
|
|||
class Encode
|
||||
include Types
|
||||
|
||||
# Encoder variant that keeps string encodings when writing binaries.
class V2 < Encode
  # Picks the wire form for +data+ from its encoding: ASCII-8BIT strings go
  # through the inherited write_binary, UTF-8 and US-ASCII strings are written
  # as UNICODE_STRING, and every other encoding is written as ENC_STRING.
  def write_binary(data)
    case data.encoding
    when ::Encoding::ASCII_8BIT then super
    when ::Encoding::UTF_8, ::Encoding::US_ASCII then write_unicode_string(data)
    else write_enc_string(data)
    end
  end

  private

  # Writes the UNICODE_STRING tag, the 4-byte byte length, then the bytes.
  def write_unicode_string(data)
    write_1(UNICODE_STRING)
    write_4(data.bytesize)
    write_string(data)
  end

  # Writes the ENC_STRING tag and payload, followed by a BIN term that
  # carries the name of the payload's encoding.
  def write_enc_string(data)
    write_1(ENC_STRING)
    write_4(data.bytesize)
    write_string(data)

    encoding_name = data.encoding.name
    write_1(BIN)
    write_4(encoding_name.bytesize)
    write_string(encoding_name)
  end

  # Returns the header byte used for this encoder version.
  def version_header
    VERSION_2
  end
end
|
||||
|
||||
class << self
|
||||
attr_accessor :version
|
||||
end
|
||||
self.version = :v1
|
||||
|
||||
attr_accessor :out
|
||||
|
||||
def initialize(out)
|
||||
|
@ -11,12 +52,18 @@ module BERT
|
|||
def self.encode(data)
|
||||
io = StringIO.new
|
||||
io.set_encoding('binary') if io.respond_to?(:set_encoding)
|
||||
self.new(io).write_any(data)
|
||||
|
||||
if version == :v2
|
||||
Encode::V2.new(io).write_any(data)
|
||||
else
|
||||
new(io).write_any(data)
|
||||
end
|
||||
|
||||
io.string
|
||||
end
|
||||
|
||||
def write_any obj
|
||||
write_1 MAGIC
|
||||
write_1 version_header
|
||||
write_any_raw obj
|
||||
end
|
||||
|
||||
|
@ -132,6 +179,10 @@ module BERT
|
|||
|
||||
private
|
||||
|
||||
def version_header
|
||||
MAGIC
|
||||
end
|
||||
|
||||
def fail(obj)
|
||||
raise "Cannot encode to erlang external format: #{obj.inspect}"
|
||||
end
|
||||
|
|
|
@ -12,10 +12,12 @@ module BERT
|
|||
STRING = 107
|
||||
LIST = 108
|
||||
BIN = 109
|
||||
ENC_STRING = 112
|
||||
UNICODE_STRING = 113
|
||||
FUN = 117
|
||||
NEW_FUN = 112
|
||||
MAGIC = 131
|
||||
VERSION_2 = 132
|
||||
MAX_INT = (1 << 27) -1
|
||||
MIN_INT = -(1 << 27)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,20 +5,55 @@ class BertTest < Test::Unit::TestCase
|
|||
setup do
|
||||
time = Time.at(1254976067)
|
||||
@ruby = t[:user, {:name => 'TPW'}, [/cat/i, 9.9], time, nil, true, false, :true, :false]
|
||||
@bert = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false"
|
||||
@ebin = "<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
|
||||
@bert_old = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false".b
|
||||
@ebin_old = "<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
|
||||
end
|
||||
|
||||
should "encode" do
|
||||
assert_equal @bert, BERT.encode(@ruby)
|
||||
context "v2 encoder" do
|
||||
setup do
|
||||
@old_version = BERT::Encode.version
|
||||
BERT::Encode.version = :v2
|
||||
@bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04nameq\x00\x00\x00\x03TPWjl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexq\x00\x00\x00\x03catl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b
|
||||
@ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,113,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,113,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
|
||||
end
|
||||
|
||||
teardown do
|
||||
BERT::Encode.version = @old_version
|
||||
end
|
||||
|
||||
should "decode new format" do
|
||||
assert_equal @ruby, BERT.decode(@bert)
|
||||
end
|
||||
|
||||
should "roundtrip string and maintain encoding" do
|
||||
str = "日本語".encode 'EUC-JP'
|
||||
round = BERT.decode(BERT.encode(str))
|
||||
assert_equal str, round
|
||||
assert_equal str.encoding, round.encoding
|
||||
end
|
||||
|
||||
should "roundtrip binary string" do
|
||||
str = "日本語".b
|
||||
round = BERT.decode(BERT.encode(str))
|
||||
assert_equal str, round
|
||||
assert_equal str.encoding, round.encoding
|
||||
end
|
||||
|
||||
should "encode" do
|
||||
assert_equal @bert, BERT.encode(@ruby)
|
||||
end
|
||||
|
||||
should "ebin" do
|
||||
assert_equal @ebin, BERT.ebin(@bert)
|
||||
end
|
||||
end
|
||||
|
||||
should "decode" do
|
||||
assert_equal @ruby, BERT.decode(@bert)
|
||||
should "decode the old format" do
|
||||
assert_equal @ruby, BERT.decode(@bert_old)
|
||||
end
|
||||
|
||||
should "ebin" do
|
||||
assert_equal @ebin, BERT.ebin(@bert)
|
||||
assert_equal @ebin_old, BERT.ebin(@bert_old)
|
||||
end
|
||||
|
||||
should "do roundtrips" do
|
||||
|
|
|
@ -82,6 +82,7 @@ class EncoderTest < Test::Unit::TestCase
|
|||
end
|
||||
|
||||
should 'handle utf8 strings' do
|
||||
str = "été".encode 'UTF-8'
|
||||
bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
|
||||
assert_equal bert, BERT::Encoder.encode("été")
|
||||
end
|
||||
|
@ -99,6 +100,36 @@ class EncoderTest < Test::Unit::TestCase
|
|||
assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
|
||||
end
|
||||
|
||||
context "v2" do
|
||||
setup do
|
||||
@old_version = BERT::Encode.version
|
||||
BERT::Encode.version = :v2
|
||||
end
|
||||
|
||||
teardown do
|
||||
BERT::Encode.version = @old_version
|
||||
end
|
||||
|
||||
should 'handle utf8 strings' do
|
||||
str = "été".encode 'UTF-8'
|
||||
bert = [132, 113, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
|
||||
assert_equal bert, BERT::Encoder.encode("été")
|
||||
end
|
||||
|
||||
should 'handle utf8 symbols' do
|
||||
bert = [132, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
|
||||
assert_equal bert, BERT::Encoder.encode(:'été')
|
||||
end
|
||||
|
||||
should "handle bignums" do
|
||||
bert = [132,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
|
||||
assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000)
|
||||
|
||||
bert = [132,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
|
||||
assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
|
||||
end
|
||||
end
|
||||
|
||||
should "leave other stuff alone" do
|
||||
before = [1, 2.0, [:foo, 'bar']]
|
||||
assert_equal before, BERT::Encoder.convert(before)
|
||||
|
|
Загрузка…
Ссылка в новой задаче