зеркало из https://github.com/github/ruby.git
* transcode.c (sym_partial_input): new variable.
(econv_primitive_convert): accept a hash as 5th argument as well. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19131 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
f7607eff9a
Коммит
2494e67f16
|
@ -1,3 +1,8 @@
|
|||
Thu Sep 4 21:46:21 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (sym_partial_input): new variable.
|
||||
(econv_primitive_convert): accept a hash as 5th argument as well.
|
||||
|
||||
Thu Sep 4 21:04:27 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (sym_universal_newline_decoder): new variable.
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
require 'test/unit'
|
||||
|
||||
class TestEncodingConverter < Test::Unit::TestCase
|
||||
def check_ec(edst, esrc, eres, dst, src, ec, off, len, flags=0)
|
||||
res = ec.primitive_convert(src, dst, off, len, flags)
|
||||
def check_ec(edst, esrc, eres, dst, src, ec, off, len, opts=nil)
|
||||
res = ec.primitive_convert(src, dst, off, len, opts)
|
||||
assert_equal([edst.dup.force_encoding("ASCII-8BIT"),
|
||||
esrc.dup.force_encoding("ASCII-8BIT"),
|
||||
eres],
|
||||
|
@ -11,11 +11,11 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
res])
|
||||
end
|
||||
|
||||
def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, flags=0)
|
||||
def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, opts=nil)
|
||||
ec = Encoding::Converter.new(*ec) if Array === ec
|
||||
i = consumed + rest
|
||||
o = ""
|
||||
ret = ec.primitive_convert(i, o, 0, obuf_bytesize, flags)
|
||||
ret = ec.primitive_convert(i, o, 0, obuf_bytesize, opts)
|
||||
assert_equal([converted, eres, rest],
|
||||
[o, ret, i])
|
||||
end
|
||||
|
@ -61,20 +61,20 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_output_region
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="a", dst="b", nil, 1, :partial_input=>true)
|
||||
assert_equal("ba", dst)
|
||||
ec.primitive_convert(src="a", dst="b", 0, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="a", dst="b", 0, 1, :partial_input=>true)
|
||||
assert_equal("a", dst)
|
||||
ec.primitive_convert(src="a", dst="b", 1, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="a", dst="b", 1, 1, :partial_input=>true)
|
||||
assert_equal("ba", dst)
|
||||
assert_raise(ArgumentError) {
|
||||
ec.primitive_convert(src="a", dst="b", 2, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="a", dst="b", 2, 1, :partial_input=>true)
|
||||
}
|
||||
assert_raise(ArgumentError) {
|
||||
ec.primitive_convert(src="a", dst="b", -1, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="a", dst="b", -1, 1, :partial_input=>true)
|
||||
}
|
||||
assert_raise(ArgumentError) {
|
||||
ec.primitive_convert(src="a", dst="b", 1, -1, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="a", dst="b", 1, -1, :partial_input=>true)
|
||||
}
|
||||
end
|
||||
|
||||
|
@ -114,7 +114,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_partial_input
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
ret = ec.primitive_convert(src="", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ret = ec.primitive_convert(src="", dst="", nil, 10, :partial_input=>true)
|
||||
assert_equal(:source_buffer_empty, ret)
|
||||
ret = ec.primitive_convert(src="", dst="", nil, 10)
|
||||
assert_equal(:finished, ret)
|
||||
|
@ -153,7 +153,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_iso2022jp_encode
|
||||
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
|
||||
src << "\xA2"; check_ec("a", "", :source_buffer_empty, *a)
|
||||
src << "\xA4"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
|
||||
|
@ -166,7 +166,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_iso2022jp_decode
|
||||
ec = Encoding::Converter.new("ISO-2022-JP", "EUC-JP")
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
|
||||
src << "\e"; check_ec("a", "", :source_buffer_empty, *a)
|
||||
src << "$"; check_ec("a", "", :source_buffer_empty, *a)
|
||||
|
@ -212,7 +212,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_invalid4
|
||||
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
|
||||
a = ["", "abc\xFFdef", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
|
||||
a = ["", "abc\xFFdef", ec, nil, 10, :output_followed_by_input=>true]
|
||||
check_ec("a", "bc\xFFdef", :output_followed_by_input, *a)
|
||||
check_ec("ab", "c\xFFdef", :output_followed_by_input, *a)
|
||||
check_ec("abc", "\xFFdef", :output_followed_by_input, *a)
|
||||
|
@ -225,7 +225,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_invalid_utf16le
|
||||
ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "A"; check_ec("", "", :source_buffer_empty, *a)
|
||||
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
|
||||
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
|
||||
|
@ -244,7 +244,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_invalid_utf16be
|
||||
ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
|
||||
src << "A"; check_ec("A", "", :source_buffer_empty, *a)
|
||||
src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a)
|
||||
|
@ -263,7 +263,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_invalid_utf32be
|
||||
ec = Encoding::Converter.new("UTF-32BE", "UTF-8")
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
|
||||
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
|
||||
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
|
||||
|
@ -287,7 +287,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_invalid_utf32le
|
||||
ec = Encoding::Converter.new("UTF-32LE", "UTF-8")
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "A"; check_ec("", "", :source_buffer_empty, *a)
|
||||
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
|
||||
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
|
||||
|
@ -319,7 +319,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_errors2
|
||||
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
|
||||
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
|
||||
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, :output_followed_by_input=>true]
|
||||
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
|
||||
check_ec("A", "\xDC\x00\x00B", :output_followed_by_input, *a)
|
||||
check_ec("A", "\x00B", :invalid_byte_sequence, *a)
|
||||
|
@ -329,7 +329,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_universal_newline
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline_decoder: true)
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
|
||||
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
|
||||
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
|
||||
|
@ -340,7 +340,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_universal_newline2
|
||||
ec = Encoding::Converter.new("", "", universal_newline_decoder: true)
|
||||
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", src="", ec, nil, 50, :partial_input=>true]
|
||||
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
|
||||
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
|
||||
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
|
||||
|
@ -371,7 +371,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_output_followed_by_input
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
|
||||
a = ["", "abc\u{3042}def", ec, nil, 100, :output_followed_by_input=>true]
|
||||
check_ec("a", "bc\u{3042}def", :output_followed_by_input, *a)
|
||||
check_ec("ab", "c\u{3042}def", :output_followed_by_input, *a)
|
||||
check_ec("abc", "\u{3042}def", :output_followed_by_input, *a)
|
||||
|
@ -408,7 +408,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_errinfo_valid_partial_character
|
||||
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
|
||||
ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="\xa4", dst="", nil, 10, :partial_input=>true)
|
||||
assert_errinfo(:source_buffer_empty, nil, nil, nil, nil, ec)
|
||||
end
|
||||
|
||||
|
@ -428,23 +428,23 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_output_iso2022jp
|
||||
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
|
||||
ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, :partial_input=>true)
|
||||
assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst)
|
||||
assert_equal(nil, ec.insert_output("???"))
|
||||
ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
|
||||
assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst)
|
||||
ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, :partial_input=>true)
|
||||
assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst)
|
||||
|
||||
assert_equal(nil, ec.insert_output("\xA1\xA1".force_encoding("EUC-JP")))
|
||||
ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
|
||||
assert_equal("\e$B!!\e(B???\e$B!\"!!".force_encoding("ISO-2022-JP"), dst)
|
||||
|
||||
ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, :partial_input=>true)
|
||||
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#".force_encoding("ISO-2022-JP"), dst)
|
||||
|
||||
assert_equal(nil, ec.insert_output("\u3042"))
|
||||
ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
||||
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
|
||||
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)
|
||||
|
||||
assert_raise(Encoding::ConversionUndefined) {
|
||||
|
@ -561,7 +561,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_noconv_partial
|
||||
ec = Encoding::Converter.new("", "")
|
||||
a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::PARTIAL_INPUT]
|
||||
a = ["", "abcdefg", ec, nil, 2, :partial_input=>true]
|
||||
check_ec("ab", "cdefg", :destination_buffer_full, *a)
|
||||
check_ec("abcd", "efg", :destination_buffer_full, *a)
|
||||
check_ec("abcdef", "g", :destination_buffer_full, *a)
|
||||
|
@ -570,7 +570,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
|
||||
def test_noconv_output_followed_by_input
|
||||
ec = Encoding::Converter.new("", "")
|
||||
a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
|
||||
a = ["", "abcdefg", ec, nil, 2, :output_followed_by_input=>true]
|
||||
check_ec("a", "bcdefg", :output_followed_by_input, *a)
|
||||
check_ec("ab", "cdefg", :output_followed_by_input, *a)
|
||||
check_ec("abc", "defg", :output_followed_by_input, *a)
|
||||
|
|
41
transcode.c
41
transcode.c
|
@ -24,6 +24,7 @@ static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
|
|||
static VALUE sym_universal_newline_decoder;
|
||||
static VALUE sym_crlf_newline_encoder;
|
||||
static VALUE sym_cr_newline_encoder;
|
||||
static VALUE sym_partial_input;
|
||||
|
||||
static VALUE sym_invalid_byte_sequence;
|
||||
static VALUE sym_undefined_conversion;
|
||||
|
@ -2527,11 +2528,15 @@ econv_result_to_symbol(rb_econv_result_t res)
|
|||
* ec.primitive_convert(source_buffer, destination_buffer) -> symbol
|
||||
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
|
||||
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
|
||||
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, flags) -> symbol
|
||||
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
|
||||
*
|
||||
* possible flags:
|
||||
* Encoding::Converter::PARTIAL_INPUT # source buffer may be part of larger source
|
||||
* Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input
|
||||
* possible opt elements:
|
||||
* hash form:
|
||||
* :partial_input => true # source buffer may be part of larger source
|
||||
* :output_followed_by_input => true # stop conversion after output before input
|
||||
* integer form:
|
||||
* Encoding::Converter::PARTIAL_INPUT
|
||||
* Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT
|
||||
*
|
||||
* possible results:
|
||||
* :invalid_byte_sequence
|
||||
|
@ -2583,14 +2588,14 @@ econv_result_to_symbol(rb_econv_result_t res)
|
|||
* primitive_convert stops conversion when one of the following conditions is met.
|
||||
* - invalid byte sequence found in source buffer (:invalid_byte_sequence)
|
||||
* - unexpected end of source buffer (:incomplete_input)
|
||||
* this occur only when PARTIAL_INPUT is not specified.
|
||||
* this occurs only when :partial_input is not specified.
|
||||
* - character not representable in output encoding (:undefined_conversion)
|
||||
* - after some output is generated, before input is done (:output_followed_by_input)
|
||||
* this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified.
|
||||
* this occurs only when :output_followed_by_input is specified.
|
||||
* - destination buffer is full (:destination_buffer_full)
|
||||
* this occurs only when destination_bytesize is non-nil.
|
||||
* - source buffer is empty (:source_buffer_empty)
|
||||
* this occur only when PARTIAL_INPUT is specified.
|
||||
* this occurs only when :partial_input is specified.
|
||||
* - conversion is finished (:finished)
|
||||
*
|
||||
* example:
|
||||
|
@ -2612,7 +2617,7 @@ econv_result_to_symbol(rb_econv_result_t res)
|
|||
static VALUE
|
||||
econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
VALUE input, output, output_byteoffset_v, output_bytesize_v, flags_v;
|
||||
VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
|
||||
rb_econv_t *ec = check_econv(self);
|
||||
rb_econv_result_t res;
|
||||
const unsigned char *ip, *is;
|
||||
|
@ -2621,7 +2626,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
|||
unsigned long output_byteend;
|
||||
int flags;
|
||||
|
||||
rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v);
|
||||
rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt);
|
||||
|
||||
if (NIL_P(output_byteoffset_v))
|
||||
output_byteoffset = 0; /* dummy */
|
||||
|
@ -2633,10 +2638,23 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
|||
else
|
||||
output_bytesize = NUM2LONG(output_bytesize_v);
|
||||
|
||||
if (NIL_P(flags_v))
|
||||
if (NIL_P(opt)) {
|
||||
flags = 0;
|
||||
else
|
||||
}
|
||||
else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
|
||||
flags = NUM2INT(flags_v);
|
||||
}
|
||||
else {
|
||||
VALUE v;
|
||||
opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
|
||||
flags = 0;
|
||||
v = rb_hash_aref(opt, sym_partial_input);
|
||||
if (RTEST(v))
|
||||
flags |= ECONV_PARTIAL_INPUT;
|
||||
v = rb_hash_aref(opt, sym_output_followed_by_input);
|
||||
if (RTEST(v))
|
||||
flags |= ECONV_OUTPUT_FOLLOWED_BY_INPUT;
|
||||
}
|
||||
|
||||
StringValue(output);
|
||||
if (!NIL_P(input))
|
||||
|
@ -3301,6 +3319,7 @@ Init_transcode(void)
|
|||
sym_universal_newline_decoder = ID2SYM(rb_intern("universal_newline_decoder"));
|
||||
sym_crlf_newline_encoder = ID2SYM(rb_intern("crlf_newline_encoder"));
|
||||
sym_cr_newline_encoder = ID2SYM(rb_intern("cr_newline_encoder"));
|
||||
sym_partial_input = ID2SYM(rb_intern("partial_input"));
|
||||
|
||||
rb_define_method(rb_cString, "encode", str_encode, -1);
|
||||
rb_define_method(rb_cString, "encode!", str_encode_bang, -1);
|
||||
|
|
Загрузка…
Ссылка в новой задаче