* transcode.c (sym_partial_input): new variable.

(econv_primitive_convert): accept a hash as 5th argument as well.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19131 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-09-04 12:48:21 +00:00
Родитель f7607eff9a
Коммит 2494e67f16
3 изменённых файлов: 66 добавлений и 42 удалений

Просмотреть файл

@ -1,3 +1,8 @@
Thu Sep 4 21:46:21 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (sym_partial_input): new variable.
(econv_primitive_convert): accept a hash as 5th argument as well.
Thu Sep 4 21:04:27 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (sym_universal_newline_decoder): new variable.

Просмотреть файл

@ -1,8 +1,8 @@
require 'test/unit'
class TestEncodingConverter < Test::Unit::TestCase
def check_ec(edst, esrc, eres, dst, src, ec, off, len, flags=0)
res = ec.primitive_convert(src, dst, off, len, flags)
def check_ec(edst, esrc, eres, dst, src, ec, off, len, opts=nil)
res = ec.primitive_convert(src, dst, off, len, opts)
assert_equal([edst.dup.force_encoding("ASCII-8BIT"),
esrc.dup.force_encoding("ASCII-8BIT"),
eres],
@ -11,11 +11,11 @@ class TestEncodingConverter < Test::Unit::TestCase
res])
end
def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, flags=0)
def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, opts=nil)
ec = Encoding::Converter.new(*ec) if Array === ec
i = consumed + rest
o = ""
ret = ec.primitive_convert(i, o, 0, obuf_bytesize, flags)
ret = ec.primitive_convert(i, o, 0, obuf_bytesize, opts)
assert_equal([converted, eres, rest],
[o, ret, i])
end
@ -61,20 +61,20 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_output_region
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="a", dst="b", nil, 1, :partial_input=>true)
assert_equal("ba", dst)
ec.primitive_convert(src="a", dst="b", 0, 1, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="a", dst="b", 0, 1, :partial_input=>true)
assert_equal("a", dst)
ec.primitive_convert(src="a", dst="b", 1, 1, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="a", dst="b", 1, 1, :partial_input=>true)
assert_equal("ba", dst)
assert_raise(ArgumentError) {
ec.primitive_convert(src="a", dst="b", 2, 1, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="a", dst="b", 2, 1, :partial_input=>true)
}
assert_raise(ArgumentError) {
ec.primitive_convert(src="a", dst="b", -1, 1, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="a", dst="b", -1, 1, :partial_input=>true)
}
assert_raise(ArgumentError) {
ec.primitive_convert(src="a", dst="b", 1, -1, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="a", dst="b", 1, -1, :partial_input=>true)
}
end
@ -114,7 +114,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_partial_input
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
ret = ec.primitive_convert(src="", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
ret = ec.primitive_convert(src="", dst="", nil, 10, :partial_input=>true)
assert_equal(:source_buffer_empty, ret)
ret = ec.primitive_convert(src="", dst="", nil, 10)
assert_equal(:finished, ret)
@ -153,7 +153,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_iso2022jp_encode
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
src << "\xA2"; check_ec("a", "", :source_buffer_empty, *a)
src << "\xA4"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
@ -166,7 +166,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_iso2022jp_decode
ec = Encoding::Converter.new("ISO-2022-JP", "EUC-JP")
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "a"; check_ec("a", "", :source_buffer_empty, *a)
src << "\e"; check_ec("a", "", :source_buffer_empty, *a)
src << "$"; check_ec("a", "", :source_buffer_empty, *a)
@ -212,7 +212,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid4
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
a = ["", "abc\xFFdef", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
a = ["", "abc\xFFdef", ec, nil, 10, :output_followed_by_input=>true]
check_ec("a", "bc\xFFdef", :output_followed_by_input, *a)
check_ec("ab", "c\xFFdef", :output_followed_by_input, *a)
check_ec("abc", "\xFFdef", :output_followed_by_input, *a)
@ -225,7 +225,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf16le
ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "A"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("A", "", :source_buffer_empty, *a)
@ -244,7 +244,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf16be
ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "A"; check_ec("A", "", :source_buffer_empty, *a)
src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a)
@ -263,7 +263,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf32be
ec = Encoding::Converter.new("UTF-32BE", "UTF-8")
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
@ -287,7 +287,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_invalid_utf32le
ec = Encoding::Converter.new("UTF-32LE", "UTF-8")
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "A"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
src << "\x00"; check_ec("", "", :source_buffer_empty, *a)
@ -319,7 +319,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_errors2
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, :output_followed_by_input=>true]
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
check_ec("A", "\xDC\x00\x00B", :output_followed_by_input, *a)
check_ec("A", "\x00B", :invalid_byte_sequence, *a)
@ -329,7 +329,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_universal_newline
ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline_decoder: true)
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
@ -340,7 +340,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_universal_newline2
ec = Encoding::Converter.new("", "", universal_newline_decoder: true)
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
a = ["", src="", ec, nil, 50, :partial_input=>true]
src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
@ -371,7 +371,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_output_followed_by_input
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
a = ["", "abc\u{3042}def", ec, nil, 100, :output_followed_by_input=>true]
check_ec("a", "bc\u{3042}def", :output_followed_by_input, *a)
check_ec("ab", "c\u{3042}def", :output_followed_by_input, *a)
check_ec("abc", "\u{3042}def", :output_followed_by_input, *a)
@ -408,7 +408,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_errinfo_valid_partial_character
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="\xa4", dst="", nil, 10, :partial_input=>true)
assert_errinfo(:source_buffer_empty, nil, nil, nil, nil, ec)
end
@ -428,23 +428,23 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_output_iso2022jp
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, :partial_input=>true)
assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("???"))
ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst)
ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("\xA1\xA1".force_encoding("EUC-JP")))
ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!".force_encoding("ISO-2022-JP"), dst)
ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#".force_encoding("ISO-2022-JP"), dst)
assert_equal(nil, ec.insert_output("\u3042"))
ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT)
ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)
assert_raise(Encoding::ConversionUndefined) {
@ -561,7 +561,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_noconv_partial
ec = Encoding::Converter.new("", "")
a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::PARTIAL_INPUT]
a = ["", "abcdefg", ec, nil, 2, :partial_input=>true]
check_ec("ab", "cdefg", :destination_buffer_full, *a)
check_ec("abcd", "efg", :destination_buffer_full, *a)
check_ec("abcdef", "g", :destination_buffer_full, *a)
@ -570,7 +570,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_noconv_output_followed_by_input
ec = Encoding::Converter.new("", "")
a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
a = ["", "abcdefg", ec, nil, 2, :output_followed_by_input=>true]
check_ec("a", "bcdefg", :output_followed_by_input, *a)
check_ec("ab", "cdefg", :output_followed_by_input, *a)
check_ec("abc", "defg", :output_followed_by_input, *a)

Просмотреть файл

@ -24,6 +24,7 @@ static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
static VALUE sym_universal_newline_decoder;
static VALUE sym_crlf_newline_encoder;
static VALUE sym_cr_newline_encoder;
static VALUE sym_partial_input;
static VALUE sym_invalid_byte_sequence;
static VALUE sym_undefined_conversion;
@ -2527,11 +2528,15 @@ econv_result_to_symbol(rb_econv_result_t res)
* ec.primitive_convert(source_buffer, destination_buffer) -> symbol
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, flags) -> symbol
* ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
*
* possible flags:
* Encoding::Converter::PARTIAL_INPUT # source buffer may be part of larger source
* Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input
* possible opt elements:
* hash form:
* :partial_input => true # source buffer may be part of larger source
* :output_followed_by_input => true # stop conversion after output before input
* integer form:
* Encoding::Converter::PARTIAL_INPUT
* Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT
*
* possible results:
* :invalid_byte_sequence
@ -2583,14 +2588,14 @@ econv_result_to_symbol(rb_econv_result_t res)
* primitive_convert stops conversion when one of the following conditions is met.
* - invalid byte sequence found in source buffer (:invalid_byte_sequence)
* - unexpected end of source buffer (:incomplete_input)
* this occur only when PARTIAL_INPUT is not specified.
* this occurs only when :partial_input is not specified.
* - character not representable in output encoding (:undefined_conversion)
* - after some output is generated, before input is done (:output_followed_by_input)
* this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified.
* this occurs only when :output_followed_by_input is specified.
* - destination buffer is full (:destination_buffer_full)
* this occurs only when destination_bytesize is non-nil.
* - source buffer is empty (:source_buffer_empty)
* this occur only when PARTIAL_INPUT is specified.
* this occurs only when :partial_input is specified.
* - conversion is finished (:finished)
*
* example:
@ -2612,7 +2617,7 @@ econv_result_to_symbol(rb_econv_result_t res)
static VALUE
econv_primitive_convert(int argc, VALUE *argv, VALUE self)
{
VALUE input, output, output_byteoffset_v, output_bytesize_v, flags_v;
VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
rb_econv_t *ec = check_econv(self);
rb_econv_result_t res;
const unsigned char *ip, *is;
@ -2621,7 +2626,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
unsigned long output_byteend;
int flags;
rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v);
rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt);
if (NIL_P(output_byteoffset_v))
output_byteoffset = 0; /* dummy */
@ -2633,10 +2638,23 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
else
output_bytesize = NUM2LONG(output_bytesize_v);
if (NIL_P(flags_v))
if (NIL_P(opt)) {
flags = 0;
else
}
else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
flags = NUM2INT(flags_v);
}
else {
VALUE v;
opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
flags = 0;
v = rb_hash_aref(opt, sym_partial_input);
if (RTEST(v))
flags |= ECONV_PARTIAL_INPUT;
v = rb_hash_aref(opt, sym_output_followed_by_input);
if (RTEST(v))
flags |= ECONV_OUTPUT_FOLLOWED_BY_INPUT;
}
StringValue(output);
if (!NIL_P(input))
@ -3301,6 +3319,7 @@ Init_transcode(void)
sym_universal_newline_decoder = ID2SYM(rb_intern("universal_newline_decoder"));
sym_crlf_newline_encoder = ID2SYM(rb_intern("crlf_newline_encoder"));
sym_cr_newline_encoder = ID2SYM(rb_intern("cr_newline_encoder"));
sym_partial_input = ID2SYM(rb_intern("partial_input"));
rb_define_method(rb_cString, "encode", str_encode, -1);
rb_define_method(rb_cString, "encode!", str_encode_bang, -1);