* include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed
  from rb_econv_stateless_encoding so that it also applies to
  stateless ASCII incompatible encodings such as UTF-16BE.

* io.c (make_writeconv): use rb_econv_asciicompat_encoding.

* transcode_data.h (rb_transcoder_asciicompat_type_t): renamed from
  rb_transcoder_stateful_type_t.
  (rb_transcoder): use rb_transcoder_asciicompat_type_t.

* transcode.c: follow the type change.
  (asciicompat_encoding_i): renamed from stateless_encoding_i.
  (rb_econv_asciicompat_encoding): renamed from
  rb_econv_stateless_encoding.
  (econv_s_asciicompat_encoding): method renamed.

* tool/transcode-tblgen.rb: follow the type change.

* enc/trans/utf_16_32.trans: follow the type change.
  rb_from_UTF_16BE to UTF-8 is asciicompat_decoder.
  rb_from_UTF_16LE to UTF-8 is asciicompat_decoder.
  rb_from_UTF_32BE to UTF-8 is asciicompat_decoder.
  rb_from_UTF_32LE to UTF-8 is asciicompat_decoder.
  UTF-8 to rb_to_UTF_16BE is asciicompat_encoder.
  UTF-8 to rb_to_UTF_16LE is asciicompat_encoder.
  UTF-8 to rb_to_UTF_32BE is asciicompat_encoder.
  UTF-8 to rb_to_UTF_32LE is asciicompat_encoder.

* enc/trans/newline.trans: follow the type change.  universal newline
  decoder is asciicompat_converter.

* enc/trans/escape.trans: follow the type change.

* enc/trans/iso2022.trans: ditto.

* enc/trans/japanese.trans: ditto.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19249 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-09-08 14:33:17 +00:00
Родитель 52945b0d28
Коммит 6270ad5b7f
12 изменённых файлов: 127 добавлений и 79 удалений

Просмотреть файл

@ -1,3 +1,42 @@
Mon Sep 8 23:24:54 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed
from rb_econv_stateless_encoding to apply stateless ASCII
incompatible encodings such as UTF-16BE.
* io.c (make_writeconv): use rb_econv_asciicompat_encoding.
* transcode_data.h (rb_transcoder_asciicompat_type_t): renamed from
rb_transcoder_stateful_type_t.
(rb_transcoder): use rb_transcoder_asciicompat_type_t.
* transcode.c: follow the type change.
(asciicompat_encoding_i): renamed from stateless_encoding_i.
(rb_econv_asciicompat_encoding): renamed from
rb_econv_stateless_encoding.
(econv_s_asciicompat_encoding): method renamed.
* tool/transcode-tblgen.rb: follow the type change.
* enc/trans/utf_16_32.trans: follow the type change.
rb_from_UTF_16BE to UTF-8 is asciicompat_decoder.
rb_from_UTF_16LE to UTF-8 is asciicompat_decoder.
rb_from_UTF_32BE to UTF-8 is asciicompat_decoder.
rb_from_UTF_32LE to UTF-8 is asciicompat_decoder.
UTF-8 to rb_to_UTF_16BE is asciicompat_encoder.
UTF-8 to rb_to_UTF_16LE is asciicompat_encoder.
UTF-8 to rb_to_UTF_32BE is asciicompat_encoder.
UTF-8 to rb_to_UTF_32LE is asciicompat_encoder.
* enc/trans/newline.trans: follow the type change. universal newline
decoder is asciicompat_converter.
* enc/trans/escape.trans: follow the type change.
* enc/trans/iso2022.trans: ditto.
* enc/trans/japanese.trans: ditto.
Mon Sep 8 23:05:42 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (rb_econv_insert_output): "readagain" part should be

Просмотреть файл

@ -79,7 +79,7 @@ rb_escape_xml_attr_quote = {
1, /* input_unit_length */
1, /* max_input */
7, /* max_output */
stateful_encoder, /* stateful_type */
asciicompat_encoder, /* asciicompat_type */
1, escape_xml_attr_quote_init, escape_xml_attr_quote_init,
NULL, NULL, NULL, fun_so_escape_xml_attr_quote,
escape_xml_attr_quote_finish

Просмотреть файл

@ -114,7 +114,7 @@ rb_iso2022jp_decoder = {
1, /* input_unit_length */
3, /* max_input */
3, /* max_output */
stateful_decoder, /* stateful_type */
asciicompat_decoder, /* asciicompat_type */
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
NULL, fun_si_iso2022jp_decoder, NULL, fun_so_iso2022jp_decoder
};
@ -196,7 +196,7 @@ rb_iso2022jp_encoder = {
1, /* input_unit_length */
3, /* max_input */
5, /* max_output */
stateful_encoder, /* stateful_type */
asciicompat_encoder, /* asciicompat_type */
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_iso2022jp_encoder,
finish_iso2022jp_encoder,
@ -218,7 +218,7 @@ rb_stateless_iso2022jp_to_eucjp = {
1, /* input_unit_length */
3, /* max_input */
2, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_stateless_iso2022jp_to_eucjp,
};
@ -239,7 +239,7 @@ rb_eucjp_to_stateless_iso2022jp = {
1, /* input_unit_length */
3, /* max_input */
3, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_eucjp_to_stateless_iso2022jp,
};

Просмотреть файл

@ -73,7 +73,7 @@ rb_eucjp2sjis = {
1, /* input_unit_length */
3, /* max_input */
2, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_eucjp2sjis
};
@ -85,7 +85,7 @@ rb_sjis2eucjp = {
1, /* input_unit_length */
2, /* max_input */
2, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_sjis2eucjp
};

Просмотреть файл

@ -92,7 +92,7 @@ rb_universal_newline = {
1, /* input_unit_length */
1, /* max_input */
1, /* max_output */
stateful_decoder, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_universal_newline,
universal_newline_finish
@ -105,7 +105,7 @@ rb_crlf_newline = {
1, /* input_unit_length */
1, /* max_input */
2, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL
};
@ -117,7 +117,7 @@ rb_cr_newline = {
1, /* input_unit_length */
1, /* max_input */
1, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL
};

Просмотреть файл

@ -266,7 +266,7 @@ rb_from_UTF_16BE = {
2, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_16be
};
@ -278,7 +278,7 @@ rb_to_UTF_16BE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_16be
};
@ -290,7 +290,7 @@ rb_from_UTF_16LE = {
2, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_16le
};
@ -302,7 +302,7 @@ rb_to_UTF_16LE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_16le
};
@ -314,7 +314,7 @@ rb_from_UTF_32BE = {
4, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_32be
};
@ -326,7 +326,7 @@ rb_to_UTF_32BE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_32be
};
@ -338,7 +338,7 @@ rb_from_UTF_32LE = {
4, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_decoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_from_utf_32le
};
@ -350,7 +350,7 @@ rb_to_UTF_32LE = {
1, /* input_unit_length */
4, /* max_input */
4, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_encoder, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, fun_so_to_utf_32le
};

Просмотреть файл

@ -239,8 +239,9 @@ void rb_econv_check_error(rb_econv_t *ec);
int rb_econv_putbackable(rb_econv_t *ec);
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
/* returns corresponding stateless encoding, or NULL if not stateful. */
const char *rb_econv_stateless_encoding(const char *stateful_enc);
/* returns the corresponding ASCII compatible encoding for encname,
* or NULL if encname is not an ASCII incompatible encoding. */
const char *rb_econv_asciicompat_encoding(const char *encname);
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags);
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags);

2
io.c
Просмотреть файл

@ -713,7 +713,7 @@ make_writeconv(rb_io_t *fptr)
}
else {
enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
senc = rb_econv_stateless_encoding(enc->name);
senc = rb_econv_asciicompat_encoding(enc->name);
if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) {
/* single conversion */
fptr->writeconv_pre_ecflags = ecflags;

Просмотреть файл

@ -27,20 +27,24 @@ class TestEncodingConverter < Test::Unit::TestCase
ec.primitive_errinfo)
end
def test_s_stateless_encoding
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding("ISO-2022-JP"))
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding(Encoding::ISO_2022_JP))
assert_nil(Encoding::Converter.stateless_encoding("EUC-JP"))
assert_nil(Encoding::Converter.stateless_encoding("UTF-8"))
assert_nil(Encoding::Converter.stateless_encoding("UTF-16BE"))
assert_nil(Encoding::Converter.stateless_encoding(Encoding::UTF_8))
assert_nil(Encoding::Converter.stateless_encoding("xml-attr-escaped"))
def test_s_asciicompat_encoding
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding("ISO-2022-JP"))
assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(Encoding::ISO_2022_JP))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16BE"))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16LE"))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32BE"))
assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32LE"))
assert_nil(Encoding::Converter.asciicompat_encoding("EUC-JP"))
assert_nil(Encoding::Converter.asciicompat_encoding("UTF-8"))
assert_nil(Encoding::Converter.asciicompat_encoding(Encoding::UTF_8))
assert_nil(Encoding::Converter.asciicompat_encoding("xml-attr-escaped"))
assert_nil(Encoding::Converter.asciicompat_encoding("encoding-not-exist"))
end
def test_stateless_encoding_iso2022jp
slenc = Encoding::Converter.stateless_encoding("ISO-2022-JP")
def test_asciicompat_encoding_iso2022jp
acenc = Encoding::Converter.asciicompat_encoding("ISO-2022-JP")
str = "\e$B~~\(B".force_encoding("iso-2022-jp")
str2 = str.encode(slenc)
str2 = str.encode(acenc)
str3 = str.encode("ISO-2022-JP")
assert_equal(str, str3)
end

Просмотреть файл

@ -634,7 +634,7 @@ static const rb_transcoder
#{input_unit_length}, /* input_unit_length */
#{max_input}, /* max_input */
#{max_output}, /* max_output */
stateless_converter, /* stateful_type */
asciicompat_converter, /* asciicompat_type */
0, NULL, NULL, /* state_size, state_init, state_fini */
NULL, NULL, NULL, NULL,
NULL, NULL, NULL

Просмотреть файл

@ -1414,7 +1414,7 @@ rb_econv_encoding_to_insert_output(rb_econv_t *ec)
tr = tc->transcoder;
if (tr->stateful_type == stateful_encoder)
if (tr->asciicompat_type == asciicompat_encoder)
return tr->src_encoding;
return tr->dst_encoding;
}
@ -1528,7 +1528,7 @@ rb_econv_insert_output(rb_econv_t *ec,
data_end_p = &ec->in_data_end;
buf_end_p = &ec->in_buf_end;
}
else if (tc->transcoder->stateful_type == stateful_encoder) {
else if (tc->transcoder->asciicompat_type == asciicompat_encoder) {
need += tc->readagain_len;
if (need < insert_len)
goto fail;
@ -1580,7 +1580,7 @@ rb_econv_insert_output(rb_econv_t *ec,
memcpy(*data_end_p, insert_str, insert_len);
*data_end_p += insert_len;
if (tc && tc->transcoder->stateful_type == stateful_encoder) {
if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
*data_end_p += tc->readagain_len;
tc->readagain_len = 0;
@ -1633,27 +1633,31 @@ rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
tc->readagain_len -= n;
}
struct stateless_encoding_t {
const char *stateless_enc;
const char *stateful_enc;
struct asciicompat_encoding_t {
const char *ascii_compat_name;
const char *ascii_incompat_name;
};
static int
stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
{
struct stateless_encoding_t *data = (struct stateless_encoding_t *)arg;
struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg;
st_table *table2 = (st_table *)val;
st_data_t v;
if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) {
if (st_lookup(table2, (st_data_t)data->ascii_incompat_name, &v)) {
transcoder_entry_t *entry = (transcoder_entry_t *)v;
const rb_transcoder *tr;
if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) {
if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname))
return ST_CONTINUE;
}
tr = load_transcoder_entry(entry);
if (tr && tr->stateful_type == stateful_encoder) {
data->stateless_enc = tr->src_encoding;
if (tr && tr->asciicompat_type == asciicompat_encoder) {
/*
* Assumption:
* There is only one transcoder for
* converting to ASCII incompatible encoding.
*/
data->ascii_compat_name = tr->src_encoding;
return ST_STOP;
}
}
@ -1661,14 +1665,14 @@ stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
}
const char *
rb_econv_stateless_encoding(const char *stateful_enc)
rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
{
struct stateless_encoding_t data;
data.stateful_enc = stateful_enc;
data.stateless_enc = NULL;
st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data);
if (data.stateless_enc)
return data.stateless_enc;
struct asciicompat_encoding_t data;
data.ascii_incompat_name = ascii_incompat_name;
data.ascii_compat_name = NULL;
st_foreach(transcoder_table, asciicompat_encoding_i, (st_data_t)&data);
if (data.ascii_compat_name)
return data.ascii_compat_name;
return NULL;
}
@ -2510,42 +2514,42 @@ make_dummy_encoding(const char *name)
/*
* call-seq:
* Encoding::Converter.stateless_encoding(string) => encoding or nil
* Encoding::Converter.stateless_encoding(encoding) => encoding or nil
* Encoding::Converter.asciicompat_encoding(string) => encoding or nil
* Encoding::Converter.asciicompat_encoding(encoding) => encoding or nil
*
* returns the corresponding stateless encoding.
* returns the corresponding ASCII compatible encoding.
*
* It returns nil if the argument is not a stateful encoding.
* It returns nil if the argument is an ASCII compatible encoding.
*
* "corresponding stateless encoding" is a stateless encoding which
* represents same characters in the statefull encoding.
* "corresponding ASCII compatible encoding" is an ASCII compatible encoding which
* represents the same characters as the given ASCII incompatible encoding.
*
* So, no conversion undefined error occur between the stateful encoding and the stateless encoding.
*
* For ISO-2022-JP, the dedicated stateless encoding, stateless-ISO-2022-JP, is defined.
* So, no conversion undefined error occurs between the ASCII compatible and incompatible encoding.
*
* Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
* Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
* Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
*
*/
static VALUE
econv_s_stateless_encoding(VALUE klass, VALUE arg)
econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
{
const char *stateful_name, *stateless_name;
rb_encoding *stateful_enc, *stateless_enc;
const char *arg_name, *result_name;
rb_encoding *arg_enc, *result_enc;
enc_arg(arg, &stateful_name, &stateful_enc);
enc_arg(arg, &arg_name, &arg_enc);
stateless_name = rb_econv_stateless_encoding(stateful_name);
result_name = rb_econv_asciicompat_encoding(arg_name);
if (stateless_name == NULL)
if (result_name == NULL)
return Qnil;
stateless_enc = rb_enc_find(stateless_name);
result_enc = rb_enc_find(result_name);
if (!stateless_enc)
stateless_enc = make_dummy_encoding(stateless_name);
if (!result_enc)
result_enc = make_dummy_encoding(result_name);
return rb_enc_from_encoding(stateless_enc);
return rb_enc_from_encoding(result_enc);
}
/*
@ -3563,7 +3567,7 @@ Init_transcode(void)
rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
rb_define_singleton_method(rb_cEncodingConverter, "stateless_encoding", econv_s_stateless_encoding, 1);
rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);

Просмотреть файл

@ -57,11 +57,11 @@
#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
typedef enum {
stateless_converter, /* stateless -> stateless */
stateful_decoder, /* stateful -> stateless */
stateful_encoder /* stateless -> stateful */
/* stateful -> stateful is intentionally ommitted. */
} rb_transcoder_stateful_type_t;
asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */
asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */
asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */
/* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */
} rb_transcoder_asciicompat_type_t;
typedef struct rb_transcoder rb_transcoder;
@ -78,7 +78,7 @@ struct rb_transcoder {
int input_unit_length;
int max_input;
int max_output;
rb_transcoder_stateful_type_t stateful_type;
rb_transcoder_asciicompat_type_t asciicompat_type;
size_t state_size;
int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */
int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */