* include/ruby/encoding.h (rb_econv_result_t): moved from
  transcode_data.h.
  (rb_econv_elem_t): ditto.
  (rb_econv_t): ditto.  source_encoding and destination_encoding fields
  are added.
  (rb_econv_open): declared.
  (rb_econv_convert): ditto.
  (rb_econv_close): ditto.

* transcode.c (rb_econv_open_by_transcoder_entries): initialize
  source_encoding and destination_encoding fields to NULL.
  (rb_econv_open): make it external linkage.
  (rb_econv_close): ditto.
  (rb_econv_convert): ditto.  renamed from rb_econv_conv.
  (make_encoding): new function.
  (econv_init): use make_encoding and store rb_encoding* in
  rb_econv_t.
  (econv_source_encoding): new method
  Encoding::Converter#source_encoding.
  (econv_destination_encoding): new method
  Encoding::Converter#destination_encoding.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18625 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-08-14 14:28:10 +00:00
Родитель 87779b507e
Коммит c82aee31b4
5 изменённых файлов: 135 добавлений и 58 удалений

Просмотреть файл

@ -1,3 +1,27 @@
Thu Aug 14 23:22:24 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/encoding.h (rb_econv_result_t): moved from
transcode_data.h.
(rb_econv_elem_t): ditto.
(rb_econv_t): ditto. source_encoding and destination_encoding fields
are added.
(rb_econv_open): declared.
(rb_econv_convert): ditto.
(rb_econv_close): ditto.
* transcode.c (rb_econv_open_by_transcoder_entries): initialize
source_encoding and destination_encoding fields to NULL.
(rb_econv_open): make it external linkage.
(rb_econv_close): ditto.
(rb_econv_convert): ditto. renamed from rb_econv_conv.
(make_encoding): new function.
(econv_init): use make_encoding and store rb_encoding* in
rb_econv_t.
(econv_source_encoding): new method
Encoding::Converter#source_encoding.
(econv_destination_encoding): new method
Encoding::Converter#destination_encoding.
Thu Aug 14 22:44:32 2008 Tanaka Akira <akr@fsij.org>
* transcode_data.h (rb_econv_result_t): change enumeration

Просмотреть файл

@ -196,4 +196,45 @@ rb_enc_dummy_p(rb_encoding *enc)
VALUE rb_str_transcode(VALUE str, VALUE to);
/* econv stuff */
/*
 * Result codes returned by rb_econv_convert().
 *
 * The numeric values follow declaration order (0..5); do not reorder.
 * No trailing comma after the last enumerator: that form is only valid
 * from C99 on and is rejected by C89 and pre-C++11 compilers, which may
 * include this public header.
 */
typedef enum {
    econv_invalid_byte_sequence,
    econv_undefined_conversion,
    econv_destination_buffer_full,
    econv_source_buffer_empty,
    econv_finished,
    econv_output_followed_by_input
} rb_econv_result_t;
/*
 * One entry of rb_econv_t.elems.  rb_econv_open_by_transcoder_entries()
 * allocates num_trans of these and fills each one from a loaded
 * transcoder.
 */
typedef struct {
/* set from the transcoder's from_encoding */
const char *from;
/* presumably the transcoder's to_encoding -- TODO confirm in transcode.c */
const char *to;
struct rb_transcoding *tc;
/* NOTE(review): the four pointers below look like the bounds of this
 * step's output buffer and of the pending data inside it -- confirm
 * against transcode.c before relying on that. */
unsigned char *out_buf_start;
unsigned char *out_data_start;
unsigned char *out_data_end;
unsigned char *out_buf_end;
rb_econv_result_t last_result;
} rb_econv_elem_t;
/*
 * Converter state shared by rb_econv_open(), rb_econv_convert() and
 * rb_econv_close().
 */
typedef struct {
/* array of num_trans elements (allocated with ALLOC_N) */
rb_econv_elem_t *elems;
int num_trans;
/* initialized to 0 when the converter is opened */
int num_finished;
/* initialized to NULL when the converter is opened */
struct rb_transcoding *last_tc;
/* The following fields are used only by Encoding::Converter.
 * rb_econv_open sets them to NULL; Encoding::Converter#initialize
 * stores the resolved encodings here afterwards. */
rb_encoding *source_encoding;
rb_encoding *destination_encoding;
} rb_econv_t;
/* Open a converter from the encoding named `from` to the one named `to`.
 * Callers in transcode.c treat a NULL return as "conversion not
 * supported". */
rb_econv_t *rb_econv_open(const char *from, const char *to, int flags);
/* Run the converter `ec` over [*input_ptr, input_stop) and write into
 * [*output_ptr, output_stop).  Callers read *input_ptr and *output_ptr
 * after the call to learn how far each side advanced.  input_ptr and
 * input_stop may both be NULL when there is no further input to supply.
 * `flags` carries options such as PARTIAL_INPUT. */
rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
const unsigned char **input_ptr, const unsigned char *input_stop,
unsigned char **output_ptr, unsigned char *output_stop,
int flags);
/* Dispose of a converter obtained from rb_econv_open().
 * NOTE(review): presumably releases everything the converter owns --
 * confirm against the definition in transcode.c. */
void rb_econv_close(rb_econv_t *ec);
#endif /* RUBY_ENCODING_H */

Просмотреть файл

@ -25,6 +25,12 @@ class TestEncodingConverter < Test::Unit::TestCase
assert_kind_of(Encoding::Converter, Encoding::Converter.new(Encoding::UTF_8, Encoding::EUC_JP))
end
# A converter built from encoding names reports the matching Encoding
# objects through #source_encoding and #destination_encoding.
def test_get_encoding
  converter = Encoding::Converter.new("UTF-8", "EUC-JP")
  assert_equal(Encoding::UTF_8, converter.source_encoding)
  assert_equal(Encoding::EUC_JP, converter.destination_encoding)
end
def test_output_region
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT)

Просмотреть файл

@ -678,6 +678,8 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
ts->elems = ALLOC_N(rb_econv_elem_t, ts->num_trans);
ts->num_finished = 0;
ts->last_tc = NULL;
ts->source_encoding = NULL;
ts->destination_encoding = NULL;
for (i = 0; i < ts->num_trans; i++) {
const rb_transcoder *tr = load_transcoder_entry(entries[i]);
ts->elems[i].from = tr->from_encoding;
@ -720,7 +722,7 @@ trans_open_i(const char *from, const char *to, int depth, void *arg)
entries[depth] = get_transcoder_entry(from, to);
}
static rb_econv_t *
rb_econv_t *
rb_econv_open(const char *from, const char *to, int flags)
{
transcoder_entry_t **entries = NULL;
@ -921,8 +923,8 @@ found_needreport:
return econv_source_buffer_empty;
}
static rb_econv_result_t
rb_econv_conv(rb_econv_t *ts,
rb_econv_result_t
rb_econv_convert(rb_econv_t *ts,
const unsigned char **input_ptr, const unsigned char *input_stop,
unsigned char **output_ptr, unsigned char *output_stop,
int flags)
@ -940,7 +942,7 @@ rb_econv_conv(rb_econv_t *ts,
return res;
}
static void
void
rb_econv_close(rb_econv_t *ts)
{
int i;
@ -1049,7 +1051,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
max_output = last_tc->transcoder->max_output;
resume:
ret = rb_econv_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
ret = rb_econv_convert(ts, in_pos, in_stop, out_pos, out_stop, opt);
if (ret == econv_invalid_byte_sequence) {
/* deal with invalid byte sequence */
/* todo: add more alternative behaviors */
@ -1119,14 +1121,14 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
if (ret == econv_source_buffer_empty) {
if (ptr < in_stop) {
input_byte = *ptr;
ret = rb_econv_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
ret = rb_econv_convert(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
}
else {
ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, 0);
ret = rb_econv_convert(ts, NULL, NULL, out_pos, out_stop, 0);
}
}
else {
ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
ret = rb_econv_convert(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
}
if (&input_byte != p)
ptr += p - &input_byte;
@ -1381,6 +1383,20 @@ econv_s_allocate(VALUE klass)
return Data_Wrap_Struct(klass, NULL, econv_free, NULL);
}
/*
 * Resolve `encoding` to an rb_encoding pointer.
 *
 * The index is looked up with rb_to_encoding_index(); when that yields a
 * negative index, a dummy encoding is defined under the name given by
 * `encoding` (converted with StringValueCStr) and its index is used
 * instead.
 */
static rb_encoding *
make_encoding(VALUE encoding)
{
    int enc_index = rb_to_encoding_index(encoding);

    if (enc_index < 0) {
        enc_index = rb_define_dummy_encoding(StringValueCStr(encoding));
    }
    return rb_enc_from_index(enc_index);
}
/*
* call-seq:
* Encoding::Converter.new(source_encoding, destination_encoding)
@ -1414,7 +1430,6 @@ econv_init(int argc, VALUE *argv, VALUE self)
{
VALUE source_encoding, destination_encoding, flags_v;
rb_encoding *senc, *denc;
const char *sname, *dname;
rb_econv_t *ec;
int flags;
@ -1425,35 +1440,21 @@ econv_init(int argc, VALUE *argv, VALUE self)
else
flags = NUM2INT(flags_v);
senc = NULL;
if (TYPE(source_encoding) != T_STRING) {
senc = rb_to_encoding(source_encoding);
}
denc = NULL;
if (TYPE(destination_encoding) != T_STRING) {
denc = rb_to_encoding(destination_encoding);
}
if (senc)
sname = senc->name;
else
sname = RSTRING_PTR(source_encoding);
if (denc)
dname = denc->name;
else
dname = RSTRING_PTR(destination_encoding);
senc = make_encoding(source_encoding);
denc = make_encoding(destination_encoding);
if (DATA_PTR(self)) {
rb_raise(rb_eTypeError, "already initialized");
}
ec = rb_econv_open(sname, dname, flags);
ec = rb_econv_open(senc->name, denc->name, flags);
if (!ec) {
rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", sname, dname);
rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", senc->name, denc->name);
}
ec->source_encoding = senc;
ec->destination_encoding = denc;
DATA_PTR(self) = ec;
return self;
@ -1489,6 +1490,36 @@ check_econv(VALUE self)
return DATA_PTR(self);
}
/*
 * call-seq:
 *   source_encoding -> encoding
 *
 * Returns the source encoding as an Encoding object, or nil when the
 * converter has no source encoding recorded.
 */
static VALUE
econv_source_encoding(VALUE self)
{
    rb_econv_t *converter = check_econv(self);
    rb_encoding *enc = converter->source_encoding;

    return enc ? rb_enc_from_encoding(enc) : Qnil;
}
/*
 * call-seq:
 *   destination_encoding -> encoding
 *
 * Returns the destination encoding as an Encoding object, or nil when
 * the converter has no destination encoding recorded.
 */
static VALUE
econv_destination_encoding(VALUE self)
{
    rb_econv_t *converter = check_econv(self);
    rb_encoding *enc = converter->destination_encoding;

    return enc ? rb_enc_from_encoding(enc) : Qnil;
}
/*
* call-seq:
* primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
@ -1612,7 +1643,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
os = op + output_bytesize;
res = rb_econv_conv(ts, &ip, is, &op, os, flags);
res = rb_econv_convert(ts, &ip, is, &op, os, flags);
rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
@ -1647,6 +1678,8 @@ Init_transcode(void)
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);
rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0);
rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(OUTPUT_FOLLOWED_BY_INPUT));

Просмотреть файл

@ -111,33 +111,6 @@ struct rb_transcoder {
int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
};
typedef enum {
econv_invalid_byte_sequence,
econv_undefined_conversion,
econv_destination_buffer_full,
econv_source_buffer_empty,
econv_finished,
econv_output_followed_by_input,
} rb_econv_result_t;
typedef struct {
const char *from;
const char *to;
rb_transcoding *tc;
unsigned char *out_buf_start;
unsigned char *out_data_start;
unsigned char *out_data_end;
unsigned char *out_buf_end;
rb_econv_result_t last_result;
} rb_econv_elem_t;
typedef struct {
rb_econv_elem_t *elems;
int num_trans;
int num_finished;
rb_transcoding *last_tc;
} rb_econv_t;
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
void rb_register_transcoder(const rb_transcoder *);