зеркало из https://github.com/github/ruby.git
* include/ruby/encoding.h (rb_econv_result_t): moved from
transcode_data.h. (rb_econv_elem_t): ditto. (rb_econv_t): ditto. source_encoding and destination_encoding fields are added. (rb_econv_open): declared. (rb_econv_convert): ditto. (rb_econv_close): ditto. * transcode.c (rb_econv_open_by_transcoder_entries): initialize source_encoding and destination_encoding fields as NULL. (rb_econv_open): make it external linkage. (rb_econv_close): ditto. (rb_econv_convert): ditto. renamed from rb_econv_conv. (make_encoding): new function. (econv_init): use make_encoding and store rb_encoding* in rb_econv_t. (econv_source_encoding): new method Encoding::Converter#source_encoding. (econv_destination_encoding): new method Encoding::Converter#destination_encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18625 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
87779b507e
Коммит
c82aee31b4
24
ChangeLog
24
ChangeLog
|
@ -1,3 +1,27 @@
|
|||
Thu Aug 14 23:22:24 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* include/ruby/encoding.h (rb_econv_result_t): moved from
|
||||
transcode_data.h.
|
||||
(rb_econv_elem_t): ditto.
|
||||
(rb_econv_t): ditto. source_encoding and destination_encoding fields
|
||||
are added.
|
||||
(rb_econv_open): declared.
|
||||
(rb_econv_convert): ditto.
|
||||
(rb_econv_close): ditto.
|
||||
|
||||
* transcode.c (rb_econv_open_by_transcoder_entries): initialize
|
||||
source_encoding and destination_encoding fields as NULL.
|
||||
(rb_econv_open): make it external linkage.
|
||||
(rb_econv_close): ditto.
|
||||
(rb_econv_convert): ditto. renamed from rb_econv_conv.
|
||||
(make_encoding): new function.
|
||||
(econv_init): use make_encoding and store rb_encoding* in
|
||||
rb_econv_t.
|
||||
(econv_source_encoding): new method
|
||||
Encoding::Converter#source_encoding.
|
||||
(econv_destination_encoding): new method
|
||||
Encoding::Converter#destination_encoding.
|
||||
|
||||
Thu Aug 14 22:44:32 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode_data.h (rb_econv_result_t): change enumeration
|
||||
|
|
|
@ -196,4 +196,45 @@ rb_enc_dummy_p(rb_encoding *enc)
|
|||
|
||||
VALUE rb_str_transcode(VALUE str, VALUE to);
|
||||
|
||||
/* econv stuff */
|
||||
|
||||
typedef enum {
|
||||
econv_invalid_byte_sequence,
|
||||
econv_undefined_conversion,
|
||||
econv_destination_buffer_full,
|
||||
econv_source_buffer_empty,
|
||||
econv_finished,
|
||||
econv_output_followed_by_input,
|
||||
} rb_econv_result_t;
|
||||
|
||||
typedef struct {
|
||||
const char *from;
|
||||
const char *to;
|
||||
struct rb_transcoding *tc;
|
||||
unsigned char *out_buf_start;
|
||||
unsigned char *out_data_start;
|
||||
unsigned char *out_data_end;
|
||||
unsigned char *out_buf_end;
|
||||
rb_econv_result_t last_result;
|
||||
} rb_econv_elem_t;
|
||||
|
||||
typedef struct {
|
||||
rb_econv_elem_t *elems;
|
||||
int num_trans;
|
||||
int num_finished;
|
||||
struct rb_transcoding *last_tc;
|
||||
|
||||
/* The following fields are only for Encoding::Converter.
|
||||
* rb_econv_open sets them to NULL. */
|
||||
rb_encoding *source_encoding;
|
||||
rb_encoding *destination_encoding;
|
||||
} rb_econv_t;
|
||||
|
||||
rb_econv_t *rb_econv_open(const char *from, const char *to, int flags);
|
||||
rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
|
||||
const unsigned char **input_ptr, const unsigned char *input_stop,
|
||||
unsigned char **output_ptr, unsigned char *output_stop,
|
||||
int flags);
|
||||
void rb_econv_close(rb_econv_t *ec);
|
||||
|
||||
#endif /* RUBY_ENCODING_H */
|
||||
|
|
|
@ -25,6 +25,12 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
assert_kind_of(Encoding::Converter, Encoding::Converter.new(Encoding::UTF_8, Encoding::EUC_JP))
|
||||
end
|
||||
|
||||
def test_get_encoding
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
assert_equal(Encoding::UTF_8, ec.source_encoding)
|
||||
assert_equal(Encoding::EUC_JP, ec.destination_encoding)
|
||||
end
|
||||
|
||||
def test_output_region
|
||||
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
|
||||
ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT)
|
||||
|
|
95
transcode.c
95
transcode.c
|
@ -678,6 +678,8 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
|
|||
ts->elems = ALLOC_N(rb_econv_elem_t, ts->num_trans);
|
||||
ts->num_finished = 0;
|
||||
ts->last_tc = NULL;
|
||||
ts->source_encoding = NULL;
|
||||
ts->destination_encoding = NULL;
|
||||
for (i = 0; i < ts->num_trans; i++) {
|
||||
const rb_transcoder *tr = load_transcoder_entry(entries[i]);
|
||||
ts->elems[i].from = tr->from_encoding;
|
||||
|
@ -720,7 +722,7 @@ trans_open_i(const char *from, const char *to, int depth, void *arg)
|
|||
entries[depth] = get_transcoder_entry(from, to);
|
||||
}
|
||||
|
||||
static rb_econv_t *
|
||||
rb_econv_t *
|
||||
rb_econv_open(const char *from, const char *to, int flags)
|
||||
{
|
||||
transcoder_entry_t **entries = NULL;
|
||||
|
@ -921,8 +923,8 @@ found_needreport:
|
|||
return econv_source_buffer_empty;
|
||||
}
|
||||
|
||||
static rb_econv_result_t
|
||||
rb_econv_conv(rb_econv_t *ts,
|
||||
rb_econv_result_t
|
||||
rb_econv_convert(rb_econv_t *ts,
|
||||
const unsigned char **input_ptr, const unsigned char *input_stop,
|
||||
unsigned char **output_ptr, unsigned char *output_stop,
|
||||
int flags)
|
||||
|
@ -940,7 +942,7 @@ rb_econv_conv(rb_econv_t *ts,
|
|||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
rb_econv_close(rb_econv_t *ts)
|
||||
{
|
||||
int i;
|
||||
|
@ -1049,7 +1051,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
max_output = last_tc->transcoder->max_output;
|
||||
|
||||
resume:
|
||||
ret = rb_econv_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
ret = rb_econv_convert(ts, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
if (ret == econv_invalid_byte_sequence) {
|
||||
/* deal with invalid byte sequence */
|
||||
/* todo: add more alternative behaviors */
|
||||
|
@ -1119,14 +1121,14 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
if (ret == econv_source_buffer_empty) {
|
||||
if (ptr < in_stop) {
|
||||
input_byte = *ptr;
|
||||
ret = rb_econv_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
|
||||
ret = rb_econv_convert(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
|
||||
}
|
||||
else {
|
||||
ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, 0);
|
||||
ret = rb_econv_convert(ts, NULL, NULL, out_pos, out_stop, 0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
|
||||
ret = rb_econv_convert(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
|
||||
}
|
||||
if (&input_byte != p)
|
||||
ptr += p - &input_byte;
|
||||
|
@ -1381,6 +1383,20 @@ econv_s_allocate(VALUE klass)
|
|||
return Data_Wrap_Struct(klass, NULL, econv_free, NULL);
|
||||
}
|
||||
|
||||
static rb_encoding *
|
||||
make_encoding(VALUE encoding)
|
||||
{
|
||||
int idx = rb_to_encoding_index(encoding);
|
||||
rb_encoding *enc;
|
||||
if (0 <= idx)
|
||||
enc = rb_enc_from_index(idx);
|
||||
else {
|
||||
idx = rb_define_dummy_encoding(StringValueCStr(encoding));
|
||||
enc = rb_enc_from_index(idx);
|
||||
}
|
||||
return enc;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* Encoding::Converter.new(source_encoding, destination_encoding)
|
||||
|
@ -1414,7 +1430,6 @@ econv_init(int argc, VALUE *argv, VALUE self)
|
|||
{
|
||||
VALUE source_encoding, destination_encoding, flags_v;
|
||||
rb_encoding *senc, *denc;
|
||||
const char *sname, *dname;
|
||||
rb_econv_t *ec;
|
||||
int flags;
|
||||
|
||||
|
@ -1425,35 +1440,21 @@ econv_init(int argc, VALUE *argv, VALUE self)
|
|||
else
|
||||
flags = NUM2INT(flags_v);
|
||||
|
||||
senc = NULL;
|
||||
if (TYPE(source_encoding) != T_STRING) {
|
||||
senc = rb_to_encoding(source_encoding);
|
||||
}
|
||||
|
||||
denc = NULL;
|
||||
if (TYPE(destination_encoding) != T_STRING) {
|
||||
denc = rb_to_encoding(destination_encoding);
|
||||
}
|
||||
|
||||
if (senc)
|
||||
sname = senc->name;
|
||||
else
|
||||
sname = RSTRING_PTR(source_encoding);
|
||||
|
||||
if (denc)
|
||||
dname = denc->name;
|
||||
else
|
||||
dname = RSTRING_PTR(destination_encoding);
|
||||
senc = make_encoding(source_encoding);
|
||||
denc = make_encoding(destination_encoding);
|
||||
|
||||
if (DATA_PTR(self)) {
|
||||
rb_raise(rb_eTypeError, "already initialized");
|
||||
}
|
||||
|
||||
ec = rb_econv_open(sname, dname, flags);
|
||||
ec = rb_econv_open(senc->name, denc->name, flags);
|
||||
if (!ec) {
|
||||
rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", sname, dname);
|
||||
rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", senc->name, denc->name);
|
||||
}
|
||||
|
||||
ec->source_encoding = senc;
|
||||
ec->destination_encoding = denc;
|
||||
|
||||
DATA_PTR(self) = ec;
|
||||
|
||||
return self;
|
||||
|
@ -1489,6 +1490,36 @@ check_econv(VALUE self)
|
|||
return DATA_PTR(self);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* source_encoding -> encoding
|
||||
*
|
||||
* returns source encoding as Encoding object.
|
||||
*/
|
||||
static VALUE
|
||||
econv_source_encoding(VALUE self)
|
||||
{
|
||||
rb_econv_t *ec = check_econv(self);
|
||||
if (!ec->source_encoding)
|
||||
return Qnil;
|
||||
return rb_enc_from_encoding(ec->source_encoding);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* destination_encoding -> encoding
|
||||
*
|
||||
* returns destination encoding as Encoding object.
|
||||
*/
|
||||
static VALUE
|
||||
econv_destination_encoding(VALUE self)
|
||||
{
|
||||
rb_econv_t *ec = check_econv(self);
|
||||
if (!ec->destination_encoding)
|
||||
return Qnil;
|
||||
return rb_enc_from_encoding(ec->destination_encoding);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
|
||||
|
@ -1612,7 +1643,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
|
|||
op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
|
||||
os = op + output_bytesize;
|
||||
|
||||
res = rb_econv_conv(ts, &ip, is, &op, os, flags);
|
||||
res = rb_econv_convert(ts, &ip, is, &op, os, flags);
|
||||
rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
|
||||
rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
|
||||
|
||||
|
@ -1647,6 +1678,8 @@ Init_transcode(void)
|
|||
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
|
||||
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
|
||||
rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
|
||||
rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);
|
||||
rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0);
|
||||
rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
|
||||
rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
|
||||
rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(OUTPUT_FOLLOWED_BY_INPUT));
|
||||
|
|
|
@ -111,33 +111,6 @@ struct rb_transcoder {
|
|||
int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
econv_invalid_byte_sequence,
|
||||
econv_undefined_conversion,
|
||||
econv_destination_buffer_full,
|
||||
econv_source_buffer_empty,
|
||||
econv_finished,
|
||||
econv_output_followed_by_input,
|
||||
} rb_econv_result_t;
|
||||
|
||||
typedef struct {
|
||||
const char *from;
|
||||
const char *to;
|
||||
rb_transcoding *tc;
|
||||
unsigned char *out_buf_start;
|
||||
unsigned char *out_data_start;
|
||||
unsigned char *out_data_end;
|
||||
unsigned char *out_buf_end;
|
||||
rb_econv_result_t last_result;
|
||||
} rb_econv_elem_t;
|
||||
|
||||
typedef struct {
|
||||
rb_econv_elem_t *elems;
|
||||
int num_trans;
|
||||
int num_finished;
|
||||
rb_transcoding *last_tc;
|
||||
} rb_econv_t;
|
||||
|
||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||
void rb_register_transcoder(const rb_transcoder *);
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче