зеркало из https://github.com/github/ruby.git
* enc/trans/newline.trans: new file.
* transcode_data.h (rb_trans_t): add last_tc field. * transcode.c (UNIVERSAL_NEWLINE): defined. (CRLF_NEWLINE): defined. (CR_NEWLINE): defined. (rb_trans_open_by_transcoder_entries): initialize last_tc. (trans_open_i): allocate one more room for newline converter. (rb_trans_open): universal newline implemented. (more_output_buffer): take max_output argument instead of ts. (output_replacement_character): take tc argument instead of ts. (transcode_loop): use last_tc field. (econv_init): add flags argument for rb_trans_open. (Init_transcode): Encoding::Converter::UNIVERSAL_NEWLINE defined. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18556 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
ab0ee1d5e9
Коммит
74a2a7bdbf
18
ChangeLog
18
ChangeLog
|
@ -1,3 +1,21 @@
|
|||
Wed Aug 13 14:22:16 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* enc/trans/newline.trans: new file.
|
||||
|
||||
* transcode_data.h (rb_trans_t): add last_tc field.
|
||||
|
||||
* transcode.c (UNIVERSAL_NEWLINE): defined.
|
||||
(CRLF_NEWLINE): defined.
|
||||
(CR_NEWLINE): defined.
|
||||
(rb_trans_open_by_transcoder_entries): initialize last_tc.
|
||||
(trans_open_i): allocate one more room for newline converter.
|
||||
(rb_trans_open): universal newline implemented.
|
||||
(more_output_buffer): take max_output argument instead of ts.
|
||||
(output_replacement_character): take tc argument instead of ts.
|
||||
(transcode_loop): use last_tc field.
|
||||
(econv_init): add flags argument for rb_trans_open.
|
||||
(Init_transcode): Encoding::Converter::UNIVERSAL_NEWLINE defined.
|
||||
|
||||
Wed Aug 13 14:00:19 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* common.mk (parse.c): generates parse.h together.
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
#include "transcode_data.h"
|
||||
|
||||
<%
  # Single-entry action map: the byte range "{00-ff}" is mapped to :func_so.
  # NOTE(review): presumably this makes every input byte go through the
  # func_so callback of the generated table -- confirm against the
  # transcode generator.
  map_normalize = { "{00-ff}" => :func_so }
%>

<%= transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") %>
|
||||
|
||||
static int
|
||||
fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
||||
{
|
||||
int len;
|
||||
/*
|
||||
t->stateful[0] == 0 : normal
|
||||
t->stateful[0] == 1 : just after '\r'
|
||||
*/
|
||||
if (s[0] == '\n') {
|
||||
if (t->stateful[0] == 0) {
|
||||
o[0] = '\n';
|
||||
len = 1;
|
||||
}
|
||||
else {
|
||||
len = 0;
|
||||
}
|
||||
t->stateful[0] = 0;
|
||||
}
|
||||
else if (s[0] == '\r') {
|
||||
o[0] = '\n';
|
||||
len = 1;
|
||||
t->stateful[0] = 1;
|
||||
}
|
||||
else {
|
||||
o[0] = s[0];
|
||||
len = 1;
|
||||
t->stateful[0] = 0;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static const rb_transcoder
|
||||
rb_universal_newline = {
|
||||
"universal_newline", "", &universal_newline,
|
||||
1, /* input_unit_length */
|
||||
1, /* max_input */
|
||||
1, /* max_output */
|
||||
NULL, NULL, NULL, fun_so_universal_newline
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
Init_newline(void)
|
||||
{
|
||||
rb_register_transcoder(&rb_universal_newline);
|
||||
}
|
||||
|
|
@ -4,7 +4,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
def assert_econv(ret_expected, dst_expected, src_expected, to, from, src, opt={})
|
||||
opt[:obuf_len] ||= 100
|
||||
src = src.dup
|
||||
ec = Encoding::Converter.new(from, to)
|
||||
ec = Encoding::Converter.new(from, to, 0)
|
||||
dst = ''
|
||||
while true
|
||||
ret = ec.primitive_convert(src, dst2="", opt[:obuf_len], 0)
|
||||
|
@ -35,7 +35,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
end
|
||||
|
||||
def test_errors
|
||||
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
|
||||
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP", 0)
|
||||
src = "\xFF\xFE\x00A\xDC\x00"
|
||||
ret = ec.primitive_convert(src, dst="", 10, 0)
|
||||
assert_equal("", src)
|
||||
|
@ -50,4 +50,18 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
assert_equal("", dst)
|
||||
assert_equal(:finished, ret)
|
||||
end
|
||||
|
||||
def test_universal_newline
  # One converter is fed several chunks in order, always with PARTIAL_INPUT,
  # so newline state carries over between chunks: the "\r" ending the
  # fourth chunk and the "\n" starting the fifth must collapse into a
  # single "\n".
  ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE)
  chunks = [
    ["abc\r\ndef", "abc\ndef"],
    ["ghi\njkl",   "ghi\njkl"],
    ["mno\rpqr",   "mno\npqr"],
    ["stu\r",      "stu\n"],
    ["\nvwx",      "vwx"],
  ]
  chunks.each do |input, expected|
    src = input.dup
    dst = ""
    ret = ec.primitive_convert(src, dst, 50, Encoding::Converter::PARTIAL_INPUT)
    assert_equal([:ibuf_empty, "", expected], [ret, src, dst])
  end
end
|
||||
end
|
||||
|
|
67
transcode.c
67
transcode.c
|
@ -25,7 +25,10 @@ static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
|
|||
#define INVALID_REPLACE 0x2
|
||||
#define UNDEF_IGNORE 0x10
|
||||
#define UNDEF_REPLACE 0x20
|
||||
#define PARTIAL_INPUT 0x100
|
||||
#define PARTIAL_INPUT 0x100
|
||||
#define UNIVERSAL_NEWLINE 0x200
|
||||
#define CRLF_NEWLINE 0x400
|
||||
#define CR_NEWLINE 0x800
|
||||
|
||||
/*
|
||||
* Dispatch data and logic
|
||||
|
@ -646,6 +649,7 @@ rb_trans_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
|
|||
ts->num_trans = n;
|
||||
ts->elems = ALLOC_N(rb_trans_elem_t, ts->num_trans);
|
||||
ts->num_finished = 0;
|
||||
ts->last_tc = NULL;
|
||||
for (i = 0; i < ts->num_trans; i++) {
|
||||
const rb_transcoder *tr = load_transcoder_entry(entries[i]);
|
||||
ts->elems[i].from = tr->from_encoding;
|
||||
|
@ -657,6 +661,7 @@ rb_trans_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
|
|||
ts->elems[i].out_buf_end = NULL;
|
||||
ts->elems[i].last_result = transcode_ibuf_empty;
|
||||
}
|
||||
ts->last_tc = ts->elems[ts->num_trans-1].tc;
|
||||
|
||||
for (i = 0; i < ts->num_trans-1; i++) {
|
||||
int bufsize = 4096;
|
||||
|
@ -678,7 +683,7 @@ trans_open_i(const char *from, const char *to, int depth, void *arg)
|
|||
transcoder_entry_t **entries;
|
||||
|
||||
if (!*entries_ptr) {
|
||||
entries = ALLOC_N(transcoder_entry_t *, depth+1);
|
||||
entries = ALLOC_N(transcoder_entry_t *, depth+1+1);
|
||||
*entries_ptr = entries;
|
||||
}
|
||||
else {
|
||||
|
@ -699,7 +704,19 @@ rb_trans_open(const char *from, const char *to, int flags)
|
|||
if (num_trans < 0 || !entries)
|
||||
return NULL;
|
||||
|
||||
if (flags & UNIVERSAL_NEWLINE) {
|
||||
transcoder_entry_t *e = get_transcoder_entry("universal_newline", "");
|
||||
if (!e)
|
||||
return NULL;
|
||||
entries[num_trans++] = e;
|
||||
}
|
||||
|
||||
ts = rb_trans_open_by_transcoder_entries(num_trans, entries);
|
||||
|
||||
if (flags & UNIVERSAL_NEWLINE) {
|
||||
ts->last_tc = ts->elems[ts->num_trans-2].tc;
|
||||
}
|
||||
|
||||
return ts;
|
||||
}
|
||||
|
||||
|
@ -840,13 +857,13 @@ static void
|
|||
more_output_buffer(
|
||||
VALUE destination,
|
||||
unsigned char *(*resize_destination)(VALUE, int, int),
|
||||
rb_trans_t *ts,
|
||||
int max_output,
|
||||
unsigned char **out_start_ptr,
|
||||
unsigned char **out_pos,
|
||||
unsigned char **out_stop_ptr)
|
||||
{
|
||||
size_t len = (*out_pos - *out_start_ptr);
|
||||
size_t new_len = (len + ts->elems[ts->num_trans-1].tc->transcoder->max_output) * 2;
|
||||
size_t new_len = (len + max_output) * 2;
|
||||
*out_start_ptr = resize_destination(destination, len, new_len);
|
||||
*out_pos = *out_start_ptr + len;
|
||||
*out_stop_ptr = *out_start_ptr + new_len;
|
||||
|
@ -856,20 +873,18 @@ static void
|
|||
output_replacement_character(
|
||||
VALUE destination,
|
||||
unsigned char *(*resize_destination)(VALUE, int, int),
|
||||
rb_trans_t *ts,
|
||||
rb_transcoding *tc,
|
||||
unsigned char **out_start_ptr,
|
||||
unsigned char **out_pos,
|
||||
unsigned char **out_stop_ptr)
|
||||
|
||||
{
|
||||
rb_transcoding *tc;
|
||||
const rb_transcoder *tr;
|
||||
int max_output;
|
||||
rb_encoding *enc;
|
||||
const char *replacement;
|
||||
int len;
|
||||
|
||||
tc = ts->elems[ts->num_trans-1].tc;
|
||||
tr = tc->transcoder;
|
||||
max_output = tr->max_output;
|
||||
enc = rb_enc_find(tr->to_encoding);
|
||||
|
@ -893,12 +908,12 @@ output_replacement_character(
|
|||
|
||||
if (tr->resetstate_func) {
|
||||
if (*out_stop_ptr - *out_pos < max_output)
|
||||
more_output_buffer(destination, resize_destination, ts, out_start_ptr, out_pos, out_stop_ptr);
|
||||
more_output_buffer(destination, resize_destination, max_output, out_start_ptr, out_pos, out_stop_ptr);
|
||||
*out_pos += tr->resetstate_func(tc, *out_pos);
|
||||
}
|
||||
|
||||
if (*out_stop_ptr - *out_pos < max_output)
|
||||
more_output_buffer(destination, resize_destination, ts, out_start_ptr, out_pos, out_stop_ptr);
|
||||
more_output_buffer(destination, resize_destination, max_output, out_start_ptr, out_pos, out_stop_ptr);
|
||||
|
||||
replacement = get_replacement_character(enc, &len);
|
||||
|
||||
|
@ -919,6 +934,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
const int opt)
|
||||
{
|
||||
rb_trans_t *ts;
|
||||
rb_transcoding *last_tc;
|
||||
rb_trans_result_t ret;
|
||||
unsigned char *out_start = *out_pos;
|
||||
int max_output;
|
||||
|
@ -927,7 +943,8 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
if (!ts)
|
||||
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
|
||||
|
||||
max_output = ts->elems[ts->num_trans-1].tc->transcoder->max_output;
|
||||
last_tc = ts->last_tc;
|
||||
max_output = last_tc->transcoder->max_output;
|
||||
|
||||
resume:
|
||||
ret = rb_trans_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
|
@ -938,7 +955,7 @@ resume:
|
|||
goto resume;
|
||||
}
|
||||
else if (opt&INVALID_REPLACE) {
|
||||
output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
|
||||
goto resume;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
|
@ -952,14 +969,14 @@ resume:
|
|||
goto resume;
|
||||
}
|
||||
else if (opt&UNDEF_REPLACE) {
|
||||
output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
|
||||
goto resume;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
}
|
||||
if (ret == transcode_obuf_full) {
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
|
||||
goto resume;
|
||||
}
|
||||
|
||||
|
@ -978,6 +995,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
const int opt)
|
||||
{
|
||||
rb_trans_t *ts;
|
||||
rb_transcoding *last_tc;
|
||||
rb_trans_result_t ret;
|
||||
unsigned char *out_start = *out_pos;
|
||||
const unsigned char *ptr;
|
||||
|
@ -987,6 +1005,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
if (!ts)
|
||||
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
|
||||
|
||||
last_tc = ts->last_tc;
|
||||
max_output = ts->elems[ts->num_trans-1].tc->transcoder->max_output;
|
||||
|
||||
ret = transcode_ibuf_empty;
|
||||
|
@ -1017,7 +1036,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
}
|
||||
else if (opt&INVALID_REPLACE) {
|
||||
output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
|
||||
break;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
|
@ -1032,7 +1051,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
}
|
||||
else if (opt&UNDEF_REPLACE) {
|
||||
output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
|
||||
break;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
|
@ -1040,7 +1059,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
|
||||
case transcode_obuf_full:
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
|
||||
break;
|
||||
|
||||
case transcode_ibuf_empty:
|
||||
|
@ -1261,19 +1280,24 @@ econv_s_allocate(VALUE klass)
|
|||
}
|
||||
|
||||
static VALUE
|
||||
econv_init(VALUE self, VALUE from_encoding, VALUE to_encoding)
|
||||
econv_init(VALUE self, VALUE from_encoding, VALUE to_encoding, VALUE flags_v)
|
||||
{
|
||||
const char *from_e, *to_e;
|
||||
rb_trans_t *ts;
|
||||
int flags;
|
||||
|
||||
from_e = StringValueCStr(from_encoding);
|
||||
to_e = StringValueCStr(to_encoding);
|
||||
StringValue(from_encoding);
|
||||
StringValue(to_encoding);
|
||||
flags = NUM2INT(flags_v);
|
||||
|
||||
from_e = RSTRING_PTR(from_encoding);
|
||||
to_e = RSTRING_PTR(to_encoding);
|
||||
|
||||
if (DATA_PTR(self)) {
|
||||
rb_raise(rb_eTypeError, "already initialized");
|
||||
}
|
||||
|
||||
ts = rb_trans_open(from_e, to_e, 0);
|
||||
ts = rb_trans_open(from_e, to_e, flags);
|
||||
if (!ts) {
|
||||
rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", from_e, to_e);
|
||||
}
|
||||
|
@ -1363,8 +1387,9 @@ Init_transcode(void)
|
|||
|
||||
rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
|
||||
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
|
||||
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, 2);
|
||||
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, 3);
|
||||
rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, 4);
|
||||
rb_define_method(rb_cEncodingConverter, "max_output", econv_max_output, 0);
|
||||
rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
|
||||
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE", INT2FIX(UNIVERSAL_NEWLINE));
|
||||
}
|
||||
|
|
|
@ -122,6 +122,7 @@ typedef struct {
|
|||
rb_trans_elem_t *elems;
|
||||
int num_trans;
|
||||
int num_finished;
|
||||
rb_transcoding *last_tc;
|
||||
} rb_trans_t;
|
||||
|
||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||
|
|
Загрузка…
Ссылка в новой задаче