* include/ruby/encoding.h (rb_econv_decorate_at_first): declared.

(rb_econv_decorate_at_last): declared.

* transcode.c (rb_econv_open_by_transcoder_entries): initialize
  replacement_enc.  allocate outbuf for the last transcoder.
  (rb_econv_open0): extracted from rb_econv_open.
  (rb_econv_open): use rb_econv_open0 and decorate the result using
  rb_econv_decorate_at_first and rb_econv_decorate_at_last.
  (rb_econv_decorate_at): new function.
  (rb_econv_decorate_at_first): ditto.
  (rb_econv_decorate_at_last): ditto.
  (rb_econv_binmode): fix iteration end condition.
  (econv_init): don't set source_encoding_name and
  destination_encoding_name because they are set in rb_econv_open0.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19262 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-09-08 17:23:37 +00:00
Родитель 65110d6464
Коммит 795bf7a665
4 изменённых файлов: 170 добавлений и 57 удалений

Просмотреть файл

@ -1,3 +1,20 @@
Tue Sep 9 02:18:20 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/encoding.h (rb_econv_decorate_at_first): declared.
(rb_econv_decorate_at_last): declared.
* transcode.c (rb_econv_open_by_transcoder_entries): initialize
replacement_enc. allocate outbuf for the last transcoder.
(rb_econv_open0): extracted from rb_econv_open.
(rb_econv_open): use rb_econv_open0 and decorate the result using
rb_econv_decorate_at_first and rb_econv_decorate_at_last.
(rb_econv_decorate_at): new function.
(rb_econv_decorate_at_first): ditto.
(rb_econv_decorate_at_last): ditto.
(rb_econv_binmode): fix iteration end condition.
(econv_init): don't set source_encoding_name and
destination_encoding_name because they are set in rb_econv_open0.
Tue Sep 9 01:10:54 2008 Tanaka Akira <akr@fsij.org> Tue Sep 9 01:10:54 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (rb_econv_open): make ec a local variable. * transcode.c (rb_econv_open): make ec a local variable.

Просмотреть файл

@ -224,6 +224,10 @@ void rb_econv_close(rb_econv_t *ec);
/* result: 0:success -1:failure */ /* result: 0:success -1:failure */
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
/* result: 0:success -1:failure */
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name);
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name);
VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags); VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags);
/* result: 0:success -1:failure */ /* result: 0:success -1:failure */

Просмотреть файл

@ -68,13 +68,13 @@ class TestEncodingConverter < Test::Unit::TestCase
end end
def test_newline_converter_with_ascii_incompatible def test_newline_converter_with_ascii_incompatible
assert_raise(Encoding::NoConverter) { assert_nothing_raised {
Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER)
} }
assert_raise(Encoding::NoConverter) { assert_nothing_raised {
Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CRLF_NEWLINE_ENCODER) Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CRLF_NEWLINE_ENCODER)
} }
assert_raise(Encoding::NoConverter) { assert_nothing_raised {
Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CR_NEWLINE_ENCODER) Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CR_NEWLINE_ENCODER)
} }

Просмотреть файл

@ -819,6 +819,7 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
ec->destination_encoding_name = NULL; ec->destination_encoding_name = NULL;
ec->replacement_str = NULL; ec->replacement_str = NULL;
ec->replacement_len = 0; ec->replacement_len = 0;
ec->replacement_enc = NULL;
ec->replacement_allocated = 0; ec->replacement_allocated = 0;
ec->in_buf_start = NULL; ec->in_buf_start = NULL;
ec->in_data_start = NULL; ec->in_data_start = NULL;
@ -851,7 +852,7 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
ec->last_tc = ec->elems[ec->num_trans-1].tc; ec->last_tc = ec->elems[ec->num_trans-1].tc;
ec->last_trans_index = ec->num_trans-1; ec->last_trans_index = ec->num_trans-1;
for (i = 0; i < ec->num_trans-1; i++) { for (i = 0; i < ec->num_trans; i++) {
int bufsize = 4096; int bufsize = 4096;
unsigned char *p; unsigned char *p;
p = xmalloc(bufsize); p = xmalloc(bufsize);
@ -881,7 +882,7 @@ trans_open_i(const char *sname, const char *dname, int depth, void *arg)
} }
rb_econv_t * rb_econv_t *
rb_econv_open(const char *sname, const char *dname, int ecflags) rb_econv_open0(const char *sname, const char *dname, int ecflags)
{ {
transcoder_entry_t **entries = NULL; transcoder_entry_t **entries = NULL;
int num_trans; int num_trans;
@ -891,41 +892,9 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
int sidx, didx; int sidx, didx;
int num_encoders, num_decoders; int num_encoders, num_decoders;
transcoder_entry_t *encoders[4], *decoders[1];
if ((ecflags & ECONV_CRLF_NEWLINE_ENCODER) &&
(ecflags & ECONV_CR_NEWLINE_ENCODER))
return NULL;
if ((ecflags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) &&
(ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER))
return NULL;
if ((ecflags & ECONV_XML_TEXT_ENCODER) &&
(ecflags & ECONV_XML_ATTR_CONTENT_ENCODER))
return NULL;
num_encoders = 0; num_encoders = 0;
if (ecflags & ECONV_CRLF_NEWLINE_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "crlf_newline")))
return NULL;
if (ecflags & ECONV_CR_NEWLINE_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "cr_newline")))
return NULL;
if (ecflags & ECONV_XML_TEXT_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-text-escaped")))
return NULL;
if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-content-escaped")))
return NULL;
if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-quoted")))
return NULL;
num_decoders = 0; num_decoders = 0;
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER)
if (!(decoders[num_decoders++] = get_transcoder_entry("universal_newline", "")))
return NULL;
senc = NULL; senc = NULL;
if (*sname) { if (*sname) {
@ -943,12 +912,6 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
} }
} }
if (*sname && (!senc || !rb_enc_asciicompat(senc)) && num_encoders)
return NULL;
if (*dname && (!denc || !rb_enc_asciicompat(denc)) && num_decoders)
return NULL;
if (*sname == '\0' && *dname == '\0') { if (*sname == '\0' && *dname == '\0') {
num_trans = 0; num_trans = 0;
entries = ALLOC_N(transcoder_entry_t *, num_encoders+num_decoders); entries = ALLOC_N(transcoder_entry_t *, num_encoders+num_decoders);
@ -966,10 +929,6 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
return NULL; return NULL;
} }
MEMMOVE(entries+num_encoders, entries, transcoder_entry_t *, num_trans);
MEMMOVE(entries, encoders, transcoder_entry_t *, num_encoders);
MEMMOVE(entries+num_encoders+num_trans, decoders, transcoder_entry_t *, num_decoders);
num_trans += num_encoders + num_decoders; num_trans += num_encoders + num_decoders;
ec = rb_econv_open_by_transcoder_entries(num_trans, entries); ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
@ -993,6 +952,69 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
return ec; return ec;
} }
rb_econv_t *
rb_econv_open(const char *sname, const char *dname, int ecflags)
{
rb_econv_t *ec;
int num_encoders, num_decoders;
const char *encoders[4], *decoders[1];
int i;
if ((ecflags & ECONV_CRLF_NEWLINE_ENCODER) &&
(ecflags & ECONV_CR_NEWLINE_ENCODER))
return NULL;
if ((ecflags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) &&
(ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER))
return NULL;
if ((ecflags & ECONV_XML_TEXT_ENCODER) &&
(ecflags & ECONV_XML_ATTR_CONTENT_ENCODER))
return NULL;
num_encoders = 0;
if (ecflags & ECONV_CRLF_NEWLINE_ENCODER)
if (!(encoders[num_encoders++] = "crlf_newline"))
return NULL;
if (ecflags & ECONV_CR_NEWLINE_ENCODER)
if (!(encoders[num_encoders++] = "cr_newline"))
return NULL;
if (ecflags & ECONV_XML_TEXT_ENCODER)
if (!(encoders[num_encoders++] = "xml-text-escaped"))
return NULL;
if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER)
if (!(encoders[num_encoders++] = "xml-attr-content-escaped"))
return NULL;
if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER)
if (!(encoders[num_encoders++] = "xml-attr-quoted"))
return NULL;
num_decoders = 0;
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER)
if (!(decoders[num_decoders++] = "universal_newline"))
return NULL;
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
if (!ec)
return NULL;
for (i = num_decoders-1; 0 <= i; i--)
if (rb_econv_decorate_at_last(ec, decoders[i]) == -1) {
rb_econv_close(ec);
return NULL;
}
for (i = num_encoders-1; 0 <= i; i--)
if (rb_econv_decorate_at_first(ec, encoders[i]) == -1) {
rb_econv_close(ec);
return NULL;
}
ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
return ec;
}
static int static int
trans_sweep(rb_econv_t *ec, trans_sweep(rb_econv_t *ec,
const unsigned char **input_ptr, const unsigned char *input_stop, const unsigned char **input_ptr, const unsigned char *input_stop,
@ -1741,12 +1763,90 @@ rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), Qnil, flags); return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), Qnil, flags);
} }
static int
rb_econv_decorate_at(rb_econv_t *ec, const char *decorator_name, int n)
{
transcoder_entry_t *entry;
const rb_transcoder *tr;
rb_transcoding *tc;
unsigned char *p;
int bufsize = 4096;
if (ec->num_finished != 0)
return -1;
entry = get_transcoder_entry("", decorator_name);
if (!entry)
entry = get_transcoder_entry(decorator_name, "");
if (!entry)
return -1;
tr = load_transcoder_entry(entry);
if (!entry)
return -1;
tc = rb_transcoding_open_by_transcoder(tr, 0);
REALLOC_N(ec->elems, rb_econv_elem_t, ec->num_trans+1);
MEMMOVE(ec->elems+n+1, ec->elems+n, rb_econv_elem_t, ec->num_trans-n);
ec->num_trans++;
if (n <= ec->last_trans_index)
ec->last_trans_index++;
ec->elems[n].tc = tc;
bufsize = 4096;
p = xmalloc(bufsize);
ec->elems[n].out_buf_start = p;
ec->elems[n].out_data_start = p;
ec->elems[n].out_data_end = p;
ec->elems[n].out_buf_end = p + bufsize;
ec->elems[n].last_result = econv_source_buffer_empty;
return 0;
}
int
rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
{
const rb_transcoder *tr;
if (ec->num_trans == 0)
return rb_econv_decorate_at(ec, decorator_name, 0);
tr = ec->elems[0].tc->transcoder;
if (!SUPPLEMENTAL_CONVERSION(tr->src_encoding, tr->dst_encoding) &&
tr->asciicompat_type == asciicompat_decoder)
return rb_econv_decorate_at(ec, decorator_name, 1);
return rb_econv_decorate_at(ec, decorator_name, 0);
}
int
rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
{
const rb_transcoder *tr;
if (ec->num_trans == 0)
return rb_econv_decorate_at(ec, decorator_name, 0);
tr = ec->elems[ec->num_trans-1].tc->transcoder;
if (!SUPPLEMENTAL_CONVERSION(tr->src_encoding, tr->dst_encoding) &&
tr->asciicompat_type == asciicompat_encoder)
return rb_econv_decorate_at(ec, decorator_name, ec->num_trans-1);
return rb_econv_decorate_at(ec, decorator_name, ec->num_trans);
}
void void
rb_econv_binmode(rb_econv_t *ec) rb_econv_binmode(rb_econv_t *ec)
{ {
const rb_transcoder *trs[3]; const rb_transcoder *trs[3];
int n, i, j; int n, i, j;
transcoder_entry_t *entry; transcoder_entry_t *entry;
int num_trans;
n = 0; n = 0;
if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECODER) { if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECODER) {
@ -1765,8 +1865,9 @@ rb_econv_binmode(rb_econv_t *ec)
trs[n++] = entry->transcoder; trs[n++] = entry->transcoder;
} }
num_trans = ec->num_trans;
j = 0; j = 0;
for (i = 0; i < ec->num_trans; i++) { for (i = 0; i < num_trans; i++) {
int k; int k;
for (k = 0; k < n; k++) for (k = 0; k < n; k++)
if (trs[k] == ec->elems[i].tc->transcoder) if (trs[k] == ec->elems[i].tc->transcoder)
@ -2660,15 +2761,6 @@ econv_init(int argc, VALUE *argv, VALUE self)
ec->source_encoding = senc; ec->source_encoding = senc;
ec->destination_encoding = denc; ec->destination_encoding = denc;
if (ec->last_tc) {
ec->source_encoding_name = ec->elems[0].tc->transcoder->src_encoding;
ec->destination_encoding_name = ec->last_tc->transcoder->dst_encoding;
}
else {
ec->source_encoding_name = "";
ec->destination_encoding_name = "";
}
DATA_PTR(self) = ec; DATA_PTR(self) = ec;
return self; return self;