* include/ruby/io.h (rb_io_t): new fields: writeconv,
  writeconv_stateless and writeconv_initialized.
  (MakeOpenFile): initialize them.

* include/ruby/encoding.h (rb_econv_stateless_encoding): declared.
  (rb_econv_string): declared.

* io.c (make_writeconv): new function.
  (io_fwrite): use econv.
  (make_readconv): fix error message.
  (finish_writeconv): new function.
  (fptr_finalize): call finish_writeconv.
  (clear_writeconv): new function.
  (clear_codeconv): new function to call both clear_readconv and
  clear_writeconv.
  (rb_io_fptr_finalize): call clear_codeconv instead of
  clear_readconv.
  (mode_enc): ditto.
  (io_set_encoding): ditto.
  (argf_next_argv): ditto.
  (io_encoding_set): ditto.

* gc.c (gc_mark_children): mark writeconv_stateless in T_FILE.

* transcode.c (stateless_encoding_i): new function.
  (rb_econv_stateless_encoding): ditto.
  (rb_econv_string): ditto.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18691 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-08-18 12:06:42 +00:00
Родитель 89b4f06a59
Коммит 035d4816c3
7 изменённых файлов: 297 добавлений и 17 удалений

Просмотреть файл

@ -1,3 +1,33 @@
Mon Aug 18 21:02:08 2008 Tanaka Akira <akr@fsij.org>
* include/ruby/io.h (rb_io_t): new fields: writeconv,
writeconv_stateless and writeconv_initialized.
(MakeOpenFile): initialize them.
* include/ruby/encoding.h (rb_econv_stateless_encoding): declared.
(rb_econv_string): declared.
* io.c (make_writeconv): new function.
(io_fwrite): use econv.
(make_readconv): fix error message.
(finish_writeconv): new function.
(fptr_finalize): call finish_writeconv.
(clear_writeconv): new function.
(clear_codeconv): new function to call both clear_readconv and
clear_writeconv.
(rb_io_fptr_finalize): call clear_codeconv instead of
clear_readconv.
(mode_enc): ditto.
(io_set_encoding): ditto.
(argf_next_argv): ditto.
(io_encoding_set): ditto.
* gc.c (gc_mark_children): mark writeconv_stateless in T_FILE.
* transcode.c (stateless_encoding_i): new function.
(rb_econv_stateless_encoding): ditto.
(rb_econv_string): ditto.
Mon Aug 18 17:23:38 2008 Tanaka Akira <akr@fsij.org>
* io.c (clear_readconv): extracted from rb_io_fptr_finalize.

4
gc.c
Просмотреть файл

@ -1507,8 +1507,10 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
break;
case T_FILE:
if (obj->as.file.fptr)
if (obj->as.file.fptr) {
gc_mark(objspace, obj->as.file.fptr->tied_io_for_writing, lev);
gc_mark(objspace, obj->as.file.fptr->writeconv_stateless, lev);
}
break;
case T_REGEXP:

Просмотреть файл

@ -268,6 +268,11 @@ void rb_econv_check_error(rb_econv_t *ec);
int rb_econv_putbackable(rb_econv_t *ec);
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
/* returns corresponding stateless encoding, or NULL if not stateful. */
const char *rb_econv_stateless_encoding(const char *stateful_enc);
VALUE rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags);
/* flags for rb_econv_open */
#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x100
#define ECONV_CRLF_NEWLINE_ENCODER 0x200

Просмотреть файл

@ -63,6 +63,11 @@ typedef struct rb_io_t {
int crbuf_off;
int crbuf_len;
int crbuf_capa;
rb_econv_t *writeconv;
VALUE writeconv_stateless;
int writeconv_initialized;
} rb_io_t;
#define HAVE_RB_IO_T 1
@ -110,6 +115,9 @@ typedef struct rb_io_t {
fp->crbuf_off = 0;\
fp->crbuf_len = 0;\
fp->crbuf_capa = 0;\
fp->writeconv = NULL;\
fp->writeconv_stateless = Qnil;\
fp->writeconv_initialized = 0;\
fp->tied_io_for_writing = 0;\
fp->enc = 0;\
fp->enc2 = 0;\

150
io.c
Просмотреть файл

@ -689,6 +689,38 @@ rb_io_wait_writable(int f)
}
}
/*
 * Lazily set up the write-side converter of fptr.
 *
 * Does nothing when fptr->writeconv_initialized is already set.  Otherwise:
 *   - with enc2 set, a converter from enc to enc2 is opened;
 *   - without enc2, rb_econv_stateless_encoding() is asked about enc; if it
 *     returns a name, that name is remembered in fptr->writeconv_stateless
 *     and a converter from it to enc is opened;
 *   - if it returns NULL, fptr->writeconv is left NULL.
 * Raises IOError when rb_econv_open() fails.  The initialized flag is set
 * only after a successful setup.
 */
static void
make_writeconv(rb_io_t *fptr)
{
    const char *src_name;
    const char *dst_name = NULL;

    if (fptr->writeconv_initialized)
        return;

    fptr->writeconv_stateless = Qnil;

    if (fptr->enc2) {
        src_name = fptr->enc->name;
        dst_name = fptr->enc2->name;
    }
    else {
        src_name = rb_econv_stateless_encoding(fptr->enc->name);
        if (src_name) {
            dst_name = fptr->enc->name;
            fptr->writeconv_stateless = rb_str_new2(src_name);
        }
    }

    if (!src_name) {
        /* nothing to convert through */
        fptr->writeconv = NULL;
    }
    else {
        fptr->writeconv = rb_econv_open(src_name, dst_name, 0);
        if (!fptr->writeconv)
            rb_raise(rb_eIOError, "code converter open failed (%s to %s)", src_name, dst_name);
    }

    fptr->writeconv_initialized = 1;
}
/* writing functions */
static long
io_fwrite(VALUE str, rb_io_t *fptr)
@ -701,17 +733,18 @@ io_fwrite(VALUE str, rb_io_t *fptr)
* We must also transcode if two encodings were specified
*/
if (fptr->enc) {
/* transcode str before output */
/* the methods in transcode.c are static, so call indirectly */
/* Can't use encode! because puts writes a frozen newline */
make_writeconv(fptr);
if (fptr->enc2) {
str = rb_funcall(str, id_encode, 2,
rb_enc_from_encoding(fptr->enc2),
rb_enc_from_encoding(fptr->enc));
str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
}
else {
str = rb_funcall(str, id_encode, 1,
rb_enc_from_encoding(fptr->enc));
if (fptr->writeconv) {
str = rb_str_transcode(str, fptr->writeconv_stateless);
str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
}
else {
str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc));
}
}
}
@ -1394,7 +1427,7 @@ make_readconv(rb_io_t *fptr)
if (!fptr->readconv) {
fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
if (!fptr->readconv)
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc2->name, fptr->enc->name);
fptr->crbuf_off = 0;
fptr->crbuf_len = 0;
fptr->crbuf_capa = 1024;
@ -2844,10 +2877,78 @@ rb_io_set_close_on_exec(VALUE io, VALUE arg)
#define IS_PREP_STDIO(f) ((f)->mode & FMODE_PREP)
#define PREP_STDIO_NAME(f) ((f)->path)
/*
 * Drain the write-side converter of fptr.
 *
 * rb_econv_convert() is called with no source (NULL, NULL) and flags 0, and
 * is repeated for as long as it reports econv_destination_buffer_full.
 * Whatever bytes it produces are either written straight to fptr->fd (when
 * fptr has no write buffer) or appended to fptr->wbuf.
 * Presumably this emits the converter's pending/closing output for stateful
 * encodings -- TODO confirm against rb_econv_convert.
 *
 * fptr->writeconv is dereferenced unconditionally; the caller visible in
 * this file (fptr_finalize) checks it for NULL first.
 *
 * noraise: when non-zero, rb_io_check_closed() and rb_econv_check_error()
 * are skipped, and a negative fptr->fd makes the function return silently.
 */
static void
finish_writeconv(rb_io_t *fptr, int noraise)
{
unsigned char *ds, *dp, *de;
rb_econv_result_t res;
/* Unbuffered case: convert into a stack buffer and write it out directly. */
if (!fptr->wbuf) {
unsigned char buf[1024];
int r;
res = econv_destination_buffer_full;
while (res == econv_destination_buffer_full) {
ds = dp = buf;
de = buf + sizeof(buf);
res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
/* Write the dp-ds bytes produced by this conversion step. */
while (dp-ds) {
retry:
r = rb_write_internal(fptr->fd, ds, dp-ds);
if (r == dp-ds)
break;
/* Partial write: skip what was written, keep the rest for the retry. */
if (0 <= r) {
ds += r;
}
/* NOTE(review): this is also reached after a successful partial write;
 * if rb_io_wait_writable() returns false then, the remaining bytes are
 * dropped by the return below -- confirm this is intended. */
if (rb_io_wait_writable(fptr->fd)) {
if (!noraise)
rb_io_check_closed(fptr);
else if (fptr->fd < 0)
return;
goto retry;
}
/* Not retryable: give up without reporting anything to the caller. */
return;
}
if (!noraise) {
rb_econv_check_error(fptr->writeconv);
}
/* Conversion errors end the drain loop. */
if (res == econv_invalid_byte_sequence ||
res == econv_undefined_conversion) {
break;
}
}
return;
}
/* Buffered case: append the converter output to the tail of fptr->wbuf. */
res = econv_destination_buffer_full;
while (res == econv_destination_buffer_full) {
/* NOTE(review): the flush is triggered only when wbuf_len == wbuf_capa.
 * If wbuf_off + wbuf_len can reach wbuf_capa with wbuf_len smaller, ds == de
 * below and no progress is possible -- verify the wbuf invariants. */
if (fptr->wbuf_len == fptr->wbuf_capa) {
io_fflush(fptr);
}
/* Writable window: from the end of the buffered data to the end of wbuf. */
ds = dp = (unsigned char *)fptr->wbuf + fptr->wbuf_off + fptr->wbuf_len;
de = (unsigned char *)fptr->wbuf + fptr->wbuf_capa;
res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
/* Account for the bytes the converter appended. */
fptr->wbuf_len += dp - ds;
if (!noraise) {
rb_econv_check_error(fptr->writeconv);
}
if (res == econv_invalid_byte_sequence ||
res == econv_undefined_conversion) {
break;
}
}
}
static void
fptr_finalize(rb_io_t *fptr, int noraise)
{
int ebadf = 0;
if (fptr->writeconv) {
finish_writeconv(fptr, noraise);
}
if (fptr->wbuf_len) {
io_fflush(fptr);
}
@ -2907,6 +3008,23 @@ clear_readconv(rb_io_t *fptr)
}
}
/*
 * Drop the write-side converter of fptr, if any, and reset
 * fptr->writeconv_initialized so that make_writeconv() sets it up again on
 * its next call.
 */
static void
clear_writeconv(rb_io_t *fptr)
{
    rb_econv_t *conv = fptr->writeconv;

    if (conv != NULL) {
        rb_econv_close(conv);
        fptr->writeconv = NULL;
    }

    /* Reset unconditionally: the flag may be set even when no converter was opened. */
    fptr->writeconv_initialized = 0;
}
/*
 * Discard both code converters attached to fptr: the read-side one via
 * clear_readconv() and the write-side one via clear_writeconv().
 */
static void
clear_codeconv(rb_io_t *fptr)
{
    /* read side first, then write side */
    clear_readconv(fptr);
    clear_writeconv(fptr);
}
int
rb_io_fptr_finalize(rb_io_t *fptr)
{
@ -2926,7 +3044,7 @@ rb_io_fptr_finalize(rb_io_t *fptr)
free(fptr->wbuf);
fptr->wbuf = 0;
}
clear_readconv(fptr);
clear_codeconv(fptr);
free(fptr);
return 1;
}
@ -3535,7 +3653,7 @@ mode_enc(rb_io_t *fptr, const char *estr)
fptr->enc = 0;
fptr->enc2 = 0;
clear_readconv(fptr);
clear_codeconv(fptr);
p0 = strrchr(estr, ':');
if (!p0) p1 = estr;
@ -4265,7 +4383,7 @@ io_set_encoding(VALUE io, VALUE opt)
GetOpenFile(io, fptr);
fptr->enc = 0;
fptr->enc2 = 0;
clear_readconv(fptr);
clear_codeconv(fptr);
if (!NIL_P(encoding)) {
rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
RSTRING_PTR(encoding));
@ -5612,7 +5730,7 @@ argf_next_argv(VALUE argf)
GetOpenFile(current_file, fptr);
fptr->enc = argf_enc;
fptr->enc2 = argf_enc2;
clear_readconv(fptr);
clear_codeconv(fptr);
}
}
else {
@ -6340,13 +6458,13 @@ io_encoding_set(rb_io_t *fptr, int argc, VALUE v1, VALUE v2)
if (argc == 2) {
fptr->enc2 = rb_to_encoding(v1);
fptr->enc = rb_to_encoding(v2);
clear_readconv(fptr);
clear_codeconv(fptr);
}
else if (argc == 1) {
if (NIL_P(v1)) {
fptr->enc = 0;
fptr->enc2 = 0;
clear_readconv(fptr);
clear_codeconv(fptr);
}
else {
VALUE tmp = rb_check_string_type(v1);
@ -6356,7 +6474,7 @@ io_encoding_set(rb_io_t *fptr, int argc, VALUE v1, VALUE v2)
else {
fptr->enc = rb_to_encoding(v1);
fptr->enc2 = 0;
clear_readconv(fptr);
clear_codeconv(fptr);
}
}
}

Просмотреть файл

@ -601,5 +601,50 @@ EOT
}
end
# With the writer set to "iso-2022-jp:utf-8", two separate writes of one
# character each followed by close must reach the reader as the single
# byte sequence asserted below.
def test_write_conversion_fixenc
  with_pipe do |reader, writer|
    writer.set_encoding("iso-2022-jp:utf-8")
    reader_thread = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\u3044"
    writer.close
    expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
    assert_equal(expected, reader_thread.value)
  end
end
# With only "iso-2022-jp" set on the writer, strings in different source
# encodings (a "\u" literal and an sjis-tagged string) written one after the
# other must reach the reader as the single byte sequence asserted below.
def test_write_conversion_anyenc_stateful
  with_pipe do |reader, writer|
    writer.set_encoding("iso-2022-jp")
    reader_thread = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\x82\xa2".force_encoding("sjis")
    writer.close
    expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
    assert_equal(expected, reader_thread.value)
  end
end
# With only "euc-jp" set on the writer, strings in different source
# encodings (a "\u" literal and an sjis-tagged string) must reach the
# reader as the byte sequence asserted below.
def test_write_conversion_anyenc_stateless
  with_pipe do |reader, writer|
    writer.set_encoding("euc-jp")
    reader_thread = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\x82\xa2".force_encoding("sjis")
    writer.close
    expected = "\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit")
    assert_equal(expected, reader_thread.value)
  end
end
# Same scenario as the "iso-2022-jp"-only write test, but with sync turned
# off on the writer before any data is written; the reader must still see
# the byte sequence asserted below once the writer is closed.
def test_write_conversion_anyenc_stateful_nosync
  with_pipe do |reader, writer|
    writer.sync = false
    writer.set_encoding("iso-2022-jp")
    reader_thread = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\x82\xa2".force_encoding("sjis")
    writer.close
    expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
    assert_equal(expected, reader_thread.value)
  end
end
end

Просмотреть файл

@ -1219,6 +1219,78 @@ rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
tc->readagain_len -= n;
}
/*
 * Search state handed to stateless_encoding_i() through st_foreach() by
 * rb_econv_stateless_encoding().
 */
struct stateless_encoding_t {
/* result: from_encoding of the matching transcoder; stays NULL when none matches */
const char *stateless_enc;
/* query: encoding name looked up in each inner table */
const char *stateful_enc;
};
/*
 * st_foreach() callback used by rb_econv_stateless_encoding().
 *
 * val is treated as an inner st_table, which is searched for the queried
 * stateful_enc.  If an entry is found, its transcoder is loaded; when that
 * transcoder's stateful_type is stateful_encoder, its from_encoding is
 * stored as the result and the iteration is stopped.  key is unused.
 * (Presumably the outer table is keyed by source encoding and the inner one
 * by destination encoding -- TODO confirm against transcoder_table.)
 */
static int
stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
{
    struct stateless_encoding_t *query = (struct stateless_encoding_t *)arg;
    st_table *inner = (st_table *)val;
    st_data_t found;
    const rb_transcoder *transcoder;

    if (!st_lookup(inner, (st_data_t)query->stateful_enc, &found))
        return ST_CONTINUE;

    transcoder = load_transcoder_entry((transcoder_entry_t *)found);
    if (!transcoder || transcoder->stateful_type != stateful_encoder)
        return ST_CONTINUE;

    query->stateless_enc = transcoder->from_encoding;
    return ST_STOP;
}
const char *
rb_econv_stateless_encoding(const char *stateful_enc)
{
struct stateless_encoding_t data;
data.stateful_enc = stateful_enc;
data.stateless_enc = NULL;
st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data);
if (data.stateless_enc)
return data.stateless_enc;
return NULL;
}
/*
 * Convert len bytes of src starting at byte offset off through ec and append
 * the output to dst.
 *
 * dst:   destination string; when nil a new buffer string is allocated.
 * flags: passed unchanged to rb_econv_convert().
 *
 * The conversion is repeated, growing dst as needed, for as long as
 * rb_econv_convert() reports econv_destination_buffer_full.
 * rb_econv_check_error() is called after every step.
 * Raises ArgumentError ("too long string") when the required capacity
 * would exceed LONG_MAX.
 *
 * Returns dst.
 */
VALUE
rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
{
    unsigned const char *ss, *sp, *se;
    unsigned char *ds, *dp, *de;
    rb_econv_result_t res;

    if (NIL_P(dst)) {
        dst = rb_str_buf_new(len);
    }

    res = econv_destination_buffer_full;
    while (res == econv_destination_buffer_full) {
        long dlen = RSTRING_LEN(dst);
        int max_output = ec->last_tc->transcoder->max_output;

        /* Make room for the remaining input plus one maximal output unit. */
        if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
            unsigned long new_capa = (unsigned long)dlen + len + max_output;
            if (LONG_MAX < new_capa)
                rb_raise(rb_eArgError, "too long string");
            rb_str_resize(dst, new_capa);
            /* rb_str_resize changed the length; restore the logical one. */
            rb_str_set_len(dst, dlen);
        }

        ss = sp = (const unsigned char *)RSTRING_PTR(src) + off;
        se = ss + len;

        /*
         * The end of the writable window is measured from the start of
         * dst's buffer, not from the append position: deriving it from the
         * already advanced pointer would place it dlen bytes past the
         * allocation whenever dst is non-empty.
         */
        ds = (unsigned char *)RSTRING_PTR(dst);
        de = ds + rb_str_capacity(dst);
        dp = ds += dlen;

        res = rb_econv_convert(ec, &sp, se, &dp, de, flags);

        /* Advance past the consumed input and publish the produced output. */
        off += sp - ss;
        len -= sp - ss;
        rb_str_set_len(dst, dlen + (dp - ds));
        rb_econv_check_error(ec);
    }

    return dst;
}
static VALUE
make_econv_exception(rb_econv_t *ec)
{