зеркало из https://github.com/github/ruby.git
* include/ruby/io.h (rb_io_t): new fields: writeconv,
writeconv_stateless and writeconv_initialized. (MakeOpenFile): initialize them. * include/ruby/encoding.h (rb_econv_stateless_encoding): declared. (rb_econv_string): declared. * io.c (make_writeconv): new function. (io_fwrite): use econv. (make_readconv): fix error message. (finish_writeconv): new function. (fptr_finalize): call finish_writeconv. (clear_writeconv): new function. (clear_codeconv): new function to call both clear_readconv and clear_writeconv. (rb_io_fptr_finalize): call clear_codeconv instead of clear_readconv. (mode_enc): ditto. (io_set_encoding): ditto. (argf_next_argv): ditto. (io_encoding_set): ditto. * gc.c (gc_mark_children): mark writeconv_stateless in T_FILE. * transcode.c (stateless_encoding_i): new function. (rb_econv_stateless_encoding): ditto. (rb_econv_string): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18691 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
89b4f06a59
Коммит
035d4816c3
30
ChangeLog
30
ChangeLog
|
@ -1,3 +1,33 @@
|
|||
Mon Aug 18 21:02:08 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* include/ruby/io.h (rb_io_t): new fields: writeconv,
|
||||
writeconv_stateless and writeconv_initialized.
|
||||
(MakeOpenFile): initialize them.
|
||||
|
||||
* include/ruby/encoding.h (rb_econv_stateless_encoding): declared.
|
||||
(rb_econv_string): declared.
|
||||
|
||||
* io.c (make_writeconv): new function.
|
||||
(io_fwrite): use econv.
|
||||
(make_readconv): fix error message.
|
||||
(finish_writeconv): new function.
|
||||
(fptr_finalize): call finish_writeconv.
|
||||
(clear_writeconv): new function.
|
||||
(clear_codeconv): new function to call both clear_readconv and
|
||||
clear_writeconv.
|
||||
(rb_io_fptr_finalize): call clear_codeconv instead of
|
||||
clear_readconv.
|
||||
(mode_enc): ditto.
|
||||
(io_set_encoding): ditto.
|
||||
(argf_next_argv): ditto.
|
||||
(io_encoding_set): ditto.
|
||||
|
||||
* gc.c (gc_mark_children): mark writeconv_stateless in T_FILE.
|
||||
|
||||
* transcode.c (stateless_encoding_i): new function.
|
||||
(rb_econv_stateless_encoding): ditto.
|
||||
(rb_econv_string): ditto.
|
||||
|
||||
Mon Aug 18 17:23:38 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* io.c (clear_readconv): extracted from rb_io_fptr_finalize.
|
||||
|
|
4
gc.c
4
gc.c
|
@ -1507,8 +1507,10 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
|
|||
break;
|
||||
|
||||
case T_FILE:
|
||||
if (obj->as.file.fptr)
|
||||
if (obj->as.file.fptr) {
|
||||
gc_mark(objspace, obj->as.file.fptr->tied_io_for_writing, lev);
|
||||
gc_mark(objspace, obj->as.file.fptr->writeconv_stateless, lev);
|
||||
}
|
||||
break;
|
||||
|
||||
case T_REGEXP:
|
||||
|
|
|
@ -268,6 +268,11 @@ void rb_econv_check_error(rb_econv_t *ec);
|
|||
int rb_econv_putbackable(rb_econv_t *ec);
|
||||
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
|
||||
|
||||
/* returns corresponding stateless encoding, or NULL if not stateful. */
|
||||
const char *rb_econv_stateless_encoding(const char *stateful_enc);
|
||||
|
||||
VALUE rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags);
|
||||
|
||||
/* flags for rb_econv_open */
|
||||
#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x100
|
||||
#define ECONV_CRLF_NEWLINE_ENCODER 0x200
|
||||
|
|
|
@ -63,6 +63,11 @@ typedef struct rb_io_t {
|
|||
int crbuf_off;
|
||||
int crbuf_len;
|
||||
int crbuf_capa;
|
||||
|
||||
rb_econv_t *writeconv;
|
||||
VALUE writeconv_stateless;
|
||||
int writeconv_initialized;
|
||||
|
||||
} rb_io_t;
|
||||
|
||||
#define HAVE_RB_IO_T 1
|
||||
|
@ -110,6 +115,9 @@ typedef struct rb_io_t {
|
|||
fp->crbuf_off = 0;\
|
||||
fp->crbuf_len = 0;\
|
||||
fp->crbuf_capa = 0;\
|
||||
fp->writeconv = NULL;\
|
||||
fp->writeconv_stateless = Qnil;\
|
||||
fp->writeconv_initialized = 0;\
|
||||
fp->tied_io_for_writing = 0;\
|
||||
fp->enc = 0;\
|
||||
fp->enc2 = 0;\
|
||||
|
|
150
io.c
150
io.c
|
@ -689,6 +689,38 @@ rb_io_wait_writable(int f)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
make_writeconv(rb_io_t *fptr)
|
||||
{
|
||||
if (!fptr->writeconv_initialized) {
|
||||
const char *senc, *denc;
|
||||
fptr->writeconv_stateless = Qnil;
|
||||
if (fptr->enc2) {
|
||||
senc = fptr->enc->name;
|
||||
denc = fptr->enc2->name;
|
||||
}
|
||||
else {
|
||||
senc = rb_econv_stateless_encoding(fptr->enc->name);
|
||||
if (senc) {
|
||||
denc = fptr->enc->name;
|
||||
fptr->writeconv_stateless = rb_str_new2(senc);
|
||||
}
|
||||
else {
|
||||
denc = NULL;
|
||||
}
|
||||
}
|
||||
if (senc) {
|
||||
fptr->writeconv = rb_econv_open(senc, denc, 0);
|
||||
if (!fptr->writeconv)
|
||||
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", senc, denc);
|
||||
}
|
||||
else {
|
||||
fptr->writeconv = NULL;
|
||||
}
|
||||
fptr->writeconv_initialized = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* writing functions */
|
||||
static long
|
||||
io_fwrite(VALUE str, rb_io_t *fptr)
|
||||
|
@ -701,17 +733,18 @@ io_fwrite(VALUE str, rb_io_t *fptr)
|
|||
* We must also transcode if two encodings were specified
|
||||
*/
|
||||
if (fptr->enc) {
|
||||
/* transcode str before output */
|
||||
/* the methods in transcode.c are static, so call indirectly */
|
||||
/* Can't use encode! because puts writes a frozen newline */
|
||||
make_writeconv(fptr);
|
||||
if (fptr->enc2) {
|
||||
str = rb_funcall(str, id_encode, 2,
|
||||
rb_enc_from_encoding(fptr->enc2),
|
||||
rb_enc_from_encoding(fptr->enc));
|
||||
str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
|
||||
}
|
||||
else {
|
||||
str = rb_funcall(str, id_encode, 1,
|
||||
rb_enc_from_encoding(fptr->enc));
|
||||
if (fptr->writeconv) {
|
||||
str = rb_str_transcode(str, fptr->writeconv_stateless);
|
||||
str = rb_econv_string(fptr->writeconv, str, 0, RSTRING_LEN(str), Qnil, ECONV_PARTIAL_INPUT);
|
||||
}
|
||||
else {
|
||||
str = rb_str_transcode(str, rb_enc_from_encoding(fptr->enc));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1394,7 +1427,7 @@ make_readconv(rb_io_t *fptr)
|
|||
if (!fptr->readconv) {
|
||||
fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
|
||||
if (!fptr->readconv)
|
||||
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
|
||||
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc2->name, fptr->enc->name);
|
||||
fptr->crbuf_off = 0;
|
||||
fptr->crbuf_len = 0;
|
||||
fptr->crbuf_capa = 1024;
|
||||
|
@ -2844,10 +2877,78 @@ rb_io_set_close_on_exec(VALUE io, VALUE arg)
|
|||
#define IS_PREP_STDIO(f) ((f)->mode & FMODE_PREP)
|
||||
#define PREP_STDIO_NAME(f) ((f)->path)
|
||||
|
||||
static void
|
||||
finish_writeconv(rb_io_t *fptr, int noraise)
|
||||
{
|
||||
unsigned char *ds, *dp, *de;
|
||||
rb_econv_result_t res;
|
||||
|
||||
if (!fptr->wbuf) {
|
||||
unsigned char buf[1024];
|
||||
int r;
|
||||
|
||||
res = econv_destination_buffer_full;
|
||||
while (res == econv_destination_buffer_full) {
|
||||
ds = dp = buf;
|
||||
de = buf + sizeof(buf);
|
||||
res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
|
||||
while (dp-ds) {
|
||||
retry:
|
||||
r = rb_write_internal(fptr->fd, ds, dp-ds);
|
||||
if (r == dp-ds)
|
||||
break;
|
||||
if (0 <= r) {
|
||||
ds += r;
|
||||
}
|
||||
if (rb_io_wait_writable(fptr->fd)) {
|
||||
if (!noraise)
|
||||
rb_io_check_closed(fptr);
|
||||
else if (fptr->fd < 0)
|
||||
return;
|
||||
goto retry;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!noraise) {
|
||||
rb_econv_check_error(fptr->writeconv);
|
||||
}
|
||||
if (res == econv_invalid_byte_sequence ||
|
||||
res == econv_undefined_conversion) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
res = econv_destination_buffer_full;
|
||||
while (res == econv_destination_buffer_full) {
|
||||
if (fptr->wbuf_len == fptr->wbuf_capa) {
|
||||
io_fflush(fptr);
|
||||
}
|
||||
|
||||
ds = dp = (unsigned char *)fptr->wbuf + fptr->wbuf_off + fptr->wbuf_len;
|
||||
de = (unsigned char *)fptr->wbuf + fptr->wbuf_capa;
|
||||
res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
|
||||
fptr->wbuf_len += dp - ds;
|
||||
if (!noraise) {
|
||||
rb_econv_check_error(fptr->writeconv);
|
||||
}
|
||||
if (res == econv_invalid_byte_sequence ||
|
||||
res == econv_undefined_conversion) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
fptr_finalize(rb_io_t *fptr, int noraise)
|
||||
{
|
||||
int ebadf = 0;
|
||||
if (fptr->writeconv) {
|
||||
finish_writeconv(fptr, noraise);
|
||||
}
|
||||
if (fptr->wbuf_len) {
|
||||
io_fflush(fptr);
|
||||
}
|
||||
|
@ -2907,6 +3008,23 @@ clear_readconv(rb_io_t *fptr)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
clear_writeconv(rb_io_t *fptr)
|
||||
{
|
||||
if (fptr->writeconv) {
|
||||
rb_econv_close(fptr->writeconv);
|
||||
fptr->writeconv = NULL;
|
||||
}
|
||||
fptr->writeconv_initialized = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
clear_codeconv(rb_io_t *fptr)
|
||||
{
|
||||
clear_readconv(fptr);
|
||||
clear_writeconv(fptr);
|
||||
}
|
||||
|
||||
int
|
||||
rb_io_fptr_finalize(rb_io_t *fptr)
|
||||
{
|
||||
|
@ -2926,7 +3044,7 @@ rb_io_fptr_finalize(rb_io_t *fptr)
|
|||
free(fptr->wbuf);
|
||||
fptr->wbuf = 0;
|
||||
}
|
||||
clear_readconv(fptr);
|
||||
clear_codeconv(fptr);
|
||||
free(fptr);
|
||||
return 1;
|
||||
}
|
||||
|
@ -3535,7 +3653,7 @@ mode_enc(rb_io_t *fptr, const char *estr)
|
|||
|
||||
fptr->enc = 0;
|
||||
fptr->enc2 = 0;
|
||||
clear_readconv(fptr);
|
||||
clear_codeconv(fptr);
|
||||
|
||||
p0 = strrchr(estr, ':');
|
||||
if (!p0) p1 = estr;
|
||||
|
@ -4265,7 +4383,7 @@ io_set_encoding(VALUE io, VALUE opt)
|
|||
GetOpenFile(io, fptr);
|
||||
fptr->enc = 0;
|
||||
fptr->enc2 = 0;
|
||||
clear_readconv(fptr);
|
||||
clear_codeconv(fptr);
|
||||
if (!NIL_P(encoding)) {
|
||||
rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
|
||||
RSTRING_PTR(encoding));
|
||||
|
@ -5612,7 +5730,7 @@ argf_next_argv(VALUE argf)
|
|||
GetOpenFile(current_file, fptr);
|
||||
fptr->enc = argf_enc;
|
||||
fptr->enc2 = argf_enc2;
|
||||
clear_readconv(fptr);
|
||||
clear_codeconv(fptr);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -6340,13 +6458,13 @@ io_encoding_set(rb_io_t *fptr, int argc, VALUE v1, VALUE v2)
|
|||
if (argc == 2) {
|
||||
fptr->enc2 = rb_to_encoding(v1);
|
||||
fptr->enc = rb_to_encoding(v2);
|
||||
clear_readconv(fptr);
|
||||
clear_codeconv(fptr);
|
||||
}
|
||||
else if (argc == 1) {
|
||||
if (NIL_P(v1)) {
|
||||
fptr->enc = 0;
|
||||
fptr->enc2 = 0;
|
||||
clear_readconv(fptr);
|
||||
clear_codeconv(fptr);
|
||||
}
|
||||
else {
|
||||
VALUE tmp = rb_check_string_type(v1);
|
||||
|
@ -6356,7 +6474,7 @@ io_encoding_set(rb_io_t *fptr, int argc, VALUE v1, VALUE v2)
|
|||
else {
|
||||
fptr->enc = rb_to_encoding(v1);
|
||||
fptr->enc2 = 0;
|
||||
clear_readconv(fptr);
|
||||
clear_codeconv(fptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -601,5 +601,50 @@ EOT
|
|||
}
|
||||
end
|
||||
|
||||
# With the writer's encoding set to "iso-2022-jp:utf-8", two strings written
# separately must arrive on the read end as a single ISO-2022-JP byte
# sequence that is properly terminated once the writer is closed.
def test_write_conversion_fixenc
  with_pipe do |r, w|
    w.set_encoding("iso-2022-jp:utf-8")
    reader = Thread.new { r.read.force_encoding("ascii-8bit") }
    w << "\u3042"
    w << "\u3044"
    w.close
    expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
    assert_equal(expected, reader.value)
  end
end
|
||||
|
||||
# With only an external encoding ("iso-2022-jp") set on the writer, strings
# in different source encodings (a \u-escaped literal and a Shift_JIS
# string) must both be converted and arrive as one ISO-2022-JP byte
# sequence after the writer is closed.
def test_write_conversion_anyenc_stateful
  with_pipe do |r, w|
    w.set_encoding("iso-2022-jp")
    reader = Thread.new { r.read.force_encoding("ascii-8bit") }
    w << "\u3042"
    w << "\x82\xa2".force_encoding("sjis")
    w.close
    expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
    assert_equal(expected, reader.value)
  end
end
|
||||
|
||||
# With only an external encoding ("euc-jp") set on the writer, strings in
# different source encodings (a \u-escaped literal and a Shift_JIS string)
# must both be converted to EUC-JP bytes on output.
def test_write_conversion_anyenc_stateless
  with_pipe do |r, w|
    w.set_encoding("euc-jp")
    reader = Thread.new { r.read.force_encoding("ascii-8bit") }
    w << "\u3042"
    w << "\x82\xa2".force_encoding("sjis")
    w.close
    expected = "\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit")
    assert_equal(expected, reader.value)
  end
end
|
||||
|
||||
# Same scenario as the "iso-2022-jp" external-encoding test, but with
# sync turned off on the writer so output goes through the write buffer;
# the bytes seen by the reader must be the same.
def test_write_conversion_anyenc_stateful_nosync
  with_pipe do |r, w|
    w.sync = false
    w.set_encoding("iso-2022-jp")
    reader = Thread.new { r.read.force_encoding("ascii-8bit") }
    w << "\u3042"
    w << "\x82\xa2".force_encoding("sjis")
    w.close
    expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
    assert_equal(expected, reader.value)
  end
end
|
||||
|
||||
end
|
||||
|
||||
|
|
72
transcode.c
72
transcode.c
|
@ -1219,6 +1219,78 @@ rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
|
|||
tc->readagain_len -= n;
|
||||
}
|
||||
|
||||
struct stateless_encoding_t {
|
||||
const char *stateless_enc;
|
||||
const char *stateful_enc;
|
||||
};
|
||||
|
||||
static int
|
||||
stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
|
||||
{
|
||||
struct stateless_encoding_t *data = (struct stateless_encoding_t *)arg;
|
||||
st_table *table2 = (st_table *)val;
|
||||
st_data_t v;
|
||||
|
||||
if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) {
|
||||
transcoder_entry_t *entry = (transcoder_entry_t *)v;
|
||||
const rb_transcoder *tr = load_transcoder_entry(entry);
|
||||
if (tr && tr->stateful_type == stateful_encoder) {
|
||||
data->stateless_enc = tr->from_encoding;
|
||||
return ST_STOP;
|
||||
}
|
||||
}
|
||||
return ST_CONTINUE;
|
||||
}
|
||||
|
||||
const char *
|
||||
rb_econv_stateless_encoding(const char *stateful_enc)
|
||||
{
|
||||
struct stateless_encoding_t data;
|
||||
data.stateful_enc = stateful_enc;
|
||||
data.stateless_enc = NULL;
|
||||
st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data);
|
||||
if (data.stateless_enc)
|
||||
return data.stateless_enc;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
VALUE
|
||||
rb_econv_string(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
|
||||
{
|
||||
unsigned const char *ss, *sp, *se;
|
||||
unsigned char *ds, *dp, *de;
|
||||
rb_econv_result_t res;
|
||||
|
||||
if (NIL_P(dst)) {
|
||||
dst = rb_str_buf_new(len);
|
||||
}
|
||||
|
||||
res = econv_destination_buffer_full;
|
||||
while (res == econv_destination_buffer_full) {
|
||||
long dlen = RSTRING_LEN(dst);
|
||||
int max_output = ec->last_tc->transcoder->max_output;
|
||||
if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
|
||||
unsigned long new_capa = (unsigned long)dlen + len + max_output;
|
||||
if (LONG_MAX < new_capa)
|
||||
rb_raise(rb_eArgError, "too long string");
|
||||
rb_str_resize(dst, new_capa);
|
||||
rb_str_set_len(dst, dlen);
|
||||
}
|
||||
ss = sp = (const unsigned char *)RSTRING_PTR(src) + off;
|
||||
se = ss + len;
|
||||
ds = dp = (unsigned char *)RSTRING_PTR(dst) + dlen;
|
||||
de = ds + rb_str_capacity(dst);
|
||||
res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
|
||||
off += sp - ss;
|
||||
len -= sp - ss;
|
||||
rb_str_set_len(dst, dlen + (dp - ds));
|
||||
rb_econv_check_error(ec);
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
||||
static VALUE
|
||||
make_econv_exception(rb_econv_t *ec)
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче