From f6441bf61cd4244aca5f465d262baf31b8872ac2 Mon Sep 17 00:00:00 2001 From: akr Date: Wed, 3 Sep 2008 14:12:06 +0000 Subject: [PATCH] * transcode_data.h (rb_transcoding): remove stateful field. add state field. (TRANSCODING_STATE): defined. (rb_transcoder): add fields: state_size, state_init_func, state_fini_func. change rb_transcoding* argument to void*. * transcode.c (transcode_restartable0): use TRANSCODING_STATE for first arguments of transcoder functions. (rb_transcoding_open_by_transcoder): initialize state field. (rb_transcoding_close): finalize state field. * tool/transcode-tblgen.rb: provide state size/init/fini. * enc/trans/newline.trans (universal_newline_init): defined. (fun_so_universal_newline): take void* as a state pointer. (rb_universal_newline): provide state size/init/fini. (rb_crlf_newline): ditto. (rb_cr_newline): ditto. * enc/trans/iso2022.trans (iso2022jp_init): defined. (fun_si_iso2022jp_to_eucjp): take void* as a state pointer. (fun_so_iso2022jp_to_eucjp): ditto. (fun_so_eucjp_to_iso2022jp): ditto. (iso2022jp_reset_sequence_size): ditto. (finish_eucjp_to_iso2022jp): ditto. (rb_ISO_2022_JP_to_EUC_JP): provide state size/init/fini. (rb_EUC_JP_to_ISO_2022_JP): ditto. * enc/trans/utf_16_32.trans (fun_so_from_utf_16be): take void* as a state pointer. (fun_so_to_utf_16be): ditto. (fun_so_from_utf_16le): ditto. (fun_so_to_utf_16le): ditto. (fun_so_from_utf_32be): ditto. (fun_so_to_utf_32be): ditto. (fun_so_from_utf_32le): ditto. (fun_so_to_utf_32le): ditto. (rb_from_UTF_16BE): provide state size/init/fini. (rb_to_UTF_16BE): ditto. (rb_from_UTF_16LE): ditto. (rb_to_UTF_16LE): ditto. (rb_from_UTF_32BE): ditto. (rb_to_UTF_32BE): ditto. (rb_from_UTF_32LE): ditto. (rb_to_UTF_32LE): ditto. * enc/trans/japanese.trans (fun_so_eucjp2sjis): take void* as a state pointer. (fun_so_sjis2eucjp): ditto. (rb_eucjp2sjis): provide state size/init/fini. (rb_sjis2eucjp): provide state size/init/fini. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19096 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 55 +++++++++++++++++++++++++++++++++++ enc/trans/iso2022.trans | 61 +++++++++++++++++++++++---------------- enc/trans/japanese.trans | 8 +++-- enc/trans/newline.trans | 31 +++++++++++++------- enc/trans/utf_16_32.trans | 24 ++++++++++----- tool/transcode-tblgen.rb | 1 + transcode.c | 27 +++++++++++------ transcode_data.h | 24 ++++++++++----- 8 files changed, 168 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1ca202c77c..1f31bed33e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,58 @@ +Wed Sep 3 23:03:37 2008 Tanaka Akira + + * transcode_data.h (rb_transcoding): remove stateful field. + add state field. + (TRANSCODING_STATE): defined. + (rb_transcoder): add fields: state_size, state_init_func, + state_fini_func. + change rb_transcoding* argument to void*. + + * transcode.c (transcode_restartable0): use TRANSCODING_STATE for + first arguments of transcoder functions. + (rb_transcoding_open_by_transcoder): initialize state field. + (rb_transcoding_close): finalize state field. + + * tool/transcode-tblgen.rb: provide state size/init/fini. + + * enc/trans/newline.trans (universal_newline_init): defined. + (fun_so_universal_newline): take void* as a state pointer. + (rb_universal_newline): provide state size/init/fini. + (rb_crlf_newline): ditto. + (rb_cr_newline): ditto. + + * enc/trans/iso2022.trans (iso2022jp_init): defined. + (fun_si_iso2022jp_to_eucjp): take void* as a state pointer. + (fun_so_iso2022jp_to_eucjp): ditto. + (fun_so_eucjp_to_iso2022jp): ditto. + (iso2022jp_reset_sequence_size): ditto. + (finish_eucjp_to_iso2022jp): ditto. + (rb_ISO_2022_JP_to_EUC_JP): provide state size/init/fini. + (rb_EUC_JP_to_ISO_2022_JP): ditto. + + * enc/trans/utf_16_32.trans (fun_so_from_utf_16be): take void* as a + state pointer. + (fun_so_to_utf_16be): ditto. + (fun_so_from_utf_16le): ditto. + (fun_so_to_utf_16le): ditto. + (fun_so_from_utf_32be): ditto. + (fun_so_to_utf_32be): ditto. + (fun_so_from_utf_32le): ditto. + (fun_so_to_utf_32le): ditto. + (rb_from_UTF_16BE): provide state size/init/fini. + (rb_to_UTF_16BE): ditto. + (rb_from_UTF_16LE): ditto. + (rb_to_UTF_16LE): ditto. + (rb_from_UTF_32BE): ditto. + (rb_to_UTF_32BE): ditto. + (rb_from_UTF_32LE): ditto. + (rb_to_UTF_32LE): ditto. + + * enc/trans/japanese.trans (fun_so_eucjp2sjis): take void* as a state + pointer. + (fun_so_sjis2eucjp): ditto. + (rb_eucjp2sjis): provide state size/init/fini. + (rb_sjis2eucjp): provide state size/init/fini. + Wed Sep 3 22:31:11 2008 Yuki Sonoda (Yugui) * lib/mathn.rb (Integer): moved into prime.rb. diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans index 49da2c3f6c..067611ebd0 100644 --- a/enc/trans/iso2022.trans +++ b/enc/trans/iso2022.trans @@ -27,10 +27,22 @@ <%= transcode_generated_code %> -static VALUE -fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) +#define G0_ASCII 0 +#define G0_JISX0208 1 + +static int +iso2022jp_init(void *statep) { - if (t->stateful[0] == 0) + unsigned char *sp = statep; + *sp = G0_ASCII; + return 0; +} + +static VALUE +fun_si_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l) +{ + unsigned char *sp = statep; + if (*sp == G0_ASCII) return (VALUE)NOMAP; else if (0x21 <= s[0] && s[0] <= 0x7e) return (VALUE)iso2022jp_to_eucjp_jisx0208_rest; @@ -39,14 +51,15 @@ fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) } static int -fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l, unsigned char* o) { + unsigned char *sp = statep; if (s[0] == 0x1b) { if (s[1] == '(') { switch (s[l-1]) { case 'B': case 'J': - t->stateful[0] = 0; + *sp = G0_ASCII; break; } } @@ -54,7 +67,7 @@ fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, u switch (s[l-1]) { case '@': case 'B': - t->stateful[0] = 1; + *sp = G0_JISX0208; break; } } @@ -75,31 +88,28 @@ rb_ISO_2022_JP_to_EUC_JP = { 3, /* max_input */ 3, /* max_output */ stateful_decoder, /* stateful_type */ + 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp }; static int -fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o) +fun_so_eucjp_to_iso2022jp(void *statep, const unsigned char *s, size_t l, unsigned char *o) { + unsigned char *sp = statep; unsigned char *output0 = o; - if (t->stateful[0] == 0) { - t->stateful[0] = 1; /* initialized flag */ - t->stateful[1] = 1; /* G0 = ASCII */ - } - - if (l != t->stateful[1]) { + if (*sp != (l == 1 ? G0_ASCII : G0_JISX0208)) { if (l == 1) { *o++ = 0x1b; *o++ = '('; *o++ = 'B'; - t->stateful[1] = 1; /* G0 = ASCII */ + *sp = G0_ASCII; } else { *o++ = 0x1b; *o++ = '$'; *o++ = 'B'; - t->stateful[1] = 2; /* G0 = JIS X 0208 1983 */ + *sp = G0_JISX0208; /* JIS X 0208 1983 */ } } @@ -115,27 +125,27 @@ fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, u } static int -iso2022jp_reset_sequence_size(rb_transcoding *t) +iso2022jp_reset_sequence_size(void *statep) { - if (t->stateful[1] == 2) + unsigned char *sp = statep; + if (*sp == G0_JISX0208) return 3; return 0; } static int -finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o) +finish_eucjp_to_iso2022jp(void *statep, unsigned char *o) { + unsigned char *sp = statep; unsigned char *output0 = o; - if (t->stateful[0] == 0) + if (*sp == G0_ASCII) return 0; - if (t->stateful[1] != 1) { - *o++ = 0x1b; - *o++ = '('; - *o++ = 'B'; - t->stateful[1] = 1; - } + *o++ = 0x1b; + *o++ = '('; + *o++ = 'B'; + *sp = G0_ASCII; return o - output0; } @@ -148,6 +158,7 @@ rb_EUC_JP_to_ISO_2022_JP = { 3, /* max_input */ 5, /* max_output */ stateful_encoder, /* stateful_type */ + 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp, iso2022jp_reset_sequence_size, finish_eucjp_to_iso2022jp diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans index 279957b972..64f38fbfc6 100644 --- a/enc/trans/japanese.trans +++ b/enc/trans/japanese.trans @@ -20,7 +20,7 @@ <%= transcode_generated_code %> static int -fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o) +fun_so_eucjp2sjis(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (s[0] == 0x8e) { o[0] = s[1]; @@ -41,7 +41,7 @@ fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned } static int -fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o) +fun_so_sjis2eucjp(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (l == 1) { o[0] = '\x8e'; @@ -49,7 +49,7 @@ fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned return 2; } else { - int h, m, l; + int h, l; h = s[0]; l = s[1]; if (0xe0 <= h) @@ -74,6 +74,7 @@ rb_eucjp2sjis = { 3, /* max_input */ 2, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp2sjis }; @@ -85,6 +86,7 @@ rb_sjis2eucjp = { 2, /* max_input */ 2, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_sjis2eucjp }; diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index da8202eea4..7b10d368b1 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -21,33 +21,41 @@ <%= transcode_generated_code %> +#define NORMAL 0 +#define JUST_AFTER_CR 1 + static int -fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +universal_newline_init(void *statep) { + unsigned char *sp = statep; + *sp = NORMAL; + return 0; +} + +static int +fun_so_universal_newline(void *statep, const unsigned char* s, size_t l, unsigned char* o) +{ + unsigned char *sp = statep; int len; - /* - t->stateful[0] == 0 : normal - t->stateful[0] == 1 : just after '\r' - */ if (s[0] == '\n') { - if (t->stateful[0] == 0) { + if (*sp == NORMAL) { o[0] = '\n'; len = 1; } - else { + else { /* JUST_AFTER_CR */ len = 0; } - t->stateful[0] = 0; + *sp = NORMAL; } else if (s[0] == '\r') { o[0] = '\n'; len = 1; - t->stateful[0] = 1; + *sp = JUST_AFTER_CR; } else { o[0] = s[0]; len = 1; - t->stateful[0] = 0; + *sp = NORMAL; } return len; } @@ -60,6 +68,7 @@ rb_universal_newline = { 1, /* max_input */ 1, /* max_output */ stateful_decoder, /* stateful_type */ + 1, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_universal_newline }; @@ -71,6 +80,7 @@ rb_crlf_newline = { 1, /* max_input */ 2, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; @@ -82,6 +92,7 @@ rb_cr_newline = { 1, /* max_input */ 1, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans index 57cd77dd77..9ffff341a9 100644 --- a/enc/trans/utf_16_32.trans +++ b/enc/trans/utf_16_32.trans @@ -38,7 +38,7 @@ <%= transcode_generated_code %> static int -fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[0] && s[1]<0x80) { o[0] = s[1]; @@ -66,7 +66,7 @@ fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!(s[0]&0x80)) { o[0] = 0x00; @@ -94,7 +94,7 @@ fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned } static int -fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[1] && s[0]<0x80) { o[0] = s[0]; @@ -122,7 +122,7 @@ fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!(s[0]&0x80)) { o[1] = 0x00; @@ -150,7 +150,7 @@ fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned } static int -fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[1]) { if (s[2]==0 && s[3]<0x80) { @@ -179,7 +179,7 @@ fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { o[0] = 0; if (!(s[0]&0x80)) { @@ -205,7 +205,7 @@ fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned } static int -fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[2]) { if (s[1]==0 && s[0]<0x80) { @@ -234,7 +234,7 @@ fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { o[3] = 0; if (!(s[0]&0x80)) { @@ -267,6 +267,7 @@ rb_from_UTF_16BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16be }; @@ -278,6 +279,7 @@ rb_to_UTF_16BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16be }; @@ -289,6 +291,7 @@ rb_from_UTF_16LE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16le }; @@ -300,6 +303,7 @@ rb_to_UTF_16LE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16le }; @@ -311,6 +315,7 @@ rb_from_UTF_32BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32be }; @@ -322,6 +327,7 @@ rb_to_UTF_32BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32be }; @@ -333,6 +339,7 @@ rb_from_UTF_32LE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32le }; @@ -344,6 +351,7 @@ rb_to_UTF_32LE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32le }; diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index 75b88b0083..207cb5118d 100644 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -590,6 +590,7 @@ static const rb_transcoder #{max_input}, /* max_input */ #{max_output}, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL, NULL, NULL, NULL }; diff --git a/transcode.c b/transcode.c index aa89f12a8e..ca772ce844 100644 --- a/transcode.c +++ b/transcode.c @@ -544,22 +544,22 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, SUSPEND_OBUF(19); *out_p++ = getBT3(next_info); continue; case FUNii: - next_info = (VALUE)(*tr->func_ii)(tc, next_info); + next_info = (VALUE)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info); goto follow_info; case FUNsi: { const unsigned char *char_start; size_t char_len; char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); - next_info = (VALUE)(*tr->func_si)(tc, char_start, (size_t)char_len); + next_info = (VALUE)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len); goto follow_info; } case FUNio: SUSPEND_OBUF(13); if (tr->max_output <= out_stop - out_p) - out_p += (VALUE)(*tr->func_io)(tc, next_info, out_p); + out_p += (VALUE)(*tr->func_io)(TRANSCODING_STATE(tc), next_info, out_p); else { - writebuf_len = (VALUE)(*tr->func_io)(tc, next_info, TRANSCODING_WRITEBUF(tc)); + writebuf_len = (VALUE)(*tr->func_io)(TRANSCODING_STATE(tc), next_info, TRANSCODING_WRITEBUF(tc)); writebuf_off = 0; while (writebuf_off < writebuf_len) { SUSPEND_OBUF(20); @@ -574,11 +574,11 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, SUSPEND_OBUF(14); if (tr->max_output <= out_stop - out_p) { char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); - out_p += (VALUE)(*tr->func_so)(tc, char_start, (size_t)char_len, out_p); + out_p += (VALUE)(*tr->func_so)(TRANSCODING_STATE(tc), char_start, (size_t)char_len, out_p); } else { char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len); - writebuf_len = (VALUE)(*tr->func_so)(tc, char_start, (size_t)char_len, TRANSCODING_WRITEBUF(tc)); + writebuf_len = (VALUE)(*tr->func_so)(TRANSCODING_STATE(tc), char_start, (size_t)char_len, TRANSCODING_WRITEBUF(tc)); writebuf_off = 0; while (writebuf_off < writebuf_len) { SUSPEND_OBUF(22); @@ -632,10 +632,10 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos, if (tr->finish_func) { SUSPEND_OBUF(4); if (tr->max_output <= out_stop - out_p) { - out_p += tr->finish_func(tc, out_p); + out_p += tr->finish_func(TRANSCODING_STATE(tc), out_p); } else { - writebuf_len = tr->finish_func(tc, TRANSCODING_WRITEBUF(tc)); + writebuf_len = tr->finish_func(TRANSCODING_STATE(tc), TRANSCODING_WRITEBUF(tc)); writebuf_off = 0; while (writebuf_off < writebuf_len) { SUSPEND_OBUF(23); @@ -687,7 +687,11 @@ rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags) tc = ALLOC(rb_transcoding); tc->transcoder = tr; tc->flags = flags; - memset(tc->stateful, 0, sizeof(tc->stateful)); + if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) + tc->state = xmalloc(tr->state_size); + if (tr->state_init_func) { + (tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */ + } tc->resume_position = 0; tc->recognized_len = 0; tc->readagain_len = 0; @@ -718,6 +722,11 @@ static void rb_transcoding_close(rb_transcoding *tc) { const rb_transcoder *tr = tc->transcoder; + if (tr->state_fini_func) { + (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */ + } + if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) + xfree(tc->state); if (sizeof(tc->readbuf.ary) < tr->max_input) xfree(tc->readbuf.ptr); if (sizeof(tc->writebuf.ary) < tr->max_output) diff --git a/transcode_data.h b/transcode_data.h index 6081aec81d..a7f1db0c97 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -90,7 +90,7 @@ typedef struct rb_transcoding { unsigned char *ptr; /* length: max_output */ } writebuf; - unsigned char stateful[256]; /* opaque data for stateful encoding */ + void *state; /* opaque data for stateful encoding */ } rb_transcoding; #define TRANSCODING_READBUF(tc) \ ((tc)->transcoder->max_input <= sizeof((tc)->readbuf.ary) ? \ @@ -100,6 +100,11 @@ typedef struct rb_transcoding { ((tc)->transcoder->max_output <= sizeof((tc)->writebuf.ary) ? \ (tc)->writebuf.ary : \ (tc)->writebuf.ptr) +#define TRANSCODING_STATE_EMBED_MAX sizeof(void *) +#define TRANSCODING_STATE(tc) \ + ((tc)->transcoder->state_size <= sizeof((tc)->state) ? \ + (void *)&(tc)->state : \ + (tc)->state) /* static structure, one per supported encoding pair */ struct rb_transcoder { @@ -115,13 +120,16 @@ struct rb_transcoder { int max_input; int max_output; rb_transcoder_stateful_type_t stateful_type; - VALUE (*func_ii)(rb_transcoding*, VALUE); /* info -> info */ - VALUE (*func_si)(rb_transcoding*, const unsigned char*, size_t); /* start -> info */ - int (*func_io)(rb_transcoding*, VALUE, const unsigned char*); /* info -> output */ - int (*func_so)(rb_transcoding*, const unsigned char*, size_t, unsigned char*); /* start -> output */ - int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */ - int (*resetsize_func)(rb_transcoding*); /* -> len */ - int (*resetstate_func)(rb_transcoding*, unsigned char*); /* -> output */ + size_t state_size; + int (*state_init_func)(void*); /* 0:success !=0:failure(errno) */ + int (*state_fini_func)(void*); /* 0:success !=0:failure(errno) */ + VALUE (*func_ii)(void*, VALUE); /* info -> info */ + VALUE (*func_si)(void*, const unsigned char*, size_t); /* start -> info */ + int (*func_io)(void*, VALUE, const unsigned char*); /* info -> output */ + int (*func_so)(void*, const unsigned char*, size_t, unsigned char*); /* start -> output */ + int (*finish_func)(void*, unsigned char*); /* -> output */ + int (*resetsize_func)(void*); /* -> len */ + int (*resetstate_func)(void*, unsigned char*); /* -> output */ }; void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);