зеркало из https://github.com/github/ruby.git
* transcode_data.h (rb_transcoding): remove stateful field.
add state field. (TRANSCODING_STATE): defined. (rb_transcoder): add fields: state_size, state_init_func, state_fini_func. change rb_transcoding* argument to void*. * transcode.c (transcode_restartable0): use TRANSCODING_STATE for first arguments of transcoder functions. (rb_transcoding_open_by_transcoder): initialize state field. (rb_transcoding_close): finalize state field. * tool/transcode-tblgen.rb: provide state size/init/fini. * enc/trans/newline.trans (universal_newline_init): defined. (fun_so_universal_newline): take void* as a state pointer. (rb_universal_newline): provide state size/init/fini. (rb_crlf_newline): ditto. (rb_cr_newline): ditto. * enc/trans/iso2022.trans (iso2022jp_init): defined. (fun_si_iso2022jp_to_eucjp): take void* as a state pointer. (fun_so_iso2022jp_to_eucjp): ditto. (fun_so_eucjp_to_iso2022jp): ditto. (iso2022jp_reset_sequence_size): ditto. (finish_eucjp_to_iso2022jp): ditto. (rb_ISO_2022_JP_to_EUC_JP): provide state size/init/fini. (rb_EUC_JP_to_ISO_2022_JP): ditto. * enc/trans/utf_16_32.trans (fun_so_from_utf_16be): take void* as a state pointer. (fun_so_to_utf_16be): ditto. (fun_so_from_utf_16le): ditto. (fun_so_to_utf_16le): ditto. (fun_so_from_utf_32be): ditto. (fun_so_to_utf_32be): ditto. (fun_so_from_utf_32le): ditto. (fun_so_to_utf_32le): ditto. (rb_from_UTF_16BE): provide state size/init/fini. (rb_to_UTF_16BE): ditto. (rb_from_UTF_16LE): ditto. (rb_to_UTF_16LE): ditto. (rb_from_UTF_32BE): ditto. (rb_to_UTF_32BE): ditto. (rb_from_UTF_32LE): ditto. (rb_to_UTF_32LE): ditto. * enc/trans/japanese.trans (fun_so_eucjp2sjis): take void* as a state pointer. (fun_so_sjis2eucjp): ditto. (rb_eucjp2sjis): provide state size/init/fini. (rb_sjis2eucjp): provide state size/init/fini. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19096 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
fce093432e
Коммит
f6441bf61c
55
ChangeLog
55
ChangeLog
|
@ -1,3 +1,58 @@
|
||||||
|
Wed Sep 3 23:03:37 2008 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* transcode_data.h (rb_transcoding): remove stateful field.
|
||||||
|
add state field.
|
||||||
|
(TRANSCODING_STATE): defined.
|
||||||
|
(rb_transcoder): add fields: state_size, state_init_func,
|
||||||
|
state_fini_func.
|
||||||
|
change rb_transcoding* argument to void*.
|
||||||
|
|
||||||
|
* transcode.c (transcode_restartable0): use TRANSCODING_STATE for
|
||||||
|
first arguments of transcoder functions.
|
||||||
|
(rb_transcoding_open_by_transcoder): initialize state field.
|
||||||
|
(rb_transcoding_close): finalize state field.
|
||||||
|
|
||||||
|
* tool/transcode-tblgen.rb: provide state size/init/fini.
|
||||||
|
|
||||||
|
* enc/trans/newline.trans (universal_newline_init): defined.
|
||||||
|
(fun_so_universal_newline): take void* as a state pointer.
|
||||||
|
(rb_universal_newline): provide state size/init/fini.
|
||||||
|
(rb_crlf_newline): ditto.
|
||||||
|
(rb_cr_newline): ditto.
|
||||||
|
|
||||||
|
* enc/trans/iso2022.trans (iso2022jp_init): defined.
|
||||||
|
(fun_si_iso2022jp_to_eucjp): take void* as a state pointer.
|
||||||
|
(fun_so_iso2022jp_to_eucjp): ditto.
|
||||||
|
(fun_so_eucjp_to_iso2022jp): ditto.
|
||||||
|
(iso2022jp_reset_sequence_size): ditto.
|
||||||
|
(finish_eucjp_to_iso2022jp): ditto.
|
||||||
|
(rb_ISO_2022_JP_to_EUC_JP): provide state size/init/fini.
|
||||||
|
(rb_EUC_JP_to_ISO_2022_JP): ditto.
|
||||||
|
|
||||||
|
* enc/trans/utf_16_32.trans (fun_so_from_utf_16be): take void* as a
|
||||||
|
state pointer.
|
||||||
|
(fun_so_to_utf_16be): ditto.
|
||||||
|
(fun_so_from_utf_16le): ditto.
|
||||||
|
(fun_so_to_utf_16le): ditto.
|
||||||
|
(fun_so_from_utf_32be): ditto.
|
||||||
|
(fun_so_to_utf_32be): ditto.
|
||||||
|
(fun_so_from_utf_32le): ditto.
|
||||||
|
(fun_so_to_utf_32le): ditto.
|
||||||
|
(rb_from_UTF_16BE): provide state size/init/fini.
|
||||||
|
(rb_to_UTF_16BE): ditto.
|
||||||
|
(rb_from_UTF_16LE): ditto.
|
||||||
|
(rb_to_UTF_16LE): ditto.
|
||||||
|
(rb_from_UTF_32BE): ditto.
|
||||||
|
(rb_to_UTF_32BE): ditto.
|
||||||
|
(rb_from_UTF_32LE): ditto.
|
||||||
|
(rb_to_UTF_32LE): ditto.
|
||||||
|
|
||||||
|
* enc/trans/japanese.trans (fun_so_eucjp2sjis): take void* as a state
|
||||||
|
pointer.
|
||||||
|
(fun_so_sjis2eucjp): ditto.
|
||||||
|
(rb_eucjp2sjis): provide state size/init/fini.
|
||||||
|
(rb_sjis2eucjp): provide state size/init/fini.
|
||||||
|
|
||||||
Wed Sep 3 22:31:11 2008 Yuki Sonoda (Yugui) <yugui@yugui.jp>
|
Wed Sep 3 22:31:11 2008 Yuki Sonoda (Yugui) <yugui@yugui.jp>
|
||||||
|
|
||||||
* lib/mathn.rb (Integer): moved into prime.rb.
|
* lib/mathn.rb (Integer): moved into prime.rb.
|
||||||
|
|
|
@ -27,10 +27,22 @@
|
||||||
|
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
||||||
static VALUE
|
#define G0_ASCII 0
|
||||||
fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l)
|
#define G0_JISX0208 1
|
||||||
|
|
||||||
|
static int
|
||||||
|
iso2022jp_init(void *statep)
|
||||||
{
|
{
|
||||||
if (t->stateful[0] == 0)
|
unsigned char *sp = statep;
|
||||||
|
*sp = G0_ASCII;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
fun_si_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l)
|
||||||
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
|
if (*sp == G0_ASCII)
|
||||||
return (VALUE)NOMAP;
|
return (VALUE)NOMAP;
|
||||||
else if (0x21 <= s[0] && s[0] <= 0x7e)
|
else if (0x21 <= s[0] && s[0] <= 0x7e)
|
||||||
return (VALUE)iso2022jp_to_eucjp_jisx0208_rest;
|
return (VALUE)iso2022jp_to_eucjp_jisx0208_rest;
|
||||||
|
@ -39,14 +51,15 @@ fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l, unsigned char* o)
|
||||||
{
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
if (s[0] == 0x1b) {
|
if (s[0] == 0x1b) {
|
||||||
if (s[1] == '(') {
|
if (s[1] == '(') {
|
||||||
switch (s[l-1]) {
|
switch (s[l-1]) {
|
||||||
case 'B':
|
case 'B':
|
||||||
case 'J':
|
case 'J':
|
||||||
t->stateful[0] = 0;
|
*sp = G0_ASCII;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -54,7 +67,7 @@ fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, u
|
||||||
switch (s[l-1]) {
|
switch (s[l-1]) {
|
||||||
case '@':
|
case '@':
|
||||||
case 'B':
|
case 'B':
|
||||||
t->stateful[0] = 1;
|
*sp = G0_JISX0208;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -75,31 +88,28 @@ rb_ISO_2022_JP_to_EUC_JP = {
|
||||||
3, /* max_input */
|
3, /* max_input */
|
||||||
3, /* max_output */
|
3, /* max_output */
|
||||||
stateful_decoder, /* stateful_type */
|
stateful_decoder, /* stateful_type */
|
||||||
|
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
|
||||||
NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp
|
NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o)
|
fun_so_eucjp_to_iso2022jp(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
unsigned char *output0 = o;
|
unsigned char *output0 = o;
|
||||||
|
|
||||||
if (t->stateful[0] == 0) {
|
if (*sp != (l == 1 ? G0_ASCII : G0_JISX0208)) {
|
||||||
t->stateful[0] = 1; /* initialized flag */
|
|
||||||
t->stateful[1] = 1; /* G0 = ASCII */
|
|
||||||
}
|
|
||||||
|
|
||||||
if (l != t->stateful[1]) {
|
|
||||||
if (l == 1) {
|
if (l == 1) {
|
||||||
*o++ = 0x1b;
|
*o++ = 0x1b;
|
||||||
*o++ = '(';
|
*o++ = '(';
|
||||||
*o++ = 'B';
|
*o++ = 'B';
|
||||||
t->stateful[1] = 1; /* G0 = ASCII */
|
*sp = G0_ASCII;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
*o++ = 0x1b;
|
*o++ = 0x1b;
|
||||||
*o++ = '$';
|
*o++ = '$';
|
||||||
*o++ = 'B';
|
*o++ = 'B';
|
||||||
t->stateful[1] = 2; /* G0 = JIS X 0208 1983 */
|
*sp = G0_JISX0208; /* JIS X 0208 1983 */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,27 +125,27 @@ fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, u
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
iso2022jp_reset_sequence_size(rb_transcoding *t)
|
iso2022jp_reset_sequence_size(void *statep)
|
||||||
{
|
{
|
||||||
if (t->stateful[1] == 2)
|
unsigned char *sp = statep;
|
||||||
|
if (*sp == G0_JISX0208)
|
||||||
return 3;
|
return 3;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o)
|
finish_eucjp_to_iso2022jp(void *statep, unsigned char *o)
|
||||||
{
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
unsigned char *output0 = o;
|
unsigned char *output0 = o;
|
||||||
|
|
||||||
if (t->stateful[0] == 0)
|
if (*sp == G0_ASCII)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (t->stateful[1] != 1) {
|
*o++ = 0x1b;
|
||||||
*o++ = 0x1b;
|
*o++ = '(';
|
||||||
*o++ = '(';
|
*o++ = 'B';
|
||||||
*o++ = 'B';
|
*sp = G0_ASCII;
|
||||||
t->stateful[1] = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return o - output0;
|
return o - output0;
|
||||||
}
|
}
|
||||||
|
@ -148,6 +158,7 @@ rb_EUC_JP_to_ISO_2022_JP = {
|
||||||
3, /* max_input */
|
3, /* max_input */
|
||||||
5, /* max_output */
|
5, /* max_output */
|
||||||
stateful_encoder, /* stateful_type */
|
stateful_encoder, /* stateful_type */
|
||||||
|
1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp,
|
NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp,
|
||||||
finish_eucjp_to_iso2022jp,
|
finish_eucjp_to_iso2022jp,
|
||||||
iso2022jp_reset_sequence_size, finish_eucjp_to_iso2022jp
|
iso2022jp_reset_sequence_size, finish_eucjp_to_iso2022jp
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o)
|
fun_so_eucjp2sjis(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (s[0] == 0x8e) {
|
if (s[0] == 0x8e) {
|
||||||
o[0] = s[1];
|
o[0] = s[1];
|
||||||
|
@ -41,7 +41,7 @@ fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o)
|
fun_so_sjis2eucjp(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (l == 1) {
|
if (l == 1) {
|
||||||
o[0] = '\x8e';
|
o[0] = '\x8e';
|
||||||
|
@ -49,7 +49,7 @@ fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int h, m, l;
|
int h, l;
|
||||||
h = s[0];
|
h = s[0];
|
||||||
l = s[1];
|
l = s[1];
|
||||||
if (0xe0 <= h)
|
if (0xe0 <= h)
|
||||||
|
@ -74,6 +74,7 @@ rb_eucjp2sjis = {
|
||||||
3, /* max_input */
|
3, /* max_input */
|
||||||
2, /* max_output */
|
2, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_eucjp2sjis
|
NULL, NULL, NULL, fun_so_eucjp2sjis
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -85,6 +86,7 @@ rb_sjis2eucjp = {
|
||||||
2, /* max_input */
|
2, /* max_input */
|
||||||
2, /* max_output */
|
2, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_sjis2eucjp
|
NULL, NULL, NULL, fun_so_sjis2eucjp
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -21,33 +21,41 @@
|
||||||
|
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
||||||
|
#define NORMAL 0
|
||||||
|
#define JUST_AFTER_CR 1
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
universal_newline_init(void *statep)
|
||||||
{
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
|
*sp = NORMAL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
fun_so_universal_newline(void *statep, const unsigned char* s, size_t l, unsigned char* o)
|
||||||
|
{
|
||||||
|
unsigned char *sp = statep;
|
||||||
int len;
|
int len;
|
||||||
/*
|
|
||||||
t->stateful[0] == 0 : normal
|
|
||||||
t->stateful[0] == 1 : just after '\r'
|
|
||||||
*/
|
|
||||||
if (s[0] == '\n') {
|
if (s[0] == '\n') {
|
||||||
if (t->stateful[0] == 0) {
|
if (*sp == NORMAL) {
|
||||||
o[0] = '\n';
|
o[0] = '\n';
|
||||||
len = 1;
|
len = 1;
|
||||||
}
|
}
|
||||||
else {
|
else { /* JUST_AFTER_CR */
|
||||||
len = 0;
|
len = 0;
|
||||||
}
|
}
|
||||||
t->stateful[0] = 0;
|
*sp = NORMAL;
|
||||||
}
|
}
|
||||||
else if (s[0] == '\r') {
|
else if (s[0] == '\r') {
|
||||||
o[0] = '\n';
|
o[0] = '\n';
|
||||||
len = 1;
|
len = 1;
|
||||||
t->stateful[0] = 1;
|
*sp = JUST_AFTER_CR;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
o[0] = s[0];
|
o[0] = s[0];
|
||||||
len = 1;
|
len = 1;
|
||||||
t->stateful[0] = 0;
|
*sp = NORMAL;
|
||||||
}
|
}
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
@ -60,6 +68,7 @@ rb_universal_newline = {
|
||||||
1, /* max_input */
|
1, /* max_input */
|
||||||
1, /* max_output */
|
1, /* max_output */
|
||||||
stateful_decoder, /* stateful_type */
|
stateful_decoder, /* stateful_type */
|
||||||
|
1, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_universal_newline
|
NULL, NULL, NULL, fun_so_universal_newline
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -71,6 +80,7 @@ rb_crlf_newline = {
|
||||||
1, /* max_input */
|
1, /* max_input */
|
||||||
2, /* max_output */
|
2, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, NULL
|
NULL, NULL, NULL, NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -82,6 +92,7 @@ rb_cr_newline = {
|
||||||
1, /* max_input */
|
1, /* max_input */
|
||||||
1, /* max_output */
|
1, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, NULL
|
NULL, NULL, NULL, NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
<%= transcode_generated_code %>
|
<%= transcode_generated_code %>
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (!s[0] && s[1]<0x80) {
|
if (!s[0] && s[1]<0x80) {
|
||||||
o[0] = s[1];
|
o[0] = s[1];
|
||||||
|
@ -66,7 +66,7 @@ fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsign
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (!(s[0]&0x80)) {
|
if (!(s[0]&0x80)) {
|
||||||
o[0] = 0x00;
|
o[0] = 0x00;
|
||||||
|
@ -94,7 +94,7 @@ fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (!s[1] && s[0]<0x80) {
|
if (!s[1] && s[0]<0x80) {
|
||||||
o[0] = s[0];
|
o[0] = s[0];
|
||||||
|
@ -122,7 +122,7 @@ fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsign
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (!(s[0]&0x80)) {
|
if (!(s[0]&0x80)) {
|
||||||
o[1] = 0x00;
|
o[1] = 0x00;
|
||||||
|
@ -150,7 +150,7 @@ fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (!s[1]) {
|
if (!s[1]) {
|
||||||
if (s[2]==0 && s[3]<0x80) {
|
if (s[2]==0 && s[3]<0x80) {
|
||||||
|
@ -179,7 +179,7 @@ fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsign
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
o[0] = 0;
|
o[0] = 0;
|
||||||
if (!(s[0]&0x80)) {
|
if (!(s[0]&0x80)) {
|
||||||
|
@ -205,7 +205,7 @@ fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
if (!s[2]) {
|
if (!s[2]) {
|
||||||
if (s[1]==0 && s[0]<0x80) {
|
if (s[1]==0 && s[0]<0x80) {
|
||||||
|
@ -234,7 +234,7 @@ fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsign
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o)
|
fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||||
{
|
{
|
||||||
o[3] = 0;
|
o[3] = 0;
|
||||||
if (!(s[0]&0x80)) {
|
if (!(s[0]&0x80)) {
|
||||||
|
@ -267,6 +267,7 @@ rb_from_UTF_16BE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_from_utf_16be
|
NULL, NULL, NULL, fun_so_from_utf_16be
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -278,6 +279,7 @@ rb_to_UTF_16BE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_to_utf_16be
|
NULL, NULL, NULL, fun_so_to_utf_16be
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -289,6 +291,7 @@ rb_from_UTF_16LE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_from_utf_16le
|
NULL, NULL, NULL, fun_so_from_utf_16le
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -300,6 +303,7 @@ rb_to_UTF_16LE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_to_utf_16le
|
NULL, NULL, NULL, fun_so_to_utf_16le
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -311,6 +315,7 @@ rb_from_UTF_32BE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_from_utf_32be
|
NULL, NULL, NULL, fun_so_from_utf_32be
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -322,6 +327,7 @@ rb_to_UTF_32BE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_to_utf_32be
|
NULL, NULL, NULL, fun_so_to_utf_32be
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -333,6 +339,7 @@ rb_from_UTF_32LE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_from_utf_32le
|
NULL, NULL, NULL, fun_so_from_utf_32le
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -344,6 +351,7 @@ rb_to_UTF_32LE = {
|
||||||
4, /* max_input */
|
4, /* max_input */
|
||||||
4, /* max_output */
|
4, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, fun_so_to_utf_32le
|
NULL, NULL, NULL, fun_so_to_utf_32le
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -590,6 +590,7 @@ static const rb_transcoder
|
||||||
#{max_input}, /* max_input */
|
#{max_input}, /* max_input */
|
||||||
#{max_output}, /* max_output */
|
#{max_output}, /* max_output */
|
||||||
stateless_converter, /* stateful_type */
|
stateless_converter, /* stateful_type */
|
||||||
|
0, NULL, NULL, /* state_size, state_init, state_fini */
|
||||||
NULL, NULL, NULL, NULL,
|
NULL, NULL, NULL, NULL,
|
||||||
NULL, NULL, NULL
|
NULL, NULL, NULL
|
||||||
};
|
};
|
||||||
|
|
27
transcode.c
27
transcode.c
|
@ -544,22 +544,22 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
||||||
SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
|
SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
|
||||||
continue;
|
continue;
|
||||||
case FUNii:
|
case FUNii:
|
||||||
next_info = (VALUE)(*tr->func_ii)(tc, next_info);
|
next_info = (VALUE)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
|
||||||
goto follow_info;
|
goto follow_info;
|
||||||
case FUNsi:
|
case FUNsi:
|
||||||
{
|
{
|
||||||
const unsigned char *char_start;
|
const unsigned char *char_start;
|
||||||
size_t char_len;
|
size_t char_len;
|
||||||
char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
|
char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
|
||||||
next_info = (VALUE)(*tr->func_si)(tc, char_start, (size_t)char_len);
|
next_info = (VALUE)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
|
||||||
goto follow_info;
|
goto follow_info;
|
||||||
}
|
}
|
||||||
case FUNio:
|
case FUNio:
|
||||||
SUSPEND_OBUF(13);
|
SUSPEND_OBUF(13);
|
||||||
if (tr->max_output <= out_stop - out_p)
|
if (tr->max_output <= out_stop - out_p)
|
||||||
out_p += (VALUE)(*tr->func_io)(tc, next_info, out_p);
|
out_p += (VALUE)(*tr->func_io)(TRANSCODING_STATE(tc), next_info, out_p);
|
||||||
else {
|
else {
|
||||||
writebuf_len = (VALUE)(*tr->func_io)(tc, next_info, TRANSCODING_WRITEBUF(tc));
|
writebuf_len = (VALUE)(*tr->func_io)(TRANSCODING_STATE(tc), next_info, TRANSCODING_WRITEBUF(tc));
|
||||||
writebuf_off = 0;
|
writebuf_off = 0;
|
||||||
while (writebuf_off < writebuf_len) {
|
while (writebuf_off < writebuf_len) {
|
||||||
SUSPEND_OBUF(20);
|
SUSPEND_OBUF(20);
|
||||||
|
@ -574,11 +574,11 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
||||||
SUSPEND_OBUF(14);
|
SUSPEND_OBUF(14);
|
||||||
if (tr->max_output <= out_stop - out_p) {
|
if (tr->max_output <= out_stop - out_p) {
|
||||||
char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
|
char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
|
||||||
out_p += (VALUE)(*tr->func_so)(tc, char_start, (size_t)char_len, out_p);
|
out_p += (VALUE)(*tr->func_so)(TRANSCODING_STATE(tc), char_start, (size_t)char_len, out_p);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
|
char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
|
||||||
writebuf_len = (VALUE)(*tr->func_so)(tc, char_start, (size_t)char_len, TRANSCODING_WRITEBUF(tc));
|
writebuf_len = (VALUE)(*tr->func_so)(TRANSCODING_STATE(tc), char_start, (size_t)char_len, TRANSCODING_WRITEBUF(tc));
|
||||||
writebuf_off = 0;
|
writebuf_off = 0;
|
||||||
while (writebuf_off < writebuf_len) {
|
while (writebuf_off < writebuf_len) {
|
||||||
SUSPEND_OBUF(22);
|
SUSPEND_OBUF(22);
|
||||||
|
@ -632,10 +632,10 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
||||||
if (tr->finish_func) {
|
if (tr->finish_func) {
|
||||||
SUSPEND_OBUF(4);
|
SUSPEND_OBUF(4);
|
||||||
if (tr->max_output <= out_stop - out_p) {
|
if (tr->max_output <= out_stop - out_p) {
|
||||||
out_p += tr->finish_func(tc, out_p);
|
out_p += tr->finish_func(TRANSCODING_STATE(tc), out_p);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
writebuf_len = tr->finish_func(tc, TRANSCODING_WRITEBUF(tc));
|
writebuf_len = tr->finish_func(TRANSCODING_STATE(tc), TRANSCODING_WRITEBUF(tc));
|
||||||
writebuf_off = 0;
|
writebuf_off = 0;
|
||||||
while (writebuf_off < writebuf_len) {
|
while (writebuf_off < writebuf_len) {
|
||||||
SUSPEND_OBUF(23);
|
SUSPEND_OBUF(23);
|
||||||
|
@ -687,7 +687,11 @@ rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
|
||||||
tc = ALLOC(rb_transcoding);
|
tc = ALLOC(rb_transcoding);
|
||||||
tc->transcoder = tr;
|
tc->transcoder = tr;
|
||||||
tc->flags = flags;
|
tc->flags = flags;
|
||||||
memset(tc->stateful, 0, sizeof(tc->stateful));
|
if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
|
||||||
|
tc->state = xmalloc(tr->state_size);
|
||||||
|
if (tr->state_init_func) {
|
||||||
|
(tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */
|
||||||
|
}
|
||||||
tc->resume_position = 0;
|
tc->resume_position = 0;
|
||||||
tc->recognized_len = 0;
|
tc->recognized_len = 0;
|
||||||
tc->readagain_len = 0;
|
tc->readagain_len = 0;
|
||||||
|
@ -718,6 +722,11 @@ static void
|
||||||
rb_transcoding_close(rb_transcoding *tc)
|
rb_transcoding_close(rb_transcoding *tc)
|
||||||
{
|
{
|
||||||
const rb_transcoder *tr = tc->transcoder;
|
const rb_transcoder *tr = tc->transcoder;
|
||||||
|
if (tr->state_fini_func) {
|
||||||
|
(tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */
|
||||||
|
}
|
||||||
|
if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
|
||||||
|
xfree(tc->state);
|
||||||
if (sizeof(tc->readbuf.ary) < tr->max_input)
|
if (sizeof(tc->readbuf.ary) < tr->max_input)
|
||||||
xfree(tc->readbuf.ptr);
|
xfree(tc->readbuf.ptr);
|
||||||
if (sizeof(tc->writebuf.ary) < tr->max_output)
|
if (sizeof(tc->writebuf.ary) < tr->max_output)
|
||||||
|
|
|
@ -90,7 +90,7 @@ typedef struct rb_transcoding {
|
||||||
unsigned char *ptr; /* length: max_output */
|
unsigned char *ptr; /* length: max_output */
|
||||||
} writebuf;
|
} writebuf;
|
||||||
|
|
||||||
unsigned char stateful[256]; /* opaque data for stateful encoding */
|
void *state; /* opaque data for stateful encoding */
|
||||||
} rb_transcoding;
|
} rb_transcoding;
|
||||||
#define TRANSCODING_READBUF(tc) \
|
#define TRANSCODING_READBUF(tc) \
|
||||||
((tc)->transcoder->max_input <= sizeof((tc)->readbuf.ary) ? \
|
((tc)->transcoder->max_input <= sizeof((tc)->readbuf.ary) ? \
|
||||||
|
@ -100,6 +100,11 @@ typedef struct rb_transcoding {
|
||||||
((tc)->transcoder->max_output <= sizeof((tc)->writebuf.ary) ? \
|
((tc)->transcoder->max_output <= sizeof((tc)->writebuf.ary) ? \
|
||||||
(tc)->writebuf.ary : \
|
(tc)->writebuf.ary : \
|
||||||
(tc)->writebuf.ptr)
|
(tc)->writebuf.ptr)
|
||||||
|
#define TRANSCODING_STATE_EMBED_MAX sizeof(void *)
|
||||||
|
#define TRANSCODING_STATE(tc) \
|
||||||
|
((tc)->transcoder->state_size <= sizeof((tc)->state) ? \
|
||||||
|
(void *)&(tc)->state : \
|
||||||
|
(tc)->state)
|
||||||
|
|
||||||
/* static structure, one per supported encoding pair */
|
/* static structure, one per supported encoding pair */
|
||||||
struct rb_transcoder {
|
struct rb_transcoder {
|
||||||
|
@ -115,13 +120,16 @@ struct rb_transcoder {
|
||||||
int max_input;
|
int max_input;
|
||||||
int max_output;
|
int max_output;
|
||||||
rb_transcoder_stateful_type_t stateful_type;
|
rb_transcoder_stateful_type_t stateful_type;
|
||||||
VALUE (*func_ii)(rb_transcoding*, VALUE); /* info -> info */
|
size_t state_size;
|
||||||
VALUE (*func_si)(rb_transcoding*, const unsigned char*, size_t); /* start -> info */
|
int (*state_init_func)(void*); /* 0:success !=0:failure(errno) */
|
||||||
int (*func_io)(rb_transcoding*, VALUE, const unsigned char*); /* info -> output */
|
int (*state_fini_func)(void*); /* 0:success !=0:failure(errno) */
|
||||||
int (*func_so)(rb_transcoding*, const unsigned char*, size_t, unsigned char*); /* start -> output */
|
VALUE (*func_ii)(void*, VALUE); /* info -> info */
|
||||||
int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
|
VALUE (*func_si)(void*, const unsigned char*, size_t); /* start -> info */
|
||||||
int (*resetsize_func)(rb_transcoding*); /* -> len */
|
int (*func_io)(void*, VALUE, const unsigned char*); /* info -> output */
|
||||||
int (*resetstate_func)(rb_transcoding*, unsigned char*); /* -> output */
|
int (*func_so)(void*, const unsigned char*, size_t, unsigned char*); /* start -> output */
|
||||||
|
int (*finish_func)(void*, unsigned char*); /* -> output */
|
||||||
|
int (*resetsize_func)(void*); /* -> len */
|
||||||
|
int (*resetstate_func)(void*, unsigned char*); /* -> output */
|
||||||
};
|
};
|
||||||
|
|
||||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||||
|
|
Загрузка…
Ссылка в новой задаче