* transcode.c (transcode_loop, str_transcoding_resize): use unsigned
  char.  [ruby-dev:33232]

* transcode_data.h (rb_transcoding, rb_transcoder): removed callback
  parameters.

* enc/trans/japanese.c: ditto.

* enc/trans/utf_16_32.c: parenthesized bit-or operands.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15150 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2008-01-21 03:35:05 +00:00
Родитель 76e19bc534
Коммит 463af63468
5 изменённых файлов: 68 добавлений и 57 удалений

Просмотреть файл

@ -1,3 +1,15 @@
Mon Jan 21 12:35:00 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* transcode.c (transcode_loop, str_transcoding_resize): use unsigned
char. [ruby-dev:33232]
* transcode_data.h (rb_transcoding, rb_transcoder): removed callback
parameters.
* enc/trans/japanese.c: ditto.
* enc/trans/utf_16_32.c: parenthesized bit-or operands.
Mon Jan 21 11:59:00 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> Mon Jan 21 11:59:00 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (rb_str_each_char): move forward. [ruby-dev:33231] * string.c (rb_str_each_char): move forward. [ruby-dev:33231]

Просмотреть файл

@ -23671,10 +23671,10 @@ enum ISO_2022_ESCSEQ {
#define ISO_2022_GZ_JIS_X_0213_2004_1 ISO_2022_ENCODING(ISO_2022_GZDM4,'Q') #define ISO_2022_GZ_JIS_X_0213_2004_1 ISO_2022_ENCODING(ISO_2022_GZDM4,'Q')
static int static int
get_iso_2022_mode(char **in_pos) get_iso_2022_mode(unsigned char **in_pos)
{ {
int new_mode; int new_mode;
char *in_p = *in_pos; unsigned char *in_p = *in_pos;
switch (*in_p++) switch (*in_p++)
{ {
case '(': case '(':
@ -23719,15 +23719,15 @@ get_iso_2022_mode(char **in_pos)
} }
static void static void
from_iso_2022_jp_transcoder_preprocessor(char **in_pos, char **out_pos, from_iso_2022_jp_transcoder_preprocessor(unsigned char **in_pos, unsigned char **out_pos,
char *in_stop, char *out_stop, unsigned char *in_stop, unsigned char *out_stop,
rb_transcoding *my_transcoding) rb_transcoding *my_transcoding)
{ {
const rb_transcoder *my_transcoder = my_transcoding->transcoder; const rb_transcoder *my_transcoder = my_transcoding->transcoder;
char *in_p = *in_pos, *out_p = *out_pos; unsigned char *in_p = *in_pos, *out_p = *out_pos;
int cur_mode = ISO_2022_GZ_ASCII; int cur_mode = ISO_2022_GZ_ASCII;
unsigned char c1; unsigned char c1;
char *out_s = out_stop - my_transcoder->max_output + 1; unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
while (in_p < in_stop) { while (in_p < in_stop) {
if (out_p >= out_s) { if (out_p >= out_s) {
int len = (out_p - *out_pos); int len = (out_p - *out_pos);
@ -23770,9 +23770,9 @@ from_iso_2022_jp_transcoder_preprocessor(char **in_pos, char **out_pos,
} }
static int static int
select_iso_2022_mode(char **out_pos, int new_mode) select_iso_2022_mode(unsigned char **out_pos, int new_mode)
{ {
char *out_p = *out_pos; unsigned char *out_p = *out_pos;
*out_p++ = '\x1b'; *out_p++ = '\x1b';
switch (new_mode>>8) switch (new_mode>>8)
{ {
@ -23799,15 +23799,15 @@ select_iso_2022_mode(char **out_pos, int new_mode)
} }
static void static void
to_iso_2022_jp_transcoder_postprocessor(char **in_pos, char **out_pos, to_iso_2022_jp_transcoder_postprocessor(unsigned char **in_pos, unsigned char **out_pos,
char *in_stop, char *out_stop, unsigned char *in_stop, unsigned char *out_stop,
rb_transcoding *my_transcoding) rb_transcoding *my_transcoding)
{ {
const rb_transcoder *my_transcoder = my_transcoding->transcoder; const rb_transcoder *my_transcoder = my_transcoding->transcoder;
char *in_p = *in_pos, *out_p = *out_pos; unsigned char *in_p = *in_pos, *out_p = *out_pos;
int cur_mode = ISO_2022_GZ_ASCII, new_mode = 0; int cur_mode = ISO_2022_GZ_ASCII, new_mode = 0;
unsigned char next_byte; unsigned char next_byte;
char *out_s = out_stop - my_transcoder->max_output + 1; unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
while (in_p < in_stop) { while (in_p < in_stop) {
if (out_p >= out_s) { if (out_p >= out_s) {
int len = (out_p - *out_pos); int len = (out_p - *out_pos);

Просмотреть файл

@ -12,21 +12,21 @@ fun_so_from_utf_16be(const unsigned char* s, unsigned char* o)
} }
else if (s[0]<0x08) { else if (s[0]<0x08) {
o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6); o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6);
o[1] = 0x80 | s[1]&0x3F; o[1] = 0x80 | (s[1]&0x3F);
return 2; return 2;
} }
else if ((s[0]&0xF8)!=0xD8) { else if ((s[0]&0xF8)!=0xD8) {
o[0] = 0xE0 | s[0]>>4; o[0] = 0xE0 | s[0]>>4;
o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6); o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6);
o[2] = 0x80 | s[1]&0x3F; o[2] = 0x80 | (s[1]&0x3F);
return 3; return 3;
} }
else { else {
unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1; unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1;
o[0] = 0xF0 | u>>2; o[0] = 0xF0 | u>>2;
o[1] = 0x80 | ((u&0x03)<<4) | (s[1]>>2)&0x0F; o[1] = 0x80 | ((u&0x03)<<4) | ((s[1]>>2)&0x0F);
o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6); o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6);
o[3] = 0x80 | s[3]&0x3F; o[3] = 0x80 | (s[3]&0x3F);
return 4; return 4;
} }
} }
@ -41,16 +41,16 @@ fun_so_to_utf_16be(const unsigned char* s, unsigned char* o)
} }
else if ((s[0]&0xE0)==0xC0) { else if ((s[0]&0xE0)==0xC0) {
o[0] = (s[0]>>2)&0x07; o[0] = (s[0]>>2)&0x07;
o[1] = ((s[0]&0x03)<<6) | s[1]&0x3F; o[1] = ((s[0]&0x03)<<6) | (s[1]&0x3F);
return 2; return 2;
} }
else if ((s[0]&0xF0)==0xE0) { else if ((s[0]&0xF0)==0xE0) {
o[0] = (s[0]<<4) | (s[1]>>2)^0x20; o[0] = (s[0]<<4) | ((s[1]>>2)^0x20);
o[1] = (s[1]<<6) | s[2]^0x80; o[1] = (s[1]<<6) | (s[2]^0x80);
return 2; return 2;
} }
else { else {
int w = (((s[0]&0x07)<<2) | (s[1]>>4)&0x03) - 1; int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1;
o[0] = 0xD8 | (w>>2); o[0] = 0xD8 | (w>>2);
o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8);
o[2] = 0xDC | ((s[2]>>2)&0x03); o[2] = 0xDC | ((s[2]>>2)&0x03);
@ -68,21 +68,21 @@ fun_so_from_utf_16le(const unsigned char* s, unsigned char* o)
} }
else if (s[1]<0x08) { else if (s[1]<0x08) {
o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6);
o[1] = 0x80 | s[0]&0x3F; o[1] = 0x80 | (s[0]&0x3F);
return 2; return 2;
} }
else if ((s[1]&0xF8)!=0xD8) { else if ((s[1]&0xF8)!=0xD8) {
o[0] = 0xE0 | s[1]>>4; o[0] = 0xE0 | s[1]>>4;
o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6);
o[2] = 0x80 | s[0]&0x3F; o[2] = 0x80 | (s[0]&0x3F);
return 3; return 3;
} }
else { else {
unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1; unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1;
o[0] = 0xF0 | u>>2; o[0] = 0xF0 | u>>2;
o[1] = 0x80 | ((u&0x03)<<4) | (s[0]>>2)&0x0F; o[1] = 0x80 | ((u&0x03)<<4) | ((s[0]>>2)&0x0F);
o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6); o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6);
o[3] = 0x80 | s[2]&0x3F; o[3] = 0x80 | (s[2]&0x3F);
return 4; return 4;
} }
} }
@ -97,16 +97,16 @@ fun_so_to_utf_16le(const unsigned char* s, unsigned char* o)
} }
else if ((s[0]&0xE0)==0xC0) { else if ((s[0]&0xE0)==0xC0) {
o[1] = (s[0]>>2)&0x07; o[1] = (s[0]>>2)&0x07;
o[0] = ((s[0]&0x03)<<6) | s[1]&0x3F; o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F);
return 2; return 2;
} }
else if ((s[0]&0xF0)==0xE0) { else if ((s[0]&0xF0)==0xE0) {
o[1] = (s[0]<<4) | (s[1]>>2)^0x20; o[1] = (s[0]<<4) | ((s[1]>>2)^0x20);
o[0] = (s[1]<<6) | s[2]^0x80; o[0] = (s[1]<<6) | (s[2]^0x80);
return 2; return 2;
} }
else { else {
int w = (((s[0]&0x07)<<2) | (s[1]>>4)&0x03) - 1; int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1;
o[1] = 0xD8 | (w>>2); o[1] = 0xD8 | (w>>2);
o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8);
o[3] = 0xDC | ((s[2]>>2)&0x03); o[3] = 0xDC | ((s[2]>>2)&0x03);

Просмотреть файл

@ -147,20 +147,20 @@ transcode_dispatch(const char* from_encoding, const char* to_encoding)
* Transcoding engine logic * Transcoding engine logic
*/ */
static void static void
transcode_loop(char **in_pos, char **out_pos, transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
char *in_stop, char *out_stop, unsigned char *in_stop, unsigned char *out_stop,
const rb_transcoder *my_transcoder, const rb_transcoder *my_transcoder,
rb_transcoding *my_transcoding) rb_transcoding *my_transcoding)
{ {
char *in_p = *in_pos, *out_p = *out_pos; unsigned char *in_p = *in_pos, *out_p = *out_pos;
const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start; const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
const BYTE_LOOKUP *next_table; const BYTE_LOOKUP *next_table;
char *char_start; unsigned char *char_start;
unsigned int next_offset; unsigned int next_offset;
VALUE next_info; VALUE next_info;
unsigned char next_byte; unsigned char next_byte;
int from_utf8 = my_transcoder->from_utf8; int from_utf8 = my_transcoder->from_utf8;
char *out_s = out_stop - my_transcoder->max_output + 1; unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
while (in_p < in_stop) { while (in_p < in_stop) {
char_start = in_p; char_start = in_p;
next_table = conv_tree_start; next_table = conv_tree_start;
@ -214,17 +214,17 @@ transcode_loop(char **in_pos, char **out_pos,
*out_p++ = getBT3(next_info); *out_p++ = getBT3(next_info);
continue; continue;
case FUNii: case FUNii:
next_info = (VALUE)(*my_transcoder->func_ii)(next_info, my_transcoding); next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
goto follow_info; goto follow_info;
case FUNsi: case FUNsi:
next_info = (VALUE)(*my_transcoder->func_si)(char_start, my_transcoding); next_info = (VALUE)(*my_transcoder->func_si)(char_start);
goto follow_info; goto follow_info;
break; break;
case FUNio: case FUNio:
out_p += (VALUE)(*my_transcoder->func_io)(next_info, out_p, my_transcoding); out_p += (VALUE)(*my_transcoder->func_io)(next_info, out_p);
break; break;
case FUNso: case FUNso:
out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p, my_transcoding); out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p);
break; break;
case INVALID: case INVALID:
goto invalid; goto invalid;
@ -250,12 +250,12 @@ transcode_loop(char **in_pos, char **out_pos,
* String-specific code * String-specific code
*/ */
static char * static unsigned char *
str_transcoding_resize(rb_transcoding *my_transcoding, int len, int new_len) str_transcoding_resize(rb_transcoding *my_transcoding, int len, int new_len)
{ {
VALUE dest_string = my_transcoding->ruby_string_dest; VALUE dest_string = my_transcoding->ruby_string_dest;
rb_str_resize(dest_string, new_len); rb_str_resize(dest_string, new_len);
return RSTRING_PTR(dest_string); return (unsigned char *)RSTRING_PTR(dest_string);
} }
static int static int
@ -264,7 +264,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
VALUE dest; VALUE dest;
VALUE str = *self; VALUE str = *self;
long blen, slen; long blen, slen;
char *buf, *bp, *sp, *fromp; unsigned char *buf, *bp, *sp, *fromp;
rb_encoding *from_enc, *to_enc; rb_encoding *from_enc, *to_enc;
const char *from_e, *to_e; const char *from_e, *to_e;
int from_encidx, to_encidx; int from_encidx, to_encidx;
@ -318,26 +318,26 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
} }
if (my_transcoder->preprocessor) { if (my_transcoder->preprocessor) {
fromp = sp = RSTRING_PTR(str); fromp = sp = (unsigned char *)RSTRING_PTR(str);
slen = RSTRING_LEN(str); slen = RSTRING_LEN(str);
blen = slen + 30; /* len + margin */ blen = slen + 30; /* len + margin */
dest = rb_str_tmp_new(blen); dest = rb_str_tmp_new(blen);
bp = RSTRING_PTR(dest); bp = (unsigned char *)RSTRING_PTR(dest);
my_transcoding.ruby_string_dest = dest; my_transcoding.ruby_string_dest = dest;
(*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding); (*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
if (fromp != sp+slen) { if (fromp != sp+slen) {
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp); rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
} }
buf = RSTRING_PTR(dest); buf = (unsigned char *)RSTRING_PTR(dest);
*bp = '\0'; *bp = '\0';
rb_str_set_len(dest, bp - buf); rb_str_set_len(dest, bp - buf);
str = dest; str = dest;
} }
fromp = sp = RSTRING_PTR(str); fromp = sp = (unsigned char *)RSTRING_PTR(str);
slen = RSTRING_LEN(str); slen = RSTRING_LEN(str);
blen = slen + 30; /* len + margin */ blen = slen + 30; /* len + margin */
dest = rb_str_tmp_new(blen); dest = rb_str_tmp_new(blen);
bp = RSTRING_PTR(dest); bp = (unsigned char *)RSTRING_PTR(dest);
my_transcoding.ruby_string_dest = dest; my_transcoding.ruby_string_dest = dest;
my_transcoding.flush_func = str_transcoding_resize; my_transcoding.flush_func = str_transcoding_resize;
@ -345,22 +345,22 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
if (fromp != sp+slen) { if (fromp != sp+slen) {
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp); rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
} }
buf = RSTRING_PTR(dest); buf = (unsigned char *)RSTRING_PTR(dest);
*bp = '\0'; *bp = '\0';
rb_str_set_len(dest, bp - buf); rb_str_set_len(dest, bp - buf);
if (my_transcoder->postprocessor) { if (my_transcoder->postprocessor) {
str = dest; str = dest;
fromp = sp = RSTRING_PTR(str); fromp = sp = (unsigned char *)RSTRING_PTR(str);
slen = RSTRING_LEN(str); slen = RSTRING_LEN(str);
blen = slen + 30; /* len + margin */ blen = slen + 30; /* len + margin */
dest = rb_str_tmp_new(blen); dest = rb_str_tmp_new(blen);
bp = RSTRING_PTR(dest); bp = (unsigned char *)RSTRING_PTR(dest);
my_transcoding.ruby_string_dest = dest; my_transcoding.ruby_string_dest = dest;
(*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding); (*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
if (fromp != sp+slen) { if (fromp != sp+slen) {
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp); rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
} }
buf = RSTRING_PTR(dest); buf = (unsigned char *)RSTRING_PTR(dest);
*bp = '\0'; *bp = '\0';
rb_str_set_len(dest, bp - buf); rb_str_set_len(dest, bp - buf);
} }

Просмотреть файл

@ -62,8 +62,7 @@ typedef struct rb_transcoding {
struct rb_transcoder *transcoder; struct rb_transcoder *transcoder;
VALUE ruby_string_dest; /* the String used as the conversion destination, VALUE ruby_string_dest; /* the String used as the conversion destination,
or NULL if something else is being converted */ or NULL if something else is being converted */
char *(*flush_func)(struct rb_transcoding*, int, int); unsigned char *(*flush_func)(struct rb_transcoding*, int, int);
VALUE auxiliary_data;
} rb_transcoding; } rb_transcoding;
/* static structure, one per supported encoding pair */ /* static structure, one per supported encoding pair */
@ -73,12 +72,12 @@ typedef struct rb_transcoder {
const BYTE_LOOKUP *conv_tree_start; const BYTE_LOOKUP *conv_tree_start;
int max_output; int max_output;
int from_utf8; int from_utf8;
void (*preprocessor)(char**, char**, char*, char*, struct rb_transcoding *); void (*preprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
void (*postprocessor)(char**, char**, char*, char*, struct rb_transcoding *); void (*postprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
VALUE (*func_ii)(VALUE, struct rb_transcoding *); /* info -> info */ VALUE (*func_ii)(VALUE); /* info -> info */
VALUE (*func_si)(const char *, struct rb_transcoding *); /* start -> info */ VALUE (*func_si)(const unsigned char *); /* start -> info */
int (*func_io)(VALUE, const char*, struct rb_transcoding *); /* info -> output */ int (*func_io)(VALUE, const unsigned char*); /* info -> output */
int (*func_so)(const char*, char*, struct rb_transcoding *); /* start -> output */ int (*func_so)(const unsigned char*, unsigned char*); /* start -> output */
} rb_transcoder; } rb_transcoder;
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib); void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);