* transcode_data.h (rb_transcoding): add feedlen field.

* transcode.c (transcode_restartable0): renamed from
  transcode_restartable.
  save the input buffer into the feed buffer if the next character
  starts at a point before the input buffer.  for example,
  "\x00\xd8\x01" followed by "\x02" in UTF-16LE: \x02 makes the
  sequence invalid, and the next character starts from \x01.
  (transcode_restartable): new function that calls
  transcode_restartable0.  if the feed buffer is not empty, convert
  it first.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18467 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-08-10 02:17:56 +00:00
Родитель 3ba7984ef4
Коммит e748ed513d
3 изменённых файлов: 69 добавлений и 10 удалений

Просмотреть файл

@ -1,3 +1,17 @@
Sun Aug 10 11:15:55 2008 Tanaka Akira <akr@fsij.org>
* transcode_data.h (rb_transcoding): add feedlen field.
* transcode.c (transcode_restartable0): renamed from
transcode_restartable.
save the input buffer into the feed buffer if the next character
starts at a point before the input buffer. for example,
"\x00\xd8\x01" followed by "\x02" in UTF-16LE: \x02 makes the
sequence invalid, and the next character starts from \x01.
(transcode_restartable): new function that calls
transcode_restartable0. if the feed buffer is not empty, convert
it first.
Sun Aug 10 11:02:58 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* common.mk (extconf): use MAKEDIRS.

Просмотреть файл

@ -355,7 +355,7 @@ typedef enum {
} transcode_result_t;
static transcode_result_t
transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
const unsigned char *in_stop, unsigned char *out_stop,
rb_transcoding *my_transcoding,
const int opt)
@ -363,6 +363,7 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
{
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
int unitlen = my_transcoder->input_unit_length;
int feedlen = 0;
const unsigned char *inchar_start;
const unsigned char *in_p;
@ -396,11 +397,15 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
do { \
my_transcoding->resume_position = (num); \
if (0 < in_p - inchar_start) \
MEMCPY(TRANSCODING_READBUF(my_transcoding)+my_transcoding->readlen, \
MEMMOVE(TRANSCODING_READBUF(my_transcoding)+my_transcoding->readlen, \
inchar_start, unsigned char, in_p - inchar_start); \
*in_pos = in_p; \
*out_pos = out_p; \
my_transcoding->readlen += in_p - inchar_start; \
if (feedlen) { \
my_transcoding->readlen -= feedlen; \
my_transcoding->feedlen = feedlen; \
} \
my_transcoding->next_table = next_table; \
my_transcoding->next_info = next_info; \
my_transcoding->next_byte = next_byte; \
@ -524,12 +529,23 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
}
}
else {
int found_len; /* including the last byte which cuases invalid */
int invalid_len;
int step;
/* xxx: step may be negative.
* possibly in_p is lesser than *in_pos.
* caller may want to access readbuf. */
step = (((my_transcoding->readlen + (in_p - inchar_start)) - 1) / unitlen) * unitlen - (my_transcoding->readlen + (in_p - inchar_start));
in_p += step;
found_len = my_transcoding->readlen + (in_p - inchar_start);
invalid_len = ((found_len - 1) / unitlen) * unitlen;
step = invalid_len - found_len;
if (step < -1) {
if (-step <= in_p - *in_pos) {
in_p += step;
}
else {
feedlen = -step;
}
}
else {
in_p += step;
}
}
goto invalid;
}
@ -559,6 +575,32 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
#undef SUSPEND
}
/*
 * Front end for transcode_restartable0 that drains the feed buffer first.
 *
 * When my_transcoding->feedlen is non-zero, that many bytes stored in the
 * read buffer at offset my_transcoding->readlen are converted before any
 * of the caller's input is looked at.
 *
 * - If converting the fed bytes ends with anything other than
 *   transcode_ibuf_empty, the bytes that were not consumed are stored
 *   back into the read buffer (after readlen + feedlen), feedlen is
 *   increased accordingly, and that result is returned.  *in_pos is not
 *   advanced in this case.
 * - Otherwise (or when there was nothing to feed) the caller's input is
 *   converted and the result of that conversion is returned.
 */
static transcode_result_t
transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
                      const unsigned char *in_stop, unsigned char *out_stop,
                      rb_transcoding *my_transcoding,
                      const int opt)
{
    const int pending = my_transcoding->feedlen;
    unsigned char *scratch;
    const unsigned char *cursor;
    const unsigned char *limit;
    transcode_result_t status;
    int leftover;

    /* Nothing was left over from a previous call: convert the input directly. */
    if (pending == 0) {
        return transcode_restartable0(in_pos, out_pos, in_stop, out_stop,
                                      my_transcoding, opt);
    }

    /* Work on a private copy of the fed bytes; transcode_restartable0 can
     * itself store bytes into the read buffer while it runs. */
    scratch = ALLOCA_N(unsigned char, pending);
    MEMCPY(scratch,
           TRANSCODING_READBUF(my_transcoding) + my_transcoding->readlen,
           unsigned char, pending);
    cursor = scratch;
    limit = scratch + pending;
    my_transcoding->feedlen = 0;

    status = transcode_restartable0(&cursor, out_pos, limit, out_stop,
                                    my_transcoding, opt);

    /* Every fed byte was consumed: carry on with the caller's input. */
    if (status == transcode_ibuf_empty) {
        return transcode_restartable0(in_pos, out_pos, in_stop, out_stop,
                                      my_transcoding, opt);
    }

    /* Keep the unconsumed remainder for the next call.  readlen and
     * feedlen are re-read here because the call above may have updated
     * them. */
    leftover = (int)(limit - cursor);
    MEMCPY(TRANSCODING_READBUF(my_transcoding)
               + my_transcoding->readlen + my_transcoding->feedlen,
           cursor, unsigned char, leftover);
    my_transcoding->feedlen += leftover;
    return status;
}
static void
more_output_buffer(
VALUE destination,
@ -590,6 +632,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
my_transcoding->resume_position = 0;
my_transcoding->readlen = 0;
my_transcoding->feedlen = 0;
if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) {
my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input);
@ -648,7 +691,7 @@ static void
transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
const unsigned char *in_stop, unsigned char *out_stop,
VALUE destination,
unsigned char *(*resize_destination)(VALUE, struct rb_transcoding*, int, int),
unsigned char *(*resize_destination)(VALUE, int, int),
rb_transcoding *my_transcoding,
const int opt)
{
@ -659,6 +702,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
my_transcoding->resume_position = 0;
my_transcoding->readlen = 0;
my_transcoding->feedlen = 0;
if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) {
my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input);

Просмотреть файл

@ -65,11 +65,12 @@ typedef struct rb_transcoding {
const BYTE_LOOKUP *next_table;
VALUE next_info;
unsigned char next_byte;
int readlen;
int readlen; /* already interpreted */
int feedlen; /* not yet interpreted */
union {
unsigned char ary[8]; /* max_input <= sizeof(ary) */
unsigned char *ptr; /* length is max_input */
} readbuf;
} readbuf; /* readlen + feedlen used */
unsigned char stateful[256]; /* opaque data for stateful encoding */
} rb_transcoding;