/* gecko-dev/lib/libi18n/jis2oth.c -- 294 lines, 9.0 KiB, C */

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/* jis2oth.c */
/* other: SJIS or EUC */
#include "intlpriv.h"
extern int MK_OUT_OF_MEMORY;
/* jis2other(obj, jisbuf, jisbufsz)
 *
 * Convert a buffer of JIS (ISO-2022-JP style) bytes to SJIS or EUC.
 *
 * Args:
 *  obj:        Conversion state object.  This routine uses:
 *                cvtflag  - zero: convert to SJIS; nonzero: convert to EUC.
 *                jismode  - current JIS mode.  It is carried across calls
 *                           so the rest of the current JIS data can be
 *                           processed by the next call.  The first call
 *                           should start with the mode set to ASCII (0).
 *                uncvtbuf - nul-terminated JIS bytes that a previous call
 *                           could not convert.  On return, uncvtbuf[0] is
 *                           nul if the entire input was converted, else it
 *                           holds the JIS bytes that were NOT converted;
 *                           call jis2other() again with more JIS data.
 *                len      - set to the number of converted bytes (not
 *                           counting the terminating nul).
 *                retval   - set to MK_OUT_OF_MEMORY if allocation fails.
 *  jisbuf:     Ptr to a buf of JIS chars
 *  jisbufsz:   Size in bytes of jisbuf
 *
 * Return:
 *  Returns NULL on failure, otherwise it returns a pointer to a
 *  nul-terminated buffer of converted SJIS or EUC characters.  Caller must
 *  XP_FREE() this memory.  NULL is returned when:
 *    - the computed destination size is zero (EUC with no input at all),
 *    - the destination buffer cannot be allocated, or
 *    - bytes were pending in uncvtbuf and, even with the new input
 *      appended, nothing could be converted (more JIS data is needed).
 *      In this case the new input bytes that were appended stay in
 *      uncvtbuf and obj's len is not updated.
 *
 * Description:
 *
 *  Allocate destination buffer (for SJIS or EUC).
 *
 *  Set JIS mode state based upon ESC sequences.  Also if NL or CR,
 *  mode is reset to JIS-Roman.
 *    ESC ( J, ESC ( B, ESC - A   -> JIS-Roman
 *    ESC ( I                     -> Half-width Katakana
 *    ESC $ B, ESC $ @            -> JIS x208
 *    ESC $ ( D                   -> JIS x212 when converting to EUC,
 *                                   JIS x208 when converting to SJIS
 *  Any other ESC sequence is passed through.
 *
 *  If JIS mode is JIS x208 and converting to EUC, set 8th bits of next
 *  2 bytes.
 *
 *  If JIS mode is JIS x208 and converting to SJIS, use the JIS to SJIS
 *  algorithm.
 *
 *  If JIS mode is JIS x212 (only set when converting to EUC), output SS3
 *  and set 8th bits of next 2 bytes.
 *
 *  If JIS Half-width Katakana and converting to EUC, output SS2 followed
 *  by the next byte w/8th bit set.
 *
 *  If JIS Half-width Katakana and converting to SJIS, output the next
 *  byte w/8th bit set.
 *
 *  If any other JIS mode, then assume Latin1 and just copy the next byte.
 *
 *  If either the JIS buffer does not contain a complete JIS char / ESC
 *  sequence or the destination buffer is full, then the unconverted JIS
 *  is saved in uncvtbuf.  Caller should supply more data and call
 *  jis2other() again.
 */
MODULE_PRIVATE unsigned char *
jis2other(  CCCDataObject       obj,
            const unsigned char *jisbuf,    /* JIS buffer for conversion */
            int32               jisbufsz)   /* JIS buffer size in bytes  */
{
    unsigned char   *tobuf = NULL;
    int32           tobufsz;
    unsigned char   *tobufp, *jisp;     /* current byte in bufs */
    unsigned char   *tobufep, *jisep;   /* end of buffers */
    unsigned char   *uncvtp;            /* cursor into uncvtbuf */
    int32           uncvtlen;
    unsigned char   *uncvtbuf = INTL_GetCCCUncvtbuf(obj);

    /* Aliases for the destination buffer.  They are left defined after
     * the function, exactly as before.
     */
#define sjisbuf     tobuf
#define sjisbufsz   tobufsz
#define sjisp       tobufp
#define sjisep      tobufep
#define eucbufsz    tobufsz
#define eucbuf      tobuf
#define eucp        tobufp
#define eucep       tobufep

    /* Allocate a dest buffer:
     *
     * SJIS never grows: every JIS byte yields at most one SJIS byte, so
     * the original length + 1 for the nul byte is enough.
     *
     * EUC can double: a half-width kana byte becomes SS2 + byte.  The JIS
     * mode persists across calls, so an input buffer may consist of kana
     * bytes only, with no ESC sequence to absorb the slack.  The output
     * can therefore be exactly 2X the input, and one more byte is needed
     * for the terminating nul.
     */
    uncvtlen = strlen((char *)uncvtbuf);
    if (!INTL_GetCCCCvtflag(obj)) {
        tobufsz = jisbufsz + uncvtlen + 1;
    } else {
        tobufsz = (jisbufsz + uncvtlen) << 1;
    }
    if (!tobufsz) {
        return NULL;                /* nothing to convert */
    }
    if (INTL_GetCCCCvtflag(obj)) {
        tobufsz++;                  /* room for the terminating nul */
    }

    if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
        INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
        return(NULL);
    }

    /* Initialize pointers, etc. */
    jisp = (unsigned char *)jisbuf;
    jisep = jisp + jisbufsz - 1;

    /* If prev. unconverted chars, append new chars to the unconverted
     * chars and try to process them from uncvtbuf first.
     */
    if (uncvtbuf[0] != '\0') {
        uncvtp = uncvtbuf + uncvtlen;
        while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE) && jisp <= jisep) {
            *uncvtp++ = *jisp++;
        }
        /* NOTE(review): if uncvtbuf holds exactly UNCVTBUF_SIZE bytes,
         * this terminator lands one past its end when the loop above
         * fills it -- confirm the declared size of uncvtbuf.
         */
        *uncvtp = '\0';             /* nul terminate */
        jisp = uncvtbuf;            /* process unconverted first */
        jisep = uncvtp - 1;
    }

    tobufp = tobuf;
    tobufep = tobufp + tobufsz - 2; /* save space for terminating null */

WHILELOOP:
    /* While JIS data && space in dest. buf. */
    while ((tobufp <= tobufep) && (jisp <= jisep)) {
        if (*jisp == ESC) {
            if ((jisep - jisp) < 2) {   /* Incomplete ESC seq in JIS buf? */
                break;
            }
            switch (jisp[1]) {
            case '(':
                switch (jisp[2]) {
                case 'J':               /* JIS X 0201-Roman */
                case 'B':               /* ASCII */
                    INTL_SetCCCJismode(obj, JIS_Roman);
                    jisp += 3;          /* remove ESC seq. */
                    break;
                case 'I':               /* Half-width katakana */
                    INTL_SetCCCJismode(obj, JIS_HalfKana);
                    jisp += 3;          /* remove ESC seq. */
                    break;
                default:                /* pass thru invalid ESC seq. */
                    *tobufp++ = *jisp++;
                    *tobufp++ = *jisp++;
                    break;
                }
                break;
            case DOLLAR:
                switch (jisp[2]) {
                case 'B':               /* JIS X 0208-1983 */
                case '@':               /* JIS X 0208-1978 (old-JIS) */
                    INTL_SetCCCJismode(obj, JIS_208_83);
                    jisp += 3;          /* remove rest of ESC seq. */
                    break;
                case '(':
                    /* 4-byte sequence; a plain break would only leave
                     * the switch, so jump out of the loop.
                     */
                    if ((jisep - jisp) < 3) {   /* Full ESC seq in buf? */
                        goto abortwhile;
                    }
                    switch (jisp[3]) {
                    case 'D':           /* JIS X 0212-1990 */
                        if (!INTL_GetCCCCvtflag(obj)) {
                            /* No JIS212 in SJIS */
                            INTL_SetCCCJismode(obj, JIS_208_83);
                        } else {
                            INTL_SetCCCJismode(obj, JIS_212_90);
                        }
                        jisp += 4;      /* remove rest of ESC seq. */
                        break;
                    default:            /* pass thru invalid ESC seq. */
                        *tobufp++ = *jisp++;
                        *tobufp++ = *jisp++;
                        break;
                    }
                    break;
                default:                /* pass thru invalid ESC seq. */
                    *tobufp++ = *jisp++;
                    *tobufp++ = *jisp++;
                    break;
                }
                break;
            case '-':
                switch (jisp[2]) {
                case 'A':               /* ISO8859-1 */
                    INTL_SetCCCJismode(obj, JIS_Roman);
                    jisp += 3;          /* remove rest of ESC seq. */
                    break;
                default:                /* pass thru invalid ESC seq. */
                    *tobufp++ = *jisp++;
                    *tobufp++ = *jisp++;
                    break;
                }
                break;
            default:                    /* pass thru invalid ESC seq. */
                *tobufp++ = *jisp++;
                break;
            }
        } else if (*jisp == NL || *jisp == CR) {
            /* End of line resets the mode to JIS-Roman. */
            INTL_SetCCCJismode(obj, JIS_Roman);
            *tobufp++ = *jisp++;
        } else if (INTL_GetCCCJismode(obj) == JIS_208_83) {
            if ((jisp + 1) > jisep) {   /* Incomplete 2Byte char in JIS buf? */
                break;
            }
            if (INTL_GetCCCCvtflag(obj)) {  /* Convert JIS 208 to EUC */
                *eucp++ = *jisp | 0x80;
                jisp++;
                *eucp++ = *jisp | 0x80;
                jisp++;
            } else {
                /* Convert JIS-208 to SJIS: Same as euc2sjis.c's
                 * EUC208-to-SJIS algorithm except JIS 8th bit is clear.
                 */
                if (*jisp < 0x5F) {     /* Convert 1st SJIS byte */
                    *sjisp++ = ((*jisp + 1) >> 1) + 0x70;
                } else {
                    *sjisp++ = ((*jisp + 1) >> 1) + 0xB0;
                }
                /* Convert 2nd SJIS byte */
                if ((*jisp++) & 1) {    /* if 1st JIS byte is odd */
                    if (*jisp > 0x5F) {
                        *sjisp = *jisp + 0x20;
                    } else {
                        *sjisp = *jisp + 0x1F;
                    }
                } else {
                    *sjisp = *jisp + 0x7E;
                }
                sjisp++;
                jisp++;
            }
        } else if (INTL_GetCCCJismode(obj) == JIS_212_90) {
            /* only "to EUC" supports 212 */
            if ((jisp + 1) > jisep) {   /* Incomplete 2Byte char in JIS buf? */
                break;
            }
            *eucp++ = SS3;
            *eucp++ = *jisp | 0x80;
            jisp++;
            *eucp++ = *jisp | 0x80;
            jisp++;
        } else if (INTL_GetCCCJismode(obj) == JIS_HalfKana) {
            if (INTL_GetCCCCvtflag(obj)) {
                *eucp++ = SS2;          /* EUC only: kana prefix */
            }
            *tobufp++ = *jisp | 0x80;   /* Set 8th bit for EUC & SJIS */
            jisp++;
        } else {
            /* Unknown type: no conversion */
            *tobufp++ = *jisp++;
        }
    }

abortwhile:
    if (uncvtbuf[0] != '\0') {
        /* Just processed unconverted chars:
         * jisp pts to 1st unprocessed char in uncvtbuf.  Some of the new
         * chars may have been processed while processing unconverted
         * chars, so set up ptrs not to process them twice.
         */

        /* If nothing was converted, this can only happen if there was
         * not enough JIS data.  Stop and get more data.  The destination
         * buffer is not handed to the caller on this path, so release it
         * here rather than leak it.
         */
        if (jisp == uncvtbuf) {         /* Nothing converted */
            XP_FREE(tobuf);
            return(NULL);
        }
        jisep = (unsigned char *)jisbuf + jisbufsz - 1;
        /* Bytes consumed from uncvtbuf beyond the old leftovers came
         * from jisbuf; resume right after them.
         */
        jisp = (unsigned char *)jisbuf + (jisp - uncvtbuf - uncvtlen);
        uncvtbuf[0] = '\0';             /* No more unconverted chars. */
        goto WHILELOOP;                 /* Process new data */
    }

    *tobufp = '\0';                     /* null terminate dest. data */
    INTL_SetCCCLen(obj, tobufp - tobuf);    /* length not counting null */

    if (jisp <= jisep) {                /* unconverted JIS? */
        /* NOTE(review): this copy is not bounded by UNCVTBUF_SIZE; it
         * appears to rely on the leftover being only an incomplete ESC
         * sequence or a lone first byte of a 2-byte char -- confirm.
         */
        uncvtp = uncvtbuf;
        while (jisp <= jisep) {
            *uncvtp++ = *jisp++;
        }
        *uncvtp = '\0';                 /* null terminate */
    }
    return(tobuf);
}