зеркало из https://github.com/mozilla/gecko-dev.git
188 строки
5.9 KiB
C
188 строки
5.9 KiB
C
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
/* is2euckr.c */
|
|
|
|
#include "intlpriv.h"
|
|
|
|
|
|
extern int MK_OUT_OF_MEMORY;
|
|
|
|
|
|
/* net_iso2euckr(obj, isobuf, isobufsz, uncvtbuf)
|
|
* Args:
|
|
* isobuf: Ptr to a buf of iso-2022-kr chars
|
|
* isobufsz: Size in bytes of isobuf
|
|
* jismode: Ptr to encoding mode, use as arg for next call to
|
|
* mz_iso2euckr() for rest of current 2022-kr data. First call should
|
|
* initialize mode to ASCII (0).
|
|
* uncvtbuf: If entire buffer was converted, uncvtbuf[0] will be nul,
|
|
* else this points to iso-2022-kr chars that were NOT converted
|
|
* and mz_iso2euckr() with additional iso-2022-kr chars appended.
|
|
* Return:
|
|
* Returns NULL on failure, otherwise it returns a pointer to a buffer of
|
|
* converted EUC-KR characters. Caller must XP_FREE() this memory.
|
|
*
|
|
* Description:
|
|
*
|
|
* Allocate destination buffer (for EUC-KR).
|
|
*
|
|
* Set mode state based upon ESC sequence and SO/SI.
|
|
*
|
|
* If mode is KSC 5601, set 8th bits of next 2 bytes.
|
|
*
|
|
* If any other mode, then assume ASCII and strip the 8th bit.
|
|
*
|
|
* If either 2022-kr buffer does not contain complete char or EUC-KR buffer
|
|
* is full, then return unconverted 2022-kr to caller. Caller should
|
|
* append more data and recall mz_iso2euckr.
|
|
*/
|
|
|
|
|
|
MODULE_PRIVATE unsigned char *
|
|
mz_iso2euckr( CCCDataObject obj,
|
|
const unsigned char *isobuf, /* 2022-kr buffer for conversion */
|
|
int32 isobufsz) /* 2022-kr buffer size in bytes */
|
|
{
|
|
unsigned char *tobuf = NULL;
|
|
int32 tobufsz;
|
|
unsigned char *tobufp, *isop; /* current byte in bufs */
|
|
unsigned char *tobufep, *isoep; /* end of buffers */
|
|
int32 uncvtlen;
|
|
unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
|
|
|
|
#define euckrbufsz tobufsz
|
|
#define euckrbuf tobuf
|
|
#define euckrp tobufp
|
|
#define euckrep tobufep
|
|
/* Allocate a dest buffer: */
|
|
/* 2022-kr is usually longer than EUC-KR because of ESC seq.
|
|
*
|
|
* In the worst case (all ASCII), converted EUC-KR will be the same
|
|
* length as the original 2022-kr + 1 for nul byte
|
|
*/
|
|
uncvtlen = strlen((char *)uncvtbuf);
|
|
tobufsz = isobufsz + uncvtlen + 1;
|
|
|
|
if (!tobufsz) {
|
|
return NULL;
|
|
}
|
|
|
|
if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
|
|
INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
|
|
return(NULL);
|
|
}
|
|
/* Initialize pointers, etc. */
|
|
isop = (unsigned char *)isobuf;
|
|
isoep = isop + isobufsz - 1;
|
|
|
|
#define uncvtp tobufp /* use tobufp as temp */
|
|
/* If prev. unconverted chars, append unconverted
|
|
* chars w/new chars and try to process.
|
|
*/
|
|
if (uncvtbuf[0] != '\0') {
|
|
uncvtp = uncvtbuf + uncvtlen;
|
|
while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE) &&
|
|
isop <= isoep)
|
|
*uncvtp++ = *isop++;
|
|
*uncvtp = '\0'; /* nul terminate */
|
|
isop = uncvtbuf; /* process unconverted first */
|
|
isoep = uncvtp - 1;
|
|
}
|
|
#undef uncvtp
|
|
|
|
tobufp = tobuf;
|
|
tobufep = tobufp + tobufsz - 2; /* save space for terminating null */
|
|
|
|
WHILELOOP:
|
|
INTL_SetCCCJismode(obj, KSC_5601_87); /* jliu doesn't want to change Tony's code too much*/
|
|
/* While 2022-kr data && space in EUC-KR buf. */
|
|
while ((tobufp <= tobufep) && (isop <= isoep)) {
|
|
|
|
if( isop[0] == ESC && isoep - isop > 3 && ( isop[1] == '$' && isop[2] == ')'
|
|
&& isop[3] == 'C' ) ){
|
|
/* eat that ESC seq. */
|
|
isop += 4;
|
|
} else if (*isop == SO) {
|
|
/* obj->jismode |= SHIFT_OUT; */
|
|
INTL_SetCCCJismode(obj, INTL_GetCCCJismode(obj) | SHIFT_OUT);
|
|
isop++;
|
|
} else if (*isop == SI) {
|
|
INTL_SetCCCJismode(obj, INTL_GetCCCJismode(obj) & (~SHIFT_OUT));
|
|
isop++;
|
|
} else if (INTL_GetCCCJismode(obj) == (KSC_5601_87 | SHIFT_OUT)) {
|
|
if(*isop == 0x20) /* jliu */
|
|
{
|
|
*euckrp++ = *isop++ ;
|
|
}
|
|
else
|
|
{
|
|
if ((isop+1) > isoep) /* Incomplete 2Byte char in JIS buf? */
|
|
break;
|
|
|
|
*euckrp++ = *isop++ | 0x80;
|
|
*euckrp++ = *isop++ | 0x80;
|
|
}
|
|
} else if ((0xA1 <= *isop) && (*isop <= 0xFE)) {
|
|
/* Somehow we hit EUC_KR data, let it through */
|
|
if ((isop+1) > isoep) /* Incomplete 2Byte char in JIS buf? */
|
|
break;
|
|
*euckrp++ = *isop++ ;
|
|
*euckrp++ = *isop++ ;
|
|
} else {
|
|
/* Unknown type: no conversion */
|
|
*euckrp++ = *isop++ & 0x7f;
|
|
}
|
|
}
|
|
|
|
if (uncvtbuf[0] != '\0') {
|
|
/* Just processed unconverted chars:
|
|
* isop pts to 1st unprocessed char in
|
|
* isobuf. Some may have been processed
|
|
* while processing unconverted chars,
|
|
* so set up ptrs not to process them
|
|
* twice.
|
|
*/
|
|
/* If nothing was converted, this can
|
|
* only happen if there was not
|
|
* enough 2022-kr data. Stop and get
|
|
* more data.
|
|
*/
|
|
if (isop == uncvtbuf) { /* Nothing converted */
|
|
*tobufp = '\0';
|
|
return(NULL);
|
|
}
|
|
isoep = (unsigned char *)isobuf + isobufsz - 1 ;
|
|
isop = (unsigned char *)isobuf + (isop - uncvtbuf - uncvtlen);
|
|
uncvtbuf[0] = '\0'; /* No more uncoverted chars. */
|
|
goto WHILELOOP; /* Process new data */
|
|
}
|
|
|
|
*tobufp = '\0'; /* null terminate dest. data */
|
|
INTL_SetCCCLen(obj, tobufp - tobuf); /* length not counting null */
|
|
|
|
if (isop <= isoep) { /* unconverted 2022-kr? */
|
|
tobufp = uncvtbuf; /* reuse the tobufp as a TEMP */
|
|
while (isop <= isoep)
|
|
*tobufp++ = *isop++;
|
|
*tobufp = '\0'; /* null terminate */
|
|
}
|
|
return(tobuf);
|
|
}
|
|
|
|
|