gecko-dev/lib/libi18n/sjis2jis.c

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */
/*	sjis2jis.c	*/

#include "intlpriv.h"
#if defined(MOZ_MAIL_NEWS)
#include "katakana.h"
#endif

extern int MK_OUT_OF_MEMORY;


									/* SJIS to JIS Algorithm.		*/
/*
 * TwoByteSJIS2JIS(sjisp, jisp, offset)
 *	Convert the 2-byte Shift-JIS character at sjisp into a 2-byte JIS code
 *	written at jisp, and advance both pointers by two bytes.
 *
 *	sjisp:	pointer variable addressing the SJIS lead byte; the trail byte
 *			is read from the following position.
 *	jisp:	pointer variable addressing two writable output bytes.
 *	offset:	amount subtracted from the lead byte before it is doubled.
 *
 *	sjisp and jisp are expanded (and incremented) several times, so pass
 *	plain pointer variables only.  Every argument is parenthesized so an
 *	expression may be passed as offset, and the body is wrapped in
 *	do/while(0) so the macro behaves as one statement in an unbraced
 *	if/else.
 */
#define TwoByteSJIS2JIS(sjisp, jisp, offset) do {					\
	*(jisp) = (*(sjisp)++ - (offset)) << 1;	/* assign 1st byte */	\
	if (*(sjisp) < 0x9F) {				/* check 2nd SJIS byte */	\
		*(jisp)++ -= 1;					/* adjust 1st JIS byte */	\
		if (*(sjisp) > 0x7F)										\
			*(jisp)++ = *(sjisp)++ - 0x20;							\
		else														\
			*(jisp)++ = *(sjisp)++ - 0x1F;							\
	} else {														\
		(jisp)++;					/* 1st JIS byte stays as is */	\
		*(jisp)++ = *(sjisp)++ - 0x7E;								\
	}																\
} while (0)

/* mz_sjis2jis(obj, sjisbuf, sjisbufsz)
 * Args:
 *	obj:		Conversion state.  Through it this routine reads and updates
 *		the current JIS encoding mode (INTL_GetCCCJismode /
 *		INTL_SetCCCJismode), reads and rewrites the buffer of SJIS bytes
 *		left unconverted by the previous call (INTL_GetCCCUncvtbuf), and
 *		stores the result length or error code (INTL_SetCCCLen /
 *		INTL_SetCCCRetval).
 *	sjisbuf:	Ptr to a buf of SJIS chars
 *	sjisbufsz:	Size in bytes of sjisbuf (a negative size is treated as 0)
 * Return:
 *	Returns NULL if the output buffer cannot be allocated or its size
 *	would not fit in an int32 (MK_OUT_OF_MEMORY is stored with
 *	INTL_SetCCCRetval).  Otherwise returns a pointer to a nul-terminated
 *	buffer of converted JIS characters; its length, not counting the nul,
 *	is stored with INTL_SetCCCLen.  Caller must XP_FREE() this memory.
 *
 * Description:
 * 	Allocate destination JIS buffer.
 *
 * 	If bytes were left unconverted by the previous call, new input is
 * 	appended to them and that merged run is converted first; conversion
 * 	then continues in sjisbuf after the bytes already consumed.
 *
 * 	Whenever the required JIS encoding differs from the current mode, the
 * 	matching Ins*_ESC macro is invoked before the character is written.
 *
 * 	Per input byte:
 * 	  < 0x80	ASCII/JIS-Roman: copied.
 * 	  < 0xA0	lead byte of 2-byte SJIS: converted with offset 0x70.
 * 	  0xA0		copied like a Roman byte.
 * 	  < 0xE0	half-width katakana.  With MOZ_MAIL_NEWS defined and the
 * 				SendHankakuKana flag clear it is converted to a 2-byte
 * 				JIS character via INTL_SjisHalf2FullKana; otherwise the
 * 				byte is written with its high bit cleared in half-width
 * 				kana mode.
 * 	  < 0xF0	lead byte of 2-byte SJIS: converted with offset 0xB0.
 * 	  else		user-defined SJIS: both bytes are copied unchanged.
 *
 * 	Before returning, the output is switched back to JIS-Roman if it is
 * 	in any other mode.
 *
 *	If the input ends with the lead byte of a 2-byte character, that byte
 *	is saved in the unconverted buffer.  Caller should call mz_sjis2jis
 *	again with the following data.
 */

/* Largest number of output bytes one input byte can produce: a half-width
 * kana converted to a 2-byte character preceded by an escape sequence.
 * NOTE(review): assumes every Ins*_ESC macro writes a 3-byte escape
 * sequence (e.g. ESC ( J) -- confirm against intlpriv.h.
 */
#define SJIS2JIS_MAX_EXPAND	5
/* Bytes kept free beyond the loop's output limit: one more character as
 * above (5), the closing escape sequence (3) and the terminating nul (1).
 */
#define SJIS2JIS_TAIL_ROOM	9

MODULE_PRIVATE unsigned char *
mz_sjis2jis(	CCCDataObject		obj,
			const unsigned char	*sjisbuf,	/* SJIS buf for conversion	*/
			int32				sjisbufsz)	/* SJIS buf size in bytes	*/
{
	unsigned char			*tobuf = NULL;
	int32					tobufsz;
	register unsigned char	*sjisp, *tobufp;	/* current byte in bufs	*/
	register unsigned char	*sjisep, *toep;		/* last usable byte		*/
	unsigned char			*uncvtp;			/* cursor in uncvtbuf	*/
	int32					uncvtlen;
	int32					consumed;
	unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
#if defined(MOZ_MAIL_NEWS)
	unsigned char kanabuf[2];					/* for half-width kana */
	uint32	byteused;							/* for half-width kana */
#endif

	if (sjisbufsz < 0)
		sjisbufsz = 0;

										/* Allocate a JIS buffer:			*/
		/* JIS is longer than SJIS because of ESC seq.  The buffer holds
		 * SJIS2JIS_MAX_EXPAND bytes per input byte plus
		 * SJIS2JIS_TAIL_ROOM, so neither the loop below nor the closing
		 * escape sequence and nul can run past its end.
		 */
	uncvtlen = (int32)strlen((char *)uncvtbuf);
										/* Refuse sizes that overflow int32 */
	if (sjisbufsz > (0x7FFFFFFF - SJIS2JIS_TAIL_ROOM) / SJIS2JIS_MAX_EXPAND
														- uncvtlen) {
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
	tobufsz = ((sjisbufsz + uncvtlen) * SJIS2JIS_MAX_EXPAND)
													+ SJIS2JIS_TAIL_ROOM;
	if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
										/* Initialize pointers, etc.	*/
	sjisp = (unsigned char *)sjisbuf;
	sjisep = sjisp + sjisbufsz - 1;

							/* If prev. unconverted chars, append new chars
							 * to them and try to process.  One byte is
							 * always kept free for the terminating nul.
							 * NOTE(review): assumes uncvtbuf holds at
							 * least UNCVTBUF_SIZE bytes -- confirm.
							 */
	if (uncvtbuf[0] != '\0') {
		uncvtp = uncvtbuf + uncvtlen;
		while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE - 1) &&
													sjisp <= sjisep)
			*uncvtp++ = *sjisp++;
		*uncvtp = '\0';						/* nul terminate	*/
		sjisp = uncvtbuf;				/* process unconverted first */
		sjisep = uncvtp - 1;
	}

	tobufp = tobuf;
	toep = tobuf + tobufsz - SJIS2JIS_TAIL_ROOM;

WHILELOOP:
									/* While SJIS data && space in JIS buf. */
	while ((sjisp <= sjisep) && (tobufp <= toep)) {
		if (*sjisp < 0x80) {
										/* ASCII/JIS-Roman 				*/
			if (INTL_GetCCCJismode(obj) != JIS_Roman) {
				InsASCII_ESC(tobufp, obj);
			}
			*tobufp++ = *sjisp++;

		} else if (*sjisp < 0xA0) {
										/* 1st byte of 2-byte low SJIS. */
			if (sjisp+1 > sjisep)		/* No 2nd byte in SJIS buffer?	*/
				break;

			if (INTL_GetCCCJismode(obj) != JIS_208_83) {
				Ins208_83_ESC(tobufp, obj);
			}

			TwoByteSJIS2JIS(sjisp, tobufp, 0x70);

		} else if (*sjisp==0xA0) {
										/* SJIS half-width space.	*/
										/* Just treat like Roman??	*/
			if (INTL_GetCCCJismode(obj) != JIS_Roman) {
				InsASCII_ESC(tobufp, obj);
			}
			*tobufp++ = *sjisp++;

		} else if (*sjisp < 0xE0) {
										/* SJIS half-width katakana		*/
#if defined(MOZ_MAIL_NEWS)
			if (!INTL_GetCCCCvtflag_SendHankakuKana(obj)) {
				if (INTL_GetCCCJismode(obj) != JIS_208_83) {
					Ins208_83_ESC(tobufp, obj);
				}
						/* The helper is given the bytes remaining in the
						 * current run; the two bytes it leaves in kanabuf
						 * are treated as a 2-byte SJIS character, and
						 * byteused tells how far to advance the input.
						 * The length is computed as a pointer difference
						 * so no pointer is truncated to 32 bits.
						 */
				INTL_SjisHalf2FullKana(sjisp, (uint32)(sjisep - sjisp + 1), kanabuf, &byteused);
															/* SJIS Katakana is 0x8340-0x8396 */
				*tobufp++ = ((kanabuf[0] - 0x70) << 1) - 1;	/* assign 1st byte */
				if (kanabuf[1] > 0x7F)
					*tobufp++ = kanabuf[1] - 0x20;
				else
					*tobufp++ = kanabuf[1] - 0x1F;
				sjisp += byteused;
			} else {
				if (INTL_GetCCCJismode(obj) != JIS_HalfKana) {
					InsHalfKana_ESC(tobufp, obj);
				}
				*tobufp++ = *sjisp & 0x7F;
				sjisp++;
			}
#else
			if (INTL_GetCCCJismode(obj) != JIS_HalfKana) {
				InsHalfKana_ESC(tobufp, obj);
			}
			*tobufp++ = *sjisp & 0x7F;
			sjisp++;
#endif
		} else if (*sjisp < 0xF0) {
										/* 1st byte of 2-byte high SJIS */
			if (sjisp+1 > sjisep)		/* No 2nd byte in SJIS buffer? */
				break;

			if (INTL_GetCCCJismode(obj) != JIS_208_83) {
				Ins208_83_ESC(tobufp, obj);
			}

			TwoByteSJIS2JIS(sjisp, tobufp, 0xB0);
		} else {
										/* User Defined SJIS: copy bytes */
			if (sjisp+1 > sjisep)		/* No 2nd byte in SJIS buf?	*/
				break;

			if (INTL_GetCCCJismode(obj) != JIS_208_83) {
				Ins208_83_ESC(tobufp, obj);
			}

			*tobufp++ = *sjisp++;			/* Just copy 2 bytes.	*/
			*tobufp++ = *sjisp++;
		}
	}

	if (uncvtbuf[0] != '\0') {
										/* The pass above ran over uncvtbuf
										 * (old bytes followed by a copy of
										 * the first new bytes).
										 */
		consumed = (int32)(sjisp - uncvtbuf);
		if (consumed >= uncvtlen) {
										/* All old bytes are done.  Some new
										 * bytes may have been processed with
										 * them, so resume in sjisbuf just
										 * past those and do not process
										 * them twice.
										 */
			sjisp = (unsigned char *)sjisbuf + (consumed - uncvtlen);
			sjisep = (unsigned char *)sjisbuf + sjisbufsz - 1;
			uncvtbuf[0] = '\0';		/* No more unconverted chars.	*/
			goto WHILELOOP;				/* Process new data				*/
		}
										/* Otherwise an old byte is still
										 * waiting for its 2nd byte (e.g. no
										 * new input was supplied).  sjisp and
										 * sjisep still point into uncvtbuf;
										 * the save step below keeps those
										 * bytes for the next call instead of
										 * stepping back before sjisbuf.
										 */
	}

	if (INTL_GetCCCJismode(obj) != JIS_Roman) {
		INTL_SetCCCJismode(obj, JIS_Roman);
		InsASCII_ESC(tobufp, obj);
	}

	*tobufp = '\0';						/* null terminate JIS data */
	INTL_SetCCCLen(obj,  tobufp - tobuf);			/* length not counting null	*/

	if (sjisp <= sjisep) {				/* unconverted SJIS?		*/
										/* Save it for the next call, never
										 * writing past the space checked
										 * above.  The source may lie inside
										 * uncvtbuf itself; copying forward
										 * to its start is safe.
										 */
		uncvtp = uncvtbuf;
		while (sjisp <= sjisep && uncvtp < (uncvtbuf + UNCVTBUF_SIZE - 1))
			*uncvtp++ = *sjisp++;
		*uncvtp = '\0';					/* null terminate		*/
	}
	return(tobuf);
}