gecko-dev/lib/libi18n/unicvt.c

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */
/* unicvt.c
 * --------
 *
 *
 * This file implements conversions from one Unicode format to another
 * Unicode format.
 *
 * There are no conversions to/from other encodings.
 *
 * It provides stream conversions between UTF-8 and UCS-2, and between
 * UTF-8 and UTF-7.
 * It generates a DLL on Win32, and at present, normal libraries on Mac, X,
 * and Win16.
 */

#define _UNICVT_DLL_  1

#include "intlpriv.h"
#include "unicpriv.h"
#include "xp.h"
#include <string.h>

/* Allocator used for every output buffer this file returns to its callers:
 * plain malloc on Win32, XP_ALLOC on all other platforms.
 */
#ifdef XP_WIN32
#define XP_ALLOC_PRIV	malloc
#else
#define XP_ALLOC_PRIV	XP_ALLOC
#endif

/* Everything that distinguishes one UTF-7 flavour from another. The UTF-7
 * converters in this file take a pointer to one of these instead of
 * hard-coding the alphabet and the shift characters.
 */
typedef struct utf7_encoding_method_data {
	int16			*fromb64;	/* indexed by ASCII code: 6-bit base64 value, or NOT_BASE64 */
	unsigned char	*tob64;		/* indexed by 6-bit value: the base64 character to emit */
	unsigned char	*shift;		/* indexed by ASCII code: non-zero if the char must be encoded in a shifted run */
	unsigned char	startshift;	/* character that opens a shifted (base64) run */
	unsigned char	endshift;	/* character that closes a shifted run */
} utf7_encoding_method_data;


int32
ucs2_to_utf8_buffer(const uint16 *ucs2p, int32 num_chars,
		unsigned char *utf8p, int32 num_utf8_bytes, int32 *utf8_bytes_written);


/* Private Helper function prototypes */

PRIVATE int16 one_utf8_to_ucs2_char(const unsigned char *utf8p, const unsigned char *utf8endp,
							   uint16 *onecharp);

PRIVATE int16 one_ucs2_to_utf8_char(unsigned char *tobufp,
		unsigned char *tobufendp, uint16 onechar);

PRIVATE unsigned char *intl_utf72utf8(	CCCDataObject		obj,
				const unsigned char	*utf7buf,
				int32				utf7bufsz,
				utf7_encoding_method_data*	opt
				);
PRIVATE unsigned char *intl_utf82utf7(	CCCDataObject		obj,
				const unsigned char	*utf8buf,
				int32				utf8bufsz,
				utf7_encoding_method_data*	opt
				);


PRIVATE uint16  pad_and_write(uint32 buffer, unsigned char *tobufp,
							int16 bufferBitCount, utf7_encoding_method_data*	opt);

PRIVATE void swap_ucs2_bytes(unsigned char *ucsbuf, int32 ucsbufsz);


/* Private constants */

#define MAX_UCS2			0xFFFF	/* largest value a 16-bit UCS-2 unit can hold */
#define DEFAULT_CHAR		0x003F	/* Default char is "?" */
#define BYTE_MASK			0xBF	/* AND mask: clears bit 6 of a UTF-8 trail byte */
#define BYTE_MARK			0x80	/* OR mask: sets bit 7 of a UTF-8 trail byte */

#define MAX_ASCII			0x7F	/* highest 7-bit ASCII code */
/* Sentinel stored in the fromb64 tables for characters that are not base64
 * digits. Parenthesized so the macro expands safely inside any expression.
 */
#define NOT_BASE64			(-1)


/* Take care of different API for different platforms */


#ifdef XP_WIN32

/* UNICVTAPI def now accomplished in libi18n.h */
/*#define  UNICVTAPI __declspec(dllexport)*/


/* THIS #define IS VERY BAD AND SHOULD BE CHANGED WHEN WE REVISIT
 * THE ERROR HANDLING STUFF AND MOVE IT ALL OUT OF XPSTR.H
 * THE CALL SHOULD BE: extern int MK_OUT_OF_MEMORY; BUT WE HAVE
 * CHICKEN AND EGG LINKING PROBLEMS ON WIN32 BECAUSE THE DLL
 * MUST BE COMPILED BEFORE THE int IS DECLARED.
 */

#define MK_OUT_OF_MEMORY	-207

#else /* !XP_WIN32 */

/* UNICVTAPI def now accomplished in libi18n.h */
/*#define UNICVTAPI*/

extern int MK_OUT_OF_MEMORY;

#endif /*!XP_WIN32 */


/* UCS-2 to UTF-8 conversion routines */

/*
 * mz_ucs2utf8
 * -----------
 *
 * Converts one chunk of a UCS-2 byte stream to UTF-8.
 *
 *   obj       conversion state (source charset ID, uncvtbuf, result length)
 *   ucsbuf    UCS-2 bytes to convert
 *   ucsbufsz  number of bytes in ucsbuf
 *
 * Returns a newly allocated, NUL-terminated UTF-8 buffer which the caller
 * must free; its length in bytes is stored with INTL_SetCCCLen. If the
 * allocation fails, MK_OUT_OF_MEMORY is stored with INTL_SetCCCRetval and
 * NULL is returned.
 *
 * Byte order: every 16-bit unit is first assembled high byte first. A unit
 * equal to NEEDS_SWAP_MARK switches the stream to CS_UCS2_SWAP (following
 * units are assembled low byte first); a unit equal to BYTE_ORDER_MARK
 * switches it to CS_UCS2. Either mark is recorded in the object's FromCSID
 * and is not copied to the output.
 *
 * Odd-length input: when the chunk ends in the middle of a 16-bit unit the
 * dangling byte is kept for the next call:
 *   uncvtbuf[0] == 0 - nothing was left over
 *   uncvtbuf[0] == 1 - one byte was left over; it is stored in uncvtbuf[1]
 */
MODULE_PRIVATE UNICVTAPI unsigned char *
mz_ucs2utf8(	CCCDataObject		obj,
				const unsigned char	*ucsbuf,	/* UCS-2 buf for conv */
				int32				ucsbufsz)	/* UCS-2 buf size in bytes */
{
	unsigned char		*carry = INTL_GetCCCUncvtbuf(obj);
	XP_Bool				swapped = (INTL_GetCCCFromCSID(obj) == CS_UCS2_SWAP) ? TRUE : FALSE;
	const unsigned char	*src;
	const unsigned char	*srcend;
	unsigned char		*out;
	unsigned char		*outp;
	unsigned char		*outend;
	int32				outsz;
	unsigned			first = 0;		/* first byte of the unit being assembled */
	unsigned			second;
	int					havefirst = 0;	/* 1 while `first' is waiting for its partner */
	uint16				unit;
	int16				written;

	/* Each 2-byte unit grows to at most 3 UTF-8 bytes (ratio 2:3); the
	 * "+ 1" accounts for a byte carried over from the previous call.
	 */
	outsz = (3*(ucsbufsz + 1)) / 2 + 2;
	out = (unsigned char *)XP_ALLOC_PRIV(outsz);
	if (out == (unsigned char *)NULL)
	{
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
	outp = out;
	outend = out + outsz;

	/* Resume a unit that was split across the chunk boundary */
	if (carry[0] > 0)
	{
		first = carry[1];
		havefirst = 1;
	}

	srcend = ucsbuf + ucsbufsz;
	for (src = ucsbuf; src < srcend; src++)
	{
		if (!havefirst)
		{
			first = *src;
			havefirst = 1;
			continue;
		}

		second = *src;
		havefirst = 0;
		unit = (uint16)((first << 8) | second);

		/* Byte order marks only update the stream state */
		if (unit == NEEDS_SWAP_MARK)
		{
			INTL_SetCCCFromCSID(obj, CS_UCS2_SWAP);
			swapped = TRUE;
			continue;
		}
		if (unit == BYTE_ORDER_MARK)
		{
			INTL_SetCCCFromCSID(obj, CS_UCS2);
			swapped = FALSE;
			continue;
		}

		if (swapped)
			unit = (uint16)((second << 8) | first);

		written = one_ucs2_to_utf8_char(outp, outend, unit);
		if (written == -1)
			break;					/* out of space in the output buffer */
		outp += written;
	}

	*outp = '\0';							/* NUL terminate dest. data */
	INTL_SetCCCLen(obj, outp - out);		/* length of processed data, in bytes */

	/* Remember a dangling first byte, if any, for the next call */
	carry[0] = havefirst;
	if (havefirst)
		carry[1] = first;
	return(out);
}

/* UTF-8 to UCS-2 */

 /*
  * mz_utf82ucs
  * -----------
  *
  * This function takes a streams object, a buffer of utf8 data, and the
  * size of that buffer. It allocates, fills, and returns a buffer of the
  * equivalent UCS-2 data. The caller is responsible for freeing that
  * data. If the UTF-8 data cannot be completely converted, the unconverted
  * final bytes will be stored in uncvtbuf and used on the next call.
  *
  * Note: UCS-2 data must always begin with a byte order mark, so we
  * must write that at the beginning of our stream. This function
  * employs obj->cvtflag to determine if it is indeed at the beginning
  * of the stream. obj->cvtflag starts at 0, and we switch it to 1
  * as we write the byte order mark.
  *
  * A note on endian-ness: This function will return UCS-2 data of the
  * same endian-ness as the machine we are running on. To generate data
  * of the opposite endian-ness, use mz_utf82ucsswap.
  */


MODULE_PRIVATE UNICVTAPI unsigned char *
mz_utf82ucs(	CCCDataObject		obj,
				const unsigned char	*utf8buf,	/* UTF-8 buf for conv */
				int32				utf8bufsz)	/* UTF-8 buf size in bytes */
{
	/*
	 * obj        conversion state: uncvtbuf carries an incomplete trailing
	 *            UTF-8 sequence from one call to the next, cvtflag records
	 *            whether the byte order mark has already been written.
	 * utf8buf    UTF-8 input for this call.
	 * utf8bufsz  number of bytes in utf8buf.
	 *
	 * Returns a newly allocated buffer of UCS-2 in this machine's byte
	 * order (the caller frees it); its length in bytes is stored with
	 * INTL_SetCCCLen and it is followed by a 16-bit zero. Returns NULL
	 * with MK_OUT_OF_MEMORY recorded if allocation fails, and NULL with a
	 * length of 0 if the carried-over bytes plus the new input still do
	 * not contain one complete character.
	 */
	unsigned char	*tobuf = NULL;
	int32			tobufsz;
	unsigned char	*tobufp, *utf8p;		/* current byte in bufs	*/
	unsigned char	*tobufendp, *utf8endp;	/* end of buffers		*/
	unsigned char	*uncvtp;				/* cursor into uncvtbuf */
	unsigned char	*uncvtlimit;			/* last slot of uncvtbuf, kept for the NUL */
	int32			uncvtlen;
	unsigned char	*uncvtbuf = INTL_GetCCCUncvtbuf(obj);
	uint16			onechar;
	int16			numoctets;

	/* At worst, all the octets are ASCII, and each 1 byte of UTF 8
	 * will take 2 bytes of UCS-2, plus 2 for NULL termination (and
	 * possibly 2 for byte order mark)
	 */
	uncvtlen = strlen((char *)uncvtbuf);
	tobufsz = 2*(utf8bufsz + uncvtlen) + 4;
	if (!tobufsz) {
		return NULL;
	}

	if ((tobuf = (unsigned char *)XP_ALLOC_PRIV(tobufsz)) == (unsigned char *)NULL) {
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}

	/* Initialize pointers, etc.	*/
	utf8p = (unsigned char *)utf8buf;
	utf8endp = utf8p + utf8bufsz - 1;		/* last input byte (inclusive) */
	uncvtlimit = uncvtbuf + UNCVTBUF_SIZE - 1;

	/* If there are bytes left over from the previous call, append as much
	 * new input as fits and convert from uncvtbuf first. The copy stops one
	 * slot short of the end so that the terminating NUL stays inside
	 * uncvtbuf (it used to be written one byte past it when the buffer
	 * filled up).
	 */
	if (uncvtbuf[0] != '\0') {
		uncvtp = uncvtbuf + uncvtlen;
		while (uncvtp < uncvtlimit && utf8p <= utf8endp)
			*uncvtp++ = *utf8p++;

		*uncvtp = '\0';						/* nul terminate as sentinel */
		utf8p = uncvtbuf;					/* process unconverted first */
		utf8endp = uncvtp - 1;
	}

	tobufp = tobuf;
	tobufendp = tobufp + tobufsz - 3;		/* last byte usable for data; the
											 * final two bytes are kept for the
											 * terminating null */

	/* write byte order mark, once per stream */
	if(!(INTL_GetCCCCvtflag(obj))) {
		*((uint16 *) tobufp) = (uint16) BYTE_ORDER_MARK;
		tobufp += 2;
		INTL_SetCCCCvtflag(obj, TRUE);
	}

 WHILELOOP:

	/* Room for one more 2-byte unit is checked BEFORE any input is consumed.
	 * The previous test, (tobufp+2) >= tobufendp made after advancing utf8p,
	 * fired two bytes too early and silently dropped the final character
	 * whenever the buffer was filled exactly (e.g. all-ASCII input on the
	 * call that also writes the byte order mark).
	 */
	while( (tobufp + 1 <= tobufendp) && (utf8p <= utf8endp) ) {

		numoctets = one_utf8_to_ucs2_char(utf8p, utf8endp, &onechar);
		if(numoctets < 0) break;	/* no complete character available; any
									 * negative code stops the loop so utf8p
									 * can never move backwards */
		utf8p += numoctets;

		*((uint16 *) tobufp) = (onechar <= MAX_UCS2 ? onechar :  DEFAULT_CHAR);
		tobufp += 2;
	}

	if(uncvtbuf[0] != '\0') {			/* Just processed the carried-over
										 * bytes. Some of the new input was
										 * converted along with them, so set
										 * up the pointers not to process it
										 * twice.
										 */

		/* If nothing was converted, there wasn't enough UTF-8 data yet.
		 * Stop and wait for more; uncvtbuf keeps what has been gathered.
		 * NOTE(review): tobuf is not released on this path - confirm the
		 * matching deallocator for XP_ALLOC_PRIV and free it here.
		 */
		if(utf8p == uncvtbuf) {		/* nothing was converted */
			*tobufp = '\0';
			INTL_SetCCCLen(obj, 0);	/* don't leave a stale length behind */
			return(NULL);
		}
		utf8endp = (unsigned char *) utf8buf + utf8bufsz - 1;
		utf8p = (unsigned char *) utf8buf + (utf8p - uncvtbuf - uncvtlen);
		uncvtbuf[0] = '\0';			/* No more unconverted chars.*/
		goto WHILELOOP;				/* Process new data */
	}

	tobufp[0] = '\0';				/* 16-bit NULL terminator; both bytes */
	tobufp[1] = '\0';				/* fit in the space reserved above    */

	INTL_SetCCCLen(obj, tobufp - tobuf);		/* length of processed data, in bytes */

	if(utf8p <= utf8endp) {			/*  unconverted utf8 left? */
		/* Save it for the next call, never writing past uncvtbuf */
		uncvtp = uncvtbuf;
		while (utf8p <= utf8endp && uncvtp < uncvtlimit)
			*uncvtp++ = *utf8p++;
		*uncvtp = '\0';				/* NULL terminate, as a sentinel */
	}

	return(tobuf);
}


/*
 * mz_utf82ucsswap
 * ---------------
 *
 * mz_utf82ucs will convert the UTF-8 data to UCS-2 data of the same
 * endian-ness of the platform the client is running on. Occasionally,
 * this is not what is desired. mz_utf82ucsswap converts the UTF-8
 * data to UCS-2 of the opposite endian-ness.
 *
 * Parameters and return value are those of mz_utf82ucs. When mz_utf82ucs
 * returns NULL (allocation failure, or no complete character available
 * yet) NULL is passed straight back and nothing is swapped.
 */


MODULE_PRIVATE UNICVTAPI unsigned char *
mz_utf82ucsswap(	CCCDataObject		obj,
				const unsigned char	*utf8buf,	/* UTF-8 buf for conv */
				int32				utf8bufsz)	/* UTF-8 buf size in bytes */
{

	unsigned char *result;

	result = mz_utf82ucs(obj, utf8buf, utf8bufsz);

	/* Only swap when there is a buffer: on a NULL result the length held
	 * in obj need not describe anything that may be touched.
	 */
	if (result != NULL)
		swap_ucs2_bytes(result, INTL_GetCCCLen(obj));
	return(result);

}


/* UTF-7 to UTF-8 conversion routines */


/* mz_utf72utf8
 * ------------
 *
 * Takes a streams object, a buffer of UTF-7 data, and the size of
 * that buffer.  Allocates, fills, and returns a buffer of UTF-8
 * data. (Its size is returned in the CCCDataObject.) The caller
 * is responsible for freeing the returned buffer.
 *
 * Note: UTF-7 has the property that multiple characters of UTF-7
 * may make up a single character of UTF-8. Also, a single UTF-7 char
 * may contribute bits to more than one utf8 character. If such a
 * UTF-7 character is involved at the end of the current chunk, it won't
 * be save-able in uncvtbuf. For this reason, we also need to
 * save the bit buffer. It turns out that we also need to save the
 * fact that we are within a shifted sequence, because there is no
 * other way for that information to persist between chunks of a
 * stream. If we save a buffer, then we are certainly in the middle
 * of a shifted sequence, but even if there is no buffer to save, we
 * may still be in a shifted sequence.
 *
 * The streams module gives me one int32 - obj->cvtflag - in which
 * to save my state.  This means that to save all my data, I'll need
 * to do a few bit-wise operations.
 *
 * Arbitrarily, the top two bytes will hold the buffer, the next byte
 * holds the count of relevant bits in the buffer, and the low order
 * byte will hold 0 if we are not in a shiftSequence, 1 if we are.
 *
 * Since we will only save a buffer and bufferBitCount if we are
 * in a shift sequence when this chunk terminates, obj->cvtflag == 0
 * when we do not terminate in a shift sequence.
 */


/*
	tables for RFC1642- UTF7
*/

/* Decoding table, indexed by ASCII code (0-127): the 6-bit value of that
 * character in the base64 alphabet, or -1 if it is not a base64 digit.
 * Here '+' (43) is 62 and '/' (47) is 63.
 */
PRIVATE	int16 rfc1642_fromb64[128] =
{
	/*   0 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  10 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  20 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  30 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  40 */  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,
	/*  50 */  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,
	/*  60 */  -1,  -1,  -1,  -1,  -1,   0,   1,   2,   3,   4,
	/*  70 */   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
	/*  80 */  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
	/*  90 */  25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,
	/* 100 */  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
	/* 110 */  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
	/* 120 */  49,  50,  51,  -1,  -1,  -1,  -1,  -1
};
/* Encoding table, indexed by 6-bit value: the base64 digit to emit. The
 * initializer is exactly 64 characters long, so the array holds no
 * terminating NUL and must not be used as a C string.
 */
PRIVATE	unsigned char rfc1642_tob64[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Indexed by ASCII code (0-127): TRUE for characters the UTF-8 to UTF-7
 * encoder puts inside a shifted (base64) run, FALSE for characters it
 * copies through unchanged.
 */
PRIVATE	unsigned char rfc1642_shift[128] = {
/*			0		1		2		3		4		5		6		7	*/
/*			8		9		A		B		C		D		E		F	*/
/* 0x00 */	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,
/* 0x08 */	TRUE,	FALSE,	FALSE,	TRUE,	TRUE,	FALSE,	TRUE,	TRUE,
/* 0x10 */	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,
/* 0x18 */	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,
/* 0x20 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x28 */	FALSE,	FALSE,	FALSE,	TRUE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x30 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x38 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x40 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x48 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x50 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x58 */	FALSE,	FALSE,	FALSE,	FALSE,	TRUE,	FALSE,	FALSE,	FALSE,
/* 0x60 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x68 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x70 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x78 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	TRUE,	TRUE
};

/* The RFC 1642 flavour as handed to the UTF-7 converters: the three tables
 * above, '+' to open a shifted run and '-' to close it.
 */
PRIVATE utf7_encoding_method_data rfc1642_utf7 = {
	rfc1642_fromb64,
	rfc1642_tob64,
	rfc1642_shift,
	(unsigned char)'+',
	(unsigned char)'-'
};


/*
	tables for RFC2060- IMAP4rev1 Mail Box Name
*/
/* Decoding table, indexed by ASCII code (0-127): the 6-bit value of that
 * character in the modified base64 alphabet, or -1 if it is not a digit
 * of that alphabet. Here '+' (43) is 62 and ',' (44) is 63; '/' is not a
 * digit.
 */
PRIVATE	int16 rfc2060_fromb64[128] =
{
	/*   0 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  10 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  20 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  30 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	/*  40 */  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,  52,  53,
	/*  50 */  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,
	/*  60 */  -1,  -1,  -1,  -1,  -1,   0,   1,   2,   3,   4,
	/*  70 */   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
	/*  80 */  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
	/*  90 */  25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,
	/* 100 */  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
	/* 110 */  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
	/* 120 */  49,  50,  51,  -1,  -1,  -1,  -1,  -1
};
/* Encoding table, indexed by 6-bit value: the digit to emit (',' takes the
 * place of '/'). The initializer is exactly 64 characters long, so the
 * array holds no terminating NUL and must not be used as a C string.
 */
PRIVATE	unsigned char rfc2060_tob64[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/* Indexed by ASCII code (0-127): TRUE for characters the UTF-8 to UTF-7
 * encoder puts inside a shifted run, FALSE for characters it copies
 * through unchanged. Among the printable characters only '&' (0x26) is
 * marked.
 */
PRIVATE	unsigned char rfc2060_shift[128] = {
/*			0		1		2		3		4		5		6		7	*/
/*			8		9		A		B		C		D		E		F	*/
/* 0x00 */	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,
/* 0x08 */	TRUE,	FALSE,	FALSE,	TRUE,	TRUE,	FALSE,	TRUE,	TRUE,
/* 0x10 */	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,
/* 0x18 */	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,	TRUE,
/* 0x20 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	TRUE,	FALSE,
/* 0x28 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x30 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x38 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x40 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x48 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x50 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x58 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x60 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x68 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x70 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,
/* 0x78 */	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	FALSE,	TRUE
};

/* The RFC 2060 (IMAP4rev1 mailbox name) flavour as handed to the UTF-7
 * converters: the three tables above, '&' to open a shifted run and '-'
 * to close it.
 */
PRIVATE utf7_encoding_method_data rfc2060_utf7 = {
	rfc2060_fromb64,
	rfc2060_tob64,
	rfc2060_shift,
	(unsigned char)'&',
	(unsigned char)'-'
};

/* Stream converter: RFC 1642 UTF-7 to UTF-8. Forwards to intl_utf72utf8
 * with the RFC 1642 parameters and hands back whatever it returns.
 */
MODULE_PRIVATE UNICVTAPI unsigned char *
mz_utf72utf8(	CCCDataObject		obj,
				const unsigned char	*utf7buf,	/* UTF-7 buf for conv */
				int32				utf7bufsz)	/* UTF-7 buf size in bytes */
{
	utf7_encoding_method_data	*method = &rfc1642_utf7;
	unsigned char				*utf8;

	utf8 = intl_utf72utf8(obj, utf7buf, utf7bufsz, method);
	return(utf8);
}
/* Stream converter: RFC 2060 (IMAP4 mailbox name) UTF-7 to UTF-8. Forwards
 * to intl_utf72utf8 with the RFC 2060 parameters and hands back whatever
 * it returns.
 */
MODULE_PRIVATE UNICVTAPI unsigned char *
mz_imap4utf72utf8(	CCCDataObject		obj,
				const unsigned char	*utf7buf,	/* UTF-7 buf for conv */
				int32				utf7bufsz)	/* UTF-7 buf size in bytes */
{
	utf7_encoding_method_data	*method = &rfc2060_utf7;
	unsigned char				*utf8;

	utf8 = intl_utf72utf8(obj, utf7buf, utf7bufsz, method);
	return(utf8);
}

PRIVATE unsigned char *
intl_utf72utf8(	CCCDataObject		obj,
				const unsigned char	*utf7buf,	/* UTF-7 buf for conv */
				int32				utf7bufsz,	/* UTF-7 buf size in bytes */
				utf7_encoding_method_data*	opt)

{
	/*
	 * Decodes one chunk of UTF-7 into UTF-8.
	 *
	 * obj        conversion state. cvtflag carries the decoder state from
	 *            one call to the next: bit 0 = inside a shifted run,
	 *            bits 8-15 = number of pending bits, bits 16-31 = the
	 *            pending bits themselves. uncvtbuf carries input bytes
	 *            that could not be consumed.
	 * utf7buf    UTF-7 input for this call.
	 * utf7bufsz  number of bytes in utf7buf.
	 * opt        alphabet and shift characters of the UTF-7 flavour.
	 *
	 * Returns a newly allocated, NUL-terminated UTF-8 buffer (the caller
	 * frees it) whose length is stored with INTL_SetCCCLen. Returns NULL
	 * with MK_OUT_OF_MEMORY recorded if allocation fails, and NULL with a
	 * length of 0 if nothing could be consumed from the carried-over input.
	 */

 	unsigned char	*tobuf = NULL;
 	int32			tobufsz;
 	unsigned char	*tobufp, *utf7p;		/* current byte in bufs	*/
 	unsigned char	*tobufendp, *utf7endp;	/* end of buffers		*/
	unsigned char	*uncvtp;				/* cursor into uncvtbuf */
 	int32			uncvtlen;

	uint16	oneUCS2char;
	unsigned char	onechar;
	int16			numoctets;
	int16 mustnotshift = 0;
	int16 inShiftSequence;

	uint32 buffer;
	uint32 buffertemp = 0;
	int16 bufferBitCount;
	unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);

	/* initialize data saved from previous stream */

	int32 flag = INTL_GetCCCCvtflag(obj);
	inShiftSequence = flag & 1;
	buffer = 0xFFFF0000 & flag;
	bufferBitCount = (uint16) ((0x0000FF00 & flag) >> 8);

	/* Allocate a dest buffer.
	 *
	 * A directly encoded UTF-7 character becomes one UTF-8 octet. Inside a
	 * shifted run 8 base64 characters carry 3 UCS-2 characters, which take
	 * at most 9 UTF-8 octets, so 1.2 output octets per input octet cover
	 * the input of this call. Up to 14 bits may additionally be carried in
	 * from the previous call and complete a character that costs 3 octets
	 * more than this call's input pays for; the extra 3 octets cover that
	 * (without them short chunks inside a shifted run ran out of space and
	 * lost a character). The remaining 2 are for rounding and the NUL.
	 */

	uncvtlen = strlen((char *)uncvtbuf);
	tobufsz = (int32) (1.2*(utf7bufsz + uncvtlen) + 2) + 3;

	if ((tobuf = (unsigned char *)XP_ALLOC_PRIV(tobufsz)) == (unsigned char *)NULL)
	{
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
										/* Initialize pointers, etc.	*/
 	utf7p = (unsigned char *)utf7buf;
 	utf7endp = utf7p + utf7bufsz - 1;	/* last input byte (inclusive) */

	/* If there are bytes left over from the previous call, append as much
	 * new input as fits and process uncvtbuf first. The copy stops one slot
	 * short of the end so the terminating NUL stays inside uncvtbuf.
	 */
 	if (uncvtbuf[0] != '\0')
	{
 		uncvtp = uncvtbuf + uncvtlen;
 		while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE - 1) &&
														utf7p <= utf7endp)
 			*uncvtp++ = *utf7p++;

 		*uncvtp = '\0';						/* nul terminate as sentinel */
 		utf7p = uncvtbuf;				/* process unconverted first */
 		utf7endp = uncvtp - 1;
 	}

 	tobufp = tobuf;
 	tobufendp = tobufp + tobufsz - 2;

WHILELOOP:

	while( (tobufp <= tobufendp) && (utf7p <= utf7endp) )
	{

		onechar = *utf7p++;

		/* If I'm not in the shift sequence, and I have the start symbol,
		 * absorb it and loop again. Otherwise, if I have a legal character
		 * for a non-shifted sequence, (ASCII) write it directly. This is
		 * ok, because ASCII is just ASCII in UTF-8, so don't need to worry
		 * about UCS-2 conversion.
		 */

		if(!inShiftSequence)
		{

			if(onechar == opt->startshift)
			{
				/* startshift immediately followed by endshift stands for
				 * the startshift character itself. Only look at the next
				 * byte if there is one: the start symbol may be the last
				 * byte of this chunk.
				 * NOTE(review): in that case the run is opened here, so a
				 * pair split across two chunks decodes to nothing rather
				 * than to the literal character - confirm whether callers
				 * can deliver such a split.
				 */
				if(utf7p <= utf7endp && *utf7p == opt->endshift)
				{
					*tobufp++ = opt->startshift;
					utf7p++;
				} else inShiftSequence = TRUE;
				continue;
			}

			if(onechar <= MAX_ASCII) *tobufp++ = onechar;
			else continue;				/* non-ASCII is illegal here: drop it */

		}
		else
		{	/* inShiftSequence is TRUE */

			/* onechar is not a base64 allowable char if it is non-ASCII or
			 * if it is a non-base64 char from the ASCII set.
			 */
			mustnotshift = (onechar > MAX_ASCII ||
					(opt->fromb64[onechar] == NOT_BASE64));

			/* If I'm in the shift sequence, and get the opt->endshift character,
			 * I want to absorb it and turn off shifting. If I get another
			 * non-shiftable character, I want to write it and turn off shifting.
			 * If I get an illegal character, I discard it and keep looping.
			 */

			if(mustnotshift)
			{

				if(!(onechar == opt->endshift))
				{

					if(onechar > MAX_ASCII)
						continue;

					*tobufp++ = onechar;
				}

				inShiftSequence = FALSE;
				buffer = 0;			/* flush buffer at end of shift sequence */
				bufferBitCount = 0;

			}
			else
			{

				buffertemp = opt->fromb64[onechar] & 0x0000003F;	/* grab 6-bit base64 char */
				buffer |= buffertemp << (26 - bufferBitCount); /* 26 is 32 - 6 bits */
				bufferBitCount += 6;

				/* Flush the buffer of a UCS-2 character (won't be more than one)  */

				if(bufferBitCount > 15)
				{

					oneUCS2char = (uint16) ((buffer & 0xFFFF0000) >> 16);
					numoctets = one_ucs2_to_utf8_char(tobufp, tobufendp, oneUCS2char);
					if(numoctets == -1) break; /* out of space in tobuf */
					tobufp += numoctets;
					bufferBitCount -= 16;
					buffer <<= 16;
				}

			}

		} /* end of inShiftSequence == TRUE */

	}	   /* end of conversion while loop */


	if(uncvtbuf[0] != '\0')
	{										/* Just processed the carried-over
											 * bytes. Some of the new input was
											 * processed along with them, so set
											 * up the pointers not to process it
											 * twice.
											 */

		/* If nothing was consumed, stop and wait for more data.
		 * NOTE(review): tobuf is not released on this path - confirm the
		 * matching deallocator for XP_ALLOC_PRIV and free it here.
		 */
		if(utf7p == uncvtbuf)
		{	/* nothing was converted */
			*tobufp = '\0';
			INTL_SetCCCLen(obj, 0);
			return(NULL);
		}

		/* set up to read the caller's buffer */
		utf7endp = (unsigned char *) utf7buf + utf7bufsz - 1;
		utf7p = 	(unsigned char *) utf7buf + (utf7p - uncvtbuf - uncvtlen);
		uncvtbuf[0] = '\0';		   /* No more unconverted chars.*/
		goto WHILELOOP;					   /* Process new data */
	}


	*tobufp = '\0';					/* NULL terminate dest. data */
	INTL_SetCCCLen(obj, tobufp - tobuf);		/* length of processed data, in bytes */

	/* Save the decoder state for the next call: whether we are inside a
	 * shifted run, the pending bits and how many of them there are.
	 */

	INTL_SetCCCCvtflag(obj,((inShiftSequence ? 1 : 0 ) |
							(buffer & 0xFFFF0000) |
							((bufferBitCount << 8) & 0x0000FF00)));

	/* Now check for unconverted data from utf7p */
	if(utf7p <= utf7endp)
	{
		int32 l = utf7endp - utf7p + 1;

		/* never write past uncvtbuf; one slot is kept for the NUL */
		if(l > UNCVTBUF_SIZE - 1)
			l = UNCVTBUF_SIZE - 1;
		memcpy(uncvtbuf, utf7p, l);
		uncvtbuf[l] = '\0';
	}

	return(tobuf);

}


/* UTF-8 to UTF-7 */


 /*
  * mz_utf82utf7
  * ------------
  *
  * This function takes a CCCDataObject, a buffer of UTF-8 data, and the
  * size of that buffer. It allocates and returns a buffer of the
  * corresponding UTF-7 data (returning the size as a field in the
  * CCCDataObject). The caller is responsible for freeing the returned
  * data. If there are extra data at the end of the UTF-8 buffer which
  * cannot be translated into UTF-7 (ie, an incomplete character), it
  * will be saved in the uncvtbuf of the CCCDataObject and used on the
  * next call.
  *
  * UTF-7 is a variant of base-64, and like base-64, it accumulates
  * bits in a bit buffer, transforming them to UTF-7 chars when it
  * has multiples of 6 bits. If the UTF-8 data being translated does
  * not happen to terminate with a multiple of 6 bits, the final
  * char will be padded with 0's, and the shift sequence terminated.
  * For this reason, we will *never* be inside a shift sequence in
  * between chunks of data. This may mean that the final stream of
  * data has sequences that look like +[some UTF-7 data]-+[more data]-,
  * with a plus immediately following a -. Although unconventional,
  * this is in fact legal UTF-7.
  *
  * Finally, there are two formats of UTF-7, one extremely conservative
  * fashion which shifts every character which could possibly be
  * considered unsafe, and another which is somewhat more lax. The
  * differing characters are: !\"#$%&*;<=>@[]^_`{|}
  * This comment used to say that the form is chosen by obj->cvtflag
  * (0 selecting the safer form). The encoder below does not read cvtflag:
  * which characters get shifted is fixed by the `shift' table of the
  * utf7_encoding_method_data it is called with.
  */
/* Tables */


/* Stream converter: UTF-8 to RFC 1642 UTF-7. Forwards to intl_utf82utf7
 * with the RFC 1642 parameters and hands back whatever it returns.
 */
MODULE_PRIVATE UNICVTAPI unsigned char *
mz_utf82utf7(	CCCDataObject		obj,
				const unsigned char	*utf8buf,	/* UTF-8 buf for conv */
				int32				utf8bufsz)	/* UTF-8 buf size in bytes */
{
	utf7_encoding_method_data	*method = &rfc1642_utf7;
	unsigned char				*utf7;

	utf7 = intl_utf82utf7(obj, utf8buf, utf8bufsz, method);
	return(utf7);
}
/* Stream converter: UTF-8 to RFC 2060 (IMAP4 mailbox name) UTF-7. Forwards
 * to intl_utf82utf7 with the RFC 2060 parameters and hands back whatever
 * it returns.
 */
MODULE_PRIVATE UNICVTAPI unsigned char *
mz_utf82imap4utf7(	CCCDataObject		obj,
				const unsigned char	*utf8buf,	/* UTF-8 buf for conv */
				int32				utf8bufsz)	/* UTF-8 buf size in bytes */
{
	utf7_encoding_method_data	*method = &rfc2060_utf7;
	unsigned char				*utf7;

	utf7 = intl_utf82utf7(obj, utf8buf, utf8bufsz, method);
	return(utf7);
}
PRIVATE unsigned char *
intl_utf82utf7(	CCCDataObject		obj,
				const unsigned char	*utf8buf,	/* UTF-8 buf for conv */
				int32				utf8bufsz,	/* UTF-8 buf size in bytes */
				utf7_encoding_method_data*	opt)
{
	/*
	 * Encodes one chunk of UTF-8 as UTF-7.
	 *
	 * obj        conversion state; uncvtbuf carries an incomplete trailing
	 *            UTF-8 sequence from one call to the next.
	 * utf8buf    UTF-8 input for this call.
	 * utf8bufsz  number of bytes in utf8buf.
	 * opt        alphabet, shift table and shift characters of the UTF-7
	 *            flavour to produce.
	 *
	 * Returns a newly allocated, NUL-terminated UTF-7 buffer (the caller
	 * frees it) whose length is stored with INTL_SetCCCLen. Any shifted run
	 * still open at the end of the chunk is padded and closed, so no
	 * encoder state survives the call. Returns NULL with MK_OUT_OF_MEMORY
	 * recorded if allocation fails, and NULL with a length of 0 if the
	 * carried-over bytes plus the new input still do not contain one
	 * complete character.
	 */

 	unsigned char	*tobuf = NULL;
	int32			tobufsz;
	unsigned char	*tobufp, *utf8p;		/* current byte in bufs	*/
 	unsigned char	*tobufendp, *utf8endp;	/* end of buffers		*/
	unsigned char	*uncvtp;				/* cursor into uncvtbuf */
	unsigned char	*uncvtlimit;			/* last slot of uncvtbuf, kept for the NUL */
 	int32					uncvtlen;
	unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);


	uint16 onechar;
	int16 numoctets;
	int16 inShiftSequence = FALSE;
	int16 needToShift = FALSE;
	uint32 buffer = 0;
	uint32 buffertemp = 0;
	int16 bufferBitCount = 0;
	unsigned char oneBase64char;


	/* Allocate a dest buffer.
	 *
	 * The most expensive input is a one-octet character that has to be
	 * shifted and stands alone: start symbol, three base64 characters and
	 * end symbol, i.e. 5 octets. Before the run is closed no prefix of n
	 * input octets has produced more than 3n output octets, and closing a
	 * run adds at most 2, so the output never exceeds 3n + 2 octets. The
	 * space checks below want one octet of headroom on top of that and one
	 * more is needed for the NUL, hence 3n + 4. (With the former 3n + 1 an
	 * input ending in a lone shifted ASCII character was returned with its
	 * run neither completed nor closed.)
	 */

	uncvtlen = strlen((char *)uncvtbuf);
	tobufsz = 3*(utf8bufsz + uncvtlen) + 4;

	if ((tobuf = (unsigned char *)XP_ALLOC_PRIV(tobufsz)) == (unsigned char *)NULL) {
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
										/* Initialize pointers, etc.	*/
 	utf8p = (unsigned char *)utf8buf;
 	utf8endp = utf8p + utf8bufsz - 1;	/* last input byte (inclusive) */
	uncvtlimit = uncvtbuf + UNCVTBUF_SIZE - 1;

	/* If there are bytes left over from the previous call, append as much
	 * new input as fits and convert from uncvtbuf first. The copy stops one
	 * slot short of the end so the terminating NUL stays inside uncvtbuf.
	 */
 	if (uncvtbuf[0] != '\0') {
 		uncvtp = uncvtbuf + uncvtlen;
 		while (uncvtp < uncvtlimit && utf8p <= utf8endp)
			 *uncvtp++ = *utf8p++;

 		*uncvtp = '\0';						/* nul terminate as sentinel */
 		utf8p = uncvtbuf;				/* process unconverted first */
 		utf8endp = uncvtp - 1;
 	}


 	tobufp = tobuf;
 	tobufendp = tobufp + tobufsz - 2;		/* save space for terminating null*/


 WHILELOOP:

	while( (tobufp <= tobufendp) && (utf8p <= utf8endp) ) {

		/* convert one char's worth of utf8 to ucs2 */
		numoctets = one_utf8_to_ucs2_char(utf8p, utf8endp, &onechar);
		if(numoctets < 0) break;	/* no complete character available; any
									 * negative code stops the loop so utf8p
									 * can never move backwards */
		utf8p += numoctets;

		/* we need to be shifted if the character is non-ASCII or
		 * is an ASCII character that should be shifted.
		 */
		needToShift = (onechar > MAX_ASCII) || (opt->shift[onechar]);


		/* A directly encodable character ends the current shifted run:
		 * flush the pending bits (padded) together with the end symbol, or
		 * write just the end symbol if no bits are pending.
		 */
		if(!needToShift && inShiftSequence)	  {

			if(bufferBitCount > 0) {
				if((tobufp+2) > tobufendp) break;
				bufferBitCount = pad_and_write(buffer, tobufp, bufferBitCount, opt);
				if (!bufferBitCount) {	/* buffer successfully flushed */
					tobufp+=2;
					buffer = 0;
				}

			} else {
				if((tobufp+1) > tobufendp) break;
				*tobufp++ = opt->endshift;
			}
			inShiftSequence = FALSE; /* now just fallthrough to next case*/
		}

		if(!needToShift && 	!inShiftSequence) {
			if((tobufp+1) > tobufendp) break;
			*tobufp++ = (char) onechar;
		}

		if(needToShift && !inShiftSequence)	 {
			*tobufp++ = opt->startshift;
			if(onechar == opt->startshift) { /* the start symbol itself is written
											  * as start symbol + end symbol */
				if((tobufp+1) > tobufendp) break;
				*tobufp++ = opt->endshift;
			}
			else inShiftSequence = TRUE;
		}

		if(needToShift && inShiftSequence) {

			buffertemp = onechar & 0x0000FFFF;
			buffer |= buffertemp << (16 - bufferBitCount);
											/* ^--16 is the size of the int32 minus
											 * the size of onechar */
			bufferBitCount += 16;


			/* Flush the buffer of as many base64 characters as we can form */
			while(bufferBitCount>5) {
	 			  if(tobufp > tobufendp) break;
				  oneBase64char = (char)  ((buffer & 0xFC000000) >> 26);
				  *tobufp++ =  opt->tob64[oneBase64char];
				  buffer <<= 6;
				  bufferBitCount -= 6;
			}
		}


	} /* end of while loop */


	if(uncvtbuf[0] != '\0') {			/* Just processed the carried-over
										 * bytes. Some of the new input was
										 * converted along with them, so set
										 * up the pointers not to process it
										 * twice.
										 */

		/* If nothing was converted, there wasn't enough UTF-8 data yet.
		 * Stop and wait for more; uncvtbuf keeps what has been gathered.
		 * NOTE(review): tobuf is not released on this path - confirm the
		 * matching deallocator for XP_ALLOC_PRIV and free it here.
		 */
		if(utf8p == uncvtbuf) {		/* nothing was converted */
			*tobufp = '\0';
			INTL_SetCCCLen(obj, 0);	/* don't leave a stale length behind */
			return(NULL);
		}
		utf8endp = (unsigned char *) utf8buf + utf8bufsz - 1;
		utf8p = 	(unsigned char *) utf8buf + (utf8p - uncvtbuf - uncvtlen);
		uncvtbuf[0] = '\0';		   /* No more unconverted chars.*/
		goto WHILELOOP;					   /* Process new data */
	}


	/* Anything left in the buffer at this point should be padded with 0's
	 * and appended to tobuf. */

	if(inShiftSequence) {

		if(bufferBitCount > 0) {

			if((tobufp+2) <= tobufendp) {
				bufferBitCount = pad_and_write(buffer, tobufp, bufferBitCount,  opt);
				if (!bufferBitCount) { /* buffer successfully flushed */
					tobufp+=2;
					buffer = 0;
				}
			}

		}  else {
			 if((tobufp+1) <= tobufendp) *tobufp++ = opt->endshift;
		}

		inShiftSequence = FALSE;
	}


	 *tobufp = '\0';				/* NULL terminate dest. data */


	INTL_SetCCCLen(obj, tobufp - tobuf);		/* length of processed data, in bytes */

	if(utf8p <= utf8endp) {			/*  unconverted utf8 left? */
		/* Save it for the next call, never writing past uncvtbuf */
		uncvtp = uncvtbuf;
		while (utf8p <= utf8endp && uncvtp < uncvtlimit)
				*uncvtp++ = *utf8p++;
		*uncvtp = '\0';				/* NULL terminate, as a sentinel if nothing else.*/
	}

	return(tobuf);
}


/* Function: one_ucs2_to_utf8_char
 *
 * Encodes one UCS-2 character as UTF-8, starting at tobufp.
 *
 * tobufp    - where the first UTF-8 octet is to be stored
 * tobufendp - limit of the destination; the character is only written
 *             when (tobufp + number of octets) does not exceed it
 * onechar   - the UCS-2 character to encode
 *
 * U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH SEPARATOR) are emitted
 * as a single '\n' octet. They go through the same space check as any
 * other one-octet character, and nothing beyond that one octet is
 * written.
 *
 * Returns the number of UTF-8 octets written (1, 2 or 3), or -1 when
 * the character does not fit; in that case nothing is written.
 */

#define LINE_SEPARATOR		0x2028
#define PARAGRAPH_SEPARATOR	0x2029
PRIVATE int16 one_ucs2_to_utf8_char(unsigned char *tobufp,
		unsigned char *tobufendp, uint16 onechar)

{

	int16 numUTF8bytes = 0;

	/* Unicode line/paragraph separators collapse to a plain newline,
	 * which is then handled as an ordinary one-octet character below. */
	if ((onechar == LINE_SEPARATOR) || (onechar == PARAGRAPH_SEPARATOR)) {
		onechar = (uint16) '\n';
	}

	if (onechar < 0x80) {				numUTF8bytes = 1;
	} else if (onechar < 0x800) {		numUTF8bytes = 2;
	} else if (onechar <= MAX_UCS2) {	numUTF8bytes = 3;
	} else {
		/* outside the UCS-2 range: substitute the default character,
		 * encoded as two octets */
		numUTF8bytes = 2;
		onechar = DEFAULT_CHAR;
	}

	/* Advance to one past the last octet; octets are stored back to front. */
	tobufp += numUTF8bytes;

	/* return error if we don't have space for the whole character */
	if (tobufp > tobufendp) {
		return(-1);
	}

	switch(numUTF8bytes) {

		case 3: *--tobufp = (onechar | BYTE_MARK) & BYTE_MASK; onechar >>=6;
				*--tobufp = (onechar | BYTE_MARK) & BYTE_MASK; onechar >>=6;
				*--tobufp = onechar |  THREE_OCTET_BASE;
				break;

		case 2: *--tobufp = (onechar | BYTE_MARK) & BYTE_MASK; onechar >>=6;
				*--tobufp = onechar | TWO_OCTET_BASE;
				break;

		case 1: *--tobufp = (unsigned char)onechar;
				break;
	}

	return(numUTF8bytes);
}


/*
 * utf8_to_ucs2_char
 *
 * Decode the single UTF-8 sequence that starts at utf8p into one UCS-2
 * character. Only one, two and three octet sequences are accepted.
 *
 * inputs: utf8p  - first octet of the sequence
 *         buflen - number of octets that may be read from utf8p
 *         ucs2p  - receives the decoded character; left untouched
 *                  whenever a negative value is returned
 *
 * outputs: the number of octets the sequence occupies (1, 2 or 3)
 *          -1 if the octets do not form a valid sequence
 *          -2 if utf8p is NULL or buflen is too small for the sequence
 */
MODULE_PRIVATE UNICVTAPI int16
utf8_to_ucs2_char(const unsigned char *utf8p, int16 buflen, uint16 *ucs2p)
{
	uint16 first, second, third;

	/* Nothing readable at all. */
	if ((utf8p == NULL) || (buflen < 1))
		return -2;

	first = (uint16) (*utf8p);

	/* One octet: the value is the octet itself, masked. */
	if (IS_UTF8_1ST_OF_1(first)) {
		*ucs2p = first & ONE_OCTET_MASK;
		return 1;
	}

	/* Two octets: lead octet plus one continuation octet. */
	if (IS_UTF8_1ST_OF_2(*utf8p)) {
		if (buflen < 2)
			return -2;
		second = (uint16) utf8p[1];
		if (!IS_UTF8_2ND_THRU_6TH(second))
			return -1;
		*ucs2p = (uint16) (((first & TWO_OCTET_MASK) << 6)
						 | (second & CONTINUING_OCTET_MASK));
		return 2;
	}

	/* Anything that is not a three octet lead is rejected outright. */
	if (!IS_UTF8_1ST_OF_3(first))
		return -1;

	/* Three octets: lead octet plus two continuation octets. */
	if (buflen < 3)
		return -2;
	second = (uint16) utf8p[1];
	third  = (uint16) utf8p[2];
	if (!IS_UTF8_2ND_THRU_6TH(second) || !IS_UTF8_2ND_THRU_6TH(third))
		return -1;

	*ucs2p = (uint16) (((first & THREE_OCTET_MASK) << 12)
					 | ((second & CONTINUING_OCTET_MASK) << 6)
					 | (third & CONTINUING_OCTET_MASK));
	return 3;
}

/*
 * INTL_NumUTF8Chars
 *
 * Walk a NUL-terminated UTF-8 string one sequence at a time and return
 * the accumulated total.
 *
 * NOTE(review): despite the name, the total grows by the octet length of
 * every sequence (1, 2 or 3), so the value returned is the number of
 * octets scanned rather than the number of characters. It is therefore
 * an upper bound on the character count. Left unchanged because callers
 * may rely on it.
 *
 * Octets that do not start a well-formed one, two or three octet
 * sequence are stepped over one at a time.
 */
UNICVTAPI int32
INTL_NumUTF8Chars(const unsigned char *utf8p)
{
	int total = 0;
	int step;

	for ( ; *utf8p; utf8p += step, total += step) {

		if (IS_UTF8_1ST_OF_1(*utf8p)) {
			/* one octet sequence */
			step = 1;
		}
		else if (IS_UTF8_1ST_OF_2(*utf8p)
			&& IS_UTF8_2ND_THRU_6TH(*(utf8p+1))) {
			/* two octet sequence */
			step = 2;
		}
		else if (IS_UTF8_1ST_OF_3(*utf8p)
			&& IS_UTF8_2ND_THRU_6TH(*(utf8p+1))
			&& IS_UTF8_2ND_THRU_6TH(*(utf8p+2))) {
			/* three octet sequence */
			step = 3;
		}
		else {
			/* not UTF8 : just muddle forward */
			step = 1;
		}
	}

	return total;
}

/*
 * INTL_UTF8ToUCS2
 *
 * Convert a NUL-terminated UTF-8 string into a newly allocated,
 * zero-terminated UCS-2 string.
 *
 * inputs:  utf8p     - NUL-terminated UTF-8 string
 *          num_chars - if not NULL, receives the number of UCS-2
 *                      characters produced (terminator not counted)
 *
 * outputs: the buffer obtained from XP_ALLOC_PRIV, or NULL if utf8p is
 *          NULL or the allocation fails (*num_chars is then 0).
 *          Presumably the caller releases the buffer - confirm which
 *          deallocator matches XP_ALLOC_PRIV on each platform.
 *
 * utf8_to_ucs2_buffer takes its input length as an int16, so the input
 * is fed to it in pieces of at most 0x7FFF octets. A sequence cut by a
 * piece boundary is left unparsed by the callee and is picked up again
 * at the start of the next piece.
 */
PUBLIC UNICVTAPI uint16 *
INTL_UTF8ToUCS2(const unsigned char *utf8p, int32 *num_chars)
{
	uint16 *ucs2_chars;
	int32 max_ucs2_chars;		/* capacity in uint16 units, terminator excluded */
	int32 num_ucs2_chars = 0;
	int32 bytes_left;
	int parse_cnt, inval_cnt;

	if (num_chars)
		*num_chars = 0;
	if (utf8p == NULL)
		return NULL;

	/*
	 * Size the result: one UCS-2 slot per counted unit, plus the
	 * terminator. No sequence yields more than one UCS-2 character,
	 * so this is always sufficient.
	 */
	max_ucs2_chars = INTL_NumUTF8Chars(utf8p);
	ucs2_chars = (uint16 *)XP_ALLOC_PRIV((max_ucs2_chars + 1) * sizeof(uint16));
	if (!ucs2_chars) return NULL;

	/*
	 * Do the conversion, one int16-sized piece at a time. The write
	 * limit handed to the callee is expressed in uint16 slots, not bytes.
	 */
	bytes_left = (int32)strlen((const char *)utf8p);
	while ((bytes_left > 0) && (num_ucs2_chars < max_ucs2_chars)) {
		int16 chunk = (int16)((bytes_left > 0x7FFF) ? 0x7FFF : bytes_left);
		int32 written = utf8_to_ucs2_buffer(utf8p, chunk,
								&parse_cnt, &inval_cnt,
								ucs2_chars + num_ucs2_chars,
								max_ucs2_chars - num_ucs2_chars);

		num_ucs2_chars += written;

		/* No progress: what remains is an incomplete trailing sequence. */
		if (parse_cnt <= 0)
			break;

		utf8p += parse_cnt;
		bytes_left -= parse_cnt;
	}
	ucs2_chars[num_ucs2_chars] = 0; /* null terminator */

	/*
	 * return the result
	 */
	if (num_chars)
		*num_chars = num_ucs2_chars;
	return ucs2_chars;
}

/*
 * INTL_UCS2ToUTF8
 *
 * Convert num_chars UCS-2 characters into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * inputs:  ucs2p     - UCS-2 characters to convert
 *          num_chars - how many characters to read from ucs2p
 *
 * outputs: the buffer obtained from XP_ALLOC_PRIV, or NULL if the
 *          allocation fails. The whole buffer is zeroed before the
 *          conversion, which is what provides the terminating NUL.
 */
PUBLIC UNICVTAPI unsigned char *
INTL_UCS2ToUTF8(const uint16 *ucs2p, int32 num_chars)
{
	unsigned char *result;
	int32 needed = 0;
	int32 ignored;
	int idx;

	/*
	 * Budget the output. The budget must never be smaller than what
	 * ucs2_to_utf8_buffer emits; charging three octets for everything
	 * above 0x3FF is a safe upper bound.
	 */
	for (idx = 0; idx < num_chars; idx++) {
		uint16 ch = ucs2p[idx];

		needed += (ch <= 0x7F) ? 1 : ((ch <= 0x3FF) ? 2 : 3);
	}

	result = (unsigned char *)XP_ALLOC_PRIV(needed + 1);
	if (result == NULL)
		return NULL;
	XP_MEMSET(result, 0, needed + 1);

	/* The counts reported by the converter are not needed here. */
	(void) ucs2_to_utf8_buffer(ucs2p, num_chars, result, needed, &ignored);

	return result;
}

/*
 * ucs2_to_utf8_buffer
 *
 * Convert a ucs2 buffer to a utf8 multibyte character string
 *
 * inputs:
 *         ucs2p              - ucs2 characters to convert
 *         num_chars          - number of ucs2 characters ("read" limit)
 *         utf8p              - destination for the utf8 octets
 *         num_utf8_bytes     - size of utf8p in octets ("write" limit)
 *         utf8_bytes_written - receives the number of octets stored
 *
 * outputs: returns the number of characters "read" from the ucs2 string
 *          sets *utf8_bytes_written to the number of utf8 octets "written"
 *
 * Conversion stops at the first character that no longer fits in the
 * remaining space. No terminating NUL is stored.
 *
 * Octets per character: 0x0000-0x007F one, 0x0080-0x07FF two,
 * 0x0800-0xFFFF three. The two octet form holds 11 bits, so emitting
 * three octets below 0x0800 would produce an overlong sequence.
 */
int32
ucs2_to_utf8_buffer(const uint16 *ucs2p, int32 num_chars,
		unsigned char *utf8p, int32 num_utf8_bytes, int32 *utf8_bytes_written)
{
	int i;

	/*
	 * Init values
	 */
	*utf8_bytes_written = 0;


	/*
	 * Convert the data
	 */
	for (i=0; i<num_chars; i++) {
		if (ucs2p[i] <= 0x7F) { /* 0-0x7f only need one byte */
			if (num_utf8_bytes < 1)
				break;
			utf8p[*utf8_bytes_written] = (unsigned char)ucs2p[i];
			num_utf8_bytes -= 1;
			*utf8_bytes_written += 1;
		}
		else if (ucs2p[i] <= 0x7FF) { /* 0x80-0x7ff only need two bytes */
			if (num_utf8_bytes < 2)
				break;
			utf8p[*utf8_bytes_written+0] = (unsigned char)
					(TWO_OCTET_BASE | ((ucs2p[i]>>6)&TWO_OCTET_MASK));
			utf8p[*utf8_bytes_written+1] = (unsigned char)
					(CONTINUING_OCTET_BASE | (ucs2p[i]&CONTINUING_OCTET_MASK));
			num_utf8_bytes -= 2;
			*utf8_bytes_written += 2;
		}
		else { /* 0x800-0xffff need three bytes */
			if (num_utf8_bytes < 3)
				break;
			utf8p[*utf8_bytes_written+0] = (unsigned char)
					(THREE_OCTET_BASE | ((ucs2p[i]>>12)&THREE_OCTET_MASK));
			utf8p[*utf8_bytes_written+1] = (unsigned char)
				(CONTINUING_OCTET_BASE | ((ucs2p[i]>>6)&CONTINUING_OCTET_MASK));
			utf8p[*utf8_bytes_written+2] = (unsigned char)
					(CONTINUING_OCTET_BASE | (ucs2p[i]&CONTINUING_OCTET_MASK));
			num_utf8_bytes -= 3;
			*utf8_bytes_written += 3;
		}
	}

	return i;
}

/*
 * utf8_to_ucs2_buffer
 *
 * Convert a run of utf8 octets into ucs2 characters.
 *
 * inputs: utf8p       - utf8 octets to convert
 *         utf8len     - number of octets available ("read" limit)
 *         parsed_cnt  - receives the number of utf8 octets consumed
 *         invalid_cnt - receives the number of invalid octets met
 *         ucs2p       - destination for the ucs2 characters
 *         ucs2len     - capacity of ucs2p in characters ("write" limit)
 *
 * outputs: returns the number of ucs2 characters stored in ucs2p
 *          (0 when either buffer is NULL or empty)
 *
 * An invalid octet is copied to the output unchanged and counted in
 * *invalid_cnt; decoding resumes with the following octet. A sequence
 * that is cut short by the end of the input ends the conversion and is
 * not counted in *parsed_cnt.
 */
UNICVTAPI int32
utf8_to_ucs2_buffer(const unsigned char *utf8p, int16 utf8len,
						int *parsed_cnt, int *invalid_cnt,
						uint16 *ucs2p, int32 ucs2len)
{
	int in_pos = 0;
	int out_pos = 0;
	int consumed;

	/* The counters are valid even on the early return below. */
	*parsed_cnt = 0;
	*invalid_cnt = 0;

	/* Both buffers must exist and hold at least one element. */
	if ((utf8p == NULL) || (utf8len < 1)
		|| (ucs2p == NULL) || (ucs2len < 1)) {
		return 0;
	}

	while ((in_pos < utf8len) && (out_pos < ucs2len)) {

		consumed = utf8_to_ucs2_char(utf8p+in_pos, utf8len-in_pos,
											(uint16*)ucs2p+out_pos);

		/* Input ends inside a sequence: keep what we have so far. */
		if (consumed == -2)
			break;

		/* Invalid octet: pass it through and resynchronize on the next. */
		if (consumed == -1) {
			*invalid_cnt += 1;
			ucs2p[out_pos] = utf8p[in_pos];
			consumed = 1;
		}

		/* One more character has been stored. */
		*parsed_cnt += consumed;
		out_pos += 1;
		in_pos += consumed;
	}

	return out_pos;
}

/* Function:  one_utf8_to_ucs2_char
 *
 * Decodes the UTF-8 sequence starting at utf8p into one UCS-2 char.
 *
 * utf8p    - first octet of the sequence
 * utf8endp - last readable octet of the UTF-8 buffer
 * onecharp - receives the UCS-2 char; set to 0 when -1 is returned
 *
 * The length of the sequence (1, 2 or 3 octets) is taken from the lead
 * octet alone; the following octets are not checked for being valid
 * continuation octets.
 *
 * Returns the number of octets consumed, or -1 if the sequence would
 * run past utf8endp.
 */
PRIVATE int16 one_utf8_to_ucs2_char(const unsigned char *utf8p, const unsigned char *utf8endp,
							   uint16 *onecharp)
{

	int16 idx, numoctets;
	uint32 accum = 0;

	*onecharp = 0;

	numoctets = (*utf8p >= THREE_OCTET_BASE) ? 3
			  : ((*utf8p >= TWO_OCTET_BASE) ? 2 : 1);

	/* See if all the data for the char is there */
	if (utf8endp < (utf8p + numoctets - 1)) {
		return (-1);
	}

	/* Pack the raw octets, tag bits included, six bits apart. */
	for (idx = 0; idx < numoctets; idx++) {
		accum = (accum << 6) + utf8p[idx];
	}

	/* Remove the tag bits of all octets in one subtraction:
	 *   0xE2080 == (0xE0 << 12) + (0x80 << 6) + 0x80
	 *   0x03080 == (0xC0 << 6) + 0x80
	 */
	if (numoctets == 3) {
		accum -= 0x000E2080UL;
	} else if (numoctets == 2) {
		accum -= 0x00003080UL;
	}

	/* Only the low 16 bits are kept. */
	*onecharp = (uint16)(accum & 0x0000FFFFUL);
	return(numoctets);
}


/*
 * Internal Function: pad_and_write
 *
 * Ends a shift sequence that still has fewer than six bits pending.
 * The top six bits of buffer (the pending bits, followed by zero bits as
 * padding) are looked up in opt->tob64 and stored at tobufp[0], and
 * opt->endshift is stored at tobufp[1]. Two bytes are written without
 * any check; the caller must have verified that they fit, and must
 * advance its own pointer.
 *
 * Returns 0 after writing. If six or more bits are pending, nothing is
 * written and bufferBitCount is returned unchanged.
 */

PRIVATE uint16  pad_and_write(uint32 buffer, unsigned char *tobufp,
							int16 bufferBitCount, utf7_encoding_method_data*	opt)


{
	int16 sextet;

	if (bufferBitCount < 6) {
		sextet = ((unsigned char) (buffer >> 26));
		tobufp[0] = opt->tob64[sextet];
		tobufp[1] = opt->endshift;
		return(0);
	}

	/* A whole base64 character is still pending: refuse to pad. */
	return(bufferBitCount);
}


/* Function: swap_ucs2_bytes
 *
 * Exchanges the two bytes of every UCS-2 char in ucsbuf, in place.
 *
 * ucsbuf   - buffer of UCS-2 chars
 * ucsbufsz - its size in *bytes*; a trailing odd byte is left untouched
 *
 * This copes with UCS-2 data that arrives in the opposite byte order.
 * A stream in the expected order starts with the Byte Order Mark
 * 0xFEFF; seeing 0xFFFE instead means every char needs its bytes
 * exchanged. The mark only appears at the very start of a stream, so
 * the decision has to be remembered by the caller for later chunks.
 *
 * This function does not look for the mark itself: it swaps
 * unconditionally, and deciding whether to call it is up to the caller.
 */
PRIVATE void	swap_ucs2_bytes(unsigned char *ucsbuf, int32 ucsbufsz)
{
	int32 hi;
	unsigned char held = 0;

	/* hi indexes the second byte of each complete pair. */
	for (hi = 1; hi < ucsbufsz; hi += 2) {
		held = ucsbuf[hi - 1];
		ucsbuf[hi - 1] = ucsbuf[hi];
		ucsbuf[hi] = held;
	}
}


/* UCS-2 to UTF-7 jliu */


 /*
  * mz_ucs2utf7
  * ------------
  *
  * This function takes a CCCDataObject, a buffer of UCS-2 data, and the
  * size of that buffer in bytes. It allocates and returns a buffer of
  * the corresponding UTF-7 data, NUL terminated (the length is reported
  * through INTL_SetCCCLen on the CCCDataObject). The caller is
  * responsible for freeing the returned data. On allocation failure it
  * sets MK_OUT_OF_MEMORY through INTL_SetCCCRetval and returns NULL.
  *
  * If a byte is left over at the end of the UCS-2 buffer (ie, half of a
  * character), it is saved in the uncvtbuf of the CCCDataObject and
  * used on the next call. uncvtbuf[0] holds the number of saved bytes
  * (0 or 1) and uncvtbuf[1] holds the saved byte.
  *
  * Each character is assembled with the first byte as the high byte.
  * 0xFFFE / 0xFEFF marks in the data are consumed, not converted: they
  * switch the object's "from" charset to CS_UCS2_SWAP / CS_UCS2, which
  * in turn decides whether the bytes of later characters are exchanged.
  *
  * UTF-7 is a variant of base-64, and like base-64, it accumulates
  * bits in a bit buffer, transforming them to UTF-7 chars when it
  * has multiples of 6 bits. If the UCS-2 data being translated does
  * not happen to terminate with a multiple of 6 bits, the final
  * char will be padded with 0's, and the shift sequence terminated.
  * For this reason, we will *never* be inside a shift sequence in
  * between chunks of data. This may mean that the final stream of
  * data has sequences that look like +[some UTF-7 data]-+[more data]-,
  * with a plus immediately following a -. Although unconventional,
  * this is in fact legal UTF-7.
  *
  * Finally, there are two formats of UTF-7, one extremely conservative
  * fashion which shifts every character which could possibly be
  * considered unsafe, and another which is somewhat more lax. The
  * differing characters are: !\"#$%&*;<=>@[]^_`{|}
  * NOTE(review): this function always uses the rfc1642_utf7 tables; it
  * does not consult any flag on the object to choose between the two.
  */
/* Tables */


MODULE_PRIVATE UNICVTAPI unsigned char *
mz_ucs2utf7(	CCCDataObject		obj,
				const unsigned char	*ucs2buf,	/* UCS-2 buf for conv */
				int32				ucs2bufsz)	/* UCS-2 buf size in bytes */
{
	utf7_encoding_method_data* opt = &rfc1642_utf7;
 	unsigned char	*tobuf = NULL;
	int32			tobufsz;
	unsigned char	*tobufp, *ucs2p;		/* current byte in bufs	*/
 	unsigned char	*tobufendp, *ucs2endp;	/* end of buffers		*/
 	int32					uncvtlen = 0;
	unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);


	uint16 onechar;
	int16 inShiftSequence = FALSE;
	int16 needToShift = FALSE;
	uint32 buffer = 0;				/* pending bits, kept at the top of the word */
	uint32 buffertemp = 0;
	int16 bufferBitCount = 0;		/* number of pending bits in buffer */
	unsigned char oneBase64char;
	XP_Bool needToSwap = FALSE;


	/* A previous chunk may already have established swapped byte order. */
	if( INTL_GetCCCFromCSID( obj ) == CS_UCS2_SWAP )
		needToSwap = TRUE;


	/* Allocate a dest buffer:
	** in the worst case, every Unicode character will cost 2+4 = 6 octetes
	*/

	uncvtlen = uncvtbuf[0];
	tobufsz = 6*( (ucs2bufsz + uncvtlen)/2 + 1 ) + 1;
	/* NOTE(review): for a non-negative ucs2bufsz the size computed above
	 * is at least 7, so this guard presumably never fires. */
	if (!tobufsz) {
		return NULL;
	}

	if ((tobuf = (unsigned char *)XP_ALLOC_PRIV(tobufsz)) == (unsigned char *)NULL) {
		INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
		return(NULL);
	}
										/* Initialize pointers, etc.	*/
 	ucs2p = (unsigned char *)ucs2buf;
 	ucs2endp = ucs2p + ucs2bufsz - 1; /* address of the last input byte */

 	tobufp = tobuf;
 	tobufendp = tobufp + tobufsz - 2;		/* save space for terminating null*/


	/* NOTE(review): the test below asks for two unread input bytes even
	 * when a byte carried over from the previous call is pending, in
	 * which case only one new byte is consumed - confirm the behaviour
	 * for very short chunks is as intended. */
	while( (tobufp <= tobufendp) && (ucs2p < ucs2endp) ) {

		/* High byte: the carried-over byte if there is one, else the
		 * next input byte. Low byte: always the next input byte. */
		if( uncvtbuf[0] != 0 ){
			onechar = uncvtbuf[1];
			uncvtbuf[0] = 0;
		} else
			onechar = *ucs2p++;
		onechar <<= 8;
		onechar |= *ucs2p++;

		/* do the swap stuff */

		/* Byte order marks are tested before any swapping is applied and
		 * produce no output of their own. */
		if( onechar == NEEDS_SWAP_MARK ){
			INTL_SetCCCFromCSID( obj, CS_UCS2_SWAP );
			needToSwap = TRUE;
			continue;
		} else if( onechar == BYTE_ORDER_MARK ){
			INTL_SetCCCFromCSID( obj, CS_UCS2 );
			needToSwap = FALSE;
			continue;
		}

		if( needToSwap ){
			onechar = ( onechar << 8 ) | ( onechar >> 8 );
		}

		/* we need to be shifted if the character is non-ASCII or
		 * is an ASCII character that should be shifted.
		 * (the shift table is only indexed when onechar <= MAX_ASCII)
		 */
		needToShift = (onechar > MAX_ASCII) || (opt->shift[onechar]);


		/* Leaving a shift sequence: flush pending bits (padded) and
		 * write the end-of-shift character. */
		if(!needToShift && inShiftSequence)	  {

			if(bufferBitCount > 0) {
				if((tobufp+2) > tobufendp) break;
				bufferBitCount = pad_and_write(buffer, tobufp, bufferBitCount, opt);
				if (!bufferBitCount) {	/* buffer successfully flushed */
					tobufp+=2;
					buffer = 0;
				}

			} else {
				if((tobufp+1) > tobufendp) break;
				*tobufp++ = opt->endshift;
			}
			inShiftSequence = FALSE; /* now just fallthrough to next case*/
		}

		/* Directly representable character: copy it as a single byte. */
		if(!needToShift && 	!inShiftSequence) {
			if((tobufp+1) > tobufendp) break;
			*tobufp++ = (char) onechar;
		}

		/* Entering a shift sequence: write the start-of-shift character. */
		if(needToShift && !inShiftSequence)	 {
			*tobufp++ = opt->startshift;
			if(onechar == opt->startshift) { /* special-case behavior if onechar is a + */
				if((tobufp+1) > tobufendp) break;
				*tobufp++ = opt->endshift;
			}
			else inShiftSequence = TRUE;
		}

		/* Inside a shift sequence: append the 16 bits of onechar below
		 * the bits already pending, then emit whole base64 characters. */
		if(needToShift && inShiftSequence) {

			buffertemp = onechar & 0x0000FFFF;
			buffer |= buffertemp << (16 - bufferBitCount);
											/* ^--16 is the size of the int32 minus
											 * the size of onechar */
			bufferBitCount += 16;


			/* Flush the buffer of as many base64 characters as we can form */
			while(bufferBitCount>5) {
	 			  if(tobufp > tobufendp) break;
				  oneBase64char = (char)  ((buffer & 0xFC000000) >> 26);
				  *tobufp++ =  opt->tob64[oneBase64char];
				  buffer <<= 6;
				  bufferBitCount -= 6;
			}
		}


	} /* end of while loop */


	/* Anything left in the buffer at this point should be padded with 0's
	 * and appended to tobuf. */

	if(inShiftSequence) {

		if(bufferBitCount > 0) {

			if((tobufp+2) <= tobufendp) {
				bufferBitCount = pad_and_write(buffer, tobufp, bufferBitCount,  opt);
				if (!bufferBitCount) { /* buffer successfully flushed */
					tobufp+=2;
					buffer = 0;
				}
			}

		}  else {
			 if((tobufp+1) <= tobufendp) *tobufp++ = opt->endshift;
		}

		inShiftSequence = FALSE;
	}


	*tobufp = '\0';				/* NULL terminate dest. data */


	INTL_SetCCCLen(obj, tobufp - tobuf);		/* length of processed data, in bytes */

	/* If input remains, carry the last input byte over to the next call;
	 * otherwise mark the carry-over buffer as empty.
	 * NOTE(review): only *ucs2endp is saved, so anything else left unread
	 * (for example after an early break above) is dropped - confirm. */
	if(ucs2p <= ucs2endp) {			/*  unconverted ucs2 left? */
		uncvtbuf[0] = 1;
		uncvtbuf[1] = *ucs2endp;
	} else
		uncvtbuf[0] = 0;


	return(tobuf);
}