pjs/lib/libi18n/fe_ccc.c

1243 строки
43 KiB
C

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/* fe_ccc.c */
/* Test harness code to be replaced by FE specific code */
#ifdef XP_OS2
#define INCL_DOS
#endif
#include "intlpriv.h"
#include <stdio.h>
#include "xp.h"
#include "intl_csi.h"
#ifdef XP_MAC
#include "resgui.h"
#endif
/* for XP_GetString() */
#include "xpgetstr.h"
extern int MK_OUT_OF_MEMORY;
/*
IMPORTANT NOTE:
mz_euc2euc
mz_b52b5
mz_cns2cns
mz_ksc2ksc
mz_sjis2sjis
mz_utf82utf8
are now replaced by mz_mbNullConv.
We should eventually replace mz_hz2gb as well, after we extract the hz -> gb conversion.
*/
MODULE_PRIVATE unsigned char* mz_euctwtob5( CCCDataObject obj, const unsigned char *in, int32 insize);
MODULE_PRIVATE unsigned char* mz_b5toeuctw( CCCDataObject obj, const unsigned char *in, int32 insize);
PRIVATE unsigned char *
mz_hz2gb(CCCDataObject obj, const unsigned char *kscbuf, int32 kscbufsz);
PRIVATE unsigned char *
mz_mbNullConv(CCCDataObject obj, const unsigned char *buf, int32 bufsz);
PRIVATE unsigned char *
mz_AnyToAnyThroughUCS2(CCCDataObject obj, const unsigned char *buf, int32 bufsz);
/* intl_CharLenFunc is designed to be used with mz_mbNullConv: it is called
 * with the first byte of a character and returns how many bytes that
 * character occupies in the given encoding. */
typedef int16 (*intl_CharLenFunc) ( unsigned char ch);
/* One length routine per supported multibyte (or single byte) encoding family */
PRIVATE int16 intl_CharLen_SJIS( unsigned char ch);
PRIVATE int16 intl_CharLen_EUC_JP( unsigned char ch);
PRIVATE int16 intl_CharLen_CGK( unsigned char ch);
PRIVATE int16 intl_CharLen_CNS_8BIT( unsigned char ch);
PRIVATE int16 intl_CharLen_UTF8( unsigned char ch);
PRIVATE int16 intl_CharLen_SingleByte(unsigned char ch);
/* Indices into intl_char_len_func[] below; the values must stay in sync with
 * the order of the entries in that table. */
#define INTL_CHARLEN_SJIS 0
#define INTL_CHARLEN_EUC_JP 1
#define INTL_CHARLEN_CGK 2
#define INTL_CHARLEN_CNS_8BIT 3
#define INTL_CHARLEN_UTF8 4
#define INTL_CHARLEN_SINGLEBYTE 5
/* a conversion flag for JIS, set if converting hankaku (1byte) kana to zenkaku (2byte) */
#define INTL_SEND_HANKAKU_KANA 128
/* Dispatch table indexed by the INTL_CHARLEN_* constants; mz_mbNullConv picks
 * its length routine from here using the converter's conversion flag. */
PRIVATE intl_CharLenFunc intl_char_len_func[]=
{
intl_CharLen_SJIS,
intl_CharLen_EUC_JP,
intl_CharLen_CGK,
intl_CharLen_CNS_8BIT,
intl_CharLen_UTF8,
intl_CharLen_SingleByte,
};
#ifdef XP_UNIX
/* Font availability flags. Each one is set to TRUE by INTL_ReportFontCharSets
 * when the matching charset ID shows up in the reported list (haveBig5 is set
 * for CS_X_BIG5), and is consulted by INTL_DocToWinCharSetID when it decides
 * which table entries to skip. */
PRIVATE XP_Bool haveBig5 = FALSE;
PRIVATE XP_Bool have88595 = FALSE;
PRIVATE XP_Bool have1251 = FALSE;
PRIVATE XP_Bool haveKOI8R = FALSE;
#endif
/* Zero-terminated list of charset IDs most recently handed to
 * INTL_ReportFontCharSets; NULL until the first report. */
PRIVATE int16 *availableFontCharSets = NULL;
/* Table that maps the FROM char, codeset to all other relevant info:
* - TO character codeset
* - Fonts (fixed & proportional) for TO character codeset
* - Type of conversion (func for Win/Mac, value for X)
* - Argument for conversion routine. Routine-defined.
*
* Not all of these may be available. Depends upon available fonts,
* scripts, codepages, etc. Need to query system to build valid table.
*
* What info do I need to make the font change API on the 3 platforms?
* Is just a 32bit font ID sufficient?
*
* Some X Windows can render Japanese in either EUC or SJIS, how do we
* choose?
*/
/* The ***first*** match of a "FROM" encoding (1st col.) will be
* used as the URL->native encoding. Be careful of the
* ordering.
* Additional entries for the same "FROM" encoding, specifies
* how to convert going out (e.g., sending mail, news or forms).
*/
/*
What does the flag mean?
For Mac, the flag for One2OneCCC is the resource number of a 256 byte mapping table.
For all platforms, the flag for mz_mbNullConv is an INTL_CHARLEN_* index into
intl_char_len_func[], which selects the intl_CharLenFunc routine to use.
*/
#ifdef XP_MAC
/* Macintosh conversion table.
 * Each row reads: { FROM csid, TO csid, third field, conversion routine,
 * flag/argument for the conversion routine }.
 * The third field is presumably the `autoselect` member read by
 * INTL_CanAutoSelect -- TODO confirm against the cscvt_t declaration.
 * The table is scanned linearly and ends at the row whose FROM csid is 0,
 * so the order of rows with the same FROM csid matters.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
/* SINGLE BYTE */
/* LATIN1 */
{CS_LATIN1, CS_MAC_ROMAN, 0, (CCCFunc)One2OneCCC, xlat_LATIN1_TO_MAC_ROMAN},
{CS_ASCII, CS_MAC_ROMAN, 0, (CCCFunc)One2OneCCC, xlat_LATIN1_TO_MAC_ROMAN},
{CS_MAC_ROMAN, CS_MAC_ROMAN, 0, (CCCFunc)0, 0},
{CS_MAC_ROMAN, CS_LATIN1, 0, (CCCFunc)One2OneCCC, xlat_MAC_ROMAN_TO_LATIN1},
{CS_MAC_ROMAN, CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_ROMAN_TO_LATIN1},
/* LATIN2 */
{CS_LATIN2, CS_MAC_CE, 0, (CCCFunc)One2OneCCC, xlat_LATIN2_TO_MAC_CE},
{CS_MAC_CE, CS_MAC_CE, 0, (CCCFunc)0, 0},
{CS_MAC_CE, CS_LATIN2, 0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_LATIN2},
{CS_MAC_CE, CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_LATIN2},
{CS_CP_1250, CS_MAC_CE, 0, (CCCFunc)One2OneCCC, xlat_CP_1250_TO_MAC_CE},
{CS_MAC_CE, CS_CP_1250, 0, (CCCFunc)One2OneCCC, xlat_MAC_CE_TO_CP_1250},
/* CYRILLIC */
{CS_8859_5, CS_MAC_CYRILLIC,0, (CCCFunc)One2OneCCC, xlat_8859_5_TO_MAC_CYRILLIC},
{CS_MAC_CYRILLIC,CS_MAC_CYRILLIC, 0, (CCCFunc)0, 0},
{CS_MAC_CYRILLIC,CS_8859_5, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_8859_5},
{CS_MAC_CYRILLIC,CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_8859_5},
{CS_CP_1251, CS_MAC_CYRILLIC,0, (CCCFunc)One2OneCCC, xlat_CP_1251_TO_MAC_CYRILLIC},
{CS_MAC_CYRILLIC,CS_CP_1251, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_CP_1251},
{CS_KOI8_R, CS_MAC_CYRILLIC,0, (CCCFunc)One2OneCCC, xlat_KOI8_R_TO_MAC_CYRILLIC},
{CS_MAC_CYRILLIC,CS_KOI8_R, 0, (CCCFunc)One2OneCCC, xlat_MAC_CYRILLIC_TO_KOI8_R},
/* GREEK */
{CS_8859_7, CS_MAC_GREEK, 0, (CCCFunc)One2OneCCC, xlat_8859_7_TO_MAC_GREEK},
{CS_MAC_GREEK, CS_MAC_GREEK, 0, (CCCFunc)0, 0},
{CS_MAC_GREEK, CS_8859_7, 0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_8859_7},
{CS_MAC_GREEK, CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_8859_7},
{CS_CP_1253, CS_MAC_GREEK, 0, (CCCFunc)One2OneCCC, xlat_CP_1253_TO_MAC_GREEK},
{CS_MAC_GREEK, CS_CP_1253, 0, (CCCFunc)One2OneCCC, xlat_MAC_GREEK_TO_CP_1253},
/* TURKISH */
{CS_8859_9, CS_MAC_TURKISH, 0, (CCCFunc)One2OneCCC, xlat_8859_9_TO_MAC_TURKISH},
{CS_MAC_TURKISH,CS_MAC_TURKISH, 0, (CCCFunc)0, 0},
{CS_MAC_TURKISH,CS_8859_9, 0, (CCCFunc)One2OneCCC, xlat_MAC_TURKISH_TO_8859_9},
{CS_MAC_TURKISH,CS_ASCII, 0, (CCCFunc)One2OneCCC, xlat_MAC_TURKISH_TO_8859_9},
/* MULTIBYTE */
/* JAPANESE */
{CS_SJIS, CS_SJIS, 1, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_SJIS },
{CS_SJIS, CS_JIS, 1, (CCCFunc)mz_sjis2jis, 0},
{CS_JIS, CS_SJIS, 1, (CCCFunc)jis2other, 0},
{CS_EUCJP, CS_SJIS, 1, (CCCFunc)mz_euc2sjis, 0},
{CS_JIS, CS_EUCJP, 1, (CCCFunc)jis2other, 1},
{CS_EUCJP, CS_JIS, 1, (CCCFunc)mz_euc2jis, 0},
{CS_SJIS, CS_EUCJP, 1, (CCCFunc)mz_sjis2euc, 0},
/* auto-detect Japanese conversions */
{CS_SJIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
{CS_JIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
{CS_EUCJP_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
/* KOREAN */
{CS_KSC_8BIT, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK },
{CS_2022_KR, CS_KSC_8BIT, 0, (CCCFunc)mz_iso2euckr, 0},
{CS_KSC_8BIT, CS_2022_KR, 0, (CCCFunc)mz_euckr2iso, 0},
/* auto-detect Korean conversions */
{CS_KSC_8BIT_AUTO, CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
{(CS_2022_KR|CS_AUTO) , CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
{CS_KSC5601, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_KSC_8BIT, CS_KSC5601, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
/* SIMPLIFIED CHINESE */
/* GB -> GB goes through mz_hz2gb, which passes 8-bit GB bytes through unchanged */
{CS_GB_8BIT, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
{CS_GB2312, CS_GB_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_GB_8BIT, CS_GB2312, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_HZ, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
/* we need gb2hz routine to complete hz support */
/* TRADITIONAL CHINESE */
{CS_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK },
{CS_BIG5, CS_CNS_8BIT, 0, (CCCFunc)mz_b5toeuctw, 0},
{CS_CNS_8BIT, CS_BIG5, 0, (CCCFunc)mz_euctwtob5, 0},
{CS_X_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_BIG5, CS_X_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
/* UNICODE */
{CS_UTF8, CS_UTF8, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_UTF8 },
{CS_UTF8, CS_UCS2, 0, (CCCFunc)mz_utf82ucs, 0},
{CS_UTF8, CS_UTF7, 0, (CCCFunc)mz_utf82utf7, 0},
{CS_UTF8, CS_UCS2_SWAP, 0, (CCCFunc)mz_utf82ucsswap, 0},
{CS_UTF8, CS_IMAP4_UTF7, 0, (CCCFunc)mz_utf82imap4utf7, 0},
{CS_UCS2, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
{CS_UCS2, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
{CS_UCS2_SWAP, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
{CS_UCS2_SWAP, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
{CS_UTF7, CS_UTF8, 0, (CCCFunc)mz_utf72utf8, 0},
{CS_IMAP4_UTF7, CS_UTF8, 0, (CCCFunc)mz_imap4utf72utf8, 0},
/* native encoding -> UTF8 via UCS2; the flag selects the char-length routine of the FROM encoding */
{CS_MAC_ROMAN, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_LATIN1, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_MAC_CE, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_MAC_CYRILLIC, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_KOI8_R, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_MAC_GREEK, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_MAC_TURKISH, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_SJIS, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
{CS_KSC_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
{CS_BIG5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
{CS_GB_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
/* UTF8 -> native encoding via UCS2 */
{CS_UTF8, CS_MAC_ROMAN, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_MAC_CE, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_MAC_CYRILLIC,0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_KOI8_R, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_MAC_GREEK, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_MAC_TURKISH, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_SJIS, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_KSC_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_BIG5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_GB_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_USER_DEFINED_ENCODING, CS_USER_DEFINED_ENCODING, 0, (CCCFunc)0, 0},
/* terminator: FROM csid of 0 ends every scan of this table */
{0, 0, 1, (CCCFunc)0, 0}
};
#endif /* XP_MAC */
#if defined(XP_WIN) || defined(XP_OS2)
/* Windows and OS/2 conversion table.
 * Each row reads: { FROM csid, TO csid, third field, conversion routine,
 * flag/argument for the conversion routine }.
 * The third field is presumably the `autoselect` member read by
 * INTL_CanAutoSelect -- TODO confirm against the cscvt_t declaration.
 * The table is scanned linearly and ends at the row whose FROM csid is 0,
 * so the order of rows with the same FROM csid matters.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
/* SINGLE BYTE */
/* LATIN1 */
{CS_LATIN1, CS_LATIN1, 0, (CCCFunc)0, 0},
{CS_LATIN1, CS_ASCII, 0, (CCCFunc)0, 0},
{CS_ASCII, CS_LATIN1, 0, (CCCFunc)0, 0},
{CS_ASCII, CS_ASCII, 0, (CCCFunc)0, 0},
/* LATIN2 */
{CS_CP_1250, CS_CP_1250, 0, (CCCFunc)0, 0},
{CS_CP_1250, CS_LATIN2, 0, (CCCFunc)One2OneCCC, 0},
{CS_LATIN2, CS_CP_1250, 0, (CCCFunc)One2OneCCC, 0},
{CS_LATIN2, CS_LATIN2, 0, (CCCFunc)0, 0},
{CS_LATIN2, CS_ASCII, 0, (CCCFunc)0, 0},
/* CYRILLIC */
{CS_CP_1251, CS_CP_1251, 0, (CCCFunc)0, 0},
{CS_8859_5, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
{CS_CP_1251, CS_8859_5, 0, (CCCFunc)One2OneCCC, 0},
/* NOTE(review): the next row repeats the first CYRILLIC row verbatim */
{CS_CP_1251, CS_CP_1251, 0, (CCCFunc)0, 0},
{CS_KOI8_R, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
{CS_CP_1251, CS_KOI8_R, 0, (CCCFunc)One2OneCCC, 0},
/* ARMENIAN */
{CS_ARMSCII8, CS_ARMSCII8, 0, (CCCFunc)0, 0},
/* GREEK */
{CS_CP_1253, CS_CP_1253, 0, (CCCFunc)0, 0},
{CS_CP_1253, CS_8859_7, 0, (CCCFunc)One2OneCCC, 0},
{CS_8859_7, CS_CP_1253, 0, (CCCFunc)One2OneCCC, 0},
{CS_8859_7, CS_8859_7, 0, (CCCFunc)0, 0},
/* TURKISH */
#ifdef XP_OS2
{CS_CP_1254, CS_CP_1254, 0, (CCCFunc)0, 0},
{CS_CP_1254, CS_8859_9, 0, (CCCFunc)One2OneCCC, 0},
{CS_8859_9, CS_CP_1254, 0, (CCCFunc)One2OneCCC, 0},
#endif
{CS_8859_9, CS_8859_9, 0, (CCCFunc)0, 0},
/* MULTIBYTE */
/* JAPANESE */
{CS_SJIS, CS_SJIS, 1, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_SJIS},
{CS_SJIS, CS_JIS, 1, (CCCFunc)mz_sjis2jis, 0},
{CS_JIS, CS_SJIS, 1, (CCCFunc)jis2other, 0},
{CS_EUCJP, CS_SJIS, 1, (CCCFunc)mz_euc2sjis, 0},
{CS_JIS, CS_EUCJP, 1, (CCCFunc)jis2other, 1},
{CS_EUCJP, CS_JIS, 1, (CCCFunc)mz_euc2jis, 0},
{CS_SJIS, CS_EUCJP, 1, (CCCFunc)mz_sjis2euc, 0},
/* auto-detect Japanese conversions */
{CS_SJIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
{CS_JIS_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
{CS_EUCJP_AUTO, CS_SJIS, 1, (CCCFunc)autoJCCC, 0},
/* KOREAN */
{CS_KSC_8BIT, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_2022_KR, CS_KSC_8BIT, 0, (CCCFunc)mz_iso2euckr, 0},
{CS_KSC_8BIT, CS_2022_KR, 0, (CCCFunc)mz_euckr2iso, 0},
/* auto-detect Korean conversions */
{CS_KSC_8BIT_AUTO, CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
{(CS_2022_KR|CS_AUTO) , CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
{CS_KSC5601, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_KSC_8BIT, CS_KSC5601, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
/* SIMPLIFIED CHINESE */
/* GB -> GB goes through mz_hz2gb, which passes 8-bit GB bytes through unchanged */
{CS_GB_8BIT, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
{CS_GB2312, CS_GB_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_GB_8BIT, CS_GB2312, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_HZ, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
/* we need gb2hz routine to complete hz support */
/* TRADITIONAL CHINESE */
{CS_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_BIG5, CS_CNS_8BIT, 0, (CCCFunc)mz_b5toeuctw, 0},
{CS_CNS_8BIT, CS_BIG5, 0, (CCCFunc)mz_euctwtob5, 0},
{CS_X_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_BIG5, CS_X_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
/* UNICODE */
{CS_UTF8, CS_UTF8, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_UCS2, 0, (CCCFunc)mz_utf82ucs, 0},
{CS_UTF8, CS_UTF7, 0, (CCCFunc)mz_utf82utf7, 0},
{CS_UTF8, CS_UCS2_SWAP, 0, (CCCFunc)mz_utf82ucsswap, 0},
{CS_UCS2, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
{CS_UCS2, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
{CS_UTF8, CS_IMAP4_UTF7, 0, (CCCFunc)mz_utf82imap4utf7, 0},
{CS_UCS2_SWAP, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
{CS_UCS2_SWAP, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
{CS_UTF7, CS_UTF8, 0, (CCCFunc)mz_utf72utf8, 0},
{CS_IMAP4_UTF7, CS_UTF8, 0, (CCCFunc)mz_imap4utf72utf8, 0},
/* native encoding -> UTF8 via UCS2; the flag selects the char-length routine of the FROM encoding */
{CS_LATIN1, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_1250, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_1251, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_1253, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_8859_9, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_SJIS, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
{CS_KSC_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
{CS_BIG5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
{CS_GB_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
/* UTF8 -> native encoding via UCS2 */
{CS_UTF8, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CP_1250, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CP_1251, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CP_1253, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_8859_9, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_SJIS, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_KSC_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_BIG5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_GB_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
#ifdef XP_OS2
/*
* Define additional codepage conversions for OS/2. All of these use the unicode
* based conversion tables.
*/
/* Thai */
{CS_CP_874, CS_CP_874, 0, (CCCFunc)0, 0},
{CS_CP_874, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_UTF8, CS_CP_874, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
/* Baltic */
{CS_CP_1257, CS_CP_1257, 0, (CCCFunc)0, 0},
{CS_CP_1257, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_UTF8, CS_CP_1257, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
/* Hebrew */
{CS_CP_862, CS_CP_862, 0, (CCCFunc)0, 0},
{CS_CP_862, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_UTF8, CS_CP_862, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
/* Arabic */
{CS_CP_864, CS_CP_864, 0, (CCCFunc)0, 0},
{CS_CP_864, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_UTF8, CS_CP_864, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
/* PC codepages - Default convert to windows codepages */
{CS_CP_850, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_852, CS_LATIN2, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_855, CS_CP_1251, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_857, CS_CP_1254, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_866, CS_CP_1251, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_850, CS_CP_850, 0, (CCCFunc)0, 0},
{CS_CP_852, CS_CP_852, 0, (CCCFunc)0, 0},
{CS_CP_855, CS_CP_855, 0, (CCCFunc)0, 0},
{CS_CP_857, CS_CP_857, 0, (CCCFunc)0, 0},
{CS_CP_866, CS_CP_866, 0, (CCCFunc)0, 0},
{CS_LATIN1, CS_CP_850, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_LATIN2, CS_CP_852, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_1251, CS_CP_855, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_1254, CS_CP_857, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_1251, CS_CP_866, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_850, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_852, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_855, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_857, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_CP_866, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_KOI8_R, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_UTF8, CS_CP_850, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CP_852, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CP_855, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CP_857, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CP_866, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_KOI8_R, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_MAC_ROMAN, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_LATIN1, CS_MAC_ROMAN, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
#endif /* XP_OS2 */
{CS_USER_DEFINED_ENCODING, CS_USER_DEFINED_ENCODING, 0, (CCCFunc)0, 0},
/* terminator: FROM csid of 0 ends every scan of this table */
{0, 0, 1, (CCCFunc)0, 0}
};
#endif /* XP_WIN || XP_OS2 */
#ifdef XP_UNIX
/* Unix conversion table.
 * Each row reads: { FROM csid, TO csid, third field, conversion routine,
 * flag/argument for the conversion routine }.
 * The third field is presumably the `autoselect` member read by
 * INTL_CanAutoSelect -- TODO confirm against the cscvt_t declaration.
 * The table is scanned linearly and ends at the row whose FROM csid is 0,
 * so the order of rows with the same FROM csid matters; on this platform
 * INTL_DocToWinCharSetID additionally skips some rows depending on which
 * fonts were reported.
 */
MODULE_PRIVATE cscvt_t cscvt_tbl[] = {
/* SINGLE BYTE */
/* LATIN1 */
/* NOTE(review): every other identity mapping for a single byte charset in
 * this table uses a NULL converter; confirm One2OneCCC is intended here. */
{CS_LATIN1, CS_LATIN1, 0, (CCCFunc)One2OneCCC, 0},
{CS_LATIN1, CS_ASCII, 0, NULL, 0},
{CS_ASCII, CS_LATIN1, 0, NULL, 0},
/* LATIN2 */
{CS_LATIN2, CS_LATIN2, 0, NULL, 0},
{CS_LATIN2, CS_ASCII, 0, NULL, 0},
{CS_LATIN2, CS_CP_1250, 0, (CCCFunc)One2OneCCC, 0},
{CS_CP_1250, CS_LATIN2, 0, (CCCFunc)One2OneCCC, 0},
/* CYRILLIC */
{CS_KOI8_R, CS_KOI8_R, 0, NULL, 0},
{CS_8859_5, CS_8859_5, 0, NULL, 0},
{CS_CP_1251, CS_CP_1251, 0, NULL, 0},
{CS_8859_5, CS_KOI8_R, 0, (CCCFunc)One2OneCCC, 0},
{CS_KOI8_R, CS_8859_5, 0, (CCCFunc)One2OneCCC, 0},
{CS_CP_1251, CS_8859_5, 0, (CCCFunc)One2OneCCC, 0},
{CS_8859_5, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
{CS_CP_1251, CS_KOI8_R, 0, (CCCFunc)One2OneCCC, 0},
{CS_KOI8_R, CS_CP_1251, 0, (CCCFunc)One2OneCCC, 0},
/* ARMENIAN */
{CS_ARMSCII8, CS_ARMSCII8, 0, (CCCFunc)0, 0},
/* GREEK */
{CS_8859_7, CS_8859_7, 0, NULL, 0},
{CS_8859_7, CS_CP_1253, 0, (CCCFunc)One2OneCCC, 0},
{CS_CP_1253, CS_8859_7, 0, (CCCFunc)One2OneCCC, 0},
/* TURKISH */
{CS_8859_9, CS_8859_9, 0, NULL, 0},
/* MULTIBYTE */
/* JAPANESE */
{CS_EUCJP, CS_EUCJP, 1, mz_mbNullConv, INTL_CHARLEN_EUC_JP},
{CS_JIS, CS_EUCJP, 1, jis2other, 1},
{CS_SJIS, CS_EUCJP, 1, mz_sjis2euc, 0},
{CS_EUCJP, CS_SJIS, 1, mz_euc2sjis, 0},
{CS_JIS, CS_SJIS, 1, jis2other, 0},
{CS_SJIS, CS_SJIS, 1, mz_mbNullConv, INTL_CHARLEN_SJIS},
{CS_EUCJP, CS_JIS, 1, mz_euc2jis, 0},
{CS_SJIS, CS_JIS, 1, mz_sjis2jis, 0},
/* auto-detect Japanese conversions */
{CS_JIS_AUTO, CS_EUCJP, 1, autoJCCC, 1},
{CS_SJIS_AUTO, CS_EUCJP, 1, autoJCCC, 0},
{CS_EUCJP_AUTO, CS_EUCJP, 1, autoJCCC, 0},
{CS_EUCJP_AUTO, CS_SJIS, 1, autoJCCC, 0},
{CS_JIS_AUTO, CS_SJIS, 1, autoJCCC, 0},
{CS_SJIS_AUTO, CS_SJIS, 1, autoJCCC, 0},
/* KOREAN */
{CS_KSC_8BIT, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_2022_KR, CS_KSC_8BIT, 0, (CCCFunc)mz_iso2euckr, 0},
{CS_KSC_8BIT, CS_2022_KR, 0, (CCCFunc)mz_euckr2iso, 0},
/* auto-detect Korean conversions */
{CS_KSC_8BIT_AUTO, CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
{(CS_2022_KR|CS_AUTO) , CS_KSC_8BIT,1, (CCCFunc)autoKCCC, 0},
{CS_KSC5601, CS_KSC_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_KSC_8BIT, CS_KSC5601, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
/* SIMPLIFIED CHINESE */
/* GB -> GB goes through mz_hz2gb, which passes 8-bit GB bytes through unchanged */
{CS_GB_8BIT, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
{CS_GB2312, CS_GB_8BIT, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_GB_8BIT, CS_GB2312, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_HZ, CS_GB_8BIT, 0, (CCCFunc)mz_hz2gb, 0},
/* we need gb2hz routine to complete hz support */
/* TRADITIONAL CHINESE */
/* Rows targeting CS_CNS_8BIT come first; INTL_DocToWinCharSetID skips them when haveBig5 is TRUE */
{CS_CNS_8BIT, CS_CNS_8BIT, 0, mz_mbNullConv, INTL_CHARLEN_CNS_8BIT},
{CS_BIG5, CS_CNS_8BIT, 0, mz_b5toeuctw, 0},
{CS_CNS_8BIT, CS_BIG5, 0, mz_euctwtob5, 0},
{CS_X_BIG5, CS_CNS_8BIT, 0, mz_b5toeuctw, 0},
{CS_CNS_8BIT, CS_X_BIG5, 0, mz_euctwtob5, 0},
{CS_BIG5, CS_BIG5, 0, mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_X_BIG5, CS_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_BIG5, CS_X_BIG5, 0, (CCCFunc)mz_mbNullConv, INTL_CHARLEN_CGK},
{CS_USRDEF2, CS_USRDEF2, 0, NULL, 0},
/* UNICODE */
{CS_UTF8, CS_UTF8, 0, mz_mbNullConv, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_UCS2, 0, (CCCFunc)mz_utf82ucs, 0},
{CS_UTF8, CS_UTF7, 0, (CCCFunc)mz_utf82utf7, 0},
{CS_UTF8, CS_UCS2_SWAP, 0, (CCCFunc)mz_utf82ucsswap, 0},
{CS_UTF8, CS_IMAP4_UTF7, 0, (CCCFunc)mz_utf82imap4utf7, 0},
{CS_UCS2, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
{CS_UCS2, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
{CS_UCS2_SWAP, CS_UTF8, 0, (CCCFunc)mz_ucs2utf8, 0},
{CS_UCS2_SWAP, CS_UTF7, 0, (CCCFunc)mz_ucs2utf7, 0},
{CS_UTF7, CS_UTF8, 0, (CCCFunc)mz_utf72utf8, 0},
/* native encoding -> UTF8 via UCS2; the flag selects the char-length routine of the FROM encoding */
{CS_LATIN1, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_LATIN2, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_8859_5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_KOI8_R, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_8859_7, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_8859_9, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SINGLEBYTE},
{CS_SJIS, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_SJIS},
{CS_EUCJP, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_EUC_JP},
{CS_KSC_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
{CS_BIG5, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
{CS_CNS_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CNS_8BIT},
{CS_GB_8BIT, CS_UTF8, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_CGK},
/* UTF8 -> native encoding via UCS2 */
{CS_UTF8, CS_LATIN1, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_LATIN2, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_8859_5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_KOI8_R, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_8859_7, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_8859_9, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_SJIS, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_EUCJP, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_KSC_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_BIG5, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_CNS_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_UTF8, CS_GB_8BIT, 0, (CCCFunc)mz_AnyToAnyThroughUCS2, INTL_CHARLEN_UTF8},
{CS_IMAP4_UTF7, CS_UTF8, 0, (CCCFunc)mz_imap4utf72utf8, 0},
/* terminator: FROM csid of 0 ends every scan of this table */
{0, 0, 0, NULL, 0}
};
#endif /* XP_UNIX */
/*
* this routine is needed to make sure parser and layout see whole
* characters, not partial characters
*/
typedef enum {
kHZSingle,
kHZSingleTild,
kHZSingleTildLFCR,
kHZDouble,
kHZDoubleGet1,
kHZDoubleTild,
kGBDoubleGet1, /* hacky state which allow GB pass through */
} HZ_STATE;
/*
 * mz_hz2gb
 * Streaming HZ -> 8-bit GB converter. Bytes that already have the high bit
 * set while in single byte mode are passed through unchanged, so 8-bit GB
 * input survives as well.
 *
 * obj       converter object; its conversion flag holds the HZ_STATE between
 *           calls and its unconverted buffer holds a pending first byte of a
 *           double byte character.
 * inbuf     input bytes for this call
 * inbufsize number of bytes in inbuf
 *
 * Returns a newly allocated, NUL terminated buffer owned by the caller; its
 * length is stored with INTL_SetCCCLen. Returns NULL when the arguments are
 * nonsense (same rule as mz_mbNullConv) or when the allocation fails, in
 * which case the converter's return value is set to MK_OUT_OF_MEMORY.
 */
PRIVATE unsigned char *
mz_hz2gb(CCCDataObject obj, const unsigned char *inbuf, int32 inbufsize)
{
    HZ_STATE state;
    unsigned char *outbuf;
    unsigned char *out;
    int32 i;

    /* return if the input is nonsense */
    if ((!obj) || (!inbuf) || (inbufsize < 0))
        return NULL;

    state = (HZ_STATE)INTL_GetCCCCvtflag(obj);

    /* At most one output byte per input byte, + 1 for the byte carried over
       from the previous call, + 1 for the terminating NUL */
    out = outbuf = (unsigned char *) XP_ALLOC(inbufsize + 1 + 1);
    if (NULL == outbuf)
    {
        INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
        return NULL;
    }

    if ((kHZDoubleGet1 == state) || (kGBDoubleGet1 == state))
    {
        /* If we have anything in the unconverted buffer, let's output it.
           Please notice the usage of the unconverted buffer in this routine
           is different from others: it is really a "converted, but not
           output yet" buffer.
        */
        unsigned char *u = INTL_GetCCCUncvtbuf(obj);
        XP_ASSERT(NULL != u);
        *out++ = *u;
    }

    /* The input is walked by index only, so that "unreading" a byte (i--)
       never forms a pointer in front of inbuf. */
    for (i = 0; i < inbufsize; i++)
    {
        const unsigned char c = inbuf[i];

        switch (state)
        {
            case kHZSingle:
                if (0x80 & c) {
                    state = kGBDoubleGet1;  /* change state */
                    *out++ = c;             /* get 1 double byte, output it */
                }
                else if ('~' == c) {
                    state = kHZSingleTild;  /* change state */
                } else {
                    *out++ = c;             /* normal single byte, output it, do not change state */
                }
                break;

            case kHZSingleTild:
                switch (c)
                {
                    case '{':
                        state = kHZDouble;  /* change state */
                        break;
                    case '~':
                        *out++ = '~';       /* a ~~ , generate a ~ */
                        state = kHZSingle;  /* change state back to normal */
                        break;
                    case LF:                /* continue in ASCII mode */
                    case CR:                /* continue in ASCII mode */
                        state = kHZSingleTildLFCR;
                        break;
                    default:
                        *out++ = '~';
                        i--;                /* unread it */
                        state = kHZSingle;
                        break;
                }
                break;

            case kHZSingleTildLFCR:
                switch (c)
                {
                    case LF:                /* continue in ASCII mode */
                    case CR:                /* continue in ASCII mode */
                        /* eat all following CR or LF */
                        break;
                    default:
                        i--;                /* unread it */
                        state = kHZSingle;
                        break;
                }
                break;

            case kHZDouble:
                if ('~' == c) {
                    state = kHZDoubleTild;  /* change state, generate nothing */
                } else {
                    *out++ = 0x80 | c;      /* get one byte in double byte, output it */
                    state = kHZDoubleGet1;  /* change state */
                }
                break;

            case kHZDoubleGet1:
                *out++ = 0x80 | c;          /* get the 2nd byte in double byte character, output it with hi-bit on */
                state = kHZDouble;          /* change state */
                break;

            case kHZDoubleTild:
                if ('}' == c) {
                    state = kHZSingle;      /* change state, output nothing */
                } else {
                    *out++ = 0x80 | '~';    /* not the escape sequence, output the ~ with hi-bit on */
                    *out++ = 0x80 | c;      /* now output it with hi-bit on */
                    state = kHZDouble;      /* change state */
                }
                break;

            case kGBDoubleGet1:
                *out++ = c;                 /* get the 2nd byte in double byte character, output it */
                state = kHZSingle;          /* change state */
                break;

            default:
                XP_ASSERT(0);
                state = kHZSingle;          /* change state so the program won't stop */
                break;
        }
    }

    if ((kHZDoubleGet1 == state) || (kGBDoubleGet1 == state))
    {
        /* We need to make sure we always return on the boundary of a
           character, so we back off one byte and store it in the
           unconverted buffer.
        */
        unsigned char *u = INTL_GetCCCUncvtbuf(obj);
        XP_ASSERT(NULL != u);
        out--;              /* back up */
        *u = *out;          /* store it into the unconverted buffer */
        *(u+1) = '\0';      /* null terminate the unconverted buffer */
    }

    INTL_SetCCCCvtflag(obj, ((int32)state));
    *out = '\0';
    INTL_SetCCCLen(obj, out - outbuf);
    return outbuf;
}
/* mz_mbNullConv
 * This routine is needed to make sure parser and layout see whole
 * characters, not partial characters. It copies the bytes left over from the
 * previous call plus the new input into a fresh buffer, and holds back a
 * trailing partial multibyte character in the converter's unconverted buffer
 * until the rest of it arrives.
 *
 * obj   converter object; its conversion flag is the INTL_CHARLEN_* index
 *       that selects the char-length routine from intl_char_len_func[]
 * buf   input bytes for this call
 * bufsz number of bytes in buf
 *
 * Returns a newly allocated, NUL terminated buffer owned by the caller; its
 * length is stored with INTL_SetCCCLen. Returns NULL when the arguments are
 * nonsense, or when the allocation fails (the converter's return value is
 * then set to MK_OUT_OF_MEMORY).
 */
/* This routine is designed to replace the following routines:
mz_euc2euc
mz_b52b5
mz_cns2cns
mz_ksc2ksc
mz_sjis2sjis
mz_utf82utf8
mz_gb2gb
*/
PRIVATE unsigned char *
mz_mbNullConv(CCCDataObject obj, const unsigned char *buf, int32 bufsz)
{
    intl_CharLenFunc CharLenFunc;
    unsigned char *uncvtbuf;
    unsigned char *ret;
    unsigned char *p;
    int32 uncvtsz;
    int32 total;
    int32 len;
    int32 left_over = 0;

    /* return if the input is nonsense; this has to happen before obj is used */
    if ((!obj) || (!buf) || (bufsz < 0))
        return NULL;

    CharLenFunc = intl_char_len_func[INTL_GetCCCCvtflag(obj)];

    /* Get the unconverted buffer */
    uncvtbuf = INTL_GetCCCUncvtbuf(obj);
    uncvtsz = (int32) strlen((char *)uncvtbuf);

    /* Allocate Output Buffer */
    total = uncvtsz + bufsz;
    ret = (unsigned char *) XP_ALLOC(total + 1);
    if (!ret)
    {
        INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
        return NULL;
    }

    /* Copy unconverted buffer into the output buffer */
    memcpy(ret, uncvtbuf, uncvtsz);
    /* Copy the current input buffer into the output buffer */
    memcpy(ret+uncvtsz, buf, bufsz);

    /* Walk through the buffer character by character. If the last character
       needs more bytes than are left, stop right there: p then points at the
       start of the partial character and never moves past the buffer. */
    for (p = ret, len = total; len > 0; )
    {
        int32 charlen = CharLenFunc(*p);
        if (charlen > len)
        {
            left_over = len;
            break;
        }
        p += charlen;
        len -= charlen;
    }

    /* Copy the left over into the uncvtbuf */
    if (left_over)
        memcpy(uncvtbuf, p, left_over);
    /* Null terminate the uncvtbuf */
    uncvtbuf[left_over] = '\0';

    /* Null terminate the return buffer and set the length */
    INTL_SetCCCLen(obj, total - left_over);
    ret[total - left_over] = 0;
    return ret;
}
/*
 * mz_AnyToAnyThroughUCS2
 * Converts between two encodings by going through UCS2:
 *
 *   buf -> mz_mbNullConv -> fromBuf -> INTL_TextToUnicode -> ucs2Buf
 *       -> INTL_UnicodeToStr -> toBuf
 *
 * mz_mbNullConv is used first so that only whole characters are converted.
 * The FROM and TO charset IDs are read from obj.
 *
 * Returns a newly allocated, NUL terminated buffer owned by the caller, or
 * NULL on failure. The intermediate buffers are released on every path; when
 * one of this routine's own allocations fails the converter's return value
 * is set to MK_OUT_OF_MEMORY.
 */
PRIVATE unsigned char* mz_AnyToAnyThroughUCS2(CCCDataObject obj, const unsigned char *buf, int32 bufsz)
{
    /* buffers */
    unsigned char* fromBuf = NULL;
    INTL_Unicode* ucs2Buf = NULL;
    unsigned char* toBuf = NULL;
    /* buffers' length */
    uint32 ucs2BufLen = 0;
    uint32 fromBufLen = 0;
    uint32 toBufLen = 0;
    /* from & to csid */
    uint16 fromCsid;
    uint16 toCsid;

    /* nothing can be read from a missing converter object */
    if( !obj )
        return NULL;

    fromCsid = INTL_GetCCCFromCSID(obj);
    toCsid = INTL_GetCCCToCSID(obj);

    /* get the fromBuf */
    if( !( fromBuf = mz_mbNullConv( obj, buf, bufsz) ) )
        return NULL;

    /* map fromBuf -> ucs2Buf */
    fromBufLen = INTL_GetCCCLen(obj);
    ucs2BufLen = INTL_TextToUnicodeLen( fromCsid, fromBuf, fromBufLen );
    if( !( ucs2Buf = XP_ALLOC( (ucs2BufLen + 1 ) * 2)) ){
        INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
        goto cleanup;
    }

    /* be careful, the return value is HOW MANY UNICODE IN THIS UCS2BUF, not how many bytes */
    ucs2BufLen = INTL_TextToUnicode( fromCsid, fromBuf, fromBufLen, ucs2Buf, ucs2BufLen );

    /* map ucs2Buf -> toBuf */
    toBufLen = INTL_UnicodeToStrLen( toCsid, ucs2Buf, ucs2BufLen ); /* we get BYTES here :) */
    if( !( toBuf = XP_ALLOC( toBufLen + 1 ) ) ){
        INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
        goto cleanup;
    }
    INTL_UnicodeToStr( toCsid, ucs2Buf, ucs2BufLen, toBuf, toBufLen );

    /* In order to let the caller know how long the buffer is, i have to set its tail NULL. */
    toBuf[ toBufLen ] = 0;

cleanup:
    /* clean up after myself, on success and on failure alike; free(NULL) is a no-op */
    free( fromBuf );
    free( ucs2Buf );
    return toBuf;
}
/* Byte length of the character that starts with ch: 2 when ch lies in
 * 0x81-0x9f or 0xe0-0xfc, otherwise 1. */
PRIVATE int16 intl_CharLen_SJIS( unsigned char ch)
{
    if ((ch >= 0x81) && (ch <= 0x9f))
        return 2;
    if ((ch >= 0xe0) && (ch <= 0xfc))
        return 2;
    return 1;
}
/* Byte length of the character that starts with ch: 3 for 0x8f, 2 for 0x8e
 * and for 0xa1-0xfe, otherwise 1. */
PRIVATE int16 intl_CharLen_EUC_JP( unsigned char ch)
{
    if (ch == 0x8f)
        return 3;
    if (ch == 0x8e)
        return 2;
    if ((ch >= 0xa1) && (ch <= 0xfe))
        return 2;
    return 1;
}
/* Byte length of the character that starts with ch: 2 for 0xa1-0xfe,
 * otherwise 1. The tables use this routine for the Korean, GB and Big5
 * encodings. */
PRIVATE int16 intl_CharLen_CGK( unsigned char ch)
{
    if ((ch < 0xa1) || (ch > 0xfe))
        return 1;
    return 2;
}
/* Byte length of the character that starts with ch: 4 for 0x8e, 2 for
 * 0xa1-0xfe, otherwise 1. */
PRIVATE int16 intl_CharLen_CNS_8BIT( unsigned char ch)
{
    if (ch == 0x8e)
        return 4;
    if ((ch >= 0xa1) && (ch <= 0xfe))
        return 2;
    return 1;
}
/* Byte length of the character that starts with ch: 2 for 0xc0-0xdf, 3 for
 * 0xe0-0xef, and 1 for every other byte (including 0xf0 and above). */
PRIVATE int16 intl_CharLen_UTF8( unsigned char ch)
{
    if ((ch & 0xe0) == 0xc0)    /* 110xxxxx, i.e. 0xc0-0xdf */
        return 2;
    if ((ch & 0xf0) == 0xe0)    /* 1110xxxx, i.e. 0xe0-0xef */
        return 3;
    return 1;
}
/* Every character is one byte long, whatever its value. */
PRIVATE int16 intl_CharLen_SingleByte( unsigned char ch)
{
    (void) ch;  /* the byte value does not matter */
    return 1;
}
/*
INTL_DefaultWinCharSetID
Picks the Win CSID to use for display. When a context is given and its
character set info already carries a non-zero Win CSID, that value wins;
otherwise the default document CSID is mapped through INTL_DocToWinCharSetID.
*/
PUBLIC int16 INTL_DefaultWinCharSetID(iDocumentContext context)
{
    int16 doc_csid;

    if (context) {
        INTL_CharSetInfo info = LO_GetDocumentCharacterSetInfo(context);
        if (INTL_GetCSIWinCSID(info)) {
            return INTL_GetCSIWinCSID(info);
        }
    }

    /* nothing stored for this document: derive it from the default doc csid */
    doc_csid = INTL_DefaultDocCharSetID(context);
    return INTL_DocToWinCharSetID(doc_csid);
}
/*
INTL_DocToWinCharSetID
Maps a document CSID to the Win CSID to use for display: the TO csid of the
first usable cscvt_tbl row whose FROM csid equals the document CSID with the
auto-detect bit removed. Returns CS_FE_ASCII when no row qualifies.
*/
/*
To Do: (ftang)
We should separate the DocToWinCharSetID logic from the cscvt_t table
for Cyrillic users.
*/
PUBLIC int16 INTL_DocToWinCharSetID(int16 csid)
{
    cscvt_t *entry;
    int16 wanted;

    wanted = csid & ~CS_AUTO;   /* remove auto bit */

    /* Scan the table up to its terminator (FROM csid of 0) */
    for (entry = cscvt_tbl; entry->from_csid; entry++)
    {
        if (entry->from_csid != wanted)
            continue;
        /*
         * disgusting hack...
         * Skip CNS rows when a Big5 font was reported, and skip the Cyrillic
         * rows whose target font was not reported.
         */
#ifdef XP_UNIX
        if (((entry->to_csid == CS_CNS_8BIT) && (TRUE == haveBig5)) ||
            ((entry->to_csid == CS_8859_5) && (FALSE == have88595)) ||
            ((entry->to_csid == CS_KOI8_R) && (FALSE == haveKOI8R)) ||
            ((entry->to_csid == CS_CP_1251) && (FALSE == have1251)) )
        {
            continue;
        }
#endif
        /* first usable match wins; a TO csid of 0 falls back to ASCII */
        if (entry->to_csid == 0)
            return CS_FE_ASCII;
        return entry->to_csid;
    }

    return CS_FE_ASCII;
}
/*
 * INTL_CanAutoSelect
 *   Return the autoselect field of the first cscvt_tbl row whose
 *   from_csid equals csid (compared as given, no bits are masked off),
 *   or FALSE when the table has no such row.
 */
XP_Bool
INTL_CanAutoSelect(int16 csid)
{
    cscvt_t *entry;

    for (entry = cscvt_tbl; entry->from_csid; entry++)
    {
        if (entry->from_csid == csid)
            return (entry->autoselect);
    }
    return FALSE;
}
/*
 * INTL_DefaultTextAttributeCharSetID
 *   Return the window csid recorded in the context's document charset
 *   info when it is non-zero; in every other case (no context, or no
 *   window csid recorded) defer to INTL_DefaultWinCharSetID.
 */
PUBLIC int16
INTL_DefaultTextAttributeCharSetID(iDocumentContext context)
{
    INTL_CharSetInfo info;
    int16 winCsid;

    if (!context)
        return INTL_DefaultWinCharSetID(context);

    info = LO_GetDocumentCharacterSetInfo(context);
    winCsid = INTL_GetCSIWinCSID(info);
    if (winCsid)
        return winCsid;

    return INTL_DefaultWinCharSetID(context);
}
/*
 * INTL_ReportFontCharSets
 *   Install a new 0-terminated list of csids as availableFontCharSets.
 *
 *   charsets: the new list.  A NULL pointer is ignored.  The pointer is
 *             stored as-is (not copied) and the previously stored list
 *             is released with free().
 *
 *   On XP_UNIX the list is also scanned to raise the haveBig5 /
 *   have88595 / have1251 / haveKOI8R flags (they are only ever set here,
 *   never cleared) and the list, with its length, is handed to
 *   INTL_SetUnicodeCSIDList.
 */
void
INTL_ReportFontCharSets(int16 *charsets)
{
#ifdef XP_UNIX
    int16 *scan;
    uint16 len;
#endif

    if (!charsets)
    {
        return;
    }

    /*
     * Release the previous list, but not when the caller hands back the
     * very list that is already installed: freeing it here would leave
     * availableFontCharSets dangling and the scan below would read
     * freed memory.
     */
    if (availableFontCharSets && (availableFontCharSets != charsets))
    {
        free(availableFontCharSets);
    }
    availableFontCharSets = charsets;

#ifdef XP_UNIX
    for (scan = charsets; *scan; scan++)
    {
        switch (*scan)
        {
        case CS_X_BIG5:
            haveBig5 = TRUE;
            break;
        case CS_8859_5:
            have88595 = TRUE;
            break;
        case CS_CP_1251:
            have1251 = TRUE;
            break;
        case CS_KOI8_R:
            haveKOI8R = TRUE;
            break;
        default:
            break;
        }
    }

    /* Number of entries in front of the 0 terminator. */
    len = (uint16)(scan - availableFontCharSets);
    INTL_SetUnicodeCSIDList(len, availableFontCharSets);
#endif
}
/* Code for CSID Iterator */

/* Number of slots in the iterator's csid list (terminator included). */
#define NUMOFCSIDINITERATOR 15

/*
 * Private state behind an INTL_CSIDIterator handle.
 *   cur      - index of the next csidlist entry INTL_CSIDIteratorNext
 *              will look at.
 *   csidlist - csids to hand out, terminated by a 0 entry.
 */
typedef struct INTL_CSIDIteratorPriv
{
    int16 cur;
    int16 csidlist[NUMOFCSIDINITERATOR];
} INTL_CSIDIteratorPriv;
#ifdef MOZ_MAIL_NEWS
PRIVATE void intl_FillTryIMAP4SearchIterator(INTL_CSIDIteratorPriv* p, int16 csid);

/*
 * intl_FillTryIMAP4SearchIterator
 *   Fill p->csidlist with the csids to try for csid, in this order:
 *     1. the default mail csid for csid,
 *     2. the default news csid for csid, if different from (1),
 *     3. the to_csid (CS_AUTO bit removed) of every cscvt_tbl row whose
 *        from_csid matches csid (CS_AUTO ignored on both sides), minus
 *        duplicates.
 *   The list is always terminated by a 0 entry.  At most
 *   NUMOFCSIDINITERATOR - 1 csids are stored so that the terminator
 *   stays inside the array; further candidates are dropped.
 *
 *   Fixes over the previous version: the terminator could be written
 *   one element past the end of csidlist when the list filled up, and
 *   the assertion was inverted (it fired on every normal insertion).
 */
PRIVATE void intl_FillTryIMAP4SearchIterator(INTL_CSIDIteratorPriv* p, int16 csid)
{
    int idx = 0;
    cscvt_t *cscvtp;

    p->csidlist[idx++] = INTL_DefaultMailCharSetID(csid);   /* add mailcsid first */
    p->csidlist[idx++] = INTL_DefaultNewsCharSetID(csid);   /* If the news csid is different add it */
    if (p->csidlist[0] == p->csidlist[1])
        idx--;

    /* Add all the csids that we know how to convert to (without the CS_AUTO bit on) */
    for (cscvtp = cscvt_tbl; cscvtp->from_csid; cscvtp++)
    {
        int16 foundcsid;
        XP_Bool notInTheList = TRUE;
        int i;

        if ((cscvtp->from_csid & ~CS_AUTO) != (csid & ~CS_AUTO))
            continue;

        foundcsid = cscvtp->to_csid & ~CS_AUTO;
        for (i = 0; i < idx; i++)
        {
            if (foundcsid == p->csidlist[i])
            {
                notInTheList = FALSE;
                break;
            }
        }
        if (!notInTheList)
            continue;

        p->csidlist[idx++] = foundcsid;

        /* Stop one slot early: the last slot is reserved for the terminator. */
        if (idx >= NUMOFCSIDINITERATOR - 1)
            break;
    }

    XP_ASSERT(idx < NUMOFCSIDINITERATOR);
    p->csidlist[idx] = 0;   /* terminate the list by 0 */
}
/*
 * INTL_CSIDIteratorCreate
 *   Allocate an iterator over csids and store its handle in *iterator.
 *
 *   iterator: receives the new handle, or NULL when allocation fails.
 *   csid:     the csid the list is built for; the CS_AUTO bit is
 *             stripped before the list is filled.
 *   flag:     selects how the list is filled.  Only
 *             csiditerate_TryIMAP4Search is handled; any other value
 *             asserts and yields an empty iterator.
 *
 *   The list starts out empty (terminator in slot 0) so that an
 *   unhandled flag can never leave INTL_CSIDIteratorNext reading
 *   uninitialised memory.
 */
PUBLIC void INTL_CSIDIteratorCreate( INTL_CSIDIterator* iterator, int16 csid, int flag)
{
    INTL_CSIDIteratorPriv* priv =
        (INTL_CSIDIteratorPriv*) XP_ALLOC(sizeof(INTL_CSIDIteratorPriv));

    *iterator = (INTL_CSIDIterator) priv;
    if (!priv)
        return;

    priv->cur = 0;
    priv->csidlist[0] = 0;      /* empty list until a filler runs */

    switch (flag)
    {
    case csiditerate_TryIMAP4Search:
        intl_FillTryIMAP4SearchIterator(priv, (int16)(csid & ~CS_AUTO));
        break;
    default:
        XP_ASSERT(FALSE);
        break;
    }
}
#endif /* MOZ_MAIL_NEWS */
/*
 * INTL_CSIDIteratorDestroy
 *   Release the state behind *iterator with XP_FREE and reset the
 *   caller's handle to NULL.
 */
PUBLIC void INTL_CSIDIteratorDestroy(INTL_CSIDIterator* iterator)
{
    INTL_CSIDIteratorPriv* doomed;

    /* Detach the state from the handle first, then release it. */
    doomed = (INTL_CSIDIteratorPriv*) *iterator;
    *iterator = NULL;

    XP_FREE(doomed);
}
/*
 * INTL_CSIDIteratorNext
 *   Fetch the next csid from the iterator.
 *
 *   iterator: handle filled in by INTL_CSIDIteratorCreate.
 *   pCsid:    receives the csid when TRUE is returned; left untouched
 *             otherwise.
 *
 *   Returns TRUE while csids remain and FALSE once the 0 terminator is
 *   reached.  FALSE is also returned for a NULL handle (for example
 *   after a failed create).  The position is not advanced past the
 *   terminator, so calling again after exhaustion keeps returning FALSE
 *   instead of reading beyond the end of the list.
 */
PUBLIC XP_Bool INTL_CSIDIteratorNext( INTL_CSIDIterator* iterator, int16* pCsid)
{
    INTL_CSIDIteratorPriv* priv;
    int16 csid;

    if (!iterator)
        return FALSE;

    priv = (INTL_CSIDIteratorPriv*) *iterator;
    if (!priv)
        return FALSE;

    /* Never index outside csidlist, whatever state the iterator is in. */
    if ((priv->cur < 0) || (priv->cur >= NUMOFCSIDINITERATOR))
        return FALSE;

    csid = priv->csidlist[priv->cur];
    if (0 == csid)
        return FALSE;       /* stay parked on the terminator */

    priv->cur++;
    *pCsid = csid;
    return TRUE;
}
#ifdef XP_OS2
/*
 * Map Netscape charset to OS/2 codepage
 *
 * The table is a flat list of (csid, codepage) pairs terminated by a
 * 0, 0 pair.  INTL_MapCsToCpNum and INTL_MapCpToCsNum scan it from the
 * top and return on the first match, so the order of the rows matters
 * wherever a codepage appears more than once (1252 and 1386 below).
 */
/*
 * This is trickier than you think. For a given charset, first entry should
 * be windows codepage, second entry should be OS/2 codepage.
 */
static uint16 CS2CodePage[] = {
CS_LATIN1 , 1004, /* 2 */
CS_ASCII , 1252, /* 1 */
CS_UTF8 , 1208, /* 290 */
CS_SJIS , 943, /* 260 */
CS_8859_3 , 913, /* 14 */
CS_8859_4 , 914, /* 15 */
CS_8859_5 , 915, /* 16 ISO Cyrillic */
CS_8859_6 , 1089, /* 17 ISO Arabic */
CS_8859_7 , 813, /* 18 ISO Greek */
CS_8859_8 , 916, /* 19 ISO Hebrew */
CS_8859_9 , 920, /* 20 */
CS_BIG5 , 950, /* 263 */
CS_GB2312 , 1386, /* 287 */
CS_CP_1250 , 1250, /* 44 CS_CP_1250 is Windows Central Europe */
CS_CP_1251 , 1251, /* 41 CS_CP_1251 is Windows Cyrillic */
CS_LATIN2 , 912, /* 10 */
CS_CP_1253 , 1253, /* 43 CS_CP_1253 is Windows Greek */
CS_CP_1254 , 1254, /* 45 CS_CP_1254 is Windows Turkish */
CS_CP_1257 , 1257, /* 61 Windows Baltic */
CS_CP_1258 , 1258, /* 62 Windows Vietnamese */
CS_CP_850 , 850, /* 53 PC Latin 1 */
CS_CP_852 , 852, /* 54 PC Latin 2 */
CS_CP_855 , 855, /* 55 PC Cyrillic */
CS_CP_857 , 857, /* 56 PC Turkish */
CS_CP_862 , 862, /* 57 PC Hebrew */
CS_CP_864 , 864, /* 58 PC Arabic */
CS_CP_866 , 866, /* 59 PC Russian */
CS_CP_874 , 874, /* 60 PC Thai */
CS_EUCJP , 930, /* 261 */
CS_GB_8BIT , 1386, /* 264 */
CS_KOI8_R , 878, /* 39 */
CS_KSC5601 , 949, /* 284 */
CS_MAC_CE , 1282, /* 11 */
CS_MAC_CYRILLIC, 1283, /* 40 */
CS_MAC_GREEK , 1280, /* 42 */
CS_MAC_ROMAN , 1275, /* 6 */
CS_MAC_TURKISH, 1281, /* 46 */
CS_UCS2 , 1200, /* 810 */
CS_USRDEF2 , 1252, /* 38 */
0, 0, /* end-of-table marker: the lookups stop at a 0 csid */
};
/*
 * INTL_MapCpToCsNum: search CS2CodePage for a codepage number and
 * return the csid of the first row that carries it, or 0 when the
 * codepage is not in the table.
 */
uint16 INTL_MapCpToCsNum(uint16 cpid) {
    uint16 *row;

    /* Rows are (csid, codepage) pairs; a 0 csid ends the table. */
    for (row = CS2CodePage; row[0] != 0; row += 2) {
        if (row[1] == cpid)
            return row[0];
    }
    return 0;
}
/*
 * INTL_MapCsToCpNum: search CS2CodePage for a csid and return the
 * codepage number of the first row that carries it, or 0 when the csid
 * is not in the table.
 */
uint16 INTL_MapCsToCpNum(uint16 csid) {
    uint16 *row;

    /* Rows are (csid, codepage) pairs; a 0 csid ends the table. */
    for (row = CS2CodePage; row[0] != 0; row += 2) {
        if (row[0] == csid)
            return row[1];
    }
    return 0;
}
/*
* Map from process codepage to default charset
*/
int16 INTL_MenuFontCSID(void) {
ULONG codepage, xxx;
DosQueryCp(4, &codepage, &xxx);
return INTL_MapCpToCsNum(codepage);
}
/*
 * INTL_MenuFontID: this implementation always returns 0.
 */
int INTL_MenuFontID()
{
    return (0);
}
#endif /* XP_OS2 */
#if defined(MOZ_MAIL_NEWS)
/*
 * Access a conversion flag for hankaku->zenkaku kana conversion for mail.
 *
 * Reports TRUE only when the converter's target csid (CS_AUTO bit
 * ignored) is CS_JIS and INTL_SEND_HANKAKU_KANA is set in its
 * conversion flags.
 */
XP_Bool INTL_GetCCCCvtflag_SendHankakuKana(CCCDataObject obj)
{
    if (CS_JIS != (INTL_GetCCCToCSID(obj) & ~CS_AUTO))
        return FALSE;

    return (INTL_SEND_HANKAKU_KANA & INTL_GetCCCCvtflag(obj)) != 0;
}
/*
 * Set or clear INTL_SEND_HANKAKU_KANA in the converter's conversion
 * flags.  Nothing happens unless the converter's target csid (CS_AUTO
 * bit ignored) is CS_JIS.
 */
void INTL_SetCCCCvtflag_SendHankakuKana(CCCDataObject obj, XP_Bool flag)
{
    int32 cvtflag;

    if (CS_JIS != (INTL_GetCCCToCSID(obj) & ~CS_AUTO))
        return;

    cvtflag = INTL_GetCCCCvtflag(obj);
    if (flag)
        cvtflag = INTL_SEND_HANKAKU_KANA | cvtflag;
    else
        cvtflag = ~INTL_SEND_HANKAKU_KANA & cvtflag;
    INTL_SetCCCCvtflag(obj, cvtflag);
}
#endif /* MOZ_MAIL_NEWS */