Bug 206811: use UTF-16 instead of UCS-2 in nsNativeCharsetUtils on Unix.

Also use iconv(3) on Linux (r=smontagu, sr=alecf)
This commit is contained in:
jshin%mailaps.org 2003-06-03 12:37:44 +00:00
Родитель 9ab392b2fb
Коммит d99c80bf83
1 изменённых файлов: 28 добавлений и 17 удалений

Просмотреть файл

@ -51,20 +51,25 @@
#include "nsAString.h"
//
// choose a conversion library. under linux we prefer using wcrtomb/mbrtowc
// to improve performance. other platforms in which wchar_t is unicode might
// benefit from this optimization as well.
// choose a conversion library. we used to use mbrtowc/wcrtomb under Linux,
// but that doesn't work for non-BMP characters whether or not we build with
// '-fshort-wchar' (see bug 206811 and
// news://news.mozilla.org:119/bajml3$fvr1@ripley.netscape.com). we now use
// iconv on every platform that provides iconv together with nl_types.h and
// nl_langinfo (HAVE_ICONV, HAVE_NL_TYPES_H and HAVE_NL_LANGINFO).
//
#if defined(__linux) && defined(HAVE_WCRTOMB) && defined(HAVE_MBRTOWC)
#define USE_STDCONV 1
#elif defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_NL_LANGINFO)
#if 1
#if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_NL_LANGINFO)
#define USE_ICONV 1
#else
#define USE_STDCONV 1
#endif
#else
#define USE_STDCONV 1
#endif
static void
isolatin1_to_ucs2(const char **input, PRUint32 *inputLeft, PRUnichar **output, PRUint32 *outputLeft)
isolatin1_to_utf16(const char **input, PRUint32 *inputLeft, PRUnichar **output, PRUint32 *outputLeft)
{
while (*inputLeft && *outputLeft) {
**output = (unsigned char) **input;
@ -76,7 +81,7 @@ isolatin1_to_ucs2(const char **input, PRUint32 *inputLeft, PRUnichar **output, P
}
static void
ucs2_to_isolatin1(const PRUnichar **input, PRUint32 *inputLeft, char **output, PRUint32 *outputLeft)
utf16_to_isolatin1(const PRUnichar **input, PRUint32 *inputLeft, char **output, PRUint32 *outputLeft)
{
while (*inputLeft && *outputLeft) {
**output = (unsigned char) **input;
@ -174,7 +179,13 @@ xp_iconv_open(const char **to_list, const char **from_list)
return INVALID_ICONV_T;
}
static const char *UCS_2_NAMES[] = {
// A PRUnichar[] holds a UTF-16 string, NOT a UCS-2 array. Therefore, we
// have to use UTF-16 with iconv(3) on platforms where it's supported.
// We could list 'UTF-16' name variants, but all platforms known (to me) to
// support UTF-16 in iconv(3) use the name 'UTF-16'. Let me know (jshin) if
// there's an exception. (bug 206811)
static const char *UTF_16_NAMES[] = {
"UTF-16",
"UCS-2",
"UCS2",
"UCS_2",
@ -265,18 +276,18 @@ nsNativeCharsetConverter::LazyInit()
else
native_charset_list[0] = native_charset;
gNativeToUnicode = xp_iconv_open(UCS_2_NAMES, native_charset_list);
gUnicodeToNative = xp_iconv_open(native_charset_list, UCS_2_NAMES);
gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
if (gNativeToUnicode == INVALID_ICONV_T) {
gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
gUTF8ToUnicode = xp_iconv_open(UCS_2_NAMES, UTF_8_NAMES);
gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to ucs-2 converter");
}
if (gUnicodeToNative == INVALID_ICONV_T) {
gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UCS_2_NAMES);
gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no unicode to utf-8 converter");
NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
@ -459,7 +470,7 @@ nsNativeCharsetConverter::NativeToUnicode(const char **input,
#endif
// fallback: zero-pad and hope for the best
isolatin1_to_ucs2(input, inputLeft, output, outputLeft);
isolatin1_to_utf16(input, inputLeft, output, outputLeft);
return NS_OK;
}
@ -534,7 +545,7 @@ nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
#endif
// fallback: truncate and hope for the best
ucs2_to_isolatin1(input, inputLeft, output, outputLeft);
utf16_to_isolatin1(input, inputLeft, output, outputLeft);
return NS_OK;
}
@ -645,7 +656,7 @@ nsNativeCharsetConverter::NativeToUnicode(const char **input,
else {
// wchar_t isn't unicode, so the best we can do is treat the
// input as if it is isolatin1 :(
isolatin1_to_ucs2(input, inputLeft, output, outputLeft);
isolatin1_to_utf16(input, inputLeft, output, outputLeft);
}
return NS_OK;
@ -683,7 +694,7 @@ nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
else {
// wchar_t isn't unicode, so the best we can do is treat the
// input as if it is isolatin1 :(
ucs2_to_isolatin1(input, inputLeft, output, outputLeft);
utf16_to_isolatin1(input, inputLeft, output, outputLeft);
}
return NS_OK;