diff --git a/intl/locale/unix/unixcharset.properties b/intl/locale/unix/unixcharset.properties index 9ff094a6c3b7..cead2deeb04a 100644 --- a/intl/locale/unix/unixcharset.properties +++ b/intl/locale/unix/unixcharset.properties @@ -27,8 +27,8 @@ locale.all.C=ISO-8859-1 # HP locale.all.C.iso885915=ISO-8859-15 locale.all.c-french.iso88591=ISO-8859-1 -locale.all.chinese=gb18030 -locale.all.chinese-s=gb18030 +locale.all.chinese=GB2312 +locale.all.chinese-s=GB2312 locale.all.chinese-t.big5=Big5 locale.all.cs=ISO-8859-2 locale.all.cs_CZ=ISO-8859-2 @@ -510,18 +510,18 @@ locale.all.th=windows-874 locale.all.th_TH.UTF-8=UTF-8 # RedHat 7 reported by Garaschenko Slava + + Table in Debug form +Begin of Item 0000 + Format 2 + srcBegin = 0080 + destBegin = 20AC +End of Item 0000 + +========================================================*/ +/* Offset=0x0000 ItemOfList */ + 0x0001, +/*-------------------------------------------------------*/ +/* Offset=0x0001 offsetToFormatArray */ + 0x0004, +/*-------------------------------------------------------*/ +/* Offset=0x0002 offsetToMapCellArray */ + 0x0005, +/*-------------------------------------------------------*/ +/* Offset=0x0003 offsetToMappingTable */ + 0x0008, +/*-------------------------------------------------------*/ +/* Offset=0x0004 Start of Format Array */ +/* Total of Format 0 : 0x0000 */ +/* Total of Format 1 : 0x0000 */ +/* Total of Format 2 : 0x0001 */ +/* Total of Format 3 : 0x0000 */ + +0x0002, +/*-------------------------------------------------------*/ +/* Offset=0x0005 Start of MapCell Array */ +/* 0000 */ 0x0080, 0x0000, 0x20AC, +/*-------------------------------------------------------*/ +/* Offset=0x0008 Start of MappingTable */ + +/* End of table Total Length = 0x0008 * 2 */ diff --git a/intl/uconv/ucvcn/nsGB2312ToUnicodeV2.h b/intl/uconv/ucvcn/nsGB2312ToUnicodeV2.h new file mode 100644 index 000000000000..4e311dea7006 --- /dev/null +++ b/intl/uconv/ucvcn/nsGB2312ToUnicodeV2.h @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsGB2312ToUnicodeV2_h___ +#define nsGB2312ToUnicodeV2_h___ + +#include "nsUCSupport.h" +#include "gbku.h" +#include "nsGBKToUnicode.h" +//---------------------------------------------------------------------- +// Class nsGB2312ToUnicodeV2 [declaration] + +/** + * A character set converter from GB2312 to Unicode. + * + * @created 06/Apr/1999 + * @author Catalin Rotaru [CATA] + */ +class nsGB2312ToUnicodeV2 : public nsGB18030ToUnicode +{ +public: + + /** + * Class constructor. + */ + nsGB2312ToUnicodeV2(){} +}; + + +#endif /* nsGB2312ToUnicodeV2_h___ */ diff --git a/intl/uconv/ucvcn/nsGBKToUnicode.cpp b/intl/uconv/ucvcn/nsGBKToUnicode.cpp index 0751cc6fd7c6..f28f818b372a 100644 --- a/intl/uconv/ucvcn/nsGBKToUnicode.cpp +++ b/intl/uconv/ucvcn/nsGBKToUnicode.cpp @@ -13,6 +13,28 @@ #include "nsGBKToUnicode.h" #include "gbku.h" + +//------------------------------------------------------------ +// nsGBKUnique2BytesToUnicode +//------------------------------------------------------------ +class nsGBKUnique2BytesToUnicode : public nsTableDecoderSupport +{ +public: + nsGBKUnique2BytesToUnicode(); + virtual ~nsGBKUnique2BytesToUnicode() + { } +protected: +}; + +static const uint16_t g_utGBKUnique2Bytes[] = { +#include "gbkuniq2b.ut" +}; +nsGBKUnique2BytesToUnicode::nsGBKUnique2BytesToUnicode() + : nsTableDecoderSupport(u2BytesCharset, nullptr, + (uMappingTable*) &g_utGBKUnique2Bytes, 1) +{ +} + //------------------------------------------------------------ // nsGB18030Unique2BytesToUnicode //------------------------------------------------------------ @@ -75,10 +97,10 @@ nsGB18030Unique4BytesToUnicode::nsGB18030Unique4BytesToUnicode() #define LEGAL_GBK_4BYTE_FORTH_BYTE(c) \ (UINT8_IN_RANGE(0x30, (c), 0x39)) -NS_IMETHODIMP nsGB18030ToUnicode::ConvertNoBuff(const char* aSrc, - int32_t * aSrcLength, - char16_t *aDest, - int32_t * aDestLength) +NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc, + int32_t * aSrcLength, + char16_t *aDest, + int32_t * aDestLength) { int32_t i=0; int32_t iSrcLength = (*aSrcLength); @@ -207,6 +229,15 @@ NS_IMETHODIMP nsGB18030ToUnicode::ConvertNoBuff(const char* aSrc, return rv; } + +void nsGBKToUnicode::CreateExtensionDecoder() +{ + mExtensionDecoder = new nsGBKUnique2BytesToUnicode(); +} +void nsGBKToUnicode::Create4BytesDecoder() +{ + m4BytesDecoder = nullptr; +} void nsGB18030ToUnicode::CreateExtensionDecoder() { mExtensionDecoder = new nsGB18030Unique2BytesToUnicode(); @@ -248,7 +279,7 @@ bool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, char16_t* aOut) return true; } -bool nsGB18030ToUnicode::TryExtensionDecoder(const char* aSrc, char16_t* aOut) +bool nsGBKToUnicode::TryExtensionDecoder(const char* aSrc, char16_t* aOut) { if(!mExtensionDecoder) CreateExtensionDecoder(); @@ -269,8 +300,11 @@ bool nsGB18030ToUnicode::TryExtensionDecoder(const char* aSrc, char16_t* aOut) } return false; } - -bool nsGB18030ToUnicode::Try4BytesDecoder(const char* aSrc, char16_t* aOut) +bool nsGBKToUnicode::DecodeToSurrogate(const char* aSrc, char16_t* aOut) +{ + return false; +} +bool nsGBKToUnicode::Try4BytesDecoder(const char* aSrc, char16_t* aOut) { if(!m4BytesDecoder) Create4BytesDecoder(); diff --git a/intl/uconv/ucvcn/nsGBKToUnicode.h b/intl/uconv/ucvcn/nsGBKToUnicode.h index 9830e93efa40..6264944e708e 100644 --- a/intl/uconv/ucvcn/nsGBKToUnicode.h +++ b/intl/uconv/ucvcn/nsGBKToUnicode.h @@ -2,8 +2,8 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef nsGBKToUnicode_h___ -#define nsGBKToUnicode_h___ +#ifndef nsGBK2312ToUnicode_h___ +#define nsGBK2312ToUnicode_h___ #include "nsCOMPtr.h" #include "nsIUnicodeDecoder.h" @@ -11,23 +11,23 @@ #include "nsGBKConvUtil.h" //---------------------------------------------------------------------- -// Class nsGB18030ToUnicode [declaration] +// Class nsGBKToUnicode [declaration] /** - * A character set converter from GB18030 to Unicode. + * A character set converter from GBK to Unicode. * * * @created 07/Sept/1999 * @author Yueheng Xu, Yueheng.Xu@intel.com */ -class nsGB18030ToUnicode : public nsBufferDecoderSupport +class nsGBKToUnicode : public nsBufferDecoderSupport { public: /** * Class constructor. */ - nsGB18030ToUnicode() : nsBufferDecoderSupport(1) + nsGBKToUnicode() : nsBufferDecoderSupport(1) { mExtensionDecoder = nullptr; m4BytesDecoder = nullptr; @@ -44,13 +44,25 @@ protected: nsCOMPtr mExtensionDecoder; nsCOMPtr m4BytesDecoder; - void CreateExtensionDecoder(); - void Create4BytesDecoder(); + virtual void CreateExtensionDecoder(); + virtual void Create4BytesDecoder(); bool TryExtensionDecoder(const char* aSrc, char16_t* aDest); bool Try4BytesDecoder(const char* aSrc, char16_t* aDest); - bool DecodeToSurrogate(const char* aSrc, char16_t* aDest); + virtual bool DecodeToSurrogate(const char* aSrc, char16_t* aDest); }; -#endif /* nsGBKToUnicode_h___ */ + +class nsGB18030ToUnicode : public nsGBKToUnicode +{ +public: + nsGB18030ToUnicode() {} + virtual ~nsGB18030ToUnicode() {} +protected: + virtual void CreateExtensionDecoder(); + virtual void Create4BytesDecoder(); + virtual bool DecodeToSurrogate(const char* aSrc, char16_t* aDest); +}; + +#endif /* nsGBK2312ToUnicode_h___ */ diff --git a/intl/uconv/ucvcn/nsUCvCnCID.h b/intl/uconv/ucvcn/nsUCvCnCID.h index 43e1d37d447b..77410527b45d 100644 --- a/intl/uconv/ucvcn/nsUCvCnCID.h +++ b/intl/uconv/ucvcn/nsUCvCnCID.h @@ -8,6 +8,10 @@ #include "nsISupports.h" +// Class ID for our GB2312ToUnicode charset converter// {379C2774-EC77-11d2-8AAC-00600811A836} +#define NS_GB2312TOUNICODE_CID \ + { 0x379c2774, 0xec77, 0x11d2, {0x8a, 0xac, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36}} + // Class ID for our HZToUnicode charset converter // {BA61519A-1DFA-11d3-B3BF-00805F8A6670} #define NS_HZTOUNICODE_CID \ @@ -18,6 +22,11 @@ #define NS_GBKTOUNICODE_CID \ { 0xba61519e, 0x1dfa, 0x11d3, {0xb3, 0xbf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70}} +// Class ID for our UnicodeToGB2312 charset converter +// {379C2777-EC77-11d2-8AAC-00600811A836} +#define NS_UNICODETOGB2312_CID \ + { 0x379c2777, 0xec77, 0x11d2, {0x8a, 0xac, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36}} + // Class ID for our UnicodeToGBK charset converter // {BA61519B-1DFA-11d3-B3BF-00805F8A6670} #define NS_UNICODETOGBK_CID \ diff --git a/intl/uconv/ucvcn/nsUnicodeToGB2312V2.cpp b/intl/uconv/ucvcn/nsUnicodeToGB2312V2.cpp new file mode 100644 index 000000000000..b633a7594de9 --- /dev/null +++ b/intl/uconv/ucvcn/nsUnicodeToGB2312V2.cpp @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsUnicodeToGB2312V2.h" +#include "gbku.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeToGB2312V2 [implementation] +nsUnicodeToGB2312V2::nsUnicodeToGB2312V2() : + nsEncoderSupport(2) +{ +} + +NS_IMETHODIMP nsUnicodeToGB2312V2::ConvertNoBuff(const char16_t * aSrc, + int32_t * aSrcLength, + char * aDest, + int32_t * aDestLength) +{ + int32_t iSrcLength = 0; + int32_t iDestLength = 0; + nsresult res = NS_OK; + + while (iSrcLength < *aSrcLength) + { + //if unicode's hi byte has something, it is not ASCII, must be a GB + if(IS_ASCII(*aSrc)) + { + // this is an ASCII + *aDest = CAST_UNICHAR_TO_CHAR(*aSrc); + aDest++; // increment 1 byte + iDestLength +=1; + } else { + char byte1, byte2; + if(mUtil.UnicodeToGBKChar(*aSrc, false, &byte1, &byte2)) + { + if(iDestLength+2 > *aDestLength) + { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + aDest[0]=byte1; + aDest[1]=byte2; + aDest += 2; // increment 2 bytes + iDestLength +=2; // each GB char count as two in char* string + } else { + // cannot convert + res= NS_ERROR_UENC_NOMAPPING; + iSrcLength++; // include length of the unmapped character + break; + } + } + iSrcLength++ ; // each unicode char just count as one in char16_t* string + aSrc++; + if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength )) + { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + } + *aDestLength = iDestLength; + *aSrcLength = iSrcLength; + return res; +} diff --git a/intl/uconv/ucvcn/nsUnicodeToGB2312V2.h b/intl/uconv/ucvcn/nsUnicodeToGB2312V2.h new file mode 100644 index 000000000000..4e6e0bd57d03 --- /dev/null +++ b/intl/uconv/ucvcn/nsUnicodeToGB2312V2.h @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUnicodeToGB2312V2_h___ +#define nsUnicodeToGB2312V2_h___ + +#include "nsUCSupport.h" +#include "nsGBKConvUtil.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeToGB2312V2 [declaration] + +/** + * A character set converter from Unicode to GB2312. + * + * @created 06/Apr/1999 + * @author Catalin Rotaru [CATA] + */ +class nsUnicodeToGB2312V2 : public nsEncoderSupport +{ +public: + + /** + * Class constructor. + */ + nsUnicodeToGB2312V2(); + +protected: + + NS_IMETHOD ConvertNoBuff(const char16_t * aSrc, + int32_t * aSrcLength, + char * aDest, + int32_t * aDestLength); + + //-------------------------------------------------------------------- + // Subclassing of nsEncoderSupport class [declaration] + + NS_IMETHOD ConvertNoBuffNoErr(const char16_t * aSrc, int32_t * aSrcLength, + char * aDest, int32_t * aDestLength) + { + return NS_OK; + } // just make it not abstract; + +protected: + nsGBKConvUtil mUtil; +}; + +#endif /* nsUnicodeToGB2312V2_h___ */ diff --git a/intl/uconv/ucvja/nsJapaneseToUnicode.cpp b/intl/uconv/ucvja/nsJapaneseToUnicode.cpp index 6e7febd8a758..f8b4714da18c 100644 --- a/intl/uconv/ucvja/nsJapaneseToUnicode.cpp +++ b/intl/uconv/ucvja/nsJapaneseToUnicode.cpp @@ -764,7 +764,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert( if (!mGB2312Decoder) { // creating a delegate converter (GB2312) mGB2312Decoder = - EncodingUtils::DecoderForEncoding("gb18030"); + EncodingUtils::DecoderForEncoding(NS_LITERAL_CSTRING("GB2312")); } if (!mGB2312Decoder) {// failed creating a delegate converter goto error2; diff --git a/toolkit/locales/en-US/chrome/global/charsetMenu.properties b/toolkit/locales/en-US/chrome/global/charsetMenu.properties index 101113371332..5ed954f760d3 100644 --- a/toolkit/locales/en-US/chrome/global/charsetMenu.properties +++ b/toolkit/locales/en-US/chrome/global/charsetMenu.properties @@ -64,8 +64,15 @@ ISO-8859-2.key = l ISO-8859-2 = Central European (ISO) # Chinese, Simplified +# LOCALIZATION NOTE (gbk.bis.key): +# gbk.bis.key never appears together with gbk.key and, hence, can be the same. gbk.bis.key = S gbk.bis = Chinese, Simplified +# The strings gbk.key, gbk and gb18030 are no longer used but are retained +# in order to enable backout after string freeze, just in case. +gbk.key = S +gbk = Chinese, Simplified (GBK) +gb18030 = Chinese, Simplified (GB18030) # Chinese, Traditional Big5.key = T