b=184120 Add non-BMP char. support to UTF-32 converters.

r=smontague, sr=dbaron, a=asa
This commit is contained in:
jshin%mailaps.org 2003-01-31 23:26:20 +00:00
Родитель e74f70b957
Коммит 13b3fd479c
15 изменённых файлов: 781 добавлений и 86 удалений

Просмотреть файл

@ -1859,14 +1859,7 @@
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4LE.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4BE.cpp</PATH>
<PATH>nsUnicodeToUTF32.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
@ -1901,14 +1894,7 @@
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4LEToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4BEToUnicode.cpp</PATH>
<PATH>nsUTF32ToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
@ -3175,12 +3161,7 @@
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4LE.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4BE.cpp</PATH>
<PATH>nsUnicodeToUTF32.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
@ -3205,12 +3186,7 @@
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4LEToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4BEToUnicode.cpp</PATH>
<PATH>nsUTF32ToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
@ -5491,14 +5467,7 @@
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4LE.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4BE.cpp</PATH>
<PATH>nsUnicodeToUTF32.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
@ -5533,14 +5502,7 @@
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4LEToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
</FILE>
<FILE>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4BEToUnicode.cpp</PATH>
<PATH>nsUTF32ToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
<FILEKIND>Text</FILEKIND>
<FILEFLAGS>Debug</FILEFLAGS>
@ -6807,12 +6769,7 @@
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4LE.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4BE.cpp</PATH>
<PATH>nsUnicodeToUTF32.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
@ -6837,12 +6794,7 @@
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4LEToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4BEToUnicode.cpp</PATH>
<PATH>nsUTF32ToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
@ -8374,13 +8326,7 @@
<FILEREF>
<TARGETNAME>uconv.shlb</TARGETNAME>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4LEToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
<TARGETNAME>uconv.shlb</TARGETNAME>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUCS4BEToUnicode.cpp</PATH>
<PATH>nsUTF32ToUnicode.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
</GROUP>
@ -8412,13 +8358,7 @@
<FILEREF>
<TARGETNAME>uconv.shlb</TARGETNAME>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4LE.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
<FILEREF>
<TARGETNAME>uconv.shlb</TARGETNAME>
<PATHTYPE>Name</PATHTYPE>
<PATH>nsUnicodeToUCS4BE.cpp</PATH>
<PATH>nsUnicodeToUTF32.cpp</PATH>
<PATHFORMAT>MacOS</PATHFORMAT>
</FILEREF>
</GROUP>

Просмотреть файл

@ -126,8 +126,7 @@
#include "nsVPSToUnicode.h"
#include "nsUTF7ToUnicode.h"
#include "nsMUTF7ToUnicode.h"
#include "nsUCS4BEToUnicode.h"
#include "nsUCS4LEToUnicode.h"
#include "nsUTF32ToUnicode.h"
#include "nsUCS2BEToUnicode.h"
#include "nsUCS2LEToUnicode.h"
#include "nsT61ToUnicode.h"
@ -180,8 +179,7 @@
#include "nsUnicodeToMUTF7.h"
#include "nsUnicodeToUCS2BE.h"
#include "nsUnicodeToUCS2LE.h"
#include "nsUnicodeToUCS4BE.h"
#include "nsUnicodeToUCS4LE.h"
#include "nsUnicodeToUTF32.h"
#include "nsUnicodeToT61.h"
#include "nsUnicodeToUserDefined.h"
#include "nsUnicodeToSymbol.h"
@ -437,8 +435,8 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF7ToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsMUTF7ToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16BEToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16LEToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUCS4BEToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUCS4LEToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32BEToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32LEToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUEscape);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUEscapeToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF7);
@ -446,8 +444,8 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToMUTF7);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16BE);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16LE);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUCS4BE);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUCS4LE);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32BE);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32LE);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToLangBoxArabic8);
// ucvibm
@ -958,12 +956,12 @@ static const nsModuleComponentInfo components[] =
{
DECODER_NAME_BASE "UTF-32BE" , NS_UTF32BETOUNICODE_CID,
NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32BE",
nsUCS4BEToUnicodeConstructor ,
nsUTF32BEToUnicodeConstructor ,
},
{
DECODER_NAME_BASE "UTF-32LE" , NS_UTF32LETOUNICODE_CID,
NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32LE",
nsUCS4LEToUnicodeConstructor ,
nsUTF32LEToUnicodeConstructor ,
},
{
DECODER_NAME_BASE "T.61-8bit" , NS_T61TOUNICODE_CID,
@ -1258,12 +1256,12 @@ static const nsModuleComponentInfo components[] =
{
ENCODER_NAME_BASE "UTF-32BE" , NS_UNICODETOUTF32BE_CID,
NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32BE",
nsUnicodeToUCS4BEConstructor,
nsUnicodeToUTF32BEConstructor,
},
{
ENCODER_NAME_BASE "UTF-32LE" , NS_UNICODETOUTF32LE_CID,
NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32LE",
nsUnicodeToUCS4LEConstructor,
nsUnicodeToUTF32LEConstructor,
},
{
ENCODER_NAME_BASE "T.61-8bit" , NS_UNICODETOT61_CID,

Просмотреть файл

@ -93,8 +93,7 @@ CPPSRCS = \
nsMUTF7ToUnicode.cpp \
nsUCS2BEToUnicode.cpp \
nsUCS2LEToUnicode.cpp \
nsUCS4BEToUnicode.cpp \
nsUCS4LEToUnicode.cpp \
nsUTF32ToUnicode.cpp \
nsT61ToUnicode.cpp \
nsUserDefinedToUnicode.cpp \
nsUnicodeToUEscape.cpp \
@ -155,8 +154,7 @@ CPPSRCS = \
nsUnicodeToMUTF7.cpp \
nsUnicodeToUCS2BE.cpp \
nsUnicodeToUCS2LE.cpp \
nsUnicodeToUCS4BE.cpp \
nsUnicodeToUCS4LE.cpp \
nsUnicodeToUTF32.cpp \
nsUnicodeToT61.cpp \
nsUnicodeToUserDefined.cpp \
nsUnicodeToSymbol.cpp \

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,237 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): Jungshik Shin <jshin@mailaps.org>
*
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsUCSupport.h"
#include "nsUTF32ToUnicode.h"
#include <string.h>
//----------------------------------------------------------------------
// static functions and macro definition common to nsUTF32(BE|LE)ToUnicode
// Assemble one 32-bit code unit from the four bytes starting at |s|.
//
// The bytes are always combined one at a time, independent of host byte
// order: |s| is an arbitrary position in a byte stream (and gets advanced by
// 1-3 bytes after a partial sequence), so it can not be assumed to be
// suitably aligned for a direct PRUint32 load.  Each byte is widened to
// PRUint32 before shifting so that the shift by 24 never touches the sign
// bit of a promoted int.
// Note: |s| is evaluated more than once; pass a plain pointer expression.
#define LE_STRING_TO_UCS4(s)                                          \
        ( PRUint32(PRUint8((s)[0]))        |                          \
         (PRUint32(PRUint8((s)[1])) <<  8) |                          \
         (PRUint32(PRUint8((s)[2])) << 16) |                          \
         (PRUint32(PRUint8((s)[3])) << 24))

#define BE_STRING_TO_UCS4(s)                                          \
        ( PRUint32(PRUint8((s)[3]))        |                          \
         (PRUint32(PRUint8((s)[2])) <<  8) |                          \
         (PRUint32(PRUint8((s)[1])) << 16) |                          \
         (PRUint32(PRUint8((s)[0])) << 24))
static nsresult ConvertCommon(const char * aSrc,
PRInt32 * aSrcLength,
PRUnichar * aDest,
PRInt32 * aDestLength,
PRUint16 * aState,
PRUint8 * aBuffer,
PRBool aIsLE)
{
NS_ENSURE_TRUE(*aState < 4, NS_ERROR_INVALID_ARG);
NS_ENSURE_TRUE(*aDestLength > 0, NS_ERROR_INVALID_ARG);
const char *src = aSrc;
const char *srcEnd = aSrc + *aSrcLength;
PRUnichar *dest = aDest;
PRUnichar *destEnd = aDest + *aDestLength;
if (*aState > *aSrcLength)
{
memcpy(aBuffer + 4 - *aState, src, *aSrcLength);
*aDestLength = 0;
*aState -= *aSrcLength;
return NS_OK_UDEC_MOREINPUT;
}
PRUint32 ucs4;
// prev. run left a partial UTF-32 seq.
if (*aState > 0)
{
memcpy(aBuffer + 4 - *aState, src, *aState);
ucs4 = aIsLE ? LE_STRING_TO_UCS4(aBuffer) : BE_STRING_TO_UCS4(aBuffer);
if (ucs4 < 0x10000L) // BMP
{
// XXX Do we have to convert surrogate code points to the replacement
// character (0xfffd)?
*dest++= PRUnichar(ucs4);
}
else if (ucs4 < 0x110000L) // plane 1 through plane 16
{
if (destEnd - dest < 2)
{
*aSrcLength = 0;
*aDestLength = 0;
return NS_OK_UDEC_MOREOUTPUT;
}
// ((ucs4 - 0x10000) >> 10) + 0xd800;
*dest++= PRUnichar((ucs4 >> 10) + 0xd7c0); // high surrogate
*dest++= PRUnichar(ucs4 & 0x3ffL | 0xdc00); // low surrogate
}
// Codepoints in plane 17 and higher (> 0x10ffff)
// are not representable in UTF-16 we use for the internal
// character representation. This is not a problem
// because Unicode/ISO 10646 will never assign characters
// in plane 17 and higher. Therefore, we convert them
// to Unicode replacement character (0xfffd).
else
*dest++ = 0xfffd;
src += *aState;
*aState = 0;
}
nsresult rv = NS_OK; // conversion result
for ( ; src < srcEnd && dest < destEnd; src += 4)
{
if (srcEnd - src < 4)
{
// fill up aBuffer until src buffer gets exhausted.
memcpy(aBuffer, src, srcEnd - src);
*aState = 4 - (srcEnd - src); // set add. char to read in next run
src = srcEnd;
rv = NS_OK_UDEC_MOREINPUT;
break;
}
ucs4 = aIsLE ? LE_STRING_TO_UCS4(src) : BE_STRING_TO_UCS4(src);
if (ucs4 < 0x10000L) // BMP
{
// XXX Do we have to convert surrogate code points to the replacement
// character (0xfffd)?
*dest++= PRUnichar(ucs4);
}
else if (ucs4 < 0x110000L) // plane 1 through plane 16
{
if (destEnd - dest < 2)
break;
// ((ucs4 - 0x10000) >> 10) + 0xd800;
*dest++= PRUnichar((ucs4 >> 10) + 0xd7c0);
*dest++= PRUnichar(ucs4 & 0x3ffL | 0xdc00);
}
else // plane 17 and higher
*dest++ = 0xfffd;
}
//output not finished, output buffer too short
if((NS_OK == rv) && (src < srcEnd) && (dest >= destEnd))
rv = NS_OK_UDEC_MOREOUTPUT;
*aSrcLength = src - aSrc;
*aDestLength = dest - aDest;
return rv;
}
//----------------------------------------------------------------------
// Class nsUTF32ToUnicode [implementation]
// Builds the shared decoder base and puts it into its initial state by
// running Reset().
nsUTF32ToUnicode::nsUTF32ToUnicode()
  : nsBasicDecoderSupport()
{
  this->Reset();
}
//----------------------------------------------------------------------
// Subclassing of nsDecoderSupport class [implementation]
// Reports an upper bound, in PRUnichars, on what converting |aSrcLength|
// more bytes can produce.  |aSrc| is not examined.
NS_IMETHODIMP nsUTF32ToUnicode::GetMaxLength(const char * aSrc,
                                             PRInt32 aSrcLength,
                                             PRInt32 * aDestLength)
{
  // Bytes of an incomplete sequence kept from the previous Convert() call
  // also contribute to the output: with three bytes buffered, a single new
  // input byte can already yield a surrogate pair.
  const PRInt32 pendingBytes = mState ? (4 - mState) : 0;

  // Non-BMP characters take two PRUnichars(a pair of surrogate codepoints)
  // so that we have to divide by 2 instead of 4 for the worst case.
  *aDestLength = (aSrcLength + pendingBytes) / 2;
  return NS_OK;
}
//----------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [implementation]
// Drops any partially received 4-byte sequence and returns the decoder to
// its initial state.
NS_IMETHODIMP nsUTF32ToUnicode::Reset()
{
  // clear the scratch buffer for an incomplete sequence
  memset(mBufferInc, 0, 4);
  // no additional bytes are needed to complete a UTF-32 4byte seq.
  mState = 0;
  return NS_OK;
}
//----------------------------------------------------------------------
// Class nsUTF32BEToUnicode [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUTF32ToUnicode class [implementation]
// Decodes big-endian UTF-32 by delegating to ConvertCommon with this
// object's partial-sequence state.
NS_IMETHODIMP nsUTF32BEToUnicode::Convert(const char * aSrc,
                                          PRInt32 * aSrcLength,
                                          PRUnichar * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool inputIsLittleEndian = PR_FALSE;
  nsresult result = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
                                  &mState, mBufferInc, inputIsLittleEndian);
  return result;
}
//----------------------------------------------------------------------
// Class nsUTF32LEToUnicode [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUTF32ToUnicode class [implementation]
// Decodes little-endian UTF-32 by delegating to ConvertCommon with this
// object's partial-sequence state.
NS_IMETHODIMP nsUTF32LEToUnicode::Convert(const char * aSrc,
                                          PRInt32 * aSrcLength,
                                          PRUnichar * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool inputIsLittleEndian = PR_TRUE;
  nsresult result = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
                                  &mState, mBufferInc, inputIsLittleEndian);
  return result;
}
// XXX : What to do with 'unflushed' mBufferInc?? : Finish()

Просмотреть файл

@ -0,0 +1,129 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): Jungshik Shin <jshin@mailaps.org>
*
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsUTF32ToUnicode_h___
#define nsUTF32ToUnicode_h___
//----------------------------------------------------------------------
// Class nsUTF32ToUnicode [declaration]
/**
 * A character set converter from UTF-32 to Unicode (UTF-16).
 * The base class for the UTF-32BE/UTF-32LE to Unicode converters: it holds
 * the state needed to resume a 4-byte sequence that was split across two
 * Convert() calls and implements GetMaxLength() and Reset(); Convert()
 * itself is provided by the byte-order specific subclasses.
 * @created 08/Dec/2002
 * @author Jungshik Shin
 */
class nsUTF32ToUnicode : public nsBasicDecoderSupport
{
public:
/**
 * Class constructor. Initializes the decoder state by calling Reset().
 */
nsUTF32ToUnicode();
protected:
// the number of additional bytes to read to complete an incomplete UTF-32
// 4-byte sequence (0 when no sequence is pending).
PRUint16 mState;
// buffer for the bytes of an incomplete UTF-32 sequence.
PRUint8 mBufferInc[4];
//--------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [declaration]
// Stores in *aDestLength an upper bound on the number of PRUnichars needed.
NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
PRInt32 * aDestLength);
// Clears mState and mBufferInc.
NS_IMETHOD Reset();
};
//----------------------------------------------------------------------
// Class nsUTF32BEToUnicode [declaration]
/**
 * A character set converter from UTF-32BE (big endian) to Unicode.
 * A subclass of nsUTF32ToUnicode that only supplies Convert().
 * @created 08/Dec/2002
 * @author Jungshik Shin
 */
class nsUTF32BEToUnicode : public nsUTF32ToUnicode
{
public:
//--------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [declaration]
// On return *aSrcLength holds the bytes consumed and *aDestLength the
// PRUnichars written.
NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength,
PRUnichar * aDest, PRInt32 * aDestLength);
};
//----------------------------------------------------------------------
// Class nsUTF32LEToUnicode [declaration]
/**
 * A character set converter from UTF-32LE (little endian) to Unicode.
 * A subclass of nsUTF32ToUnicode that only supplies Convert().
 * @created 08/Dec/2002
 * @author Jungshik Shin
 */
class nsUTF32LEToUnicode : public nsUTF32ToUnicode
{
public:
//--------------------------------------------------------------------
// Subclassing of nsBasicDecoderSupport class [declaration]
// On return *aSrcLength holds the bytes consumed and *aDestLength the
// PRUnichars written.
NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength,
PRUnichar * aDest, PRInt32 * aDestLength);
};
#endif /* nsUTF32ToUnicode_h___ */

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,262 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): Jungshik Shin <jshin@mailaps.org>
*
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include <string.h>
#include "nsUCSupport.h"
#include "nsUnicodeToUTF32.h"
// Store the 32-bit value |u| as four bytes starting at |s|.
//
// The bytes are always written one at a time, independent of host byte
// order: |s| points into a caller supplied char buffer that can not be
// assumed to be suitably aligned for a direct PRUint32 store.  |u| is
// evaluated exactly once; |s| is evaluated more than once.
#define UCS4_TO_LE_STRING(u, s)                                 \
  PR_BEGIN_MACRO                                                \
    const PRUint32 ucs4Value_ = (u);                            \
    (s)[3] = PRUint8((ucs4Value_ >> 24) & 0xffL);               \
    (s)[2] = PRUint8((ucs4Value_ >> 16) & 0xffL);               \
    (s)[1] = PRUint8((ucs4Value_ >> 8) & 0xffL);                \
    (s)[0] = PRUint8(ucs4Value_ & 0xffL);                       \
  PR_END_MACRO

#define UCS4_TO_BE_STRING(u, s)                                 \
  PR_BEGIN_MACRO                                                \
    const PRUint32 ucs4Value_ = (u);                            \
    (s)[0] = PRUint8((ucs4Value_ >> 24) & 0xffL);               \
    (s)[1] = PRUint8((ucs4Value_ >> 16) & 0xffL);               \
    (s)[2] = PRUint8((ucs4Value_ >> 8) & 0xffL);                \
    (s)[3] = PRUint8(ucs4Value_ & 0xffL);                       \
  PR_END_MACRO
//----------------------------------------------------------------------
// Static functions common to nsUnicodeToUTF32LE and nsUnicodeToUTF32BE
// Shared UTF-16 -> UTF-32 conversion used by the BE and LE encoders.
//
//   aSrc/aSrcLength    in: input PRUnichars.  *aSrcLength is rewritten only
//                      when NS_OK_UENC_MOREOUTPUT is returned (number of
//                      PRUnichars consumed); in the other cases all input
//                      has been consumed and the value is left as passed in.
//   aDest/aDestLength  in: output capacity in bytes; out: bytes written.
//   aHighSurrogate     high surrogate carried over from the previous call
//                      (0 if none); updated when the input ends with a high
//                      surrogate.
//   aIsLE              true to write little endian, otherwise big endian.
//
// Unpaired surrogates are written out unchanged as a single 32-bit unit.
static nsresult ConvertCommon(const PRUnichar * aSrc,
                              PRInt32 * aSrcLength,
                              char * aDest,
                              PRInt32 * aDestLength,
                              PRUnichar * aHighSurrogate,
                              PRBool aIsLE)
{
  const PRUnichar * src = aSrc;
  const PRUnichar * srcEnd = aSrc + *aSrcLength;
  char * dest = aDest;
  const char * destEnd = aDest + *aDestLength;
  PRUint32 ucs4;

  // left-over high surrogate code point from the prev. run.
  if (*aHighSurrogate)
  {
    if (! *aSrcLength)
    {
      *aDestLength = 0;
      return NS_OK_UENC_MOREINPUT;
    }
    if (*aDestLength < 4)
    {
      *aSrcLength = 0;
      *aDestLength = 0;
      return NS_OK_UENC_MOREOUTPUT;
    }
    if ((*src & 0xfc00) != 0xdc00)
    {
      // Not a low surrogate codepoint: the pending high surrogate is
      // unpaired.  Emit it alone and do NOT consume *src; that character is
      // encoded by the main loop below instead of being dropped.
      ucs4 = PRUint32(*aHighSurrogate);
    }
    else
    {
      ucs4 = (((*aHighSurrogate & 0x3ffL) << 10) | (*src & 0x3ffL)) + 0x10000;
      ++src;  // the low surrogate completes the pair
    }

    if (aIsLE)
      UCS4_TO_LE_STRING(ucs4, dest);
    else
      UCS4_TO_BE_STRING(ucs4, dest);
    dest += 4;
    *aHighSurrogate = 0;
  }

  while (src < srcEnd) {
    // regular codepoint or an unpaired low surrogate
    if ((src[0] & 0xfc00) != 0xd800)
    {
      if (destEnd - dest < 4)
        goto error_more_output;
      ucs4 = PRUint32(src[0]);
    }
    else  // high surrogate
    {
      if ((src+1) >= srcEnd) {
        // we need another surrogate to complete this unicode char; remember
        // the high surrogate for the next call (or for Finish).
        *aHighSurrogate = src[0];
        *aDestLength = dest - aDest;
        return NS_OK_UENC_MOREINPUT;
      }
      // handle surrogate
      if (destEnd - dest < 4)
        goto error_more_output;
      if ((src[1] & 0xfc00) != 0xdc00)  // unpaired
        ucs4 = PRUint32(src[0]);
      else
      {  // convert surrogate pair to UCS4
        ucs4 = (((src[0] & 0x3ffL) << 10) | (src[1] & 0x3ffL)) + 0x10000;
        ++src;
      }
    }
    if (aIsLE)
      UCS4_TO_LE_STRING(ucs4, dest);
    else
      UCS4_TO_BE_STRING(ucs4, dest);
    dest += 4;
    ++src;
  }

  *aDestLength = dest - aDest;
  return NS_OK;

error_more_output:
  *aSrcLength = src - aSrc;
  *aDestLength = dest - aDest;
  return NS_OK_UENC_MOREOUTPUT;
}
// Flushes a high surrogate left pending by ConvertCommon.
//
// With nothing pending, reports zero bytes and succeeds.  Otherwise the
// surrogate is written unchanged as one 4-byte unit (needs *aDestLength >= 4,
// else NS_OK_UENC_MOREOUTPUT is returned with *aDestLength set to 0) and the
// pending state is cleared.
static nsresult FinishCommon(char * aDest,
                             PRInt32 * aDestLength,
                             PRUnichar * aHighSurrogate,
                             PRBool aIsLE)
{
  // nothing was carried over
  if (!*aHighSurrogate) {
    *aDestLength = 0;
    return NS_OK;
  }

  // one UTF-32 unit needs four bytes of room
  if (*aDestLength < 4) {
    *aDestLength = 0;
    return NS_OK_UENC_MOREOUTPUT;
  }

  char * out = aDest;
  PRUint32 pending = PRUint32(*aHighSurrogate);
  if (aIsLE)
    UCS4_TO_LE_STRING(pending, out);
  else
    UCS4_TO_BE_STRING(pending, out);

  *aHighSurrogate = 0;
  *aDestLength = 4;
  return NS_OK;
}
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32 [implementation]
// nsISupports implementation for the encoder base class (matches the
// NS_DECL_ISUPPORTS in the class declaration); the only interface listed
// is nsIUnicodeEncoder.
NS_IMPL_ISUPPORTS1(nsUnicodeToUTF32, nsIUnicodeEncoder);
//----------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [implementation]
// Reports the output size, in bytes, to reserve for |aSrcLength| input
// PRUnichars: four bytes per input unit.  |aSrc| is not examined.
NS_IMETHODIMP nsUnicodeToUTF32::GetMaxLength(const PRUnichar * aSrc,
                                             PRInt32 aSrcLength,
                                             PRInt32 * aDestLength)
{
  const PRInt32 bytesPerInputUnit = 4;
  *aDestLength = aSrcLength * bytesPerInputUnit;
  return NS_OK;
}
// Sets every bit in the first 0x10000 bits (0x2000 bytes) of |aInfo|.
// NOTE(review): presumably one bit per BMP code point, marking all of them
// as representable - confirm against the nsIUnicodeEncoder contract.
NS_IMETHODIMP nsUnicodeToUTF32::FillInfo(PRUint32 *aInfo)
{
  const size_t infoBytes = (0x10000L >> 3);
  memset(aInfo, 0xFF, infoBytes);
  return NS_OK;
}
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32BE [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUnicodeToUTF32 class [implementation]
// Encodes to big-endian UTF-32 by delegating to ConvertCommon with this
// object's pending high surrogate.
NS_IMETHODIMP nsUnicodeToUTF32BE::Convert(const PRUnichar * aSrc,
                                          PRInt32 * aSrcLength,
                                          char * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool outputIsLittleEndian = PR_FALSE;
  nsresult result = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
                                  &mHighSurrogate, outputIsLittleEndian);
  return result;
}
// Flushes a pending high surrogate, if any, as one big-endian unit.
NS_IMETHODIMP nsUnicodeToUTF32BE::Finish(char * aDest,
                                         PRInt32 * aDestLength)
{
  const PRBool outputIsLittleEndian = PR_FALSE;
  nsresult result = FinishCommon(aDest, aDestLength, &mHighSurrogate,
                                 outputIsLittleEndian);
  return result;
}
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32LE [implementation]
//----------------------------------------------------------------------
// Subclassing of nsUnicodeToUTF32 class [implementation]
// Encodes to little-endian UTF-32 by delegating to ConvertCommon with this
// object's pending high surrogate.
NS_IMETHODIMP nsUnicodeToUTF32LE::Convert(const PRUnichar * aSrc,
                                          PRInt32 * aSrcLength,
                                          char * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool outputIsLittleEndian = PR_TRUE;
  nsresult result = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
                                  &mHighSurrogate, outputIsLittleEndian);
  return result;
}
// Flushes a pending high surrogate, if any, as one little-endian unit.
NS_IMETHODIMP nsUnicodeToUTF32LE::Finish(char * aDest,
                                         PRInt32 * aDestLength)
{
  const PRBool outputIsLittleEndian = PR_TRUE;
  nsresult result = FinishCommon(aDest, aDestLength, &mHighSurrogate,
                                 outputIsLittleEndian);
  return result;
}

Просмотреть файл

@ -0,0 +1,131 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
*/
/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): Jungshik Shin <jshin@mailaps.org>
*
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsUnicodeToUTF32_h___
#define nsUnicodeToUTF32_h___
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32 [declaration]
/**
 * A character set converter from Unicode (UTF-16) to UTF-32.
 * The base class for the Unicode to UTF-32BE/UTF-32LE converters: it keeps
 * the high surrogate pending between Convert() calls and implements the
 * byte-order independent nsIUnicodeEncoder methods; Convert() and Finish()
 * are provided by the byte-order specific subclasses.
 * @created 08/Dec/2002
 * @author Jungshik Shin
 */
class nsUnicodeToUTF32 : public nsIUnicodeEncoder
{
NS_DECL_ISUPPORTS
public:
/**
 * Class constructor. Starts with no pending high surrogate.
 */
nsUnicodeToUTF32() {mHighSurrogate = 0;};
virtual ~nsUnicodeToUTF32() {};
protected:
// High surrogate seen at the very end of the previous Convert() input and
// still waiting for its low surrogate; 0 when nothing is pending.
PRUnichar mHighSurrogate;
// Stores in *aDestLength the number of output bytes to reserve for
// aSrcLength input PRUnichars.
NS_IMETHOD GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
PRInt32 * aDestLength);
//--------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [declaration]
// Forgets any pending high surrogate.
NS_IMETHOD Reset() {mHighSurrogate = 0; return NS_OK;};
NS_IMETHOD FillInfo(PRUint32* aInfo);
// No-op: the arguments are ignored and NS_OK is returned.
NS_IMETHOD SetOutputErrorBehavior(PRInt32 aBehavior,
nsIUnicharEncoder * aEncoder,
PRUnichar aChar)
{return NS_OK;};
};
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32BE [declaration]
/**
 * A character set converter from Unicode to UTF-32BE (big endian).
 * A subclass of nsUnicodeToUTF32 that supplies Convert() and Finish().
 * @created 08/Dec/2002
 * @author Jungshik Shin
 */
class nsUnicodeToUTF32BE : public nsUnicodeToUTF32
{
public:
//--------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [declaration]
// Encodes aSrc into aDest; *aDestLength receives the bytes written.
NS_IMETHOD Convert(const PRUnichar * aSrc, PRInt32 * aSrcLength,
char * aDest, PRInt32 * aDestLength);
// Writes out a high surrogate still pending from Convert(), if any.
NS_IMETHOD Finish(char * aDest, PRInt32 * aDestLength);
};
//----------------------------------------------------------------------
// Class nsUnicodeToUTF32LE [declaration]
/**
 * A character set converter from Unicode to UTF-32LE (little endian).
 * A subclass of nsUnicodeToUTF32 that supplies Convert() and Finish().
 * @created 08/Dec/2002
 * @author Jungshik Shin
 */
class nsUnicodeToUTF32LE : public nsUnicodeToUTF32
{
public:
//--------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [declaration]
// Encodes aSrc into aDest; *aDestLength receives the bytes written.
NS_IMETHOD Convert(const PRUnichar * aSrc, PRInt32 * aSrcLength,
char * aDest, PRInt32 * aDestLength);
// Writes out a high surrogate still pending from Convert(), if any.
NS_IMETHOD Finish(char * aDest, PRInt32 * aDestLength);
};
#endif /* nsUnicodeToUTF32_h___ */