2001-09-26 04:40:45 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2012-05-21 15:12:37 +04:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2001-02-07 02:54:12 +03:00
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Global functions and data [declaration]
|
|
|
|
#include "nsUnicodeToUTF8.h"
|
2015-06-17 14:21:39 +03:00
|
|
|
#include "mozilla/CheckedInt.h"
|
2001-02-07 02:54:12 +03:00
|
|
|
|
2014-04-27 11:06:00 +04:00
|
|
|
NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
|
2001-02-07 02:54:12 +03:00
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
2002-01-15 04:04:24 +03:00
|
|
|
// nsUnicodeToUTF8 class [implementation]
|
2001-02-07 02:54:12 +03:00
|
|
|
|
2015-06-17 14:21:39 +03:00
|
|
|
/**
 * Reports a worst-case size, in bytes, for the UTF-8 output produced from
 * aSrcLength UTF-16 code units.
 *
 * @param aSrc        source buffer; not inspected by this function.
 * @param aSrcLength  number of UTF-16 code units that will be converted.
 * @param aDestLength [out] receives 3 * aSrcLength + 3; must not be null.
 * @return NS_OK on success, NS_ERROR_FAILURE when the bound does not fit
 *         in an int32_t (in which case *aDestLength is left untouched).
 */
NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t* aSrc,
                                            int32_t aSrcLength,
                                            int32_t* aDestLength)
{
  MOZ_ASSERT(aDestLength);

  // Each UTF-16 code unit expands to at most 3 UTF-8 bytes (a surrogate pair
  // is two units for 4 bytes, so 3 per unit still covers it).
  // The extra 3 bytes cover a high surrogate carried over from the previous
  // buffer: if the first unit of this buffer does not complete the pair, the
  // leftover has to be emitted as a 3-byte sequence of its own.
  mozilla::CheckedInt32 worstCase(aSrcLength);
  worstCase *= 3;
  worstCase += 3;

  if (worstCase.isValid()) {
    *aDestLength = worstCase.value();
    return NS_OK;
  }

  // The multiplication or addition overflowed int32_t.
  return NS_ERROR_FAILURE;
}
|
2002-01-15 04:04:24 +03:00
|
|
|
|
2015-06-17 14:21:39 +03:00
|
|
|
/**
 * Converts a buffer of UTF-16 code units to UTF-8.
 *
 * A high surrogate that ends one input buffer is remembered in
 * mHighSurrogate and completed by the first code unit of the next call.
 * Unpaired surrogates are emitted as U+FFFD (EF BF BD).
 *
 * @param aSrc        UTF-16 input.
 * @param aSrcLength  [in] number of code units available in aSrc.
 *                    [out] on NS_OK_UENC_MOREOUTPUT, the number of code units
 *                    consumed; otherwise left unchanged.
 * @param aDest       output buffer for UTF-8 bytes.
 * @param aDestLength [in] capacity of aDest in bytes.
 *                    [out] number of bytes written to aDest.
 * @return NS_OK when all input was converted;
 *         NS_OK_UENC_MOREINPUT when a high surrogate is pending and its
 *         partner has not been supplied yet;
 *         NS_OK_UENC_MOREOUTPUT when aDest is too small to continue.
 */
NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc,
                                       int32_t * aSrcLength,
                                       char * aDest,
                                       int32_t * aDestLength)
{
  const char16_t * src = aSrc;
  const char16_t * srcEnd = aSrc + *aSrcLength;
  char * dest = aDest;
  int32_t destLen = *aDestLength;
  uint32_t n;

  // Complete the surrogate pair left over from the previous call.
  if (mHighSurrogate) {
    // Only bail out when there is no input at all. With input present we
    // must go on and pair (or replace) the pending surrogate; with none,
    // reading *src below would run past the end of the buffer.
    if (src == srcEnd) {
      *aDestLength = 0;
      return NS_OK_UENC_MOREINPUT;
    }
    // Worst case for this step is a 4-byte sequence.
    if (*aDestLength < 4) {
      *aSrcLength = 0;
      *aDestLength = 0;
      return NS_OK_UENC_MOREOUTPUT;
    }
    if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair
      // The pending high surrogate is unpaired. src is deliberately not
      // advanced: the current unit is converted by the main loop below.
      *dest++ = (char)0xef; //replacement character
      *dest++ = (char)0xbf;
      *dest++ = (char)0xbd;
      destLen -= 3;
    } else {
      n = ((mHighSurrogate - (char16_t)0xd800) << 10) +
              (*src - (char16_t)0xdc00) + 0x10000;
      *dest++ = (char)0xf0 | (n >> 18);
      *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
      *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
      *dest++ = (char)0x80 | (n & 0x3f);
      ++src;
      destLen -= 4;
    }
    mHighSurrogate = 0;
  }

  while (src < srcEnd) {
    if ( *src <= 0x007f) {
      // 1-byte sequence (ASCII).
      if (destLen < 1)
        goto error_more_output;
      *dest++ = (char)*src;
      --destLen;
    } else if (*src <= 0x07ff) {
      // 2-byte sequence.
      if (destLen < 2)
        goto error_more_output;
      *dest++ = (char)0xc0 | (*src >> 6);
      *dest++ = (char)0x80 | (*src & 0x003f);
      destLen -= 2;
    } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
      if (*src >= (char16_t)0xdc00) { //not a pair
        // Low surrogate with no preceding high surrogate.
        if (destLen < 3)
          goto error_more_output;
        *dest++ = (char)0xef; //replacement character
        *dest++ = (char)0xbf;
        *dest++ = (char)0xbd;
        destLen -= 3;
        ++src;
        continue;
      }
      if ((src+1) >= srcEnd) {
        //we need another surrogate to complete this unicode char
        mHighSurrogate = *src;
        *aDestLength = dest - aDest;
        return NS_OK_UENC_MOREINPUT;
      }
      //handle surrogate
      if (destLen < 4)
        goto error_more_output;
      if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair
        // Unpaired high surrogate: replace it and let the next iteration
        // convert the following unit on its own.
        *dest++ = (char)0xef; //replacement character
        *dest++ = (char)0xbf;
        *dest++ = (char)0xbd;
        destLen -= 3;
      } else {
        n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
        *dest++ = (char)0xf0 | (n >> 18);
        *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
        *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
        *dest++ = (char)0x80 | (n & 0x3f);
        destLen -= 4;
        // Skip the low surrogate; the high one is skipped at the loop bottom.
        ++src;
      }
    } else {
      if (destLen < 3)
        goto error_more_output;
      //treat rest of the character as BMP
      *dest++ = (char)0xe0 | (*src >> 12);
      *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
      *dest++ = (char)0x80 | (*src & 0x003f);
      destLen -= 3;
    }
    ++src;
  }

  *aDestLength = dest - aDest;
  return NS_OK;

error_more_output:
  // Report how far we got so the caller can resume with a larger buffer.
  *aSrcLength = src - aSrc;
  *aDestLength = dest - aDest;
  return NS_OK_UENC_MOREOUTPUT;
}
|
|
|
|
|
2012-08-22 19:56:38 +04:00
|
|
|
/**
 * Flushes any state left over from Convert().
 *
 * If a high surrogate is still pending it can no longer be paired, so it is
 * written out as U+FFFD (EF BF BD) and the pending state is cleared.
 *
 * @param aDest       output buffer.
 * @param aDestLength [in] capacity of aDest in bytes.
 *                    [out] number of bytes written (0 or 3).
 * @return NS_OK on success, NS_OK_UENC_MOREOUTPUT when a surrogate is
 *         pending but aDest has room for fewer than 3 bytes.
 */
NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
{
  // Nothing pending: nothing to write.
  if (!mHighSurrogate) {
    *aDestLength = 0;
    return NS_OK;
  }

  // Not enough room for the replacement character; keep the pending state
  // so the caller can retry with a larger buffer.
  if (*aDestLength < 3) {
    *aDestLength = 0;
    return NS_OK_UENC_MOREOUTPUT;
  }

  // UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER.
  aDest[0] = (char)0xef;
  aDest[1] = (char)0xbf;
  aDest[2] = (char)0xbd;

  mHighSurrogate = 0;
  *aDestLength = 3;
  return NS_OK;
}
|