зеркало из https://github.com/mozilla/gecko-dev.git
416 строки
12 KiB
C++
416 строки
12 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Netscape Communications Corporation.
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Pierre Phaneuf <pp@ludusdesign.com>
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
|
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
|
|
#include "prmem.h"
|
|
#include "prprf.h"
|
|
#include "nsIServiceManager.h"
|
|
#include "nsIComponentManager.h"
|
|
#include "nsICharsetConverterManager.h"
|
|
#include "nsSaveAsCharset.h"
|
|
#include "nsCRT.h"
|
|
#include "nsUnicharUtils.h"
|
|
#include "nsCompressedCharMap.h"
|
|
|
|
//
|
|
// nsISupports methods
|
|
//
|
|
NS_IMPL_ISUPPORTS1(nsSaveAsCharset, nsISaveAsCharset)
|
|
|
|
// gIgnorableCCMapExt is the character map that DoCharsetConversion() tests
// (via CCMAP_HAS_CHAR_EXT) when MASK_IGNORABLE_FALLBACK(mAttribute) is set:
// unmappable characters found in it are skipped instead of being given a
// fallback representation.
// NOTE(review): the included .x-ccmap file presumably supplies the data that
// DEFINE_X_CCMAP consumes -- confirm against nsCompressedCharMap.h.
#include "ignorables_abjadpoints.x-ccmap"
DEFINE_X_CCMAP(gIgnorableCCMapExt, const);
|
|
|
|
//
|
|
// nsSaveAsCharset
|
|
//
|
|
// Puts the object in its "not yet initialized" state: no charset has been
// selected (index -1, which GetCharset() treats as "Init() not called"),
// entity version 0 and the default HTML text attributes.
nsSaveAsCharset::nsSaveAsCharset()
{
  mCharsetListIndex = -1;
  mEntityVersion = 0;
  mAttribute = attr_htmlTextDefault;
}
|
|
|
|
// The destructor body is empty; no explicit cleanup is performed here.
nsSaveAsCharset::~nsSaveAsCharset() {}
|
|
|
|
// Configures the converter.
//   charset       - charset list string handed to SetupCharsetList()
//   attr          - attribute flags, stored in mAttribute
//   entityVersion - stored in mEntityVersion and later passed to the entity
//                   converter
// The attribute and entity version are stored before anything that can fail,
// so they are updated even when an error is returned.  Returns the first
// failure from the charset list setup, the encoder setup, or the creation of
// the entity converter.
NS_IMETHODIMP
nsSaveAsCharset::Init(const char *charset, PRUint32 attr, PRUint32 entityVersion)
{
  mEntityVersion = entityVersion;
  mAttribute = attr;

  nsresult rv = SetupCharsetList(charset);
  NS_ENSURE_SUCCESS(rv, rv);

  // the encoder starts out on the first charset of the list
  rv = SetupUnicodeEncoder(GetNextCharset());
  NS_ENSURE_SUCCESS(rv, rv);

  // an entity converter is only needed when some entity mode is requested,
  // and an already existing one is reused
  if (attr_EntityNone == MASK_ENTITY(mAttribute))
    return rv;
  if (mEntityConverter)
    return rv;

  mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
  return rv;
}
|
|
|
|
// Converts a Unicode string into a newly allocated byte string in *_retval.
//
// Returns NS_ERROR_NULL_POINTER for a null argument, NS_ERROR_ILLEGAL_VALUE
// for an empty input string and NS_ERROR_FAILURE when no encoder (or, for
// attr_EntityBeforeCharsetConv, no entity converter) is available.
//
// When MASK_CHARSET_FALLBACK(mAttribute) is set and a conversion reports
// NS_ERROR_UENC_NOMAPPING, the conversion is retried with each following
// charset from the list.  If the list runs out, the result of the last
// attempt is left in *_retval and NS_ERROR_UENC_NOMAPPING is returned.
NS_IMETHODIMP
nsSaveAsCharset::Convert(const PRUnichar *inString, char **_retval)
{
  if (nsnull == _retval)
    return NS_ERROR_NULL_POINTER;
  if (nsnull == inString)
    return NS_ERROR_NULL_POINTER;
  if (0 == *inString)
    return NS_ERROR_ILLEGAL_VALUE;
  nsresult rv = NS_OK;

  NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
  NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);

  *_retval = nsnull;

  // make sure to start from the first charset in the list
  if (mCharsetListIndex > 0) {
    mCharsetListIndex = -1;
    rv = SetupUnicodeEncoder(GetNextCharset());
    NS_ENSURE_SUCCESS(rv, rv);
  }

  do {
    // fallback to the next charset in the list if the last conversion failed
    // by an unmapped character
    if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
      const char * charset = GetNextCharset();
      if (!charset)
        break;  // list exhausted: keep the last result for the caller

      // Drop the previous attempt's output before switching encoders, so
      // that it is not leaked when setting up the next encoder fails and we
      // return early.
      PR_FREEIF(*_retval);
      rv = SetupUnicodeEncoder(charset);
      NS_ENSURE_SUCCESS(rv, rv);
    }

    if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
      NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
      NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
      PRUnichar *entity = nsnull;
      // do the entity conversion first
      rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
      if (NS_SUCCEEDED(rv)) {
        rv = DoCharsetConversion(entity, _retval);
        nsMemory::Free(entity);
      }
    }
    else
      rv = DoCharsetConversion(inString, _retval);

  } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);

  return rv;
}
|
|
|
|
// Hands back a newly allocated copy of the name of the charset currently
// selected by mCharsetListIndex.
// Fails with NS_ERROR_FAILURE when no charset has been selected yet (index
// still negative) or the selected entry has no string, and with
// NS_ERROR_OUT_OF_MEMORY when the copy cannot be allocated.
NS_IMETHODIMP
nsSaveAsCharset::GetCharset(char * *aCharset)
{
  NS_ENSURE_ARG(aCharset);
  NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
  NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);

  const char *charset = mCharsetList[mCharsetListIndex]->get();
  if (charset) {
    *aCharset = nsCRT::strdup(charset);
    if (*aCharset)
      return NS_OK;
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // no usable name: clear the out parameter before reporting the failure
  *aCharset = nsnull;
  NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
  return NS_ERROR_FAILURE;
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// do the fallback, reallocate the buffer if necessary
|
|
// need to pass destination buffer info (size, current position and estimation of rest of the conversion)
|
|
NS_IMETHODIMP
|
|
nsSaveAsCharset::HandleFallBack(PRUint32 character, char **outString, PRInt32 *bufferLength,
|
|
PRInt32 *currentPos, PRInt32 estimatedLength)
|
|
{
|
|
if((nsnull == outString ) || (nsnull == bufferLength) ||(nsnull ==currentPos))
|
|
return NS_ERROR_NULL_POINTER;
|
|
char fallbackStr[256];
|
|
nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
|
|
if (NS_SUCCEEDED(rv)) {
|
|
PRInt32 tempLen = (PRInt32) PL_strlen(fallbackStr);
|
|
|
|
// reallocate if the buffer is not large enough
|
|
if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
|
|
char *temp = (char *) PR_Realloc(*outString, *bufferLength + tempLen);
|
|
if (NULL != temp) {
|
|
// adjust length/pointer after realloc
|
|
*bufferLength += tempLen;
|
|
*outString = temp;
|
|
} else {
|
|
*outString = NULL;
|
|
*bufferLength =0;
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
}
|
|
memcpy((*outString + *currentPos), fallbackStr, tempLen);
|
|
*currentPos += tempLen;
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
// Converts inString with the current encoder (mEncoder) into a newly
// allocated, NUL terminated byte string returned through *outString.
//
// Characters the encoder cannot map are replaced through HandleFallBack()
// unless ATTR_NO_FALLBACK(mAttribute) is set, in which case they are simply
// skipped.  Characters found in gIgnorableCCMapExt are skipped as well when
// MASK_IGNORABLE_FALLBACK(mAttribute) is set.
//
// Returns:
//   NS_ERROR_NULL_POINTER   - outString is null
//   NS_ERROR_OUT_OF_MEMORY  - the destination buffer could not be allocated
//   NS_ERROR_UENC_NOMAPPING - at least one character was unmappable; in this
//                             case *outString is still set to the converted
//                             text so that the caller can do its own fallback
//   any other failure       - *outString stays NULL
//
// inString and mEncoder are not checked here; the caller (Convert) does that.
NS_IMETHODIMP
nsSaveAsCharset::DoCharsetConversion(const PRUnichar *inString, char **outString)
{
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;

  *outString = NULL;

  nsresult rv;
  PRInt32 inStringLength = nsCRT::strlen(inString);   // original input string length
  PRInt32 bufferLength;                               // allocated buffer length
  PRInt32 srcLength;
  PRInt32 dstLength;
  char *dstPtr = NULL;
  PRInt32 pos1, pos2;
  nsresult saveResult = NS_OK;                        // to remember NS_ERROR_UENC_NOMAPPING

  // estimate and allocate the target buffer (reserve extra memory for fallback)
  rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
  if (NS_FAILED(rv)) return rv;

  // 512 bytes are reserved for fallback text and one byte for the
  // terminating NUL.  The encoder is never offered that last byte (see the
  // "- 1" below), so dstPtr[pos2] = '\0' always stays inside the buffer.
  bufferLength = dstLength + 512 + 1;
  dstPtr = (char *) PR_Malloc(bufferLength);
  if (NULL == dstPtr) return NS_ERROR_OUT_OF_MEMORY;

  for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
    // Recompute the remaining input on every pass; pos1 may have been moved
    // past a low surrogate by the fallback code at the end of the last pass.
    srcLength = inStringLength - pos1;

    // convert from unicode
    dstLength = bufferLength - pos2 - 1;
    rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);

    // always make progress, even if the encoder reports nothing consumed
    pos1 += srcLength ? srcLength : 1;
    pos2 += dstLength;
    dstPtr[pos2] = '\0';

    // break: this is usually the case (no error) OR unrecoverable error
    if (NS_ERROR_UENC_NOMAPPING != rv) break;

    // remember this happened
    saveResult = rv;

    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2 - 1;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }

    // do the fallback
    if (!ATTR_NO_FALLBACK(mAttribute)) {
      // the unmappable character is the last one the encoder consumed
      PRUint32 unMappedChar;
      if (IS_HIGH_SURROGATE(inString[pos1-1]) &&
          inStringLength > pos1 && IS_LOW_SURROGATE(inString[pos1])) {
        unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
        pos1++;
      } else {
        unMappedChar = inString[pos1-1];
      }

      // if we're asked to ignore default ignorable code points, skip them.
      if (MASK_IGNORABLE_FALLBACK(mAttribute) &&
          CCMAP_HAS_CHAR_EXT(gIgnorableCCMapExt, unMappedChar))
        continue;

      rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
      if (NS_FAILED(rv))
        break;

      // may reallocate dstPtr and update bufferLength/pos2
      rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
      if (NS_FAILED(rv))
        break;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_SUCCEEDED(rv)) {
    // finish encoder, give it a chance to write extra data like escape sequences
    dstLength = bufferLength - pos2 - 1;
    rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
    if (NS_SUCCEEDED(rv)) {
      pos2 += dstLength;
      dstPtr[pos2] = '\0';
    }
  }

  if (NS_FAILED(rv)) {
    PR_FREEIF(dstPtr);
    return rv;
  }

  *outString = dstPtr;      // set the result string

  // set error code so that the caller can do own fall back
  if (NS_ERROR_UENC_NOMAPPING == saveResult) {
    rv = NS_ERROR_UENC_NOMAPPING;
  }

  return rv;
}
|
|
|
|
// Writes the fallback representation of one unmappable character into
// outString as a NUL terminated string.
//
//   inUCS4       - the unmappable character
//   outString    - destination buffer
//   bufferLength - size of outString in bytes, including room for the NUL
//
// Nothing is written (empty string, NS_OK) when ATTR_NO_FALLBACK(mAttribute)
// is set or the fallback mode is attr_FallbackNone.  With
// attr_EntityAfterCharsetConv an entity is tried first for characters that
// have none of the 0xff0000 bits set; if the entity converter fails, the
// numeric/question-mark fallback selected by MASK_FALLBACK(mAttribute) is
// used instead.
//
// Returns NS_ERROR_NULL_POINTER for a null buffer, NS_ERROR_OUT_OF_MEMORY
// when the entity is missing or does not fit, NS_ERROR_FAILURE when the
// fallback text could not be produced and NS_ERROR_ILLEGAL_VALUE for an
// unknown fallback mode.
NS_IMETHODIMP
nsSaveAsCharset::DoConversionFallBack(PRUint32 inUCS4, char *outString, PRInt32 bufferLength)
{
  NS_ASSERTION(outString, "invalid input");
  if (nsnull == outString)
    return NS_ERROR_NULL_POINTER;

  *outString = '\0';

  nsresult rv = NS_OK;

  if (ATTR_NO_FALLBACK(mAttribute)) {
    return NS_OK;
  }
  if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute) &&
      !(inUCS4 & 0xff0000)) {
    char *entity = NULL;
    rv = mEntityConverter->ConvertToEntity((PRUnichar)inUCS4, mEntityVersion, &entity);
    if (NS_SUCCEEDED(rv)) {
      if (NULL == entity) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      // PL_strcpy also copies the terminating NUL, so the entity has to be
      // strictly shorter than the buffer.
      if ((PRInt32)strlen(entity) >= bufferLength) {
        nsMemory::Free(entity);   // don't leak the entity on the error path
        return NS_ERROR_OUT_OF_MEMORY;
      }
      PL_strcpy(outString, entity);
      nsMemory::Free(entity);
      return rv;
    }
    // entity conversion failed: fall through to the generic fallback below
  }

  switch (MASK_FALLBACK(mAttribute)) {
  case attr_FallbackQuestionMark:
    // needs room for '?' plus the NUL
    if (bufferLength >= 2) {
      *outString++ = '?';
      *outString = '\0';
      rv = NS_OK;
    } else {
      rv = NS_ERROR_FAILURE;
    }
    break;
  case attr_FallbackEscapeU:
    // six hex digits when any of the 0xff0000 bits is set, four otherwise
    if (inUCS4 & 0xff0000)
      rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    else
      rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackDecimalNCR:
    rv = (PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackHexNCR:
    rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
    break;
  case attr_FallbackNone:
    rv = NS_OK;
    break;
  default:
    rv = NS_ERROR_ILLEGAL_VALUE;
    break;
  }

  return rv;
}
|
|
|
|
// Points mEncoder at a Unicode encoder for the given charset, obtained from
// the charset converter manager service.
// Fails if charset is null, the service is unavailable, or the manager
// cannot supply an encoder.
nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
{
  NS_ENSURE_ARG(charset);

  nsresult rv;
  nsCOMPtr<nsICharsetConverterManager> converterManager =
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = converterManager->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));
  return rv;
}
|
|
|
|
// Fills mCharsetList from a list string whose entries are separated by
// commas and/or spaces.
// A list that was already in use (mCharsetListIndex >= 0) is discarded first
// and the index is reset to -1.
// Fails with NS_ERROR_INVALID_ARG when the string is empty.
nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
{
  NS_ENSURE_ARG(charsetList);

  NS_ASSERTION(charsetList[0], "charsetList should not be empty");
  if ('\0' == charsetList[0])
    return NS_ERROR_INVALID_ARG;

  const PRBool listInUse = (mCharsetListIndex >= 0);
  if (listInUse) {
    mCharsetListIndex = -1;
    mCharsetList.Clear();
  }

  mCharsetList.ParseString(charsetList, ", ");
  return NS_OK;
}
|
|
|
|
const char * nsSaveAsCharset::GetNextCharset()
|
|
{
|
|
if ((mCharsetListIndex + 1) >= mCharsetList.Count())
|
|
return nsnull;
|
|
|
|
// bump the index and return the next charset
|
|
return mCharsetList[++mCharsetListIndex]->get();
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Creates a new nsSaveAsCharset and returns it, with one reference added,
// through *inst.
// Returns NS_ERROR_NULL_POINTER when inst is null and
// NS_ERROR_OUT_OF_MEMORY when the object could not be allocated.
nsresult
NS_NewSaveAsCharset(nsISupports **inst)
{
  if (!inst)
    return NS_ERROR_NULL_POINTER;

  *inst = (nsISupports *) new nsSaveAsCharset;
  if (!*inst)
    return NS_ERROR_OUT_OF_MEMORY;

  NS_ADDREF(*inst);
  return NS_OK;
}
|