Bug 174351: Encoding errors aren't treated as fatal XML errors. r=smontagu, sr=peterv
This commit is contained in:
Родитель
1bc76957a3
Коммит
ea7e9e26a8
|
@ -78,6 +78,10 @@ public:
|
|||
PRInt32 aSrcLength,
|
||||
PRInt32 * aDestLength);
|
||||
NS_IMETHOD Reset();
|
||||
|
||||
virtual void SetInputErrorBehavior(PRInt32 aBehavior);
|
||||
|
||||
virtual PRUnichar GetCharacterForUnMapped();
|
||||
|
||||
// Encoder methods:
|
||||
|
||||
|
@ -209,6 +213,18 @@ IConvAdaptor::Reset()
|
|||
}
|
||||
|
||||
|
||||
// nsIUnicodeDecoder::SetInputErrorBehavior implementation for the iconv adaptor.
// The body is empty: aBehavior is not stored or acted upon here, so calling
// this method changes nothing in this adaptor.
// @param aBehavior  the requested error behavior (ignored by this implementation)
void
|
||||
IConvAdaptor::SetInputErrorBehavior(PRInt32 aBehavior)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
// Returns the character this adaptor reports for input that cannot be mapped
// to Unicode: always U+FFFD.
PRUnichar
|
||||
IConvAdaptor::GetCharacterForUnMapped()
|
||||
{
|
||||
return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
|
||||
}
|
||||
|
||||
// convert unicode data into some charset.
|
||||
nsresult
|
||||
IConvAdaptor::Convert(const PRUnichar * aSrc,
|
||||
|
|
|
@ -98,6 +98,9 @@ public:
|
|||
PRInt32 * aDestLength);
|
||||
NS_IMETHOD Reset();
|
||||
|
||||
virtual void SetInputErrorBehavior(PRInt32 aBehavior);
|
||||
virtual PRUnichar GetCharacterForUnMapped();
|
||||
|
||||
// Encoder methods:
|
||||
|
||||
NS_IMETHOD Convert(const PRUnichar * aSrc,
|
||||
|
@ -312,6 +315,17 @@ WinCEUConvAdapter::Reset()
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
// nsIUnicodeDecoder::SetInputErrorBehavior implementation for the WinCE adapter.
// The body is empty: aBehavior is not stored or acted upon here, so calling
// this method changes nothing in this adapter.
// @param aBehavior  the requested error behavior (ignored by this implementation)
void
|
||||
WinCEUConvAdapter::SetInputErrorBehavior(PRInt32 aBehavior)
|
||||
{
|
||||
}
|
||||
|
||||
// Returns the character this adapter reports for input that cannot be mapped
// to Unicode: always U+FFFD.
PRUnichar
|
||||
WinCEUConvAdapter::GetCharacterForUnMapped()
|
||||
{
|
||||
return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
|
||||
}
|
||||
|
||||
// Encoder methods:
|
||||
|
||||
NS_IMETHODIMP
|
||||
|
|
|
@ -42,13 +42,13 @@
|
|||
#include "nsISupports.h"
|
||||
|
||||
// Interface ID for our Unicode Decoder interface
|
||||
// {B2F178E1-832A-11d2-8A8E-00600811A836}
|
||||
// {25359602-FC70-4d13-A9AB-8086D3827C0D}
|
||||
//NS_DECLARE_ID(kIUnicodeDecoderIID,
|
||||
// 0xb2f178e1, 0x832a, 0x11d2, 0x8a, 0x8e, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36);
|
||||
// 0x25359602, 0xfc70, 0x4d13, 0xa9, 0xab, 0x80, 0x86, 0xd3, 0x82, 0x7c, 0xd);
|
||||
|
||||
#define NS_IUNICODEDECODER_IID \
|
||||
{ 0xb2f178e1, 0x832a, 0x11d2, \
|
||||
{ 0x8a, 0x8e, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36 }}
|
||||
{ 0x25359602, 0xfc70, 0x4d13, \
|
||||
{ 0xa9, 0xab, 0x80, 0x86, 0xd3, 0x82, 0x7c, 0xd }}
|
||||
|
||||
// XXX deprecated
|
||||
/*---------- BEGIN DEPRECATED */
|
||||
|
@ -168,6 +168,20 @@ public:
|
|||
* different and unrelated buffer of data.
|
||||
*/
|
||||
NS_IMETHOD Reset() = 0;
|
||||
|
||||
/**
|
||||
* Specify what to do when a character cannot be mapped into unicode
|
||||
*
|
||||
* @param aBehavior [IN] the desired behavior
|
||||
* @see kOnError_Recover
|
||||
* @see kOnError_Signal
|
||||
*/
|
||||
virtual void SetInputErrorBehavior(PRInt32 aBehavior) = 0;
|
||||
|
||||
/**
|
||||
* return the UNICODE character for unmapped character
|
||||
*/
|
||||
virtual PRUnichar GetCharacterForUnMapped() = 0;
|
||||
};
|
||||
|
||||
NS_DEFINE_STATIC_IID_ACCESSOR(nsIUnicodeDecoder, NS_IUNICODEDECODER_IID)
|
||||
|
|
|
@ -50,6 +50,7 @@ static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CI
|
|||
#define SJIS_INDEX mMapIndex[0]
|
||||
#define JIS0208_INDEX mMapIndex[1]
|
||||
#define JIS0212_INDEX gJIS0212Index
|
||||
#define SJIS_UNMAPPED 0x30fb
|
||||
|
||||
void nsJapaneseToUnicode::setMapMode()
|
||||
{
|
||||
|
@ -153,7 +154,9 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
|||
break;
|
||||
|
||||
default:
|
||||
*dest++ = 0x30FB;
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
*dest++ = SJIS_UNMAPPED;
|
||||
}
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
|
@ -178,11 +181,16 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
|||
{
|
||||
PRUint8 off = sbIdx[*src];
|
||||
if(0xFF == off) {
|
||||
*dest++ = 0x30FB;
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
*dest++ = SJIS_UNMAPPED;
|
||||
} else {
|
||||
PRUnichar ch = gJapaneseMap[mData+off];
|
||||
if(ch == 0xfffd)
|
||||
ch = 0x30fb;
|
||||
if(ch == 0xfffd) {
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
ch = SJIS_UNMAPPED;
|
||||
}
|
||||
*dest++ = ch;
|
||||
}
|
||||
mState = 0;
|
||||
|
@ -195,7 +203,10 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
|||
{
|
||||
PRUint8 off = sbIdx[*src];
|
||||
if(0xFF == off) {
|
||||
*dest++ = 0x30fb;
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
|
||||
*dest++ = SJIS_UNMAPPED;
|
||||
} else {
|
||||
*dest++ = mData + off;
|
||||
}
|
||||
|
@ -210,8 +221,12 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
|||
}
|
||||
*aDestLen = dest - aDest;
|
||||
return NS_OK;
|
||||
error_invalidchar:
|
||||
*aDestLen = dest - aDest;
|
||||
*aSrcLen = src - (const unsigned char*)aSrc;
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
error1:
|
||||
*aDestLen = dest-aDest;
|
||||
*aDestLen = dest - aDest;
|
||||
src++;
|
||||
if ((mState == 0) && (src == srcEnd)) {
|
||||
return NS_OK;
|
||||
|
@ -220,8 +235,11 @@ error1:
|
|||
return NS_OK_UDEC_MOREOUTPUT;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Returns the character the Shift_JIS decoder uses for unmappable input.
// This is SJIS_UNMAPPED (defined as 0x30fb near the top of this file), the
// same value Convert() writes to the output when it does not signal an error.
PRUnichar
|
||||
nsShiftJISToUnicode::GetCharacterForUnMapped()
|
||||
{
|
||||
return PRUnichar(SJIS_UNMAPPED);
|
||||
}
|
||||
|
||||
NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
||||
const char * aSrc, PRInt32 * aSrcLen,
|
||||
|
@ -303,6 +321,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
mState = 3; // JIS0212
|
||||
} else {
|
||||
// others
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
*dest++ = 0xFFFD;
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
|
@ -320,6 +340,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
{
|
||||
PRUint8 off = sbIdx[*src];
|
||||
if(0xFF == off) {
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
*dest++ = 0xFFFD;
|
||||
// if the first byte is valid for EUC-JP but the second
|
||||
// is not while being a valid US-ASCII(i.e. < 0xc0), save it
|
||||
|
@ -340,6 +362,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
if((0xA1 <= *src) && (*src <= 0xDF)) {
|
||||
*dest++ = (0xFF61-0x00A1) + *src;
|
||||
} else {
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
*dest++ = 0xFFFD;
|
||||
// if 0x8e is not followed by a valid JIS X 0201 byte
|
||||
// but by a valid US-ASCII, save it instead of eating it up.
|
||||
|
@ -372,6 +396,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
{
|
||||
PRUint8 off = sbIdx[*src];
|
||||
if(0xFF == off) {
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
*dest++ = 0xFFFD;
|
||||
} else {
|
||||
*dest++ = gJapaneseMap[mData+off];
|
||||
|
@ -383,6 +409,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
break;
|
||||
case 5: // two bytes undefined
|
||||
{
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error_invalidchar;
|
||||
*dest++ = 0xFFFD;
|
||||
mState = 0;
|
||||
if(dest >= destEnd)
|
||||
|
@ -394,8 +422,12 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
}
|
||||
*aDestLen = dest - aDest;
|
||||
return NS_OK;
|
||||
error_invalidchar:
|
||||
*aDestLen = dest - aDest;
|
||||
*aSrcLen = src - (const unsigned char*)aSrc;
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
error1:
|
||||
*aDestLen = dest-aDest;
|
||||
*aDestLen = dest - aDest;
|
||||
src++;
|
||||
if ((mState == 0) && (src == srcEnd)) {
|
||||
return NS_OK;
|
||||
|
@ -546,6 +578,8 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
|||
if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
|
||||
if((dest+1) >= destEnd)
|
||||
goto error1;
|
||||
if (mErrBehavior == kOnError_Signal)
|
||||
goto error2;
|
||||
*dest++ = 0xFFFD;
|
||||
}
|
||||
mRunLength = 0;
|
||||
|
@ -919,7 +953,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
|
|||
*aDestLen = dest - aDest;
|
||||
return NS_OK;
|
||||
error1:
|
||||
*aDestLen = dest-aDest;
|
||||
*aDestLen = dest - aDest;
|
||||
src++;
|
||||
if ((mState == 0) && (src == srcEnd)) {
|
||||
return NS_OK;
|
||||
|
@ -928,6 +962,6 @@ error1:
|
|||
return NS_OK_UDEC_MOREOUTPUT;
|
||||
error2:
|
||||
*aSrcLen = src - (const unsigned char*)aSrc;
|
||||
*aDestLen = dest-aDest;
|
||||
*aDestLen = dest - aDest;
|
||||
return NS_ERROR_UNEXPECTED;
|
||||
}
|
||||
|
|
|
@ -76,6 +76,8 @@ public:
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
virtual PRUnichar GetCharacterForUnMapped();
|
||||
|
||||
private:
|
||||
|
||||
private:
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
// Class nsBasicDecoderSupport [implementation]
|
||||
|
||||
// Constructor: starts the decoder with mErrBehavior set to kOnError_Recover;
// it stays that way until SetInputErrorBehavior() assigns another value.
nsBasicDecoderSupport::nsBasicDecoderSupport()
|
||||
: mErrBehavior(kOnError_Recover)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -72,6 +73,20 @@ NS_IMPL_QUERY_INTERFACE1(nsBasicDecoderSupport, nsIUnicodeDecoder)
|
|||
//----------------------------------------------------------------------
|
||||
// Interface nsIUnicodeDecoder [implementation]
|
||||
|
||||
// Records how the decoder should react to input it cannot map to Unicode.
// @param aBehavior  expected to be kOnError_Recover or kOnError_Signal; any
//                   other value trips the NS_ABORT_IF_FALSE check below.
// The value is stored in mErrBehavior regardless of the check's outcome in
// this function's own code.
// NOTE(review): NS_ABORT_IF_FALSE is presumably a debug-only assertion --
// confirm whether invalid values are rejected in release builds.
void
|
||||
nsBasicDecoderSupport::SetInputErrorBehavior(PRInt32 aBehavior)
|
||||
{
|
||||
NS_ABORT_IF_FALSE(aBehavior == kOnError_Recover || aBehavior == kOnError_Signal,
|
||||
"Unknown behavior for SetInputErrorBehavior");
|
||||
mErrBehavior = aBehavior;
|
||||
}
|
||||
|
||||
// Base implementation: returns U+FFFD as the character for unmappable input.
// Declared virtual in the class, so subclasses may return a different
// character.
PRUnichar
|
||||
nsBasicDecoderSupport::GetCharacterForUnMapped()
|
||||
{
|
||||
return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsBufferDecoderSupport [implementation]
|
||||
|
||||
|
@ -144,6 +159,11 @@ NS_IMETHODIMP nsBufferDecoderSupport::Convert(const char * aSrc,
|
|||
res = ConvertNoBuff(mBuffer, &bcr, dest, &bcw);
|
||||
dest += bcw;
|
||||
|
||||
// Detect invalid input character
|
||||
if (res == NS_ERROR_ILLEGAL_INPUT && mErrBehavior == kOnError_Signal) {
|
||||
break;
|
||||
}
|
||||
|
||||
if ((res == NS_OK_UDEC_MOREINPUT) && (bcw == 0)) {
|
||||
res = NS_ERROR_UNEXPECTED;
|
||||
#if defined(DEBUG_yokoyama) || defined(DEBUG_ftang)
|
||||
|
@ -237,7 +257,8 @@ NS_IMETHODIMP nsTableDecoderSupport::ConvertNoBuff(const char * aSrc,
|
|||
return nsUnicodeDecodeHelper::ConvertByTable(aSrc, aSrcLength,
|
||||
aDest, aDestLength,
|
||||
mScanClass,
|
||||
mShiftInTable, mMappingTable);
|
||||
mShiftInTable, mMappingTable,
|
||||
mErrBehavior == kOnError_Signal);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -273,7 +294,8 @@ NS_IMETHODIMP nsMultiTableDecoderSupport::ConvertNoBuff(const char * aSrc,
|
|||
aDest, aDestLength,
|
||||
mTableCount, mRangeArray,
|
||||
mScanClassArray,
|
||||
mMappingTable);
|
||||
mMappingTable,
|
||||
mErrBehavior == kOnError_Signal);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -309,7 +331,8 @@ NS_IMETHODIMP nsOneByteDecoderSupport::Convert(const char * aSrc,
|
|||
return nsUnicodeDecodeHelper::ConvertByFastTable(aSrc, aSrcLength,
|
||||
aDest, aDestLength,
|
||||
mFastTable,
|
||||
ONE_BYTE_TABLE_SIZE);
|
||||
ONE_BYTE_TABLE_SIZE,
|
||||
mErrBehavior == kOnError_Signal);
|
||||
}
|
||||
|
||||
NS_IMETHODIMP nsOneByteDecoderSupport::GetMaxLength(const char * aSrc,
|
||||
|
|
|
@ -110,6 +110,12 @@ public:
|
|||
|
||||
//--------------------------------------------------------------------
|
||||
// Interface nsIUnicodeDecoder [declaration]
|
||||
|
||||
virtual void SetInputErrorBehavior(PRInt32 aBehavior);
|
||||
virtual PRUnichar GetCharacterForUnMapped();
|
||||
|
||||
protected:
|
||||
PRInt32 mErrBehavior;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
|
|
@ -49,7 +49,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByTable(
|
|||
PRInt32 * aDestLength,
|
||||
uScanClassID aScanClass,
|
||||
uShiftInTable * aShiftInTable,
|
||||
uMappingTable * aMappingTable)
|
||||
uMappingTable * aMappingTable,
|
||||
PRBool aErrorSignal)
|
||||
{
|
||||
const char * src = aSrc;
|
||||
PRInt32 srcLen = *aSrcLength;
|
||||
|
@ -82,6 +83,10 @@ nsresult nsUnicodeDecodeHelper::ConvertByTable(
|
|||
// somehow some table miss the 0x00 - 0x20 part
|
||||
*dest = med;
|
||||
} else {
|
||||
if (aErrorSignal) {
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
// Unicode replacement value for unmappable chars
|
||||
*dest = 0xfffd;
|
||||
}
|
||||
|
@ -107,7 +112,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(
|
|||
PRInt32 aTableCount,
|
||||
const uRange * aRangeArray,
|
||||
uScanClassID * aScanClassArray,
|
||||
uMappingTable ** aMappingTable)
|
||||
uMappingTable ** aMappingTable,
|
||||
PRBool aErrorSignal)
|
||||
{
|
||||
PRUint8 * src = (PRUint8 *)aSrc;
|
||||
PRInt32 srcLen = *aSrcLength;
|
||||
|
@ -143,7 +149,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(
|
|||
|
||||
if(passRangeCheck && (! passScan))
|
||||
{
|
||||
res = NS_OK_UDEC_MOREINPUT;
|
||||
if (res != NS_ERROR_ILLEGAL_INPUT)
|
||||
res = NS_OK_UDEC_MOREINPUT;
|
||||
break;
|
||||
}
|
||||
if(! done)
|
||||
|
@ -182,7 +189,15 @@ nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(
|
|||
}
|
||||
}
|
||||
// treat it as NSBR if bcr == 1 and it is 0xa0
|
||||
*dest = ((1==bcr)&&(*src == (PRUint8)0xa0 )) ? 0x00a0 : 0xfffd;
|
||||
if ((1==bcr)&&(*src == (PRUint8)0xa0 )) {
|
||||
*dest = 0x00a0;
|
||||
} else {
|
||||
if (aErrorSignal) {
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
*dest = 0xfffd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -204,7 +219,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByFastTable(
|
|||
PRUnichar * aDest,
|
||||
PRInt32 * aDestLength,
|
||||
const PRUnichar * aFastTable,
|
||||
PRInt32 aTableSize)
|
||||
PRInt32 aTableSize,
|
||||
PRBool aErrorSignal)
|
||||
{
|
||||
PRUint8 * src = (PRUint8 *)aSrc;
|
||||
PRUint8 * srcEnd = src;
|
||||
|
@ -219,7 +235,14 @@ nsresult nsUnicodeDecodeHelper::ConvertByFastTable(
|
|||
res = NS_OK;
|
||||
}
|
||||
|
||||
for (; src<srcEnd;) *dest++ = aFastTable[*src++];
|
||||
for (; src<srcEnd;) {
|
||||
*dest = aFastTable[*src++];
|
||||
if (*dest == 0xfffd && aErrorSignal) {
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
dest++;
|
||||
}
|
||||
|
||||
*aSrcLength = src - (PRUint8 *)aSrc;
|
||||
*aDestLength = dest - aDest;
|
||||
|
|
|
@ -57,7 +57,8 @@ public:
|
|||
PRUnichar * aDest, PRInt32 * aDestLength,
|
||||
uScanClassID aScanClass,
|
||||
uShiftInTable * aShiftInTable,
|
||||
uMappingTable * aMappingTable);
|
||||
uMappingTable * aMappingTable,
|
||||
PRBool aErrorSignal = PR_FALSE);
|
||||
|
||||
/**
|
||||
* Converts data using a set of lookup tables.
|
||||
|
@ -65,14 +66,14 @@ public:
|
|||
static nsresult ConvertByMultiTable(const char * aSrc, PRInt32 * aSrcLength,
|
||||
PRUnichar * aDest, PRInt32 * aDestLength, PRInt32 aTableCount,
|
||||
const uRange * aRangeArray, uScanClassID * aScanClassArray,
|
||||
uMappingTable ** aMappingTable);
|
||||
uMappingTable ** aMappingTable, PRBool aErrorSignal = PR_FALSE);
|
||||
|
||||
/**
|
||||
* Converts data using a fast lookup table.
|
||||
*/
|
||||
static nsresult ConvertByFastTable(const char * aSrc, PRInt32 * aSrcLength,
|
||||
PRUnichar * aDest, PRInt32 * aDestLength, const PRUnichar * aFastTable,
|
||||
PRInt32 aTableSize);
|
||||
PRInt32 aTableSize, PRBool aErrorSignal);
|
||||
|
||||
/**
|
||||
* Create a cache-like fast lookup table from a normal one.
|
||||
|
|
|
@ -1286,6 +1286,9 @@ nsExpatDriver::WillBuildModel(const CParserContext& aParserContext,
|
|||
// Set up the user data.
|
||||
XML_SetUserData(mExpatParser, this);
|
||||
|
||||
// XML must detect invalid character conversion
|
||||
aParserContext.mScanner->OverrideReplacementCharacter(0xffff);
|
||||
|
||||
return aSink->WillBuildModel();
|
||||
}
|
||||
|
||||
|
|
|
@ -108,6 +108,8 @@ nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
|
|||
mIncremental = PR_FALSE;
|
||||
mUnicodeDecoder = 0;
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
mHasInvalidCharacter = PR_FALSE;
|
||||
mReplacementCharacter = PRUnichar(0x0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -143,6 +145,8 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream,
|
|||
|
||||
mUnicodeDecoder = 0;
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
mHasInvalidCharacter = PR_FALSE;
|
||||
mReplacementCharacter = PRUnichar(0x0);
|
||||
SetDocumentCharset(aCharset, aSource);
|
||||
}
|
||||
|
||||
|
@ -184,8 +188,16 @@ nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSou
|
|||
NS_ASSERTION(nsParser::GetCharsetConverterManager(),
|
||||
"Must have the charset converter manager!");
|
||||
|
||||
return nsParser::GetCharsetConverterManager()->
|
||||
res = nsParser::GetCharsetConverterManager()->
|
||||
GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
|
||||
if (NS_SUCCEEDED(res) && mUnicodeDecoder)
|
||||
{
|
||||
// We need to detect conversion error of character to support XML
|
||||
// encoding error.
|
||||
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
@ -303,6 +315,8 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
|
|||
|
||||
PRInt32 totalChars = 0;
|
||||
PRInt32 unicharLength = unicharBufLen;
|
||||
PRInt32 errorPos = -1;
|
||||
|
||||
do {
|
||||
PRInt32 srcLength = aLen;
|
||||
res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
|
||||
|
@ -310,8 +324,8 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
|
|||
totalChars += unicharLength;
|
||||
// Continuation of failure case
|
||||
if(NS_FAILED(res)) {
|
||||
// if we failed, we consume one byte, replace it with U+FFFD
|
||||
// and try the conversion again.
|
||||
// if we failed, we consume one byte, replace it with the replacement
|
||||
// character and try the conversion again.
|
||||
|
||||
// This is only needed because some decoders don't follow the
|
||||
// nsIUnicodeDecoder contract: they return a failure when *aDestLength
|
||||
|
@ -321,7 +335,13 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
|
|||
break;
|
||||
}
|
||||
|
||||
unichars[unicharLength++] = (PRUnichar)0xFFFD;
|
||||
if (mReplacementCharacter == 0x0 && errorPos == -1) {
|
||||
errorPos = totalChars;
|
||||
}
|
||||
unichars[unicharLength++] = mReplacementCharacter == 0x0 ?
|
||||
mUnicodeDecoder->GetCharacterForUnMapped() :
|
||||
mReplacementCharacter;
|
||||
|
||||
unichars = unichars + unicharLength;
|
||||
unicharLength = unicharBufLen - (++totalChars);
|
||||
|
||||
|
@ -344,7 +364,7 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
|
|||
// since it doesn't reflect on our success or failure
|
||||
// - Ref. bug 87110
|
||||
res = NS_OK;
|
||||
if (!AppendToBuffer(buffer, aRequest))
|
||||
if (!AppendToBuffer(buffer, aRequest, errorPos))
|
||||
res = NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
else {
|
||||
|
@ -1143,7 +1163,8 @@ void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
|
|||
}
|
||||
|
||||
PRBool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
|
||||
nsIRequest *aRequest)
|
||||
nsIRequest *aRequest,
|
||||
PRInt32 aErrorPos)
|
||||
{
|
||||
if (nsParser::sParserDataListeners && mParser &&
|
||||
NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(),
|
||||
|
@ -1171,6 +1192,12 @@ PRBool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
|
|||
mCountRemaining += aBuf->DataLength();
|
||||
}
|
||||
|
||||
if (aErrorPos != -1 && !mHasInvalidCharacter) {
|
||||
mHasInvalidCharacter = PR_TRUE;
|
||||
mFirstInvalidPosition = mCurrentPosition;
|
||||
mFirstInvalidPosition.advance(aErrorPos);
|
||||
}
|
||||
|
||||
if (mFirstNonWhitespacePosition == -1) {
|
||||
nsScannerIterator iter(mCurrentPosition);
|
||||
nsScannerIterator end(mEndPosition);
|
||||
|
@ -1235,5 +1262,12 @@ void nsScanner::SelfTest(void) {
|
|||
#endif
|
||||
}
|
||||
|
||||
// Stores aReplacementCharacter in mReplacementCharacter. If an invalid
// character has already been recorded (mHasInvalidCharacter is set), the
// character at mFirstInvalidPosition is also rewritten to the new value via
// ReplaceCharacter(); only that first recorded position is touched here.
// @param aReplacementCharacter  the character to store and, if needed, write
//                               at the first invalid position
void nsScanner::OverrideReplacementCharacter(PRUnichar aReplacementCharacter)
|
||||
{
|
||||
mReplacementCharacter = aReplacementCharacter;
|
||||
|
||||
if (mHasInvalidCharacter) {
|
||||
ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -315,9 +315,19 @@ class nsScanner {
|
|||
mParser = aParser;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Override replacement character used by nsIUnicodeDecoder.
|
||||
* Default behavior is that it uses nsIUnicodeDecoder's mapping.
|
||||
*
|
||||
* @param aReplacementCharacter the replacement character
|
||||
* XML (expat) parser uses 0xffff
|
||||
*/
|
||||
void OverrideReplacementCharacter(PRUnichar aReplacementCharacter);
|
||||
|
||||
protected:
|
||||
|
||||
PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest);
|
||||
PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, PRInt32 aErrorPos = -1);
|
||||
PRBool AppendToBuffer(const nsAString& aStr)
|
||||
{
|
||||
nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
|
||||
|
@ -331,10 +341,13 @@ class nsScanner {
|
|||
nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer
|
||||
nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here)
|
||||
nsScannerIterator mEndPosition; // The current end of the scanner buffer
|
||||
nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected
|
||||
nsString mFilename;
|
||||
PRUint32 mCountRemaining; // The number of bytes still to be read
|
||||
// from the scanner buffer
|
||||
PRPackedBool mIncremental;
|
||||
PRPackedBool mHasInvalidCharacter;
|
||||
PRUnichar mReplacementCharacter;
|
||||
PRInt32 mFirstNonWhitespacePosition;
|
||||
PRInt32 mCharsetSource;
|
||||
nsCString mCharset;
|
||||
|
|
|
@ -51,6 +51,7 @@ _TEST_FILES = parser_datreader.js \
|
|||
html5lib_tree_dat3.txt \
|
||||
html5_tree_construction_exceptions.js \
|
||||
test_html5_tree_construction.html \
|
||||
test_bug174351.html \
|
||||
test_bug339350.xhtml \
|
||||
test_bug358797.html \
|
||||
test_bug396568.html \
|
||||
|
@ -58,6 +59,7 @@ _TEST_FILES = parser_datreader.js \
|
|||
test_bug460437.xhtml \
|
||||
test_compatmode.html \
|
||||
regressions.txt \
|
||||
invalidchar.xml \
|
||||
$(NULL)
|
||||
|
||||
libs:: $(_TEST_FILES)
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<root>
|
||||
<fail> This is an invalid byte in UTF-8: ¿ </fail>
|
||||
</root>
|
|
@ -0,0 +1,32 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=174351
|
||||
-->
|
||||
<head>
|
||||
<title>Test for Bug 174351</title>
|
||||
<script type="text/javascript" src="/MochiKit/MochiKit.js"></script>
|
||||
<script type="text/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=174351">Mozilla Bug 174351</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script class="testbody" type="text/javascript">
|
||||
var iframe = document.createElement('iframe');
|
||||
iframe.src = "invalidchar.xml";
|
||||
iframe.onload = function () {
|
||||
var doc = document.getElementById('test').childNodes[1].contentDocument;
|
||||
ok(doc.documentElement.tagName != "root", "Since XML has invalid enconding, must throw error");
|
||||
};
|
||||
|
||||
document.getElementById('test').appendChild(iframe);
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
||||
|
Загрузка…
Ссылка в новой задаче