Bug 174351: Encoding errors aren't treated as fatal XML errors. r=smontagu, sr=peterv

2009-02-16 04:22:47 -08:00 · 2009-02-16 04:22:47 -08:00 · ea7e9e26a8
--- a/intl/uconv/native/nsNativeUConvService.cpp
+++ b/intl/uconv/native/nsNativeUConvService.cpp
@ -78,6 +78,10 @@ public:
                            PRInt32 aSrcLength, 
                            PRInt32 * aDestLength);
    NS_IMETHOD Reset();
+
+    virtual void SetInputErrorBehavior(PRInt32 aBehavior);
+
+    virtual PRUnichar GetCharacterForUnMapped();
    
    // Encoder methods:
    
@ -209,6 +213,18 @@ IConvAdaptor::Reset()
 }


+void
+IConvAdaptor::SetInputErrorBehavior(PRInt32 aBehavior)
+{
+}
+
+
+PRUnichar
+IConvAdaptor::GetCharacterForUnMapped()
+{
+    return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
+}
+
 // convert unicode data into some charset.
 nsresult 
 IConvAdaptor::Convert(const PRUnichar * aSrc, 
--- a/intl/uconv/native/nsWinCEUConvService.cpp
+++ b/intl/uconv/native/nsWinCEUConvService.cpp
@ -98,6 +98,9 @@ public:
                          PRInt32 * aDestLength);
  NS_IMETHOD Reset();
  
+  virtual void SetInputErrorBehavior(PRInt32 aBehavior);
+  virtual PRUnichar GetCharacterForUnMapped();
+
  // Encoder methods:
  
  NS_IMETHOD Convert(const PRUnichar * aSrc, 
@ -312,6 +315,17 @@ WinCEUConvAdapter::Reset()
  return NS_OK;
 }

+void
+WinCEUConvAdapter::SetInputErrorBehavior(PRInt32 aBehavior)
+{
+}
+
+PRUnichar
+WinCEUConvAdapter::GetCharacterForUnMapped()
+{
+  return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
+}
+
 // Encoder methods:

 NS_IMETHODIMP
--- a/intl/uconv/public/nsIUnicodeDecoder.h
+++ b/intl/uconv/public/nsIUnicodeDecoder.h
@ -42,13 +42,13 @@
 #include "nsISupports.h"

 // Interface ID for our Unicode Decoder interface
-// {B2F178E1-832A-11d2-8A8E-00600811A836}
+// {25359602-FC70-4d13-A9AB-8086D3827C0D}
 //NS_DECLARE_ID(kIUnicodeDecoderIID,
-//  0xb2f178e1, 0x832a, 0x11d2, 0x8a, 0x8e, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36);
+//  0x25359602, 0xfc70, 0x4d13, 0xa9, 0xab, 0x80, 0x86, 0xd3, 0x82, 0x7c, 0xd);

 #define NS_IUNICODEDECODER_IID	\
-	{ 0xb2f178e1, 0x832a, 0x11d2,	\
-		{ 0x8a, 0x8e, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36 }}
+	{ 0x25359602, 0xfc70, 0x4d13,	\
+		{ 0xa9, 0xab, 0x80, 0x86, 0xd3, 0x82, 0x7c, 0xd }}

 // XXX deprecated
 /*---------- BEGIN DEPRECATED */ 
@ -168,6 +168,20 @@ public:
   * different and urelated buffer of data.
   */
  NS_IMETHOD Reset() = 0;
+
+  /**
+   * Specify what to do when a character cannot be mapped into unicode
+   *
+   * @param aBehavior [IN] the desired behavior
+   * @see kOnError_Recover
+   * @see kOnError_Signal
+   */
+  virtual void SetInputErrorBehavior(PRInt32 aBehavior) = 0;
+
+  /**
+   * Return the Unicode character used in place of an unmappable character.
+   */
+  virtual PRUnichar GetCharacterForUnMapped() = 0;
 };

 NS_DEFINE_STATIC_IID_ACCESSOR(nsIUnicodeDecoder, NS_IUNICODEDECODER_IID)
--- a/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
+++ b/intl/uconv/ucvja/nsJapaneseToUnicode.cpp
@ -50,6 +50,7 @@ static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CI
 #define SJIS_INDEX mMapIndex[0]
 #define JIS0208_INDEX mMapIndex[1]
 #define JIS0212_INDEX gJIS0212Index
+#define SJIS_UNMAPPED	0x30fb

 void nsJapaneseToUnicode::setMapMode()
 {
@ -153,7 +154,9 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
                       break;

                     default:
-                       *dest++ = 0x30FB;
+                       if (mErrBehavior == kOnError_Signal)
+                         goto error_invalidchar;
+                       *dest++ = SJIS_UNMAPPED;
                   }
                   if(dest >= destEnd)
                     goto error1;
@ -178,11 +181,16 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
          {
            PRUint8 off = sbIdx[*src];
            if(0xFF == off) {
-               *dest++ = 0x30FB;
+               if (mErrBehavior == kOnError_Signal)
+                 goto error_invalidchar;
+               *dest++ = SJIS_UNMAPPED;
            } else {
               PRUnichar ch = gJapaneseMap[mData+off];
-               if(ch == 0xfffd) 
-                 ch = 0x30fb;
+               if(ch == 0xfffd) {
+                 if (mErrBehavior == kOnError_Signal)
+                   goto error_invalidchar;
+                 ch = SJIS_UNMAPPED;
+               }
               *dest++ = ch;
            }
            mState = 0;
@ -195,7 +203,10 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
          {
            PRUint8 off = sbIdx[*src];
            if(0xFF == off) {
-               *dest++ = 0x30fb;
+               if (mErrBehavior == kOnError_Signal)
+                 goto error_invalidchar;
+
+               *dest++ = SJIS_UNMAPPED;
            } else {
               *dest++ = mData + off;
            }
@ -210,8 +221,12 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
   }
   *aDestLen = dest - aDest;
   return NS_OK;
+error_invalidchar:
+   *aDestLen = dest - aDest;
+   *aSrcLen = src - (const unsigned char*)aSrc;
+   return NS_ERROR_ILLEGAL_INPUT;
 error1:
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
   src++;
   if ((mState == 0) && (src == srcEnd)) {
     return NS_OK;
@ -220,8 +235,11 @@ error1:
   return NS_OK_UDEC_MOREOUTPUT;
 }

-
-
+PRUnichar
+nsShiftJISToUnicode::GetCharacterForUnMapped()
+{
+  return PRUnichar(SJIS_UNMAPPED);
+}

 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
   const char * aSrc, PRInt32 * aSrcLen,
@ -303,6 +321,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
                 mState = 3; // JIS0212
               } else {
                 // others 
+                 if (mErrBehavior == kOnError_Signal)
+                   goto error_invalidchar;
                 *dest++ = 0xFFFD;
                 if(dest >= destEnd)
                   goto error1;
@ -320,6 +340,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
          {
            PRUint8 off = sbIdx[*src];
            if(0xFF == off) {
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
              *dest++ = 0xFFFD;
               // if the first byte is valid for EUC-JP but the second 
               // is not while being a valid US-ASCII(i.e. < 0xc0), save it
@ -340,6 +362,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
            if((0xA1 <= *src) && (*src <= 0xDF)) {
              *dest++ = (0xFF61-0x00A1) + *src;
            } else {
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
              *dest++ = 0xFFFD;             
              // if 0x8e is not followed by a valid JIS X 0201 byte
              // but by a valid US-ASCII, save it instead of eating it up.
@ -372,6 +396,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
          {
            PRUint8 off = sbIdx[*src];
            if(0xFF == off) {
+              if (mErrBehavior == kOnError_Signal)
+                goto error_invalidchar;
               *dest++ = 0xFFFD;
            } else {
               *dest++ = gJapaneseMap[mData+off];
@ -383,6 +409,8 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
          break;
          case 5: // two bytes undefined
          {
+            if (mErrBehavior == kOnError_Signal)
+              goto error_invalidchar;
            *dest++ = 0xFFFD;
            mState = 0;
            if(dest >= destEnd)
@ -394,8 +422,12 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
   }
   *aDestLen = dest - aDest;
   return NS_OK;
+error_invalidchar:
+   *aDestLen = dest - aDest;
+   *aSrcLen = src - (const unsigned char*)aSrc;
+   return NS_ERROR_ILLEGAL_INPUT;
 error1:
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
   src++;
   if ((mState == 0) && (src == srcEnd)) {
     return NS_OK;
@ -546,6 +578,8 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
              if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
                if((dest+1) >= destEnd)
                  goto error1;
+                if (mErrBehavior == kOnError_Signal)
+                  goto error2;
                *dest++ = 0xFFFD;
              }
              mRunLength = 0;
@ -919,7 +953,7 @@ NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
   *aDestLen = dest - aDest;
   return NS_OK;
 error1:
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
   src++;
   if ((mState == 0) && (src == srcEnd)) {
     return NS_OK;
@ -928,6 +962,6 @@ error1:
   return NS_OK_UDEC_MOREOUTPUT;
 error2:
   *aSrcLen = src - (const unsigned char*)aSrc;
-   *aDestLen = dest-aDest;
+   *aDestLen = dest - aDest;
   return NS_ERROR_UNEXPECTED;
 }
--- a/intl/uconv/ucvja/nsJapaneseToUnicode.h
+++ b/intl/uconv/ucvja/nsJapaneseToUnicode.h
@ -76,6 +76,8 @@ public:
        return NS_OK;
     }

+  virtual PRUnichar GetCharacterForUnMapped();
+
 private:

 private:
--- a/intl/uconv/util/nsUCSupport.cpp
+++ b/intl/uconv/util/nsUCSupport.cpp
@ -51,6 +51,7 @@
 // Class nsBasicDecoderSupport [implementation]

 nsBasicDecoderSupport::nsBasicDecoderSupport() 
+  : mErrBehavior(kOnError_Recover)
 {
 }

@ -72,6 +73,20 @@ NS_IMPL_QUERY_INTERFACE1(nsBasicDecoderSupport, nsIUnicodeDecoder)
 //----------------------------------------------------------------------
 // Interface nsIUnicodeDecoder [implementation]

+void
+nsBasicDecoderSupport::SetInputErrorBehavior(PRInt32 aBehavior)
+{
+  NS_ABORT_IF_FALSE(aBehavior == kOnError_Recover || aBehavior == kOnError_Signal,
+                    "Unknown behavior for SetInputErrorBehavior");
+  mErrBehavior = aBehavior;
+}
+
+PRUnichar
+nsBasicDecoderSupport::GetCharacterForUnMapped()
+{
+  return PRUnichar(0xfffd); // Unicode REPLACEMENT CHARACTER
+}
+
 //----------------------------------------------------------------------
 // Class nsBufferDecoderSupport [implementation]

@ -144,6 +159,11 @@ NS_IMETHODIMP nsBufferDecoderSupport::Convert(const char * aSrc,
    res = ConvertNoBuff(mBuffer, &bcr, dest, &bcw);
    dest += bcw;

+    // Detect invalid input character
+    if (res == NS_ERROR_ILLEGAL_INPUT && mErrBehavior == kOnError_Signal) {
+      break;
+    }
+
    if ((res == NS_OK_UDEC_MOREINPUT) && (bcw == 0)) {
        res = NS_ERROR_UNEXPECTED;
 #if defined(DEBUG_yokoyama) || defined(DEBUG_ftang)
@ -237,7 +257,8 @@ NS_IMETHODIMP nsTableDecoderSupport::ConvertNoBuff(const char * aSrc,
  return nsUnicodeDecodeHelper::ConvertByTable(aSrc, aSrcLength,
                                               aDest, aDestLength,
                                               mScanClass, 
-                                               mShiftInTable, mMappingTable);
+                                               mShiftInTable, mMappingTable,
+                                               mErrBehavior == kOnError_Signal);
 }

 //----------------------------------------------------------------------
@ -273,7 +294,8 @@ NS_IMETHODIMP nsMultiTableDecoderSupport::ConvertNoBuff(const char * aSrc,
                                                    aDest, aDestLength, 
                                                    mTableCount, mRangeArray,
                                                    mScanClassArray,
-                                                    mMappingTable);
+                                                    mMappingTable,
+                                                    mErrBehavior == kOnError_Signal);
 }

 //----------------------------------------------------------------------
@ -309,7 +331,8 @@ NS_IMETHODIMP nsOneByteDecoderSupport::Convert(const char * aSrc,
  return nsUnicodeDecodeHelper::ConvertByFastTable(aSrc, aSrcLength, 
                                                   aDest, aDestLength, 
                                                   mFastTable,
-                                                   ONE_BYTE_TABLE_SIZE);
+                                                   ONE_BYTE_TABLE_SIZE,
+                                                   mErrBehavior == kOnError_Signal);
 }

 NS_IMETHODIMP nsOneByteDecoderSupport::GetMaxLength(const char * aSrc, 
--- a/intl/uconv/util/nsUCSupport.h
+++ b/intl/uconv/util/nsUCSupport.h
@ -110,6 +110,12 @@ public:

  //--------------------------------------------------------------------
  // Interface nsIUnicodeDecoder [declaration]
+
+  virtual void SetInputErrorBehavior(PRInt32 aBehavior);
+  virtual PRUnichar GetCharacterForUnMapped();
+
+protected:
+  PRInt32   mErrBehavior;
 };

 //----------------------------------------------------------------------
--- a/intl/uconv/util/nsUnicodeDecodeHelper.cpp
+++ b/intl/uconv/util/nsUnicodeDecodeHelper.cpp
@ -49,7 +49,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByTable(
                                     PRInt32 * aDestLength, 
                                     uScanClassID aScanClass,
                                     uShiftInTable * aShiftInTable, 
-                                     uMappingTable  * aMappingTable)
+                                     uMappingTable  * aMappingTable,
+                                     PRBool aErrorSignal)
 {
  const char * src = aSrc;
  PRInt32 srcLen = *aSrcLength;
@ -82,6 +83,10 @@ nsresult nsUnicodeDecodeHelper::ConvertByTable(
        // somehow some table miss the 0x00 - 0x20 part
        *dest = med;
      } else {
+        if (aErrorSignal) {
+          res = NS_ERROR_ILLEGAL_INPUT;
+          break;
+        }
        // Unicode replacement value for unmappable chars
        *dest = 0xfffd;
      }
@ -107,7 +112,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(
                                     PRInt32 aTableCount, 
                                     const uRange * aRangeArray, 
                                     uScanClassID * aScanClassArray,
-                                     uMappingTable ** aMappingTable)
+                                     uMappingTable ** aMappingTable,
+                                     PRBool aErrorSignal)
 {
  PRUint8 * src = (PRUint8 *)aSrc;
  PRInt32 srcLen = *aSrcLength;
@ -143,7 +149,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(

    if(passRangeCheck && (! passScan))
    {
-      res = NS_OK_UDEC_MOREINPUT;
+      if (res != NS_ERROR_ILLEGAL_INPUT)
+        res = NS_OK_UDEC_MOREINPUT;
      break;
    }
    if(! done)
@ -182,7 +189,15 @@ nsresult nsUnicodeDecodeHelper::ConvertByMultiTable(
          }
        }
        // treat it as NSBR if bcr == 1 and it is 0xa0
-        *dest = ((1==bcr)&&(*src == (PRUint8)0xa0 )) ? 0x00a0 : 0xfffd;
+        if ((1==bcr)&&(*src == (PRUint8)0xa0 )) {
+          *dest = 0x00a0;
+        } else {
+          if (aErrorSignal) {
+            res = NS_ERROR_ILLEGAL_INPUT;
+            break;
+          }
+          *dest = 0xfffd;
+        }
      }
    }

@ -204,7 +219,8 @@ nsresult nsUnicodeDecodeHelper::ConvertByFastTable(
                                     PRUnichar * aDest, 
                                     PRInt32 * aDestLength, 
                                     const PRUnichar * aFastTable, 
-                                     PRInt32 aTableSize)
+                                     PRInt32 aTableSize,
+                                     PRBool aErrorSignal)
 {
  PRUint8 * src = (PRUint8 *)aSrc;
  PRUint8 * srcEnd = src;
@ -219,7 +235,14 @@ nsresult nsUnicodeDecodeHelper::ConvertByFastTable(
    res = NS_OK;
  }

-  for (; src<srcEnd;) *dest++ = aFastTable[*src++];
+  for (; src<srcEnd; src++, dest++) {
+    *dest = aFastTable[*src];
+    // Stop on the offending byte without consuming it, as the other decoders do.
+    if (*dest == 0xfffd && aErrorSignal) {
+      res = NS_ERROR_ILLEGAL_INPUT;
+      break;
+    }
+  }

  *aSrcLength = src - (PRUint8 *)aSrc;
  *aDestLength  = dest - aDest;
--- a/intl/uconv/util/nsUnicodeDecodeHelper.h
+++ b/intl/uconv/util/nsUnicodeDecodeHelper.h
@ -57,7 +57,8 @@ public:
                                 PRUnichar * aDest, PRInt32 * aDestLength,
                                 uScanClassID aScanClass,
                                 uShiftInTable * aShiftInTable,
-                                 uMappingTable  * aMappingTable);
+                                 uMappingTable  * aMappingTable,
+                                 PRBool aErrorSignal = PR_FALSE);

  /**
   * Converts data using a set of lookup tables.
@ -65,14 +66,14 @@ public:
  static nsresult ConvertByMultiTable(const char * aSrc, PRInt32 * aSrcLength,
      PRUnichar * aDest, PRInt32 * aDestLength, PRInt32 aTableCount, 
      const uRange * aRangeArray, uScanClassID * aScanClassArray,
-      uMappingTable ** aMappingTable);
+      uMappingTable ** aMappingTable, PRBool aErrorSignal = PR_FALSE);

  /**
   * Converts data using a fast lookup table.
   */
  static nsresult ConvertByFastTable(const char * aSrc, PRInt32 * aSrcLength, 
      PRUnichar * aDest, PRInt32 * aDestLength, const PRUnichar * aFastTable, 
-      PRInt32 aTableSize);
+      PRInt32 aTableSize, PRBool aErrorSignal);

  /**
   * Create a cache-like fast lookup table from a normal one.
--- a/parser/htmlparser/src/nsExpatDriver.cpp
+++ b/parser/htmlparser/src/nsExpatDriver.cpp
@ -1286,6 +1286,9 @@ nsExpatDriver::WillBuildModel(const CParserContext& aParserContext,
  // Set up the user data.
  XML_SetUserData(mExpatParser, this);

+  // XML must detect invalid character conversion
+  aParserContext.mScanner->OverrideReplacementCharacter(0xffff);
+
  return aSink->WillBuildModel();
 }

--- a/parser/htmlparser/src/nsScanner.cpp
+++ b/parser/htmlparser/src/nsScanner.cpp
@ -108,6 +108,8 @@ nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
  mIncremental = PR_FALSE;
  mUnicodeDecoder = 0;
  mCharsetSource = kCharsetUninitialized;
+  mHasInvalidCharacter = PR_FALSE;
+  mReplacementCharacter = PRUnichar(0x0);
 }

 /**
@ -143,6 +145,8 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream,

  mUnicodeDecoder = 0;
  mCharsetSource = kCharsetUninitialized;
+  mHasInvalidCharacter = PR_FALSE;
+  mReplacementCharacter = PRUnichar(0x0);
  SetDocumentCharset(aCharset, aSource);
 }

@ -184,8 +188,16 @@ nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSou
  NS_ASSERTION(nsParser::GetCharsetConverterManager(),
               "Must have the charset converter manager!");

-  return nsParser::GetCharsetConverterManager()->
+  res = nsParser::GetCharsetConverterManager()->
    GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
+  if (NS_SUCCEEDED(res) && mUnicodeDecoder)
+  {
+     // Have the decoder signal undecodable input instead of silently
+     // replacing it, so that XML encoding errors can be detected.
+     mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
+  }
+
+  return res;
 }


@ -303,6 +315,8 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,

    PRInt32 totalChars = 0;
    PRInt32 unicharLength = unicharBufLen;
+    PRInt32 errorPos = -1;
+
    do {
      PRInt32 srcLength = aLen;
      res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
@ -310,8 +324,8 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
      totalChars += unicharLength;
      // Continuation of failure case
      if(NS_FAILED(res)) {
-        // if we failed, we consume one byte, replace it with U+FFFD
-        // and try the conversion again.
+        // if we failed, we consume one byte, replace it with the replacement
+        // character and try the conversion again.

        // This is only needed because some decoders don't follow the
        // nsIUnicodeDecoder contract: they return a failure when *aDestLength
@ -321,7 +335,13 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
          break;
        }

-        unichars[unicharLength++] = (PRUnichar)0xFFFD;
+        if (mReplacementCharacter == 0x0 && errorPos == -1) {
+          errorPos = totalChars;
+        }
+        unichars[unicharLength++] = mReplacementCharacter == 0x0 ?
+                                    mUnicodeDecoder->GetCharacterForUnMapped() :
+                                    mReplacementCharacter;
+
        unichars = unichars + unicharLength;
        unicharLength = unicharBufLen - (++totalChars);

@ -344,7 +364,7 @@ nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
    // since it doesn't reflect on our success or failure
    // - Ref. bug 87110
    res = NS_OK; 
-    if (!AppendToBuffer(buffer, aRequest))
+    if (!AppendToBuffer(buffer, aRequest, errorPos))
      res = NS_ERROR_OUT_OF_MEMORY;
  }
  else {
@ -1143,7 +1163,8 @@ void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
 }

 PRBool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
-                                 nsIRequest *aRequest)
+                                 nsIRequest *aRequest,
+                                 PRInt32 aErrorPos)
 {
  if (nsParser::sParserDataListeners && mParser &&
      NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(),
@ -1171,6 +1192,12 @@ PRBool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
    mCountRemaining += aBuf->DataLength();
  }

+  if (aErrorPos != -1 && !mHasInvalidCharacter) {
+    mHasInvalidCharacter = PR_TRUE;
+    mFirstInvalidPosition = mCurrentPosition;
+    mFirstInvalidPosition.advance(aErrorPos);
+  }
+
  if (mFirstNonWhitespacePosition == -1) {
    nsScannerIterator iter(mCurrentPosition);
    nsScannerIterator end(mEndPosition);
@ -1235,5 +1262,12 @@ void nsScanner::SelfTest(void) {
 #endif
 }

+void nsScanner::OverrideReplacementCharacter(PRUnichar aReplacementCharacter)
+{
+  mReplacementCharacter = aReplacementCharacter;

+  if (mHasInvalidCharacter) {
+    ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);
+  }
+}

--- a/parser/htmlparser/src/nsScanner.h
+++ b/parser/htmlparser/src/nsScanner.h
@ -315,9 +315,19 @@ class nsScanner {
        mParser = aParser;
      }

+
+      /**
+       * Override replacement character used by nsIUnicodeDecoder.
+       * Default behavior is that it uses nsIUnicodeDecoder's mapping.
+       *
+       * @param aReplacementCharacter the replacement character
+       *        XML (expat) parser uses 0xffff
+       */
+      void OverrideReplacementCharacter(PRUnichar aReplacementCharacter);
+
  protected:

-      PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest);
+      PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, PRInt32 aErrorPos = -1);
      PRBool AppendToBuffer(const nsAString& aStr)
      {
        nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
@ -331,10 +341,13 @@ class nsScanner {
      nsScannerIterator            mCurrentPosition; // The position we will next read from in the scanner buffer
      nsScannerIterator            mMarkPosition;    // The position last marked (we may rewind to here)
      nsScannerIterator            mEndPosition;     // The current end of the scanner buffer
+      nsScannerIterator            mFirstInvalidPosition; // The position of the first invalid character that was detected
      nsString        mFilename;
      PRUint32        mCountRemaining; // The number of bytes still to be read
                                       // from the scanner buffer
      PRPackedBool    mIncremental;
+      PRPackedBool    mHasInvalidCharacter;
+      PRUnichar       mReplacementCharacter;
      PRInt32         mFirstNonWhitespacePosition;
      PRInt32         mCharsetSource;
      nsCString       mCharset;
--- a/parser/htmlparser/tests/mochitest/Makefile.in
+++ b/parser/htmlparser/tests/mochitest/Makefile.in
@ -51,6 +51,7 @@ _TEST_FILES =	parser_datreader.js \
 		html5lib_tree_dat3.txt \
 		html5_tree_construction_exceptions.js \
 		test_html5_tree_construction.html \
+		test_bug174351.html \
 	 	test_bug339350.xhtml \
 		test_bug358797.html \
 		test_bug396568.html \
@ -58,6 +59,7 @@ _TEST_FILES =	parser_datreader.js \
 		test_bug460437.xhtml \
 		test_compatmode.html \
 		regressions.txt \
+		invalidchar.xml \
 		$(NULL)

 libs:: $(_TEST_FILES)
--- a/parser/htmlparser/tests/mochitest/invalidchar.xml
+++ b/parser/htmlparser/tests/mochitest/invalidchar.xml
@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <fail> This is an invalid byte in UTF-8: ¿ </fail>
+</root>
--- a/parser/htmlparser/tests/mochitest/test_bug174351.html
+++ b/parser/htmlparser/tests/mochitest/test_bug174351.html
@ -0,0 +1,32 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=174351
+-->
+<head>
+  <title>Test for Bug 174351</title>
+  <script type="text/javascript" src="/MochiKit/MochiKit.js"></script>
+  <script type="text/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css" />
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=174351">Mozilla Bug 174351</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+
+</div>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+  var iframe = document.createElement('iframe');
+  iframe.src = "invalidchar.xml";
+  iframe.onload = function () {
+    var doc = iframe.contentDocument; // the frame itself, not a childNodes index
+    ok(doc.documentElement.tagName != "root", "Since XML has invalid enconding, must throw error");
+  };
+  // Inserting the frame starts loading the XML file with the invalid UTF-8 byte.
+  document.getElementById('test').appendChild(iframe);
+</script>
+</pre>
+</body>
+</html>
+