Fix, take 2, for bug 114134: crashes when reading UTF-8 strings from DTDs, etc.

r=dbaron, sr=jag. When a multi-byte character crossed a buffer boundary, we weren't correctly backing up and instead tried to decode the half-loaded UTF-8 character. In addition, our new "remainder" count was completely broken: we were adding the old remainder to the new remainder, when in fact we had just consumed the old remainder.
2002-01-12 02:38:08 +00:00 · 2002-01-12 02:38:08 +00:00 · bd2a6aec4a
--- a/xpcom/io/nsUnicharInputStream.cpp
+++ b/xpcom/io/nsUnicharInputStream.cpp
@ -247,6 +247,7 @@ PRInt32 UTF8InputStream::Fill(nsresult * aErrorCode)

  // Now convert as much of the byte buffer to unicode as possible
  PRInt32 srcLen = CountValidUTF8Bytes(mByteData->GetBuffer(),remainder + nb);
+  NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer");

  NS_ConvertUTF8toUCS2
    unicodeValue(Substring(mByteData->GetBuffer(),
@ -264,7 +265,7 @@ PRInt32 UTF8InputStream::Fill(nsresult * aErrorCode)

  mUnicharDataOffset = 0;
  mUnicharDataLength = dstLen;
-  mByteDataOffset += srcLen;
+  mByteDataOffset = srcLen;
  
  return dstLen;
 }
@ -274,8 +275,10 @@ UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, PRInt32 aMaxBytes)
 {
  const char *c = aBuffer;
  const char *end = aBuffer + aMaxBytes;
+  const char *lastchar = c;     // pre-initialize in case of 0-length buffer
  
  while (c < end && *c) {
+    lastchar = c;
    if (UTF8traits::isASCII(*c))
      c++;
    else if (UTF8traits::is2byte(*c))
@ -293,6 +296,8 @@ UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, PRInt32 aMaxBytes)
      break; // Otherwise we go into an infinite loop.  But what happens now?
    }
  }
+  if (c > end)
+    c = lastchar;

  return c - aBuffer;
 }