Bug 575175: Make the HTML5 parser deal with Unicode decoder signaling -1 consumed bytes. r=smontagu a=blocking

This commit is contained in:
Henri Sivonen 2010-07-26 12:11:09 -07:00
Родитель 31c8318893
Коммит f2738cb6f4
3 изменённых файла: 34 добавления и 17 удалений

Просмотреть файл

@ -125,8 +125,9 @@ public:
* encountered, like a format error, the converter stops and returns an error.
* However, we should keep in mind that we need to be lax in decoding. When
* a decoding error is returned to the caller, it is the caller's
* responsibility to advance over the bad byte and reset the decoder before
* trying to call the decoder again.
* responsibility to advance over the bad byte (unless aSrcLength is -1 in
* which case the caller should call the decoder with 0 offset again) and
* reset the decoder before trying to call the decoder again.
*
* Converter required behavior:
* In this order: when output space is full - return right away. When input
@ -137,7 +138,9 @@ public:
*
* @param aSrc [IN] the source data buffer
* @param aSrcLength [IN/OUT] the length of source data buffer; after
* conversion will contain the number of bytes read
* conversion will contain the number of bytes read or
* -1 on error to indicate that the caller should re-push
* the same buffer after resetting the decoder
* @param aDest [OUT] the destination data buffer
* @param aDestLength [IN/OUT] the length of the destination data buffer;
* after conversion will contain the number of Unicode

Просмотреть файл

@ -65,7 +65,7 @@ nsUTF8ToUnicode::nsUTF8ToUnicode()
* However, there is an edge case where the output can be longer than the
* input: if the previous buffer ended with an incomplete multi-byte
* sequence and this buffer does not begin with a valid continuation
* byte, we will return NS_ERROR_UNEXPECTED and the caller may insert a
* byte, we will return NS_ERROR_ILLEGAL_INPUT and the caller may insert a
* replacement character in the output buffer which corresponds to no
* character in the input buffer. So in the worst case the destination
* will need to be one code unit longer than the source.
@ -341,7 +341,7 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
* Return an error condition. Caller is responsible for flushing and
* refilling the buffer and resetting state.
*/
res = NS_ERROR_UNEXPECTED;
res = NS_ERROR_ILLEGAL_INPUT;
break;
}
} else {
@ -370,7 +370,7 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
((mUcs4 & 0xFFFFF800) == 0xD800) ||
// Codepoints outside the Unicode range are illegal
(mUcs4 > 0x10FFFF)) {
res = NS_ERROR_UNEXPECTED;
res = NS_ERROR_ILLEGAL_INPUT;
break;
}
if (mUcs4 > 0xFFFF) {
@ -396,7 +396,7 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
* for flushing and refilling the buffer and resetting state.
*/
in--;
res = NS_ERROR_UNEXPECTED;
res = NS_ERROR_ILLEGAL_INPUT;
break;
}
}

Просмотреть файл

@ -476,7 +476,7 @@ nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
mLastBuffer = (mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE));
}
PRUint32 totalByteCount = 0;
PRInt32 totalByteCount = 0;
for (;;) {
PRInt32 end = mLastBuffer->getEnd();
PRInt32 byteCount = aCount - totalByteCount;
@ -491,19 +491,31 @@ nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
totalByteCount += byteCount;
aFromSegment += byteCount;
NS_ASSERTION(mLastBuffer->getEnd() <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE, "The Unicode decoder wrote too much data.");
NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE,
"The Unicode decoder wrote too much data.");
NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes.");
NS_ASSERTION(byteCount > 0 || NS_FAILED(convResult),
"The decoder consumed too few bytes but did not signal an error.");
if (NS_FAILED(convResult)) {
// Using the more generic NS_FAILED test above in case there are still
// decoders around that don't use NS_ERROR_ILLEGAL_INPUT properly.
NS_ASSERTION(convResult == NS_ERROR_ILLEGAL_INPUT,
"The decoder signaled an error other than NS_ERROR_ILLEGAL_INPUT.");
// There's an illegal byte in the input. It's now the responsibility
// of this calling code to output a U+FFFD REPLACEMENT CHARACTER and
// reset the decoder.
NS_ASSERTION(totalByteCount < aCount,
"The decoder signaled an error but consumed all input.");
if (totalByteCount < aCount) {
if (totalByteCount < (PRInt32)aCount) {
// advance over the bad byte
++totalByteCount;
++aFromSegment;
} else {
NS_NOTREACHED("The decoder signaled an error but consumed all input.");
// Recovering from this situation in case there are still broken
// decoders, since nsScanner had recovery code, too.
totalByteCount = (PRInt32)aCount;
}
// Emit the REPLACEMENT CHARACTER
@ -515,16 +527,18 @@ nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
}
mUnicodeDecoder->Reset();
if (totalByteCount == aCount) {
*aWriteCount = totalByteCount;
if (totalByteCount == (PRInt32)aCount) {
*aWriteCount = (PRUint32)totalByteCount;
return NS_OK;
}
} else if (convResult == NS_PARTIAL_MORE_OUTPUT) {
mLastBuffer = mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
NS_ASSERTION(totalByteCount < aCount, "The Unicode decoder has consumed too many bytes.");
NS_ASSERTION(totalByteCount < (PRInt32)aCount,
"The Unicode decoder consumed too many bytes.");
} else {
NS_ASSERTION(totalByteCount == aCount, "The Unicode decoder consumed the wrong number of bytes.");
*aWriteCount = totalByteCount;
NS_ASSERTION(totalByteCount == (PRInt32)aCount,
"The Unicode decoder consumed the wrong number of bytes.");
*aWriteCount = (PRUint32)totalByteCount;
return NS_OK;
}
}