зеркало из https://github.com/mozilla/gecko-dev.git
Bug 575175: Make the HTML5 parser deal with Unicode decoder signaling -1 consumed bytes. r=smontagu a=blocking
This commit is contained in:
Родитель
31c8318893
Коммит
f2738cb6f4
|
@ -125,8 +125,9 @@ public:
|
|||
* encountered, like a format error, the converter stops and returns an error.
|
||||
* However, we should keep in mind that we need to be lax in decoding. When
|
||||
* a decoding error is returned to the caller, it is the caller's
|
||||
* responsibility to advance over the bad byte and reset the decoder before
|
||||
* trying to call the decoder again.
|
||||
* responsibility to advance over the bad byte (unless aSrcLength is -1 in
|
||||
* which case the caller should call the decoder with 0 offset again) and
|
||||
* reset the decoder before trying to call the decoder again.
|
||||
*
|
||||
* Converter required behavior:
|
||||
* In this order: when output space is full - return right away. When input
|
||||
|
@ -137,7 +138,9 @@ public:
|
|||
*
|
||||
* @param aSrc [IN] the source data buffer
|
||||
* @param aSrcLength [IN/OUT] the length of source data buffer; after
|
||||
* conversion will contain the number of bytes read
|
||||
* conversion will contain the number of bytes read or
|
||||
* -1 on error to indicate that the caller should re-push
|
||||
* the same buffer after resetting the decoder
|
||||
* @param aDest [OUT] the destination data buffer
|
||||
* @param aDestLength [IN/OUT] the length of the destination data buffer;
|
||||
* after conversion will contain the number of Unicode
|
||||
|
|
|
@ -65,7 +65,7 @@ nsUTF8ToUnicode::nsUTF8ToUnicode()
|
|||
* However, there is an edge case where the output can be longer than the
|
||||
* input: if the previous buffer ended with an incomplete multi-byte
|
||||
* sequence and this buffer does not begin with a valid continuation
|
||||
* byte, we will return NS_ERROR_UNEXPECTED and the caller may insert a
|
||||
* byte, we will return NS_ERROR_ILLEGAL_INPUT and the caller may insert a
|
||||
* replacement character in the output buffer which corresponds to no
|
||||
* character in the input buffer. So in the worst case the destination
|
||||
* will need to be one code unit longer than the source.
|
||||
|
@ -341,7 +341,7 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
|||
* Return an error condition. Caller is responsible for flushing and
|
||||
* refilling the buffer and resetting state.
|
||||
*/
|
||||
res = NS_ERROR_UNEXPECTED;
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -370,7 +370,7 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
|||
((mUcs4 & 0xFFFFF800) == 0xD800) ||
|
||||
// Codepoints outside the Unicode range are illegal
|
||||
(mUcs4 > 0x10FFFF)) {
|
||||
res = NS_ERROR_UNEXPECTED;
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
if (mUcs4 > 0xFFFF) {
|
||||
|
@ -396,7 +396,7 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(const char * aSrc,
|
|||
* for flushing and refilling the buffer and resetting state.
|
||||
*/
|
||||
in--;
|
||||
res = NS_ERROR_UNEXPECTED;
|
||||
res = NS_ERROR_ILLEGAL_INPUT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -476,7 +476,7 @@ nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
|
|||
if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
|
||||
mLastBuffer = (mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE));
|
||||
}
|
||||
PRUint32 totalByteCount = 0;
|
||||
PRInt32 totalByteCount = 0;
|
||||
for (;;) {
|
||||
PRInt32 end = mLastBuffer->getEnd();
|
||||
PRInt32 byteCount = aCount - totalByteCount;
|
||||
|
@ -491,19 +491,31 @@ nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
|
|||
totalByteCount += byteCount;
|
||||
aFromSegment += byteCount;
|
||||
|
||||
NS_ASSERTION(mLastBuffer->getEnd() <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE, "The Unicode decoder wrote too much data.");
|
||||
NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE,
|
||||
"The Unicode decoder wrote too much data.");
|
||||
NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes.");
|
||||
NS_ASSERTION(byteCount > 0 || NS_FAILED(convResult),
|
||||
"The decoder consumed too few bytes but did not signal an error.");
|
||||
|
||||
if (NS_FAILED(convResult)) {
|
||||
// Using the more generic NS_FAILED test above in case there are still
|
||||
// decoders around that don't use NS_ERROR_ILLEGAL_INPUT properly.
|
||||
NS_ASSERTION(convResult == NS_ERROR_ILLEGAL_INPUT,
|
||||
"The decoder signaled an error other than NS_ERROR_ILLEGAL_INPUT.");
|
||||
|
||||
// There's an illegal byte in the input. It's now the responsibility
|
||||
// of this calling code to output a U+FFFD REPLACEMENT CHARACTER and
|
||||
// reset the decoder.
|
||||
|
||||
NS_ASSERTION(totalByteCount < aCount,
|
||||
"The decoder signaled an error but consumed all input.");
|
||||
if (totalByteCount < aCount) {
|
||||
if (totalByteCount < (PRInt32)aCount) {
|
||||
// advance over the bad byte
|
||||
++totalByteCount;
|
||||
++aFromSegment;
|
||||
} else {
|
||||
NS_NOTREACHED("The decoder signaled an error but consumed all input.");
|
||||
// Recovering from this situation in case there are still broken
|
||||
// decoders, since nsScanner had recovery code, too.
|
||||
totalByteCount = (PRInt32)aCount;
|
||||
}
|
||||
|
||||
// Emit the REPLACEMENT CHARACTER
|
||||
|
@ -515,16 +527,18 @@ nsHtml5StreamParser::WriteStreamBytes(const PRUint8* aFromSegment,
|
|||
}
|
||||
|
||||
mUnicodeDecoder->Reset();
|
||||
if (totalByteCount == aCount) {
|
||||
*aWriteCount = totalByteCount;
|
||||
if (totalByteCount == (PRInt32)aCount) {
|
||||
*aWriteCount = (PRUint32)totalByteCount;
|
||||
return NS_OK;
|
||||
}
|
||||
} else if (convResult == NS_PARTIAL_MORE_OUTPUT) {
|
||||
mLastBuffer = mLastBuffer->next = new nsHtml5UTF16Buffer(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
|
||||
NS_ASSERTION(totalByteCount < aCount, "The Unicode decoder has consumed too many bytes.");
|
||||
NS_ASSERTION(totalByteCount < (PRInt32)aCount,
|
||||
"The Unicode decoder consumed too many bytes.");
|
||||
} else {
|
||||
NS_ASSERTION(totalByteCount == aCount, "The Unicode decoder consumed the wrong number of bytes.");
|
||||
*aWriteCount = totalByteCount;
|
||||
NS_ASSERTION(totalByteCount == (PRInt32)aCount,
|
||||
"The Unicode decoder consumed the wrong number of bytes.");
|
||||
*aWriteCount = (PRUint32)totalByteCount;
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче