Bug 489820 and bug 483209 - Make the HTML5 parser check buffer bounds less often and place limits on buffer growth. rs=sicking.

--HG--
extra : rebase_source : 57088761bab23d50aab5bb6b2e5f22f50a48c2e8
This commit is contained in:
Henri Sivonen 2010-02-12 09:49:06 +02:00
Родитель 48cde2f9a8
Коммит 4f886ae14a
5 изменённых файлов: 136 добавлений и 141 удалений

Просмотреть файл

@ -191,38 +191,6 @@ nsHtml5Tokenizer::emptyAttributes()
return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
}
// Resets the short string buffer so that it holds only `c`: stores `c` at
// index 0 and sets the fill length to 1. No bounds check is performed.
// NOTE(review): presumably strBuf always has room for one element - confirm.
void
nsHtml5Tokenizer::clearStrBufAndAppendCurrentC(PRUnichar c)
{
strBuf[0] = c;
strBufLen = 1;
}
// Replaces the contents of the short string buffer with the single code unit
// `c` (index 0 is overwritten, fill length becomes 1). The body is identical
// to clearStrBufAndAppendCurrentC; no bounds check is performed.
void
nsHtml5Tokenizer::clearStrBufAndAppendForceWrite(PRUnichar c)
{
strBuf[0] = c;
strBufLen = 1;
}
// Logically empties the short string buffer by resetting its fill length to
// zero. The backing array itself is neither cleared nor reallocated.
void
nsHtml5Tokenizer::clearStrBufForNextState()
{
strBufLen = 0;
}
// Appends one code unit to the short string buffer, growing it on demand.
// When the buffer is full, a new array that is
// NS_HTML5TOKENIZER_BUFFER_GROW_BY elements larger is allocated, the old
// contents are copied over, and the old array is released.
void
nsHtml5Tokenizer::appendStrBuf(PRUnichar c)
{
// Full: fill length has reached the current capacity.
if (strBufLen == strBuf.length) {
jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(strBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY);
nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length);
strBuf.release();
strBuf = newBuf;
}
strBuf[strBufLen++] = c;
}
nsString*
nsHtml5Tokenizer::strBufToString()
{
@ -243,44 +211,6 @@ nsHtml5Tokenizer::emitStrBuf()
}
}
// Logically empties the long string buffer by resetting its fill length to
// zero; the backing array is left untouched.
void
nsHtml5Tokenizer::clearLongStrBufForNextState()
{
longStrBufLen = 0;
}
// Resets the long string buffer's fill length to zero. The body is identical
// to clearLongStrBufForNextState; the backing array is not modified.
void
nsHtml5Tokenizer::clearLongStrBuf()
{
longStrBufLen = 0;
}
// Resets the long string buffer so that it holds only `c`: writes `c` at
// index 0 and sets the fill length to 1. No bounds check is performed.
// NOTE(review): presumably longStrBuf always has room for one element - confirm.
void
nsHtml5Tokenizer::clearLongStrBufAndAppendCurrentC(PRUnichar c)
{
longStrBuf[0] = c;
longStrBufLen = 1;
}
// Replaces the contents of the long string buffer with the single code unit
// `c` (index 0 is overwritten, fill length becomes 1). The body is identical
// to clearLongStrBufAndAppendCurrentC; no bounds check is performed.
void
nsHtml5Tokenizer::clearLongStrBufAndAppendToComment(PRUnichar c)
{
longStrBuf[0] = c;
longStrBufLen = 1;
}
// Appends one code unit to the long string buffer, growing it on demand.
// When the buffer is full, a replacement array of 1.5x the current fill
// length is allocated, the old contents are copied, and the old array is
// released.
// NOTE(review): for a fill length of 0 or 1 the computed new capacity equals
// the old one, so no growth would happen - presumably the buffer never starts
// that small; confirm against the initial allocation.
void
nsHtml5Tokenizer::appendLongStrBuf(PRUnichar c)
{
if (longStrBufLen == longStrBuf.length) {
// New capacity: len + len/2.
jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(longStrBufLen + (longStrBufLen >> 1));
nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length);
longStrBuf.release();
longStrBuf = newBuf;
}
longStrBuf[longStrBufLen++] = c;
}
void
nsHtml5Tokenizer::appendSecondHyphenToBogusComment()
{
@ -294,32 +224,6 @@ nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c)
appendLongStrBuf(c);
}
// Appends `length` code units from `buffer`, starting at `offset`, to the
// long string buffer.
// If the current capacity is smaller than the required fill length, the
// buffer is replaced by a new array of 1.5x the required length (old contents
// copied, old array released) before the new data is copied in.
void
nsHtml5Tokenizer::appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length)
{
// Fill length after this append.
PRInt32 reqLen = longStrBufLen + length;
if (longStrBuf.length < reqLen) {
jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(reqLen + (reqLen >> 1));
nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length);
longStrBuf.release();
longStrBuf = newBuf;
}
// Copy the new data in directly after the existing contents.
nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
longStrBufLen = reqLen;
}
// Convenience overload: appends the whole of `arr` (offset 0, arr.length
// elements) to the long string buffer via the three-argument overload.
void
nsHtml5Tokenizer::appendLongStrBuf(jArray<PRUnichar,PRInt32> arr)
{
appendLongStrBuf(arr, 0, arr.length);
}
// Appends the current contents of the short string buffer (its first
// strBufLen elements) to the long string buffer. strBuf itself is unchanged.
void
nsHtml5Tokenizer::appendStrBufToLongStrBuf()
{
appendLongStrBuf(strBuf, 0, strBufLen);
}
nsString*
nsHtml5Tokenizer::longStrBufToString()
{
@ -342,12 +246,6 @@ nsHtml5Tokenizer::flushChars(PRUnichar* buf, PRInt32 pos)
cstart = 0x7fffffff;
}
// Sets the `attributes` member to nsnull. The previously referenced object is
// not deleted or released here.
// NOTE(review): ownership is presumably handled elsewhere - confirm.
void
nsHtml5Tokenizer::resetAttributes()
{
attributes = nsnull;
}
void
nsHtml5Tokenizer::strBufToElementNameString()
{
@ -429,6 +327,7 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
shouldSuspend = PR_FALSE;
lastCR = PR_FALSE;
PRInt32 start = buffer->getStart();
PRInt32 end = buffer->getEnd();
PRInt32 pos = start - 1;
switch(state) {
case NS_HTML5TOKENIZER_DATA:
@ -456,8 +355,9 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
break;
}
}
pos = stateLoop(state, c, pos, buffer->getBuffer(), PR_FALSE, returnState, buffer->getEnd());
if (pos == buffer->getEnd()) {
ensureBufferSpace(end - start);
pos = stateLoop(state, c, pos, buffer->getBuffer(), PR_FALSE, returnState, end);
if (pos == end) {
buffer->setStart(pos);
} else {
buffer->setStart(pos + 1);
@ -465,6 +365,38 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
return lastCR;
}
// Pre-sizes longStrBuf and strBuf so that `addedLength` more code units can
// be appended to each without further capacity checks, then forwards the same
// request to the token handler.
//
// For each buffer:
//  - If the projected fill length (current length + addedLength) exceeds
//    NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD, the accumulated content is
//    discarded and replaced by the two code units U+2026 (horizontal
//    ellipsis) and U+FFFD (replacement character), which bounds growth.
//  - If the projected length still exceeds the current capacity, a new array
//    of exactly that size is allocated, the live contents are copied, and the
//    old array is released.
//
// NOTE(review): the clip branch writes indices 0 and 1 before any
// reallocation, so it assumes each buffer already holds at least two
// elements - confirm against the initial allocations.
void
nsHtml5Tokenizer::ensureBufferSpace(PRInt32 addedLength)
{
PRInt32 newlongStrBufCapacity = longStrBufLen + addedLength;
if (newlongStrBufCapacity > NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD) {
// Clip: drop what has been accumulated and leave a two-unit marker.
longStrBuf[0] = 0x2026;
longStrBuf[1] = 0xfffd;
longStrBufLen = 2;
newlongStrBufCapacity = 2 + addedLength;
}
if (newlongStrBufCapacity > longStrBuf.length) {
jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(newlongStrBufCapacity);
nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBufLen);
longStrBuf.release();
longStrBuf = newBuf;
}
// Same procedure for the short string buffer.
PRInt32 newStrBufCapacity = strBufLen + addedLength;
if (newStrBufCapacity > NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD) {
strBuf[0] = 0x2026;
strBuf[1] = 0xfffd;
strBufLen = 2;
newStrBufCapacity = 2 + addedLength;
}
if (newStrBufCapacity > strBuf.length) {
jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(newStrBufCapacity);
nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBufLen);
strBuf.release();
strBuf = newBuf;
}
// Let the token handler reserve room in its own buffer as well.
tokenHandler->ensureBufferSpace(addedLength);
}
PRInt32
nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos)
{

Просмотреть файл

@ -158,31 +158,83 @@ class nsHtml5Tokenizer
nsHtml5HtmlAttributes* emptyAttributes();
private:
void clearStrBufAndAppendCurrentC(PRUnichar c);
void clearStrBufAndAppendForceWrite(PRUnichar c);
void clearStrBufForNextState();
void appendStrBuf(PRUnichar c);
// Resets strBuf to contain only `c` (index 0 overwritten, fill length 1).
// No bounds check is performed.
inline void clearStrBufAndAppendCurrentC(PRUnichar c)
{
strBuf[0] = c;
strBufLen = 1;
}
// Replaces the contents of strBuf with the single code unit `c`; the body is
// identical to clearStrBufAndAppendCurrentC. No bounds check is performed.
inline void clearStrBufAndAppendForceWrite(PRUnichar c)
{
strBuf[0] = c;
strBufLen = 1;
}
// Logically empties strBuf by resetting its fill length to zero; the backing
// array is left untouched.
inline void clearStrBufForNextState()
{
strBufLen = 0;
}
// Appends `c` to strBuf and advances the fill length. This version performs
// no capacity check or growth.
// NOTE(review): presumably callers rely on ensureBufferSpace() having
// reserved enough room beforehand - confirm.
inline void appendStrBuf(PRUnichar c)
{
strBuf[strBufLen++] = c;
}
protected:
nsString* strBufToString();
private:
void strBufToDoctypeName();
void emitStrBuf();
void clearLongStrBufForNextState();
void clearLongStrBuf();
void clearLongStrBufAndAppendCurrentC(PRUnichar c);
void clearLongStrBufAndAppendToComment(PRUnichar c);
void appendLongStrBuf(PRUnichar c);
// Logically empties longStrBuf by resetting its fill length to zero; the
// backing array is left untouched.
inline void clearLongStrBufForNextState()
{
longStrBufLen = 0;
}
// Resets longStrBuf's fill length to zero; the body is identical to
// clearLongStrBufForNextState.
inline void clearLongStrBuf()
{
longStrBufLen = 0;
}
// Resets longStrBuf to contain only `c` (index 0 overwritten, fill length 1).
// No bounds check is performed.
inline void clearLongStrBufAndAppendCurrentC(PRUnichar c)
{
longStrBuf[0] = c;
longStrBufLen = 1;
}
// Replaces the contents of longStrBuf with the single code unit `c`; the body
// is identical to clearLongStrBufAndAppendCurrentC. No bounds check is
// performed.
inline void clearLongStrBufAndAppendToComment(PRUnichar c)
{
longStrBuf[0] = c;
longStrBufLen = 1;
}
// Appends `c` to longStrBuf and advances the fill length. This version
// performs no capacity check or growth.
// NOTE(review): presumably callers rely on ensureBufferSpace() having
// reserved enough room beforehand - confirm.
inline void appendLongStrBuf(PRUnichar c)
{
longStrBuf[longStrBufLen++] = c;
}
void appendSecondHyphenToBogusComment();
void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c);
void appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length);
void appendLongStrBuf(jArray<PRUnichar,PRInt32> arr);
void appendStrBufToLongStrBuf();
// Copies `length` code units from `buffer`, starting at `offset`, to the end
// of longStrBuf and advances the fill length by `length`. No capacity check
// or growth is performed here.
// NOTE(review): presumably capacity is guaranteed by a prior
// ensureBufferSpace() call - confirm.
inline void appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length)
{
nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
longStrBufLen += length;
}
// Appends the first strBufLen elements of strBuf to longStrBuf; strBuf itself
// is unchanged.
inline void appendStrBufToLongStrBuf()
{
appendLongStrBuf(strBuf, 0, strBufLen);
}
nsString* longStrBufToString();
void emitComment(PRInt32 provisionalHyphens, PRInt32 pos);
protected:
void flushChars(PRUnichar* buf, PRInt32 pos);
private:
void resetAttributes();
// Sets the `attributes` member to nsnull without deleting or releasing the
// previously referenced object.
inline void resetAttributes()
{
attributes = nsnull;
}
void strBufToElementNameString();
PRInt32 emitCurrentTagToken(PRBool selfClosing, PRInt32 pos);
void attributeNameComplete();
@ -194,6 +246,7 @@ class nsHtml5Tokenizer
void start();
PRBool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
private:
void ensureBufferSpace(PRInt32 addedLength);
PRInt32 stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos);
void initDoctypeFields();
inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
@ -294,6 +347,7 @@ jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOSCRIPT_ARR = 0;
jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOFRAMES_ARR = 0;
#endif
// Projected fill length (in PRUnichar code units) above which
// nsHtml5Tokenizer::ensureBufferSpace() discards the accumulated contents of
// strBuf / longStrBuf instead of letting them grow further.
#define NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD 8000
#define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2

Просмотреть файл

@ -193,6 +193,22 @@ nsHtml5TreeBuilder::comment(PRUnichar* buf, PRInt32 start, PRInt32 length)
return;
}
// Ensures charBuffer can take `addedLength` more code units without further
// capacity checks.
// If the projected fill length would exceed
// NS_HTML5TREE_BUILDER_BUFFER_FLUSH_THRESHOLD, the pending characters are
// flushed first and only `addedLength` is reserved. If the required capacity
// still exceeds the current one, a new array of exactly that size is
// allocated, the live contents are copied, and the old array is released.
void
nsHtml5TreeBuilder::ensureBufferSpace(PRInt32 addedLength)
{
PRInt32 newCharBufferCapacity = charBufferLen + addedLength;
if (newCharBufferCapacity > NS_HTML5TREE_BUILDER_BUFFER_FLUSH_THRESHOLD) {
// NOTE(review): assumes flushCharacters() resets charBufferLen to 0 - confirm.
flushCharacters();
newCharBufferCapacity = addedLength;
}
if (newCharBufferCapacity > charBuffer.length) {
jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(newCharBufferCapacity);
nsHtml5ArrayCopy::arraycopy(charBuffer, newBuf, charBufferLen);
charBuffer.release();
charBuffer = newBuf;
}
}
void
nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 length)
{
@ -3061,6 +3077,10 @@ nsHtml5TreeBuilder::clearLastListSlot()
void
nsHtml5TreeBuilder::push(nsHtml5StackNode* node)
{
if (currentPtr == NS_HTML5TREE_BUILDER_STACK_MAX_DEPTH) {
pop();
}
currentPtr++;
if (currentPtr == stack.length) {
jArray<nsHtml5StackNode*,PRInt32> newStack = jArray<nsHtml5StackNode*,PRInt32>(stack.length + 64);
@ -3075,6 +3095,10 @@ nsHtml5TreeBuilder::push(nsHtml5StackNode* node)
void
nsHtml5TreeBuilder::silentPush(nsHtml5StackNode* node)
{
if (currentPtr == NS_HTML5TREE_BUILDER_STACK_MAX_DEPTH) {
pop();
}
currentPtr++;
if (currentPtr == stack.length) {
jArray<nsHtml5StackNode*,PRInt32> newStack = jArray<nsHtml5StackNode*,PRInt32>(stack.length + 64);
@ -3722,20 +3746,6 @@ nsHtml5TreeBuilder::appendVoidFormToCurrent(nsHtml5HtmlAttributes* attributes)
elementPopped(kNameSpaceID_XHTML, nsHtml5Atoms::form, elt);
}
// Appends one code unit to charBuffer, growing it when necessary.
// When the buffer is full, the replacement array is sized to exactly the new
// fill length, i.e. it grows by a single element per reallocation.
void
nsHtml5TreeBuilder::accumulateCharacter(PRUnichar c)
{
PRInt32 newLen = charBufferLen + 1;
if (newLen > charBuffer.length) {
jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(newLen);
nsHtml5ArrayCopy::arraycopy(charBuffer, newBuf, charBufferLen);
charBuffer.release();
charBuffer = newBuf;
}
charBuffer[charBufferLen] = c;
charBufferLen = newLen;
}
void
nsHtml5TreeBuilder::requestSuspension()
{

Просмотреть файл

@ -101,6 +101,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
void startTokenization(nsHtml5Tokenizer* self);
void doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, PRBool forceQuirks);
void comment(PRUnichar* buf, PRInt32 start, PRInt32 length);
void ensureBufferSpace(PRInt32 addedLength);
void characters(const PRUnichar* buf, PRInt32 start, PRInt32 length);
void eof();
void endTokenization();
@ -183,7 +184,11 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
void appendVoidFormToCurrent(nsHtml5HtmlAttributes* attributes);
protected:
void accumulateCharacters(const PRUnichar* buf, PRInt32 start, PRInt32 length);
void accumulateCharacter(PRUnichar c);
// Appends `c` to charBuffer and advances the fill length. This version
// performs no capacity check or growth.
// NOTE(review): presumably capacity is reserved beforehand through
// ensureBufferSpace() - confirm.
inline void accumulateCharacter(PRUnichar c)
{
charBuffer[charBufferLen++] = c;
}
void requestSuspension();
nsIContent** createElement(PRInt32 ns, nsIAtom* name, nsHtml5HtmlAttributes* attributes);
nsIContent** createElement(PRInt32 ns, nsIAtom* name, nsHtml5HtmlAttributes* attributes, nsIContent** form);
@ -244,6 +249,8 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
jArray<const char*,PRInt32> nsHtml5TreeBuilder::QUIRKY_PUBLIC_IDS = nsnull;
#endif
// Projected charBuffer fill length above which
// nsHtml5TreeBuilder::ensureBufferSpace() flushes pending characters first.
#define NS_HTML5TREE_BUILDER_BUFFER_FLUSH_THRESHOLD 4096
// Value of currentPtr at which push() / silentPush() pop the top stack entry
// before pushing, which caps the depth of the element stack.
#define NS_HTML5TREE_BUILDER_STACK_MAX_DEPTH 200
#define NS_HTML5TREE_BUILDER_OTHER 0
#define NS_HTML5TREE_BUILDER_A 1
#define NS_HTML5TREE_BUILDER_BASE 2

Просмотреть файл

@ -605,16 +605,8 @@ nsHtml5TreeBuilder::elementPopped(PRInt32 aNamespace, nsIAtom* aName, nsIContent
// Appends aLength code units from aBuf, starting at aStart, to charBuffer.
// NOTE(review): this span comes from a rendered commit diff whose +/- markers
// are missing, so pre-change and post-change lines appear interleaved. Read
// literally, charBufferLen is assigned newFillLen and then incremented by
// aLength again; confirm against the real file which lines are live.
void
nsHtml5TreeBuilder::accumulateCharacters(const PRUnichar* aBuf, PRInt32 aStart, PRInt32 aLength)
{
PRInt32 newFillLen = charBufferLen + aLength;
if (newFillLen > charBuffer.length) {
// Grow to 1.5x the required fill length.
PRInt32 newAllocLength = newFillLen + (newFillLen >> 1);
jArray<PRUnichar,PRInt32> newBuf(newAllocLength);
memcpy(newBuf, charBuffer, sizeof(PRUnichar) * charBufferLen);
charBuffer.release();
charBuffer = newBuf;
}
memcpy(charBuffer + charBufferLen, aBuf + aStart, sizeof(PRUnichar) * aLength);
charBufferLen = newFillLen;
charBufferLen += aLength;
}
nsIContent**