diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java index a8adb7d9986a..902e8af90102 100644 --- a/parser/html/javasrc/Tokenizer.java +++ b/parser/html/javasrc/Tokenizer.java @@ -5860,7 +5860,7 @@ public class Tokenizer implements Locator { private void emitReplacementCharacter(@NoLength char[] buf, int pos) throws SAXException { flushChars(buf, pos); - tokenHandler.characters(Tokenizer.REPLACEMENT_CHARACTER, 0, 1); + tokenHandler.zeroOriginatingReplacementCharacter(); cstart = pos + 1; } diff --git a/parser/html/javasrc/TreeBuilder.java b/parser/html/javasrc/TreeBuilder.java index 90246cc648b0..114251efdf6c 100644 --- a/parser/html/javasrc/TreeBuilder.java +++ b/parser/html/javasrc/TreeBuilder.java @@ -60,6 +60,11 @@ import org.xml.sax.SAXParseException; public abstract class TreeBuilder implements TokenHandler, TreeBuilderState { + /** + * Array version of U+FFFD. + */ + private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' }; + // Start dispatch groups final static int OTHER = 0; @@ -839,8 +844,11 @@ public abstract class TreeBuilder implements TokenHandler, needToDropLF = false; } + if (inForeign) { + accumulateCharacters(buf, start, length); + return; + } // optimize the most common case - // XXX should there be an IN FOREIGN check here? switch (mode) { case IN_BODY: case IN_CELL: @@ -1206,6 +1214,16 @@ public abstract class TreeBuilder implements TokenHandler, } } + /** + * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter() + */ + @Override public void zeroOriginatingReplacementCharacter() + throws SAXException { + if (inForeign || mode == TEXT) { + characters(REPLACEMENT_CHARACTER, 0, 1); + } + } + public final void eof() throws SAXException { flushCharacters(); if (inForeign) { diff --git a/parser/html/nsHtml5Tokenizer.cpp b/parser/html/nsHtml5Tokenizer.cpp index 6acc66d16da2..8026f9e3a65c 100644 --- a/parser/html/nsHtml5Tokenizer.cpp +++ b/parser/html/nsHtml5Tokenizer.cpp @@ -3339,7 +3339,7 @@ void nsHtml5Tokenizer::emitReplacementCharacter(PRUnichar* buf, PRInt32 pos) { flushChars(buf, pos); - tokenHandler->characters(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, 0, 1); + tokenHandler->zeroOriginatingReplacementCharacter(); cstart = pos + 1; } diff --git a/parser/html/nsHtml5TreeBuilder.cpp b/parser/html/nsHtml5TreeBuilder.cpp index 0a876b5a0fb4..6eb98f4fe43c 100644 --- a/parser/html/nsHtml5TreeBuilder.cpp +++ b/parser/html/nsHtml5TreeBuilder.cpp @@ -190,6 +190,10 @@ nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 leng } needToDropLF = PR_FALSE; } + if (inForeign) { + accumulateCharacters(buf, start, length); + return; + } switch(mode) { case NS_HTML5TREE_BUILDER_IN_BODY: case NS_HTML5TREE_BUILDER_IN_CELL: @@ -413,6 +417,14 @@ nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 leng } } +void +nsHtml5TreeBuilder::zeroOriginatingReplacementCharacter() +{ + if (inForeign || mode == NS_HTML5TREE_BUILDER_TEXT) { + characters(REPLACEMENT_CHARACTER, 0, 1); + } +} + void nsHtml5TreeBuilder::eof() { diff --git a/parser/html/nsHtml5TreeBuilder.h b/parser/html/nsHtml5TreeBuilder.h index cf678753b0d9..de38321d54c0 100644 --- a/parser/html/nsHtml5TreeBuilder.h +++ b/parser/html/nsHtml5TreeBuilder.h @@ -70,6 +70,7 @@ class nsHtml5Portability; class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState { private: + static PRUnichar REPLACEMENT_CHARACTER[]; static jArray QUIRKY_PUBLIC_IDS; PRInt32 mode; PRInt32 originalMode; @@ -100,6 +101,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState void doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, PRBool forceQuirks); void comment(PRUnichar* buf, PRInt32 start, PRInt32 length); void characters(const PRUnichar* buf, PRInt32 start, PRInt32 length); + void zeroOriginatingReplacementCharacter(); void eof(); void endTokenization(); void startTag(nsHtml5ElementName* elementName, nsHtml5HtmlAttributes* attributes, PRBool selfClosing); @@ -239,6 +241,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState }; #ifdef nsHtml5TreeBuilder_cpp__ +PRUnichar nsHtml5TreeBuilder::REPLACEMENT_CHARACTER[] = { 0xfffd }; jArray nsHtml5TreeBuilder::QUIRKY_PUBLIC_IDS = nsnull; #endif