diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java
index a8adb7d9986a..902e8af90102 100644
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -5860,7 +5860,7 @@ public class Tokenizer implements Locator {
private void emitReplacementCharacter(@NoLength char[] buf, int pos)
throws SAXException {
flushChars(buf, pos);
- tokenHandler.characters(Tokenizer.REPLACEMENT_CHARACTER, 0, 1);
+ tokenHandler.zeroOriginatingReplacementCharacter();
cstart = pos + 1;
}
diff --git a/parser/html/javasrc/TreeBuilder.java b/parser/html/javasrc/TreeBuilder.java
index 90246cc648b0..114251efdf6c 100644
--- a/parser/html/javasrc/TreeBuilder.java
+++ b/parser/html/javasrc/TreeBuilder.java
@@ -60,6 +60,11 @@ import org.xml.sax.SAXParseException;
public abstract class TreeBuilder implements TokenHandler,
TreeBuilderState {
+ /**
+ * Single-element array holding U+FFFD; zeroOriginatingReplacementCharacter() passes it to characters() with start 0 and length 1.
+ */
+ private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+
// Start dispatch groups
final static int OTHER = 0;
@@ -839,8 +844,11 @@ public abstract class TreeBuilder implements TokenHandler,
needToDropLF = false;
}
+ if (inForeign) {
+ accumulateCharacters(buf, start, length);
+ return;
+ }
// optimize the most common case
- // XXX should there be an IN FOREIGN check here?
switch (mode) {
case IN_BODY:
case IN_CELL:
@@ -1206,6 +1214,16 @@ public abstract class TreeBuilder implements TokenHandler,
}
}
+ /** Forwards a single U+FFFD to characters() only when inForeign is set or mode is TEXT; in every other case this call does nothing.
+ * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter()
+ */
+ @Override public void zeroOriginatingReplacementCharacter()
+ throws SAXException {
+ if (inForeign || mode == TEXT) {
+ characters(REPLACEMENT_CHARACTER, 0, 1);
+ }
+ }
+
public final void eof() throws SAXException {
flushCharacters();
if (inForeign) {
diff --git a/parser/html/nsHtml5Tokenizer.cpp b/parser/html/nsHtml5Tokenizer.cpp
index 6acc66d16da2..8026f9e3a65c 100644
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -3339,7 +3339,7 @@ void
nsHtml5Tokenizer::emitReplacementCharacter(PRUnichar* buf, PRInt32 pos)
{
flushChars(buf, pos);
- tokenHandler->characters(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, 0, 1);
+ tokenHandler->zeroOriginatingReplacementCharacter();
cstart = pos + 1;
}
diff --git a/parser/html/nsHtml5TreeBuilder.cpp b/parser/html/nsHtml5TreeBuilder.cpp
index 0a876b5a0fb4..6eb98f4fe43c 100644
--- a/parser/html/nsHtml5TreeBuilder.cpp
+++ b/parser/html/nsHtml5TreeBuilder.cpp
@@ -190,6 +190,10 @@ nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 leng
}
needToDropLF = PR_FALSE;
}
+ if (inForeign) {
+ accumulateCharacters(buf, start, length);
+ return;
+ }
switch(mode) {
case NS_HTML5TREE_BUILDER_IN_BODY:
case NS_HTML5TREE_BUILDER_IN_CELL:
@@ -413,6 +417,14 @@ nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 leng
}
}
+void
+nsHtml5TreeBuilder::zeroOriginatingReplacementCharacter()
+{
+ if (inForeign || mode == NS_HTML5TREE_BUILDER_TEXT) {
+ characters(REPLACEMENT_CHARACTER, 0, 1);
+ }
+}
+
void
nsHtml5TreeBuilder::eof()
{
diff --git a/parser/html/nsHtml5TreeBuilder.h b/parser/html/nsHtml5TreeBuilder.h
index cf678753b0d9..de38321d54c0 100644
--- a/parser/html/nsHtml5TreeBuilder.h
+++ b/parser/html/nsHtml5TreeBuilder.h
@@ -70,6 +70,7 @@ class nsHtml5Portability;
class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
{
private:
+ static PRUnichar REPLACEMENT_CHARACTER[];
static jArray QUIRKY_PUBLIC_IDS;
PRInt32 mode;
PRInt32 originalMode;
@@ -100,6 +101,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
void doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, PRBool forceQuirks);
void comment(PRUnichar* buf, PRInt32 start, PRInt32 length);
void characters(const PRUnichar* buf, PRInt32 start, PRInt32 length);
+ void zeroOriginatingReplacementCharacter();
void eof();
void endTokenization();
void startTag(nsHtml5ElementName* elementName, nsHtml5HtmlAttributes* attributes, PRBool selfClosing);
@@ -239,6 +241,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
};
#ifdef nsHtml5TreeBuilder_cpp__
+PRUnichar nsHtml5TreeBuilder::REPLACEMENT_CHARACTER[] = { 0xfffd };
jArray nsHtml5TreeBuilder::QUIRKY_PUBLIC_IDS = nsnull;
#endif