From f404ddb2db688de0da4252124527ae82443612a7 Mon Sep 17 00:00:00 2001
From: "mrbkap%gmail.com" <mrbkap%gmail.com>
Date: Wed, 1 Dec 2004 04:37:36 +0000
Subject: [PATCH] bug 88952: Remove trailing content. This checkin fixes a
 whole slew of textarea bugs. r=jst sr=rbs

---
 parser/htmlparser/public/nsHTMLTokens.h   |  19 +-
 parser/htmlparser/public/nsIParser.h      |  14 +-
 parser/htmlparser/public/nsITokenizer.h   |   4 +-
 parser/htmlparser/src/CNavDTD.cpp         | 129 +-----
 parser/htmlparser/src/CParserContext.cpp  |  30 +-
 parser/htmlparser/src/CParserContext.h    |   4 +-
 parser/htmlparser/src/nsElementTable.cpp  |   7 +-
 parser/htmlparser/src/nsExpatDriver.cpp   |   6 -
 parser/htmlparser/src/nsHTMLTokenizer.cpp | 193 ++++-----
 parser/htmlparser/src/nsHTMLTokenizer.h   |   9 +-
 parser/htmlparser/src/nsHTMLTokens.cpp    | 494 ++++++++++++++--------
 parser/htmlparser/src/nsLoggingSink.h     |   4 +-
 parser/htmlparser/src/nsParser.cpp        |  20 +-
 13 files changed, 490 insertions(+), 443 deletions(-)

diff --git a/parser/htmlparser/public/nsHTMLTokens.h b/parser/htmlparser/public/nsHTMLTokens.h
index 29b4cde26c6..e741352687d 100644
--- a/parser/htmlparser/public/nsHTMLTokens.h
+++ b/parser/htmlparser/public/nsHTMLTokens.h
@@ -153,7 +153,6 @@ public:
   }
 
   nsString mTextValue;
-  nsString mTrailingContent;
 protected:
   eContainerInfo mContainerInfo;
   PRPackedBool mEmpty;
@@ -282,9 +281,6 @@ public:
   CTextToken();
   CTextToken(const nsAString& aString);
   virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
-  nsresult ConsumeUntil(PRUnichar aChar, PRBool aIgnoreComments,
-                        nsScanner& aScanner, const nsAString& aEndTagName,
-                        PRInt32 aFlag, PRBool& aFlushTokens);
   virtual PRInt32 GetTokenType(void);
   virtual PRInt32 GetTextLength(void);
   virtual void CopyTo(nsAString& aStr);
@@ -293,6 +289,21 @@ public:
                     nsScannerIterator& aEnd);
   virtual void Bind(const nsAString& aStr);
 
+  nsresult ConsumeCharacterData(PRUnichar aChar,
+                                PRBool aConservativeConsume,
+                                PRBool aIgnoreComments,
+                                nsScanner& aScanner,
+                                const nsAString& aEndTagName,
+                                PRInt32 aFlag,
+                                PRBool& aFlushTokens);
+
+  nsresult ConsumeParsedCharacterData(PRUnichar aChar,
+                                      PRBool aConservativeConsume,
+                                      nsScanner& aScanner,
+                                      const nsAString& aEndTagName,
+                                      PRInt32 aFlag,
+                                      PRBool& aFound);
+
 protected:
   nsScannerSubstring mTextValue;
 };
diff --git a/parser/htmlparser/public/nsIParser.h b/parser/htmlparser/public/nsIParser.h
index 9355c134ce9..f889ff88cde 100644
--- a/parser/htmlparser/public/nsIParser.h
+++ b/parser/htmlparser/public/nsIParser.h
@@ -310,7 +310,8 @@ class nsIParser : public nsISupports {
 #define NS_ERROR_HTMLPARSER_STOPPARSING                    NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1015)
 #define NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL      NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1016)
 #define NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP               NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1017)
-
+#define NS_ERROR_HTMLPARSER_FAKE_ENDTAG                    NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1018)
+#define NS_ERROR_HTMLPARSER_INVALID_COMMENT                NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1019)
 
 #define NS_ERROR_HTMLPARSER_CONTINUE              NS_OK
 
@@ -325,6 +326,8 @@ const PRUint32  kInvalidParserContext = NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT
 const PRUint32  kBlocked          = NS_ERROR_HTMLPARSER_BLOCK;
 const PRUint32  kBadStringLiteral = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
 const PRUint32  kHierarchyTooDeep = NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP;
+const PRUint32  kFakeEndTag       = NS_ERROR_HTMLPARSER_FAKE_ENDTAG;
+const PRUint32  kNotAComment      = NS_ERROR_HTMLPARSER_INVALID_COMMENT;
 
 const PRUnichar  kNewLine          = '\n';
 const PRUnichar  kCR               = '\r';
@@ -381,9 +384,10 @@ const PRUnichar kNullCh           = '\0';
 #define NS_IPARSER_FLAG_VIEW_NORMAL          0x00000020
 #define NS_IPARSER_FLAG_VIEW_SOURCE          0x00000040
 #define NS_IPARSER_FLAG_VIEW_ERRORS          0x00000080
-#define NS_IPARSER_FLAG_PRESERVE_CONTENT     0x00000100
-#define NS_IPARSER_FLAG_PLAIN_TEXT           0x00000200
-#define NS_IPARSER_FLAG_XML                  0x00000400
-#define NS_IPARSER_FLAG_HTML                 0x00000800
+#define NS_IPARSER_FLAG_PLAIN_TEXT           0x00000100
+#define NS_IPARSER_FLAG_XML                  0x00000200
+#define NS_IPARSER_FLAG_HTML                 0x00000400
+#define NS_IPARSER_FLAG_SCRIPT_ENABLED       0x00000800
+#define NS_IPARSER_FLAG_FRAMES_ENABLED       0x00001000
 
 #endif 
diff --git a/parser/htmlparser/public/nsITokenizer.h b/parser/htmlparser/public/nsITokenizer.h
index 11b24377025..3027b43d925 100644
--- a/parser/htmlparser/public/nsITokenizer.h
+++ b/parser/htmlparser/public/nsITokenizer.h
@@ -86,7 +86,6 @@ public:
   NS_IMETHOD_(PRInt32)           GetCount(void)=0;
   NS_IMETHOD_(nsTokenAllocator*) GetTokenAllocator(void)=0;
   NS_IMETHOD_(void)              PrependTokens(nsDeque& aDeque)=0;
-  NS_IMETHOD                     CopyState(nsITokenizer* aTokenizer) = 0;
   
 };
 
@@ -101,8 +100,7 @@ public:
   NS_IMETHOD_(CToken*)           GetTokenAt(PRInt32 anIndex);\
   NS_IMETHOD_(PRInt32)           GetCount(void);\
   NS_IMETHOD_(nsTokenAllocator*) GetTokenAllocator(void);\
-  NS_IMETHOD_(void)              PrependTokens(nsDeque& aDeque);\
-  NS_IMETHOD                     CopyState(nsITokenizer* aTokenizer);
+  NS_IMETHOD_(void)              PrependTokens(nsDeque& aDeque);
 
 
 #endif
diff --git a/parser/htmlparser/src/CNavDTD.cpp b/parser/htmlparser/src/CNavDTD.cpp
index 1172aff20be..574b3d6234e 100644
--- a/parser/htmlparser/src/CNavDTD.cpp
+++ b/parser/htmlparser/src/CNavDTD.cpp
@@ -116,12 +116,10 @@ static char gShowCRC;
 #define NS_DTD_FLAG_HAD_BODY               0x00000010
 #define NS_DTD_FLAG_HAD_FRAMESET           0x00000020
 #define NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE  0x00000040
-#define NS_DTD_FLAG_SCRIPT_ENABLED         0x00000100
-#define NS_DTD_FLAG_FRAMES_ENABLED         0x00000200
-#define NS_DTD_FLAG_ALTERNATE_CONTENT      0x00000400 // NOFRAMES, NOSCRIPT 
-#define NS_DTD_FLAG_MISPLACED_CONTENT      0x00000800
-#define NS_DTD_FLAG_IN_MISPLACED_CONTENT   0x00001000
-#define NS_DTD_FLAG_STOP_PARSING           0x00002000
+#define NS_DTD_FLAG_ALTERNATE_CONTENT      0x00000080 // NOFRAMES, NOSCRIPT 
+#define NS_DTD_FLAG_MISPLACED_CONTENT      0x00000100
+#define NS_DTD_FLAG_IN_MISPLACED_CONTENT   0x00000200
+#define NS_DTD_FLAG_STOP_PARSING           0x00000400
 
 /**
  *  This method gets called as part of our COM-like interfaces.
@@ -384,15 +382,15 @@ nsresult CNavDTD::WillBuildModel(const CParserContext& aParserContext,
 #endif    
 
    if(mSink) {
-      PRBool enabled;
+      PRBool enabled = PR_TRUE;
       mSink->IsEnabled(eHTMLTag_frameset, &enabled);
       if(enabled) {
-        mFlags |= NS_DTD_FLAG_FRAMES_ENABLED;
+        mFlags |= NS_IPARSER_FLAG_FRAMES_ENABLED;
       }
       
       mSink->IsEnabled(eHTMLTag_script, &enabled);
       if(enabled) {
-        mFlags |= NS_DTD_FLAG_SCRIPT_ENABLED;
+        mFlags |= NS_IPARSER_FLAG_SCRIPT_ENABLED;
       }
     }
     
@@ -439,7 +437,7 @@ nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsIToke
         }
 
         // always open a body if frames are disabled....
-        if(!(mFlags & NS_DTD_FLAG_FRAMES_ENABLED)) {
+        if(!(mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) {
           theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body,NS_LITERAL_STRING("body")));
           mTokenizer->PushTokenFront(theToken);
         }
@@ -522,37 +520,6 @@ CNavDTD::BuildNeglectedTarget(eHTMLTags aTarget,
   NS_ASSERTION(mTokenAllocator, "unable to create tokens without an allocator.");
   if (!mTokenizer || !mTokenAllocator)
     return NS_OK;
-  if (eHTMLTag_unknown != mSkipTarget && eHTMLTag_title == aTarget) {
-    PRInt32 size = mSkippedContent.GetSize();
-    // Note: The first location of the skipped content 
-    // deque contains the opened-skip-target. Do not include
-    // that when guessing title contents. The term "guessing" 
-    // is used because the document did not contain an end title
-    // and hence it's almost impossible to know what markup
-    // should belong in the title. The assumption used here is that
-    // if the markup is anything other than "text", or "entity" or,
-    // "whitespace" then it's least likely to belong in the title.
-    PRInt32 index;
-    for (index = 1; index < size; index++) {
-      CHTMLToken* token = 
-        NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.ObjectAt(index));
-      NS_ASSERTION(token, "there is a null token in the skipped content list!");
-      eHTMLTokenTypes type = eHTMLTokenTypes(token->GetTokenType());
-      if (eToken_whitespace != type && 
-          eToken_newline != type    && 
-          eToken_text != type       && 
-          eToken_entity != type     &&
-          eToken_attribute != type) {
-        // Now pop the tokens that do not belong ( just a guess work )
-        // in the title and push them into the tokens queue.
-        while (size != index++) {
-          token = NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.Pop()); 
-          mTokenizer->PushTokenFront(token);
-        }
-        break;
-      }
-    }
-  }
   CHTMLToken* target = 
       NS_STATIC_CAST(CHTMLToken*, mTokenAllocator->CreateTokenOfType(aType, aTarget));
   mTokenizer->PushTokenFront(target);
@@ -846,30 +813,6 @@ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){
         return result;
       }
     }
-    else if(mFlags & NS_DTD_FLAG_ALTERNATE_CONTENT) {
-      if(theTag != mBodyContext->Last() || theType!=eToken_end) {
-        // attribute source is a part of start token.
-        if(theType!=eToken_attribute) {
-          aToken->AppendSourceTo(mScratch);
-        }
-        IF_FREE(aToken, mTokenAllocator);
-        return result;
-      }
-      else {
-        // If you're here then we have either seen a /noscript,
-        // or /noframes, or /iframe. After handling the text token 
-        // intentionally fall thro' to handle the current end token.
-        CTextToken theTextToken(mScratch);        
-        result=HandleStartToken(&theTextToken);
-        
-        if(NS_FAILED(result)) {
-          return result;
-        }
-
-        mScratch.Truncate();
-        mScratch.SetCapacity(0);
-      }
-    }
     else if(mFlags & NS_DTD_FLAG_MISPLACED_CONTENT) {
       // Included TD & TH to fix Bug# 20797
       static eHTMLTags gLegalElements[]={eHTMLTag_table,eHTMLTag_thead,eHTMLTag_tbody,
@@ -949,7 +892,9 @@ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){
           }
         default:
           if(!gHTMLElements[eHTMLTag_html].SectionContains(theTag,PR_FALSE)) {
-            if(!(mFlags & (NS_DTD_FLAG_HAD_BODY | NS_DTD_FLAG_HAD_FRAMESET))) {
+            if(!(mFlags & (NS_DTD_FLAG_HAD_BODY |
+                           NS_DTD_FLAG_HAD_FRAMESET |
+                           NS_DTD_FLAG_ALTERNATE_CONTENT))) {
 
               //For bug examples from this code, see bugs: 18928, 20989.
 
@@ -1075,28 +1020,6 @@ nsresult CNavDTD::DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag){
         }//if
       }
       break;
-
-    case eHTMLTag_xmp:
-      //grab the skipped content and dump it out as text...
-      {        
-        STOP_TIMER()
-        MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::DidHandleStartTag(), this=%p\n", this));
-        nsAutoString theString;
-        PRInt32 lineNo = 0;
-        
-        result = CollectSkippedContent(aChildTag, theString, lineNo);
-        NS_ENSURE_SUCCESS(result, result);
-
-        if(0<theString.Length()) {
-          CTextToken *theToken=NS_STATIC_CAST(CTextToken*,mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,theString));
-          nsCParserNode theNode(theToken, mTokenAllocator);
-          result=mSink->AddLeaf(theNode); //when the node get's destructed, so does the new token
-        }
-        MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::DidHandleStartTag(), this=%p\n", this));
-        START_TIMER()
-      }
-      break;
-
 #ifdef DEBUG
     case eHTMLTag_counter:
       {
@@ -2485,30 +2408,10 @@ CNavDTD::CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLine
   PRInt32 tagCount = mSkippedContent.GetSize();
   for (i = 0; i< tagCount; ++i){
     CHTMLToken* theNextToken = (CHTMLToken*)mSkippedContent.PopFront();
-      
     if (theNextToken) {
-      eHTMLTokenTypes theTokenType = (eHTMLTokenTypes)theNextToken->GetTokenType();
-
-      // Dont worry about attributes here because it's already stored in 
-      // the start token as mTrailing content and will get appended in 
-      // start token's GetSource();
-      if (eToken_attribute!=theTokenType) {
-        if ((eToken_entity==theTokenType) &&
-           ((eHTMLTag_textarea == aTag) || (eHTMLTag_title == aTag))) {
-            mScratch.Truncate();
-            ((CEntityToken*)theNextToken)->TranslateToUnicodeStr(mScratch);
-            if (!mScratch.IsEmpty()){
-              aContent.Append(mScratch);
-            }
-            else {
-              // We thought it was an entity but it is not! - bug 79492
-              aContent.Append(PRUnichar('&'));
-              aContent.Append(theNextToken->GetStringValue());
-            }
-          }
-        else theNextToken->AppendSourceTo(aContent);
-      }
+      theNextToken->AppendSourceTo(aContent);
     }
+
     IF_FREE(theNextToken, mTokenAllocator);
   }
   
@@ -3433,8 +3336,7 @@ CNavDTD::OpenContainer(const nsCParserNode *aNode,
       // If the script is disabled noscript should not be
       // in the content model until the layout can somehow
       // turn noscript's display property to block <-- bug 67899
-      if(mFlags & NS_DTD_FLAG_SCRIPT_ENABLED) {
-        mScratch.Truncate();
+      if(mFlags & NS_IPARSER_FLAG_SCRIPT_ENABLED) {
         mFlags |= NS_DTD_FLAG_ALTERNATE_CONTENT;
       }
       break;
@@ -3442,8 +3344,7 @@ CNavDTD::OpenContainer(const nsCParserNode *aNode,
     case eHTMLTag_iframe: // Bug 84491 
     case eHTMLTag_noframes:
       done=PR_FALSE;
-      if(mFlags & NS_DTD_FLAG_FRAMES_ENABLED) {
-        mScratch.Truncate();
+      if(mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED) {
         mFlags |= NS_DTD_FLAG_ALTERNATE_CONTENT;
       }
       break;
diff --git a/parser/htmlparser/src/CParserContext.cpp b/parser/htmlparser/src/CParserContext.cpp
index 36f0b58181a..5197a6488bb 100644
--- a/parser/htmlparser/src/CParserContext.cpp
+++ b/parser/htmlparser/src/CParserContext.cpp
@@ -40,6 +40,7 @@
 #include "CParserContext.h"
 #include "nsToken.h"
 #include "prenv.h"  
+#include "nsIHTMLContentSink.h"
 #include "nsHTMLTokenizer.h"
 #include "nsExpatDriver.h"
 
@@ -169,16 +170,33 @@ void CParserContext::SetMimeType(const nsACString& aMimeType){
 }
 
 nsresult
-CParserContext::GetTokenizer(PRInt32 aType, nsITokenizer*& aTokenizer) {
+CParserContext::GetTokenizer(PRInt32 aType,
+                             nsIContentSink* aSink,
+                             nsITokenizer*& aTokenizer) {
   nsresult result = NS_OK;
   
   if(!mTokenizer) {
     if (aType == NS_IPARSER_FLAG_HTML || mParserCommand == eViewSource) {
-      result = NS_NewHTMLTokenizer(&mTokenizer,mDTDMode,mDocType,mParserCommand);
-      // Propagate tokenizer state so that information is preserved
-      // between document.write. This fixes bug 99467
-      if (mTokenizer && mPrevContext)
-        mTokenizer->CopyState(mPrevContext->mTokenizer);
+      nsCOMPtr<nsIHTMLContentSink> theSink = do_QueryInterface(aSink);
+      PRUint16 theFlags = 0;
+
+      if (theSink) {
+        // XXX Repeated here and in CNavDTD; can the two callsites be combined?
+        // Start at PR_TRUE (like CNavDTD) in case IsEnabled leaves it unset.
+        PRBool enabled = PR_TRUE;
+        theSink->IsEnabled(eHTMLTag_frameset, &enabled);
+        if(enabled) {
+          theFlags |= NS_IPARSER_FLAG_FRAMES_ENABLED;
+        }
+        
+        theSink->IsEnabled(eHTMLTag_script, &enabled);
+        if(enabled) {
+          theFlags |= NS_IPARSER_FLAG_SCRIPT_ENABLED;
+        }
+      }
+
+      result = NS_NewHTMLTokenizer(&mTokenizer,mDTDMode,mDocType,
+                                   mParserCommand,theFlags);
     }
     else if (aType == NS_IPARSER_FLAG_XML)
     {
diff --git a/parser/htmlparser/src/CParserContext.h b/parser/htmlparser/src/CParserContext.h
index 695a8603d45..f73531b7ce3 100644
--- a/parser/htmlparser/src/CParserContext.h
+++ b/parser/htmlparser/src/CParserContext.h
@@ -76,7 +76,9 @@ public:
     CParserContext( const CParserContext& aContext);
     ~CParserContext();
 
-    nsresult GetTokenizer(PRInt32 aType, nsITokenizer*& aTokenizer);
+    nsresult GetTokenizer(PRInt32 aType,
+                          nsIContentSink* aSink,
+                          nsITokenizer*& aTokenizer);
     void  SetMimeType(const nsACString& aMimeType);
 
     nsCOMPtr<nsIRequest> mRequest; // provided by necko to differnciate different input streams
diff --git a/parser/htmlparser/src/nsElementTable.cpp b/parser/htmlparser/src/nsElementTable.cpp
index d32ffc3e0b4..763e3111349 100644
--- a/parser/htmlparser/src/nsElementTable.cpp
+++ b/parser/htmlparser/src/nsElementTable.cpp
@@ -1228,9 +1228,9 @@ void InitializeElementTable(void) {
       /*req-parent excl-parent*/          eHTMLTag_unknown,eHTMLTag_unknown,
 	    /*rootnodes,endrootnodes*/          &gRootTags,&gRootTags,	
       /*autoclose starttags and endtags*/ 0,0,0,0,
-      /*parent,incl,exclgroups*/          kInlineEntity|kPreformatted, kNone, kNone,	
+      /*parent,incl,exclgroups*/          kInlineEntity|kPreformatted, kCDATA, kNone,	
       /*special props, prop-range*/       kNone,kDefaultPropRange,
-      /*special parents,kids,skip*/       0,0,eHTMLTag_xmp);
+      /*special parents,kids,skip*/       0,0,eHTMLTag_unknown);
 
     Initialize( 
       /*tag*/                             eHTMLTag_text,
@@ -2270,7 +2270,8 @@ PRBool nsHTMLElement::CanContain(eHTMLTags aChild,nsDTDMode aMode) const{
     }
 
     if(nsHTMLElement::IsTextTag(aChild)) {
-      if(nsHTMLElement::IsInlineParent(mTagID)){
+      // Allow <xmp> to contain text.
+      if(nsHTMLElement::IsInlineParent(mTagID) || CanContainType(kCDATA)){
         return PR_TRUE;
       }
     }
diff --git a/parser/htmlparser/src/nsExpatDriver.cpp b/parser/htmlparser/src/nsExpatDriver.cpp
index a559dca2de6..0c247bbad21 100644
--- a/parser/htmlparser/src/nsExpatDriver.cpp
+++ b/parser/htmlparser/src/nsExpatDriver.cpp
@@ -1168,12 +1168,6 @@ nsExpatDriver::PrependTokens(nsDeque& aDeque)
 
 }
 
-NS_IMETHODIMP
-nsExpatDriver::CopyState(nsITokenizer* aTokenizer)
-{
-  return NS_OK;
-}
-
 NS_IMETHODIMP
 nsExpatDriver::HandleToken(CToken* aToken,nsIParser* aParser)
 {
diff --git a/parser/htmlparser/src/nsHTMLTokenizer.cpp b/parser/htmlparser/src/nsHTMLTokenizer.cpp
index fcab7d35c76..157172b826f 100644
--- a/parser/htmlparser/src/nsHTMLTokenizer.cpp
+++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp
@@ -103,13 +103,14 @@ nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
 nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult,
                                          PRInt32 aFlag,
                                          eParserDocType aDocType, 
-                                         eParserCommands aCommand) 
+                                         eParserCommands aCommand,
+                                         PRInt32 aFlags) 
 {
   NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr");
   if (nsnull == aInstancePtrResult) {
     return NS_ERROR_NULL_POINTER;
   }
-  nsHTMLTokenizer* it = new nsHTMLTokenizer(aFlag,aDocType,aCommand);
+  nsHTMLTokenizer* it = new nsHTMLTokenizer(aFlag,aDocType,aCommand,aFlags);
   if (nsnull == it) {
     return NS_ERROR_OUT_OF_MEMORY;
   }
@@ -128,23 +129,24 @@ NS_IMPL_RELEASE(nsHTMLTokenizer)
  *  @param   
  *  @return  
  */
- nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode,
-                                  eParserDocType aDocType,
-                                  eParserCommands aCommand) :
-  nsITokenizer(), mTokenDeque(0)
+nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode,
+                                 eParserDocType aDocType,
+                                 eParserCommands aCommand,
+                                 PRUint16 aFlags) :
+  nsITokenizer(), mTokenDeque(0), mFlags(aFlags)
 {
   if (aParseMode==eDTDMode_full_standards ||
       aParseMode==eDTDMode_almost_standards) {
-    mFlags = NS_IPARSER_FLAG_STRICT_MODE;
+    mFlags |= NS_IPARSER_FLAG_STRICT_MODE;
   }
   else if (aParseMode==eDTDMode_quirks)  {
-    mFlags = NS_IPARSER_FLAG_QUIRKS_MODE;
+    mFlags |= NS_IPARSER_FLAG_QUIRKS_MODE;
   }
   else if (aParseMode==eDTDMode_autodetect) {
-    mFlags = NS_IPARSER_FLAG_AUTO_DETECT_MODE;
+    mFlags |= NS_IPARSER_FLAG_AUTO_DETECT_MODE;
   }
   else {
-    mFlags = NS_IPARSER_FLAG_UNKNOWN_MODE;
+    mFlags |= NS_IPARSER_FLAG_UNKNOWN_MODE;
   }
 
   if (aDocType==ePlainText) {
@@ -167,7 +169,6 @@ NS_IMPL_RELEASE(nsHTMLTokenizer)
 
   mTokenAllocator = nsnull;
   mTokenScanPos = 0;
-  mPreserveTarget = eHTMLTag_unknown;
 }
 
 
@@ -311,19 +312,6 @@ void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){
 
 }
 
-NS_IMETHODIMP
-nsHTMLTokenizer::CopyState(nsITokenizer* aTokenizer)
-{
-  if (aTokenizer) {
-    mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT;
-    mPreserveTarget =
-      NS_STATIC_CAST(nsHTMLTokenizer*, aTokenizer)->mPreserveTarget;
-    if (mPreserveTarget != eHTMLTag_unknown)
-      mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT;
-  }
-  return NS_OK;
-}
-
 /**
  * This is a utilty method for ScanDocStructure, which finds a given
  * tag in the stack.
@@ -649,10 +637,11 @@ nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar,
         const nsSubstring& key=theToken->GetKey();
         const nsAString& text=theToken->GetValue();
 
-         // support XML like syntax to fix bugs like 44186
         if(!key.IsEmpty() && kForwardSlash==key.First() && text.IsEmpty()) {
-          isUsableAttr = PRBool(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE); // Fix bug 103095
-          aToken->SetEmpty(isUsableAttr);
+          if(!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+            // We only care about these in view-source.
+            isUsableAttr = PR_FALSE;
+          }
         }
         if(isUsableAttr) {
           ++theAttrCount;
@@ -721,10 +710,6 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
   aToken=theAllocator->CreateTokenOfType(eToken_start,eHTMLTag_unknown);
   
   if(aToken) {
-    // Save the position after '<' for use in recording traling contents. Ref: Bug. 15204.
-    nsScannerIterator origin;
-    aScanner.CurrentPosition(origin);
-
     result= aToken->Consume(aChar,aScanner,mFlags);     //tell new token to finish consuming text...    
 
     if(NS_SUCCEEDED(result)) {
@@ -757,68 +742,81 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
        */
       if(NS_SUCCEEDED(result) && !(mFlags & NS_IPARSER_FLAG_XML)) {
         CStartToken* theStartToken = NS_STATIC_CAST(CStartToken*,aToken);
-        //XXX - Find a better soution to record content
-        if(!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) &&
-           (theTag == eHTMLTag_textarea  ||
-            theTag == eHTMLTag_xmp       || 
-            theTag == eHTMLTag_noscript  ||
-            theTag == eHTMLTag_noframes)) {
-          NS_ASSERTION(mPreserveTarget == eHTMLTag_unknown,
-                       "mPreserveTarget set but not preserving content?");
-          mPreserveTarget = theTag;
-          mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT;
+
+        PRBool isCDATA = gHTMLElements[theTag].CanContainType(kCDATA);
+        PRBool isPCDATA = eHTMLTag_textarea == theTag ||
+                          eHTMLTag_title    == theTag;
+
+        if ((eHTMLTag_iframe == theTag   && (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
+            (eHTMLTag_noframes == theTag && (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
+            (eHTMLTag_noscript == theTag && (mFlags & NS_IPARSER_FLAG_SCRIPT_ENABLED))) {
+          isCDATA = PR_TRUE;
         }
-          
-        if (mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) 
-          PreserveToken(theStartToken, aScanner, origin);
-        
-        //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) {
-        if(gHTMLElements[theTag].CanContainType(kCDATA)) {
+
+
+        if (isCDATA || isPCDATA) {
+          PRBool done = PR_FALSE;
           nsDependentString endTagName(nsHTMLTags::GetStringValue(theTag)); 
 
-          CToken*     text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
-          CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text);
+          CToken* text =
+              theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
+          CTextToken* textToken = NS_STATIC_CAST(CTextToken*,text);
 
-          //tell new token to finish consuming text...    
-          result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,
-                                         aScanner,
-                                         endTagName,
-                                         mFlags,
-                                         aFlushTokens);
-          
-          // Fix bug 44186
-          // Support XML like syntax, i.e., <script src="external.js"/> == <script src="external.js"></script>
-          // Note: if aFlushTokens is TRUE then we have seen an </script>
-          // We do NOT want to output the end token if we didn't see a
-          // </script> and have a preserve target.  If that happens, then we'd
-          // be messing up the text inside the <textarea> or <xmp> or whatever
-          // it is.
-          if((!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) &&
-              !theStartToken->IsEmpty()) || aFlushTokens) {
-            // Setting this would make cases like <script/>d.w("text");</script> work.
-            theStartToken->SetEmpty(PR_FALSE);
-            // do this up here so we can just add the end token later on
-            AddToken(text,result,&mTokenDeque,theAllocator);
+          if (isCDATA) {
+            // The only tags that consume conservatively are <script> and
+            // <style>, the rest all consume until the end of the document.
+            result = textToken->ConsumeCharacterData(0,
+                                                     theTag==eHTMLTag_script ||
+                                                     theTag==eHTMLTag_style,
+                                                     theTag!=eHTMLTag_script,
+                                                     aScanner,
+                                                     endTagName,
+                                                     mFlags,
+                                                     done);
+            aFlushTokens = done;
+          }
+          else if (isPCDATA) {
+            // Title is consumed conservatively in order to not regress
+            // bug 42945
+            result = textToken->ConsumeParsedCharacterData(0,
+                                                           theTag==eHTMLTag_title,
+                                                           aScanner,
+                                                           endTagName,
+                                                           mFlags,
+                                                           done);
 
-            CToken* endToken=nsnull;
+            // Note: we *don't* set aFlushTokens here.
+          }
+
+          // We want to do this unless result is kEOF, in which case we will
+          // simply unwind our stack and wait for more data anyway.
+          if (kEOF != result) {
+            AddToken(text,NS_OK,&mTokenDeque,theAllocator);
+            CToken* endToken = nsnull;
             
-            if (NS_SUCCEEDED(result) && aFlushTokens) {
+            if (NS_SUCCEEDED(result) && done) {
               PRUnichar theChar;
               // Get the <
               result = aScanner.GetChar(theChar);
               NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kLessThan,
-                           "CTextToken::ConsumeUntil is broken!");
+                           "CTextToken::Consume*Data is broken!");
 #ifdef DEBUG
               // Ensure we have a /
               PRUnichar tempChar;  // Don't change non-debug vars in debug-only code
               result = aScanner.Peek(tempChar);
               NS_ASSERTION(NS_SUCCEEDED(result) && tempChar == kForwardSlash,
-                           "CTextToken::ConsumeUntil is broken!");
+                           "CTextToken::Consume*Data is broken!");
 #endif
               result = ConsumeEndTag(PRUnichar('/'),endToken,aScanner);
-            } else if (!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+            } else if (result == kFakeEndTag && 
+                      !(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+              result = NS_OK;
               endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName);
               AddToken(endToken,result,&mTokenDeque,theAllocator);
+            } else if (result == kFakeEndTag) {
+              // If we are here, we are both faking having seen the end tag
+              // and are in view-source.
+              result = NS_OK;
             }
           }
           else {
@@ -826,7 +824,7 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
           }
         }
       }
- 
+
       //EEEEECCCCKKKK!!! 
       //This code is confusing, so pay attention.
       //If you're here, it's because we were in the midst of consuming a start
@@ -879,15 +877,6 @@ nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanne
       aScanner.GetChar(aChar);
     }        
 
-    if (NS_SUCCEEDED(result)) {
-      eHTMLTags theTag = (eHTMLTags)aToken->GetTypeID();
-      if (mPreserveTarget == theTag) {
-        // Target reached. Stop preserving content.
-        mPreserveTarget = eHTMLTag_unknown;
-        mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT;
-      }
-    }
-
     // Do the same thing as we do in ConsumeStartTag. Basically, if we've run
     // out of room in this *section* of the document, pop all of the tokens
     // we've consumed this round and wait for more data.
@@ -984,6 +973,12 @@ nsresult nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,CToken*& aToken,nsScann
     result=aToken->Consume(aChar,aScanner,mFlags);
     AddToken(aToken,result,&mTokenDeque,theAllocator);
   }
+
+  if (kNotAComment == result) {
+    // AddToken has IF_FREE()'d our token, so...
+    return ConsumeText(aToken, aScanner);
+  }
+
   return result;
 }
 
@@ -1109,33 +1104,3 @@ nsresult nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,CToken*&
   }
   return result;
 }
-
-/**
- *  This method keeps a copy of contents within the start token.
- *  The stored content could later be used in displaying TEXTAREA, 
- *  and also in view source.
- *  
- *  @update harishd 11/09/99
- *  @param  aStartToken: The token whose trailing contents are to be recorded
- *  @param  aScanner: see nsScanner.h
- *  
- */
-
-void nsHTMLTokenizer::PreserveToken(CStartToken* aStartToken, 
-                                    nsScanner& aScanner, 
-                                    nsScannerIterator aOrigin) {
-  if(aStartToken) {
-    nsScannerIterator theCurrentPosition;
-    aScanner.CurrentPosition(theCurrentPosition);
-
-    nsString& trailingContent = aStartToken->mTrailingContent;
-    PRUint32 oldLength = trailingContent.Length();
-    trailingContent.SetLength(oldLength + Distance(aOrigin, theCurrentPosition));
-
-    nsWritingIterator<PRUnichar> beginWriting;
-    trailingContent.BeginWriting(beginWriting);
-    beginWriting.advance(oldLength);
-
-    copy_string( aOrigin, theCurrentPosition, beginWriting );
-  }
-}
diff --git a/parser/htmlparser/src/nsHTMLTokenizer.h b/parser/htmlparser/src/nsHTMLTokenizer.h
index 7cb8a416941..8e5000d7020 100644
--- a/parser/htmlparser/src/nsHTMLTokenizer.h
+++ b/parser/htmlparser/src/nsHTMLTokenizer.h
@@ -58,7 +58,6 @@
   {0xe4238ddd, 0x9eb6, 0x11d2, \
   {0xba, 0xa5, 0x0,     0x10, 0x4b, 0x98, 0x3f, 0xd4 }}
 
-
 /***************************************************************
   Notes: 
  ***************************************************************/
@@ -74,7 +73,8 @@ public:
   NS_DECL_NSITOKENIZER
   nsHTMLTokenizer(PRInt32 aParseMode = eDTDMode_quirks,
                   eParserDocType aDocType = eHTML3_Quirks,
-                  eParserCommands aCommand = eViewNormal);
+                  eParserCommands aCommand = eViewNormal,
+                  PRUint16 aPrefs = 0);
   virtual ~nsHTMLTokenizer();
 
 protected:
@@ -93,8 +93,6 @@ protected:
 
   nsresult ScanDocStructure(PRBool aIsFinalChunk);
 
-  virtual void PreserveToken(CStartToken* aStartToken, nsScanner& aScanner, nsScannerIterator aOrigin);
-
   static void AddToken(CToken*& aToken,nsresult aResult,nsDeque* aDeque,nsTokenAllocator* aTokenAllocator);
 
   nsDeque            mTokenDeque;
@@ -102,12 +100,11 @@ protected:
   nsTokenAllocator*  mTokenAllocator;
   PRInt32            mTokenScanPos;
   PRUint32           mFlags;
-  eHTMLTags          mPreserveTarget; // Tag whose content is preserved
 };
 
 extern nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult,
                                     PRInt32 aMode,eParserDocType aDocType,
-                                    eParserCommands aCommand);
+                                    eParserCommands aCommand, PRInt32 aFlags);
 
 #endif
 
diff --git a/parser/htmlparser/src/nsHTMLTokens.cpp b/parser/htmlparser/src/nsHTMLTokens.cpp
index d8bb3046037..935c31db280 100644
--- a/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -61,7 +61,139 @@ static const PRUnichar kAttributeTerminalChars[] = {
   PRUnichar('>'),  
   PRUnichar(0) 
 };
-                   
+
+static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue);
+/*
+ *  @param   aScanner -- controller of underlying input source
+ *  @param   aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities...
+ *  @return  error result
+ *
+ */
+static
+nsresult ConsumeEntity(nsScannerSharedSubstring& aString,
+                       nsScanner& aScanner,
+                       PRInt32 aFlag) 
+{
+  nsresult result=NS_OK;
+
+  PRUnichar ch;
+  result=aScanner.Peek(ch, 1);
+
+  if (NS_SUCCEEDED(result)) {
+    PRUnichar amp=0;
+    PRInt32 theNCRValue=0;
+    nsAutoString entity;
+
+    if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+      result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
+      if (NS_SUCCEEDED(result)) {
+        theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
+        PRUnichar theTermChar=entity.Last();
+        // If an entity value is greater than 255 then:
+        // Nav 4.x does not treat it as an entity,
+        // IE treats it as an entity if terminated with a semicolon.
+        // Resembling IE!!
+
+        nsSubstring &writable = aString.writable();
+        if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) {
+          // Looks like we're not dealing with an entity
+          writable.Append(kAmpersand);
+          writable.Append(entity);
+        }
+        else {
+          // A valid entity so reduce it.
+          writable.Append(PRUnichar(theNCRValue));
+        }
+      }
+    }
+    else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
+      result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
+      if (NS_SUCCEEDED(result)) {
+        nsSubstring &writable = aString.writable();
+        if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
+          // Looked like an entity but it's not
+          aScanner.GetChar(amp);
+          writable.Append(amp);
+          result = NS_OK; // just being safe..
+        }
+        else {
+          PRInt32 err;
+          theNCRValue=entity.ToInteger(&err,kAutoDetect);
+          AppendNCR(writable, theNCRValue);
+        }
+      }
+    }
+    else {
+      // What we thought as entity is not really an entity...
+      aScanner.GetChar(amp);
+      aString.writable().Append(amp);
+    }//if
+  }
+
+  return result;
+}
+
+/*
+ *  This general purpose method is used when you want to
+ *  consume attributed text value. 
+ *  Note: It also reduces entities.
+ *
+ *  @param   aNewlineCount -- the newline count to increment when hitting newlines
+ *  @param   aScanner -- controller of underlying input source
+ *  @param   aEndCondition -- characters that stop consuming attribute.
+ *  @param   aAllowNewlines -- whether to allow newlines in the value.
+ *                             XXX it would be nice to roll this info into
+ *                             aEndCondition somehow....
+ *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
+ *  @return  error result
+ */
+static
+nsresult ConsumeUntil(nsScannerSharedSubstring& aString,
+                      PRInt32& aNewlineCount,
+                      nsScanner& aScanner,
+                      const nsReadEndCondition& aEndCondition,
+                      PRBool aAllowNewlines,
+                      PRInt32 aFlag)
+{
+  nsresult result = NS_OK;
+  PRBool   done = PR_FALSE;
+  
+  do {
+    result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE);
+    if(NS_SUCCEEDED(result)) {
+      PRUnichar ch;
+      aScanner.Peek(ch);
+      if(ch == kAmpersand) {
+        result = ConsumeEntity(aString,aScanner,aFlag);
+      }
+      else if(ch == kCR && aAllowNewlines) {
+        aScanner.GetChar(ch);
+        result = aScanner.Peek(ch);
+        if (NS_SUCCEEDED(result)) {
+          nsSubstring &writable = aString.writable();
+          if(ch == kNewLine) {
+            writable.AppendLiteral("\r\n");
+            aScanner.GetChar(ch);
+          }
+          else {
+            writable.Append(PRUnichar('\r'));
+          }
+          ++aNewlineCount;
+        }
+      }
+      else if(ch == kNewLine && aAllowNewlines) {
+        aScanner.GetChar(ch);
+        aString.writable().Append(PRUnichar('\n'));
+        ++aNewlineCount;
+      }
+      else {
+        done = PR_TRUE;
+      }
+    }
+  } while (NS_SUCCEEDED(result) && !done);
+
+  return result;
+}
 
 /**************************************************************
   And now for the token classes...
@@ -244,15 +376,12 @@ void CStartToken::AppendSourceTo(nsAString& anOutputString){
   /*
    * Watch out for Bug 15204 
    */
-  if(!mTrailingContent.IsEmpty())
-    anOutputString.Append(mTrailingContent);
-  else {
-    if(!mTextValue.IsEmpty())
-      anOutputString.Append(mTextValue);
-    else
-     anOutputString.Append(GetTagName(mTypeID));
-    anOutputString.Append(PRUnichar('>'));
-  }
+  if(!mTextValue.IsEmpty())
+    anOutputString.Append(mTextValue);
+  else
+    anOutputString.Append(GetTagName(mTypeID));
+
+  anOutputString.Append(PRUnichar('>'));
 }
 
 /*
@@ -293,8 +422,8 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
     mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str());
     // Save the original tag string if this is user-defined or if we
     // are viewing source
-    if(eHTMLTag_userdefined==mTypeID ||
-       (aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT))) {
+    if(eHTMLTag_userdefined==mTypeID || 
+       (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
       mTextValue = tagIdent.str();
     }
   }
@@ -388,6 +517,7 @@ void CEndToken::AppendSourceTo(nsAString& anOutputString){
     anOutputString.Append(mTextValue);
   else
     anOutputString.Append(GetTagName(mTypeID));
+
   anOutputString.Append(PRUnichar('>'));
 }
 
@@ -498,14 +628,24 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
  *  Consume as much clear text from scanner as possible.
  *  The scanner is left on the < of the perceived end tag.
  *
- *  @update  gess 3/25/98
  *  @param   aChar -- last char consumed from stream
+ *  @param   aConservativeConsume -- controls our handling of content with no
+ *                                   terminating string.
+ *  @param   aIgnoreComments -- whether or not we should take comments into
+ *                              account in looking for the end tag.
  *  @param   aScanner -- controller of underlying input source
+ *  @param   aEndTagName -- the terminal tag name.
+ *  @param   aFlag -- dtd modes and such.
+ *  @param   aFlushTokens -- PR_TRUE if we found the terminal tag.
  *  @return  error result
  */
-nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
-                                  const nsAString& aEndTagName,PRInt32 aFlag,
-                                  PRBool& aFlushTokens){
+nsresult CTextToken::ConsumeCharacterData(PRUnichar aChar,
+                                          PRBool aConservativeConsume,
+                                          PRBool aIgnoreComments,
+                                          nsScanner& aScanner,
+                                          const nsAString& aEndTagName,
+                                          PRInt32 aFlag,
+                                          PRBool& aFlushTokens) {
   nsresult      result=NS_OK;
   nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
   PRBool        done=PR_FALSE;
@@ -532,7 +672,9 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
   // 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
   // 5. If the end of the document is reached and if we still don't have the condition in step 4. then
   //    assume that the prematured terminal string is the actual terminal string and goto step 1. This
-  //    will be our last iteration.
+  //    will be our last iteration. If there is no premature terminal string and we're being
+  //    conservative in our consumption (aConservativeConsume), then don't consume anything
+  //    from the scanner. Otherwise, we consume all the way until the end (for <xmp>).
 
   NS_NAMED_LITERAL_STRING(ltslash, "</");
   const nsString theTerminalString = ltslash + aEndTagName;
@@ -609,20 +751,171 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
       // a) when the buffer runs out ot data.
       // b) when the terminal string is not found.
       if(!aScanner.IsIncremental()) {
-        if(theAltTermStrPos != endPos) {
+        if(theAltTermStrPos != endPos && aConservativeConsume) {
           // If you're here it means..we hit the rock bottom and therefore switch to plan B.
           theCurrOffset = theAltTermStrPos;
           theLastIteration = PR_TRUE;
         }
-        else {
+        else if (!aConservativeConsume) {
           done = PR_TRUE; // Do this to fix Bug. 35456
+          result = kFakeEndTag;
+          aScanner.BindSubstring(mTextValue, theStartOffset, endPos.advance(-1));
+          aScanner.SetPosition(endPos.advance(1));
+        }
+        else {
+          done = PR_TRUE;
+          result = kFakeEndTag;
+          // We need to bind our value to a valid, albeit empty, substring.
+          aScanner.BindSubstring(mTextValue, theStartOffset, theStartOffset);
         }
       }
       else {
-       result=kEOF;
+        result=kEOF;
       }
     }
   }
+
+  return result;
+}
+
+/*
+ *  Consume as much clear text from scanner as possible, reducing entities.
+ *  The scanner is left on the < of the perceived end tag.
+ *
+ *  @param   aChar -- last char consumed from stream
+ *  @param   aConservativeConsume -- controls our handling of content with no
+ *                                   terminating string.
+ *  @param   aScanner -- controller of underlying input source
+ *  @param   aEndTagName -- the terminal tag name.
+ *  @param   aFlag -- dtd modes and such.
+ *  @param   aFound -- PR_TRUE if we found the terminal tag (or faked one).
+ *  @return  error result
+ */
+nsresult CTextToken::ConsumeParsedCharacterData(PRUnichar aChar,
+                                                PRBool aConservativeConsume,
+                                                nsScanner& aScanner,
+                                                const nsAString& aEndTagName,
+                                                PRInt32 aFlag,
+                                                PRBool& aFound)
+{
+  // This function is fairly straightforward except if there is no terminating
+  // string. If there is, we simply loop through all of the entities, reducing
+  // them as necessary and skipping over non-terminal strings starting with <.
+  // If there is *no* terminal string, then we examine aConservativeConsume.
+  // If we want to be conservative, we backtrack to the first place in the
+  // document that looked like the end of PCDATA (i.e., the first tag). This
+  // is for compatibility and so we don't regress bug 42945. If we are not
+  // conservative, then we consume everything, all the way up to the end of
+  // the document.
+
+  static const PRUnichar terminalChars[] = {
+    PRUnichar('&'), PRUnichar('<'),
+    PRUnichar(0)
+  };
+  static const nsReadEndCondition theEndCondition(terminalChars);
+
+  nsScannerIterator currPos,endPos,altEndPos;
+  PRUint32 truncPos = 0;
+  aScanner.CurrentPosition(currPos);
+  aScanner.EndReading(endPos);
+
+  altEndPos = endPos;
+
+  nsScannerSharedSubstring theContent;
+  PRUnichar ch = 0;
+
+  NS_NAMED_LITERAL_STRING(commentStart, "<!--");
+  NS_NAMED_LITERAL_STRING(ltslash, "</");
+  const nsString theTerminalString = ltslash + aEndTagName;
+  PRUint32 termStrLen = theTerminalString.Length();
+  PRUint32 commentStartLen = commentStart.Length();
+
+  nsresult result = NS_OK;
+
+  while (currPos != endPos) {
+    result = ConsumeUntil(theContent, mNewlineCount, aScanner, 
+                          theEndCondition, PR_TRUE, aFlag);
+
+    if (NS_FAILED(result)) {
+      if (kEOF == result && !aScanner.IsIncremental()) {
+        aFound = PR_TRUE; // this is as good as it gets.
+        result = kFakeEndTag;
+
+        if (aConservativeConsume && altEndPos != endPos) {
+          // We ran out of room looking for a </title>. Go back to the first
+          // place that looked like a tag and use that as our stopping point.
+          theContent.writable().Truncate(truncPos);
+          aScanner.SetPosition(altEndPos);
+        }
+        // else we take everything we consumed.
+        mTextValue.Rebind(theContent.str());
+      }
+      else {
+        aFound = PR_FALSE;
+      }
+
+      return result;
+    }
+
+    aScanner.CurrentPosition(currPos);
+    aScanner.GetChar(ch); // this character must be '&' or '<'
+
+    if (ch == kLessThan && altEndPos == endPos) {
+      // Keep this position in case we need it for later.
+      altEndPos = currPos;
+      truncPos = theContent.str().Length();
+    }
+
+    if (Distance(currPos, endPos) >= termStrLen) {
+      nsScannerIterator start(currPos), end(currPos);
+      end.advance(termStrLen);
+
+      if (CaseInsensitiveFindInReadable(theTerminalString,start,end)) {
+        if (end != endPos && (*end == '>'  || *end == ' '  || 
+                              *end == '\t' || *end == '\n' || 
+                              *end == '\r' || *end == '\b')) {
+          aFound = PR_TRUE;
+          mTextValue.Rebind(theContent.str());
+          aScanner.SetPosition(currPos);
+          break;
+        }
+      }
+    }
+    // IE only consumes <!-- --> as comments in PCDATA. We'll accept a bit
+    // more in quirks mode, but let's ensure that this really is a comment
+    // start to maintain the illusion of compatibility.
+    if (Distance(currPos, endPos) >= commentStartLen) {
+      nsScannerIterator start(currPos), end(currPos);
+      end.advance(commentStartLen);
+
+      if (CaseInsensitiveFindInReadable(commentStart,start,end)) {
+        CCommentToken consumer; // stack allocated.
+
+        // CCommentToken expects us to be on the '-'
+        aScanner.SetPosition(currPos.advance(2));
+        result = consumer.Consume(*currPos, aScanner, aFlag);
+        if (kEOF == result) {
+          return kEOF; // this can only happen if we're really out of space.
+        }
+        else if (kNotAComment == result) {
+          // Fall through and consume this as text.
+          aScanner.CurrentPosition(currPos);
+          aScanner.SetPosition(currPos.advance(1));
+        }
+        else {
+          consumer.AppendSourceTo(theContent.writable());
+          mNewlineCount += consumer.GetNewlineCount();
+          continue;
+        }
+      }
+    }
+
+    result = kEOF;
+    // We did not find the terminal string yet so
+    // include the character that stopped consumption.
+    theContent.writable().Append(ch);
+  }
+
   return result;
 }
 
@@ -1036,10 +1329,9 @@ nsresult CCommentToken::ConsumeStrictComment(nsScanner& aScanner)
     return kEOF; // not really an nsresult, but...
   }
 
-  // XXX We should return kNotAComment, parse comment open as text, and parse
-  //     the rest of the document normally. Now we ALMOST do that: <! is
-  //     missing from the content model.
-  return NS_OK;
+  // There was no terminating string, parse this comment as text.
+  aScanner.SetPosition(lt);
+  return kNotAComment;
 }
 
 nsresult CCommentToken::ConsumeQuirksComment(nsScanner& aScanner) 
@@ -1435,140 +1727,6 @@ void CAttributeToken::AppendSourceTo(nsAString& anOutputString){
   // anOutputString.AppendLiteral(";");
 }
 
-static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue);
-/*
- *  @param   aScanner -- controller of underlying input source
- *  @param   aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities...
- *  @return  error result
- *
- */
-static
-nsresult ConsumeAttributeEntity(nsScannerSharedSubstring& aString,
-                                nsScanner& aScanner,
-                                PRInt32 aFlag) 
-{
- 
-  nsresult result=NS_OK;
-
-  PRUnichar ch;
-  result=aScanner.Peek(ch, 1);
-
-  if (NS_SUCCEEDED(result)) {
-    PRUnichar amp=0;
-    PRInt32 theNCRValue=0;
-    nsAutoString entity;
-
-    if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
-      result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
-      if (NS_SUCCEEDED(result)) {
-        theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
-        PRUnichar theTermChar=entity.Last();
-        // If an entity value is greater than 255 then:
-        // Nav 4.x does not treat it as an entity,
-        // IE treats it as an entity if terminated with a semicolon.
-        // Resembling IE!!
-
-        nsSubstring &writable = aString.writable();
-        if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) {
-          // Looks like we're not dealing with an entity
-          writable.Append(kAmpersand);
-          writable.Append(entity);
-        }
-        else {
-          // A valid entity so reduce it.
-          writable.Append(PRUnichar(theNCRValue));
-        }
-      }
-    }
-    else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
-      result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
-      if (NS_SUCCEEDED(result)) {
-        nsSubstring &writable = aString.writable();
-        if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
-          // Looked like an entity but it's not
-          aScanner.GetChar(amp);
-          writable.Append(amp);
-          result = NS_OK; // just being safe..
-        }
-        else {
-          PRInt32 err;
-          theNCRValue=entity.ToInteger(&err,kAutoDetect);
-          AppendNCR(writable, theNCRValue);
-        }
-      }
-    }
-    else {
-      // What we thought as entity is not really an entity...
-      aScanner.GetChar(amp);
-      aString.writable().Append(amp);
-    }//if
-  }
-
-  return result;
-}
-
-/*
- *  This general purpose method is used when you want to
- *  consume attributed text value. 
- *  Note: It also reduces entities within attributes.
- *
- *  @param   aNewlineCount -- the newline count to increment when hitting newlines
- *  @param   aScanner -- controller of underlying input source
- *  @param   aTerminalChars -- characters that stop consuming attribute.
- *  @param   aAllowNewlines -- whether to allow newlines in the value.
- *                             XXX it would be nice to roll this info into
- *                             aTerminalChars somehow....
- *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
- *  @return  error result
- */
-static
-nsresult ConsumeAttributeValueText(nsScannerSharedSubstring& aString,
-                                   PRInt32& aNewlineCount,
-                                   nsScanner& aScanner,
-                                   const nsReadEndCondition& aEndCondition,
-                                   PRBool aAllowNewlines,
-                                   PRInt32 aFlag)
-{
-  nsresult result = NS_OK;
-  PRBool   done = PR_FALSE;
-  
-  do {
-    result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE);
-    if(NS_SUCCEEDED(result)) {
-      PRUnichar ch;
-      aScanner.Peek(ch);
-      if(ch == kAmpersand) {
-        result = ConsumeAttributeEntity(aString,aScanner,aFlag);
-      }
-      else if(ch == kCR && aAllowNewlines) {
-        aScanner.GetChar(ch);
-        result = aScanner.Peek(ch);
-        if (NS_SUCCEEDED(result)) {
-          nsSubstring &writable = aString.writable();
-          if(ch == kNewLine) {
-            writable.AppendLiteral("\r\n");
-            aScanner.GetChar(ch);
-          }
-          else {
-            writable.Append(PRUnichar('\r'));
-          }
-          ++aNewlineCount;
-        }
-      }
-      else if(ch == kNewLine && aAllowNewlines) {
-        aScanner.GetChar(ch);
-        aString.writable().Append(PRUnichar('\n'));
-        ++aNewlineCount;
-      }
-      else {
-        done = PR_TRUE;
-      }
-    }
-  } while (NS_SUCCEEDED(result) && !done);
-
-  return result;
-}
-
 /*
  *  This general purpose method is used when you want to
  *  consume a known quoted string. 
@@ -1609,8 +1767,8 @@ nsresult ConsumeQuotedString(PRUnichar aChar,
   nsScannerIterator theOffset;
   aScanner.CurrentPosition(theOffset);
 
-  result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner,
-                                   *terminateCondition,PR_TRUE,aFlag);
+  result=ConsumeUntil(aString,aNewlineCount,aScanner,
+                      *terminateCondition,PR_TRUE,aFlag);
 
   if(NS_SUCCEEDED(result)) {
     result = aScanner.GetChar(aChar); // aChar should be " or '
@@ -1625,8 +1783,8 @@ nsresult ConsumeQuotedString(PRUnichar aChar,
       theAttributeTerminator(kAttributeTerminalChars);
     aString.writable().Truncate(origLen);
     aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE);
-    result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner,
-                                     theAttributeTerminator,PR_FALSE,aFlag);
+    result=ConsumeUntil(aString,aNewlineCount,aScanner,
+                        theAttributeTerminator,PR_FALSE,aFlag);
     if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
       // Remember that this string literal was unterminated.
       result = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
@@ -1770,12 +1928,12 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a
                   else {
                     static const nsReadEndCondition
                       theAttributeTerminator(kAttributeTerminalChars);
-                    result=ConsumeAttributeValueText(mTextValue,
-                                                     mNewlineCount,
-                                                     aScanner,
-                                                     theAttributeTerminator,
-                                                     PR_FALSE,
-                                                     aFlag);
+                    result=ConsumeUntil(mTextValue,
+                                        mNewlineCount,
+                                        aScanner,
+                                        theAttributeTerminator,
+                                        PR_FALSE,
+                                        aFlag);
                   } 
                 }//if
                 if (NS_OK==result) {
diff --git a/parser/htmlparser/src/nsLoggingSink.h b/parser/htmlparser/src/nsLoggingSink.h
index db245b2714c..e7cb45ecea4 100644
--- a/parser/htmlparser/src/nsLoggingSink.h
+++ b/parser/htmlparser/src/nsLoggingSink.h
@@ -90,7 +90,9 @@ public:
   NS_IMETHOD CloseMap();
   NS_IMETHOD OpenFrameset(const nsIParserNode& aNode);
   NS_IMETHOD CloseFrameset();
-  NS_IMETHOD IsEnabled(PRInt32 aTag, PRBool* aReturn) { return NS_OK; }
+  NS_IMETHOD IsEnabled(PRInt32 aTag, PRBool* aReturn) 
+  /* Take the largest possible feature set. */
+  { NS_ENSURE_ARG_POINTER(aReturn); *aReturn = PR_TRUE; return NS_OK; }
   NS_IMETHOD NotifyTagObservers(nsIParserNode* aNode) { return NS_OK; }
   NS_IMETHOD_(PRBool) IsFormOnStack() { return PR_FALSE; }
 
diff --git a/parser/htmlparser/src/nsParser.cpp b/parser/htmlparser/src/nsParser.cpp
index 2d1bcf28914..556ebedb94c 100644
--- a/parser/htmlparser/src/nsParser.cpp
+++ b/parser/htmlparser/src/nsParser.cpp
@@ -1294,7 +1294,7 @@ nsParser::WillBuildModel(nsString& aFilename)
     return rv;
 
   nsITokenizer* tokenizer;
-  mParserContext->GetTokenizer(mParserContext->mDTD->GetType(), tokenizer);
+  mParserContext->GetTokenizer(mParserContext->mDTD->GetType(), mSink, tokenizer);
   return mParserContext->mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
 }
 
@@ -1355,10 +1355,6 @@ CParserContext* nsParser::PopContext()
       if (mParserContext->mStreamListenerState != eOnStop) {
         mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
       }
-      // Preserve tokenizer state so that information is not lost
-      // between document.write. This fixes bug 99467
-      if (mParserContext->mTokenizer)
-        mParserContext->mTokenizer->CopyState(oldContext->mTokenizer);
     }
   }
   return oldContext;
@@ -1644,7 +1640,6 @@ nsParser::Parse(nsIInputStream* aStream,
   return result;
 }
 
-
 /**
  * Call this method if all you want to do is parse 1 string full of HTML text.
  * In particular, this method should be called by the DOM when it has an HTML
@@ -1718,7 +1713,8 @@ nsParser::Parse(const nsAString& aSourceBuffer,
         }
       } 
 
-      pc = new CParserContext(theScanner, aKey, mCommand, 0, theDTD, theStatus, aLastCall);
+      pc = new CParserContext(theScanner, aKey, mCommand,
+                              0, theDTD, theStatus, aLastCall);
       NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
 
       PushContext(*pc); 
@@ -2010,7 +2006,7 @@ nsresult nsParser::BuildModel() {
   nsresult result = NS_OK;
   if (mParserContext) {
     PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML;
-    mParserContext->GetTokenizer(type, theTokenizer);
+    mParserContext->GetTokenizer(type, mSink, theTokenizer);
   }
 
   if (theTokenizer) {
@@ -2047,7 +2043,7 @@ nsresult nsParser::GetTokenizer(nsITokenizer*& aTokenizer) {
   aTokenizer = nsnull;
   if(mParserContext) {
     PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML;
-    result = mParserContext->GetTokenizer(type, aTokenizer);
+    result = mParserContext->GetTokenizer(type, mSink, aTokenizer);
   }
   return result;
 }
@@ -2658,7 +2654,7 @@ PRBool nsParser::WillTokenize(PRBool aIsFinalChunk){
   nsresult result = NS_OK;
   if (mParserContext) {
     PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML;
-    mParserContext->GetTokenizer(type, theTokenizer);
+    mParserContext->GetTokenizer(type, mSink, theTokenizer);
   }
 
   if (theTokenizer) {
@@ -2684,7 +2680,7 @@ nsresult nsParser::Tokenize(PRBool aIsFinalChunk){
 
   if (mParserContext) {
     PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML;
-    mParserContext->GetTokenizer(type, theTokenizer);
+    mParserContext->GetTokenizer(type, mSink, theTokenizer);
   }
 
   if (theTokenizer) { 
@@ -2755,7 +2751,7 @@ PRBool nsParser::DidTokenize(PRBool aIsFinalChunk){
   nsresult rv = NS_OK;
   if (mParserContext) {
     PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML;
-    mParserContext->GetTokenizer(type, theTokenizer);
+    mParserContext->GetTokenizer(type, mSink, theTokenizer);
   }
 
   if (NS_SUCCEEDED(rv) && theTokenizer) {