From df72f84c56b9c8e8708b66afe2473dd0d3cb1647 Mon Sep 17 00:00:00 2001
From: rickg <rickg>
Date: Thu, 28 May 1998 07:12:08 +0000
Subject: [PATCH] modest improvements to push, and various bug fixes

---
 htmlparser/src/CNavDTD.cpp             | 10 +---
 htmlparser/src/CNavDelegate.cpp        | 36 +++++++-------
 htmlparser/src/nsHTMLParser.cpp        | 54 ++++++++++++++++----
 htmlparser/src/nsHTMLParser.h          |  1 +
 htmlparser/src/nsHTMLTokens.cpp        | 69 ++++++++++++++++++--------
 htmlparser/src/nsHTMLTokens.h          | 22 ++++----
 htmlparser/src/nsParserTypes.h         |  1 +
 htmlparser/src/nsToken.cpp             | 11 ++++
 htmlparser/src/nsToken.h               |  7 +++
 parser/htmlparser/src/CNavDTD.cpp      | 10 +---
 parser/htmlparser/src/CNavDelegate.cpp | 36 +++++++-------
 parser/htmlparser/src/nsHTMLParser.cpp | 54 ++++++++++++++++----
 parser/htmlparser/src/nsHTMLParser.h   |  1 +
 parser/htmlparser/src/nsHTMLTokens.cpp | 69 ++++++++++++++++++--------
 parser/htmlparser/src/nsHTMLTokens.h   | 22 ++++----
 parser/htmlparser/src/nsParserTypes.h  |  1 +
 parser/htmlparser/src/nsToken.cpp      | 11 ++++
 parser/htmlparser/src/nsToken.h        |  7 +++
 18 files changed, 290 insertions(+), 132 deletions(-)
diff --git a/htmlparser/src/CNavDTD.cpp b/htmlparser/src/CNavDTD.cpp
index 351d853ebf6..898f46a7703 100644
--- a/htmlparser/src/CNavDTD.cpp
+++ b/htmlparser/src/CNavDTD.cpp
@@ -400,8 +400,7 @@ PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const {
       if (eHTMLTag_listitem == aChild) {
         return PR_FALSE;
       }
-      result = PR_TRUE;
-      break;
+      result=PRBool(!strchr(gHeadingTags,aChild)); break;
 
     case eHTMLTag_listing:
       result = PR_TRUE; break;
@@ -423,8 +422,7 @@ PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const {
     case eHTMLTag_ol:
     case eHTMLTag_ul:
       // XXX kipp was here
-      result = PR_TRUE;
-      break;
+      result=PRBool(!strchr(gHeadingTags,aChild)); break;
 
     case eHTMLTag_noframes:
       if(eHTMLTag_body==aChild)
@@ -786,10 +784,6 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{
     case eHTMLTag_col:
       result=eHTMLTag_colgroup; break;    
 
-      //These have to do with listings...
-    case eHTMLTag_listitem:
-      result=eHTMLTag_ul; break;    
-
     case eHTMLTag_dd:
     case eHTMLTag_dt:
       result=eHTMLTag_dl; break;    
diff --git a/htmlparser/src/CNavDelegate.cpp b/htmlparser/src/CNavDelegate.cpp
index 14c5cf34e03..3befb9ae52c 100644
--- a/htmlparser/src/CNavDelegate.cpp
+++ b/htmlparser/src/CNavDelegate.cpp
@@ -162,23 +162,7 @@ PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToke
     }//if
   }//while
 
-  //ok, this is a bit complicated, so follow closely.
-  //Since we're incremental (but pessimistic), it is possible that even though 
-  //we've eaten a few delicious attributes, we can't keep them because
-  //we couldn't eat all of them (up to an including the close > for this tag).
-  //Therefore, we need to remove the ones we just created from the tokendeque,
-  //and destroy them. (They'll get reconsumed on the next incremental pass).
-  //NOTE: This process can be enhanced later on by adding state to the delegate
-  //      telling us that we're in the attribute consumption phase.
-  //      Remember the mantra: Crawl, Walk, Run!
-  if(kNoError==result) {
-    aToken->SetAttributeCount(theAttrCount);
-  }
-  else {
-    while(theAttrCount--) {
-      delete mTokenDeque.PopBack();
-    }
-  }
+  aToken->SetAttributeCount(theAttrCount);
   return result;
 }
 
@@ -214,9 +198,11 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
  *  @return new token or null 
  */
 PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
-  aToken=new CStartToken(nsAutoString(""));
+  PRInt32 theDequeSize=mTokenDeque.GetSize();
   PRInt32 result=kNoError;
 
+  aToken=new CStartToken(nsAutoString(""));
+
   if(aToken) {
     result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
     if(kNoError==result) {
@@ -251,6 +237,20 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*
           } //if
         } //if
       } //if
+
+      //EEEEECCCCKKKK!!! 
+      //This code is confusing, so pay attention.
+      //If you're here, it's because we were in the midst of consuming a start
+      //tag but ran out of data (not in the stream, but in this *part* of the stream).
+      //For simplicity, we have to unwind our input. Therefore, we pop and discard
+      //any new tokens we've queued this round. Later we can get smarter about this.
+      if(kNoError!=result) {
+        while(mTokenDeque.GetSize()>theDequeSize) {
+          delete mTokenDeque.PopBack();
+        }
+      }
+
+
     } //if
   } //if
   return result;
diff --git a/htmlparser/src/nsHTMLParser.cpp b/htmlparser/src/nsHTMLParser.cpp
index 73be135c322..4e3f39a7992 100644
--- a/htmlparser/src/nsHTMLParser.cpp
+++ b/htmlparser/src/nsHTMLParser.cpp
@@ -512,6 +512,7 @@ void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*&
  */
 PRInt32 nsHTMLParser::WillBuildModel(void) {
   mIteration=-1;
+  mHasSeenOpenTag=PR_FALSE;
   if(mSink)
     mSink->WillBuildModel();
   return kNoError;
@@ -824,7 +825,7 @@ PRInt32 nsHTMLParser::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag
 
   if(PR_FALSE==contains){
     result=CreateContextStackFor(aChildTag);
-    if(PR_FALSE==result) {
+    if(kNoError!=result) {
       //if you're here, then the new topmost container can't contain aToken.
       //You must determine what container hierarchy you need to hold aToken,
       //and create that on the parsestack.
@@ -1096,9 +1097,24 @@ PRInt32 nsHTMLParser::HandleAttributeToken(CToken* aToken) {
 PRInt32 nsHTMLParser::HandleScriptToken(CToken* aToken) {
   NS_PRECONDITION(0!=aToken,kNullToken);
 
-  CScriptToken*  st = (CScriptToken*)(aToken);
-  PRInt32 result=kNoError;
-  return result;
+  CScriptToken*   st = (CScriptToken*)(aToken);
+
+  eHTMLTokenTypes subtype=eToken_attribute;
+  nsDeque&        deque=mTokenizer->GetDeque();
+  nsDequeIterator end=deque.End();
+
+  if(*mCurrentPos!=end) {
+    CHTMLToken* tkn=(CHTMLToken*)(++(*mCurrentPos));
+    subtype=eHTMLTokenTypes(tkn->GetTokenType());
+    if(eToken_skippedcontent==subtype) {
+      //WE INTENTIONALLY DROP THE TOKEN ON THE FLOOR!
+      //LATER, we'll pass this onto the javascript system.
+      return kNoError;
+    } 
+    else (*mCurrentPos)--;
+  }
+  return kInterrupted;
+
 }
 
 /**
@@ -1531,6 +1547,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){
         }
       } //while
     } //elseif
+    else result=kCantPropagate;
   } //elseif
 
     //now, build up the stack according to the tags 
@@ -1539,8 +1556,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){
     nsAutoString  empty;
     int i=0;
     for(i=pos;i<cnt;i++) {
-      CStartToken* st=new CStartToken(empty);
-      st->SetHTMLTag((eHTMLTags)theVector[cnt-1-i]);
+      CStartToken* st=new CStartToken((eHTMLTags)theVector[cnt-1-i]);
       HandleStartToken(st);
     }
   }
@@ -1616,19 +1632,39 @@ nsresult nsHTMLParser::OnStartBinding(void){
  *  
  *  
  *  @update  gess 5/12/98
- *  @param   
- *  @return  
+ *  @param   pIStream contains the input chars
+ *  @param   length is the number of bytes waiting input
+ *  @return  error code (usually 0)
  */
 nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
 
   int len=0;
+  int offset=0;
 
   do {
       PRInt32 err;
       len = pIStream->Read(&err, mTransferBuffer, 0, gTransferBufferSize);
       if(len>0) {
+
+        //Ok -- here's the problem.
+        //Just because someone throws you some data, doesn't mean that it's
+        //actually GOOD data. Recently, I encountered a problem where netlib
+        //was prepending an otherwise valid buffer with a few garbage characters.
+        //To solve this, I'm adding some debug code here that protects us from
+        //propagating the bad data upwards.
+
         mTransferBuffer[len]=0;
-        mTokenizer->Append(mTransferBuffer,len);
+        offset=0;  //reset on every read; a stale skip must not clip later buffers
+        if(PR_FALSE==mHasSeenOpenTag) {
+          for(;offset<len;offset++) {  //skip everything ahead of the first kLessThan
+            if(kLessThan==mTransferBuffer[offset]){
+              mHasSeenOpenTag=PR_TRUE;
+              break;
+            } 
+          }
+        }
+        if(len>offset)  //hand over only the bytes that remain after the skip
+          mTokenizer->Append(&mTransferBuffer[offset],len-offset);
       }
   } while (len > 0);
 
diff --git a/htmlparser/src/nsHTMLParser.h b/htmlparser/src/nsHTMLParser.h
index c3a51b9f75d..776670bb378 100644
--- a/htmlparser/src/nsHTMLParser.h
+++ b/htmlparser/src/nsHTMLParser.h
@@ -521,6 +521,7 @@ protected:
     ITokenizerDelegate* mDelegate;
     PRInt32             mIteration;
     char*               mTransferBuffer;
+    PRBool              mHasSeenOpenTag;
 };
 
 
diff --git a/htmlparser/src/nsHTMLTokens.cpp b/htmlparser/src/nsHTMLTokens.cpp
index af6690dda8e..791846a81f8 100644
--- a/htmlparser/src/nsHTMLTokens.cpp
+++ b/htmlparser/src/nsHTMLTokens.cpp
@@ -37,6 +37,7 @@ static nsString     gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTU
 static nsAutoString gDigits("0123456789");
 static nsAutoString gWhitespace(" \t\b");
 static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:");
+static const char*  gUserdefined = "userdefined";
 
 //debug error messages...
 static const char* kNullScanner = "Error: Scanner is null.";
@@ -99,7 +100,6 @@ struct HTMLTagEntry {
   eHTMLTags  fTagID;
 };
 
-
   // KEEP THIS LIST SORTED!
   // NOTE: This table is sorted in ascii collating order. If you
   // add a new entry, make sure you put it in the right spot otherwise
@@ -121,6 +121,7 @@ HTMLTagEntry gHTMLTagTable[] =
   {"CERTIFICATE", eHTMLTag_certificate},
   {"CITE",        eHTMLTag_cite},         {"CODE",      eHTMLTag_code},
   {"COL",         eHTMLTag_col},          {"COLGROUP",  eHTMLTag_colgroup},
+  {"COMMENT",     eHTMLTag_comment},
 
   {"DD",          eHTMLTag_dd},           {"DEL",       eHTMLTag_del},
   {"DFN",         eHTMLTag_dfn},          {"DIR",       eHTMLTag_dir},
@@ -193,7 +194,6 @@ HTMLTagEntry gHTMLTagTable[] =
   {"VAR",         eHTMLTag_var},          {"WBR",       eHTMLTag_wbr},
   {"WS",          eHTMLTag_whitespace},       
 
-
 };
 
 
@@ -262,6 +262,17 @@ CHTMLToken::CHTMLToken(const nsString& aName) : CToken(aName) {
   mTagType=eHTMLTag_unknown;
 }
 
+/*
+ *  constructor from tag id
+ *  
+ *  @update  gess 3/25/98
+ *  @param   aTag is the tag id stored in the token; its tag name becomes the token text
+ *  @return  nothing (constructor)
+ */
+CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(GetTagName(aTag)) {
+  mTagType=aTag; 
+}
+
 /*
  *  
  *  
@@ -296,6 +307,17 @@ CStartToken::CStartToken(const nsString& aName) : CHTMLToken(aName) {
   mAttributed=PR_FALSE;
 }
 
+/*
+ *  constructor from tag id
+ *  
+ *  @update  gess 3/25/98
+ *  @param   aTag is the tag id handed on to the CHTMLToken constructor
+ *  @return  nothing (constructor)
+ */
+CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
+  mAttributed=PR_FALSE;
+}
+
 /*
  *  default destructor
  *  
@@ -1335,17 +1357,15 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString)
   PRInt32  high=cnt-1;
   PRInt32  middle=kNotFound;
   
-  if (0 != cnt)
-    while(low<=high)
-    {
-      middle=(PRInt32)(low+high)/2;
-      result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE);
-      if (result==0)
-        return gHTMLTagTable[middle].fTagID; 
-      if (result<0)
-        high=middle-1; 
-      else low=middle+1; 
-    }
+  while(low<=high){
+    middle=(PRInt32)(low+high)/2;
+    result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE);
+    if (result==0)
+      return gHTMLTagTable[middle].fTagID; 
+    if (result<0)
+      high=middle-1; 
+    else low=middle+1; 
+  }
   return eHTMLTag_userdefined;
 }
 
@@ -1357,17 +1377,24 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString)
  * @return
  */
 const char* GetTagName(PRInt32 aTag) {
-  const char* result=0;
-  PRInt32     cnt=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry);
-
-  int i=0;
-  for(i=0;i<cnt;i++){
-    if(aTag==gHTMLTagTable[i].fTagID)
-      return gHTMLTagTable[i].fName;
+  const    char* result=0;
+  PRInt32  cnt=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry);
+  PRInt32  low=0; 
+  PRInt32  high=cnt-1;
+  PRInt32  middle=kNotFound;
+  
+  while(low<=high) {
+    middle=(PRInt32)(low+high)/2;
+    if(aTag==gHTMLTagTable[middle].fTagID)
+      return gHTMLTagTable[middle].fName;
+    if(aTag<gHTMLTagTable[middle].fTagID)
+      high=middle-1; 
+    else low=middle+1; 
   }
-  return result;
+  return gUserdefined;
 }
 
+
 /*
  *  This method iterates the attribute-table to ensure that is 
  *  is proper sort order. This method only needs to be
diff --git a/htmlparser/src/nsHTMLTokens.h b/htmlparser/src/nsHTMLTokens.h
index b8eae70f8b9..4c6c2af6e33 100644
--- a/htmlparser/src/nsHTMLTokens.h
+++ b/htmlparser/src/nsHTMLTokens.h
@@ -59,21 +59,21 @@ enum eHTMLTags
   eHTMLTag_button,      eHTMLTag_caption,     eHTMLTag_center,    
   eHTMLTag_certificate, eHTMLTag_cite,
   eHTMLTag_code,        eHTMLTag_col,         eHTMLTag_colgroup,  eHTMLTag_comment,
-  eHTMLTag_dd,          eHTMLTag_del,         eHTMLTag_dfn,       eHTMLTag_div,       
-  eHTMLTag_dir,         eHTMLTag_dl,          eHTMLTag_dt,        
+  eHTMLTag_dd,          eHTMLTag_del,         eHTMLTag_dfn,       eHTMLTag_dir,       
+  eHTMLTag_div,         eHTMLTag_dl,          eHTMLTag_dt,        
   eHTMLTag_em,          eHTMLTag_embed,
   eHTMLTag_fieldset,    eHTMLTag_font,        eHTMLTag_footer,  
-  eHTMLTag_form,        eHTMLTag_frame,       eHTMLTag_frameset,
+  eHTMLTag_form,        eHTMLTag_frame,       eHTMLTag_frameset,  //39
   eHTMLTag_h1,          eHTMLTag_h2,          eHTMLTag_h3,        eHTMLTag_h4,
   eHTMLTag_h5,          eHTMLTag_h6,          eHTMLTag_head,      eHTMLTag_header,
-  eHTMLTag_hr,          eHTMLTag_html,        eHTMLTag_iframe,    eHTMLTag_ilayer,
-  eHTMLTag_italic,      eHTMLTag_img,         eHTMLTag_ins,       eHTMLTag_input,       
+  eHTMLTag_hr,          eHTMLTag_html,        eHTMLTag_italic,    eHTMLTag_iframe,    
+  eHTMLTag_ilayer,      eHTMLTag_img,         eHTMLTag_input,     eHTMLTag_ins,
   eHTMLTag_isindex,       
   eHTMLTag_kbd,         eHTMLTag_keygen,
-  eHTMLTag_label,       eHTMLTag_layer,       eHTMLTag_legend,    eHTMLTag_listitem,
+  eHTMLTag_label,       eHTMLTag_layer,       eHTMLTag_legend,    eHTMLTag_listitem, 
   eHTMLTag_link,        eHTMLTag_listing,     eHTMLTag_map,       eHTMLTag_marquee,
   eHTMLTag_math,        eHTMLTag_menu,        eHTMLTag_meta,      eHTMLTag_newline,
-  eHTMLTag_noembed,     eHTMLTag_noframes,    eHTMLTag_nolayer,   eHTMLTag_noscript,  
+  eHTMLTag_noembed,     eHTMLTag_noframes,    eHTMLTag_nolayer,   eHTMLTag_noscript,  //74
   eHTMLTag_note,        eHTMLTag_object,      eHTMLTag_ol,
   eHTMLTag_option,      eHTMLTag_paragraph,   eHTMLTag_param,     eHTMLTag_plaintext,   
   eHTMLTag_pre,         eHTMLTag_quotation,   eHTMLTag_strike,    eHTMLTag_samp,        
@@ -81,10 +81,10 @@ enum eHTMLTags
   eHTMLTag_server,      eHTMLTag_small,     
   eHTMLTag_spacer,      eHTMLTag_span,
   eHTMLTag_strong,      eHTMLTag_style,       eHTMLTag_sub,       eHTMLTag_sup,         
-  eHTMLTag_table,       eHTMLTag_tbody,       eHTMLTag_td,        
+  eHTMLTag_table,       eHTMLTag_tbody,       eHTMLTag_td,        //98
   
-  eHTMLTag_text,  //used for plain text; this is not really a tag.   
-  eHTMLTag_textarea,
+  eHTMLTag_text,  //used for plain text; this is not really a tag.  
+  eHTMLTag_textarea,  //100
   
   eHTMLTag_tfoot,   
   eHTMLTag_th,          eHTMLTag_thead,       eHTMLTag_title,     eHTMLTag_tr,
@@ -125,6 +125,7 @@ const char*     GetTagName(PRInt32 aTag);
  */
 class CHTMLToken : public CToken {
 public:
+                        CHTMLToken(eHTMLTags aTag);
                         CHTMLToken(const nsString& aString);
   virtual   eHTMLTags   GetHTMLTag();
             void        SetHTMLTag(eHTMLTags aTagType);
@@ -142,6 +143,7 @@ protected:
  */
 class CStartToken: public CHTMLToken {
   public:
+                        CStartToken(eHTMLTags aTag);
                         CStartToken(const nsString& aString);
     virtual PRInt32     Consume(PRUnichar aChar,CScanner& aScanner);
     virtual eHTMLTags   GetHTMLTag();
diff --git a/htmlparser/src/nsParserTypes.h b/htmlparser/src/nsParserTypes.h
index 39122d4b844..716632dcd3d 100644
--- a/htmlparser/src/nsParserTypes.h
+++ b/htmlparser/src/nsParserTypes.h
@@ -43,6 +43,7 @@ enum  eParseMode {
 
 const PRInt32   kEOF          = 1000000L;
 const PRInt32   kUnknownError = -1000;
+const PRInt32   kCantPropagate = -6;
 const PRInt32   kContextMismatch = -5;
 const PRInt32   kBadFilename  = -4;
 const PRInt32   kBadURL       = -3;
diff --git a/htmlparser/src/nsToken.cpp b/htmlparser/src/nsToken.cpp
index 03000a678b2..a8ba53a6171 100644
--- a/htmlparser/src/nsToken.cpp
+++ b/htmlparser/src/nsToken.cpp
@@ -29,6 +29,17 @@ CToken::CToken(const nsString& aName) : mTextValue(aName) {
   mOrdinalValue=0;
   mAttrCount=0;
 }
+
+/**
+ *  constructor from char*
+ *  
+ *  @update gess 3/25/98
+ *  @param  aName--char* containing name of token
+ */
+CToken::CToken(const char* aName) : mTextValue(aName) {
+  mOrdinalValue=0;
+  mAttrCount=0;
+}
  
 /**
  *  Decstructor
diff --git a/htmlparser/src/nsToken.h b/htmlparser/src/nsToken.h
index 0bdf7a0a0d8..4c757a42091 100644
--- a/htmlparser/src/nsToken.h
+++ b/htmlparser/src/nsToken.h
@@ -60,6 +60,13 @@ class CToken {
      */
     CToken(const nsString& aName);
 
+    /**
+     * constructor from char*
+     * @update	gess5/11/98
+     * @param   aName is the given name of the token 
+     */
+    CToken(const char* aName);
+
     /**
      * destructor
      * @update	gess5/11/98
diff --git a/parser/htmlparser/src/CNavDTD.cpp b/parser/htmlparser/src/CNavDTD.cpp
index 351d853ebf6..898f46a7703 100644
--- a/parser/htmlparser/src/CNavDTD.cpp
+++ b/parser/htmlparser/src/CNavDTD.cpp
@@ -400,8 +400,7 @@ PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const {
       if (eHTMLTag_listitem == aChild) {
         return PR_FALSE;
       }
-      result = PR_TRUE;
-      break;
+      result=PRBool(!strchr(gHeadingTags,aChild)); break;
 
     case eHTMLTag_listing:
       result = PR_TRUE; break;
@@ -423,8 +422,7 @@ PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const {
     case eHTMLTag_ol:
     case eHTMLTag_ul:
       // XXX kipp was here
-      result = PR_TRUE;
-      break;
+      result=PRBool(!strchr(gHeadingTags,aChild)); break;
 
     case eHTMLTag_noframes:
       if(eHTMLTag_body==aChild)
@@ -786,10 +784,6 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{
     case eHTMLTag_col:
       result=eHTMLTag_colgroup; break;    
 
-      //These have to do with listings...
-    case eHTMLTag_listitem:
-      result=eHTMLTag_ul; break;    
-
     case eHTMLTag_dd:
     case eHTMLTag_dt:
       result=eHTMLTag_dl; break;    
diff --git a/parser/htmlparser/src/CNavDelegate.cpp b/parser/htmlparser/src/CNavDelegate.cpp
index 14c5cf34e03..3befb9ae52c 100644
--- a/parser/htmlparser/src/CNavDelegate.cpp
+++ b/parser/htmlparser/src/CNavDelegate.cpp
@@ -162,23 +162,7 @@ PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToke
     }//if
   }//while
 
-  //ok, this is a bit complicated, so follow closely.
-  //Since we're incremental (but pessimistic), it is possible that even though 
-  //we've eaten a few delicious attributes, we can't keep them because
-  //we couldn't eat all of them (up to an including the close > for this tag).
-  //Therefore, we need to remove the ones we just created from the tokendeque,
-  //and destroy them. (They'll get reconsumed on the next incremental pass).
-  //NOTE: This process can be enhanced later on by adding state to the delegate
-  //      telling us that we're in the attribute consumption phase.
-  //      Remember the mantra: Crawl, Walk, Run!
-  if(kNoError==result) {
-    aToken->SetAttributeCount(theAttrCount);
-  }
-  else {
-    while(theAttrCount--) {
-      delete mTokenDeque.PopBack();
-    }
-  }
+  aToken->SetAttributeCount(theAttrCount);
   return result;
 }
 
@@ -214,9 +198,11 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
  *  @return new token or null 
  */
 PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
-  aToken=new CStartToken(nsAutoString(""));
+  PRInt32 theDequeSize=mTokenDeque.GetSize();
   PRInt32 result=kNoError;
 
+  aToken=new CStartToken(nsAutoString(""));
+
   if(aToken) {
     result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
     if(kNoError==result) {
@@ -251,6 +237,20 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*
           } //if
         } //if
       } //if
+
+      //EEEEECCCCKKKK!!! 
+      //This code is confusing, so pay attention.
+      //If you're here, it's because we were in the midst of consuming a start
+      //tag but ran out of data (not in the stream, but in this *part* of the stream).
+      //For simplicity, we have to unwind our input. Therefore, we pop and discard
+      //any new tokens we've queued this round. Later we can get smarter about this.
+      if(kNoError!=result) {
+        while(mTokenDeque.GetSize()>theDequeSize) {
+          delete mTokenDeque.PopBack();
+        }
+      }
+
+
     } //if
   } //if
   return result;
diff --git a/parser/htmlparser/src/nsHTMLParser.cpp b/parser/htmlparser/src/nsHTMLParser.cpp
index 73be135c322..4e3f39a7992 100644
--- a/parser/htmlparser/src/nsHTMLParser.cpp
+++ b/parser/htmlparser/src/nsHTMLParser.cpp
@@ -512,6 +512,7 @@ void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*&
  */
 PRInt32 nsHTMLParser::WillBuildModel(void) {
   mIteration=-1;
+  mHasSeenOpenTag=PR_FALSE;
   if(mSink)
     mSink->WillBuildModel();
   return kNoError;
@@ -824,7 +825,7 @@ PRInt32 nsHTMLParser::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag
 
   if(PR_FALSE==contains){
     result=CreateContextStackFor(aChildTag);
-    if(PR_FALSE==result) {
+    if(kNoError!=result) {
       //if you're here, then the new topmost container can't contain aToken.
       //You must determine what container hierarchy you need to hold aToken,
       //and create that on the parsestack.
@@ -1096,9 +1097,24 @@ PRInt32 nsHTMLParser::HandleAttributeToken(CToken* aToken) {
 PRInt32 nsHTMLParser::HandleScriptToken(CToken* aToken) {
   NS_PRECONDITION(0!=aToken,kNullToken);
 
-  CScriptToken*  st = (CScriptToken*)(aToken);
-  PRInt32 result=kNoError;
-  return result;
+  CScriptToken*   st = (CScriptToken*)(aToken);
+
+  eHTMLTokenTypes subtype=eToken_attribute;
+  nsDeque&        deque=mTokenizer->GetDeque();
+  nsDequeIterator end=deque.End();
+
+  if(*mCurrentPos!=end) {
+    CHTMLToken* tkn=(CHTMLToken*)(++(*mCurrentPos));
+    subtype=eHTMLTokenTypes(tkn->GetTokenType());
+    if(eToken_skippedcontent==subtype) {
+      //WE INTENTIONALLY DROP THE TOKEN ON THE FLOOR!
+      //LATER, we'll pass this onto the javascript system.
+      return kNoError;
+    } 
+    else (*mCurrentPos)--;
+  }
+  return kInterrupted;
+
 }
 
 /**
@@ -1531,6 +1547,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){
         }
       } //while
     } //elseif
+    else result=kCantPropagate;
   } //elseif
 
     //now, build up the stack according to the tags 
@@ -1539,8 +1556,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){
     nsAutoString  empty;
     int i=0;
     for(i=pos;i<cnt;i++) {
-      CStartToken* st=new CStartToken(empty);
-      st->SetHTMLTag((eHTMLTags)theVector[cnt-1-i]);
+      CStartToken* st=new CStartToken((eHTMLTags)theVector[cnt-1-i]);
       HandleStartToken(st);
     }
   }
@@ -1616,19 +1632,39 @@ nsresult nsHTMLParser::OnStartBinding(void){
  *  
  *  
  *  @update  gess 5/12/98
- *  @param   
- *  @return  
+ *  @param   pIStream contains the input chars
+ *  @param   length is the number of bytes waiting input
+ *  @return  error code (usually 0)
  */
 nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
 
   int len=0;
+  int offset=0;
 
   do {
       PRInt32 err;
       len = pIStream->Read(&err, mTransferBuffer, 0, gTransferBufferSize);
       if(len>0) {
+
+        //Ok -- here's the problem.
+        //Just because someone throws you some data, doesn't mean that it's
+        //actually GOOD data. Recently, I encountered a problem where netlib
+        //was prepending an otherwise valid buffer with a few garbage characters.
+        //To solve this, I'm adding some debug code here that protects us from
+        //propagating the bad data upwards.
+
         mTransferBuffer[len]=0;
-        mTokenizer->Append(mTransferBuffer,len);
+        offset=0;  //reset on every read; a stale skip must not clip later buffers
+        if(PR_FALSE==mHasSeenOpenTag) {
+          for(;offset<len;offset++) {  //skip everything ahead of the first kLessThan
+            if(kLessThan==mTransferBuffer[offset]){
+              mHasSeenOpenTag=PR_TRUE;
+              break;
+            } 
+          }
+        }
+        if(len>offset)  //hand over only the bytes that remain after the skip
+          mTokenizer->Append(&mTransferBuffer[offset],len-offset);
       }
   } while (len > 0);
 
diff --git a/parser/htmlparser/src/nsHTMLParser.h b/parser/htmlparser/src/nsHTMLParser.h
index c3a51b9f75d..776670bb378 100644
--- a/parser/htmlparser/src/nsHTMLParser.h
+++ b/parser/htmlparser/src/nsHTMLParser.h
@@ -521,6 +521,7 @@ protected:
     ITokenizerDelegate* mDelegate;
     PRInt32             mIteration;
     char*               mTransferBuffer;
+    PRBool              mHasSeenOpenTag;
 };
 
 
diff --git a/parser/htmlparser/src/nsHTMLTokens.cpp b/parser/htmlparser/src/nsHTMLTokens.cpp
index af6690dda8e..791846a81f8 100644
--- a/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -37,6 +37,7 @@ static nsString     gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTU
 static nsAutoString gDigits("0123456789");
 static nsAutoString gWhitespace(" \t\b");
 static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:");
+static const char*  gUserdefined = "userdefined";
 
 //debug error messages...
 static const char* kNullScanner = "Error: Scanner is null.";
@@ -99,7 +100,6 @@ struct HTMLTagEntry {
   eHTMLTags  fTagID;
 };
 
-
   // KEEP THIS LIST SORTED!
   // NOTE: This table is sorted in ascii collating order. If you
   // add a new entry, make sure you put it in the right spot otherwise
@@ -121,6 +121,7 @@ HTMLTagEntry gHTMLTagTable[] =
   {"CERTIFICATE", eHTMLTag_certificate},
   {"CITE",        eHTMLTag_cite},         {"CODE",      eHTMLTag_code},
   {"COL",         eHTMLTag_col},          {"COLGROUP",  eHTMLTag_colgroup},
+  {"COMMENT",     eHTMLTag_comment},
 
   {"DD",          eHTMLTag_dd},           {"DEL",       eHTMLTag_del},
   {"DFN",         eHTMLTag_dfn},          {"DIR",       eHTMLTag_dir},
@@ -193,7 +194,6 @@ HTMLTagEntry gHTMLTagTable[] =
   {"VAR",         eHTMLTag_var},          {"WBR",       eHTMLTag_wbr},
   {"WS",          eHTMLTag_whitespace},       
 
-
 };
 
 
@@ -262,6 +262,17 @@ CHTMLToken::CHTMLToken(const nsString& aName) : CToken(aName) {
   mTagType=eHTMLTag_unknown;
 }
 
+/*
+ *  constructor from tag id
+ *  
+ *  @update  gess 3/25/98
+ *  @param   aTag is the tag id stored in the token; its tag name becomes the token text
+ *  @return  nothing (constructor)
+ */
+CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(GetTagName(aTag)) {
+  mTagType=aTag; 
+}
+
 /*
  *  
  *  
@@ -296,6 +307,17 @@ CStartToken::CStartToken(const nsString& aName) : CHTMLToken(aName) {
   mAttributed=PR_FALSE;
 }
 
+/*
+ *  constructor from tag id
+ *  
+ *  @update  gess 3/25/98
+ *  @param   aTag is the tag id handed on to the CHTMLToken constructor
+ *  @return  nothing (constructor)
+ */
+CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
+  mAttributed=PR_FALSE;
+}
+
 /*
  *  default destructor
  *  
@@ -1335,17 +1357,15 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString)
   PRInt32  high=cnt-1;
   PRInt32  middle=kNotFound;
   
-  if (0 != cnt)
-    while(low<=high)
-    {
-      middle=(PRInt32)(low+high)/2;
-      result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE);
-      if (result==0)
-        return gHTMLTagTable[middle].fTagID; 
-      if (result<0)
-        high=middle-1; 
-      else low=middle+1; 
-    }
+  while(low<=high){
+    middle=(PRInt32)(low+high)/2;
+    result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE);
+    if (result==0)
+      return gHTMLTagTable[middle].fTagID; 
+    if (result<0)
+      high=middle-1; 
+    else low=middle+1; 
+  }
   return eHTMLTag_userdefined;
 }
 
@@ -1357,17 +1377,24 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString)
  * @return
  */
 const char* GetTagName(PRInt32 aTag) {
-  const char* result=0;
-  PRInt32     cnt=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry);
-
-  int i=0;
-  for(i=0;i<cnt;i++){
-    if(aTag==gHTMLTagTable[i].fTagID)
-      return gHTMLTagTable[i].fName;
+  const    char* result=0;
+  PRInt32  cnt=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry);
+  PRInt32  low=0; 
+  PRInt32  high=cnt-1;
+  PRInt32  middle=kNotFound;
+  
+  while(low<=high) {
+    middle=(PRInt32)(low+high)/2;
+    if(aTag==gHTMLTagTable[middle].fTagID)
+      return gHTMLTagTable[middle].fName;
+    if(aTag<gHTMLTagTable[middle].fTagID)
+      high=middle-1; 
+    else low=middle+1; 
   }
-  return result;
+  return gUserdefined;
 }
 
+
 /*
  *  This method iterates the attribute-table to ensure that is 
  *  is proper sort order. This method only needs to be
diff --git a/parser/htmlparser/src/nsHTMLTokens.h b/parser/htmlparser/src/nsHTMLTokens.h
index b8eae70f8b9..4c6c2af6e33 100644
--- a/parser/htmlparser/src/nsHTMLTokens.h
+++ b/parser/htmlparser/src/nsHTMLTokens.h
@@ -59,21 +59,21 @@ enum eHTMLTags
   eHTMLTag_button,      eHTMLTag_caption,     eHTMLTag_center,    
   eHTMLTag_certificate, eHTMLTag_cite,
   eHTMLTag_code,        eHTMLTag_col,         eHTMLTag_colgroup,  eHTMLTag_comment,
-  eHTMLTag_dd,          eHTMLTag_del,         eHTMLTag_dfn,       eHTMLTag_div,       
-  eHTMLTag_dir,         eHTMLTag_dl,          eHTMLTag_dt,        
+  eHTMLTag_dd,          eHTMLTag_del,         eHTMLTag_dfn,       eHTMLTag_dir,       
+  eHTMLTag_div,         eHTMLTag_dl,          eHTMLTag_dt,        
   eHTMLTag_em,          eHTMLTag_embed,
   eHTMLTag_fieldset,    eHTMLTag_font,        eHTMLTag_footer,  
-  eHTMLTag_form,        eHTMLTag_frame,       eHTMLTag_frameset,
+  eHTMLTag_form,        eHTMLTag_frame,       eHTMLTag_frameset,  //39
   eHTMLTag_h1,          eHTMLTag_h2,          eHTMLTag_h3,        eHTMLTag_h4,
   eHTMLTag_h5,          eHTMLTag_h6,          eHTMLTag_head,      eHTMLTag_header,
-  eHTMLTag_hr,          eHTMLTag_html,        eHTMLTag_iframe,    eHTMLTag_ilayer,
-  eHTMLTag_italic,      eHTMLTag_img,         eHTMLTag_ins,       eHTMLTag_input,       
+  eHTMLTag_hr,          eHTMLTag_html,        eHTMLTag_italic,    eHTMLTag_iframe,    
+  eHTMLTag_ilayer,      eHTMLTag_img,         eHTMLTag_input,     eHTMLTag_ins,
   eHTMLTag_isindex,       
   eHTMLTag_kbd,         eHTMLTag_keygen,
-  eHTMLTag_label,       eHTMLTag_layer,       eHTMLTag_legend,    eHTMLTag_listitem,
+  eHTMLTag_label,       eHTMLTag_layer,       eHTMLTag_legend,    eHTMLTag_listitem, 
   eHTMLTag_link,        eHTMLTag_listing,     eHTMLTag_map,       eHTMLTag_marquee,
   eHTMLTag_math,        eHTMLTag_menu,        eHTMLTag_meta,      eHTMLTag_newline,
-  eHTMLTag_noembed,     eHTMLTag_noframes,    eHTMLTag_nolayer,   eHTMLTag_noscript,  
+  eHTMLTag_noembed,     eHTMLTag_noframes,    eHTMLTag_nolayer,   eHTMLTag_noscript,  //74
   eHTMLTag_note,        eHTMLTag_object,      eHTMLTag_ol,
   eHTMLTag_option,      eHTMLTag_paragraph,   eHTMLTag_param,     eHTMLTag_plaintext,   
   eHTMLTag_pre,         eHTMLTag_quotation,   eHTMLTag_strike,    eHTMLTag_samp,        
@@ -81,10 +81,10 @@ enum eHTMLTags
   eHTMLTag_server,      eHTMLTag_small,     
   eHTMLTag_spacer,      eHTMLTag_span,
   eHTMLTag_strong,      eHTMLTag_style,       eHTMLTag_sub,       eHTMLTag_sup,         
-  eHTMLTag_table,       eHTMLTag_tbody,       eHTMLTag_td,        
+  eHTMLTag_table,       eHTMLTag_tbody,       eHTMLTag_td,        //98
   
-  eHTMLTag_text,  //used for plain text; this is not really a tag.   
-  eHTMLTag_textarea,
+  eHTMLTag_text,  //used for plain text; this is not really a tag.  
+  eHTMLTag_textarea,  //100
   
   eHTMLTag_tfoot,   
   eHTMLTag_th,          eHTMLTag_thead,       eHTMLTag_title,     eHTMLTag_tr,
@@ -125,6 +125,7 @@ const char*     GetTagName(PRInt32 aTag);
  */
 class CHTMLToken : public CToken {
 public:
+                        CHTMLToken(eHTMLTags aTag);
                         CHTMLToken(const nsString& aString);
   virtual   eHTMLTags   GetHTMLTag();
             void        SetHTMLTag(eHTMLTags aTagType);
@@ -142,6 +143,7 @@ protected:
  */
 class CStartToken: public CHTMLToken {
   public:
+                        CStartToken(eHTMLTags aTag);
                         CStartToken(const nsString& aString);
     virtual PRInt32     Consume(PRUnichar aChar,CScanner& aScanner);
     virtual eHTMLTags   GetHTMLTag();
diff --git a/parser/htmlparser/src/nsParserTypes.h b/parser/htmlparser/src/nsParserTypes.h
index 39122d4b844..716632dcd3d 100644
--- a/parser/htmlparser/src/nsParserTypes.h
+++ b/parser/htmlparser/src/nsParserTypes.h
@@ -43,6 +43,7 @@ enum  eParseMode {
 
 const PRInt32   kEOF          = 1000000L;
 const PRInt32   kUnknownError = -1000;
+const PRInt32   kCantPropagate = -6;
 const PRInt32   kContextMismatch = -5;
 const PRInt32   kBadFilename  = -4;
 const PRInt32   kBadURL       = -3;
diff --git a/parser/htmlparser/src/nsToken.cpp b/parser/htmlparser/src/nsToken.cpp
index 03000a678b2..a8ba53a6171 100644
--- a/parser/htmlparser/src/nsToken.cpp
+++ b/parser/htmlparser/src/nsToken.cpp
@@ -29,6 +29,17 @@ CToken::CToken(const nsString& aName) : mTextValue(aName) {
   mOrdinalValue=0;
   mAttrCount=0;
 }
+
+/**
+ *  constructor from char*
+ *  
+ *  @update gess 3/25/98
+ *  @param  aName--char* containing name of token
+ */
+CToken::CToken(const char* aName) : mTextValue(aName) {
+  mOrdinalValue=0;
+  mAttrCount=0;
+}
  
 /**
  *  Decstructor
diff --git a/parser/htmlparser/src/nsToken.h b/parser/htmlparser/src/nsToken.h
index 0bdf7a0a0d8..4c757a42091 100644
--- a/parser/htmlparser/src/nsToken.h
+++ b/parser/htmlparser/src/nsToken.h
@@ -60,6 +60,13 @@ class CToken {
      */
     CToken(const nsString& aName);
 
+    /**
+     * constructor from char*
+     * @update	gess5/11/98
+     * @param   aName is the given name of the token 
+     */
+    CToken(const char* aName);
+
     /**
      * destructor
      * @update	gess5/11/98