Major whacking of the debug robot code.

The Debug robot has now been moved into its own class (an xp_com object) and is fairly well separated from the rest of the parser. An argument to pass the ParserDebug object into the parsing process has been added to the Parse member. This should clean up Unix a bit as well as provide the ability to run multiple simultaneous robots. This also cleaned up the global variable hackage.
1998-06-18 22:57:25 +00:00 · 1998-06-18 22:57:25 +00:00 · 8d8f3997e5
--- a/htmlparser/src/CNavDTD.cpp
+++ b/htmlparser/src/CNavDTD.cpp
@ -31,6 +31,7 @@
 *         
 */

+#include "nsIParserDebug.h"
 #include "CNavDTD.h"
 #include "nsHTMLTokens.h"
 #include "nsCRT.h"
@ -43,13 +44,10 @@
 #include "prtypes.h"  //this is here for debug reasons...
 #include "prio.h"
 #include "plstr.h"
-#include "prstrm.h"
-#include <fstream.h>

 #ifdef XP_PC
 #include <direct.h> //this is here for debug reasons...
 #endif
-#include <time.h>
 #include "prmem.h"


@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
 static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

-static char*        gVerificationOutputDir=0;
-static char*        gURLRef=0;
 static nsAutoString gEmpty;

 static char formElementTags[]= {  
@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
 *  @return  
 */
 CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
+  NS_INIT_REFCNT();
  mParser=0;
+  mURLRef=0;
+  mParserDebug=0;
  nsCRT::zero(mLeafBits,sizeof(mLeafBits));
  nsCRT::zero(mContextStack,sizeof(mContextStack));
  nsCRT::zero(mStyleStack,sizeof(mStyleStack));
  nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
  mContextStackPos=0;
  mStyleStackPos=0;
-  gURLRef = 0;
  mHasOpenForm=PR_FALSE;
  mHasOpenMap=PR_FALSE;
-  gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
  InitializeDefaultTokenHandlers();
 }

@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
 */
 CNavDTD::~CNavDTD(){
  DeleteTokenHandlers();
-  if (gURLRef)
-  {
-     PL_strfree(gURLRef);
-     gURLRef = 0;
-  }
+  if (mURLRef)
+     PL_strfree(mURLRef);
+  if (mParserDebug)
+     NS_RELEASE(mParserDebug);
 //  NS_RELEASE(mSink);
 }

@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){

    if(aHandler) {
      result=(*aHandler)(theToken,this);
-      Verify("xxx",PR_TRUE);
+      if (mParserDebug)
+         mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
    }

  }//if
@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
 *  @param   aChild -- tag enum of child container
 *  @return  PR_TRUE if parent can contain child
 */
-PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {

  PRBool result=PR_FALSE;

@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {

    //handle form elements (this is very much a WIP!!!)
  if(0!=strchr(formElementTags,aChild)){
-    return CanContainFormElement(aParent,aChild);
+    return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
  }

  
-  switch(aParent) {
+  switch((eHTMLTags)aParent) {
    case eHTMLTag_a:
    case eHTMLTag_acronym:
      result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
 * @param   aChild -- tag type of child
 * @return  TRUE if propagation closes; false otherwise
 */
-PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag)  {
  PRBool result=PR_FALSE;

  switch(aParentTag) {
@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
      //otherwise, intentionally fall through...

    case eHTMLTag_tr:
-      if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
+      if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
        aVector.Append((PRUnichar)eHTMLTag_td);
        result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
 //        result=PR_TRUE;
@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
  return;
 }

+void CNavDTD::SetURLRef(char * aURLRef){
+   if (mURLRef) {
+      PL_strfree(mURLRef);
+      mURLRef=0;
+   }
+   if (aURLRef)
+      mURLRef = PL_strdup(aURLRef);
+}

-/************************************************************************
-  Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/** 
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update  jevering 6/06/98
- * @param   path is the directory structure indicating the bad context vector
- * @param   pURLRef is the associated URL
- * @param   filename to record mapping to if not already recorded
- * @return  TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP	"/vector.map"
-#define CONTEXT_VECTOR_STAT	"/vector.stat"
-#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
+void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
 {
-   char recordPath[2048];
-   PRIntn oflags = 0;
-
-   // create the record file name from the verification director
-   // and the default name.
-   strcpy(recordPath,gVerificationOutputDir);
-   strcat(recordPath,CONTEXT_VECTOR_MAP);
-
-   // create the file exists, only open for read/write
-   // otherwise, create it
-   if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
-      oflags = PR_CREATE_FILE;
-   oflags |= PR_RDWR;
-
-   // open the record file
-   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
-   if (recordFile) {
-
-      char * string = (char *)PR_Malloc(2048);
-      PRBool found = PR_FALSE;
-
-	  // vectors are stored on the format iof "URL vector filename"
-	  // where the vector contains the verification path and
-	  // the filename contains the debug source dump
-      sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
-	  // get the file size, read in the file and parse it line at
-	  // a time to check to see if we have already recorded this
-	  // occurance
-
-      PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
-      if (iSize) {
-
-         char * buffer = (char*)PR_Malloc(iSize);
-         char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
-         if (buffer!=NULL && string!=NULL) {
-            PRInt32 ibufferpos, istringpos;
-
-			// beginning of file for read
-            PR_Seek(recordFile,0,PR_SEEK_SET);
-            PR_Read(recordFile,buffer,iSize);
-
-			// run through the file looking for a matching vector
-            for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
-            {
-			   // compare string once we have hit the end of the line
-               if (buffer[ibufferpos] == '\r') {
-                  stringbuf[istringpos] = '\0';
-                  istringpos = 0;
-                  // skip newline and space
-                  ibufferpos++;
-
-                  if (PL_strlen(stringbuf)) {
-					char * space;
-   					// chop of the filename for compare
-                    if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
-						*space = '\0';
-
-					// we have already recorded this one, free up, and return
-                    if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
-						PR_Free(buffer);
-                        PR_Free(stringbuf);
-						PR_Free(string);
-                        return PR_TRUE;
+   if (aParserDebug) {
+      mParserDebug = aParserDebug;
+      NS_ADDREF(mParserDebug);
   }
 }
-               }
-
-               // build up the compare string
-               else
-                  stringbuf[istringpos++] = buffer[ibufferpos];
-            }
-
-            // throw away the record file data
-            PR_Free(buffer);
-            PR_Free(stringbuf);
-         }
-      }
-
-      // if this bad vector was not recorded, add it to record file
-
-      if (!found) {
-         PR_Seek(recordFile,0,PR_SEEK_END);
-         PR_Write(recordFile,string,PL_strlen(string));
-      }
-
-      PR_Close(recordFile);
-	  PR_Free(string);
-   }
-
-   // vector was not recorded
-   return PR_FALSE;
-}
-
-// structure to store the vector statistic information
-
-typedef struct vector_info {
-	PRInt32 references;     // number of occurances counted
-	PRInt32 count;          // number of tags in the vector
-  PRBool  good_vector;    // is this a valid vector?
-	eHTMLTags* vector;       // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE	128
-
-// compare function for quick sort.  Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
-{
-	VectorInfo ** p1 = (VectorInfo**)arg1;
-	VectorInfo ** p2 = (VectorInfo**)arg2;
-	return (*p2)->references - (*p1)->references;
-}
-
-
-/**
- *  This debug routines stores statistical information about a
- *  context vector.  The context vector statistics are stored in
- *  a global array.  The table is resorted each time it grows to
- *  aid in lookup speed.  If a vector has already been noted, its
- *  reference count is bumped, otherwise it is added to the table
- *
- *  @update     jevering 6/11/98
- *  @param      aTags is the tag list (vector)
- *  @param      count is the size of the vector
- *  @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
-    // if the table doesn't exist, create it
-	if (!gVectorInfoArray) {
-		gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
-	} 
-	else {
-        // attempt to look up the vector
-		for (PRInt32 i = 0; i < gVectorCount; i++)
-
-            // check the vector only if they are the same size, if they
-            // match then just return without doing further work
-			if (gVectorInfoArray[i]->count == count)
-				if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
-                    // bzzzt. and we have a winner.. bump the ref count
-					gVectorInfoArray[i]->references++;
-					return;
-				}
-	}
-
-    // the context vector hasn't been noted, so allocate it and
-    // initialize it one.. add it to the table
-	VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
-	pVectorInfo->references = 1;
-	pVectorInfo->count = count;
-	pVectorInfo->good_vector = good_vector;
-	pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
-	memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
-	gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
-    // have we maxed out the table?  grow it.. sort it.. love it. 
-	if ((gVectorCount % TABLE_SIZE) == 0) {
-		gVectorInfoArray = (VectorInfo**)realloc(
-			gVectorInfoArray,
-			(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
-	  if (gVectorCount) {
-		  qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
-	  }
-	}
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
-    sprintf (vector_string, "%6d ", pInfo->references);
-    for (PRInt32 j = 0; j < pInfo->count; j++) {
-	    PL_strcat(vector_string, "<");
-	    PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
-	    PL_strcat(vector_string, ">");
-    }
-    PL_strcat(vector_string,"\r\n");
-}
-
-/**
- *  This debug routine dumps out the vector statistics to a text
- *  file in the verification directory and defaults to the name
- *  "vector.stat".  It contains all parsed context vectors and there
- *  occurance count sorted in decending order.
- *  
- *  @update     jevering 6/11/98
- *  @param
- *  @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord(void)
-{
-    // do we have a table?
-	if (gVectorCount) {
-
-        // hopefully, they wont exceed 1K.
-      char vector_string[1024];
-      char path[1024];
-
-      path[0] = '\0';
-
-      // put in the verification directory.. else the root
-      if (gVerificationOutputDir)
-         strcpy(path,gVerificationOutputDir);
-
-      strcat(path,CONTEXT_VECTOR_STAT);
-
-      // open the stat file creaming any existing stat file
-      PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
-		if (statisticFile) {
-
-            PRInt32 i;
-            PRofstream ps;
-            ps.attach(statisticFile);
-        
-            // oh what the heck, sort it again
-	          if (gVectorCount) {
-		          qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
-	          }
-
-            // cute little header
-            sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
-            ps << vector_string;
-
-            ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
-            ps << VECTOR_TABLE_HEADER;
-
-            // dump out the bad vectors encountered
-            for (i = 0; i < gVectorCount; i++) {
-               if (!gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-            }
-
-            ps << "\r\n\r\nValid context vector summary\r\n";
-            ps << VECTOR_TABLE_HEADER;
-            
-            // take a big vector table dump (good vectors)
-            for (i = 0; i < gVectorCount; i++) {
-               if (gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-                // free em up.  they mean nothing to me now (I'm such a user)
-
-            if (gVectorInfoArray[i]->vector)
-               PR_Free(gVectorInfoArray[i]->vector);
-            PR_Free(gVectorInfoArray[i]);
-         }
-      }
-
-        // ok, we are done with the table, free it up as well
-      PR_Free(gVectorInfoArray);
-      gVectorInfoArray = 0;
-      gVectorCount = 0;
-      PR_Close(statisticFile);
-   }
-}
-
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool CNavDTD::VerifyContextVector(void) const {
-
-  PRBool  result=PR_TRUE;
-
-  if(0!=gVerificationOutputDir) {
-  
-#ifdef XP_PC
-      char    path[_MAX_PATH+1];
-      strcpy(path,gVerificationOutputDir);
-#endif
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-
-#ifdef NS_WIN32
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        mkdir(path);
-#endif
-      }
-
-      //**************************************************
-      //Add code here to see if we understand this vector
-      //**************************************************
-
-	  if(PR_FALSE==result){
-#ifdef NS_WIN32
-      // save file to directory indicated by bad context vector
-      int iCount = 1;
-      char filename[_MAX_PATH];
-      do {
-         sprintf(filename,"%s/html%04d.dbg", path, iCount++);
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-      PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-      if (debugFile) {
-         PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
-         PR_Write(debugFile,"\n",PL_strlen("\n"));
-         PR_Close(debugFile);
-      }
-#endif
-      //add debugging code here to record the fact that we just encountered
-      //a context vector we don't know how to handle.
-    }
-  }
-
-  return result;
-}
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool CNavDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
-
-  PRBool  result=PR_TRUE;
-
-  //ok, now see if we understand this vector
-
-  if(0!=anOutputDir || aRecordStats) 
-      result=VerifyContextVector();
-
-  if (aRecordStats) {
-	  NoteVector(mContextStack,mContextStackPos,result);
-  }
-
-  if(0!=anOutputDir) {
-      char    path[2048];
-      strcpy(path,anOutputDir);
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        PR_MkDir(path,0);
-      }
-	  if(PR_FALSE==result){
-      static PRBool rnd_initialized = PR_FALSE;
-
-      if (!rnd_initialized) {
-         // seed randomn number generator to aid in temp file
-         // creation.
-         rnd_initialized = PR_TRUE;
-         srand((unsigned)time(NULL));
-      }
-
-      // generate a filename to dump the html source into
-      char filename[1024];
-      do {
-         // use system time to generate a temporary file name
-         time_t ltime;
-         time (&ltime);
-         // add in random number so that we can create uniques names
-         // faster than simply every second.
-         ltime += (time_t)rand();
-         sprintf(filename,"%s/%lX.html", path, ltime);
-         // try until we find one we can create
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-
-      // check to see if we already recorded an instance of this particular
-      // bad vector.  
-      if (!DebugRecord(path,gURLRef, filename))
-      {
-         // save file to directory indicated by bad context vector
-         PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-         // if we were able to open the debug file, then
-         // write the true URL at the top of the file.
-         if (debugFile) {
-            // dump the html source into the newly created file.
-            PRofstream ps;
-            ps.attach(debugFile);
-            mParser->DebugDumpSource(ps);
-            PR_Close(debugFile);
-         }
-      }
-    }
-  }
-
-  return result;
-}
--- a/htmlparser/src/CNavDTD.h
+++ b/htmlparser/src/CNavDTD.h
@ -42,6 +42,7 @@

 class nsHTMLParser;
 class nsIHTMLContentSink;
+class nsIParserDebug;

 class CNavDTD : public nsIDTD {
            
@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
     *  of one type can contain a tag of another type.
     *  
     *  @update  gess 3/25/98
-     *  @param   aParent -- tag enum of parent container
-     *  @param   aChild -- tag enum of child container
+     *  @param   aParent -- int tag of parent container
+     *  @param   aChild -- int tag of child container
     *  @return  PR_TRUE if parent can contain child
     */
-    virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+    virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);

    /**
     *  This method is called to determine whether or not a tag
@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
     */
    virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;

-
-    /**
-     * This method gets called at various times by the parser
-     * whenever we want to verify a valid context stack. This
-     * method also gives us a hook to add debugging metrics.
-     *
-     * @update  gess4/6/98
-     * @param   aStack[] array of ints (tokens)
-     * @param   aCount number of elements in given array
-     * @return  TRUE if stack is valid, else FALSE
-     */
-    virtual PRBool VerifyContextVector(void) const;
-
    /**
     * 
-     * @update	gess5/18/98
-     * @param 
+     * @update	jevering 6/18/98
+     * @param  aURLRef is the current URL reference (for debugger)
     * @return
     */
-    virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+    virtual void SetURLRef(char * aURLRef);
+
+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParserDebug   created debug parser object
+     * @return
+     */
+    virtual void SetParserDebug(nsIParserDebug * aParserDebug);

    /**
     * This method tries to design a context map (without actually
@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
     * @param   aChild -- tag type of child
     * @return  True if closure was achieved -- other false
     */
-    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);

    /**
     * This method tries to design a context map (without actually
@ -699,7 +695,8 @@ protected:
    PRBool              mHasOpenForm;
    PRBool              mHasOpenMap;
    nsDeque             mTokenDeque;
-
+    char*               mURLRef;
+    nsIParserDebug*     mParserDebug;
 };


--- a/htmlparser/src/COtherDTD.cpp
+++ b/htmlparser/src/COtherDTD.cpp
@ -31,6 +31,7 @@
 *         
 */

+#include "nsIParserDebug.h"
 #include "COtherDTD.h"
 #include "nsHTMLTokens.h"
 #include "nsCRT.h"
@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
 static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

-static char*        gVerificationOutputDir=0;
-static char*        gURLRef=0;
 static nsAutoString gEmpty;

 static char formElementTags[]= {  
@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
 *  @return  
 */
 COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
+  NS_INIT_REFCNT();
  mParser=0;
+  mURLRef=0;
+  mParserDebug=0;
  nsCRT::zero(mLeafBits,sizeof(mLeafBits));
  nsCRT::zero(mContextStack,sizeof(mContextStack));
  nsCRT::zero(mStyleStack,sizeof(mStyleStack));
  nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
  mContextStackPos=0;
  mStyleStackPos=0;
-  gURLRef = 0;
  mHasOpenForm=PR_FALSE;
  mHasOpenMap=PR_FALSE;
-  gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
  InitializeDefaultTokenHandlers();
 }

@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
 */
 COtherDTD::~COtherDTD(){
  DeleteTokenHandlers();
-  if (gURLRef)
-  {
-     PL_strfree(gURLRef);
-     gURLRef = 0;
-  }
+  if (mURLRef)
+     PL_strfree(mURLRef);
+  if (mParserDebug)
+     NS_RELEASE(mParserDebug);
 //  NS_RELEASE(mSink);
 }

@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){

    if(aHandler) {
      result=(*aHandler)(theToken,this);
-      Verify("xxx",PR_TRUE);
+      if (mParserDebug)
+         mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
    }

  }//if
@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
 *  @param   aChild -- tag enum of child container
 *  @return  PR_TRUE if parent can contain child
 */
-PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {

  PRBool result=PR_FALSE;

@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {

    //handle form elements (this is very much a WIP!!!)
  if(0!=strchr(formElementTags,aChild)){
-    return CanContainFormElement(aParent,aChild);
+    return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
  }

  
-  switch(aParent) {
+  switch((eHTMLTags)aParent) {
    case eHTMLTag_a:
    case eHTMLTag_acronym:
      result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
 * @param   aChild -- tag type of child
 * @return  TRUE if propagation closes; false otherwise
 */
-PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
  PRBool result=PR_FALSE;

  switch(aParentTag) {
@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
  return;
 }

+void COtherDTD::SetURLRef(char * aURLRef){
+   if (mURLRef) {
+      PL_strfree(mURLRef);
+      mURLRef=0;
+   }
+   if (aURLRef)
+      mURLRef = PL_strdup(aURLRef);
+}

-/************************************************************************
-  Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/** 
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update  jevering 6/06/98
- * @param   path is the directory structure indicating the bad context vector
- * @param   pURLRef is the associated URL
- * @param   filename to record mapping to if not already recorded
- * @return  TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP	"/vector.map"
-#define CONTEXT_VECTOR_STAT	"/vector.stat"
-#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
+void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
 {
-   char recordPath[2048];
-   PRIntn oflags = 0;
-
-   // create the record file name from the verification director
-   // and the default name.
-   strcpy(recordPath,gVerificationOutputDir);
-   strcat(recordPath,CONTEXT_VECTOR_MAP);
-
-   // create the file exists, only open for read/write
-   // otherwise, create it
-   if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
-      oflags = PR_CREATE_FILE;
-   oflags |= PR_RDWR;
-
-   // open the record file
-   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
-   if (recordFile) {
-
-      char * string = (char *)PR_Malloc(2048);
-      PRBool found = PR_FALSE;
-
-	  // vectors are stored on the format iof "URL vector filename"
-	  // where the vector contains the verification path and
-	  // the filename contains the debug source dump
-      sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
-	  // get the file size, read in the file and parse it line at
-	  // a time to check to see if we have already recorded this
-	  // occurance
-
-      PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
-      if (iSize) {
-
-         char * buffer = (char*)PR_Malloc(iSize);
-         char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
-         if (buffer!=NULL && string!=NULL) {
-            PRInt32 ibufferpos, istringpos;
-
-			// beginning of file for read
-            PR_Seek(recordFile,0,PR_SEEK_SET);
-            PR_Read(recordFile,buffer,iSize);
-
-			// run through the file looking for a matching vector
-            for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
-            {
-			   // compare string once we have hit the end of the line
-               if (buffer[ibufferpos] == '\r') {
-                  stringbuf[istringpos] = '\0';
-                  istringpos = 0;
-                  // skip newline and space
-                  ibufferpos++;
-
-                  if (PL_strlen(stringbuf)) {
-					char * space;
-   					// chop of the filename for compare
-                    if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
-						*space = '\0';
-
-					// we have already recorded this one, free up, and return
-                    if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
-						PR_Free(buffer);
-                        PR_Free(stringbuf);
-						PR_Free(string);
-                        return PR_TRUE;
+   if (aParserDebug) {
+      mParserDebug = aParserDebug;
+      NS_ADDREF(mParserDebug);
   }
 }
-               }
-
-               // build up the compare string
-               else
-                  stringbuf[istringpos++] = buffer[ibufferpos];
-            }
-
-            // throw away the record file data
-            PR_Free(buffer);
-            PR_Free(stringbuf);
-         }
-      }
-
-      // if this bad vector was not recorded, add it to record file
-
-      if (!found) {
-         PR_Seek(recordFile,0,PR_SEEK_END);
-         PR_Write(recordFile,string,PL_strlen(string));
-      }
-
-      PR_Close(recordFile);
-	  PR_Free(string);
-   }
-
-   // vector was not recorded
-   return PR_FALSE;
-}
-
-// structure to store the vector statistic information
-
-typedef struct vector_info {
-	PRInt32 references;     // number of occurances counted
-	PRInt32 count;          // number of tags in the vector
-  PRBool  good_vector;    // is this a valid vector?
-	eHTMLTags* vector;       // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE	128
-
-// compare function for quick sort.  Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
-{
-	VectorInfo ** p1 = (VectorInfo**)arg1;
-	VectorInfo ** p2 = (VectorInfo**)arg2;
-	return (*p2)->references - (*p1)->references;
-}
-
-/**
- * quick sort the statistic array causing the most frequently
- * used vectors to be at the top (this makes it a little speedier
- * when looking them up)
- */
-static void SortVectorRecord(void) {
-    // of course, sort it only if there is something to sort
-	if (gVectorCount) {
-		qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
-	}
-}
-
-
-/**
- *  This debug routines stores statistical information about a
- *  context vector.  The context vector statistics are stored in
- *  a global array.  The table is resorted each time it grows to
- *  aid in lookup speed.  If a vector has already been noted, its
- *  reference count is bumped, otherwise it is added to the table
- *
- *  @update     jevering 6/11/98
- *  @param      aTags is the tag list (vector)
- *  @param      count is the size of the vector
- *  @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
-    // if the table doesn't exist, create it
-	if (!gVectorInfoArray) {
-		gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
-	} 
-	else {
-        // attempt to look up the vector
-		for (PRInt32 i = 0; i < gVectorCount; i++)
-
-            // check the vector only if they are the same size, if they
-            // match then just return without doing further work
-			if (gVectorInfoArray[i]->count == count)
-				if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
-                    // bzzzt. and we have a winner.. bump the ref count
-					gVectorInfoArray[i]->references++;
-					return;
-				}
-	}
-
-    // the context vector hasn't been noted, so allocate it and
-    // initialize it one.. add it to the table
-	VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
-	pVectorInfo->references = 1;
-	pVectorInfo->count = count;
-	pVectorInfo->good_vector = good_vector;
-	pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
-	memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
-	gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
-    // have we maxed out the table?  grow it.. sort it.. love it. 
-	if ((gVectorCount % TABLE_SIZE) == 0) {
-		gVectorInfoArray = (VectorInfo**)realloc(
-			gVectorInfoArray,
-			(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
-		SortVectorRecord();
-	}
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
-    sprintf (vector_string, "%6d ", pInfo->references);
-    for (PRInt32 j = 0; j < pInfo->count; j++) {
-	    PL_strcat(vector_string, "<");
-	    PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
-	    PL_strcat(vector_string, ">");
-    }
-    PL_strcat(vector_string,"\r\n");
-}
-
-/**
- *  This debug routine dumps out the vector statistics to a text
- *  file in the verification directory and defaults to the name
- *  "vector.stat".  It contains all parsed context vectors and there
- *  occurance count sorted in decending order.
- *  
- *  @update     jevering 6/11/98
- *  @param
- *  @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord_other(void)
-{
-    // do we have a table?
-	if (gVectorCount) {
-
-        // hopefully, they wont exceed 1K.
-      char vector_string[1024];
-      char path[1024];
-
-      path[0] = '\0';
-
-      // put in the verification directory.. else the root
-      if (gVerificationOutputDir)
-         strcpy(path,gVerificationOutputDir);
-
-      strcat(path,CONTEXT_VECTOR_STAT);
-
-      // open the stat file creaming any existing stat file
-      PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
-		if (statisticFile) {
-
-            PRInt32 i;
-            PRofstream ps;
-            ps.attach(statisticFile);
-        
-            // oh what the heck, sort it again
-            SortVectorRecord();
-
-            // cute little header
-            sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
-            ps << vector_string;
-
-            ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
-            ps << VECTOR_TABLE_HEADER;
-
-            // dump out the bad vectors encountered
-            for (i = 0; i < gVectorCount; i++) {
-               if (!gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-            }
-
-            ps << "\r\n\r\nValid context vector summary\r\n";
-            ps << VECTOR_TABLE_HEADER;
-            
-            // take a big vector table dump (good vectors)
-            for (i = 0; i < gVectorCount; i++) {
-               if (gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-                // free em up.  they mean nothing to me now (I'm such a user)
-
-            if (gVectorInfoArray[i]->vector)
-               PR_Free(gVectorInfoArray[i]->vector);
-            PR_Free(gVectorInfoArray[i]);
-         }
-      }
-
-        // ok, we are done with the table, free it up as well
-      PR_Free(gVectorInfoArray);
-      gVectorInfoArray = 0;
-      gVectorCount = 0;
-      PR_Close(statisticFile);
-   }
-}
-
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool COtherDTD::VerifyContextVector(void) const {
-
-  PRBool  result=PR_TRUE;
-
-  if(0!=gVerificationOutputDir) {
-  
-#ifdef XP_PC
-      char    path[_MAX_PATH+1];
-      strcpy(path,gVerificationOutputDir);
-#endif
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-
-#ifdef NS_WIN32
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        mkdir(path);
-#endif
-      }
-
-      //**************************************************
-      //Add code here to see if we understand this vector
-      //**************************************************
-
-	  if(PR_FALSE==result){
-#ifdef NS_WIN32
-      // save file to directory indicated by bad context vector
-      int iCount = 1;
-      char filename[_MAX_PATH];
-      do {
-         sprintf(filename,"%s/html%04d.dbg", path, iCount++);
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-      PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-      if (debugFile) {
-         PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
-         PR_Write(debugFile,"\n",PL_strlen("\n"));
-         PR_Close(debugFile);
-      }
-#endif
-      //add debugging code here to record the fact that we just encountered
-      //a context vector we don't know how to handle.
-    }
-  }
-
-  return result;
-}
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool COtherDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
-
-  PRBool  result=PR_TRUE;
-
-  //ok, now see if we understand this vector
-
-  if(0!=anOutputDir || aRecordStats) 
-      result=VerifyContextVector();
-
-  if (aRecordStats) {
-	  NoteVector(mContextStack,mContextStackPos,result);
-  }
-
-  if(0!=anOutputDir) {
-      char    path[2048];
-      strcpy(path,anOutputDir);
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        PR_MkDir(path,0);
-      }
-	  if(PR_FALSE==result){
-      static PRBool rnd_initialized = PR_FALSE;
-
-      if (!rnd_initialized) {
-         // seed randomn number generator to aid in temp file
-         // creation.
-         rnd_initialized = PR_TRUE;
-         srand((unsigned)time(NULL));
-      }
-
-      // generate a filename to dump the html source into
-      char filename[1024];
-      do {
-         // use system time to generate a temporary file name
-         time_t ltime;
-         time (&ltime);
-         // add in random number so that we can create uniques names
-         // faster than simply every second.
-         ltime += (time_t)rand();
-         sprintf(filename,"%s/%lX.html", path, ltime);
-         // try until we find one we can create
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-
-      // check to see if we already recorded an instance of this particular
-      // bad vector.  
-      if (!DebugRecord(path,gURLRef, filename))
-      {
-         // save file to directory indicated by bad context vector
-         PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-         // if we were able to open the debug file, then
-         // write the true URL at the top of the file.
-         if (debugFile) {
-            // dump the html source into the newly created file.
-            PRofstream ps;
-            ps.attach(debugFile);
-            mParser->DebugDumpSource(ps);
-            PR_Close(debugFile);
-         }
-      }
-    }
-  }
-
-  return result;
-}
-
--- a/htmlparser/src/COtherDTD.h
+++ b/htmlparser/src/COtherDTD.h
@ -34,7 +34,6 @@
 #include "nsDeque.h"


-
 #define NS_IOtherHTML_DTD_IID      \
  {0x8a5e89c0, 0xd16d,  0x11d1,  \
  {0x80, 0x22, 0x00,    0x60, 0x8, 0x14, 0x98, 0x89}}
@ -42,6 +41,7 @@

 class nsIParser;
 class nsIHTMLContentSink;
+class nsIParserDebug;

 class COtherDTD : public nsIDTD {
            
@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
     *  of one type can contain a tag of another type.
     *  
     *  @update  gess 3/25/98
-     *  @param   aParent -- tag enum of parent container
-     *  @param   aChild -- tag enum of child container
+     *  @param   aParent -- int tag of parent container
+     *  @param   aChild -- int tag of child container
     *  @return  PR_TRUE if parent can contain child
     */
-    virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+    virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);

    /**
     *  This method is called to determine whether or not a tag
@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
     */
    virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;

-
-    /**
-     * This method gets called at various times by the parser
-     * whenever we want to verify a valid context stack. This
-     * method also gives us a hook to add debugging metrics.
-     *
-     * @update  gess4/6/98
-     * @param   aStack[] array of ints (tokens)
-     * @param   aCount number of elements in given array
-     * @return  TRUE if stack is valid, else FALSE
-     */
-    virtual PRBool VerifyContextVector(void) const;
-
    /**
     * 
-     * @update	gess5/18/98
-     * @param 
+     * @update	jevering 6/18/98
+     * @param  aURLRef is the current URL reference (for debugger)
     * @return
     */
-    virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+    virtual void SetURLRef(char * aURLRef);
+
+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParserDebug   created debug parser object
+     * @return
+     */
+    virtual void SetParserDebug(nsIParserDebug * aParserDebug);

    /**
     * This method tries to design a context map (without actually
@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
     * @param   aChild -- tag type of child
     * @return  True if closure was achieved -- other false
     */
-    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);

    /**
     * This method tries to design a context map (without actually
@ -701,7 +696,8 @@ protected:
    PRBool              mHasOpenForm;
    PRBool              mHasOpenMap;
    nsDeque             mTokenDeque;
-
+    char*               mURLRef;
+    nsIParserDebug*     mParserDebug;
 };


--- a/htmlparser/src/Makefile
+++ b/htmlparser/src/Makefile
@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
 CPPSRCS =     \
  nsHTMLContentSink.cpp \
  nsParserNode.cpp  \
+  nsParserDebug.cpp \
  nsScanner.cpp   \
  nsToken.cpp   \
  nsTokenHandler.cpp \
@ -41,6 +42,8 @@ EXPORTS =     \
  nsHTMLTokens.h    \
  nsIParserNode.h   \
  nsIParser.h   \
+  nsIParserDebug.h \
+  nsIParserFilter.h \
  nsToken.h   \
  $(NULL)

--- a/htmlparser/src/makefile.win
+++ b/htmlparser/src/makefile.win
@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
    nsHTMLParser.cpp prstrm.cpp

 EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
-    nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h 
+    nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h  \
+    nsIParserDebug.h nsIParserFilter.h

 CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
    .\$(OBJDIR)\CNavDTD.obj \
@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
    .\$(OBJDIR)\nsHTMLParser.obj \
    .\$(OBJDIR)\nsHTMLTokens.obj          .\$(OBJDIR)\nsParserNode.obj \
    .\$(OBJDIR)\nsScanner.obj             .\$(OBJDIR)\nsToken.obj \
-    .\$(OBJDIR)\nsTokenHandler.obj \
+    .\$(OBJDIR)\nsTokenHandler.obj        .\$(OBJDIR)\nsParserDebug.obj \
    .\$(OBJDIR)\prstrm.obj

 LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
--- a/htmlparser/src/nsHTMLParser.cpp
+++ b/htmlparser/src/nsHTMLParser.cpp
@ -30,6 +30,7 @@
 #include "prstrm.h"
 #include <fstream.h>
 #include "nsIInputStream.h"
+#include "nsIParserFilter.h"

 /* UNCOMMENT THIS IF STUFF STOPS WORKING...
 #ifdef XP_PC
@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
 static const char* kNullFilename= "Error: Null filename given";
 static const char* kNullTokenizer = "Error: Unable to construct tokenizer";

-static char*      gVerificationOutputDir=0;
-static PRBool     gRecordingStatistics=PR_TRUE;
 static const int  gTransferBufferSize=4096;  //size of the buffer used in moving data from iistream
-static char*      gURLRef=0;

 //#define DEBUG_SAVE_SOURCE_DOC 1
 #ifdef DEBUG_SAVE_SOURCE_DOC
@ -58,17 +56,6 @@ fstream* gTempStream=0;
 #endif


-extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
-{
-	gVerificationOutputDir = verify_dir;
-}
-
-
-extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
-{
-	gRecordingStatistics = bval;
-}
-
 /**
 *  This method is defined in nsIParser. It is used to 
 *  cause the COM-like construction of an nsHTMLParser.
@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
 */
 nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
  NS_INIT_REFCNT();
+  mParserFilter = nsnull;
  mListener = nsnull;
  mTransferBuffer=0;
  mSink=0;
@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
 *  @return  
 */
 nsHTMLParser::~nsHTMLParser() {
-  if (gURLRef)
-  {
-    PL_strfree(gURLRef);
-    gURLRef = 0;
-  }
  NS_IF_RELEASE(mListener);
  if(mTransferBuffer)
    delete [] mTransferBuffer;
@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
    delete mCurrentPos;
  mCurrentPos=0;
  if(mDTD)
-    delete mDTD;    
+     NS_RELEASE(mDTD);
  mDTD=0;
  if(mScanner)
    delete mScanner;
@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
  return NS_OK;                                                        
 }

+// Installs aFilter (may be null) as the parser filter and releases the previous one.
+// Returns 'old' after NS_RELEASE, exactly as before; presumably NS_RELEASE nulls it
+// (confirm in nsISupports.h), so callers must not treat the result as a live reference.
+nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
+{
+  nsIParserFilter* old=mParserFilter;
+  mParserFilter=aFilter;           // always replace: a null filter must clear the stale pointer
+  if(aFilter) NS_ADDREF(aFilter);  // addref before release so re-installing the same filter is safe
+  if(old) NS_RELEASE(old);
+  return old;
+}
+
 /**
 *  This method gets called in order to set the content
 *  sink for this parser to dump nodes to.
@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
  mDTD=aDTD;
 }

+nsIDTD * nsHTMLParser::GetDTD(void) {  // accessor for the DTD currently attached to this parser (may be 0)
+   return mDTD;  // returned as-is: no AddRef is performed on behalf of the caller
+}
+
 /**
 *  
 *  
@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
 *  @param   
 *  @return  
 */
-nsIDTD* GetDTD(eParseMode aMode) {
+nsIDTD* NewDTD(eParseMode aMode) {
  nsIDTD* aDTD=0;
  switch(aMode) {
    case eParseMode_navigator:
@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
    default:
      break;
  }
+  if (aDTD)
+     aDTD->AddRef();
  return aDTD;
 }

@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
  nsString  theBuffer;
  const int kLocalBufSize=10;

-  if (gURLRef)
-     PL_strfree(gURLRef);
-  if (aFilename)
-     gURLRef = PL_strdup(aFilename);
-
  mMajorIteration=-1;
  mMinorIteration=-1;

@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
 *  @param   aFilename -- const char* containing file to be parsed.
 *  @return  PR_TRUE if parse succeeded, PR_FALSE otherwise.
 */
-PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
+PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
  NS_PRECONDITION(0!=aFilename,kNullFilename);
  PRInt32 status=kBadFilename;
  mIncremental=aIncremental;

  if(aFilename) {

-    if (gURLRef)
-       PL_strfree(gURLRef);
-    gURLRef = PL_strdup(aFilename);
-
    mParseMode=DetermineParseMode();  
-    mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+    mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
    if(mDTD) {
      mDTD->SetParser(this);
      mDTD->SetContentSink(mSink);
+      mDTD->SetURLRef((char *)aFilename);
+      mDTD->SetParserDebug(aDebug);
    }

    WillBuildModel();
@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
 */
 PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
                            nsIStreamListener* aListener,
-                            PRBool aIncremental) {
+                            PRBool aIncremental,
+                            nsIParserDebug * aDebug) {
  NS_PRECONDITION(0!=aURL,kNullURL);

  PRInt32 status=kBadURL;
@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,

  if(aURL) {

-     if (gURLRef)
-     {
-        PL_strfree(gURLRef);
-        gURLRef = 0;
-     }
-     if (aURL->GetSpec())
-        gURLRef = PL_strdup(aURL->GetSpec());
-
    mParseMode=DetermineParseMode();  
-    mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+    mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
    if(mDTD) {
      mDTD->SetParser(this);
      mDTD->SetContentSink(mSink);
+      mDTD->SetURLRef((char *)aURL->GetSpec());
+      mDTD->SetParserDebug(aDebug);
    }

    WillBuildModel();
@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
          }
        #endif

+        if (mParserFilter)
+           mParserFilter->RawBuffer(mTransferBuffer, &len);
+
        mScanner->Append(&mTransferBuffer[offset],len);

      } //if
--- a/htmlparser/src/nsHTMLParser.h
+++ b/htmlparser/src/nsHTMLParser.h
@ -1,313 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/*
- * The contents of this file are subject to the Netscape Public License
- * Version 1.0 (the "NPL"); you may not use this file except in
- * compliance with the NPL.  You may obtain a copy of the NPL at
- * http://www.mozilla.org/NPL/
- *
- * Software distributed under the NPL is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
- * for the specific language governing rights and limitations under the
- * NPL.
- *
- * The Initial Developer of this code under the NPL is Netscape
- * Communications Corporation.  Portions created by Netscape are
- * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
- * Reserved.
- */
- 
-/**
- * MODULE NOTES:
- * @update  gess 4/1/98
- * 
- *  This class does two primary jobs:
- *    1) It iterates the tokens provided during the 
- *       tokenization process, identifing where elements
- *       begin and end (doing validation and normalization).
- *    2) It controls and coordinates with an instance of
- *       the IContentSink interface, to coordinate the
- *       the production of the content model.
- *
- *  The basic operation of this class assumes that an HTML
- *  document is non-normalized. Therefore, we don't process
- *  the document in a normalized way. Don't bother to look
- *  for methods like: doHead() or doBody().
- *
- *  Instead, in order to be backward compatible, we must
- *  scan the set of tokens and perform this basic set of
- *  operations:
- *    1)  Determine the token type (easy, since the tokens know)
- *    2)  Determine the appropriate section of the HTML document
- *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
- *    3)  Insert content into our document (via the sink) into
- *        the correct section.
- *    4)  In the case of tags that belong in the BODY, we must
- *        ensure that our underlying document state reflects
- *        the appropriate context for our tag. 
- *
- *        For example,if we see a <TR>, we must ensure our 
- *        document contains a table into which the row can
- *        be placed. This may result in "implicit containers" 
- *        created to ensure a well-formed document.
- *         
- */
-
-#ifndef NS_HTMLPARSER__
-#define NS_HTMLPARSER__
-
-#include "nsIParser.h"
-#include "nsDeque.h"
-#include "nsParserNode.h"
-#include "nsParserTypes.h"
-#include "nsIURL.h"
-#include "nsIStreamListener.h"
-
-
-#define NS_IHTML_PARSER_IID      \
-  {0x2ce606b0, 0xbee6,  0x11d1,  \
-  {0xaa, 0xd9, 0x00,    0x80, 0x5f, 0x8a, 0x3e, 0x14}}
-
-
-class IContentSink;
-class nsIHTMLContentSink;
-class nsIURL;
-class nsIDTD;
-class CScanner;
-
-
-class nsHTMLParser : public nsIParser, public nsIStreamListener {
-            
-  public:
-friend class CTokenHandler;
-
-    NS_DECL_ISUPPORTS
-
-
-    /**
-     * default constructor
-     * @update	gess5/11/98
-     */
-    nsHTMLParser();
-
-
-    /**
-     * Destructor
-     * @update	gess5/11/98
-     */
-    ~nsHTMLParser();
-
-    /**
-     * Select given content sink into parser for parser output
-     * @update	gess5/11/98
-     * @param   aSink is the new sink to be used by parser
-     * @return  old sink, or NULL
-     */
-    virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
-    
-    virtual void SetDTD(nsIDTD* aDTD);
-    
-    /**
-     *  
-     *  
-     *  @update  gess 6/9/98
-     *  @param   
-     *  @return  
-     */
-    virtual CScanner* GetScanner(void);
-
-    /**
-     * Cause parser to parse input from given URL in given mode
-     * @update	gess5/11/98
-     * @param   aURL is a descriptor for source document
-     * @param   aListener is a listener to forward notifications to
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRInt32 Parse(nsIURL* aURL,
-                          nsIStreamListener* aListener,
-                          PRBool aIncremental=PR_TRUE);
-
-    /**
-     * Cause parser to parse input from given file in given mode
-     * @update	gess5/11/98
-     * @param   aFilename is a path for file document
-     * @param   aMode is the desired parser mode (Nav, other, etc.)
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
-
-    /**
-     * @update	gess5/11/98
-     * @param   anHTMLString contains a string-full of real HTML
-     * @param   appendTokens tells us whether we should insert tokens inline, or append them.
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
-
-    /**
-     * This method gets called (automatically) during incremental parsing
-     * @update	gess5/11/98
-     * @return  TRUE if all went well, otherwise FALSE
-     */
-    virtual PRInt32 ResumeParse(void);
-
-    /**
-     * Causes the parser to scan foward, collecting nearby (sequential)
-     * attribute tokens into the given node.
-     * @update	gess5/11/98
-     * @param   node to store attributes
-     * @return  number of attributes added to node.
-     */
-    virtual PRInt32 CollectAttributes(nsCParserNode& aNode,PRInt32 aCount);
-
-    /**
-     * Causes the next skipped-content token (if any) to
-     * be consumed by this node.
-     * @update	gess5/11/98
-     * @param   node to consume skipped-content
-     * @return  number of skipped-content tokens consumed.
-     */
-    virtual PRInt32 CollectSkippedContent(nsCParserNode& aNode);
-
-    /**
-     *  This debug routine is used to cause the tokenizer to
-     *  iterate its token list, asking each token to dump its
-     *  contents to the given output stream.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  
-     */
-    void DebugDumpSource(ostream& out);
-
-
-     //*********************************************
-      // These methods are callback methods used by
-      // net lib to let us know about our inputstream.
-      //*********************************************
-    NS_IMETHOD GetBindInfo(void);
-    NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const nsString& aMmsg);
-    NS_IMETHOD OnStartBinding(const char *aContentType);
-    NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
-    NS_IMETHOD OnStopBinding(PRInt32 status, const nsString& aMsg);
-
-protected:
-
-    /**
-     * 
-     * @update	gess5/18/98
-     * @param 
-     * @return
-     */
-    PRInt32 WillBuildModel(void);
-
-    /**
-     * 
-     * @update	gess5/18/98
-     * @param 
-     * @return
-     */
-    PRInt32 DidBuildModel(PRInt32 anErrorCode);
-
-    /**
-     * This method gets called when the tokens have been consumed, and it's time
-     * to build the model via the content sink.
-     * @update	gess5/11/98
-     * @return  YES if model building went well -- NO otherwise.
-     */
-    virtual PRInt32 IterateTokens(void);
-  
-private:
-    PRInt32 ParseFileIncrementally(const char* aFilename);  //XXX ONLY FOR DEBUG PURPOSES...
-
-    /*******************************************
-      These are the tokenization methods...
-     *******************************************/
-
-    /**
-     *  Cause the tokenizer to consume the next token, and 
-     *  return an error result.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   anError -- ref to error code
-     *  @return  new token or null
-     */
-    virtual PRInt32 ConsumeToken(CToken*& aToken);
-
-    /**
-     *  Part of the code sandwich, this gets called right before
-     *  the tokenization process begins. The main reason for
-     *  this call is to allow the delegate to do initialization.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  TRUE if it's ok to proceed
-     */
-    PRBool WillTokenize(PRBool aIncremental);
-
-    /**
-     *  
-     *  @update  gess 3/25/98
-     *  @return  TRUE if it's ok to proceed
-     */
-    PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens);
-
-    /**
-     *  This is the primary control routine. It iteratively
-     *  consumes tokens until an error occurs or you run out
-     *  of data.
-     *  
-     *  @update  gess 3/25/98
-     *  @return  error code 
-     */
-    PRInt32 Tokenize(void);
-
-    /**
-     *  This is the tail-end of the code sandwich for the
-     *  tokenization process. It gets called once tokenziation
-     *  has completed.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  TRUE if all went well
-     */
-    PRBool DidTokenize(PRBool aIncremental);
-
-    /**
-     *  This debug routine is used to cause the tokenizer to
-     *  iterate its token list, asking each token to dump its
-     *  contents to the given output stream.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  
-     */
-    void DebugDumpTokens(ostream& out);
-
-
-protected:
-    //*********************************************
-    // And now, some data members...
-    //*********************************************
-
-    nsIStreamListener*  mListener;
-    nsIContentSink*     mSink;
-
-    nsDequeIterator*    mCurrentPos;
-    nsDequeIterator*    mMarkPos;
-
-    nsIDTD*             mDTD;
-    eParseMode          mParseMode;
-    PRBool              mIncremental;
-    char*               mTransferBuffer;
-
-    PRInt32             mMajorIteration;
-    PRInt32             mMinorIteration;
-
-    nsDeque             mTokenDeque;
-    CScanner*           mScanner;
-
-};
-
-
-#endif 
-
--- a/htmlparser/src/nsIDTD.h
+++ b/htmlparser/src/nsIDTD.h
@ -37,6 +37,7 @@
 class nsIParser;
 class CToken;
 class nsIContentSink;
+class nsIParserDebug;

 class nsIDTD : public nsISupports {
            
@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {

    /**
     * 
-     * @update	gess5/18/98
-     * @param 
+     * @update	jevering 6/18/98
+     * @param  aURLRef is the current URL reference (for debugger)
     * @return
     */
-    virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
+    virtual void SetURLRef(char * aURLRef) = 0;

+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParent  parent tag
+     * @param  aChild   child tag
+     * @return PR_TRUE if valid container
+     */
+    virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
+
+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParserDebug   created debug parser object
+     * @return
+     */
+    virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
 };


--- a/htmlparser/src/nsIParser.h
+++ b/htmlparser/src/nsIParser.h
@ -34,6 +34,7 @@ class nsString;
 class CToken;
 class nsIURL;
 class nsIDTD;
+class nsIParserDebug;

 /**
 *  This class defines the iparser interface. This XPCOM
@ -60,9 +61,10 @@ class nsIParser : public nsISupports {

    virtual PRInt32 Parse(nsIURL* aURL,
                          nsIStreamListener* aListener,
-                          PRBool aIncremental=PR_TRUE) = 0;
+                          PRBool aIncremental=PR_TRUE,
+                          nsIParserDebug * aDebug = 0) = 0;

-    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
+    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;

    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;

--- a/htmlparser/src/nsIParserDebug.h
+++ b/htmlparser/src/nsIParserDebug.h
@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL.  You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation.  Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update  gess 4/8/98
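+ * Declares nsIParserDebug, the interface of the parser debug object that is
+ * passed to nsIParser::Parse and handed to the DTD through SetParserDebug.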
+ */
+
+#ifndef NS_IPARSERDEBUG__
+#define NS_IPARSERDEBUG__
+
+#include "nsISupports.h"
+#include "nsHTMLTokens.h"
+#include "prtypes.h"
+
+#define NS_IPARSERDEBUG_IID      \
+  {0x7b68c220, 0x0685,  0x11d2,  \
+  {0xa4, 0xb5, 0x00,    0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
+
+
+class nsIDTD;
+class nsHTMLParser;
+
+class nsIParserDebug : public nsISupports {
+   // Pure-virtual debug interface; nsHTMLParser::Parse receives one and passes it to the DTD via SetParserDebug.
+public:
+   // Presumably sets the directory that verification output is written to -- confirm in the implementation.
+   virtual void SetVerificationDirectory(char * verify_dir) = 0;
+   // Presumably enables/disables recording of context-vector statistics -- confirm in the implementation.
+   virtual void SetRecordStatistics(PRBool bval) = 0;
+   // Takes the DTD, the parser, the context stack (ContextStackPos is presumably its depth) and the URL reference.
+   virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0;
+   // Presumably writes out the recorded vector statistics -- the destination is not visible from this header.
+   virtual void DumpVectorRecord(void) = 0;
+
+};
+
+extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
+
+#endif /* NS_IPARSERDEBUG__ */
--- a/htmlparser/src/nsIParserFilter.h
+++ b/htmlparser/src/nsIParserFilter.h
@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL.  You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation.  Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update  jevering 6/17/98
+ * Declares nsIParserFilter, the hook installed with nsHTMLParser::SetParserFilter.
+ */
+
+#ifndef  IPARSERFILTER
+#define  IPARSERFILTER
+
+#include "nsISupports.h"
+
+class CToken;
+
+#define NS_IPARSERFILTER_IID     \
+  {0x14d6ff0,  0x0610,  0x11d2,  \
+  {0x8c, 0x3f, 0x00,    0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
+
+
+class nsIParserFilter : public nsISupports {
+  public:
+   // Called by nsHTMLParser::OnDataAvailable with the transfer buffer and a pointer to its length, before the data is appended to the scanner.
+   NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0;
+   // No caller in the reviewed part of this patch; presumably a hook run before a token is queued -- TODO confirm.
+   NS_IMETHOD WillAddToken(CToken & token) = 0;
+   // Placeholder: the argument list has not been decided yet (see the inline note).
+   NS_IMETHOD ProcessTokens( /* don't know what goes here yet */ void ) = 0;
+};
+
+extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
+
+
+#endif
+
--- a/htmlparser/src/nsParserDebug.cpp
+++ b/htmlparser/src/nsParserDebug.cpp
@ -0,0 +1,534 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL.  You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation.  Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
+ * Reserved.
+ */
+  
+/**
+ * MODULE NOTES:
+ * @update  jevering 06/18/98
+ * 
+ * This file contains the parser debugger object which aids in
+ * walking links and reporting statistic information, reporting
+ * bad vectors.
+ */
+
+#include "CNavDTD.h"
+#include "nsHTMLTokens.h"
+#include "nsHTMLParser.h"
+#include "nsIParserDebug.h"
+#include "nsCRT.h"
+#include "prenv.h"  //this is here for debug reasons...
+#include "prtypes.h"  //this is here for debug reasons...
+#include "prio.h"
+#include "plstr.h"
+#include "prstrm.h"
+#include <fstream.h>
+#include <time.h>
+#include "prmem.h"
+
+#define CONTEXT_VECTOR_MAP	"/vector.map"
+#define CONTEXT_VECTOR_STAT	"/vector.stat"
+#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
+
+// One entry of the vector statistics table: a context vector (a
+// sequence of tags) together with how often it has been noted.
+typedef struct vector_info {
+    PRInt32 references;     // number of occurrences counted
+    PRInt32 count;          // number of tags in the vector
+    PRBool  good_vector;    // is this a valid vector?
+    eHTMLTags* vector;       // the tags themselves (count entries)
+} VectorInfo;
+
+// the statistic vector table grows each time it exceeds this
+// stepping value
+#define TABLE_SIZE	128
+// Concrete nsIParserDebug: checks context vectors, keeps per-vector statistics, dumps offending source.
+class CParserDebug : public nsIParserDebug {
+public:
+    // with no directory given, the constructor consults the VERIFY_PARSER environment variable
+    CParserDebug(char * aVerifyDir = 0);
+    ~CParserDebug();
+
+    NS_DECL_ISUPPORTS
+    // nsIParserDebug methods
+    void SetVerificationDirectory(char * verify_dir);
+    void SetRecordStatistics(PRBool bval);
+    PRBool Verify(nsIDTD * aDTD,  nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
+    void DumpVectorRecord(void);
+
+    // table of noted vectors, its entry count, and the debug settings
+
+private:
+    VectorInfo ** mVectorInfoArray;   // statistics table, created on the first NoteVector call
+    PRInt32 mVectorCount;             // number of entries in mVectorInfoArray
+    char * mVerificationDir;          // owned copy of the output directory, or 0
+    PRBool mRecordingStatistics;      // when true, Verify notes every vector it sees
+    // helpers used by Verify and DumpVectorRecord
+    PRBool DebugRecord(char * path, char * pURLRef, char * filename);
+    void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
+    void MakeVectorString(char * vector_string, VectorInfo * pInfo);
+};
+
+static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);                 
+static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
+
+/**
+ *  Factory for the parser debug object declared in nsIParserDebug.h.
+ *  Constructs a CParserDebug and hands it back through its
+ *  nsIParserDebug interface; the reference the caller receives is the
+ *  one added by QueryInterface.
+ *  
+ *  @update  jevering 06/18/98
+ *  @param   aInstancePtrResult receives the newly created debug object
+ *  @return  NS_OK on success, NS_ERROR_NULL_POINTER if no result pointer
+ *           was supplied, NS_ERROR_OUT_OF_MEMORY if allocation failed,
+ *           otherwise the failure code from QueryInterface
+ */
+
+NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
+{
+  // nothing to construct if there is nowhere to put the result
+  if (0 == aInstancePtrResult) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  CParserDebug *it = new CParserDebug();
+
+  if (it == 0) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  nsresult result = it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
+  if (NS_OK != result) {
+    // QueryInterface took no reference, so nobody owns the object;
+    // destroy it here instead of leaking it.
+    delete it;
+  }
+  return result;
+}
+
+/**
+ *  Sets up an empty statistics table with statistics recording
+ *  switched on, and picks the verification directory.
+ *
+ *  @param   aVerifyDir directory to use; when null the VERIFY_PARSER
+ *           environment variable is consulted instead
+ */
+CParserDebug::CParserDebug(char * aVerifyDir)
+{
+   NS_INIT_REFCNT();
+   mVectorInfoArray = 0;
+   mVectorCount = 0;
+   mRecordingStatistics = PR_TRUE;
+
+   // an explicit directory wins; otherwise fall back on the environment
+   char * theDir = aVerifyDir;
+   if (!theDir)
+      theDir = PR_GetEnv("VERIFY_PARSER");
+
+   // keep a private copy, or no directory at all if neither was given
+   if (theDir)
+      mVerificationDir = PL_strdup(theDir);
+   else
+      mVerificationDir = 0;
+}
+
+/**
+ *  Releases the copy of the verification directory and any vector
+ *  statistics that are still held (that is, statistics noted since
+ *  the last DumpVectorRecord call).
+ */
+CParserDebug::~CParserDebug()
+{
+   if (mVerificationDir)
+      PL_strfree(mVerificationDir);
+
+   // Statistics that were never written out are still owned by this
+   // object; free every entry, its tag vector, and then the table.
+   if (mVectorInfoArray) {
+      for (PRInt32 i = 0; i < mVectorCount; i++) {
+         if (mVectorInfoArray[i]) {
+            if (mVectorInfoArray[i]->vector)
+               PR_Free(mVectorInfoArray[i]->vector);
+            PR_Free(mVectorInfoArray[i]);
+         }
+      }
+      PR_Free(mVectorInfoArray);
+   }
+}
+
+/**
+ *  COM-style interface discovery. Requests for nsISupports and for
+ *  nsIParserDebug are both answered with this object's nsIParserDebug
+ *  pointer, and a reference is added on the caller's behalf.
+ *  
+ *  @update   gess 4/8/98
+ *  @param    aIID  id of the interface being asked for
+ *  @param    aInstancePtr receives the interface pointer (0 if unsupported)
+ *  @return   NS_OK, NS_ERROR_NULL_POINTER or NS_NOINTERFACE
+ */
+nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)  
+{
+  if (NULL == aInstancePtr) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  // anything other than the two ids we know about is refused
+  if (!aIID.Equals(kISupportsIID) && !aIID.Equals(kIDebugParserIID)) {
+    *aInstancePtr=0;
+    return NS_NOINTERFACE;
+  }
+
+  // both supported ids resolve to the same interface pointer
+  nsIParserDebug* theInterface = (nsIParserDebug*)(this);
+  *aInstancePtr = theInterface;
+  theInterface->AddRef();
+  return NS_OK;
+}
+
+NS_IMPL_ADDREF(CParserDebug)
+NS_IMPL_RELEASE(CParserDebug)
+
+/**
+ *  Replaces the verification directory with a private copy of the
+ *  given string. A null argument leaves the object without a
+ *  verification directory, which switches off the file output that
+ *  Verify performs.
+ *
+ *  @param   verify_dir the new directory, or null for none
+ */
+void CParserDebug::SetVerificationDirectory(char * verify_dir)
+{
+   // Copy before freeing: the caller may be handing back the very
+   // string this object already owns.
+   char * theCopy = 0;
+   if (verify_dir)
+      theCopy = PL_strdup(verify_dir);
+
+   if (mVerificationDir)
+      PL_strfree(mVerificationDir);
+
+   mVerificationDir = theCopy;
+}
+
+/**
+ *  Switches the collection of context vector statistics on or off.
+ *  Verify only notes vectors while this flag is set.
+ *
+ *  @param   bval PR_TRUE to record statistics, PR_FALSE to stop
+ */
+void CParserDebug::SetRecordStatistics(PRBool bval)
+{
+   mRecordingStatistics = bval;
+}
+
+/** 
+ * Records an invalid context vector together with its URL and the name
+ * of the source dump file in a simple flat file. The file resides in
+ * the verification directory and is named by CONTEXT_VECTOR_MAP
+ * ("vector.map"). Every line has the form "URL vector filename\r\n",
+ * where the vector is the verification path.
+ *
+ * @update  jevering 6/06/98
+ * @param   path is the directory structure indicating the bad context vector
+ * @param   pURLRef is the associated URL (null is recorded as an empty URL)
+ * @param   filename to record mapping to if not already recorded
+ * @return  TRUE if this URL/vector pair is already recorded (dont
+ *          rerecord); FALSE otherwise, including when the map file
+ *          cannot be opened or memory runs out
+ */
+
+PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
+{
+   char recordPath[2048];
+   PRIntn oflags = 0;
+
+   if (!mVerificationDir || !path || !filename)
+      return PR_FALSE;
+
+   // refuse to build a record file name that does not fit the buffer
+   if (PL_strlen(mVerificationDir) + PL_strlen(CONTEXT_VECTOR_MAP) >= sizeof(recordPath))
+      return PR_FALSE;
+
+   // create the record file name from the verification directory
+   // and the default name.
+   strcpy(recordPath,mVerificationDir);
+   strcat(recordPath,CONTEXT_VECTOR_MAP);
+
+   // if the file exists, only open for read/write
+   // otherwise, create it
+   // NOTE(review): the creation mode handed to PR_Open is 0; where the
+   // platform honors it the new file may not be reopenable -- confirm.
+   if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
+      oflags = PR_CREATE_FILE;
+   oflags |= PR_RDWR;
+
+   // open the record file
+   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
+   if (!recordFile)
+      return PR_FALSE;
+
+   const char * theURL = pURLRef ? pURLRef : "";
+   PRBool found = PR_FALSE;
+
+   // vectors are stored in the format "URL vector filename"; size the
+   // record exactly: two spaces, "\r\n" and the terminator make five.
+   PRUint32 theRecordSize = PL_strlen(theURL) + PL_strlen(path) + PL_strlen(filename) + 5;
+   char * string = (char *)PR_Malloc(theRecordSize);
+   if (!string) {
+      PR_Close(recordFile);
+      return PR_FALSE;
+   }
+   sprintf(string,"%s %s %s\r\n", theURL, path, filename);
+
+   // get the file size, read in the file and parse it a line at
+   // a time to see whether we have already recorded this occurrence.
+   // PR_Seek reports failure with a negative value, so only a
+   // positive size is worth reading.
+   PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
+   if (iSize > 0) {
+
+      char * buffer = (char*)PR_Malloc(iSize);
+      // a single line can never be longer than the whole file
+      char * stringbuf = (char*)PR_Malloc(iSize+1);
+
+      if (buffer!=NULL && stringbuf!=NULL) {
+         // beginning of file for read
+         PR_Seek(recordFile,0,PR_SEEK_SET);
+         PRInt32 iRead = PR_Read(recordFile,buffer,iSize);
+         PRInt32 istringpos = 0;
+
+         // run through what was actually read looking for a matching vector
+         for (PRInt32 ibufferpos = 0; ibufferpos < iRead && !found; ibufferpos++)
+         {
+            // compare string once we have hit the end of the line
+            if (buffer[ibufferpos] == '\r') {
+               stringbuf[istringpos] = '\0';
+               istringpos = 0;
+               // skip the newline that follows the carriage return
+               ibufferpos++;
+
+               // chop off the filename for compare
+               char * space = PL_strrchr(stringbuf, ' ');
+               if (space)
+                  *space = '\0';
+
+               // The stored "URL vector" must match the start of the new
+               // record AND be followed by the separator; otherwise a
+               // deeper vector would match a shorter one already on file.
+               PRUint32 theKeyLen = PL_strlen(stringbuf);
+               if (theKeyLen > 0 &&
+                   !PL_strncmp(string,stringbuf,theKeyLen) &&
+                   string[theKeyLen] == ' ')
+                  found = PR_TRUE;
+            }
+
+            // build up the compare string
+            else
+               stringbuf[istringpos++] = buffer[ibufferpos];
+         }
+      }
+
+      // throw away the record file data
+      if (buffer)
+         PR_Free(buffer);
+      if (stringbuf)
+         PR_Free(stringbuf);
+   }
+
+   // if this bad vector was not recorded, add it to record file
+   if (!found) {
+      PR_Seek(recordFile,0,PR_SEEK_END);
+      PR_Write(recordFile,string,PL_strlen(string));
+   }
+
+   // every path past the open comes through here, so the descriptor
+   // and the record string are always released
+   PR_Close(recordFile);
+   PR_Free(string);
+
+   return found;
+}
+
+/**
+ * qsort callback for the statistics table. Each argument points at a
+ * table slot (a VectorInfo*); entries with more references sort
+ * first, giving descending order.
+ */
+
+static int compare( const void *arg1, const void *arg2 )
+{
+	const VectorInfo * theFirst  = *(VectorInfo**)arg1;
+	const VectorInfo * theSecond = *(VectorInfo**)arg2;
+	return theSecond->references - theFirst->references;
+}
+
+/**
+ *  This debug routine stores statistical information about a
+ *  context vector.  The statistics are kept in this object's table
+ *  (mVectorInfoArray).  The table is resorted each time it grows to
+ *  aid in lookup speed.  If a vector has already been noted, its
+ *  reference count is bumped, otherwise it is added to the table.
+ *  When memory cannot be obtained the vector is simply not noted.
+ *
+ *  @update     jevering 6/11/98
+ *  @param      aTags is the tag list (vector)
+ *  @param      count is the size of the vector
+ *  @param      good_vector tells whether the vector passed verification
+ *  @return
+ */
+
+void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
+{
+    // nothing sensible can be noted for these
+    if (count < 0 || (count > 0 && !aTags))
+        return;
+
+    // if the table doesn't exist, create it
+    if (!mVectorInfoArray) {
+        mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
+        if (!mVectorInfoArray)
+            return;
+    }
+    else {
+        // attempt to look up the vector
+        for (PRInt32 i = 0; i < mVectorCount; i++) {
+
+            // check the vector only if they are the same size, if they
+            // match then just bump the count without doing further work
+            if (mVectorInfoArray[i]->count != count)
+                continue;
+
+            if (count == 0 ||
+                !memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
+                mVectorInfoArray[i]->references++;
+                return;
+            }
+        }
+    }
+
+    // the context vector hasn't been noted, so allocate an entry,
+    // initialize its count to one and copy the tags into it
+    VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
+    if (!pVectorInfo)
+        return;
+
+    pVectorInfo->references = 1;
+    pVectorInfo->count = count;
+    pVectorInfo->good_vector = good_vector;
+    pVectorInfo->vector = 0;
+    if (count > 0) {
+        pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
+        if (!pVectorInfo->vector) {
+            PR_Free(pVectorInfo);
+            return;
+        }
+        memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
+    }
+
+    // the table always has at least one free slot on entry (see below)
+    mVectorInfoArray[mVectorCount++] = pVectorInfo;
+
+    // have we filled the table?  grow it by one step and sort it.
+    if ((mVectorCount % TABLE_SIZE) == 0) {
+        // The table comes from PR_Calloc and is released with PR_Free,
+        // so it has to be resized with the matching NSPR call.
+        VectorInfo ** theTable = (VectorInfo**)PR_Realloc(
+            mVectorInfoArray,
+            (sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
+
+        if (!theTable) {
+            // Could not grow: take the new entry back out so the table
+            // keeps a free slot and later calls cannot run off its end.
+            mVectorCount--;
+            if (pVectorInfo->vector)
+                PR_Free(pVectorInfo->vector);
+            PR_Free(pVectorInfo);
+            return;
+        }
+
+        mVectorInfoArray = theTable;
+        qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+    }
+}
+
+/**
+ *  Formats one statistics entry as a line of text: the reference
+ *  count, then every tag of the vector as "<name>", then "\r\n".
+ *  NOTE(review): nothing here bounds the output; the caller visible in
+ *  this file passes a 1024 byte buffer.
+ *
+ *  @param      vector_string receives the formatted line
+ *  @param      pInfo is the entry to format
+ */
+void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
+{
+    // the reference count goes first, right aligned in six columns
+    sprintf (vector_string, "%6d ", pInfo->references);
+
+    // then each tag of the vector, wrapped in angle brackets
+    PRInt32 theIndex = 0;
+    while (theIndex < pInfo->count) {
+        const char * theName = (const char *)GetTagName(pInfo->vector[theIndex]);
+        PL_strcat(vector_string, "<");
+        PL_strcat(vector_string, theName);
+        PL_strcat(vector_string, ">");
+        theIndex++;
+    }
+
+    PL_strcat(vector_string,"\r\n");
+}
+
+/**
+ *  This debug routine dumps out the vector statistics to a text
+ *  file in the verification directory; the file is named by
+ *  CONTEXT_VECTOR_STAT ("vector.stat") and any previous contents are
+ *  discarded.  It lists all noted context vectors and their occurrence
+ *  counts sorted in descending order, invalid vectors first.
+ *
+ *  The statistics table is released afterwards whether or not the
+ *  file could be written.
+ *  
+ *  @update     jevering 6/11/98
+ *  @param
+ *  @return
+ */
+
+void CParserDebug::DumpVectorRecord(void)
+{
+   // do we have a table?
+   if (!mVectorCount || !mVectorInfoArray)
+      return;
+
+   // hopefully, they wont exceed 1K.
+   char vector_string[1024];
+   char path[1024];
+   PRInt32 i;
+
+   path[0] = '\0';
+
+   // put in the verification directory.. else the root.  A directory
+   // too long for the buffer means no file gets written at all.
+   PRBool thePathFits = PR_TRUE;
+   if (mVerificationDir) {
+      if (PL_strlen(mVerificationDir) + PL_strlen(CONTEXT_VECTOR_STAT) < sizeof(path))
+         strcpy(path,mVerificationDir);
+      else
+         thePathFits = PR_FALSE;
+   }
+
+   PRFileDesc * statisticFile = 0;
+   if (thePathFits) {
+      strcat(path,CONTEXT_VECTOR_STAT);
+
+      // open the stat file; PR_TRUNCATE empties an existing one so no
+      // stale tail is left behind a shorter report
+      statisticFile = PR_Open(path,PR_CREATE_FILE|PR_TRUNCATE|PR_RDWR,0);
+   }
+
+   if (statisticFile) {
+      {
+         PRofstream ps;
+         ps.attach(statisticFile);
+
+         // oh what the heck, sort it again
+         qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+
+         // cute little header
+         sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
+         ps << vector_string;
+
+         // the URL/vector mapping lives in the map file, not in this one
+         ps << "Invalid context vector summary (see " CONTEXT_VECTOR_MAP ") for mapping.\r\n";
+         ps << VECTOR_TABLE_HEADER;
+
+         // dump out the bad vectors encountered
+         for (i = 0; i < mVectorCount; i++) {
+            if (!mVectorInfoArray[i]->good_vector) {
+               MakeVectorString(vector_string, mVectorInfoArray[i]);
+               ps << vector_string;
+            }
+         }
+
+         ps << "\r\n\r\nValid context vector summary\r\n";
+         ps << VECTOR_TABLE_HEADER;
+
+         // and then the good vectors
+         for (i = 0; i < mVectorCount; i++) {
+            if (mVectorInfoArray[i]->good_vector) {
+               MakeVectorString(vector_string, mVectorInfoArray[i]);
+               ps << vector_string;
+            }
+         }
+
+         ps.flush();
+      }  // the stream goes away here, before its descriptor is closed
+
+      PR_Close(statisticFile);
+   }
+
+   // free the entries whether or not they could be written out
+   for (i = 0; i < mVectorCount; i++) {
+      if (mVectorInfoArray[i]) {
+         if (mVectorInfoArray[i]->vector)
+            PR_Free(mVectorInfoArray[i]->vector);
+         PR_Free(mVectorInfoArray[i]);
+      }
+   }
+
+   // ok, we are done with the table, free it up as well
+   PR_Free(mVectorInfoArray);
+   mVectorInfoArray = 0;
+   mVectorCount = 0;
+}
+
+
+/**
+ * This debug method checks the current context vector against the
+ * given DTD: every tag must be able to contain the tag that follows
+ * it.  Depending on the settings the vector is also noted in the
+ * statistics table, mirrored as a directory tree below the
+ * verification directory, and (for a bad vector not yet on record)
+ * accompanied by a dump of the document source.
+ *
+ * @update  gess4/22/98
+ * @param   aDTD is the DTD we ask for verification
+ * @param   aParser supplies the source dump for a bad vector (may be null)
+ * @param   aContextStackPos is the number of tags in aContextStack
+ * @param   aContextStack is the context vector, outermost tag first
+ * @param   aURLRef is the URL being parsed; recorded with a bad vector
+ * @return  TRUE if the vector is acceptable (or was not checked), else false
+ */
+
+PRBool CParserDebug::Verify(nsIDTD * aDTD,  nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef) 
+{
+   PRBool  result=PR_TRUE;
+
+    //ok, now see if we understand this vector
+
+   if(0!=mVerificationDir || mRecordingStatistics) {
+
+      if(aDTD && aContextStackPos>1) {
+         for (int theTag = 0; theTag < aContextStackPos-1; theTag++)
+            if (!aDTD->CanContain(aContextStack[theTag],aContextStack[theTag+1])) {
+               result = PR_FALSE;
+               break;
+            }
+         }
+   }
+
+   if (mRecordingStatistics) {
+	   NoteVector(aContextStack,aContextStackPos,result);
+   }
+
+   if(0!=mVerificationDir) {
+      char    path[2048];
+
+      // a directory that does not even fit the buffer cannot be used
+      PRUint32 thePathLen = PL_strlen(mVerificationDir);
+      if (thePathLen >= sizeof(path))
+         return result;
+      strcpy(path,mVerificationDir);
+
+      // build one directory level per tag, stopping short rather than
+      // writing past the end of the buffer
+      PRBool thePathIsComplete = PR_TRUE;
+      int i=0;      
+      for(i=0;i<aContextStackPos;i++){
+         const char* name=GetTagName(aContextStack[i]);
+         if (!name)
+            name = "";
+         PRUint32 theNameLen = PL_strlen(name);
+         if (thePathLen + 1 + theNameLen >= sizeof(path)) {
+            thePathIsComplete = PR_FALSE;
+            break;
+         }
+         strcat(path,"/");
+         strcat(path,name);
+         thePathLen += 1 + theNameLen;
+         PR_MkDir(path,0);
+      }
+
+      // a truncated path names the wrong vector, so record nothing for it
+      if(PR_FALSE==result && thePathIsComplete){
+         static PRBool rnd_initialized = PR_FALSE;
+
+         if (!rnd_initialized) {
+            // seed random number generator to aid in temp file
+            // creation.
+            rnd_initialized = PR_TRUE;
+            srand((unsigned)time(NULL));
+         }
+
+         // generate a filename to dump the html source into; the
+         // buffer holds the whole path plus "/", the hex digits of an
+         // unsigned long, ".html" and the terminator
+         char filename[sizeof(path)+32];
+         do {
+            // use system time to generate a temporary file name
+            time_t ltime;
+            time (&ltime);
+            // add in random number so that we can create unique names
+            // faster than simply every second.
+            ltime += (time_t)rand();
+            // %lX expects an unsigned long, whatever time_t happens to be
+            sprintf(filename,"%s/%lX.html", path, (unsigned long)ltime);
+            // try until we find one we can create
+         } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
+
+         // check to see if we already recorded an instance of this particular
+         // bad vector.  
+         if (!DebugRecord(path, aURLRef, filename))
+         {
+            // save file to directory indicated by bad context vector
+            PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
+            if (debugFile) {
+               {
+                  // dump the html source into the newly created file.
+                  PRofstream ps;
+                  ps.attach(debugFile);
+                  if (aParser)
+                     aParser->DebugDumpSource(ps);
+                  ps.flush();
+               }  // the stream is gone before its descriptor is closed
+               PR_Close(debugFile);
+            }
+         }
+      }
+   }
+
+   return result;
+}
--- a/htmlparser/src/nsTokenizer.cpp
+++ b/htmlparser/src/nsTokenizer.cpp
--- a/htmlparser/src/nsTokenizer.h
+++ b/htmlparser/src/nsTokenizer.h
--- a/parser/htmlparser/src/CNavDTD.cpp
+++ b/parser/htmlparser/src/CNavDTD.cpp
@ -31,6 +31,7 @@
 *         
 */

+#include "nsIParserDebug.h"
 #include "CNavDTD.h"
 #include "nsHTMLTokens.h"
 #include "nsCRT.h"
@ -43,13 +44,10 @@
 #include "prtypes.h"  //this is here for debug reasons...
 #include "prio.h"
 #include "plstr.h"
-#include "prstrm.h"
-#include <fstream.h>

 #ifdef XP_PC
 #include <direct.h> //this is here for debug reasons...
 #endif
-#include <time.h>
 #include "prmem.h"


@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
 static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

-static char*        gVerificationOutputDir=0;
-static char*        gURLRef=0;
 static nsAutoString gEmpty;

 static char formElementTags[]= {  
@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
 *  @return  
 */
 CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
+  NS_INIT_REFCNT();
  mParser=0;
+  mURLRef=0;
+  mParserDebug=0;
  nsCRT::zero(mLeafBits,sizeof(mLeafBits));
  nsCRT::zero(mContextStack,sizeof(mContextStack));
  nsCRT::zero(mStyleStack,sizeof(mStyleStack));
  nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
  mContextStackPos=0;
  mStyleStackPos=0;
-  gURLRef = 0;
  mHasOpenForm=PR_FALSE;
  mHasOpenMap=PR_FALSE;
-  gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
  InitializeDefaultTokenHandlers();
 }

@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
 */
 CNavDTD::~CNavDTD(){
  DeleteTokenHandlers();
-  if (gURLRef)
-  {
-     PL_strfree(gURLRef);
-     gURLRef = 0;
-  }
+  if (mURLRef)
+     PL_strfree(mURLRef);
+  if (mParserDebug)
+     NS_RELEASE(mParserDebug);
 //  NS_RELEASE(mSink);
 }

@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){

    if(aHandler) {
      result=(*aHandler)(theToken,this);
-      Verify("xxx",PR_TRUE);
+      if (mParserDebug)
+         mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
    }

  }//if
@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
 *  @param   aChild -- tag enum of child container
 *  @return  PR_TRUE if parent can contain child
 */
-PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {

  PRBool result=PR_FALSE;

@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {

    //handle form elements (this is very much a WIP!!!)
  if(0!=strchr(formElementTags,aChild)){
-    return CanContainFormElement(aParent,aChild);
+    return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
  }

  
-  switch(aParent) {
+  switch((eHTMLTags)aParent) {
    case eHTMLTag_a:
    case eHTMLTag_acronym:
      result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
 * @param   aChild -- tag type of child
 * @return  TRUE if propagation closes; false otherwise
 */
-PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag)  {
  PRBool result=PR_FALSE;

  switch(aParentTag) {
@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
      //otherwise, intentionally fall through...

    case eHTMLTag_tr:
-      if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
+      if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
        aVector.Append((PRUnichar)eHTMLTag_td);
        result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
 //        result=PR_TRUE;
@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
  return;
 }

+/**
+ * Remembers a private copy of the URL being parsed, replacing any
+ * copy held before. A null argument just clears the stored URL.
+ */
+void CNavDTD::SetURLRef(char * aURLRef){
+   // drop whatever reference string we were holding
+   if (0 != mURLRef) {
+      PL_strfree(mURLRef);
+      mURLRef=0;
+   }
+
+   // nothing to copy: leave the reference cleared
+   if (0 == aURLRef)
+      return;
+
+   mURLRef = PL_strdup(aURLRef);
+}

-/************************************************************************
-  Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/** 
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update  jevering 6/06/98
- * @param   path is the directory structure indicating the bad context vector
- * @param   pURLRef is the associated URL
- * @param   filename to record mapping to if not already recorded
- * @return  TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP	"/vector.map"
-#define CONTEXT_VECTOR_STAT	"/vector.stat"
-#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
+void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
 {
-   char recordPath[2048];
-   PRIntn oflags = 0;
-
-   // create the record file name from the verification director
-   // and the default name.
-   strcpy(recordPath,gVerificationOutputDir);
-   strcat(recordPath,CONTEXT_VECTOR_MAP);
-
-   // create the file exists, only open for read/write
-   // otherwise, create it
-   if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
-      oflags = PR_CREATE_FILE;
-   oflags |= PR_RDWR;
-
-   // open the record file
-   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
-   if (recordFile) {
-
-      char * string = (char *)PR_Malloc(2048);
-      PRBool found = PR_FALSE;
-
-	  // vectors are stored on the format iof "URL vector filename"
-	  // where the vector contains the verification path and
-	  // the filename contains the debug source dump
-      sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
-	  // get the file size, read in the file and parse it line at
-	  // a time to check to see if we have already recorded this
-	  // occurance
-
-      PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
-      if (iSize) {
-
-         char * buffer = (char*)PR_Malloc(iSize);
-         char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
-         if (buffer!=NULL && string!=NULL) {
-            PRInt32 ibufferpos, istringpos;
-
-			// beginning of file for read
-            PR_Seek(recordFile,0,PR_SEEK_SET);
-            PR_Read(recordFile,buffer,iSize);
-
-			// run through the file looking for a matching vector
-            for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
-            {
-			   // compare string once we have hit the end of the line
-               if (buffer[ibufferpos] == '\r') {
-                  stringbuf[istringpos] = '\0';
-                  istringpos = 0;
-                  // skip newline and space
-                  ibufferpos++;
-
-                  if (PL_strlen(stringbuf)) {
-					char * space;
-   					// chop of the filename for compare
-                    if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
-						*space = '\0';
-
-					// we have already recorded this one, free up, and return
-                    if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
-						PR_Free(buffer);
-                        PR_Free(stringbuf);
-						PR_Free(string);
-                        return PR_TRUE;
+   if (aParserDebug) {
+      mParserDebug = aParserDebug;
+      NS_ADDREF(mParserDebug);
   }
 }
-               }
-
-               // build up the compare string
-               else
-                  stringbuf[istringpos++] = buffer[ibufferpos];
-            }
-
-            // throw away the record file data
-            PR_Free(buffer);
-            PR_Free(stringbuf);
-         }
-      }
-
-      // if this bad vector was not recorded, add it to record file
-
-      if (!found) {
-         PR_Seek(recordFile,0,PR_SEEK_END);
-         PR_Write(recordFile,string,PL_strlen(string));
-      }
-
-      PR_Close(recordFile);
-	  PR_Free(string);
-   }
-
-   // vector was not recorded
-   return PR_FALSE;
-}
-
-// structure to store the vector statistic information
-
-typedef struct vector_info {
-	PRInt32 references;     // number of occurances counted
-	PRInt32 count;          // number of tags in the vector
-  PRBool  good_vector;    // is this a valid vector?
-	eHTMLTags* vector;       // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE	128
-
-// compare function for quick sort.  Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
-{
-	VectorInfo ** p1 = (VectorInfo**)arg1;
-	VectorInfo ** p2 = (VectorInfo**)arg2;
-	return (*p2)->references - (*p1)->references;
-}
-
-
-/**
- *  This debug routines stores statistical information about a
- *  context vector.  The context vector statistics are stored in
- *  a global array.  The table is resorted each time it grows to
- *  aid in lookup speed.  If a vector has already been noted, its
- *  reference count is bumped, otherwise it is added to the table
- *
- *  @update     jevering 6/11/98
- *  @param      aTags is the tag list (vector)
- *  @param      count is the size of the vector
- *  @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
-    // if the table doesn't exist, create it
-	if (!gVectorInfoArray) {
-		gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
-	} 
-	else {
-        // attempt to look up the vector
-		for (PRInt32 i = 0; i < gVectorCount; i++)
-
-            // check the vector only if they are the same size, if they
-            // match then just return without doing further work
-			if (gVectorInfoArray[i]->count == count)
-				if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
-                    // bzzzt. and we have a winner.. bump the ref count
-					gVectorInfoArray[i]->references++;
-					return;
-				}
-	}
-
-    // the context vector hasn't been noted, so allocate it and
-    // initialize it one.. add it to the table
-	VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
-	pVectorInfo->references = 1;
-	pVectorInfo->count = count;
-	pVectorInfo->good_vector = good_vector;
-	pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
-	memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
-	gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
-    // have we maxed out the table?  grow it.. sort it.. love it. 
-	if ((gVectorCount % TABLE_SIZE) == 0) {
-		gVectorInfoArray = (VectorInfo**)realloc(
-			gVectorInfoArray,
-			(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
-	  if (gVectorCount) {
-		  qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
-	  }
-	}
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
-    sprintf (vector_string, "%6d ", pInfo->references);
-    for (PRInt32 j = 0; j < pInfo->count; j++) {
-	    PL_strcat(vector_string, "<");
-	    PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
-	    PL_strcat(vector_string, ">");
-    }
-    PL_strcat(vector_string,"\r\n");
-}
-
-/**
- *  This debug routine dumps out the vector statistics to a text
- *  file in the verification directory and defaults to the name
- *  "vector.stat".  It contains all parsed context vectors and there
- *  occurance count sorted in decending order.
- *  
- *  @update     jevering 6/11/98
- *  @param
- *  @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord(void)
-{
-    // do we have a table?
-	if (gVectorCount) {
-
-        // hopefully, they wont exceed 1K.
-      char vector_string[1024];
-      char path[1024];
-
-      path[0] = '\0';
-
-      // put in the verification directory.. else the root
-      if (gVerificationOutputDir)
-         strcpy(path,gVerificationOutputDir);
-
-      strcat(path,CONTEXT_VECTOR_STAT);
-
-      // open the stat file creaming any existing stat file
-      PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
-		if (statisticFile) {
-
-            PRInt32 i;
-            PRofstream ps;
-            ps.attach(statisticFile);
-        
-            // oh what the heck, sort it again
-	          if (gVectorCount) {
-		          qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
-	          }
-
-            // cute little header
-            sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
-            ps << vector_string;
-
-            ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
-            ps << VECTOR_TABLE_HEADER;
-
-            // dump out the bad vectors encountered
-            for (i = 0; i < gVectorCount; i++) {
-               if (!gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-            }
-
-            ps << "\r\n\r\nValid context vector summary\r\n";
-            ps << VECTOR_TABLE_HEADER;
-            
-            // take a big vector table dump (good vectors)
-            for (i = 0; i < gVectorCount; i++) {
-               if (gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-                // free em up.  they mean nothing to me now (I'm such a user)
-
-            if (gVectorInfoArray[i]->vector)
-               PR_Free(gVectorInfoArray[i]->vector);
-            PR_Free(gVectorInfoArray[i]);
-         }
-      }
-
-        // ok, we are done with the table, free it up as well
-      PR_Free(gVectorInfoArray);
-      gVectorInfoArray = 0;
-      gVectorCount = 0;
-      PR_Close(statisticFile);
-   }
-}
-
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool CNavDTD::VerifyContextVector(void) const {
-
-  PRBool  result=PR_TRUE;
-
-  if(0!=gVerificationOutputDir) {
-  
-#ifdef XP_PC
-      char    path[_MAX_PATH+1];
-      strcpy(path,gVerificationOutputDir);
-#endif
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-
-#ifdef NS_WIN32
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        mkdir(path);
-#endif
-      }
-
-      //**************************************************
-      //Add code here to see if we understand this vector
-      //**************************************************
-
-	  if(PR_FALSE==result){
-#ifdef NS_WIN32
-      // save file to directory indicated by bad context vector
-      int iCount = 1;
-      char filename[_MAX_PATH];
-      do {
-         sprintf(filename,"%s/html%04d.dbg", path, iCount++);
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-      PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-      if (debugFile) {
-         PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
-         PR_Write(debugFile,"\n",PL_strlen("\n"));
-         PR_Close(debugFile);
-      }
-#endif
-      //add debugging code here to record the fact that we just encountered
-      //a context vector we don't know how to handle.
-    }
-  }
-
-  return result;
-}
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool CNavDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
-
-  PRBool  result=PR_TRUE;
-
-  //ok, now see if we understand this vector
-
-  if(0!=anOutputDir || aRecordStats) 
-      result=VerifyContextVector();
-
-  if (aRecordStats) {
-	  NoteVector(mContextStack,mContextStackPos,result);
-  }
-
-  if(0!=anOutputDir) {
-      char    path[2048];
-      strcpy(path,anOutputDir);
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        PR_MkDir(path,0);
-      }
-	  if(PR_FALSE==result){
-      static PRBool rnd_initialized = PR_FALSE;
-
-      if (!rnd_initialized) {
-         // seed randomn number generator to aid in temp file
-         // creation.
-         rnd_initialized = PR_TRUE;
-         srand((unsigned)time(NULL));
-      }
-
-      // generate a filename to dump the html source into
-      char filename[1024];
-      do {
-         // use system time to generate a temporary file name
-         time_t ltime;
-         time (&ltime);
-         // add in random number so that we can create uniques names
-         // faster than simply every second.
-         ltime += (time_t)rand();
-         sprintf(filename,"%s/%lX.html", path, ltime);
-         // try until we find one we can create
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-
-      // check to see if we already recorded an instance of this particular
-      // bad vector.  
-      if (!DebugRecord(path,gURLRef, filename))
-      {
-         // save file to directory indicated by bad context vector
-         PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-         // if we were able to open the debug file, then
-         // write the true URL at the top of the file.
-         if (debugFile) {
-            // dump the html source into the newly created file.
-            PRofstream ps;
-            ps.attach(debugFile);
-            mParser->DebugDumpSource(ps);
-            PR_Close(debugFile);
-         }
-      }
-    }
-  }
-
-  return result;
-}
--- a/parser/htmlparser/src/CNavDTD.h
+++ b/parser/htmlparser/src/CNavDTD.h
@ -42,6 +42,7 @@

 class nsHTMLParser;
 class nsIHTMLContentSink;
+class nsIParserDebug;

 class CNavDTD : public nsIDTD {
            
@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
     *  of one type can contain a tag of another type.
     *  
     *  @update  gess 3/25/98
-     *  @param   aParent -- tag enum of parent container
-     *  @param   aChild -- tag enum of child container
+     *  @param   aParent -- int tag of parent container
+     *  @param   aChild -- int tag of child container
     *  @return  PR_TRUE if parent can contain child
     */
-    virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+    virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);

    /**
     *  This method is called to determine whether or not a tag
@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
     */
    virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;

-
-    /**
-     * This method gets called at various times by the parser
-     * whenever we want to verify a valid context stack. This
-     * method also gives us a hook to add debugging metrics.
-     *
-     * @update  gess4/6/98
-     * @param   aStack[] array of ints (tokens)
-     * @param   aCount number of elements in given array
-     * @return  TRUE if stack is valid, else FALSE
-     */
-    virtual PRBool VerifyContextVector(void) const;
-
    /**
     * 
-     * @update	gess5/18/98
-     * @param 
+     * @update	jevering 6/18/98
+     * @param  aURLRef is the current URL reference (for debugger)
     * @return
     */
-    virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+    virtual void SetURLRef(char * aURLRef);
+
+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParserDebug   created debug parser object
+     * @return
+     */
+    virtual void SetParserDebug(nsIParserDebug * aParserDebug);

    /**
     * This method tries to design a context map (without actually
@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
     * @param   aChild -- tag type of child
     * @return  True if closure was achieved -- other false
     */
-    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);

    /**
     * This method tries to design a context map (without actually
@ -699,7 +695,8 @@ protected:
    PRBool              mHasOpenForm;
    PRBool              mHasOpenMap;
    nsDeque             mTokenDeque;
-
+    char*               mURLRef;
+    nsIParserDebug*     mParserDebug;
 };


--- a/parser/htmlparser/src/COtherDTD.cpp
+++ b/parser/htmlparser/src/COtherDTD.cpp
@ -31,6 +31,7 @@
 *         
 */

+#include "nsIParserDebug.h"
 #include "COtherDTD.h"
 #include "nsHTMLTokens.h"
 #include "nsCRT.h"
@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
 static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

-static char*        gVerificationOutputDir=0;
-static char*        gURLRef=0;
 static nsAutoString gEmpty;

 static char formElementTags[]= {  
@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
 *  @return  
 */
 COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
+  NS_INIT_REFCNT();
  mParser=0;
+  mURLRef=0;
+  mParserDebug=0;
  nsCRT::zero(mLeafBits,sizeof(mLeafBits));
  nsCRT::zero(mContextStack,sizeof(mContextStack));
  nsCRT::zero(mStyleStack,sizeof(mStyleStack));
  nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
  mContextStackPos=0;
  mStyleStackPos=0;
-  gURLRef = 0;
  mHasOpenForm=PR_FALSE;
  mHasOpenMap=PR_FALSE;
-  gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
  InitializeDefaultTokenHandlers();
 }

@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller)  {
 */
 COtherDTD::~COtherDTD(){
  DeleteTokenHandlers();
-  if (gURLRef)
-  {
-     PL_strfree(gURLRef);
-     gURLRef = 0;
-  }
+  if (mURLRef)
+     PL_strfree(mURLRef);
+  if (mParserDebug)
+     NS_RELEASE(mParserDebug);
 //  NS_RELEASE(mSink);
 }

@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){

    if(aHandler) {
      result=(*aHandler)(theToken,this);
-      Verify("xxx",PR_TRUE);
+      if (mParserDebug)
+         mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
    }

  }//if
@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
 *  @param   aChild -- tag enum of child container
 *  @return  PR_TRUE if parent can contain child
 */
-PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {

  PRBool result=PR_FALSE;

@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {

    //handle form elements (this is very much a WIP!!!)
  if(0!=strchr(formElementTags,aChild)){
-    return CanContainFormElement(aParent,aChild);
+    return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
  }

  
-  switch(aParent) {
+  switch((eHTMLTags)aParent) {
    case eHTMLTag_a:
    case eHTMLTag_acronym:
      result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
 * @param   aChild -- tag type of child
 * @return  TRUE if propagation closes; false otherwise
 */
-PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
  PRBool result=PR_FALSE;

  switch(aParentTag) {
@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
  return;
 }

+/**
+ * Stores a private copy of the given URL reference in mURLRef,
+ * freeing any copy held from an earlier call. Passing null just
+ * clears the stored reference.
+ *
+ * @param   aURLRef is the URL reference string to copy (may be null)
+ */
+void COtherDTD::SetURLRef(char * aURLRef){
+   // Duplicate before freeing: if the caller hands us our own mURLRef,
+   // freeing first would leave PL_strdup reading released memory.
+   char* theCopy = (aURLRef) ? PL_strdup(aURLRef) : 0;
+   if (mURLRef)
+      PL_strfree(mURLRef);
+   mURLRef = theCopy;
+}

-/************************************************************************
-  Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/** 
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update  jevering 6/06/98
- * @param   path is the directory structure indicating the bad context vector
- * @param   pURLRef is the associated URL
- * @param   filename to record mapping to if not already recorded
- * @return  TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP	"/vector.map"
-#define CONTEXT_VECTOR_STAT	"/vector.stat"
-#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
+void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
 {
-   char recordPath[2048];
-   PRIntn oflags = 0;
-
-   // create the record file name from the verification director
-   // and the default name.
-   strcpy(recordPath,gVerificationOutputDir);
-   strcat(recordPath,CONTEXT_VECTOR_MAP);
-
-   // create the file exists, only open for read/write
-   // otherwise, create it
-   if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
-      oflags = PR_CREATE_FILE;
-   oflags |= PR_RDWR;
-
-   // open the record file
-   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
-   if (recordFile) {
-
-      char * string = (char *)PR_Malloc(2048);
-      PRBool found = PR_FALSE;
-
-	  // vectors are stored on the format iof "URL vector filename"
-	  // where the vector contains the verification path and
-	  // the filename contains the debug source dump
-      sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
-	  // get the file size, read in the file and parse it line at
-	  // a time to check to see if we have already recorded this
-	  // occurance
-
-      PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
-      if (iSize) {
-
-         char * buffer = (char*)PR_Malloc(iSize);
-         char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
-         if (buffer!=NULL && string!=NULL) {
-            PRInt32 ibufferpos, istringpos;
-
-			// beginning of file for read
-            PR_Seek(recordFile,0,PR_SEEK_SET);
-            PR_Read(recordFile,buffer,iSize);
-
-			// run through the file looking for a matching vector
-            for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
-            {
-			   // compare string once we have hit the end of the line
-               if (buffer[ibufferpos] == '\r') {
-                  stringbuf[istringpos] = '\0';
-                  istringpos = 0;
-                  // skip newline and space
-                  ibufferpos++;
-
-                  if (PL_strlen(stringbuf)) {
-					char * space;
-   					// chop of the filename for compare
-                    if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
-						*space = '\0';
-
-					// we have already recorded this one, free up, and return
-                    if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
-						PR_Free(buffer);
-                        PR_Free(stringbuf);
-						PR_Free(string);
-                        return PR_TRUE;
+   if (aParserDebug) {
+      // Take the new reference first so that re-installing the debugger
+      // we already hold cannot drop its last reference, then let go of
+      // the one stored by an earlier call instead of leaking it.
+      NS_ADDREF(aParserDebug);
+      if (mParserDebug)
+         NS_RELEASE(mParserDebug);
+      mParserDebug = aParserDebug;
   }
 }
-               }
-
-               // build up the compare string
-               else
-                  stringbuf[istringpos++] = buffer[ibufferpos];
-            }
-
-            // throw away the record file data
-            PR_Free(buffer);
-            PR_Free(stringbuf);
-         }
-      }
-
-      // if this bad vector was not recorded, add it to record file
-
-      if (!found) {
-         PR_Seek(recordFile,0,PR_SEEK_END);
-         PR_Write(recordFile,string,PL_strlen(string));
-      }
-
-      PR_Close(recordFile);
-	  PR_Free(string);
-   }
-
-   // vector was not recorded
-   return PR_FALSE;
-}
-
-// structure to store the vector statistic information
-
-typedef struct vector_info {
-	PRInt32 references;     // number of occurances counted
-	PRInt32 count;          // number of tags in the vector
-  PRBool  good_vector;    // is this a valid vector?
-	eHTMLTags* vector;       // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE	128
-
-// compare function for quick sort.  Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
-{
-	VectorInfo ** p1 = (VectorInfo**)arg1;
-	VectorInfo ** p2 = (VectorInfo**)arg2;
-	return (*p2)->references - (*p1)->references;
-}
-
-/**
- * quick sort the statistic array causing the most frequently
- * used vectors to be at the top (this makes it a little speedier
- * when looking them up)
- */
-static void SortVectorRecord(void) {
-    // of course, sort it only if there is something to sort
-	if (gVectorCount) {
-		qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
-	}
-}
-
-
-/**
- *  This debug routines stores statistical information about a
- *  context vector.  The context vector statistics are stored in
- *  a global array.  The table is resorted each time it grows to
- *  aid in lookup speed.  If a vector has already been noted, its
- *  reference count is bumped, otherwise it is added to the table
- *
- *  @update     jevering 6/11/98
- *  @param      aTags is the tag list (vector)
- *  @param      count is the size of the vector
- *  @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
-    // if the table doesn't exist, create it
-	if (!gVectorInfoArray) {
-		gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
-	} 
-	else {
-        // attempt to look up the vector
-		for (PRInt32 i = 0; i < gVectorCount; i++)
-
-            // check the vector only if they are the same size, if they
-            // match then just return without doing further work
-			if (gVectorInfoArray[i]->count == count)
-				if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
-                    // bzzzt. and we have a winner.. bump the ref count
-					gVectorInfoArray[i]->references++;
-					return;
-				}
-	}
-
-    // the context vector hasn't been noted, so allocate it and
-    // initialize it one.. add it to the table
-	VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
-	pVectorInfo->references = 1;
-	pVectorInfo->count = count;
-	pVectorInfo->good_vector = good_vector;
-	pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
-	memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
-	gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
-    // have we maxed out the table?  grow it.. sort it.. love it. 
-	if ((gVectorCount % TABLE_SIZE) == 0) {
-		gVectorInfoArray = (VectorInfo**)realloc(
-			gVectorInfoArray,
-			(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
-		SortVectorRecord();
-	}
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
-    sprintf (vector_string, "%6d ", pInfo->references);
-    for (PRInt32 j = 0; j < pInfo->count; j++) {
-	    PL_strcat(vector_string, "<");
-	    PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
-	    PL_strcat(vector_string, ">");
-    }
-    PL_strcat(vector_string,"\r\n");
-}
-
-/**
- *  This debug routine dumps out the vector statistics to a text
- *  file in the verification directory and defaults to the name
- *  "vector.stat".  It contains all parsed context vectors and there
- *  occurance count sorted in decending order.
- *  
- *  @update     jevering 6/11/98
- *  @param
- *  @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord_other(void)
-{
-    // do we have a table?
-	if (gVectorCount) {
-
-        // hopefully, they wont exceed 1K.
-      char vector_string[1024];
-      char path[1024];
-
-      path[0] = '\0';
-
-      // put in the verification directory.. else the root
-      if (gVerificationOutputDir)
-         strcpy(path,gVerificationOutputDir);
-
-      strcat(path,CONTEXT_VECTOR_STAT);
-
-      // open the stat file creaming any existing stat file
-      PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
-		if (statisticFile) {
-
-            PRInt32 i;
-            PRofstream ps;
-            ps.attach(statisticFile);
-        
-            // oh what the heck, sort it again
-            SortVectorRecord();
-
-            // cute little header
-            sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
-            ps << vector_string;
-
-            ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
-            ps << VECTOR_TABLE_HEADER;
-
-            // dump out the bad vectors encountered
-            for (i = 0; i < gVectorCount; i++) {
-               if (!gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-            }
-
-            ps << "\r\n\r\nValid context vector summary\r\n";
-            ps << VECTOR_TABLE_HEADER;
-            
-            // take a big vector table dump (good vectors)
-            for (i = 0; i < gVectorCount; i++) {
-               if (gVectorInfoArray[i]->good_vector) {
-                  MakeVectorString(vector_string, gVectorInfoArray[i]);
-                  ps << vector_string;
-               }
-                // free em up.  they mean nothing to me now (I'm such a user)
-
-            if (gVectorInfoArray[i]->vector)
-               PR_Free(gVectorInfoArray[i]->vector);
-            PR_Free(gVectorInfoArray[i]);
-         }
-      }
-
-        // ok, we are done with the table, free it up as well
-      PR_Free(gVectorInfoArray);
-      gVectorInfoArray = 0;
-      gVectorCount = 0;
-      PR_Close(statisticFile);
-   }
-}
-
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool COtherDTD::VerifyContextVector(void) const {
-
-  PRBool  result=PR_TRUE;
-
-  if(0!=gVerificationOutputDir) {
-  
-#ifdef XP_PC
-      char    path[_MAX_PATH+1];
-      strcpy(path,gVerificationOutputDir);
-#endif
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-
-#ifdef NS_WIN32
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        mkdir(path);
-#endif
-      }
-
-      //**************************************************
-      //Add code here to see if we understand this vector
-      //**************************************************
-
-	  if(PR_FALSE==result){
-#ifdef NS_WIN32
-      // save file to directory indicated by bad context vector
-      int iCount = 1;
-      char filename[_MAX_PATH];
-      do {
-         sprintf(filename,"%s/html%04d.dbg", path, iCount++);
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-      PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-      if (debugFile) {
-         PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
-         PR_Write(debugFile,"\n",PL_strlen("\n"));
-         PR_Close(debugFile);
-      }
-#endif
-      //add debugging code here to record the fact that we just encountered
-      //a context vector we don't know how to handle.
-    }
-  }
-
-  return result;
-}
-
-/**
- * This debug method allows us to determine whether or not 
- * we've seen (and can handle) the given context vector.
- *
- * @update  gess4/22/98
- * @param   tags is an array of eHTMLTags
- * @param   count represents the number of items in the tags array
- * @param   aDTD is the DTD we plan to ask for verification
- * @return  TRUE if we know how to handle it, else false
- */
-PRBool COtherDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
-
-  PRBool  result=PR_TRUE;
-
-  //ok, now see if we understand this vector
-
-  if(0!=anOutputDir || aRecordStats) 
-      result=VerifyContextVector();
-
-  if (aRecordStats) {
-	  NoteVector(mContextStack,mContextStackPos,result);
-  }
-
-  if(0!=anOutputDir) {
-      char    path[2048];
-      strcpy(path,anOutputDir);
-
-      int i=0;      
-      for(i=0;i<mContextStackPos;i++){
-        strcat(path,"/");
-        const char* name=GetTagName(mContextStack[i]);
-        strcat(path,name);
-        PR_MkDir(path,0);
-      }
-	  if(PR_FALSE==result){
-      static PRBool rnd_initialized = PR_FALSE;
-
-      if (!rnd_initialized) {
-         // seed randomn number generator to aid in temp file
-         // creation.
-         rnd_initialized = PR_TRUE;
-         srand((unsigned)time(NULL));
-      }
-
-      // generate a filename to dump the html source into
-      char filename[1024];
-      do {
-         // use system time to generate a temporary file name
-         time_t ltime;
-         time (&ltime);
-         // add in random number so that we can create uniques names
-         // faster than simply every second.
-         ltime += (time_t)rand();
-         sprintf(filename,"%s/%lX.html", path, ltime);
-         // try until we find one we can create
-      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-
-      // check to see if we already recorded an instance of this particular
-      // bad vector.  
-      if (!DebugRecord(path,gURLRef, filename))
-      {
-         // save file to directory indicated by bad context vector
-         PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-         // if we were able to open the debug file, then
-         // write the true URL at the top of the file.
-         if (debugFile) {
-            // dump the html source into the newly created file.
-            PRofstream ps;
-            ps.attach(debugFile);
-            mParser->DebugDumpSource(ps);
-            PR_Close(debugFile);
-         }
-      }
-    }
-  }
-
-  return result;
-}
-
--- a/parser/htmlparser/src/COtherDTD.h
+++ b/parser/htmlparser/src/COtherDTD.h
@ -34,7 +34,6 @@
 #include "nsDeque.h"


-
 #define NS_IOtherHTML_DTD_IID      \
  {0x8a5e89c0, 0xd16d,  0x11d1,  \
  {0x80, 0x22, 0x00,    0x60, 0x8, 0x14, 0x98, 0x89}}
@ -42,6 +41,7 @@

 class nsIParser;
 class nsIHTMLContentSink;
+class nsIParserDebug;

 class COtherDTD : public nsIDTD {
            
@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
     *  of one type can contain a tag of another type.
     *  
     *  @update  gess 3/25/98
-     *  @param   aParent -- tag enum of parent container
-     *  @param   aChild -- tag enum of child container
+     *  @param   aParent -- int tag of parent container
+     *  @param   aChild -- int tag of child container
     *  @return  PR_TRUE if parent can contain child
     */
-    virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+    virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);

    /**
     *  This method is called to determine whether or not a tag
@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
     */
    virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;

-
-    /**
-     * This method gets called at various times by the parser
-     * whenever we want to verify a valid context stack. This
-     * method also gives us a hook to add debugging metrics.
-     *
-     * @update  gess4/6/98
-     * @param   aStack[] array of ints (tokens)
-     * @param   aCount number of elements in given array
-     * @return  TRUE if stack is valid, else FALSE
-     */
-    virtual PRBool VerifyContextVector(void) const;
-
    /**
     * 
-     * @update	gess5/18/98
-     * @param 
+     * @update	jevering 6/18/98
+     * @param  aURLRef is the current URL reference (for debugger)
     * @return
     */
-    virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+    virtual void SetURLRef(char * aURLRef);
+
+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParserDebug   created debug parser object
+     * @return
+     */
+    virtual void SetParserDebug(nsIParserDebug * aParserDebug);

    /**
     * This method tries to design a context map (without actually
@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
     * @param   aChild -- tag type of child
     * @return  True if closure was achieved -- other false
     */
-    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+    virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);

    /**
     * This method tries to design a context map (without actually
@ -701,7 +696,8 @@ protected:
    PRBool              mHasOpenForm;
    PRBool              mHasOpenMap;
    nsDeque             mTokenDeque;
-
+    char*               mURLRef;
+    nsIParserDebug*     mParserDebug;
 };


--- a/parser/htmlparser/src/Makefile
+++ b/parser/htmlparser/src/Makefile
@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
 CPPSRCS =     \
  nsHTMLContentSink.cpp \
  nsParserNode.cpp  \
+  nsParserDebug.cpp \
  nsScanner.cpp   \
  nsToken.cpp   \
  nsTokenHandler.cpp \
@ -41,6 +42,8 @@ EXPORTS =     \
  nsHTMLTokens.h    \
  nsIParserNode.h   \
  nsIParser.h   \
+  nsIParserDebug.h \
+  nsIParserFilter.h \
  nsToken.h   \
  $(NULL)

--- a/parser/htmlparser/src/makefile.win
+++ b/parser/htmlparser/src/makefile.win
@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
    nsHTMLParser.cpp prstrm.cpp

 EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
-    nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h 
+    nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h  \
+    nsIParserDebug.h nsIParserFilter.h

 CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
    .\$(OBJDIR)\CNavDTD.obj \
@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
    .\$(OBJDIR)\nsHTMLParser.obj \
    .\$(OBJDIR)\nsHTMLTokens.obj          .\$(OBJDIR)\nsParserNode.obj \
    .\$(OBJDIR)\nsScanner.obj             .\$(OBJDIR)\nsToken.obj \
-    .\$(OBJDIR)\nsTokenHandler.obj \
+    .\$(OBJDIR)\nsTokenHandler.obj        .\$(OBJDIR)\nsParserDebug.obj \
    .\$(OBJDIR)\prstrm.obj

 LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
--- a/parser/htmlparser/src/nsHTMLParser.cpp
+++ b/parser/htmlparser/src/nsHTMLParser.cpp
@ -30,6 +30,7 @@
 #include "prstrm.h"
 #include <fstream.h>
 #include "nsIInputStream.h"
+#include "nsIParserFilter.h"

 /* UNCOMMENT THIS IF STUFF STOPS WORKING...
 #ifdef XP_PC
@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
 static const char* kNullFilename= "Error: Null filename given";
 static const char* kNullTokenizer = "Error: Unable to construct tokenizer";

-static char*      gVerificationOutputDir=0;
-static PRBool     gRecordingStatistics=PR_TRUE;
 static const int  gTransferBufferSize=4096;  //size of the buffer used in moving data from iistream
-static char*      gURLRef=0;

 //#define DEBUG_SAVE_SOURCE_DOC 1
 #ifdef DEBUG_SAVE_SOURCE_DOC
@ -58,17 +56,6 @@ fstream* gTempStream=0;
 #endif


-extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
-{
-	gVerificationOutputDir = verify_dir;
-}
-
-
-extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
-{
-	gRecordingStatistics = bval;
-}
-
 /**
 *  This method is defined in nsIParser. It is used to 
 *  cause the COM-like construction of an nsHTMLParser.
@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
 */
 nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
  NS_INIT_REFCNT();
+  mParserFilter = nsnull;
  mListener = nsnull;
  mTransferBuffer=0;
  mSink=0;
@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
 *  @return  
 */
 nsHTMLParser::~nsHTMLParser() {
-  if (gURLRef)
-  {
-    PL_strfree(gURLRef);
-    gURLRef = 0;
-  }
  NS_IF_RELEASE(mListener);
  if(mTransferBuffer)
    delete [] mTransferBuffer;
@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
    delete mCurrentPos;
  mCurrentPos=0;
  if(mDTD)
-    delete mDTD;    
+     NS_RELEASE(mDTD);
  mDTD=0;
  if(mScanner)
    delete mScanner;
@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
  return NS_OK;                                                        
 }

+/**
+ *  Installs aFilter as this parser's filter, taking a reference on it,
+ *  and releases the reference held on the previously installed filter.
+ *  Passing null removes the current filter.
+ *
+ *  @param   aFilter -- filter to install (may be null)
+ *  @return  the local "old" pointer after our reference to it has been
+ *           released; NOTE(review): callers should not rely on it still
+ *           being a live object -- confirm intended contract.
+ */
+nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
+{
+  // AddRef the incoming filter before releasing the old one so that
+  // re-installing the current filter cannot destroy it in between.
+  if(aFilter)
+    NS_ADDREF(aFilter);
+  nsIParserFilter* old=mParserFilter;
+  // Always overwrite the member: leaving it untouched for a null aFilter
+  // would keep a pointer to a filter we no longer hold a reference on.
+  mParserFilter=aFilter;
+  NS_IF_RELEASE(old);
+  return old;
+}
+
 /**
 *  This method gets called in order to set the content
 *  sink for this parser to dump nodes to.
@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
  mDTD=aDTD;
 }

+/**
+ *  Returns the parser's current mDTD pointer as-is. No AddRef is
+ *  performed here, so the caller does not receive its own reference.
+ *
+ *  @return  the stored nsIDTD pointer
+ */
+nsIDTD * nsHTMLParser::GetDTD(void) {
+   return mDTD;
+}
+
 /**
 *  
 *  
@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
 *  @param   
 *  @return  
 */
-nsIDTD* GetDTD(eParseMode aMode) {
+nsIDTD* NewDTD(eParseMode aMode) {
  nsIDTD* aDTD=0;
  switch(aMode) {
    case eParseMode_navigator:
@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
    default:
      break;
  }
+  if (aDTD)
+     aDTD->AddRef();
  return aDTD;
 }

@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
  nsString  theBuffer;
  const int kLocalBufSize=10;

-  if (gURLRef)
-     PL_strfree(gURLRef);
-  if (aFilename)
-     gURLRef = PL_strdup(aFilename);
-
  mMajorIteration=-1;
  mMinorIteration=-1;

@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
 *  @param   aFilename -- const char* containing file to be parsed.
 *  @return  PR_TRUE if parse succeeded, PR_FALSE otherwise.
 */
-PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
+PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
  NS_PRECONDITION(0!=aFilename,kNullFilename);
  PRInt32 status=kBadFilename;
  mIncremental=aIncremental;

  if(aFilename) {

-    if (gURLRef)
-       PL_strfree(gURLRef);
-    gURLRef = PL_strdup(aFilename);
-
    mParseMode=DetermineParseMode();  
-    mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+    mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
    if(mDTD) {
      mDTD->SetParser(this);
      mDTD->SetContentSink(mSink);
+      mDTD->SetURLRef((char *)aFilename);
+      mDTD->SetParserDebug(aDebug);
    }

    WillBuildModel();
@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
 */
 PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
                            nsIStreamListener* aListener,
-                            PRBool aIncremental) {
+                            PRBool aIncremental,
+                            nsIParserDebug * aDebug) {
  NS_PRECONDITION(0!=aURL,kNullURL);

  PRInt32 status=kBadURL;
@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,

  if(aURL) {

-     if (gURLRef)
-     {
-        PL_strfree(gURLRef);
-        gURLRef = 0;
-     }
-     if (aURL->GetSpec())
-        gURLRef = PL_strdup(aURL->GetSpec());
-
    mParseMode=DetermineParseMode();  
-    mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+    mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
    if(mDTD) {
      mDTD->SetParser(this);
      mDTD->SetContentSink(mSink);
+      mDTD->SetURLRef((char *)aURL->GetSpec());
+      mDTD->SetParserDebug(aDebug);
    }

    WillBuildModel();
@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
          }
        #endif

+        if (mParserFilter)
+           mParserFilter->RawBuffer(mTransferBuffer, &len);
+
        mScanner->Append(&mTransferBuffer[offset],len);

      } //if
--- a/parser/htmlparser/src/nsHTMLParser.h
+++ b/parser/htmlparser/src/nsHTMLParser.h
@ -1,313 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/*
- * The contents of this file are subject to the Netscape Public License
- * Version 1.0 (the "NPL"); you may not use this file except in
- * compliance with the NPL.  You may obtain a copy of the NPL at
- * http://www.mozilla.org/NPL/
- *
- * Software distributed under the NPL is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
- * for the specific language governing rights and limitations under the
- * NPL.
- *
- * The Initial Developer of this code under the NPL is Netscape
- * Communications Corporation.  Portions created by Netscape are
- * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
- * Reserved.
- */
- 
-/**
- * MODULE NOTES:
- * @update  gess 4/1/98
- * 
- *  This class does two primary jobs:
- *    1) It iterates the tokens provided during the 
- *       tokenization process, identifing where elements
- *       begin and end (doing validation and normalization).
- *    2) It controls and coordinates with an instance of
- *       the IContentSink interface, to coordinate the
- *       the production of the content model.
- *
- *  The basic operation of this class assumes that an HTML
- *  document is non-normalized. Therefore, we don't process
- *  the document in a normalized way. Don't bother to look
- *  for methods like: doHead() or doBody().
- *
- *  Instead, in order to be backward compatible, we must
- *  scan the set of tokens and perform this basic set of
- *  operations:
- *    1)  Determine the token type (easy, since the tokens know)
- *    2)  Determine the appropriate section of the HTML document
- *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
- *    3)  Insert content into our document (via the sink) into
- *        the correct section.
- *    4)  In the case of tags that belong in the BODY, we must
- *        ensure that our underlying document state reflects
- *        the appropriate context for our tag. 
- *
- *        For example,if we see a <TR>, we must ensure our 
- *        document contains a table into which the row can
- *        be placed. This may result in "implicit containers" 
- *        created to ensure a well-formed document.
- *         
- */
-
-#ifndef NS_HTMLPARSER__
-#define NS_HTMLPARSER__
-
-#include "nsIParser.h"
-#include "nsDeque.h"
-#include "nsParserNode.h"
-#include "nsParserTypes.h"
-#include "nsIURL.h"
-#include "nsIStreamListener.h"
-
-
-#define NS_IHTML_PARSER_IID      \
-  {0x2ce606b0, 0xbee6,  0x11d1,  \
-  {0xaa, 0xd9, 0x00,    0x80, 0x5f, 0x8a, 0x3e, 0x14}}
-
-
-class IContentSink;
-class nsIHTMLContentSink;
-class nsIURL;
-class nsIDTD;
-class CScanner;
-
-
-class nsHTMLParser : public nsIParser, public nsIStreamListener {
-            
-  public:
-friend class CTokenHandler;
-
-    NS_DECL_ISUPPORTS
-
-
-    /**
-     * default constructor
-     * @update	gess5/11/98
-     */
-    nsHTMLParser();
-
-
-    /**
-     * Destructor
-     * @update	gess5/11/98
-     */
-    ~nsHTMLParser();
-
-    /**
-     * Select given content sink into parser for parser output
-     * @update	gess5/11/98
-     * @param   aSink is the new sink to be used by parser
-     * @return  old sink, or NULL
-     */
-    virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
-    
-    virtual void SetDTD(nsIDTD* aDTD);
-    
-    /**
-     *  
-     *  
-     *  @update  gess 6/9/98
-     *  @param   
-     *  @return  
-     */
-    virtual CScanner* GetScanner(void);
-
-    /**
-     * Cause parser to parse input from given URL in given mode
-     * @update	gess5/11/98
-     * @param   aURL is a descriptor for source document
-     * @param   aListener is a listener to forward notifications to
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRInt32 Parse(nsIURL* aURL,
-                          nsIStreamListener* aListener,
-                          PRBool aIncremental=PR_TRUE);
-
-    /**
-     * Cause parser to parse input from given file in given mode
-     * @update	gess5/11/98
-     * @param   aFilename is a path for file document
-     * @param   aMode is the desired parser mode (Nav, other, etc.)
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
-
-    /**
-     * @update	gess5/11/98
-     * @param   anHTMLString contains a string-full of real HTML
-     * @param   appendTokens tells us whether we should insert tokens inline, or append them.
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
-
-    /**
-     * This method gets called (automatically) during incremental parsing
-     * @update	gess5/11/98
-     * @return  TRUE if all went well, otherwise FALSE
-     */
-    virtual PRInt32 ResumeParse(void);
-
-    /**
-     * Causes the parser to scan foward, collecting nearby (sequential)
-     * attribute tokens into the given node.
-     * @update	gess5/11/98
-     * @param   node to store attributes
-     * @return  number of attributes added to node.
-     */
-    virtual PRInt32 CollectAttributes(nsCParserNode& aNode,PRInt32 aCount);
-
-    /**
-     * Causes the next skipped-content token (if any) to
-     * be consumed by this node.
-     * @update	gess5/11/98
-     * @param   node to consume skipped-content
-     * @return  number of skipped-content tokens consumed.
-     */
-    virtual PRInt32 CollectSkippedContent(nsCParserNode& aNode);
-
-    /**
-     *  This debug routine is used to cause the tokenizer to
-     *  iterate its token list, asking each token to dump its
-     *  contents to the given output stream.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  
-     */
-    void DebugDumpSource(ostream& out);
-
-
-     //*********************************************
-      // These methods are callback methods used by
-      // net lib to let us know about our inputstream.
-      //*********************************************
-    NS_IMETHOD GetBindInfo(void);
-    NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const nsString& aMmsg);
-    NS_IMETHOD OnStartBinding(const char *aContentType);
-    NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
-    NS_IMETHOD OnStopBinding(PRInt32 status, const nsString& aMsg);
-
-protected:
-
-    /**
-     * 
-     * @update	gess5/18/98
-     * @param 
-     * @return
-     */
-    PRInt32 WillBuildModel(void);
-
-    /**
-     * 
-     * @update	gess5/18/98
-     * @param 
-     * @return
-     */
-    PRInt32 DidBuildModel(PRInt32 anErrorCode);
-
-    /**
-     * This method gets called when the tokens have been consumed, and it's time
-     * to build the model via the content sink.
-     * @update	gess5/11/98
-     * @return  YES if model building went well -- NO otherwise.
-     */
-    virtual PRInt32 IterateTokens(void);
-  
-private:
-    PRInt32 ParseFileIncrementally(const char* aFilename);  //XXX ONLY FOR DEBUG PURPOSES...
-
-    /*******************************************
-      These are the tokenization methods...
-     *******************************************/
-
-    /**
-     *  Cause the tokenizer to consume the next token, and 
-     *  return an error result.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   anError -- ref to error code
-     *  @return  new token or null
-     */
-    virtual PRInt32 ConsumeToken(CToken*& aToken);
-
-    /**
-     *  Part of the code sandwich, this gets called right before
-     *  the tokenization process begins. The main reason for
-     *  this call is to allow the delegate to do initialization.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  TRUE if it's ok to proceed
-     */
-    PRBool WillTokenize(PRBool aIncremental);
-
-    /**
-     *  
-     *  @update  gess 3/25/98
-     *  @return  TRUE if it's ok to proceed
-     */
-    PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens);
-
-    /**
-     *  This is the primary control routine. It iteratively
-     *  consumes tokens until an error occurs or you run out
-     *  of data.
-     *  
-     *  @update  gess 3/25/98
-     *  @return  error code 
-     */
-    PRInt32 Tokenize(void);
-
-    /**
-     *  This is the tail-end of the code sandwich for the
-     *  tokenization process. It gets called once tokenziation
-     *  has completed.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  TRUE if all went well
-     */
-    PRBool DidTokenize(PRBool aIncremental);
-
-    /**
-     *  This debug routine is used to cause the tokenizer to
-     *  iterate its token list, asking each token to dump its
-     *  contents to the given output stream.
-     *  
-     *  @update  gess 3/25/98
-     *  @param   
-     *  @return  
-     */
-    void DebugDumpTokens(ostream& out);
-
-
-protected:
-    //*********************************************
-    // And now, some data members...
-    //*********************************************
-
-    nsIStreamListener*  mListener;
-    nsIContentSink*     mSink;
-
-    nsDequeIterator*    mCurrentPos;
-    nsDequeIterator*    mMarkPos;
-
-    nsIDTD*             mDTD;
-    eParseMode          mParseMode;
-    PRBool              mIncremental;
-    char*               mTransferBuffer;
-
-    PRInt32             mMajorIteration;
-    PRInt32             mMinorIteration;
-
-    nsDeque             mTokenDeque;
-    CScanner*           mScanner;
-
-};
-
-
-#endif 
-
--- a/parser/htmlparser/src/nsIDTD.h
+++ b/parser/htmlparser/src/nsIDTD.h
@ -37,6 +37,7 @@
 class nsIParser;
 class CToken;
 class nsIContentSink;
+class nsIParserDebug;

 class nsIDTD : public nsISupports {
            
@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {

    /**
     * 
-     * @update	gess5/18/98
-     * @param 
+     * @update	jevering 6/18/98
+     * @param  aURLRef is the current URL reference (for debugger)
     * @return
     */
-    virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
+    virtual void SetURLRef(char * aURLRef) = 0;

+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParent  parent tag
+     * @param  aChild   child tag
+     * @return PR_TRUE if valid container
+     */
+    virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
+
+    /**
+     * 
+     * @update	jevering 6/18/98
+     * @param  aParserDebug   created debug parser object
+     * @return
+     */
+    virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
 };


--- a/parser/htmlparser/src/nsIParser.h
+++ b/parser/htmlparser/src/nsIParser.h
@ -34,6 +34,7 @@ class nsString;
 class CToken;
 class nsIURL;
 class nsIDTD;
+class nsIParserDebug;

 /**
 *  This class defines the iparser interface. This XPCOM
@ -60,9 +61,10 @@ class nsIParser : public nsISupports {

    virtual PRInt32 Parse(nsIURL* aURL,
                          nsIStreamListener* aListener,
-                          PRBool aIncremental=PR_TRUE) = 0;
+                          PRBool aIncremental=PR_TRUE,
+                          nsIParserDebug * aDebug = 0) = 0;

-    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
+    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;

    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;

--- a/parser/htmlparser/src/nsIParserDebug.h
+++ b/parser/htmlparser/src/nsIParserDebug.h
@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL.  You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation.  Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update  gess 4/8/98
+ * 
+ *         
+ */
+
+#ifndef NS_IPARSERDEBUG__
+#define NS_IPARSERDEBUG__
+
+#include "nsISupports.h"
+#include "nsHTMLTokens.h"
+#include "prtypes.h"
+
+#define NS_IPARSERDEBUG_IID      \
+  {0x7b68c220, 0x0685,  0x11d2,  \
+  {0xa4, 0xb5, 0x00,    0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
+
+
+class nsIDTD;
+class nsHTMLParser;
+
+class nsIParserDebug : public nsISupports {
+   // Debug-robot interface; an instance is handed to the parser through nsIParser::Parse().
+public:
+   // Sets the directory that verification output is written under.
+   virtual void SetVerificationDirectory(char * verify_dir) = 0;
+   // Turns recording of context-vector statistics on or off.
+   virtual void SetRecordStatistics(PRBool bval) = 0;
+   // Checks the given context stack against aDTD; in CParserDebug PR_FALSE means aDTD rejected a parent/child pair.
+   virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0;
+   // Writes out the vector statistics recorded so far (CParserDebug writes them to "vector.stat").
+   virtual void DumpVectorRecord(void) = 0;
+
+};
+
+extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
+
+#endif /* NS_IPARSERDEBUG__ */
--- a/parser/htmlparser/src/nsIParserFilter.h
+++ b/parser/htmlparser/src/nsIParserFilter.h
@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL.  You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation.  Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update  jevering 6/17/98
+ * 
+ */
+
+#ifndef  IPARSERFILTER
+#define  IPARSERFILTER
+
+#include "nsISupports.h"
+
+class CToken;
+
+#define NS_IPARSERFILTER_IID     \
+  {0x14d6ff0,  0x0610,  0x11d2,  \
+  {0x8c, 0x3f, 0x00,    0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
+
+
+class nsIParserFilter : public nsISupports {
+  public:
+   // Called by nsHTMLParser::OnDataAvailable with the transfer buffer and a pointer to its length before the data is appended to the scanner.
+   NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0;
+   // NOTE(review): no caller is visible here; presumably invoked before a token is queued -- confirm.
+   NS_IMETHOD WillAddToken(CToken & token) = 0;
+   // NOTE(review): the signature is a placeholder (see the inline comment); its intended use is not defined yet.
+   NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0;
+};
+
+extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
+
+
+#endif
+
--- a/parser/htmlparser/src/nsParserDebug.cpp
+++ b/parser/htmlparser/src/nsParserDebug.cpp
@ -0,0 +1,534 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL.  You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation.  Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
+ * Reserved.
+ */
+  
+/**
+ * MODULE NOTES:
+ * @update  jevering 06/18/98
+ * 
+ * This file contains the parser debugger object which aids in
+ * walking links and reporting statistic information, reporting
+ * bad vectors.
+ */
+
+#include "CNavDTD.h"
+#include "nsHTMLTokens.h"
+#include "nsHTMLParser.h"
+#include "nsIParserDebug.h"
+#include "nsCRT.h"
+#include "prenv.h"  //this is here for debug reasons...
+#include "prtypes.h"  //this is here for debug reasons...
+#include "prio.h"
+#include "plstr.h"
+#include "prstrm.h"
+#include <fstream.h>
+#include <time.h>
+#include "prmem.h"
+
+#define CONTEXT_VECTOR_MAP	"/vector.map"
+#define CONTEXT_VECTOR_STAT	"/vector.stat"
+#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
+
+// structure to store the vector statistic information
+
+typedef struct vector_info {
+    PRInt32 references;     // number of occurrences counted for this vector
+    PRInt32 count;          // number of tags in the vector
+    PRBool  good_vector;    // is this a valid vector?
+    eHTMLTags* vector;       // heap copy of the tags (allocated in NoteVector)
+} VectorInfo;
+
+// the statistic vector table grows each time it exceeds this
+// stepping value
+#define TABLE_SIZE	128
+
+class CParserDebug : public nsIParserDebug {
+public:
+    // aVerifyDir is copied; when it is null the VERIFY_PARSER environment variable is used instead.
+    CParserDebug(char * aVerifyDir = 0);
+    ~CParserDebug();
+
+    NS_DECL_ISUPPORTS
+    // nsIParserDebug implementation
+    void SetVerificationDirectory(char * verify_dir);
+    void SetRecordStatistics(PRBool bval);
+    PRBool Verify(nsIDTD * aDTD,  nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
+    void DumpVectorRecord(void);
+
+    // per-object table for storing vector statistics, and its size
+
+private:
+    VectorInfo ** mVectorInfoArray;   // table of recorded vectors (grown in TABLE_SIZE steps)
+    PRInt32 mVectorCount;             // number of entries used in mVectorInfoArray
+    char * mVerificationDir;          // private copy of the output directory, or 0
+    PRBool mRecordingStatistics;      // when set, Verify() passes each vector to NoteVector()
+    // helpers
+    PRBool DebugRecord(char * path, char * pURLRef, char * filename);
+    void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
+    void MakeVectorString(char * vector_string, VectorInfo * pInfo);
+};
+
+static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);                 
+static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
+
+/**
+ *  This method is declared in nsIParserDebug.h. It is used to 
+ *  cause the COM-like construction of a CParserDebug object.
+ *  
+ *  @update  jevering 3/25/98
+ *  @param   nsIParserDebug** ptr to newly instantiated debug object
+ *  @return  NS_xxx error result
+ */
+
+NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
+{
+  // Build the debug object with its default constructor argument.
+  CParserDebug* debugObj = new CParserDebug();
+  if (!debugObj)
+    return NS_ERROR_OUT_OF_MEMORY;
+  // QueryInterface stores the interface pointer and adds the reference for the caller.
+  nsresult rv = debugObj->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
+  return rv;
+}
+
+CParserDebug::CParserDebug(char * aVerifyDir)
+{
+   NS_INIT_REFCNT();
+
+   // The statistics table is created lazily by NoteVector.
+   mVectorInfoArray = 0;
+   mVectorCount = 0;
+   mRecordingStatistics = PR_TRUE;
+
+   // Prefer the explicit directory; otherwise fall back to the
+   // VERIFY_PARSER environment variable. Either way we keep our own
+   // copy, and leave the member null when neither source is set.
+   const char * dirSource = aVerifyDir;
+   if (!dirSource) { dirSource = PR_GetEnv("VERIFY_PARSER"); }
+   mVerificationDir = dirSource ? PL_strdup(dirSource) : 0;
+}
+
+CParserDebug::~CParserDebug()
+{  // NOTE(review): only the directory string is released here; mVectorInfoArray is freed solely by DumpVectorRecord.
+   if (mVerificationDir)
+      PL_strfree(mVerificationDir);
+}
+
+/**
+ *  This method gets called as part of our COM-like interfaces.
+ *  Its purpose is to create an interface to parser object
+ *  of some type.
+ *  
+ *  @update   gess 4/8/98
+ *  @param    nsIID  id of object to discover
+ *  @param    aInstancePtr ptr to newly discovered interface
+ *  @return   NS_xxx result code
+ */
+nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
+{
+  // A result slot is mandatory.
+  if (!aInstancePtr)
+    return NS_ERROR_NULL_POINTER;
+
+  // Both nsISupports and nsIParserDebug resolve to the same
+  // nsIParserDebug view of this object.
+  const PRBool supported = aIID.Equals(kISupportsIID) || aIID.Equals(kIDebugParserIID);
+  if (!supported) {
+    // Unknown interface: clear the slot and report failure.
+    *aInstancePtr = 0;
+    return NS_NOINTERFACE;
+  }
+  // Hand out the pointer together with a reference for the caller.
+  *aInstancePtr = (nsIParserDebug*)(this);
+  ((nsISupports*) *aInstancePtr)->AddRef();
+  return NS_OK;
+}
+
+NS_IMPL_ADDREF(CParserDebug)
+NS_IMPL_RELEASE(CParserDebug)
+
+void CParserDebug::SetVerificationDirectory(char * verify_dir)
+{
+   // Take the new copy before freeing the old one, so the argument is never read after a free.
+   // A null argument means "no directory", matching how the constructor treats it.
+   char * replacement = verify_dir ? PL_strdup(verify_dir) : 0;
+   if (mVerificationDir) { PL_strfree(mVerificationDir); }
+   mVerificationDir = replacement;
+}
+
+void CParserDebug::SetRecordStatistics(PRBool bval)
+{ // When set, Verify() records every context vector it is given through NoteVector().
+	mRecordingStatistics = bval;
+}
+
+/** 
+ * This debug method records an invalid context vector and its
+ * associated URL and source-dump filename in a simple flat file mapping
+ * which resides in the verification directory and is named vector.map
+ *
+ * @update  jevering 6/06/98
+ * @param   path is the directory structure indicating the bad context vector
+ * @param   pURLRef is the associated URL
+ * @param   filename to record mapping to if not already recorded
+ * @return  TRUE if it was already recorded (don't re-record)
+ */
+
+PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
+{
+   // Looks for a "URL vector" record in <mVerificationDir>/vector.map and
+   // appends "URL vector filename" when no such record exists yet.
+   //   path     - directory form of the bad context vector
+   //   pURLRef  - URL of the document that produced the vector
+   //   filename - name of the source dump belonging to this vector
+   // Returns PR_TRUE when the vector was already recorded (nothing is
+   // written then) and PR_FALSE otherwise, including when the map file
+   // cannot be opened or memory runs out.
+
+   // Nothing can be recorded without a directory or without the record fields.
+   if (!mVerificationDir || !path || !pURLRef || !filename)
+      return PR_FALSE;
+
+   // create the record file name from the verification directory
+   // and the default name, refusing paths that would overflow the buffer.
+   char recordPath[2048];
+   if (PL_strlen(mVerificationDir) + sizeof(CONTEXT_VECTOR_MAP) > sizeof(recordPath))
+      return PR_FALSE;
+   strcpy(recordPath,mVerificationDir);
+   strcat(recordPath,CONTEXT_VECTOR_MAP);
+
+   // if the file exists, only open it for read/write; otherwise create it.
+   // The creation mode must grant the owner access, or later runs cannot
+   // reopen the map to look for duplicates.
+   PRIntn oflags = PR_RDWR;
+   if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
+      oflags |= PR_CREATE_FILE;
+   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0644);
+   if (!recordFile)
+      return PR_FALSE;
+
+   // records are stored in the format "URL vector filename\r\n"
+   // where the vector contains the verification path and
+   // the filename contains the debug source dump. The "URL vector"
+   // prefix is the key used to detect an existing record.
+   PRUint32 keyLen = PL_strlen(pURLRef) + 1 + PL_strlen(path);
+   char * record = (char *)PR_Malloc(keyLen + 1 + PL_strlen(filename) + 3);
+   if (!record) {
+      PR_Close(recordFile);
+      return PR_FALSE;
+   }
+   sprintf(record,"%s %s %s\r\n", pURLRef, path, filename);
+
+   PRBool found = PR_FALSE;
+
+   // get the file size, read in the file and parse it a line at
+   // a time to check whether we have already recorded this occurrence
+   PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
+   if (iSize > 0) {
+      char * buffer = (char*)PR_Malloc(iSize);
+      // a single line can never be longer than the whole file
+      char * line = (char*)PR_Malloc(iSize + 1);
+      if (buffer && line) {
+         // beginning of file for read
+         PR_Seek(recordFile,0,PR_SEEK_SET);
+         PRInt32 iRead = PR_Read(recordFile,buffer,iSize);
+         PRInt32 linePos = 0;
+
+         // run through the file looking for a matching vector
+         for (PRInt32 pos = 0; pos < iRead && !found; pos++) {
+            if (buffer[pos] != '\r') {
+               // build up the compare string
+               line[linePos++] = buffer[pos];
+               continue;
+            }
+            // end of line: terminate it and skip the '\n' that follows
+            line[linePos] = '\0';
+            linePos = 0;
+            pos++;
+
+            // chop off the filename, leaving "URL vector" for the compare
+            char * space = PL_strrchr(line, ' ');
+            if (space)
+               *space = '\0';
+            if (PL_strlen(line) == keyLen && !PL_strncmp(record,line,keyLen))
+               found = PR_TRUE;
+         }
+      }
+      // throw away the record file data
+      if (buffer) PR_Free(buffer);
+      if (line) PR_Free(line);
+   }
+
+   // if this bad vector was not recorded, add it to the record file
+   if (!found) {
+      PR_Seek(recordFile,0,PR_SEEK_END);
+      PR_Write(recordFile,record,PL_strlen(record));
+   }
+
+   // the descriptor and the record are released whether or not a match was found
+   PR_Close(recordFile);
+   PR_Free(record);
+   return found;
+}
+
+/**
+ * compare function for quick sort.  Compares references and
+ * sorts in descending order
+ */
+
+static int compare( const void *arg1, const void *arg2 )
+{
+	const VectorInfo * first  = *(VectorInfo**)arg1;  // qsort hands us pointers to table slots
+	const VectorInfo * second = *(VectorInfo**)arg2;
+	return second->references - first->references;    // larger reference counts sort first
+}
+
+/**
+ *  This debug routine stores statistical information about a
+ *  context vector.  The context vector statistics are stored in
+ *  a per-object array.  The table is re-sorted each time it grows so
+ *  frequent vectors come first.  If a vector has already been noted, its
+ *  reference count is bumped, otherwise it is added to the table
+ *
+ *  @update     jevering 6/11/98
+ *  @param      aTags is the tag list (vector)
+ *  @param      count is the size of the vector
+ *  @param      good_vector tells whether the vector passed verification
+ */
+
+void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
+{
+    // if the table doesn't exist, create it (nothing is recorded if that fails)
+    if (!mVectorInfoArray)
+        mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
+    if (!mVectorInfoArray)
+        return;
+    // attempt to look up the vector; only vectors of the same size can match
+    for (PRInt32 i = 0; i < mVectorCount; i++) {
+        if (mVectorInfoArray[i]->count == count && !memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
+            mVectorInfoArray[i]->references++;   // bzzzt. and we have a winner.. bump the ref count
+            return;
+        }
+    }
+    // not noted yet: build an entry with a private copy of the tags (the sample is dropped when out of memory)
+    VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
+    eHTMLTags * tagCopy = (eHTMLTags*)PR_Malloc((count > 0 ? count : 1)*sizeof(eHTMLTags));
+    if (!pVectorInfo || !tagCopy) {
+        if (pVectorInfo) PR_Free(pVectorInfo);
+        if (tagCopy) PR_Free(tagCopy);
+        return;
+    }
+    memcpy(tagCopy,aTags,sizeof(eHTMLTags)*count);
+    pVectorInfo->references = 1;
+    pVectorInfo->count = count;
+    pVectorInfo->good_vector = good_vector;
+    pVectorInfo->vector = tagCopy;
+    mVectorInfoArray[mVectorCount++] = pVectorInfo;
+    // table full?  grow it with PR_Realloc (it came from PR_Calloc and is released with PR_Free), then sort it
+    if ((mVectorCount % TABLE_SIZE) == 0) {
+        VectorInfo ** grown = (VectorInfo**)PR_Realloc(mVectorInfoArray,(sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
+        if (grown) {
+            mVectorInfoArray = grown;
+            qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+        } else {
+            mVectorCount--;        // could not grow: take the entry back so a free slot always remains
+            PR_Free(tagCopy);
+            PR_Free(pVectorInfo);
+        }
+    }
+}
+
+void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
+{   // Layout: "%6d " count, one "<TAG>" per entry, CRLF. NOTE(review): the output length is not bounded here.
+    sprintf (vector_string, "%6d ", pInfo->references);
+    for (PRInt32 remaining = pInfo->count; remaining > 0; remaining--) {
+        PL_strcat(vector_string, "<");
+        PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[pInfo->count - remaining]));
+        PL_strcat(vector_string, ">");
+    }
+    PL_strcat(vector_string,"\r\n");
+}
+
+/**
+ *  This debug routine dumps out the vector statistics to a text
+ *  file in the verification directory and defaults to the name
+ *  "vector.stat".  It contains all parsed context vectors and their
+ *  occurrence count sorted in descending order.
+ *  
+ *  @update     jevering 6/11/98
+ *  @param
+ *  @return
+ */
+
+void CParserDebug::DumpVectorRecord(void)
+{
+    // Writes the recorded vectors (invalid ones first, then valid ones) to the statistics
+    // file and then releases the table -- also when the file cannot be opened.
+    // do we have a table?
+    if (!mVectorCount)
+        return;
+
+    // hopefully, they wont exceed 1K.
+    char vector_string[1024];
+    char path[1024];
+    PRInt32 i;
+    PRFileDesc * statisticFile = 0;
+
+    // put in the verification directory.. else the root (a directory too long for path means no report)
+    if (!mVerificationDir || PL_strlen(mVerificationDir) + sizeof(CONTEXT_VECTOR_STAT) <= sizeof(path)) {
+        path[0] = '\0';
+        if (mVerificationDir)
+            strcpy(path,mVerificationDir);
+        strcat(path,CONTEXT_VECTOR_STAT);
+        // open the stat file creaming any existing one: PR_TRUNCATE drops stale contents, the mode keeps it readable
+        statisticFile = PR_Open(path,PR_CREATE_FILE|PR_TRUNCATE|PR_RDWR,0644);
+    }
+
+    if (statisticFile) {
+        // the stream lives only in this block, so it is destroyed before PR_Close runs below
+        PRofstream ps;
+        ps.attach(statisticFile);
+        // oh what the heck, sort it again
+        qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+
+        // cute little header
+        sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
+        ps << vector_string;
+        // the URL-to-vector mapping of the invalid vectors lives in the map file, not in this file
+        ps << "Invalid context vector summary (see " CONTEXT_VECTOR_MAP ") for mapping.\r\n";
+        ps << VECTOR_TABLE_HEADER;
+        // dump out the bad vectors encountered
+        for (i = 0; i < mVectorCount; i++) {
+            if (!mVectorInfoArray[i]->good_vector) {
+                MakeVectorString(vector_string, mVectorInfoArray[i]);
+                ps << vector_string;
+            }
+        }
+        ps << "\r\n\r\nValid context vector summary\r\n";
+        ps << VECTOR_TABLE_HEADER;
+        // take a big vector table dump (good vectors)
+        for (i = 0; i < mVectorCount; i++) {
+            if (mVectorInfoArray[i]->good_vector) {
+                MakeVectorString(vector_string, mVectorInfoArray[i]);
+                ps << vector_string;
+            }
+        }
+    }
+
+    // free em up.  they mean nothing to me now (I'm such a user)
+    for (i = 0; i < mVectorCount; i++) {
+        if (mVectorInfoArray[i]->vector)
+            PR_Free(mVectorInfoArray[i]->vector);
+        PR_Free(mVectorInfoArray[i]);
+    }
+
+    // ok, we are done with the table, free it up as well
+    PR_Free(mVectorInfoArray);
+    mVectorInfoArray = 0;
+    mVectorCount = 0;
+    if (statisticFile)
+        PR_Close(statisticFile);
+}
+
+
+/**
+ * Debug check of the context vector in aContextStack: asks aDTD whether each
+ * entry can contain the next, optionally notes statistics and dumps bad pages.
+ *
+ * @update  gess4/22/98
+ * @param   aDTD is asked CanContain() per adjacent pair; aParser dumps source
+ * @param   aContextStackPos is the number of entries used in aContextStack
+ * @param   aContextStack holds the eHTMLTags; aURLRef is passed to DebugRecord
+ * @return  PR_FALSE if a checked pair fails CanContain(), else PR_TRUE
+ */
+
+PRBool CParserDebug::Verify(nsIDTD * aDTD,  nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef) 
+{
+   PRBool  result=PR_TRUE;
+
+    // only validate when a verification dir is set or statistics are recorded
+
+   if(0!=mVerificationDir || mRecordingStatistics) {
+
+      if(aDTD && aContextStackPos>1) {  // need at least one adjacent pair to check
+         for (int i = 0; i < aContextStackPos-1; i++)
+            if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) {
+               result = PR_FALSE;
+               break;
+            }
+         }
+   }
+
+   if (mRecordingStatistics) {
+	   NoteVector(aContextStack,aContextStackPos,result);
+   }
+
+   if(0!=mVerificationDir) {
+      char    path[2048];  // NOTE(review): strcpy/strcat below are not bounds-checked
+      strcpy(path,mVerificationDir);
+
+      int i=0;      
+      for(i=0;i<aContextStackPos;i++){
+         strcat(path,"/");
+         const char* name=GetTagName(aContextStack[i]);
+         strcat(path,name);
+         PR_MkDir(path,0);  // called once per level of the path; result is ignored
+      }
+      if(PR_FALSE==result){
+         static PRBool rnd_initialized = PR_FALSE;
+
+         if (!rnd_initialized) {
+            // seed the random number generator (first call only) to aid in
+            // temp file name creation.
+            rnd_initialized = PR_TRUE;
+            srand((unsigned)time(NULL));
+         }
+
+         // generate a filename to dump the html source into
+         char filename[1024];  // NOTE(review): smaller than path[2048]; sprintf below is unbounded
+         do {
+            // use system time to generate a temporary file name
+            time_t ltime;
+            time (&ltime);
+            // add in a random number so that we can create unique names
+            // faster than simply every second.
+            ltime += (time_t)rand();
+            sprintf(filename,"%s/%lX.html", path, ltime);  // NOTE(review): %lX assumes time_t is a long
+            // retry while PR_Access reports that this name already exists
+         } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
+
+         // check to see if we already recorded an instance of this particular
+         // bad vector (presumably what DebugRecord reports -- TODO confirm).
+         if (!DebugRecord(path, aURLRef, filename))
+         {
+            // save file to directory indicated by bad context vector
+            PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
+            // if we were able to open the debug file, then attach an
+            // output stream to it for the parser to write into.
+            if (debugFile) {
+               // dump the html source into the newly created file.
+               PRofstream ps;
+               ps.attach(debugFile);
+               if (aParser)
+                  aParser->DebugDumpSource(ps);
+               PR_Close(debugFile);  // NOTE(review): closed while ps is still attached -- confirm flush order
+            }
+         }
+      }
+   }
+
+   return result;
+}
--- a/parser/htmlparser/src/nsTokenizer.cpp
+++ b/parser/htmlparser/src/nsTokenizer.cpp
--- a/parser/htmlparser/src/nsTokenizer.h
+++ b/parser/htmlparser/src/nsTokenizer.h