More debug robot work. I added some statistic gathering information

as well as hooking up the context vector validation stuff. The debug robot now makes two files in the root verification directory: one mapping context vectors to debug source dumps, and one tracking the occurrence counts of good and bad context vectors.
1998-06-12 01:34:49 +00:00 · 1998-06-12 01:34:49 +00:00 · 3b414b2bc9
--- a/htmlparser/src/CNavDTD.cpp
+++ b/htmlparser/src/CNavDTD.cpp
@ -842,8 +842,12 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{
 PRBool CNavDTD::VerifyContextVector(PRInt32* aVector,PRInt32 aCount) const {
  PRBool result=PR_TRUE;

-  if(aCount>0) {
-
+  // a vector of zero or one tag has no adjacent pair to check, so it passes
+  if(aCount>1) {
+     // walk adjacent pairs; fail as soon as CanContain rejects
+     // (aVector[i], aVector[i+1])
+     for (int i = 0; i < aCount-1; i++)
+        if (!CanContain(aVector[i],aVector[i+1])) {
+           result = PR_FALSE;
+           break;
+        }
  }
  return result;
 }
--- a/htmlparser/src/CNavDelegate.cpp
+++ b/htmlparser/src/CNavDelegate.cpp
@ -31,6 +31,13 @@

 static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";

+/**
+ *  Deallocation callback handed to mTokenDeque at construction.
+ *  The type-erased pointer is treated as a CToken and deleted.
+ *
+ *  @param   pToken  CToken pointer passed as void*; may be null
+ */
+static void TokenFreeProc(void * pToken)
+{
+   CToken * theToken = (CToken*)pToken;
+   if (!theToken)
+      return;   // nothing to release
+   delete theToken;
+}

 /**
 *  Default constructor
@ -40,7 +47,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY
 *  @return  
 */
 CNavDelegate::CNavDelegate() :
-  ITokenizerDelegate(), mTokenDeque() {
+  ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
+  // mTokenDeque is now constructed with TokenFreeProc as its free routine.
+  // NOTE(review): PR_TRUE presumably tells the deque it owns its tokens --
+  // confirm against the deque class declaration.
 }

 /**
--- a/htmlparser/src/COtherDelegate.cpp
+++ b/htmlparser/src/COtherDelegate.cpp
@ -32,6 +32,14 @@
 static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";


+/**
+ *  Deallocation callback handed to mTokenDeque at construction.
+ *  The type-erased pointer is treated as a CToken and deleted.
+ *
+ *  @param   pToken  CToken pointer passed as void*; may be null
+ */
+static void TokenFreeProc(void * pToken)
+{
+   CToken * theToken = (CToken*)pToken;
+   if (!theToken)
+      return;   // nothing to release
+   delete theToken;
+}
+
 /**
 *  Default constructor
 *  
@ -40,7 +48,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY
 *  @return  
 */
 COtherDelegate::COtherDelegate() :
-  ITokenizerDelegate(), mTokenDeque() {
+  ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
+  // mTokenDeque is now constructed with TokenFreeProc as its free routine.
+  // NOTE(review): PR_TRUE presumably tells the deque it owns its tokens --
+  // confirm against the deque class declaration.
 }

 /**
--- a/htmlparser/src/makefile.win
+++ b/htmlparser/src/makefile.win
@ -28,7 +28,7 @@ CPPSRCS=nsHTMLContentSink.cpp \
    nsToken.cpp nsTokenizer.cpp nsTokenHandler.cpp  \
    CNavDTD.cpp CNavDelegate.cpp \
    COtherDTD.cpp COtherDelegate.cpp \
-    nsHTMLParser.cpp
+    nsHTMLParser.cpp prstrm.cpp

 EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
    nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h 
@ -39,7 +39,8 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
    .\$(OBJDIR)\nsHTMLParser.obj \
    .\$(OBJDIR)\nsHTMLTokens.obj          .\$(OBJDIR)\nsParserNode.obj \
    .\$(OBJDIR)\nsScanner.obj             .\$(OBJDIR)\nsToken.obj \
-    .\$(OBJDIR)\nsTokenizer.obj           .\$(OBJDIR)\nsTokenHandler.obj 
+    .\$(OBJDIR)\nsTokenizer.obj           .\$(OBJDIR)\nsTokenHandler.obj \
+    .\$(OBJDIR)\prstrm.obj

 LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib

--- a/htmlparser/src/nsHTMLParser.cpp
+++ b/htmlparser/src/nsHTMLParser.cpp
@ -33,10 +33,13 @@
 #include "prio.h"
 #include "plstr.h"
 #include <fstream.h>
+#include "prstrm.h"
 #include "nsIInputStream.h"
 #ifdef XP_PC
 #include <direct.h> //this is here for debug reasons...
 #endif
+#include <time.h>
+#include "prmem.h"

 static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);                 
 static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID); 
@ -49,10 +52,20 @@ static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

 static char*  gVerificationOutputDir=0;
+static PRBool gRecordingStatistics=PR_TRUE;
 static char*  gURLRef=0;
 static int    rickGDebug=0;
 static const int gTransferBufferSize=4096;  //size of the buffer used in moving data from iistream

+/**
+ *  Sets the global verification output directory.
+ *  Only the pointer is stored (no copy is made), so the caller's string
+ *  must remain valid for as long as gVerificationOutputDir is in use.
+ *
+ *  @param   verify_dir  directory path; a null pointer leaves the
+ *                       directory-based verification output disabled
+ */
+extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
+{
+	gVerificationOutputDir = verify_dir;
+}
+
+/**
+ *  Turns context vector statistics gathering on or off by setting
+ *  gRecordingStatistics (which is initialized to PR_TRUE).
+ *
+ *  @param   bval  PR_TRUE to record statistics, PR_FALSE to stop
+ */
+extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
+{
+	gRecordingStatistics = bval;
+}

 /**
 *  This method is defined in nsIParser. It is used to 
@ -73,7 +86,6 @@ NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult)
  return it->QueryInterface(kIParserIID, (void **) aInstancePtrResult);
 }

-
 /**
 *  This big dispatch method is used to route token handler calls to the right place.
 *  What's wrong with it? This table, and the dispatch methods themselves need to be 
@ -367,6 +379,298 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) {
  return old;
 }

+/** 
+ * This debug method records an invalid context vector and its
+ * associated URL in a simple flat-file mapping which resides in the
+ * verification directory and is named vector.map (CONTEXT_VECTOR_MAP).
+ * Each record is one line of the form "URL vector filename\r\n".
+ *
+ * @update  jevering 6/06/98
+ * @param   path is the directory structure indicating the bad context vector
+ * @param   pURLRef is the associated URL
+ * @param   filename to record mapping to if not already recorded
+ * @return  PR_TRUE if this URL/vector pair is already recorded (nothing is
+ *          written); PR_FALSE if the record was appended or the map file
+ *          could not be used
+ */
+
+#define CONTEXT_VECTOR_MAP	"/vector.map"
+#define CONTEXT_VECTOR_STAT	"/vector.stat"
+#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
+PRBool DebugRecord(char * path, char * pURLRef, char * filename)
+{
+   // without a verification directory or the record fields there is
+   // nothing to look up or write
+   if (!gVerificationOutputDir || !path || !pURLRef || !filename)
+      return PR_FALSE;
+
+   // create the record file name from the verification directory
+   // and the default name, refusing paths that would overflow the buffer
+   char recordPath[2048];
+   if (PL_strlen(gVerificationOutputDir) + sizeof(CONTEXT_VECTOR_MAP) > sizeof(recordPath))
+      return PR_FALSE;
+   strcpy(recordPath,gVerificationOutputDir);
+   strcat(recordPath,CONTEXT_VECTOR_MAP);
+
+   // if the file exists, only open it for read/write, otherwise create it
+   PRIntn oflags = PR_RDWR;
+   if (PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
+      oflags |= PR_CREATE_FILE;
+
+   // the mode only matters when the file is created; 0 would produce a
+   // file nobody can reopen on Unix
+   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0644);
+   if (!recordFile)
+      return PR_FALSE;
+
+   PRBool found = PR_FALSE;
+
+   // vectors are stored in the format "URL vector filename" where the
+   // vector contains the verification path and the filename names the
+   // debug source dump.  "URL vector" is the lookup key.
+   const PRUint32 keyLen = PL_strlen(pURLRef) + 1 + PL_strlen(path);
+   const PRUint32 recordLen = keyLen + 1 + PL_strlen(filename) + 2;
+   char * record = (char *)PR_Malloc(recordLen + 1);
+
+   if (record) {
+      sprintf(record,"%s %s %s\r\n", pURLRef, path, filename);
+
+      // get the file size, read in the file and walk it a line at a
+      // time to see if we have already recorded this occurrence
+      PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
+      if (iSize > 0) {
+         char * buffer = (char*)PR_Malloc(iSize);
+         if (buffer) {
+            // beginning of file for read
+            PR_Seek(recordFile,0,PR_SEEK_SET);
+            PRInt32 iRead = PR_Read(recordFile,buffer,iSize);
+
+            PRInt32 lineStart = 0;
+            for (PRInt32 pos = 0; pos < iRead && !found; pos++) {
+               if (buffer[pos] != '\r')
+                  continue;
+
+               // [lineStart,pos) is one record; chop off the filename
+               // (everything after the last space) before comparing
+               PRInt32 keyEnd = pos;
+               for (PRInt32 scan = pos - 1; scan >= lineStart; scan--) {
+                  if (buffer[scan] == ' ') {
+                     keyEnd = scan;
+                     break;
+                  }
+               }
+
+               // exact-length match so "URL /a/b" never matches "URL /a/bc"
+               if ((PRUint32)(keyEnd - lineStart) == keyLen &&
+                   !PL_strncmp(record, buffer + lineStart, keyLen))
+                  found = PR_TRUE;
+
+               pos++;                 // skip the '\n' that follows '\r'
+               lineStart = pos + 1;
+            }
+
+            // throw away the record file data
+            PR_Free(buffer);
+         }
+      }
+
+      // if this bad vector was not recorded, add it to the record file
+      if (!found) {
+         PR_Seek(recordFile,0,PR_SEEK_END);
+         PR_Write(recordFile,record,recordLen);
+      }
+
+      PR_Free(record);
+   }
+
+   // single exit: the descriptor is always closed
+   PR_Close(recordFile);
+   return found;
+}
+
+// One entry of the context vector statistics table: a distinct context
+// vector together with how often it has been seen.
+
+typedef struct vector_info {
+	PRInt32 references;     // number of occurrences counted (starts at 1)
+	PRInt32 count;          // number of tags in the vector
+    PRBool  good_vector;    // validity flag passed in when the vector was first noted
+	PRInt32 * vector;       // separately allocated copy of the tag array (count entries)
+} VectorInfo;
+
+// global table for storing vector statistics and the size
+static VectorInfo ** gVectorInfoArray = 0;
+static PRInt32 gVectorCount = 0;
+
+// the statistic vector table grows each time it exceeds this
+// stepping value
+#define TABLE_SIZE	128
+
+// qsort callback for the statistics table.  Each argument points at a
+// VectorInfo* slot; entries with more references sort first.
+
+static int compare( const void *arg1, const void *arg2 )
+{
+	const VectorInfo * lhs = *(VectorInfo**)arg1;
+	const VectorInfo * rhs = *(VectorInfo**)arg2;
+	return rhs->references - lhs->references;
+}
+
+/**
+ * Sorts the global statistics table by descending reference count so
+ * that the most frequently seen vectors sit at the front (which makes
+ * the linear lookup a little speedier).
+ */
+
+void SortVectorRecord(void)
+{
+	// an empty table needs no sorting
+	if (!gVectorCount)
+		return;
+
+	qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
+}
+
+/**
+ *  This debug routine stores statistical information about a
+ *  context vector.  The context vector statistics are stored in
+ *  a global array.  The table is re-sorted each time it grows to
+ *  aid in lookup speed.  If a vector has already been noted, its
+ *  reference count is bumped, otherwise it is added to the table.
+ *  If memory cannot be obtained the vector is silently not recorded.
+ *
+ *  @update     jevering 6/11/98
+ *  @param      aTags is the tag list (vector)
+ *  @param      count is the size of the vector
+ *  @param      good_vector is stored with a newly added vector; it is
+ *              ignored when the vector is already in the table
+ *  @return
+ */
+
+void NoteVector(PRInt32 aTags[],PRInt32 count, PRBool good_vector)
+{
+    // reject arguments we could not safely copy or compare
+    if (count < 0 || (count > 0 && !aTags))
+        return;
+
+    const size_t vectorBytes = sizeof(PRInt32)*count;
+
+    // if the table doesn't exist, create it
+    if (!gVectorInfoArray) {
+        gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
+        if (!gVectorInfoArray)
+            return;
+    }
+    else {
+        // attempt to look up the vector; compare contents only when
+        // the sizes agree
+        for (PRInt32 i = 0; i < gVectorCount; i++) {
+            VectorInfo * known = gVectorInfoArray[i];
+            if (known->count != count)
+                continue;
+            if (count == 0 || !memcmp(known->vector, aTags, vectorBytes)) {
+                // already noted.. bump the ref count and we are done
+                known->references++;
+                return;
+            }
+        }
+    }
+
+    // the context vector hasn't been noted, so allocate an entry,
+    // initialize its count to one and add it to the table
+    VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
+    if (!pVectorInfo)
+        return;
+
+    // always request at least one element so an empty vector still gets
+    // a non-null buffer
+    pVectorInfo->vector = (PRInt32*)PR_Malloc(count ? vectorBytes : sizeof(PRInt32));
+    if (!pVectorInfo->vector) {
+        PR_Free(pVectorInfo);
+        return;
+    }
+
+    pVectorInfo->references = 1;
+    pVectorInfo->count = count;
+    pVectorInfo->good_vector = good_vector;
+    if (count)
+        memcpy(pVectorInfo->vector,aTags,vectorBytes);
+    gVectorInfoArray[gVectorCount++] = pVectorInfo;
+
+    // have we filled the table?  grow it by another TABLE_SIZE slots and
+    // sort it.  The table came from PR_Calloc, so it must be resized with
+    // PR_Realloc rather than the C runtime's realloc.
+    if ((gVectorCount % TABLE_SIZE) == 0) {
+        VectorInfo ** grown = (VectorInfo**)PR_Realloc(
+            gVectorInfoArray,
+            sizeof(VectorInfo*)*(gVectorCount+TABLE_SIZE));
+        if (!grown) {
+            // cannot grow: take the entry back out so the next insertion
+            // never writes past the end of the (still valid) old table
+            gVectorCount--;
+            PR_Free(pVectorInfo->vector);
+            PR_Free(pVectorInfo);
+            return;
+        }
+        gVectorInfoArray = grown;
+        SortVectorRecord();
+    }
+}
+
+/**
+ *  Formats one statistics entry as a text line:
+ *  the reference count ("%6d "), then "<tag>" for each tag in the
+ *  vector, then "\r\n".  Tags that would not fit in the buffer are
+ *  dropped so the line terminator always fits.
+ *
+ *  @param      vector_string receives the formatted line
+ *  @param      pInfo is the table entry to format
+ *  @param      aBufferSize is the capacity of vector_string in bytes;
+ *              defaults to 1024, the size used by DumpVectorRecord
+ */
+void MakeVectorString(char * vector_string, VectorInfo * pInfo, PRUint32 aBufferSize = 1024)
+{
+    if (!vector_string || !aBufferSize)
+        return;
+
+    // the count field plus "\r\n" and the terminator need up to 16 bytes
+    if (!pInfo || aBufferSize < 16) {
+        vector_string[0] = '\0';
+        return;
+    }
+
+    sprintf (vector_string, "%6d ", pInfo->references);
+    PRUint32 used = PL_strlen(vector_string);
+
+    for (PRInt32 j = 0; j < pInfo->count; j++) {
+        const char * name = (const char *)GetTagName(pInfo->vector[j]);
+
+        // "<" + name + ">" must still leave room for "\r\n" and the NUL
+        PRUint32 needed = PL_strlen(name) + 2;
+        if (used + needed + 3 > aBufferSize)
+            break;
+
+        PL_strcat(vector_string, "<");
+        PL_strcat(vector_string, name);
+        PL_strcat(vector_string, ">");
+        used += needed;
+    }
+    PL_strcat(vector_string,"\r\n");
+}
+
+/**
+ *  This debug routine dumps out the vector statistics to a text
+ *  file in the verification directory and defaults to the name
+ *  "vector.stat".  It contains all parsed context vectors and their
+ *  occurrence count sorted in descending order.  The statistics table
+ *  is released afterwards whether or not the file could be written.
+ *  
+ *  @update     jevering 6/11/98
+ *  @param
+ *  @return
+ */
+
+extern "C" NS_EXPORT void DumpVectorRecord(void)
+{
+    PRInt32 i;
+
+    // do we have anything to report?
+    if (gVectorCount) {
+
+        // hopefully, they won't exceed 1K.
+        char vector_string[1024];
+        char path[1024];
+
+        path[0] = '\0';
+
+        // put it in the verification directory (when one is set and
+        // fits in the buffer).. else the root
+        if (gVerificationOutputDir &&
+            PL_strlen(gVerificationOutputDir) + sizeof(CONTEXT_VECTOR_STAT) <= sizeof(path))
+            strcpy(path,gVerificationOutputDir);
+
+        strcat(path,CONTEXT_VECTOR_STAT);
+
+        // open the stat file, truncating any existing one so no stale
+        // tail is left behind; the mode applies only on creation
+        PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_TRUNCATE|PR_RDWR,0644);
+        if (statisticFile) {
+            {
+                PRofstream ps;
+                ps.attach(statisticFile);
+
+                // oh what the heck, sort it again
+                SortVectorRecord();
+
+                // cute little header
+                sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
+                ps << vector_string;
+
+                // the URL/vector mapping lives in the map file, not in
+                // the stat file being written here
+                ps << "Invalid context vector summary (see " CONTEXT_VECTOR_MAP ") for mapping.\r\n";
+                ps << VECTOR_TABLE_HEADER;
+
+                // dump out the bad vectors encountered
+                for (i = 0; i < gVectorCount; i++) {
+                    if (!gVectorInfoArray[i]->good_vector) {
+                        MakeVectorString(vector_string, gVectorInfoArray[i]);
+                        ps << vector_string;
+                    }
+                }
+
+                ps << "\r\n\r\nValid context vector summary\r\n";
+                ps << VECTOR_TABLE_HEADER;
+
+                // then the good vectors
+                for (i = 0; i < gVectorCount; i++) {
+                    if (gVectorInfoArray[i]->good_vector) {
+                        MakeVectorString(vector_string, gVectorInfoArray[i]);
+                        ps << vector_string;
+                    }
+                }
+            }   // ps goes out of scope (and flushes) before the close below
+
+            // only close a descriptor we actually opened
+            PR_Close(statisticFile);
+        }
+    }
+
+    // ok, we are done with the table: free every entry together with
+    // its tag array, then the table itself
+    if (gVectorInfoArray) {
+        for (i = 0; i < gVectorCount; i++) {
+            if (gVectorInfoArray[i]) {
+                PR_Free(gVectorInfoArray[i]->vector);
+                PR_Free(gVectorInfoArray[i]);
+            }
+        }
+        PR_Free(gVectorInfoArray);
+    }
+    gVectorInfoArray = 0;
+    gVectorCount = 0;
+}

 /**
 * This debug method allows us to determine whether or not 
@ -378,47 +682,72 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) {
 * @param   aDTD is the DTD we plan to ask for verification
 * @return  TRUE if we know how to handle it, else false
 */
-PRBool VerifyContextVector(PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) {
+
+PRBool VerifyContextVector(CTokenizer * tokenizer, PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) {

  PRBool  result=PR_TRUE;

+  //ok, now see if we understand this vector
+
+  if(0!=gVerificationOutputDir || gRecordingStatistics) 
+      result=aDTD->VerifyContextVector(aTags,count);
+
+  if (gRecordingStatistics) {
+	  NoteVector(aTags,count,result);
+  }
+
  if(0!=gVerificationOutputDir) {
-  
-#ifdef XP_PC
-      char    path[_MAX_PATH+1];
+      char    path[2048];
      strcpy(path,gVerificationOutputDir);
-#endif

      int i=0;      
      for(i=0;i<count;i++){
-
-#ifdef NS_WIN32
        strcat(path,"/");
        const char* name=GetTagName(aTags[i]);
        strcat(path,name);
        mkdir(path);
-#endif
      }
-      //ok, now see if we understand this vector
-      result=aDTD->VerifyContextVector(aTags,count);
-
 	  if(PR_FALSE==result){
-#ifdef NS_WIN32
-      // save file to directory indicated by bad context vector
-      int iCount = 1;
+      static PRBool rnd_initialized = PR_FALSE;
+
+      if (!rnd_initialized) {
+         // seed randomn number generator to aid in temp file
+         // creation.
+         rnd_initialized = PR_TRUE;
+         srand((unsigned)time(NULL));
+      }
+
+      // generate a filename to dump the html source into
      char filename[_MAX_PATH];
      do {
-         sprintf(filename,"%s/html%04d.dbg", path, iCount++);
+         // use system time to generate a temporary file name
+         time_t ltime;
+         time (&ltime);
+         // add in random number so that we can create uniques names
+         // faster than simply every second.
+         ltime += (time_t)rand();
+         sprintf(filename,"%s/%lX.html", path, ltime);
+         // try until we find one we can create
      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-      PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-      if (debugFile) {
-         PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
-         PR_Write(debugFile,"\n",PL_strlen("\n"));
-         PR_Close(debugFile);
+
+      // check to see if we already recorded an instance of this particular
+      // bad vector.  
+      if (!DebugRecord(path,gURLRef, filename))
+      {
+         // save file to directory indicated by bad context vector
+         PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
+         // if we were able to open the debug file, then
+         // write the true URL at the top of the file.
+         if (debugFile) {
+            // dump the html source into the newly created file.
+            if (tokenizer) {
+               PRofstream ps;
+               ps.attach(debugFile);
+               tokenizer->DebugDumpSource(ps);
+            }
+            PR_Close(debugFile);
+         }
      }
-#endif
-      //add debugging code here to record the fact that we just encountered
-      //a context vector we don't know how to handle.
    }
  }

@ -454,7 +783,7 @@ PRInt32 nsHTMLParser::IterateTokens() {
    if(aHandler) {
      theMarkPos=*mCurrentPos;
      result=(*aHandler)(theToken,this);
-      VerifyContextVector(mContextStack,mContextStackPos,mDTD);
+      VerifyContextVector(mTokenizer, mContextStack,mContextStackPos,mDTD);
    }
    ++(*mCurrentPos);
  }
--- a/htmlparser/src/nsTokenizer.cpp
+++ b/htmlparser/src/nsTokenizer.cpp
@ -23,6 +23,13 @@
 #include "nsScanner.h"
 #include "nsIURL.h"

+/**
+ *  Deallocation callback handed to mTokenDeque at construction.
+ *  The type-erased pointer is treated as a CToken and deleted.
+ *
+ *  @param   pToken  CToken pointer passed as void*; may be null
+ */
+static void TokenFreeProc(void * pToken)
+{
+   CToken * theToken = (CToken*)pToken;
+   if (!theToken)
+      return;   // nothing to release
+   delete theToken;
+}

 /**
 *  Default constructor
@ -33,7 +40,7 @@
 *  @return 
 */
 CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) :
-  mTokenDeque() {
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mDelegate=aDelegate;
  mScanner=new CScanner(aURL,aMode);
  mParseMode=aMode;
@ -48,7 +55,7 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo
 *  @return 
 */
 CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
-  mTokenDeque() {
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mDelegate=aDelegate;
  mScanner=new CScanner(aFilename,aMode);
  mParseMode=aMode;
@ -63,7 +70,7 @@ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,ePars
 *  @return 
 */
 CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
-  mTokenDeque() {
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mDelegate=aDelegate;
  mScanner=new CScanner(aMode);
  mParseMode=aMode;
--- a/htmlparser/src/prstrm.cpp
+++ b/htmlparser/src/prstrm.cpp
@ -0,0 +1,343 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is Netscape Communications
+ * Corporation.  Portions created by Netscape are Copyright (C) 1998
+ * Netscape Communications Corporation.  All Rights Reserved.
+ */
+#include "prtypes.h"
+#include "prstrm.h"
+#include <string.h>
+
+const PRIntn STRM_BUFSIZ = 8192;
+
+// Default constructor: no descriptor attached, nothing opened by this
+// object and no internally allocated buffer.
+PRfilebuf::PRfilebuf():
+_fd(0),
+_opened(PR_FALSE),
+_allocated(PR_FALSE)
+{
+}
+
+// Wraps an existing descriptor.  _opened stays PR_FALSE, so the
+// destructor only syncs and does not close fd.
+PRfilebuf::PRfilebuf(PRFileDesc *fd):
+streambuf(),
+_fd(fd),
+_opened(PR_FALSE),
+_allocated(PR_FALSE)
+{
+}
+
+// Wraps an existing descriptor and installs a caller-supplied buffer via
+// setbuf (a null pointer or non-positive length selects unbuffered mode).
+// _allocated stays PR_FALSE, so the buffer is never deleted here.
+PRfilebuf::PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen):
+_fd(fd),
+_opened(PR_FALSE),
+_allocated(PR_FALSE)
+{
+    PRfilebuf::setbuf(buffptr, bufflen);
+}
+
+// Destructor: closes the descriptor when this object opened it,
+// otherwise just flushes pending data; then releases the reserve area
+// if open() allocated it.
+PRfilebuf::~PRfilebuf()
+{
+    if (_opened)
+        close();
+    else
+        sync();
+
+    // the buffer comes from "new char[STRM_BUFSIZ]" in open(), so it has
+    // to be released with the array form of delete
+    if (_allocated)
+        delete [] base();
+}
+
+// Opens the named file, translating iostream open-mode bits into NSPR
+// PR_Open flags, allocating a STRM_BUFSIZ reserve area unless the stream
+// is unbuffered or already has one.
+// Returns this on success, 0 if a descriptor is already attached, the
+// mode names neither ios::in nor ios::out, the open fails, or the
+// ios::ate seek fails.
+PRfilebuf*	
+PRfilebuf::open(const char *name, int mode, int flags)
+{
+     if (_fd != 0)
+        return 0;    // error if already open
+     PRIntn PRmode = 0;
+    // translate mode argument
+    if (!(mode & ios::nocreate))
+        PRmode |= PR_CREATE_FILE;
+    //if (mode & ios::noreplace)
+    //    PRmode |= O_EXCL;
+    if (mode & ios::app){
+        mode |= ios::out;
+        PRmode |= PR_APPEND;
+    }
+    if (mode & ios::trunc){
+        mode |= ios::out;  // IMPLIED
+        PRmode |= PR_TRUNCATE;
+    }
+    if (mode & ios::out){
+        if (mode & ios::in)
+            PRmode |= PR_RDWR;
+        else
+            PRmode |= PR_WRONLY;
+        // plain output with none of in/app/ate/noreplace truncates
+        if (!(mode & (ios::in|ios::app|ios::ate|ios::noreplace))){
+            mode |= ios::trunc; // IMPLIED
+            PRmode |= PR_TRUNCATE;
+        }
+    }else if (mode & ios::in)
+        PRmode |= PR_RDONLY;
+    else
+        return 0;    // error if not ios:in or ios::out
+
+
+    //
+    // The usual portable across unix crap...
+    // NT gets a hokey piece of junk layer that prevents
+    // access to the API.
+    // NOTE(review): on WIN32 the open flags are also passed as the
+    // permission argument, elsewhere the caller's "flags" value is used
+    // as the permission bits -- confirm this is intended.
+#ifdef WIN32
+    _fd = PR_Open(name, PRmode, PRmode);
+#else
+    _fd = PR_Open(name, PRmode, flags);
+#endif
+    if (_fd == 0)
+        return 0;
+    // remember that this object owns the descriptor (the destructor closes it)
+    _opened = PR_TRUE;
+    if ((!unbuffered()) && (!ebuf())){
+        char * sbuf = new char[STRM_BUFSIZ];
+        if (!sbuf)
+            unbuffered(1);
+        else{
+			_allocated = PR_TRUE;
+            streambuf::setb(sbuf,sbuf+STRM_BUFSIZ,0);
+		}
+    }
+    // ios::ate: position at end of file right after opening
+    if (mode & ios::ate){
+        if (seekoff(0,ios::end,mode)==EOF){
+            close();
+            return 0;
+        }
+    }
+    return this;
+}
+
+// Attaches an externally owned descriptor.  _opened is cleared, so the
+// destructor will sync but not close fd.  Any previously attached
+// descriptor is simply replaced (it is neither synced nor closed here).
+// Always returns this.
+PRfilebuf*	
+PRfilebuf::attach(PRFileDesc *fd)
+{
+    _opened = PR_FALSE;
+    _fd = fd;
+    return this;
+}
+
+// Called when the put area is full (or absent): flushes pending output
+// through sync(), re-arms the put area and stores c, or writes c straight
+// to the descriptor when unbuffered.
+// Returns EOF on failure, a non-EOF value on success.
+int	
+PRfilebuf::overflow(int c)
+{
+    if (allocate()==EOF)        // make sure there is a reserve area
+        return EOF;
+    if (PRfilebuf::sync()==EOF) // sync before new buffer created below
+        return EOF;
+
+    if (!unbuffered())
+        setp(base(),ebuf());
+
+    if (c!=EOF){
+        if ((!unbuffered()) && (pptr() < epptr())) // guard against recursion
+            sputc(c);
+        else{
+            // write the character itself: taking the address of the int
+            // would emit its first byte in memory, which is not the
+            // character on big-endian machines
+            const char ch = (char)c;
+            if (PR_Write(_fd, &ch, 1)!=1)
+                return(EOF);
+        }
+    }
+    return(1);  // return something other than EOF if successful
+}
+
+// Supplies the next input character without consuming it from the get
+// area.  When the get area is empty it is refilled from the descriptor
+// (buffered case), or a single byte is read (unbuffered case).
+// Returns the character as an unsigned value, or EOF on end of
+// file / error.
+int	
+PRfilebuf::underflow()
+{
+    int count;
+    unsigned char tbuf;
+
+    // data still buffered: just peek at it
+    if (in_avail())
+        return (int)(unsigned char) *gptr();
+
+    if (allocate()==EOF)        // make sure there is a reserve area
+        return EOF;
+    // flush pending output before reading
+    if (PRfilebuf::sync()==EOF)
+        return EOF;
+
+    if (unbuffered())
+        {
+        // NOTE(review): in this mode the byte is read from the descriptor
+        // and not retained in any get area
+        if (PR_Read(_fd,(void *)&tbuf,1)<=0)
+            return EOF;
+        return (int)tbuf;
+        }
+
+    // refill the whole reserve area and expose what was read
+    if ((count=PR_Read(_fd,(void *)base(),blen())) <= 0)
+        return EOF;     // reached EOF
+    setg(base(),base(),base()+count);
+    return (int)(unsigned char) *gptr();
+}
+
+// Installs a caller-supplied reserve area, or switches to unbuffered
+// mode when no usable buffer is given.  Fails (returns 0) when a
+// descriptor is attached and a reserve area already exists.
+streambuf*	
+PRfilebuf::setbuf(char *buffptr, int bufflen)
+{
+    if (is_open() && (ebuf()))
+        return 0;
+
+    const int usable = (buffptr != 0) && (bufflen > 0);
+    if (usable)
+        setb(buffptr, buffptr+bufflen, 0);
+    else
+        unbuffered(1);
+    return this;
+}
+
+// Repositions the descriptor.  Only plain files (PR_DESC_FILE) can seek;
+// buffered data is synced first.  Returns the new position, or EOF on
+// any failure.  The open-mode argument is ignored.
+streampos	
+PRfilebuf::seekoff(streamoff offset, ios::seek_dir dir, int /* mode */)
+{
+    // anything that is not a plain file cannot seek
+    if (PR_GetDescType(_fd) != PR_DESC_FILE)
+        return (EOF);
+
+    // map the iostream direction onto the NSPR one
+    PRSeekWhence whence;
+    switch (dir) {
+        case ios::beg :
+            whence = PR_SEEK_SET;
+            break;
+        case ios::cur :
+            whence = PR_SEEK_CUR;
+            break;
+        case ios::end :
+            whence = PR_SEEK_END;
+            break;
+        default:
+            // error
+            return(EOF);
+    }
+
+    if (PRfilebuf::sync()==EOF)
+        return EOF;
+
+    PRInt32 newpos = PR_Seek(_fd, offset, whence);
+    if (newpos==-1L)
+        return (EOF);
+    return((streampos)newpos);
+}
+
+
+// Brings the descriptor in line with the buffer: pending output is
+// written out and, for plain files, the file position is moved back
+// over input that was buffered but not yet consumed.  Both areas are
+// emptied afterwards.
+// Returns 0 on success, EOF when no descriptor is attached, the write
+// is short, or the rewind fails.
+int 
+PRfilebuf::sync()
+{
+    PRInt32 count; 
+
+    if (_fd==0)
+        return(EOF);
+
+    if (!unbuffered()){
+        // Sync write area
+        if ((count=out_waiting())!=0){
+            PRInt32 nout = PR_Write(_fd,
+                                    (void *) pbase(),
+                                    (unsigned int)count);
+            if (nout != count){
+                if (nout > 0) {
+                    // keep only the unwritten tail in the put area
+                    pbump(-(int)nout);
+                    memmove(pbase(), pbase()+nout, (int)(count-nout));
+                }
+                return(EOF);
+            }
+        }
+        setp(0,0); // empty put area
+
+        if (PR_GetDescType(_fd) == PR_DESC_FILE){
+            // Sockets can't seek; don't need this
+            if ((count=in_avail()) > 0){
+                // PR_Seek reports failure with -1; only then is the
+                // sync unsuccessful
+                if (PR_Seek(_fd, -count, PR_SEEK_CUR)==-1)
+                    return (EOF);
+            }
+        }
+        setg(0,0,0); // empty get area
+    }
+    return(0);
+}
+
+// Flushes and closes the attached descriptor.
+// Returns this when both the flush and the close succeed, 0 when no
+// descriptor is attached or either step fails.
+PRfilebuf * 
+PRfilebuf::close()
+{
+    if (_fd==0)
+        return 0;
+
+    int retval = sync();
+
+    // PR_Close returns a PRStatus: PR_SUCCESS is 0, so "== 0" is the
+    // success case, not the failure case
+    PRStatus closed = PR_Close(_fd);
+    if (closed == PR_SUCCESS)
+        _fd = 0;    // never keep a pointer to a closed descriptor
+
+    if ((closed != PR_SUCCESS) || (retval==EOF))
+        return 0;
+    return this;
+}
+
+// Default constructor: the stream gets a fresh, unattached PRfilebuf.
+// _PRSTR_DELBUF(0) is a per-compiler macro from prstrm.h (it expands to
+// nothing on some compilers); the buffer is deleted explicitly in
+// ~PRofstream.
+PRofstream::PRofstream():
+ostream(new PRfilebuf)
+{
+    _PRSTR_DELBUF(0);
+}
+
+// Builds a stream on an existing descriptor; the PRfilebuf created here
+// wraps fd without taking responsibility for closing it.
+PRofstream::PRofstream(PRFileDesc *fd):
+ostream(new PRfilebuf(fd))
+{
+    _PRSTR_DELBUF(0);
+}
+
+// Builds a stream on an existing descriptor using the caller's buffer
+// (buff, bufflen) as the reserve area.
+PRofstream::PRofstream(PRFileDesc *fd, char *buff, int bufflen):
+ostream(new PRfilebuf(fd, buff, bufflen))
+{
+    _PRSTR_DELBUF(0);
+}
+
+// Builds a stream and opens the named file for output (ios::out is
+// always added to mode).  failbit is set when the open fails.
+PRofstream::PRofstream(const char *name, int mode, int flags):
+ostream(new PRfilebuf)
+{
+    _PRSTR_DELBUF(0);
+    if (!rdbuf()->open(name, (mode|ios::out), flags))
+        clear(rdstate() | ios::failbit);
+}
+
+// Destructor: flushes pending output, then deletes the PRfilebuf that
+// the constructors allocated.  Where the compiler-specific buffer member
+// macro _PRSTR_BP is defined, that member is zeroed afterwards.
+// NOTE(review): presumably this stops the base class from touching the
+// deleted buffer -- confirm per compiler.
+PRofstream::~PRofstream()
+{
+	flush();
+
+	delete rdbuf();
+#ifdef _PRSTR_BP
+	_PRSTR_BP = 0;
+#endif
+}
+
+// Hands a buffer to the underlying PRfilebuf.  Only allowed while no
+// descriptor is attached; on refusal failbit is set and 0 is returned,
+// otherwise the stream buffer is returned.
+streambuf * 
+PRofstream::setbuf(char * ptr, int len)
+{
+    if (!is_open() && rdbuf()->setbuf(ptr, len))
+        return rdbuf();
+
+    clear(rdstate() | ios::failbit);
+    return 0;
+}
+
+// Attaches an existing descriptor to the underlying PRfilebuf, setting
+// failbit if the buffer refuses it.
+void 
+PRofstream::attach(PRFileDesc *fd)
+{
+    PRfilebuf * attached = rdbuf()->attach(fd);
+    if (attached)
+        return;
+
+    clear(rdstate() | ios::failbit);
+}
+
+// Opens the named file for output (ios::out is always added to mode).
+// failbit is set when a descriptor is already attached or the open fails.
+void 
+PRofstream::open(const char * name, int mode, int flags)
+{
+    if (!is_open() && rdbuf()->open(name, (mode|ios::out), flags))
+        return;
+
+    clear(rdstate() | ios::failbit);
+}
+
+// Closes the underlying PRfilebuf.  A successful close resets the stream
+// state to good; a failed one adds failbit to the current state.
+void 
+PRofstream::close()
+{
+    if (rdbuf()->close())
+        clear(0);
+    else
+        clear(rdstate() | ios::failbit);
+}
+
+
+
--- a/htmlparser/src/prstrm.h
+++ b/htmlparser/src/prstrm.h
@ -0,0 +1,94 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is Netscape Communications
+ * Corporation.  Portions created by Netscape are Copyright (C) 1998
+ * Netscape Communications Corporation.  All Rights Reserved.
+ */
+
+// The originals are in: nsprpub/lib/pstreams/
+// currently not being built into nspr.. these files will go away.
+
+#ifndef __PRSTRM
+#define __PRSTRM
+
+#include "prtypes.h"
+#include "prio.h"
+#include <iostream.h>
+
+#if defined(__GNUC__)
+#define _PRSTR_BP _strbuf
+#define _PRSTR_DELBUF(x)    /* as nothing */
+#define _PRSTR_DELBUF_C(c, x)  /* as nothing */
+#elif defined(WIN32)
+#define _PRSTR_BP bp
+#define _PRSTR_DELBUF(x)	delbuf(x)
+#define _PRSTR_DELBUF_C(c, x)	c::_PRSTR_DELBUF(x)
+#elif defined(OSF1)
+#define _PRSTR_BP m_psb
+#define _PRSTR_DELBUF(x) /* as nothing */
+#define _PRSTR_DELBUF_C(c, x)	/* as nothing */
+#else
+#define _PRSTR_BP bp
+// Unix compilers don't believe in encapsulation
+// At least on Solaris this is also ignored
+#define _PRSTR_DELBUF(x)	delbuf = x
+#define _PRSTR_DELBUF_C(c, x)	c::_PRSTR_DELBUF(x)
+#endif
+
+// streambuf implementation that reads and writes through an NSPR
+// PRFileDesc instead of a C runtime file handle.
+class PR_IMPLEMENT(PRfilebuf): public streambuf
+{
+public:
+    PRfilebuf();
+    PRfilebuf(PRFileDesc *fd);
+    PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen);
+    ~PRfilebuf();
+    // streambuf overrides
+    virtual	int	overflow(int=EOF);
+    virtual	int	underflow();
+    virtual	streambuf *setbuf(char *buff, int bufflen);
+    virtual	streampos seekoff(streamoff, ios::seek_dir, int);
+    virtual int sync();
+    // descriptor management
+    PRfilebuf *open(const char *name, int mode, int flags);
+   	PRfilebuf *attach(PRFileDesc *fd);
+    PRfilebuf *close();
+    // non-zero while a descriptor is attached (opened here or attached)
+   	int	is_open() const {return (_fd != 0);}
+    PRFileDesc *fd(){return _fd;}
+
+private:
+    PRFileDesc * _fd;       // attached descriptor, 0 when none
+    PRBool _opened;         // PR_TRUE only when open() created _fd
+	PRBool _allocated;      // PR_TRUE when open() allocated the reserve area
+};
+
+
+// Output stream whose buffer is a PRfilebuf, i.e. an ostream that writes
+// to an NSPR PRFileDesc.
+class PR_IMPLEMENT(PRofstream) : public ostream {
+public:
+	PRofstream();
+	PRofstream(const char *, int mode=ios::out, int flags = 0);
+	PRofstream(PRFileDesc *);
+	PRofstream(PRFileDesc *, char *, int);
+	~PRofstream();
+
+	streambuf * setbuf(char *, int);
+	// the stream buffer, viewed as the PRfilebuf the constructors create
+	PRfilebuf* rdbuf() { return (PRfilebuf*) ios::rdbuf(); }
+
+	void attach(PRFileDesc *);
+	PRFileDesc *fd() {return rdbuf()->fd();}
+
+	int is_open(){return rdbuf()->is_open();}
+	void open(const char *, int =ios::out, int = 0);
+	void close();
+};
+
+#endif /* __PRSTRM */
--- a/parser/htmlparser/src/CNavDTD.cpp
+++ b/parser/htmlparser/src/CNavDTD.cpp
@ -842,8 +842,12 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{
 PRBool CNavDTD::VerifyContextVector(PRInt32* aVector,PRInt32 aCount) const {
  PRBool result=PR_TRUE;

-  if(aCount>0) {
-
+  // a vector of zero or one tag has no adjacent pair to check, so it passes
+  if(aCount>1) {
+     // walk adjacent pairs; fail as soon as CanContain rejects
+     // (aVector[i], aVector[i+1])
+     for (int i = 0; i < aCount-1; i++)
+        if (!CanContain(aVector[i],aVector[i+1])) {
+           result = PR_FALSE;
+           break;
+        }
  }
  return result;
 }
--- a/parser/htmlparser/src/CNavDelegate.cpp
+++ b/parser/htmlparser/src/CNavDelegate.cpp
@ -31,6 +31,13 @@

 static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";

+/**
+ *  Deallocation callback handed to mTokenDeque at construction.
+ *  The type-erased pointer is treated as a CToken and deleted.
+ *
+ *  @param   pToken  CToken pointer passed as void*; may be null
+ */
+static void TokenFreeProc(void * pToken)
+{
+   CToken * theToken = (CToken*)pToken;
+   if (!theToken)
+      return;   // nothing to release
+   delete theToken;
+}

 /**
 *  Default constructor
@ -40,7 +47,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY
 *  @return  
 */
 CNavDelegate::CNavDelegate() :
-  ITokenizerDelegate(), mTokenDeque() {
+  ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
+  // mTokenDeque is now constructed with TokenFreeProc as its free routine.
+  // NOTE(review): PR_TRUE presumably tells the deque it owns its tokens --
+  // confirm against the deque class declaration.
 }

 /**
--- a/parser/htmlparser/src/COtherDelegate.cpp
+++ b/parser/htmlparser/src/COtherDelegate.cpp
@ -32,6 +32,14 @@
 static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";


+/**
+ *  Deallocation callback handed to mTokenDeque at construction.
+ *  The type-erased pointer is treated as a CToken and deleted.
+ *
+ *  @param   pToken  CToken pointer passed as void*; may be null
+ */
+static void TokenFreeProc(void * pToken)
+{
+   CToken * theToken = (CToken*)pToken;
+   if (!theToken)
+      return;   // nothing to release
+   delete theToken;
+}
+
 /**
 *  Default constructor
 *  
@ -40,7 +48,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY
 *  @return  
 */
 COtherDelegate::COtherDelegate() :
-  ITokenizerDelegate(), mTokenDeque() {
+  ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
 }

 /**
--- a/parser/htmlparser/src/makefile.win
+++ b/parser/htmlparser/src/makefile.win
@ -28,7 +28,7 @@ CPPSRCS=nsHTMLContentSink.cpp \
    nsToken.cpp nsTokenizer.cpp nsTokenHandler.cpp  \
    CNavDTD.cpp CNavDelegate.cpp \
    COtherDTD.cpp COtherDelegate.cpp \
-    nsHTMLParser.cpp
+    nsHTMLParser.cpp prstrm.cpp

 EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
    nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h 
@ -39,7 +39,8 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
    .\$(OBJDIR)\nsHTMLParser.obj \
    .\$(OBJDIR)\nsHTMLTokens.obj          .\$(OBJDIR)\nsParserNode.obj \
    .\$(OBJDIR)\nsScanner.obj             .\$(OBJDIR)\nsToken.obj \
-    .\$(OBJDIR)\nsTokenizer.obj           .\$(OBJDIR)\nsTokenHandler.obj 
+    .\$(OBJDIR)\nsTokenizer.obj           .\$(OBJDIR)\nsTokenHandler.obj \
+    .\$(OBJDIR)\prstrm.obj

 LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib

--- a/parser/htmlparser/src/nsHTMLParser.cpp
+++ b/parser/htmlparser/src/nsHTMLParser.cpp
@ -33,10 +33,13 @@
 #include "prio.h"
 #include "plstr.h"
 #include <fstream.h>
+#include "prstrm.h"
 #include "nsIInputStream.h"
 #ifdef XP_PC
 #include <direct.h> //this is here for debug reasons...
 #endif
+#include <time.h>
+#include "prmem.h"

 static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);                 
 static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID); 
@ -49,10 +52,20 @@ static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

 static char*  gVerificationOutputDir=0;
+static PRBool gRecordingStatistics=PR_TRUE;
 static char*  gURLRef=0;
 static int    rickGDebug=0;
 static const int gTransferBufferSize=4096;  //size of the buffer used in moving data from iistream

+// Sets the directory the debug verification output is written under.
+// The pointer itself is stored (the string is not copied), so the caller's
+// buffer has to stay valid for as long as it is in use.  Passing 0 leaves
+// gVerificationOutputDir null, which the code in this file tests for.
+extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
+{
+	gVerificationOutputDir = verify_dir;
+}
+
+// Turns the gathering of context vector statistics on or off by setting
+// gRecordingStatistics (which starts out as PR_TRUE).
+extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
+{
+	gRecordingStatistics = bval;
+}

 /**
 *  This method is defined in nsIParser. It is used to 
@ -73,7 +86,6 @@ NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult)
  return it->QueryInterface(kIParserIID, (void **) aInstancePtrResult);
 }

-
 /**
 *  This big dispatch method is used to route token handler calls to the right place.
 *  What's wrong with it? This table, and the dispatch methods themselves need to be 
@ -367,6 +379,298 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) {
  return old;
 }

+/** 
+ * This debug method records an invalid context vector together with its
+ * associated URL and debug source dump in a simple flat file mapping, which
+ * resides in the verification directory and is named vector.map
+ *
+ * @update  jevering 6/06/98
+ * @param   path is the directory structure indicating the bad context vector
+ * @param   pURLRef is the associated URL
+ * @param   filename to record mapping to if not already recorded
+ * @return  TRUE if it is already recorded (don't re-record)
+ */
+
+// names (appended to the verification directory) of the two debug files:
+// the URL/vector/dump mapping and the vector occurrence statistics
+#define CONTEXT_VECTOR_MAP	"/vector.map"
+#define CONTEXT_VECTOR_STAT	"/vector.stat"
+// column heading written above each table in the statistics file
+#define VECTOR_TABLE_HEADER "count  vector\r\n====== =============================================\r\n"    
+// Looks for a "URL vector" pair in the record file and appends a new
+// "URL vector filename\r\n" line when the pair is not there yet.
+//
+// path      verification path that identifies the bad context vector
+// pURLRef   URL being parsed (a null pointer is recorded as "(null)")
+// filename  debug source dump to associate with the pair
+//
+// Returns PR_TRUE when the pair was already present (nothing is written),
+// PR_FALSE when it was added or when the record file could not be used.
+PRBool DebugRecord(char * path, char * pURLRef, char * filename)
+{
+   char recordPath[2048];
+   PRBool found = PR_FALSE;
+
+   // without a verification directory there is nowhere to record to
+   if (!gVerificationOutputDir || !path || !filename)
+      return PR_FALSE;
+
+   // refuse to build a name that will not fit in recordPath
+   if (PL_strlen(gVerificationOutputDir) + PL_strlen(CONTEXT_VECTOR_MAP) >= sizeof(recordPath))
+      return PR_FALSE;
+
+   // create the record file name from the verification directory
+   // and the default name.
+   strcpy(recordPath,gVerificationOutputDir);
+   strcat(recordPath,CONTEXT_VECTOR_MAP);
+
+   // if the file exists, only open it for read/write,
+   // otherwise create it
+   PRIntn oflags = PR_RDWR;
+   if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
+      oflags |= PR_CREATE_FILE;
+
+   PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
+   if (!recordFile)
+      return PR_FALSE;
+
+   // vectors are stored in the format "URL vector filename", where the
+   // vector is the verification path and the filename is the debug source
+   // dump.  The leading "URL vector " (keyLength bytes, trailing space
+   // included) is what identifies an occurrence.
+   const char * url = pURLRef ? pURLRef : "(null)";
+   PRUint32 keyLength = PL_strlen(url) + 1 + PL_strlen(path) + 1;
+   PRUint32 entryLength = keyLength + PL_strlen(filename) + 2;
+
+   // sized from the pieces so that long URLs cannot overrun the buffer
+   char * entry = (char *)PR_Malloc(entryLength + 1);
+   if (!entry) {
+      PR_Close(recordFile);
+      return PR_FALSE;
+   }
+   sprintf(entry,"%s %s %s\r\n", url, path, filename);
+
+   // get the file size, read in the file and check it a line at a
+   // time to see if we have already recorded this occurrence
+   PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
+   if (iSize > 0) {
+      char * buffer = (char*)PR_Malloc(iSize + 1);
+      if (buffer) {
+         // beginning of file for read
+         PR_Seek(recordFile,0,PR_SEEK_SET);
+         PRInt32 iRead = PR_Read(recordFile,buffer,iSize);
+         if (iRead < 0)
+            iRead = 0;
+         buffer[iRead] = '\0';
+
+         char * line = buffer;
+         char * end = buffer + iRead;
+         while (!found && line < end) {
+            // lines end in "\r\n"; an unterminated tail is ignored
+            char * eol = (char*)memchr(line, '\r', end - line);
+            if (!eol)
+               break;
+            *eol = '\0';
+
+            // the key ends in a space, so "URL /a/b" cannot be taken
+            // for an already recorded "URL /a/b/c"
+            if (!PL_strncmp(entry,line,keyLength))
+               found = PR_TRUE;
+
+            // step over the "\r\n" pair
+            line = eol + 2;
+         }
+
+         // throw away the record file data
+         PR_Free(buffer);
+      }
+   }
+
+   // if this bad vector was not recorded, add it to the record file
+   if (!found) {
+      PR_Seek(recordFile,0,PR_SEEK_END);
+      PR_Write(recordFile,entry,entryLength);
+   }
+
+   // single exit: the file and the entry are released on every path
+   PR_Close(recordFile);
+   PR_Free(entry);
+
+   return found;
+}
+
+// structure to store the vector statistic information
+
+// one entry of the statistics table: a context vector together with the
+// number of times it was noted and the verification result it was noted with
+typedef struct vector_info {
+	PRInt32 references;     // number of occurrences counted
+	PRInt32 count;          // number of tags in the vector
+    PRBool  good_vector;    // is this a valid vector?
+	PRInt32 * vector;       // and the vector (separately allocated copy of the tags)
+} VectorInfo;
+
+// global table for storing vector statistics and the size
+static VectorInfo ** gVectorInfoArray = 0;
+static PRInt32 gVectorCount = 0;
+
+// the statistic vector table grows each time it exceeds this
+// stepping value
+#define TABLE_SIZE	128
+
+// compare function for quick sort.  Compares references and
+// sorts in decending order
+
+// qsort callback: both arguments point at VectorInfo* table slots.  The
+// result is positive when the second entry has more references, which
+// places the most referenced vectors first.
+static int compare( const void *arg1, const void *arg2 )
+{
+	VectorInfo * first = *(VectorInfo**)arg1;
+	VectorInfo * second = *(VectorInfo**)arg2;
+	return second->references - first->references;
+}
+
+/**
+ * quick sort the statistic array causing the most frequently
+ * used vectors to be at the top (this makes it a little speedier
+ * when looking them up)
+ */
+
+void SortVectorRecord(void)
+{
+	// an empty table has nothing to order
+	if (!gVectorCount)
+		return;
+
+	// order the table slots by descending reference count
+	qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
+}
+
+/**
+ *  This debug routine stores statistical information about a
+ *  context vector.  The context vector statistics are stored in
+ *  a global array.  The table is re-sorted each time it grows to
+ *  aid in lookup speed.  If a vector has already been noted, its
+ *  reference count is bumped, otherwise it is added to the table.
+ *
+ *  @update     jevering 6/11/98
+ *  @param      aTags is the tag list (vector)
+ *  @param      count is the size of the vector
+ *  @param      good_vector tells whether the vector is a valid one
+ *  @return
+ */
+
+// Counts one occurrence of the context vector aTags[0..count-1].  A vector
+// that is already in the global table has its reference count bumped;
+// otherwise a copy is appended with good_vector stored alongside it.  When
+// memory cannot be obtained the sample is dropped and the table is left
+// untouched.
+void NoteVector(PRInt32 aTags[],PRInt32 count, PRBool good_vector)
+{
+    // if the table doesn't exist, create it
+	if (!gVectorInfoArray) {
+		gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
+		if (!gVectorInfoArray)
+			return;
+	}
+	else {
+        // attempt to look up the vector; contents are compared only
+        // when the sizes agree
+		for (PRInt32 i = 0; i < gVectorCount; i++) {
+			VectorInfo * pKnown = gVectorInfoArray[i];
+			if (pKnown->count == count &&
+				!memcmp(pKnown->vector, aTags, sizeof(PRInt32)*count)) {
+                // already noted.. bump the ref count
+				pKnown->references++;
+				return;
+			}
+		}
+	}
+
+    // the context vector hasn't been noted, so allocate an entry for it.
+    // At least one element is requested so that an empty vector does not
+    // look like an allocation failure.
+	VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
+	if (!pVectorInfo)
+		return;
+	pVectorInfo->vector = (PRInt32*)PR_Malloc((count > 0 ? count : 1)*sizeof(PRInt32));
+	if (!pVectorInfo->vector) {
+		PR_Free(pVectorInfo);
+		return;
+	}
+	pVectorInfo->references = 1;
+	pVectorInfo->count = count;
+	pVectorInfo->good_vector = good_vector;
+	if (count > 0)
+		memcpy(pVectorInfo->vector,aTags,sizeof(PRInt32)*count);
+
+    // the table holds a multiple of TABLE_SIZE slots, so it is full when
+    // the count reaches one.  Grow it with the allocator it came from
+    // (PR_Calloc memory must not be handed to the C runtime realloc) and
+    // keep the old table if growing fails.
+	if (gVectorCount > 0 && (gVectorCount % TABLE_SIZE) == 0) {
+		VectorInfo ** pGrown = (VectorInfo**)PR_Realloc(
+			gVectorInfoArray,
+			(PRUint32)(sizeof(VectorInfo*)*(gVectorCount+TABLE_SIZE)));
+		if (!pGrown) {
+			PR_Free(pVectorInfo->vector);
+			PR_Free(pVectorInfo);
+			return;
+		}
+		gVectorInfoArray = pGrown;
+
+        // re-sort on growth so frequent vectors are found sooner
+		SortVectorRecord();
+	}
+
+	gVectorInfoArray[gVectorCount++] = pVectorInfo;
+}
+
+// Formats one statistics line, "<count> <TAG><TAG>...\r\n", into
+// vector_string.
+//
+// vector_string  destination buffer
+// pInfo          table entry to format
+// max_length     size of the destination in bytes; defaults to the 1K the
+//                callers in this file provide
+//
+// Tags that would not fit are left off so that the line terminator and the
+// trailing NUL always stay inside the buffer.
+void MakeVectorString(char * vector_string, VectorInfo * pInfo, PRUint32 max_length = 1024)
+{
+    // bytes kept in reserve for "\r\n" and the terminator
+    const PRUint32 kTail = 3;
+
+    // "%6d " can expand to 12 characters for a full width count
+    if (max_length < 12 + kTail) {
+        if (max_length > 0)
+            vector_string[0] = '\0';
+        return;
+    }
+
+    sprintf (vector_string, "%6d ", pInfo->references);
+    PRUint32 used = PL_strlen(vector_string);
+
+    for (PRInt32 j = 0; j < pInfo->count; j++) {
+        const char * name = (const char *)GetTagName(pInfo->vector[j]);
+        PRUint32 nameLength = PL_strlen(name);
+
+        // '<' + name + '>' has to fit in front of the reserved tail
+        if (used + nameLength + 2 + kTail > max_length)
+            break;
+
+        vector_string[used++] = '<';
+        memcpy(vector_string + used, name, nameLength);
+        used += nameLength;
+        vector_string[used++] = '>';
+    }
+
+    // copies the terminating NUL along with the line ending
+    memcpy(vector_string + used, "\r\n", kTail);
+}
+
+/**
+ *  This debug routine dumps out the vector statistics to a text
+ *  file in the verification directory and defaults to the name
+ *  "vector.stat".  It contains all parsed context vectors and their
+ *  occurrence count sorted in descending order.
+ *  
+ *  @update     jevering 6/11/98
+ *  @param
+ *  @return
+ */
+
+// Writes the gathered statistics to the stat file (invalid vectors first,
+// then valid ones, each most frequent first) and then releases the whole
+// table, whether or not the file could be written.
+extern "C" NS_EXPORT void DumpVectorRecord(void)
+{
+    // do we have a table?
+	if (!gVectorCount)
+		return;
+
+    // hopefully, they wont exceed 1K.
+	char vector_string[1024];
+	char path[1024];
+	PRInt32 i;
+
+	path[0] = '\0';
+
+    // put in the verification directory.. else the root (also used when
+    // the directory name is too long to fit in path)
+	if (gVerificationOutputDir &&
+		PL_strlen(gVerificationOutputDir) + PL_strlen(CONTEXT_VECTOR_STAT) < sizeof(path))
+		strcpy(path,gVerificationOutputDir);
+
+	strcat(path,CONTEXT_VECTOR_STAT);
+
+    // open the stat file; PR_TRUNCATE discards any existing stat file so
+    // that a shorter report does not leave stale text behind
+	PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR|PR_TRUNCATE,0);
+	if (statisticFile) {
+		{
+            // scoped so the stream is flushed and gone before the
+            // descriptor is closed below
+			PRofstream ps;
+			ps.attach(statisticFile);
+
+            // oh what the heck, sort it again
+			SortVectorRecord();
+
+            // cute little header
+			sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
+			ps << vector_string;
+
+            // the URL/vector/dump mapping lives in the map file
+			ps << "Invalid context vector summary (see " CONTEXT_VECTOR_MAP ") for mapping.\r\n";
+			ps << VECTOR_TABLE_HEADER;
+
+            // dump out the bad vectors encountered
+			for (i = 0; i < gVectorCount; i++) {
+				if (!gVectorInfoArray[i]->good_vector) {
+					MakeVectorString(vector_string, gVectorInfoArray[i]);
+					ps << vector_string;
+				}
+			}
+
+			ps << "\r\n\r\nValid context vector summary\r\n";
+			ps << VECTOR_TABLE_HEADER;
+
+            // and then the good vectors
+			for (i = 0; i < gVectorCount; i++) {
+				if (gVectorInfoArray[i]->good_vector) {
+					MakeVectorString(vector_string, gVectorInfoArray[i]);
+					ps << vector_string;
+				}
+			}
+		}
+
+        // only close what was actually opened
+		PR_Close(statisticFile);
+	}
+
+    // ok, we are done with the table: free each entry together with its
+    // tag array, then the table itself
+	for (i = 0; i < gVectorCount; i++) {
+		PR_Free(gVectorInfoArray[i]->vector);
+		PR_Free(gVectorInfoArray[i]);
+	}
+	PR_Free(gVectorInfoArray);
+	gVectorInfoArray = 0;
+	gVectorCount = 0;
+}

 /**
 * This debug method allows us to determine whether or not 
@ -378,47 +682,72 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) {
 * @param   aDTD is the DTD we plan to ask for verification
 * @return  TRUE if we know how to handle it, else false
 */
-PRBool VerifyContextVector(PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) {
+
+PRBool VerifyContextVector(CTokenizer * tokenizer, PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) {

  PRBool  result=PR_TRUE;

+  //ok, now see if we understand this vector
+
+  if(0!=gVerificationOutputDir || gRecordingStatistics) 
+      result=aDTD->VerifyContextVector(aTags,count);
+
+  if (gRecordingStatistics) {
+	  NoteVector(aTags,count,result);
+  }
+
  if(0!=gVerificationOutputDir) {
-  
-#ifdef XP_PC
-      char    path[_MAX_PATH+1];
+      char    path[2048];
      strcpy(path,gVerificationOutputDir);
-#endif

      int i=0;      
      for(i=0;i<count;i++){
-
-#ifdef NS_WIN32
        strcat(path,"/");
        const char* name=GetTagName(aTags[i]);
        strcat(path,name);
        mkdir(path);
-#endif
      }
-      //ok, now see if we understand this vector
-      result=aDTD->VerifyContextVector(aTags,count);
-
 	  if(PR_FALSE==result){
-#ifdef NS_WIN32
-      // save file to directory indicated by bad context vector
-      int iCount = 1;
+      static PRBool rnd_initialized = PR_FALSE;
+
+      if (!rnd_initialized) {
+         // seed randomn number generator to aid in temp file
+         // creation.
+         rnd_initialized = PR_TRUE;
+         srand((unsigned)time(NULL));
+      }
+
+      // generate a filename to dump the html source into
      char filename[_MAX_PATH];
      do {
-         sprintf(filename,"%s/html%04d.dbg", path, iCount++);
+         // use system time to generate a temporary file name
+         time_t ltime;
+         time (&ltime);
+         // add in random number so that we can create uniques names
+         // faster than simply every second.
+         ltime += (time_t)rand();
+         sprintf(filename,"%s/%lX.html", path, ltime);
+         // try until we find one we can create
      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
-      PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
-      if (debugFile) {
-         PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
-         PR_Write(debugFile,"\n",PL_strlen("\n"));
-         PR_Close(debugFile);
+
+      // check to see if we already recorded an instance of this particular
+      // bad vector.  
+      if (!DebugRecord(path,gURLRef, filename))
+      {
+         // save file to directory indicated by bad context vector
+         PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
+         // if we were able to open the debug file, then
+         // write the true URL at the top of the file.
+         if (debugFile) {
+            // dump the html source into the newly created file.
+            if (tokenizer) {
+               PRofstream ps;
+               ps.attach(debugFile);
+               tokenizer->DebugDumpSource(ps);
+            }
+            PR_Close(debugFile);
+         }
      }
-#endif
-      //add debugging code here to record the fact that we just encountered
-      //a context vector we don't know how to handle.
    }
  }

@ -454,7 +783,7 @@ PRInt32 nsHTMLParser::IterateTokens() {
    if(aHandler) {
      theMarkPos=*mCurrentPos;
      result=(*aHandler)(theToken,this);
-      VerifyContextVector(mContextStack,mContextStackPos,mDTD);
+      VerifyContextVector(mTokenizer, mContextStack,mContextStackPos,mDTD);
    }
    ++(*mCurrentPos);
  }
--- a/parser/htmlparser/src/nsTokenizer.cpp
+++ b/parser/htmlparser/src/nsTokenizer.cpp
@ -23,6 +23,13 @@
 #include "nsScanner.h"
 #include "nsIURL.h"

+/**
+ *  Deallocation callback given to the tokenizer's token deque; it
+ *  destroys the CToken that the untyped element pointer refers to.
+ *
+ *  @param   pToken is the deque element to destroy (null is ignored)
+ */
+static void TokenFreeProc(void * pToken)
+{
+   if (NULL == pToken)
+      return;
+   delete (CToken*)pToken;
+}

 /**
 *  Default constructor
@ -33,7 +40,7 @@
 *  @return 
 */
 CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) :
-  mTokenDeque() {
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mDelegate=aDelegate;
  mScanner=new CScanner(aURL,aMode);
  mParseMode=aMode;
@ -48,7 +55,7 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo
 *  @return 
 */
 CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
-  mTokenDeque() {
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mDelegate=aDelegate;
  mScanner=new CScanner(aFilename,aMode);
  mParseMode=aMode;
@ -63,7 +70,7 @@ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,ePars
 *  @return 
 */
 CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
-  mTokenDeque() {
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mDelegate=aDelegate;
  mScanner=new CScanner(aMode);
  mParseMode=aMode;
--- a/parser/htmlparser/src/prstrm.cpp
+++ b/parser/htmlparser/src/prstrm.cpp
@ -0,0 +1,343 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is Netscape Communications
+ * Corporation.  Portions created by Netscape are Copyright (C) 1998
+ * Netscape Communications Corporation.  All Rights Reserved.
+ */
+#include "prtypes.h"
+#include "prstrm.h"
+#include <string.h>
+
+const PRIntn STRM_BUFSIZ = 8192;
+
+// Creates a buffer with no descriptor; open() or attach() supplies one.
+PRfilebuf::PRfilebuf():
+_fd(0),
+_opened(PR_FALSE),
+_allocated(PR_FALSE)
+{
+}
+
+// Wraps a descriptor the caller opened.  _opened stays false, so the
+// destructor will sync() but not close the descriptor.
+PRfilebuf::PRfilebuf(PRFileDesc *fd):
+streambuf(),
+_fd(fd),
+_opened(PR_FALSE),
+_allocated(PR_FALSE)
+{
+}
+
+// As above, and additionally hands buffptr/bufflen to setbuf().
+PRfilebuf::PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen):
+_fd(fd),
+_opened(PR_FALSE),
+_allocated(PR_FALSE)
+{
+    PRfilebuf::setbuf(buffptr, bufflen);
+}
+
+// Closes the descriptor when this object opened it, otherwise only pushes
+// out what is pending; then releases the buffer if open() allocated it.
+PRfilebuf::~PRfilebuf()
+{
+    if (_opened){
+        close();
+    }else
+        sync();
+	// the buffer comes from new char[] in open(), so it needs the array
+	// form of delete
+	if (_allocated)
+		delete [] base();
+}
+
+// Opens the named file and makes it this buffer's descriptor.
+//
+// name   file to open
+// mode   ios open mode bits; translated to NSPR PR_* open flags below
+// flags  passed to PR_Open as its third argument (except under WIN32)
+//
+// Returns this on success.  Returns 0 when a descriptor is already present,
+// when mode has neither ios::in nor ios::out (after ios::app / ios::trunc
+// have implied ios::out), when PR_Open fails, or when the ios::ate seek to
+// the end fails (in which case close() is called first).
+PRfilebuf*	
+PRfilebuf::open(const char *name, int mode, int flags)
+{
+     if (_fd != 0)
+        return 0;    // error if already open
+     PRIntn PRmode = 0;
+    // translate mode argument
+    if (!(mode & ios::nocreate))
+        PRmode |= PR_CREATE_FILE;
+    //if (mode & ios::noreplace)
+    //    PRmode |= O_EXCL;
+    if (mode & ios::app){
+        mode |= ios::out;
+        PRmode |= PR_APPEND;
+    }
+    if (mode & ios::trunc){
+        mode |= ios::out;  // IMPLIED
+        PRmode |= PR_TRUNCATE;
+    }
+    if (mode & ios::out){
+        if (mode & ios::in)
+            PRmode |= PR_RDWR;
+        else
+            PRmode |= PR_WRONLY;
+        // plain output (no in/app/ate/noreplace) truncates the file
+        if (!(mode & (ios::in|ios::app|ios::ate|ios::noreplace))){
+            mode |= ios::trunc; // IMPLIED
+            PRmode |= PR_TRUNCATE;
+        }
+    }else if (mode & ios::in)
+        PRmode |= PR_RDONLY;
+    else
+        return 0;    // error if not ios:in or ios::out
+
+
+    //
+    // The usual portable across unix crap...
+    // NT gets a hokey piece of junk layer that prevents
+    // access to the API.
+    // NOTE(review): under WIN32 the open flags are passed a second time in
+    // place of the caller's flags argument - confirm this is intended.
+#ifdef WIN32
+    _fd = PR_Open(name, PRmode, PRmode);
+#else
+    _fd = PR_Open(name, PRmode, flags);
+#endif
+    if (_fd == 0)
+        return 0;
+    // remember that this object opened the descriptor (the destructor
+    // calls close() in that case)
+    _opened = PR_TRUE;
+    // give the stream a STRM_BUFSIZ buffer unless it is unbuffered or
+    // already has one
+    if ((!unbuffered()) && (!ebuf())){
+        char * sbuf = new char[STRM_BUFSIZ];
+        if (!sbuf)
+            unbuffered(1);
+        else{
+			_allocated = PR_TRUE;
+            streambuf::setb(sbuf,sbuf+STRM_BUFSIZ,0);
+		}
+    }
+    // ios::ate: start positioned at the end of the file
+    if (mode & ios::ate){
+        if (seekoff(0,ios::end,mode)==EOF){
+            close();
+            return 0;
+        }
+    }
+    return this;
+}
+
+// Adopts a descriptor that the caller opened.  The buffer is marked as not
+// having opened it itself, and the object is returned.
+PRfilebuf*	
+PRfilebuf::attach(PRFileDesc *fd)
+{
+    _fd = fd;
+    _opened = PR_FALSE;
+    return this;
+}
+
+// Called when the put area is full.  Writes out what is pending, sets up a
+// fresh put area (buffered streams) and then stores or writes c.
+//
+// c  the character to output, or EOF to flush only
+//
+// Returns EOF on failure, otherwise a value other than EOF.
+int	
+PRfilebuf::overflow(int c)
+{
+    if (allocate()==EOF)        // make sure there is a reserve area
+        return EOF;
+    if (PRfilebuf::sync()==EOF) // sync before new buffer created below
+        return EOF;
+
+    if (!unbuffered())
+        setp(base(),ebuf());
+
+    if (c!=EOF){
+        if ((!unbuffered()) && (pptr() < epptr())) // guard against recursion
+            sputc(c);
+        else{
+            // write the character value itself; taking the address of the
+            // int would emit its first byte in memory, which is not the
+            // character on big-endian machines
+            char ch = (char)c;
+            if (PR_Write(_fd, &ch, 1)!=1)
+                return(EOF);
+        }
+    }
+    return(1);  // return something other than EOF if successful
+}
+
+// Called when the get area is empty.  Returns the next character as an
+// unsigned value, or EOF when nothing can be read.
+int	
+PRfilebuf::underflow()
+{
+    int count;
+    unsigned char tbuf;
+
+    // characters are still buffered: hand back the current one
+    if (in_avail())
+        return (int)(unsigned char) *gptr();
+
+    if (allocate()==EOF)        // make sure there is a reserve area
+        return EOF;
+    // flush pending output before the buffer is reused for input
+    if (PRfilebuf::sync()==EOF)
+        return EOF;
+
+    // unbuffered: read a single byte straight from the descriptor
+    if (unbuffered())
+        {
+        if (PR_Read(_fd,(void *)&tbuf,1)<=0)
+            return EOF;
+        return (int)tbuf;
+        }
+
+    // buffered: fill the whole buffer and make it the get area
+    if ((count=PR_Read(_fd,(void *)base(),blen())) <= 0)
+        return EOF;     // reached EOF
+    setg(base(),base(),base()+count);
+    return (int)(unsigned char) *gptr();
+}
+
+// Installs a caller supplied buffer, or switches to unbuffered operation
+// when no usable buffer is given.  Returns 0 (nothing changed) when a
+// descriptor is present and a buffer is already in place.
+streambuf*	
+PRfilebuf::setbuf(char *buffptr, int bufflen)
+{
+    if (is_open() && (ebuf()))
+        return 0;
+
+    if (buffptr && (bufflen > 0))
+        setb(buffptr, buffptr+bufflen, 0);
+    else
+        unbuffered(1);
+
+    return this;
+}
+
+// Repositions the descriptor.  Only descriptors of type PR_DESC_FILE can be
+// repositioned; anything else yields EOF, as does an unknown direction, a
+// failed sync() or a failed PR_Seek.  On success the new position is
+// returned.
+streampos	
+PRfilebuf::seekoff(streamoff offset, ios::seek_dir dir, int /* mode */)
+{
+    if (PR_GetDescType(_fd) != PR_DESC_FILE)
+        return (EOF);
+
+    // map the ios direction onto the NSPR one
+    PRSeekWhence whence;
+    if (dir == ios::beg)
+        whence = PR_SEEK_SET;
+    else if (dir == ios::cur)
+        whence = PR_SEEK_CUR;
+    else if (dir == ios::end)
+        whence = PR_SEEK_END;
+    else
+        return(EOF);
+
+    // settle the buffer before the descriptor moves
+    if (PRfilebuf::sync()==EOF)
+        return EOF;
+
+    PRInt32 newpos = PR_Seek(_fd, offset, whence);
+    if (newpos == -1L)
+        return (EOF);
+    return((streampos)newpos);
+}
+
+
+// Brings the descriptor and the buffer into agreement: pending output is
+// written, and for files the descriptor is moved back over input that was
+// read ahead but not consumed.  Both areas are then emptied.
+//
+// Returns 0 on success, EOF when there is no descriptor, when the write
+// comes up short or when the seek fails.
+int 
+PRfilebuf::sync()
+{
+    PRInt32 count; 
+
+    if (_fd==0)
+        return(EOF);
+
+    if (!unbuffered()){
+        // Sync write area
+        if ((count=out_waiting())!=0){
+            PRInt32 nout;
+            if ((nout =PR_Write(_fd,
+                               (void *) pbase(),
+                               (unsigned int)count)) != count){
+                if (nout > 0) {
+                    // short write: keep the unwritten tail at the front
+                    // of the put area
+                    pbump(-(int)nout);
+                    memmove(pbase(), pbase()+nout, (int)(count-nout));
+                }
+                return(EOF);
+            }
+        }
+        setp(0,0); // empty put area
+
+        if (PR_GetDescType(_fd) == PR_DESC_FILE){
+            // Sockets can't seek; don't need this
+            if ((count=in_avail()) > 0){
+                // PR_Seek reports failure with -1; a successful seek
+                // must fall through so the get area is emptied below
+                if (PR_Seek(_fd, -count, PR_SEEK_CUR)==-1L)
+                {
+                    return (EOF);
+                }
+            }
+        }
+        setg(0,0,0); // empty get area
+    }
+    return(0);
+}
+
+// Flushes the buffer and closes the descriptor.
+//
+// Returns this when both the sync and the close succeeded, 0 otherwise
+// (including when there is no descriptor).  The descriptor is forgotten
+// once PR_Close has succeeded.
+PRfilebuf * 
+PRfilebuf::close()
+{
+    int retval;
+    if (_fd==0)
+        return 0;
+
+    retval = sync();
+
+    // PR_Close returns a PRStatus and PR_SUCCESS is 0, so the status has
+    // to be compared against PR_SUCCESS rather than tested for non-zero
+    if (PR_Close(_fd) != PR_SUCCESS)
+        return 0;
+    _fd = 0;
+
+    // the descriptor is closed, but data may have been lost in the sync
+    if (retval==EOF)
+        return 0;
+    return this;
+}
+
+// Every constructor installs a freshly allocated PRfilebuf as the stream
+// buffer and calls _PRSTR_DELBUF(0); the destructor deletes the buffer
+// itself.
+
+// No descriptor yet.
+PRofstream::PRofstream():
+ostream(new PRfilebuf)
+{
+    _PRSTR_DELBUF(0);
+}
+
+// Writes to a descriptor the caller opened.
+PRofstream::PRofstream(PRFileDesc *fd):
+ostream(new PRfilebuf(fd))
+{
+    _PRSTR_DELBUF(0);
+}
+
+// As above, using the caller's buffer of bufflen bytes.
+PRofstream::PRofstream(PRFileDesc *fd, char *buff, int bufflen):
+ostream(new PRfilebuf(fd, buff, bufflen))
+{
+    _PRSTR_DELBUF(0);
+}
+
+// Opens the named file; ios::out is always added to mode.  failbit is set
+// when the open fails.
+PRofstream::PRofstream(const char *name, int mode, int flags):
+ostream(new PRfilebuf)
+{
+    _PRSTR_DELBUF(0);
+    if (!rdbuf()->open(name, (mode|ios::out), flags))
+        clear(rdstate() | ios::failbit);
+}
+
+// Flushes the stream and deletes the PRfilebuf the constructor allocated.
+PRofstream::~PRofstream()
+{
+	flush();
+
+	delete rdbuf();
+	// clear the base class's buffer pointer where the compiler specific
+	// member name is known
+	// NOTE(review): presumably so the base class does not touch the deleted
+	// buffer - confirm against the iostream implementation in use
+#ifdef _PRSTR_BP
+	_PRSTR_BP = 0;
+#endif
+}
+
+// Passes a buffer on to the PRfilebuf.  This is refused once a descriptor
+// is present; a refusal or a failure sets failbit and returns 0, success
+// returns the stream buffer.
+streambuf * 
+PRofstream::setbuf(char * ptr, int len)
+{
+    if (!is_open() && rdbuf()->setbuf(ptr, len))
+        return rdbuf();
+
+    clear(rdstate() | ios::failbit);
+    return 0;
+}
+
+// Makes the stream write to a descriptor the caller opened; failbit is set
+// if the buffer reports failure.
+void 
+PRofstream::attach(PRFileDesc *fd)
+{
+    if (rdbuf()->attach(fd))
+        return;
+    clear(rdstate() | ios::failbit);
+}
+
+// Opens the named file for output (ios::out is always added to mode).
+// failbit is set when a descriptor is already present or the open fails.
+void 
+PRofstream::open(const char * name, int mode, int flags)
+{
+    if (!is_open() && rdbuf()->open(name, (mode|ios::out), flags))
+        return;
+    clear(rdstate() | ios::failbit);
+}
+
+// Closes the underlying buffer.  The stream state is reset to good when
+// the buffer reports success; otherwise failbit is added to it.
+void 
+PRofstream::close()
+{
+    if (rdbuf()->close())
+        clear(0);
+    else
+        clear(rdstate() | ios::failbit);
+}
+
+
+
--- a/parser/htmlparser/src/prstrm.h
+++ b/parser/htmlparser/src/prstrm.h
@ -0,0 +1,94 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is Netscape Communications
+ * Corporation.  Portions created by Netscape are Copyright (C) 1998
+ * Netscape Communications Corporation.  All Rights Reserved.
+ */
+
+// The originals are in: nsprpub/lib/pstreams/
+// currently not being built into nspr.. these files will go away.
+
+#ifndef __PRSTRM
+#define __PRSTRM
+
+#include "prtypes.h"
+#include "prio.h"
+#include <iostream.h>
+
+// Per-compiler spellings used by PRofstream:
+//   _PRSTR_BP            name assigned 0 in the PRofstream destructor
+//   _PRSTR_DELBUF(x)     called with 0 by the PRofstream constructors;
+//                        expands to nothing where it is not available
+//   _PRSTR_DELBUF_C(c,x) the same, qualified with class c
+#if defined(__GNUC__)
+#define _PRSTR_BP _strbuf
+#define _PRSTR_DELBUF(x)    /* as nothing */
+#define _PRSTR_DELBUF_C(c, x)  /* as nothing */
+#elif defined(WIN32)
+#define _PRSTR_BP bp
+#define _PRSTR_DELBUF(x)	delbuf(x)
+#define _PRSTR_DELBUF_C(c, x)	c::_PRSTR_DELBUF(x)
+#elif defined(OSF1)
+#define _PRSTR_BP m_psb
+#define _PRSTR_DELBUF(x) /* as nothing */
+#define _PRSTR_DELBUF_C(c, x)	/* as nothing */
+#else
+#define _PRSTR_BP bp
+// Unix compilers don't believe in encapsulation
+// At least on Solaris this is also ignored
+#define _PRSTR_DELBUF(x)	delbuf = x
+#define _PRSTR_DELBUF_C(c, x)	c::_PRSTR_DELBUF(x)
+#endif
+
+// A streambuf that reads from and writes to an NSPR PRFileDesc.
+class PR_IMPLEMENT(PRfilebuf): public streambuf
+{
+public:
+    PRfilebuf();
+    PRfilebuf(PRFileDesc *fd);
+    PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen);
+    ~PRfilebuf();
+    virtual	int	overflow(int=EOF);
+    virtual	int	underflow();
+    virtual	streambuf *setbuf(char *buff, int bufflen);
+    virtual	streampos seekoff(streamoff, ios::seek_dir, int);
+    virtual int sync();
+    // opens the named file; returns 0 on failure or if a descriptor is present
+    PRfilebuf *open(const char *name, int mode, int flags);
+    // adopts a descriptor opened by the caller
+   	PRfilebuf *attach(PRFileDesc *fd);
+    // syncs and closes the descriptor; returns 0 on failure
+    PRfilebuf *close();
+    // true whenever a descriptor is present, whether opened here or attached
+   	int	is_open() const {return (_fd != 0);}
+    PRFileDesc *fd(){return _fd;}
+
+private:
+    PRFileDesc * _fd;       // descriptor in use, 0 when there is none
+    PRBool _opened;         // set by open(); the destructor then calls close()
+	PRBool _allocated;      // set when open() allocated the buffer itself
+};
+
+
+// An ostream whose stream buffer is a PRfilebuf, so that output is written
+// to an NSPR PRFileDesc.  Each constructor allocates the PRfilebuf with new
+// and the destructor deletes it.
+class PR_IMPLEMENT(PRofstream) : public ostream {
+public:
+	// no descriptor yet; use open() or attach() afterwards
+	PRofstream();
+	// opens the named file for output; failbit is set if the open fails
+	PRofstream(const char *, int mode=ios::out, int flags = 0);
+	// writes to an already opened descriptor
+	PRofstream(PRFileDesc *);
+	// as above, with a caller supplied buffer and its length
+	PRofstream(PRFileDesc *, char *, int);
+	~PRofstream();
+
+	// returns 0 and sets failbit when a descriptor is already present
+	streambuf * setbuf(char *, int);
+	// the stream buffer, typed as the PRfilebuf the constructors installed
+	PRfilebuf* rdbuf() { return (PRfilebuf*) ios::rdbuf(); }
+
+	void attach(PRFileDesc *);
+	PRFileDesc *fd() {return rdbuf()->fd();}
+
+	int is_open(){return rdbuf()->is_open();}
+	void open(const char *, int =ios::out, int = 0);
+	void close();
+};
+
+#endif /* __PRSTRM */