From 3b414b2bc9fc451ec75542dbc77b41b069fbdc73 Mon Sep 17 00:00:00 2001 From: jevering Date: Fri, 12 Jun 1998 01:34:49 +0000 Subject: [PATCH] More debug robot work. I added some statistic gathering information as well as hooking up the context vector validation stuff. The debug robot now makes two files in the root verification directory for mapping context vector to debug source dumps as well as occurance tracking of good and bad context vectors. --- htmlparser/src/CNavDTD.cpp | 8 +- htmlparser/src/CNavDelegate.cpp | 9 +- htmlparser/src/COtherDelegate.cpp | 10 +- htmlparser/src/makefile.win | 5 +- htmlparser/src/nsHTMLParser.cpp | 379 +++++++++++++++++++++-- htmlparser/src/nsTokenizer.cpp | 13 +- htmlparser/src/prstrm.cpp | 343 ++++++++++++++++++++ htmlparser/src/prstrm.h | 94 ++++++ parser/htmlparser/src/CNavDTD.cpp | 8 +- parser/htmlparser/src/CNavDelegate.cpp | 9 +- parser/htmlparser/src/COtherDelegate.cpp | 10 +- parser/htmlparser/src/makefile.win | 5 +- parser/htmlparser/src/nsHTMLParser.cpp | 379 +++++++++++++++++++++-- parser/htmlparser/src/nsTokenizer.cpp | 13 +- parser/htmlparser/src/prstrm.cpp | 343 ++++++++++++++++++++ parser/htmlparser/src/prstrm.h | 94 ++++++ 16 files changed, 1654 insertions(+), 68 deletions(-) create mode 100644 htmlparser/src/prstrm.cpp create mode 100644 htmlparser/src/prstrm.h create mode 100644 parser/htmlparser/src/prstrm.cpp create mode 100644 parser/htmlparser/src/prstrm.h diff --git a/htmlparser/src/CNavDTD.cpp b/htmlparser/src/CNavDTD.cpp index ecfe599d4bba..e3301a0e2596 100644 --- a/htmlparser/src/CNavDTD.cpp +++ b/htmlparser/src/CNavDTD.cpp @@ -842,8 +842,12 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{ PRBool CNavDTD::VerifyContextVector(PRInt32* aVector,PRInt32 aCount) const { PRBool result=PR_TRUE; - if(aCount>0) { - + if(aCount>1) { + for (int i = 0; i < aCount-1; i++) + if (!CanContain(aVector[i],aVector[i+1])) { + result = PR_FALSE; + break; + } } return result; } diff --git 
a/htmlparser/src/CNavDelegate.cpp b/htmlparser/src/CNavDelegate.cpp index 0eb2be51551a..44044201c985 100644 --- a/htmlparser/src/CNavDelegate.cpp +++ b/htmlparser/src/CNavDelegate.cpp @@ -31,6 +31,13 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -40,7 +47,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ CNavDelegate::CNavDelegate() : - ITokenizerDelegate(), mTokenDeque() { + ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/htmlparser/src/COtherDelegate.cpp b/htmlparser/src/COtherDelegate.cpp index 1728b49dee39..321c2ee1c0b8 100644 --- a/htmlparser/src/COtherDelegate.cpp +++ b/htmlparser/src/COtherDelegate.cpp @@ -32,6 +32,14 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} + /** * Default constructor * @@ -40,7 +48,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ COtherDelegate::COtherDelegate() : - ITokenizerDelegate(), mTokenDeque() { + ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/htmlparser/src/makefile.win b/htmlparser/src/makefile.win index 348e3a4f1caa..58213d2e54c1 100644 --- a/htmlparser/src/makefile.win +++ b/htmlparser/src/makefile.win @@ -28,7 +28,7 @@ CPPSRCS=nsHTMLContentSink.cpp \ nsToken.cpp nsTokenizer.cpp nsTokenHandler.cpp \ CNavDTD.cpp CNavDelegate.cpp \ COtherDTD.cpp COtherDelegate.cpp \ - nsHTMLParser.cpp + nsHTMLParser.cpp prstrm.cpp EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \ nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h @@ -39,7 +39,8 @@ 
CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\nsHTMLParser.obj \ .\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \ .\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \ - .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj + .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj \ + .\$(OBJDIR)\prstrm.obj LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib diff --git a/htmlparser/src/nsHTMLParser.cpp b/htmlparser/src/nsHTMLParser.cpp index 59eaf1d06f68..ba375429ef95 100644 --- a/htmlparser/src/nsHTMLParser.cpp +++ b/htmlparser/src/nsHTMLParser.cpp @@ -33,10 +33,13 @@ #include "prio.h" #include "plstr.h" #include +#include "prstrm.h" #include "nsIInputStream.h" #ifdef XP_PC #include //this is here for debug reasons... #endif +#include +#include "prmem.h" static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID); @@ -49,10 +52,20 @@ static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; static char* gVerificationOutputDir=0; +static PRBool gRecordingStatistics=PR_TRUE; static char* gURLRef=0; static int rickGDebug=0; static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream +extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir) +{ + gVerificationOutputDir = verify_dir; +} + +extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval) +{ + gRecordingStatistics = bval; +} /** * This method is defined in nsIParser. It is used to @@ -73,7 +86,6 @@ NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult) return it->QueryInterface(kIParserIID, (void **) aInstancePtrResult); } - /** * This big dispatch method is used to route token handler calls to the right place. * What's wrong with it? 
This table, and the dispatch methods themselves need to be @@ -367,6 +379,298 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { return old; } +/** + * This debug method records an invalid context vector and it's + * associated context vector and URL in a simple flat file mapping which + * resides in the verification directory and is named context.map + * + * @update jevering 6/06/98 + * @param path is the directory structure indicating the bad context vector + * @param pURLRef is the associated URL + * @param filename to record mapping to if not already recorded + * @return TRUE if it is already record (dont rerecord) + */ + +#define CONTEXT_VECTOR_MAP "/vector.map" +#define CONTEXT_VECTOR_STAT "/vector.stat" +#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" +PRBool DebugRecord(char * path, char * pURLRef, char * filename) +{ + char recordPath[2048]; + PRIntn oflags = 0; + + // create the record file name from the verification director + // and the default name. 
+ strcpy(recordPath,gVerificationOutputDir); + strcat(recordPath,CONTEXT_VECTOR_MAP); + + // create the file exists, only open for read/write + // otherwise, create it + if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) + oflags = PR_CREATE_FILE; + oflags |= PR_RDWR; + + // open the record file + PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); + + if (recordFile) { + + char * string = (char *)PR_Malloc(2048); + PRBool found = PR_FALSE; + + // vectors are stored on the format iof "URL vector filename" + // where the vector contains the verification path and + // the filename contains the debug source dump + sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); + + // get the file size, read in the file and parse it line at + // a time to check to see if we have already recorded this + // occurance + + PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); + if (iSize) { + + char * buffer = (char*)PR_Malloc(iSize); + char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); + if (buffer!=NULL && string!=NULL) { + PRInt32 ibufferpos, istringpos; + + // beginning of file for read + PR_Seek(recordFile,0,PR_SEEK_SET); + PR_Read(recordFile,buffer,iSize); + + // run through the file looking for a matching vector + for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) + { + // compare string once we have hit the end of the line + if (buffer[ibufferpos] == '\r') { + stringbuf[istringpos] = '\0'; + istringpos = 0; + // skip newline and space + ibufferpos++; + + if (PL_strlen(stringbuf)) { + char * space; + // chop of the filename for compare + if ((space = PL_strrchr(stringbuf, ' '))!=NULL) + *space = '\0'; + + // we have already recorded this one, free up, and return + if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { + PR_Free(buffer); + PR_Free(stringbuf); + PR_Free(string); + return PR_TRUE; + } + } + } + + // build up the compare string + else + stringbuf[istringpos++] = buffer[ibufferpos]; + } + + // throw away the record file data + 
PR_Free(buffer); + PR_Free(stringbuf); + } + } + + // if this bad vector was not recorded, add it to record file + + if (!found) { + PR_Seek(recordFile,0,PR_SEEK_END); + PR_Write(recordFile,string,PL_strlen(string)); + } + + PR_Close(recordFile); + PR_Free(string); + } + + // vector was not recorded + return PR_FALSE; +} + +// structure to store the vector statistic information + +typedef struct vector_info { + PRInt32 references; // number of occurances counted + PRInt32 count; // number of tags in the vector + PRBool good_vector; // is this a valid vector? + PRInt32 * vector; // and the vector +} VectorInfo; + +// global table for storing vector statistics and the size +static VectorInfo ** gVectorInfoArray = 0; +static PRInt32 gVectorCount = 0; + +// the statistic vector table grows each time it exceeds this +// stepping value +#define TABLE_SIZE 128 + +// compare function for quick sort. Compares references and +// sorts in decending order + +static int compare( const void *arg1, const void *arg2 ) +{ + VectorInfo ** p1 = (VectorInfo**)arg1; + VectorInfo ** p2 = (VectorInfo**)arg2; + return (*p2)->references - (*p1)->references; +} + +/** + * quick sort the statistic array causing the most frequently + * used vectors to be at the top (this makes it a little speedier + * when looking them up) + */ + +void SortVectorRecord(void) +{ + // of course, sort it only if there is something to sort + if (gVectorCount) { + qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); + } +} + +/** + * This debug routines stores statistical information about a + * context vector. The context vector statistics are stored in + * a global array. The table is resorted each time it grows to + * aid in lookup speed. 
If a vector has already been noted, its + * reference count is bumped, otherwise it is added to the table + * + * @update jevering 6/11/98 + * @param aTags is the tag list (vector) + * @param count is the size of the vector + * @return + */ + +void NoteVector(PRInt32 aTags[],PRInt32 count, PRBool good_vector) +{ + // if the table doesn't exist, create it + if (!gVectorInfoArray) { + gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); + } + else { + // attempt to look up the vector + for (PRInt32 i = 0; i < gVectorCount; i++) + + // check the vector only if they are the same size, if they + // match then just return without doing further work + if (gVectorInfoArray[i]->count == count) + if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(PRInt32)*count)) { + + // bzzzt. and we have a winner.. bump the ref count + gVectorInfoArray[i]->references++; + return; + } + } + + // the context vector hasn't been noted, so allocate it and + // initialize it one.. add it to the table + VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); + pVectorInfo->references = 1; + pVectorInfo->count = count; + pVectorInfo->good_vector = good_vector; + pVectorInfo->vector = (PRInt32*)PR_Malloc(count*sizeof(PRInt32)); + memcpy(pVectorInfo->vector,aTags,sizeof(PRInt32)*count); + gVectorInfoArray[gVectorCount++] = pVectorInfo; + + // have we maxed out the table? grow it.. sort it.. love it. 
+ if ((gVectorCount % TABLE_SIZE) == 0) { + gVectorInfoArray = (VectorInfo**)realloc( + gVectorInfoArray, + (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); + SortVectorRecord(); + } +} + +void MakeVectorString(char * vector_string, VectorInfo * pInfo) +{ + sprintf (vector_string, "%6d ", pInfo->references); + for (PRInt32 j = 0; j < pInfo->count; j++) { + PL_strcat(vector_string, "<"); + PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); + PL_strcat(vector_string, ">"); + } + PL_strcat(vector_string,"\r\n"); +} + +/** + * This debug routine dumps out the vector statistics to a text + * file in the verification directory and defaults to the name + * "vector.stat". It contains all parsed context vectors and there + * occurance count sorted in decending order. + * + * @update jevering 6/11/98 + * @param + * @return + */ + +extern "C" NS_EXPORT void DumpVectorRecord(void) +{ + // do we have a table? + if (gVectorCount) { + + // hopefully, they wont exceed 1K. + char vector_string[1024]; + char path[1024]; + + path[0] = '\0'; + + // put in the verification directory.. else the root + if (gVerificationOutputDir) + strcpy(path,gVerificationOutputDir); + + strcat(path,CONTEXT_VECTOR_STAT); + + // open the stat file creaming any existing stat file + PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); + if (statisticFile) { + + PRInt32 i; + PRofstream ps; + ps.attach(statisticFile); + + // oh what the heck, sort it again + SortVectorRecord(); + + // cute little header + sprintf(vector_string,"Context vector occurance results. 
Processed %d unique vectors.\r\n\r\n", gVectorCount); + ps << vector_string; + + ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; + ps << VECTOR_TABLE_HEADER; + + // dump out the bad vectors encountered + for (i = 0; i < gVectorCount; i++) { + if (!gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + } + + ps << "\r\n\r\nValid context vector summary\r\n"; + ps << VECTOR_TABLE_HEADER; + + // take a big vector table dump (good vectors) + for (i = 0; i < gVectorCount; i++) { + if (gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + // free em up. they mean nothing to me now (I'm such a user) + PR_Free(gVectorInfoArray[i]); + } + } + + // ok, we are done with the table, free it up as well + PR_Free(gVectorInfoArray); + gVectorInfoArray = 0; + gVectorCount = 0; + PR_Close(statisticFile); + } +} /** * This debug method allows us to determine whether or not @@ -378,47 +682,72 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { * @param aDTD is the DTD we plan to ask for verification * @return TRUE if we know how to handle it, else false */ -PRBool VerifyContextVector(PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { + +PRBool VerifyContextVector(CTokenizer * tokenizer, PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { PRBool result=PR_TRUE; + //ok, now see if we understand this vector + + if(0!=gVerificationOutputDir || gRecordingStatistics) + result=aDTD->VerifyContextVector(aTags,count); + + if (gRecordingStatistics) { + NoteVector(aTags,count,result); + } + if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; + char path[2048]; strcpy(path,gVerificationOutputDir); -#endif int i=0; for(i=0;iVerifyContextVector(aTags,count); - if(PR_FALSE==result){ -#ifdef NS_WIN32 - // save file to directory indicated by bad context vector - int iCount = 1; + static PRBool 
rnd_initialized = PR_FALSE; + + if (!rnd_initialized) { + // seed randomn number generator to aid in temp file + // creation. + rnd_initialized = PR_TRUE; + srand((unsigned)time(NULL)); + } + + // generate a filename to dump the html source into char filename[_MAX_PATH]; do { - sprintf(filename,"%s/html%04d.dbg", path, iCount++); + // use system time to generate a temporary file name + time_t ltime; + time (<ime); + // add in random number so that we can create uniques names + // faster than simply every second. + ltime += (time_t)rand(); + sprintf(filename,"%s/%lX.html", path, ltime); + // try until we find one we can create } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS); - PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); - if (debugFile) { - PR_Write(debugFile,gURLRef,PL_strlen(gURLRef)); - PR_Write(debugFile,"\n",PL_strlen("\n")); - PR_Close(debugFile); + + // check to see if we already recorded an instance of this particular + // bad vector. + if (!DebugRecord(path,gURLRef, filename)) + { + // save file to directory indicated by bad context vector + PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); + // if we were able to open the debug file, then + // write the true URL at the top of the file. + if (debugFile) { + // dump the html source into the newly created file. + if (tokenizer) { + PRofstream ps; + ps.attach(debugFile); + tokenizer->DebugDumpSource(ps); + } + PR_Close(debugFile); + } } -#endif - //add debugging code here to record the fact that we just encountered - //a context vector we don't know how to handle. 
} } @@ -454,7 +783,7 @@ PRInt32 nsHTMLParser::IterateTokens() { if(aHandler) { theMarkPos=*mCurrentPos; result=(*aHandler)(theToken,this); - VerifyContextVector(mContextStack,mContextStackPos,mDTD); + VerifyContextVector(mTokenizer, mContextStack,mContextStackPos,mDTD); } ++(*mCurrentPos); } diff --git a/htmlparser/src/nsTokenizer.cpp b/htmlparser/src/nsTokenizer.cpp index 62c6967e78a2..8d37467c794c 100644 --- a/htmlparser/src/nsTokenizer.cpp +++ b/htmlparser/src/nsTokenizer.cpp @@ -23,6 +23,13 @@ #include "nsScanner.h" #include "nsIURL.h" +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -33,7 +40,7 @@ * @return */ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aURL,aMode); mParseMode=aMode; @@ -48,7 +55,7 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo * @return */ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aFilename,aMode); mParseMode=aMode; @@ -63,7 +70,7 @@ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,ePars * @return */ CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aMode); mParseMode=aMode; diff --git a/htmlparser/src/prstrm.cpp b/htmlparser/src/prstrm.cpp new file mode 100644 index 000000000000..0b09b4f938ff --- /dev/null +++ b/htmlparser/src/prstrm.cpp @@ -0,0 +1,343 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "License"); you may not use this 
file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ +#include "prtypes.h" +#include "prstrm.h" +#include + +const PRIntn STRM_BUFSIZ = 8192; + +PRfilebuf::PRfilebuf(): +_fd(0), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd): +streambuf(), +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen): +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ + PRfilebuf::setbuf(buffptr, bufflen); +} + +PRfilebuf::~PRfilebuf() +{ + if (_opened){ + close(); + }else + sync(); + if (_allocated) + delete base(); +} + +PRfilebuf* +PRfilebuf::open(const char *name, int mode, int flags) +{ + if (_fd != 0) + return 0; // error if already open + PRIntn PRmode = 0; + // translate mode argument + if (!(mode & ios::nocreate)) + PRmode |= PR_CREATE_FILE; + //if (mode & ios::noreplace) + // PRmode |= O_EXCL; + if (mode & ios::app){ + mode |= ios::out; + PRmode |= PR_APPEND; + } + if (mode & ios::trunc){ + mode |= ios::out; // IMPLIED + PRmode |= PR_TRUNCATE; + } + if (mode & ios::out){ + if (mode & ios::in) + PRmode |= PR_RDWR; + else + PRmode |= PR_WRONLY; + if (!(mode & (ios::in|ios::app|ios::ate|ios::noreplace))){ + mode |= ios::trunc; // IMPLIED + PRmode |= PR_TRUNCATE; + } + }else if (mode & ios::in) + PRmode |= PR_RDONLY; + else + return 0; // error if not ios:in or ios::out + + + // + // The 
usual portable across unix crap... + // NT gets a hokey piece of junk layer that prevents + // access to the API. +#ifdef WIN32 + _fd = PR_Open(name, PRmode, PRmode); +#else + _fd = PR_Open(name, PRmode, flags); +#endif + if (_fd == 0) + return 0; + _opened = PR_TRUE; + if ((!unbuffered()) && (!ebuf())){ + char * sbuf = new char[STRM_BUFSIZ]; + if (!sbuf) + unbuffered(1); + else{ + _allocated = PR_TRUE; + streambuf::setb(sbuf,sbuf+STRM_BUFSIZ,0); + } + } + if (mode & ios::ate){ + if (seekoff(0,ios::end,mode)==EOF){ + close(); + return 0; + } + } + return this; +} + +PRfilebuf* +PRfilebuf::attach(PRFileDesc *fd) +{ + _opened = PR_FALSE; + _fd = fd; + return this; +} + +int +PRfilebuf::overflow(int c) +{ + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) // sync before new buffer created below + return EOF; + + if (!unbuffered()) + setp(base(),ebuf()); + + if (c!=EOF){ + if ((!unbuffered()) && (pptr() < epptr())) // guard against recursion + sputc(c); + else{ + if (PR_Write(_fd, &c, 1)!=1) + return(EOF); + } + } + return(1); // return something other than EOF if successful +} + +int +PRfilebuf::underflow() +{ + int count; + unsigned char tbuf; + + if (in_avail()) + return (int)(unsigned char) *gptr(); + + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) + return EOF; + + if (unbuffered()) + { + if (PR_Read(_fd,(void *)&tbuf,1)<=0) + return EOF; + return (int)tbuf; + } + + if ((count=PR_Read(_fd,(void *)base(),blen())) <= 0) + return EOF; // reached EOF + setg(base(),base(),base()+count); + return (int)(unsigned char) *gptr(); +} + +streambuf* +PRfilebuf::setbuf(char *buffptr, int bufflen) +{ + if (is_open() && (ebuf())) + return 0; + if ((!buffptr) || (bufflen <= 0)) + unbuffered(1); + else + setb(buffptr, buffptr+bufflen, 0); + return this; +} + +streampos +PRfilebuf::seekoff(streamoff offset, ios::seek_dir dir, int /* mode */) +{ + if (PR_GetDescType(_fd) == 
PR_DESC_FILE){ + PRSeekWhence fdir; + PRInt32 retpos; + switch (dir) { + case ios::beg : + fdir = PR_SEEK_SET; + break; + case ios::cur : + fdir = PR_SEEK_CUR; + break; + case ios::end : + fdir = PR_SEEK_END; + break; + default: + // error + return(EOF); + } + + if (PRfilebuf::sync()==EOF) + return EOF; + if ((retpos=PR_Seek(_fd, offset, fdir))==-1L) + return (EOF); + return((streampos)retpos); + }else + return (EOF); +} + + +int +PRfilebuf::sync() +{ + PRInt32 count; + + if (_fd==0) + return(EOF); + + if (!unbuffered()){ + // Sync write area + if ((count=out_waiting())!=0){ + PRInt32 nout; + if ((nout =PR_Write(_fd, + (void *) pbase(), + (unsigned int)count)) != count){ + if (nout > 0) { + // should set _pptr -= nout + pbump(-(int)nout); + memmove(pbase(), pbase()+nout, (int)(count-nout)); + } + return(EOF); + } + } + setp(0,0); // empty put area + + if (PR_GetDescType(_fd) == PR_DESC_FILE){ + // Sockets can't seek; don't need this + if ((count=in_avail()) > 0){ + if (PR_Seek(_fd, -count, PR_SEEK_CUR)!=-1L) + { + return (EOF); + } + } + } + setg(0,0,0); // empty get area + } + return(0); +} + +PRfilebuf * +PRfilebuf::close() +{ + int retval; + if (_fd==0) + return 0; + + retval = sync(); + + if ((PR_Close(_fd)==0) || (retval==EOF)) + return 0; + _fd = 0; + return this; +} + +PRofstream::PRofstream(): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd): +ostream(new PRfilebuf(fd)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd, char *buff, int bufflen): +ostream(new PRfilebuf(fd, buff, bufflen)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(const char *name, int mode, int flags): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); + if (!rdbuf()->open(name, (mode|ios::out), flags)) + clear(rdstate() | ios::failbit); +} + +PRofstream::~PRofstream() +{ + flush(); + + delete rdbuf(); +#ifdef _PRSTR_BP + _PRSTR_BP = 0; +#endif +} + +streambuf * +PRofstream::setbuf(char * ptr, int len) +{ + if ((is_open()) || 
(!(rdbuf()->setbuf(ptr, len)))){ + clear(rdstate() | ios::failbit); + return 0; + } + return rdbuf(); +} + +void +PRofstream::attach(PRFileDesc *fd) +{ + if (!(rdbuf()->attach(fd))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::open(const char * name, int mode, int flags) +{ + if (is_open() || !(rdbuf()->open(name, (mode|ios::out), flags))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::close() +{ + clear((rdbuf()->close()) ? 0 : (rdstate() | ios::failbit)); +} + + + diff --git a/htmlparser/src/prstrm.h b/htmlparser/src/prstrm.h new file mode 100644 index 000000000000..d19957889ea7 --- /dev/null +++ b/htmlparser/src/prstrm.h @@ -0,0 +1,94 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ + +// The originals are in: nsprpub/lib/pstreams/ +// currently not being built into nspr.. these files will go away. 
+ +#ifndef __PRSTRM +#define __PRSTRM + +#include "prtypes.h" +#include "prio.h" +#include + +#if defined(__GNUC__) +#define _PRSTR_BP _strbuf +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#elif defined(WIN32) +#define _PRSTR_BP bp +#define _PRSTR_DELBUF(x) delbuf(x) +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#elif defined(OSF1) +#define _PRSTR_BP m_psb +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#else +#define _PRSTR_BP bp +// Unix compilers don't believe in encapsulation +// At least on Solaris this is also ignored +#define _PRSTR_DELBUF(x) delbuf = x +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#endif + +class PR_IMPLEMENT(PRfilebuf): public streambuf +{ +public: + PRfilebuf(); + PRfilebuf(PRFileDesc *fd); + PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen); + ~PRfilebuf(); + virtual int overflow(int=EOF); + virtual int underflow(); + virtual streambuf *setbuf(char *buff, int bufflen); + virtual streampos seekoff(streamoff, ios::seek_dir, int); + virtual int sync(); + PRfilebuf *open(const char *name, int mode, int flags); + PRfilebuf *attach(PRFileDesc *fd); + PRfilebuf *close(); + int is_open() const {return (_fd != 0);} + PRFileDesc *fd(){return _fd;} + +private: + PRFileDesc * _fd; + PRBool _opened; + PRBool _allocated; +}; + + +class PR_IMPLEMENT(PRofstream) : public ostream { +public: + PRofstream(); + PRofstream(const char *, int mode=ios::out, int flags = 0); + PRofstream(PRFileDesc *); + PRofstream(PRFileDesc *, char *, int); + ~PRofstream(); + + streambuf * setbuf(char *, int); + PRfilebuf* rdbuf() { return (PRfilebuf*) ios::rdbuf(); } + + void attach(PRFileDesc *); + PRFileDesc *fd() {return rdbuf()->fd();} + + int is_open(){return rdbuf()->is_open();} + void open(const char *, int =ios::out, int = 0); + void close(); +}; + +#endif /* __PRSTRM */ \ No newline at end of file diff --git a/parser/htmlparser/src/CNavDTD.cpp 
b/parser/htmlparser/src/CNavDTD.cpp index ecfe599d4bba..e3301a0e2596 100644 --- a/parser/htmlparser/src/CNavDTD.cpp +++ b/parser/htmlparser/src/CNavDTD.cpp @@ -842,8 +842,12 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{ PRBool CNavDTD::VerifyContextVector(PRInt32* aVector,PRInt32 aCount) const { PRBool result=PR_TRUE; - if(aCount>0) { - + if(aCount>1) { + for (int i = 0; i < aCount-1; i++) + if (!CanContain(aVector[i],aVector[i+1])) { + result = PR_FALSE; + break; + } } return result; } diff --git a/parser/htmlparser/src/CNavDelegate.cpp b/parser/htmlparser/src/CNavDelegate.cpp index 0eb2be51551a..44044201c985 100644 --- a/parser/htmlparser/src/CNavDelegate.cpp +++ b/parser/htmlparser/src/CNavDelegate.cpp @@ -31,6 +31,13 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -40,7 +47,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ CNavDelegate::CNavDelegate() : - ITokenizerDelegate(), mTokenDeque() { + ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/parser/htmlparser/src/COtherDelegate.cpp b/parser/htmlparser/src/COtherDelegate.cpp index 1728b49dee39..321c2ee1c0b8 100644 --- a/parser/htmlparser/src/COtherDelegate.cpp +++ b/parser/htmlparser/src/COtherDelegate.cpp @@ -32,6 +32,14 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} + /** * Default constructor * @@ -40,7 +48,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ COtherDelegate::COtherDelegate() : - ITokenizerDelegate(), mTokenDeque() { + ITokenizerDelegate(), 
mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/parser/htmlparser/src/makefile.win b/parser/htmlparser/src/makefile.win index 348e3a4f1caa..58213d2e54c1 100644 --- a/parser/htmlparser/src/makefile.win +++ b/parser/htmlparser/src/makefile.win @@ -28,7 +28,7 @@ CPPSRCS=nsHTMLContentSink.cpp \ nsToken.cpp nsTokenizer.cpp nsTokenHandler.cpp \ CNavDTD.cpp CNavDelegate.cpp \ COtherDTD.cpp COtherDelegate.cpp \ - nsHTMLParser.cpp + nsHTMLParser.cpp prstrm.cpp EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \ nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h @@ -39,7 +39,8 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\nsHTMLParser.obj \ .\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \ .\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \ - .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj + .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj \ + .\$(OBJDIR)\prstrm.obj LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib diff --git a/parser/htmlparser/src/nsHTMLParser.cpp b/parser/htmlparser/src/nsHTMLParser.cpp index 59eaf1d06f68..ba375429ef95 100644 --- a/parser/htmlparser/src/nsHTMLParser.cpp +++ b/parser/htmlparser/src/nsHTMLParser.cpp @@ -33,10 +33,13 @@ #include "prio.h" #include "plstr.h" #include +#include "prstrm.h" #include "nsIInputStream.h" #ifdef XP_PC #include //this is here for debug reasons... 
#endif +#include <time.h> +#include "prmem.h" static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID); @@ -49,10 +52,20 @@ static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; static char* gVerificationOutputDir=0; +static PRBool gRecordingStatistics=PR_TRUE; static char* gURLRef=0; static int rickGDebug=0; static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream +extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir) +{ + gVerificationOutputDir = verify_dir; +} + +extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval) +{ + gRecordingStatistics = bval; +} /** * This method is defined in nsIParser. It is used to @@ -73,7 +86,6 @@ NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult) return it->QueryInterface(kIParserIID, (void **) aInstancePtrResult); } - /** * This big dispatch method is used to route token handler calls to the right place. * What's wrong with it?
This table, and the dispatch methods themselves need to be @@ -367,6 +379,298 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { return old; } +/** + * This debug method records an invalid context vector and it's + * associated context vector and URL in a simple flat file mapping which + * resides in the verification directory and is named context.map + * + * @update jevering 6/06/98 + * @param path is the directory structure indicating the bad context vector + * @param pURLRef is the associated URL + * @param filename to record mapping to if not already recorded + * @return TRUE if it is already record (dont rerecord) + */ + +#define CONTEXT_VECTOR_MAP "/vector.map" +#define CONTEXT_VECTOR_STAT "/vector.stat" +#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" +PRBool DebugRecord(char * path, char * pURLRef, char * filename) +{ + char recordPath[2048]; + PRIntn oflags = 0; + + // create the record file name from the verification director + // and the default name. 
+ strcpy(recordPath,gVerificationOutputDir); + strcat(recordPath,CONTEXT_VECTOR_MAP); + + // create the file exists, only open for read/write + // otherwise, create it + if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) + oflags = PR_CREATE_FILE; + oflags |= PR_RDWR; + + // open the record file + PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); + + if (recordFile) { + + char * string = (char *)PR_Malloc(2048); + PRBool found = PR_FALSE; + + // vectors are stored on the format iof "URL vector filename" + // where the vector contains the verification path and + // the filename contains the debug source dump + sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); + + // get the file size, read in the file and parse it line at + // a time to check to see if we have already recorded this + // occurance + + PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); + if (iSize) { + + char * buffer = (char*)PR_Malloc(iSize); + char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); + if (buffer!=NULL && string!=NULL) { + PRInt32 ibufferpos, istringpos; + + // beginning of file for read + PR_Seek(recordFile,0,PR_SEEK_SET); + PR_Read(recordFile,buffer,iSize); + + // run through the file looking for a matching vector + for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) + { + // compare string once we have hit the end of the line + if (buffer[ibufferpos] == '\r') { + stringbuf[istringpos] = '\0'; + istringpos = 0; + // skip newline and space + ibufferpos++; + + if (PL_strlen(stringbuf)) { + char * space; + // chop of the filename for compare + if ((space = PL_strrchr(stringbuf, ' '))!=NULL) + *space = '\0'; + + // we have already recorded this one, free up, and return + if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { + PR_Free(buffer); + PR_Free(stringbuf); + PR_Free(string); + return PR_TRUE; + } + } + } + + // build up the compare string + else + stringbuf[istringpos++] = buffer[ibufferpos]; + } + + // throw away the record file data + 
PR_Free(buffer); + PR_Free(stringbuf); + } + } + + // if this bad vector was not recorded, add it to record file + + if (!found) { + PR_Seek(recordFile,0,PR_SEEK_END); + PR_Write(recordFile,string,PL_strlen(string)); + } + + PR_Close(recordFile); + PR_Free(string); + } + + // vector was not recorded + return PR_FALSE; +} + +// structure to store the vector statistic information + +typedef struct vector_info { + PRInt32 references; // number of occurances counted + PRInt32 count; // number of tags in the vector + PRBool good_vector; // is this a valid vector? + PRInt32 * vector; // and the vector +} VectorInfo; + +// global table for storing vector statistics and the size +static VectorInfo ** gVectorInfoArray = 0; +static PRInt32 gVectorCount = 0; + +// the statistic vector table grows each time it exceeds this +// stepping value +#define TABLE_SIZE 128 + +// compare function for quick sort. Compares references and +// sorts in decending order + +static int compare( const void *arg1, const void *arg2 ) +{ + VectorInfo ** p1 = (VectorInfo**)arg1; + VectorInfo ** p2 = (VectorInfo**)arg2; + return (*p2)->references - (*p1)->references; +} + +/** + * quick sort the statistic array causing the most frequently + * used vectors to be at the top (this makes it a little speedier + * when looking them up) + */ + +void SortVectorRecord(void) +{ + // of course, sort it only if there is something to sort + if (gVectorCount) { + qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); + } +} + +/** + * This debug routines stores statistical information about a + * context vector. The context vector statistics are stored in + * a global array. The table is resorted each time it grows to + * aid in lookup speed. 
If a vector has already been noted, its + * reference count is bumped, otherwise it is added to the table + * + * @update jevering 6/11/98 + * @param aTags is the tag list (vector) + * @param count is the size of the vector + * @return + */ + +void NoteVector(PRInt32 aTags[],PRInt32 count, PRBool good_vector) +{ + // if the table doesn't exist, create it + if (!gVectorInfoArray) { + gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); + } + else { + // attempt to look up the vector + for (PRInt32 i = 0; i < gVectorCount; i++) + + // check the vector only if they are the same size, if they + // match then just return without doing further work + if (gVectorInfoArray[i]->count == count) + if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(PRInt32)*count)) { + + // bzzzt. and we have a winner.. bump the ref count + gVectorInfoArray[i]->references++; + return; + } + } + + // the context vector hasn't been noted, so allocate it and + // initialize it one.. add it to the table + VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); + pVectorInfo->references = 1; + pVectorInfo->count = count; + pVectorInfo->good_vector = good_vector; + pVectorInfo->vector = (PRInt32*)PR_Malloc(count*sizeof(PRInt32)); + memcpy(pVectorInfo->vector,aTags,sizeof(PRInt32)*count); + gVectorInfoArray[gVectorCount++] = pVectorInfo; + + // have we maxed out the table? grow it.. sort it.. love it. 
+ if ((gVectorCount % TABLE_SIZE) == 0) { + gVectorInfoArray = (VectorInfo**)realloc( + gVectorInfoArray, + (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); + SortVectorRecord(); + } +} + +void MakeVectorString(char * vector_string, VectorInfo * pInfo) +{ + sprintf (vector_string, "%6d ", pInfo->references); + for (PRInt32 j = 0; j < pInfo->count; j++) { + PL_strcat(vector_string, "<"); + PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); + PL_strcat(vector_string, ">"); + } + PL_strcat(vector_string,"\r\n"); +} + +/** + * This debug routine dumps out the vector statistics to a text + * file in the verification directory and defaults to the name + * "vector.stat". It contains all parsed context vectors and there + * occurance count sorted in decending order. + * + * @update jevering 6/11/98 + * @param + * @return + */ + +extern "C" NS_EXPORT void DumpVectorRecord(void) +{ + // do we have a table? + if (gVectorCount) { + + // hopefully, they wont exceed 1K. + char vector_string[1024]; + char path[1024]; + + path[0] = '\0'; + + // put in the verification directory.. else the root + if (gVerificationOutputDir) + strcpy(path,gVerificationOutputDir); + + strcat(path,CONTEXT_VECTOR_STAT); + + // open the stat file creaming any existing stat file + PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); + if (statisticFile) { + + PRInt32 i; + PRofstream ps; + ps.attach(statisticFile); + + // oh what the heck, sort it again + SortVectorRecord(); + + // cute little header + sprintf(vector_string,"Context vector occurance results. 
Processed %d unique vectors.\r\n\r\n", gVectorCount); + ps << vector_string; + + ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; + ps << VECTOR_TABLE_HEADER; + + // dump out the bad vectors encountered + for (i = 0; i < gVectorCount; i++) { + if (!gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + } + + ps << "\r\n\r\nValid context vector summary\r\n"; + ps << VECTOR_TABLE_HEADER; + + // take a big vector table dump (good vectors) + for (i = 0; i < gVectorCount; i++) { + if (gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + // free em up. they mean nothing to me now (I'm such a user) + PR_Free(gVectorInfoArray[i]); + } + } + + // ok, we are done with the table, free it up as well + PR_Free(gVectorInfoArray); + gVectorInfoArray = 0; + gVectorCount = 0; + PR_Close(statisticFile); + } +} /** * This debug method allows us to determine whether or not @@ -378,47 +682,72 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { * @param aDTD is the DTD we plan to ask for verification * @return TRUE if we know how to handle it, else false */ -PRBool VerifyContextVector(PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { + +PRBool VerifyContextVector(CTokenizer * tokenizer, PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { PRBool result=PR_TRUE; + //ok, now see if we understand this vector + + if(0!=gVerificationOutputDir || gRecordingStatistics) + result=aDTD->VerifyContextVector(aTags,count); + + if (gRecordingStatistics) { + NoteVector(aTags,count,result); + } + if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; + char path[2048]; strcpy(path,gVerificationOutputDir); -#endif int i=0; for(i=0;iVerifyContextVector(aTags,count); - if(PR_FALSE==result){ -#ifdef NS_WIN32 - // save file to directory indicated by bad context vector - int iCount = 1; + static PRBool 
rnd_initialized = PR_FALSE; + + if (!rnd_initialized) { + // seed randomn number generator to aid in temp file + // creation. + rnd_initialized = PR_TRUE; + srand((unsigned)time(NULL)); + } + + // generate a filename to dump the html source into char filename[_MAX_PATH]; do { - sprintf(filename,"%s/html%04d.dbg", path, iCount++); + // use system time to generate a temporary file name + time_t ltime; + time (&ltime); + // add in random number so that we can create uniques names + // faster than simply every second. + ltime += (time_t)rand(); + sprintf(filename,"%s/%lX.html", path, ltime); + // try until we find one we can create } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS); - PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); - if (debugFile) { - PR_Write(debugFile,gURLRef,PL_strlen(gURLRef)); - PR_Write(debugFile,"\n",PL_strlen("\n")); - PR_Close(debugFile); + + // check to see if we already recorded an instance of this particular + // bad vector. + if (!DebugRecord(path,gURLRef, filename)) + { + // save file to directory indicated by bad context vector + PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); + // if we were able to open the debug file, then + // write the true URL at the top of the file. + if (debugFile) { + // dump the html source into the newly created file. + if (tokenizer) { + PRofstream ps; + ps.attach(debugFile); + tokenizer->DebugDumpSource(ps); + } + PR_Close(debugFile); + } } -#endif - //add debugging code here to record the fact that we just encountered - //a context vector we don't know how to handle.
} } @@ -454,7 +783,7 @@ PRInt32 nsHTMLParser::IterateTokens() { if(aHandler) { theMarkPos=*mCurrentPos; result=(*aHandler)(theToken,this); - VerifyContextVector(mContextStack,mContextStackPos,mDTD); + VerifyContextVector(mTokenizer, mContextStack,mContextStackPos,mDTD); } ++(*mCurrentPos); } diff --git a/parser/htmlparser/src/nsTokenizer.cpp b/parser/htmlparser/src/nsTokenizer.cpp index 62c6967e78a2..8d37467c794c 100644 --- a/parser/htmlparser/src/nsTokenizer.cpp +++ b/parser/htmlparser/src/nsTokenizer.cpp @@ -23,6 +23,13 @@ #include "nsScanner.h" #include "nsIURL.h" +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -33,7 +40,7 @@ * @return */ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aURL,aMode); mParseMode=aMode; @@ -48,7 +55,7 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo * @return */ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aFilename,aMode); mParseMode=aMode; @@ -63,7 +70,7 @@ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,ePars * @return */ CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aMode); mParseMode=aMode; diff --git a/parser/htmlparser/src/prstrm.cpp b/parser/htmlparser/src/prstrm.cpp new file mode 100644 index 000000000000..0b09b4f938ff --- /dev/null +++ b/parser/htmlparser/src/prstrm.cpp @@ -0,0 +1,343 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file are subject to the Netscape Public License + * 
Version 1.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ +#include "prtypes.h" +#include "prstrm.h" +#include <string.h> + +const PRIntn STRM_BUFSIZ = 8192; + +PRfilebuf::PRfilebuf(): +_fd(0), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd): +streambuf(), +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen): +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ + PRfilebuf::setbuf(buffptr, bufflen); +} + +PRfilebuf::~PRfilebuf() +{ + if (_opened){ + close(); + }else + sync(); + if (_allocated) + delete base(); +} + +PRfilebuf* +PRfilebuf::open(const char *name, int mode, int flags) +{ + if (_fd != 0) + return 0; // error if already open + PRIntn PRmode = 0; + // translate mode argument + if (!(mode & ios::nocreate)) + PRmode |= PR_CREATE_FILE; + //if (mode & ios::noreplace) + // PRmode |= O_EXCL; + if (mode & ios::app){ + mode |= ios::out; + PRmode |= PR_APPEND; + } + if (mode & ios::trunc){ + mode |= ios::out; // IMPLIED + PRmode |= PR_TRUNCATE; + } + if (mode & ios::out){ + if (mode & ios::in) + PRmode |= PR_RDWR; + else + PRmode |= PR_WRONLY; + if (!(mode & (ios::in|ios::app|ios::ate|ios::noreplace))){ + mode |= ios::trunc; // IMPLIED + PRmode |= PR_TRUNCATE; + } + }else if (mode & ios::in) + PRmode |= PR_RDONLY; + else + return 0; //
error if not ios:in or ios::out + + + // + // The usual portable across unix crap... + // NT gets a hokey piece of junk layer that prevents + // access to the API. +#ifdef WIN32 + _fd = PR_Open(name, PRmode, PRmode); +#else + _fd = PR_Open(name, PRmode, flags); +#endif + if (_fd == 0) + return 0; + _opened = PR_TRUE; + if ((!unbuffered()) && (!ebuf())){ + char * sbuf = new char[STRM_BUFSIZ]; + if (!sbuf) + unbuffered(1); + else{ + _allocated = PR_TRUE; + streambuf::setb(sbuf,sbuf+STRM_BUFSIZ,0); + } + } + if (mode & ios::ate){ + if (seekoff(0,ios::end,mode)==EOF){ + close(); + return 0; + } + } + return this; +} + +PRfilebuf* +PRfilebuf::attach(PRFileDesc *fd) +{ + _opened = PR_FALSE; + _fd = fd; + return this; +} + +int +PRfilebuf::overflow(int c) +{ + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) // sync before new buffer created below + return EOF; + + if (!unbuffered()) + setp(base(),ebuf()); + + if (c!=EOF){ + if ((!unbuffered()) && (pptr() < epptr())) // guard against recursion + sputc(c); + else{ + if (PR_Write(_fd, &c, 1)!=1) + return(EOF); + } + } + return(1); // return something other than EOF if successful +} + +int +PRfilebuf::underflow() +{ + int count; + unsigned char tbuf; + + if (in_avail()) + return (int)(unsigned char) *gptr(); + + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) + return EOF; + + if (unbuffered()) + { + if (PR_Read(_fd,(void *)&tbuf,1)<=0) + return EOF; + return (int)tbuf; + } + + if ((count=PR_Read(_fd,(void *)base(),blen())) <= 0) + return EOF; // reached EOF + setg(base(),base(),base()+count); + return (int)(unsigned char) *gptr(); +} + +streambuf* +PRfilebuf::setbuf(char *buffptr, int bufflen) +{ + if (is_open() && (ebuf())) + return 0; + if ((!buffptr) || (bufflen <= 0)) + unbuffered(1); + else + setb(buffptr, buffptr+bufflen, 0); + return this; +} + +streampos +PRfilebuf::seekoff(streamoff offset, ios::seek_dir dir, 
int /* mode */) +{ + if (PR_GetDescType(_fd) == PR_DESC_FILE){ + PRSeekWhence fdir; + PRInt32 retpos; + switch (dir) { + case ios::beg : + fdir = PR_SEEK_SET; + break; + case ios::cur : + fdir = PR_SEEK_CUR; + break; + case ios::end : + fdir = PR_SEEK_END; + break; + default: + // error + return(EOF); + } + + if (PRfilebuf::sync()==EOF) + return EOF; + if ((retpos=PR_Seek(_fd, offset, fdir))==-1L) + return (EOF); + return((streampos)retpos); + }else + return (EOF); +} + + +int +PRfilebuf::sync() +{ + PRInt32 count; + + if (_fd==0) + return(EOF); + + if (!unbuffered()){ + // Sync write area + if ((count=out_waiting())!=0){ + PRInt32 nout; + if ((nout =PR_Write(_fd, + (void *) pbase(), + (unsigned int)count)) != count){ + if (nout > 0) { + // should set _pptr -= nout + pbump(-(int)nout); + memmove(pbase(), pbase()+nout, (int)(count-nout)); + } + return(EOF); + } + } + setp(0,0); // empty put area + + if (PR_GetDescType(_fd) == PR_DESC_FILE){ + // Sockets can't seek; don't need this + if ((count=in_avail()) > 0){ + if (PR_Seek(_fd, -count, PR_SEEK_CUR)!=-1L) + { + return (EOF); + } + } + } + setg(0,0,0); // empty get area + } + return(0); +} + +PRfilebuf * +PRfilebuf::close() +{ + int retval; + if (_fd==0) + return 0; + + retval = sync(); + + if ((PR_Close(_fd)==0) || (retval==EOF)) + return 0; + _fd = 0; + return this; +} + +PRofstream::PRofstream(): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd): +ostream(new PRfilebuf(fd)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd, char *buff, int bufflen): +ostream(new PRfilebuf(fd, buff, bufflen)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(const char *name, int mode, int flags): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); + if (!rdbuf()->open(name, (mode|ios::out), flags)) + clear(rdstate() | ios::failbit); +} + +PRofstream::~PRofstream() +{ + flush(); + + delete rdbuf(); +#ifdef _PRSTR_BP + _PRSTR_BP = 0; +#endif +} + +streambuf * 
+PRofstream::setbuf(char * ptr, int len) +{ + if ((is_open()) || (!(rdbuf()->setbuf(ptr, len)))){ + clear(rdstate() | ios::failbit); + return 0; + } + return rdbuf(); +} + +void +PRofstream::attach(PRFileDesc *fd) +{ + if (!(rdbuf()->attach(fd))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::open(const char * name, int mode, int flags) +{ + if (is_open() || !(rdbuf()->open(name, (mode|ios::out), flags))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::close() +{ + clear((rdbuf()->close()) ? 0 : (rdstate() | ios::failbit)); +} + + + diff --git a/parser/htmlparser/src/prstrm.h b/parser/htmlparser/src/prstrm.h new file mode 100644 index 000000000000..d19957889ea7 --- /dev/null +++ b/parser/htmlparser/src/prstrm.h @@ -0,0 +1,94 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ + +// The originals are in: nsprpub/lib/pstreams/ +// currently not being built into nspr.. these files will go away. 
+ +#ifndef __PRSTRM +#define __PRSTRM + +#include "prtypes.h" +#include "prio.h" +#include <iostream.h> + +#if defined(__GNUC__) +#define _PRSTR_BP _strbuf +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#elif defined(WIN32) +#define _PRSTR_BP bp +#define _PRSTR_DELBUF(x) delbuf(x) +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#elif defined(OSF1) +#define _PRSTR_BP m_psb +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#else +#define _PRSTR_BP bp +// Unix compilers don't believe in encapsulation +// At least on Solaris this is also ignored +#define _PRSTR_DELBUF(x) delbuf = x +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#endif + +class PR_IMPLEMENT(PRfilebuf): public streambuf +{ +public: + PRfilebuf(); + PRfilebuf(PRFileDesc *fd); + PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen); + ~PRfilebuf(); + virtual int overflow(int=EOF); + virtual int underflow(); + virtual streambuf *setbuf(char *buff, int bufflen); + virtual streampos seekoff(streamoff, ios::seek_dir, int); + virtual int sync(); + PRfilebuf *open(const char *name, int mode, int flags); + PRfilebuf *attach(PRFileDesc *fd); + PRfilebuf *close(); + int is_open() const {return (_fd != 0);} + PRFileDesc *fd(){return _fd;} + +private: + PRFileDesc * _fd; + PRBool _opened; + PRBool _allocated; +}; + + +class PR_IMPLEMENT(PRofstream) : public ostream { +public: + PRofstream(); + PRofstream(const char *, int mode=ios::out, int flags = 0); + PRofstream(PRFileDesc *); + PRofstream(PRFileDesc *, char *, int); + ~PRofstream(); + + streambuf * setbuf(char *, int); + PRfilebuf* rdbuf() { return (PRfilebuf*) ios::rdbuf(); } + + void attach(PRFileDesc *); + PRFileDesc *fd() {return rdbuf()->fd();} + + int is_open(){return rdbuf()->is_open();} + void open(const char *, int =ios::out, int = 0); + void close(); +}; + +#endif /* __PRSTRM */ \ No newline at end of file