зеркало из https://github.com/mozilla/pjs.git
Major whacking of the debug robot code.
The debug robot has now been moved into its own class (an XPCOM object) and is fairly well separated from the rest of the parser. An argument that passes the ParserDebug object into the parsing process has been added to the Parse member. This should clean up Unix a bit, as well as provide the ability to run multiple simultaneous robots. This also cleaned up the global variable hackage.
This commit is contained in:
Родитель
651c8e3cc1
Коммит
8d8f3997e5
|
@ -31,6 +31,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
|
@ -43,13 +44,10 @@
|
|||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
|
||||
#ifdef XP_PC
|
||||
#include <direct.h> //this is here for debug reasons...
|
||||
#endif
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
|
||||
|
@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
|||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
|
@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
|
|||
* @return
|
||||
*/
|
||||
CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
|
@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
|||
*/
|
||||
CNavDTD::~CNavDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
|
@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){
|
|||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
|
@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
|
|||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
|
@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
|||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
|
@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
|||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
|
@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
|
|||
//otherwise, intentionally fall through...
|
||||
|
||||
case eHTMLTag_tr:
|
||||
if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
|
||||
if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
|
||||
aVector.Append((PRUnichar)eHTMLTag_td);
|
||||
result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
|
||||
// result=PR_TRUE;
|
||||
|
@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
|
|||
return;
|
||||
}
|
||||
|
||||
void CNavDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in decending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!gVectorInfoArray) {
|
||||
gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < gVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (gVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
gVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
gVectorInfoArray[gVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((gVectorCount % TABLE_SIZE) == 0) {
|
||||
gVectorInfoArray = (VectorInfo**)realloc(
|
||||
gVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::VerifyContextVector(void) const {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
if(0!=gVerificationOutputDir) {
|
||||
|
||||
#ifdef XP_PC
|
||||
char path[_MAX_PATH+1];
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
#endif
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
|
||||
#ifdef NS_WIN32
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
mkdir(path);
|
||||
#endif
|
||||
}
|
||||
|
||||
//**************************************************
|
||||
//Add code here to see if we understand this vector
|
||||
//**************************************************
|
||||
|
||||
if(PR_FALSE==result){
|
||||
#ifdef NS_WIN32
|
||||
// save file to directory indicated by bad context vector
|
||||
int iCount = 1;
|
||||
char filename[_MAX_PATH];
|
||||
do {
|
||||
sprintf(filename,"%s/html%04d.dbg", path, iCount++);
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (debugFile) {
|
||||
PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
|
||||
PR_Write(debugFile,"\n",PL_strlen("\n"));
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
#endif
|
||||
//add debugging code here to record the fact that we just encountered
|
||||
//a context vector we don't know how to handle.
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=anOutputDir || aRecordStats)
|
||||
result=VerifyContextVector();
|
||||
|
||||
if (aRecordStats) {
|
||||
NoteVector(mContextStack,mContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=anOutputDir) {
|
||||
char path[2048];
|
||||
strcpy(path,anOutputDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path,gURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
mParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
|
||||
class nsHTMLParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class CNavDTD : public nsIDTD {
|
||||
|
||||
|
@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
|
|||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
|
@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
|
|||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef if the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
|
|||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -699,7 +695,8 @@ protected:
|
|||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "COtherDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
|
@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
|||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
|
@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
|
|||
* @return
|
||||
*/
|
||||
COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
|
@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
|||
*/
|
||||
COtherDTD::~COtherDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
|
@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){
|
|||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
|
@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
|
|||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
|
@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
|||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
|
@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
|||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
|
@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
|
|||
return;
|
||||
}
|
||||
|
||||
void COtherDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in decending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* quick sort the statistic array causing the most frequently
|
||||
* used vectors to be at the top (this makes it a little speedier
|
||||
* when looking them up)
|
||||
*/
|
||||
static void SortVectorRecord(void) {
|
||||
// of course, sort it only if there is something to sort
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!gVectorInfoArray) {
|
||||
gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < gVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (gVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
gVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
gVectorInfoArray[gVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((gVectorCount % TABLE_SIZE) == 0) {
|
||||
gVectorInfoArray = (VectorInfo**)realloc(
|
||||
gVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
SortVectorRecord();
|
||||
}
|
||||
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord_other(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
SortVectorRecord();
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool COtherDTD::VerifyContextVector(void) const {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
if(0!=gVerificationOutputDir) {
|
||||
|
||||
#ifdef XP_PC
|
||||
char path[_MAX_PATH+1];
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
#endif
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
|
||||
#ifdef NS_WIN32
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
mkdir(path);
|
||||
#endif
|
||||
}
|
||||
|
||||
//**************************************************
|
||||
//Add code here to see if we understand this vector
|
||||
//**************************************************
|
||||
|
||||
if(PR_FALSE==result){
|
||||
#ifdef NS_WIN32
|
||||
// save file to directory indicated by bad context vector
|
||||
int iCount = 1;
|
||||
char filename[_MAX_PATH];
|
||||
do {
|
||||
sprintf(filename,"%s/html%04d.dbg", path, iCount++);
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (debugFile) {
|
||||
PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
|
||||
PR_Write(debugFile,"\n",PL_strlen("\n"));
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
#endif
|
||||
//add debugging code here to record the fact that we just encountered
|
||||
//a context vector we don't know how to handle.
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool COtherDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=anOutputDir || aRecordStats)
|
||||
result=VerifyContextVector();
|
||||
|
||||
if (aRecordStats) {
|
||||
NoteVector(mContextStack,mContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=anOutputDir) {
|
||||
char path[2048];
|
||||
strcpy(path,anOutputDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path,gURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
mParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
#include "nsDeque.h"
|
||||
|
||||
|
||||
|
||||
#define NS_IOtherHTML_DTD_IID \
|
||||
{0x8a5e89c0, 0xd16d, 0x11d1, \
|
||||
{0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}}
|
||||
|
@ -42,6 +41,7 @@
|
|||
|
||||
class nsIParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class COtherDTD : public nsIDTD {
|
||||
|
||||
|
@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
|
|||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
|
@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
|
|||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef is the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
|
|||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -701,7 +696,8 @@ protected:
|
|||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
|
|||
CPPSRCS = \
|
||||
nsHTMLContentSink.cpp \
|
||||
nsParserNode.cpp \
|
||||
nsParserDebug.cpp \
|
||||
nsScanner.cpp \
|
||||
nsToken.cpp \
|
||||
nsTokenHandler.cpp \
|
||||
|
@ -41,6 +42,8 @@ EXPORTS = \
|
|||
nsHTMLTokens.h \
|
||||
nsIParserNode.h \
|
||||
nsIParser.h \
|
||||
nsIParserDebug.h \
|
||||
nsIParserFilter.h \
|
||||
nsToken.h \
|
||||
$(NULL)
|
||||
|
||||
|
|
|
@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
|
|||
nsHTMLParser.cpp prstrm.cpp
|
||||
|
||||
EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \
|
||||
nsIParserDebug.h nsIParserFilter.h
|
||||
|
||||
CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
||||
.\$(OBJDIR)\CNavDTD.obj \
|
||||
|
@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
|||
.\$(OBJDIR)\nsHTMLParser.obj \
|
||||
.\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \
|
||||
.\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \
|
||||
.\$(OBJDIR)\prstrm.obj
|
||||
|
||||
LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include "nsIInputStream.h"
|
||||
#include "nsIParserFilter.h"
|
||||
|
||||
/* UNCOMMENT THIS IF STUFF STOPS WORKING...
|
||||
#ifdef XP_PC
|
||||
|
@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
|
|||
static const char* kNullFilename= "Error: Null filename given";
|
||||
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static PRBool gRecordingStatistics=PR_TRUE;
|
||||
static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream
|
||||
static char* gURLRef=0;
|
||||
|
||||
//#define DEBUG_SAVE_SOURCE_DOC 1
|
||||
#ifdef DEBUG_SAVE_SOURCE_DOC
|
||||
|
@ -58,17 +56,6 @@ fstream* gTempStream=0;
|
|||
#endif
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
gVerificationOutputDir = verify_dir;
|
||||
}
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
|
||||
{
|
||||
gRecordingStatistics = bval;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
* cause the COM-like construction of an nsHTMLParser.
|
||||
|
@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
|
|||
*/
|
||||
nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParserFilter = nsnull;
|
||||
mListener = nsnull;
|
||||
mTransferBuffer=0;
|
||||
mSink=0;
|
||||
|
@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
|||
* @return
|
||||
*/
|
||||
nsHTMLParser::~nsHTMLParser() {
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
NS_IF_RELEASE(mListener);
|
||||
if(mTransferBuffer)
|
||||
delete [] mTransferBuffer;
|
||||
|
@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
|
|||
delete mCurrentPos;
|
||||
mCurrentPos=0;
|
||||
if(mDTD)
|
||||
delete mDTD;
|
||||
NS_RELEASE(mDTD);
|
||||
mDTD=0;
|
||||
if(mScanner)
|
||||
delete mScanner;
|
||||
|
@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
/**
 *  Installs the filter that is given the raw transfer buffer
 *  before it is appended to the scanner. The parser holds a
 *  reference on the installed filter and releases the one it
 *  replaces.
 *
 *  @param   aFilter is the new filter, or null to remove the
 *           current one
 *  @return  the local copy of the previous filter pointer as it
 *           stands after NS_RELEASE; the parser's reference has
 *           been dropped, so callers must not treat it as an owned
 *           reference. NOTE(review): confirm whether NS_RELEASE
 *           nulls its argument in this tree.
 */
nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
{
  nsIParserFilter* old=mParserFilter;

  // take the new reference before dropping the old one, so that
  // re-installing the current filter cannot destroy it
  if(aFilter)
    NS_ADDREF(aFilter);

  // always store the new value: with a null aFilter the member used
  // to keep pointing at the filter that was just released
  mParserFilter=aFilter;

  if(old)
    NS_RELEASE(old);
  return old;
}
|
||||
|
||||
/**
|
||||
* This method gets called in order to set the content
|
||||
* sink for this parser to dump nodes to.
|
||||
|
@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
|
|||
mDTD=aDTD;
|
||||
}
|
||||
|
||||
/**
 *  Accessor for the DTD currently held by this parser.
 *
 *  @return  the value of mDTD; no reference is added by this call
 */
nsIDTD * nsHTMLParser::GetDTD(void) {
  nsIDTD* theDTD=mDTD;
  return theDTD;
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
|
@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
|
|||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsIDTD* GetDTD(eParseMode aMode) {
|
||||
nsIDTD* NewDTD(eParseMode aMode) {
|
||||
nsIDTD* aDTD=0;
|
||||
switch(aMode) {
|
||||
case eParseMode_navigator:
|
||||
|
@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
if (aDTD)
|
||||
aDTD->AddRef();
|
||||
return aDTD;
|
||||
}
|
||||
|
||||
|
@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
|||
nsString theBuffer;
|
||||
const int kLocalBufSize=10;
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
if (aFilename)
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mMajorIteration=-1;
|
||||
mMinorIteration=-1;
|
||||
|
||||
|
@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
|||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
|
||||
NS_PRECONDITION(0!=aFilename,kNullFilename);
|
||||
PRInt32 status=kBadFilename;
|
||||
mIncremental=aIncremental;
|
||||
|
||||
if(aFilename) {
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aFilename);
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
|
@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
|||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental) {
|
||||
PRBool aIncremental,
|
||||
nsIParserDebug * aDebug) {
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRInt32 status=kBadURL;
|
||||
|
@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
|||
|
||||
if(aURL) {
|
||||
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (aURL->GetSpec())
|
||||
gURLRef = PL_strdup(aURL->GetSpec());
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aURL->GetSpec());
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
|
@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (mParserFilter)
|
||||
mParserFilter->RawBuffer(mTransferBuffer, &len);
|
||||
|
||||
mScanner->Append(&mTransferBuffer[offset],len);
|
||||
|
||||
} //if
|
||||
|
|
|
@ -1,313 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This class does two primary jobs:
|
||||
* 1) It iterates the tokens provided during the
|
||||
* tokenization process, identifying where elements
|
||||
* begin and end (doing validation and normalization).
|
||||
* 2) It controls and coordinates with an instance of
|
||||
* the IContentSink interface, to coordinate the
|
||||
* production of the content model.
|
||||
*
|
||||
* The basic operation of this class assumes that an HTML
|
||||
* document is non-normalized. Therefore, we don't process
|
||||
* the document in a normalized way. Don't bother to look
|
||||
* for methods like: doHead() or doBody().
|
||||
*
|
||||
* Instead, in order to be backward compatible, we must
|
||||
* scan the set of tokens and perform this basic set of
|
||||
* operations:
|
||||
* 1) Determine the token type (easy, since the tokens know)
|
||||
* 2) Determine the appropriate section of the HTML document
|
||||
* each token belongs in (HTML,HEAD,BODY,FRAMESET).
|
||||
* 3) Insert content into our document (via the sink) into
|
||||
* the correct section.
|
||||
* 4) In the case of tags that belong in the BODY, we must
|
||||
* ensure that our underlying document state reflects
|
||||
* the appropriate context for our tag.
|
||||
*
|
||||
* For example,if we see a <TR>, we must ensure our
|
||||
* document contains a table into which the row can
|
||||
* be placed. This may result in "implicit containers"
|
||||
* created to ensure a well-formed document.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NS_HTMLPARSER__
|
||||
#define NS_HTMLPARSER__
|
||||
|
||||
#include "nsIParser.h"
|
||||
#include "nsDeque.h"
|
||||
#include "nsParserNode.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "nsIURL.h"
|
||||
#include "nsIStreamListener.h"
|
||||
|
||||
|
||||
#define NS_IHTML_PARSER_IID \
|
||||
{0x2ce606b0, 0xbee6, 0x11d1, \
|
||||
{0xaa, 0xd9, 0x00, 0x80, 0x5f, 0x8a, 0x3e, 0x14}}
|
||||
|
||||
|
||||
class IContentSink;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class CScanner;
|
||||
|
||||
|
||||
class nsHTMLParser : public nsIParser, public nsIStreamListener {
|
||||
|
||||
public:
|
||||
friend class CTokenHandler;
|
||||
|
||||
NS_DECL_ISUPPORTS
|
||||
|
||||
|
||||
/**
|
||||
* default constructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
nsHTMLParser();
|
||||
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
~nsHTMLParser();
|
||||
|
||||
/**
|
||||
* Select given content sink into parser for parser output
|
||||
* @update gess5/11/98
|
||||
* @param aSink is the new sink to be used by parser
|
||||
* @return old sink, or NULL
|
||||
*/
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
|
||||
|
||||
virtual void SetDTD(nsIDTD* aDTD);
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 6/9/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
virtual CScanner* GetScanner(void);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given URL in given mode
|
||||
* @update gess5/11/98
|
||||
* @param aURL is a descriptor for source document
|
||||
* @param aListener is a listener to forward notifications to
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given file in given mode
|
||||
* @update gess5/11/98
|
||||
* @param aFilename is a path for file document
|
||||
* @param aMode is the desired parser mode (Nav, other, etc.)
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* @update gess5/11/98
|
||||
* @param anHTMLString contains a string-full of real HTML
|
||||
* @param appendTokens tells us whether we should insert tokens inline, or append them.
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
|
||||
|
||||
/**
|
||||
* This method gets called (automatically) during incremental parsing
|
||||
* @update gess5/11/98
|
||||
* @return TRUE if all went well, otherwise FALSE
|
||||
*/
|
||||
virtual PRInt32 ResumeParse(void);
|
||||
|
||||
/**
|
||||
* Causes the parser to scan forward, collecting nearby (sequential)
|
||||
* attribute tokens into the given node.
|
||||
* @update gess5/11/98
|
||||
* @param node to store attributes
|
||||
* @return number of attributes added to node.
|
||||
*/
|
||||
virtual PRInt32 CollectAttributes(nsCParserNode& aNode,PRInt32 aCount);
|
||||
|
||||
/**
|
||||
* Causes the next skipped-content token (if any) to
|
||||
* be consumed by this node.
|
||||
* @update gess5/11/98
|
||||
* @param node to consume skipped-content
|
||||
* @return number of skipped-content tokens consumed.
|
||||
*/
|
||||
virtual PRInt32 CollectSkippedContent(nsCParserNode& aNode);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpSource(ostream& out);
|
||||
|
||||
|
||||
//*********************************************
|
||||
// These methods are callback methods used by
|
||||
// net lib to let us know about our inputstream.
|
||||
//*********************************************
|
||||
NS_IMETHOD GetBindInfo(void);
|
||||
NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const nsString& aMmsg);
|
||||
NS_IMETHOD OnStartBinding(const char *aContentType);
|
||||
NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
|
||||
NS_IMETHOD OnStopBinding(PRInt32 status, const nsString& aMsg);
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 WillBuildModel(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 DidBuildModel(PRInt32 anErrorCode);
|
||||
|
||||
/**
|
||||
* This method gets called when the tokens have been consumed, and it's time
|
||||
* to build the model via the content sink.
|
||||
* @update gess5/11/98
|
||||
* @return YES if model building went well -- NO otherwise.
|
||||
*/
|
||||
virtual PRInt32 IterateTokens(void);
|
||||
|
||||
private:
|
||||
PRInt32 ParseFileIncrementally(const char* aFilename); //XXX ONLY FOR DEBUG PURPOSES...
|
||||
|
||||
/*******************************************
|
||||
These are the tokenization methods...
|
||||
*******************************************/
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
virtual PRInt32 ConsumeToken(CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Part of the code sandwich, this gets called right before
|
||||
* the tokenization process begins. The main reason for
|
||||
* this call is to allow the delegate to do initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens);
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 Tokenize(void);
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenization
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpTokens(ostream& out);
|
||||
|
||||
|
||||
protected:
|
||||
//*********************************************
|
||||
// And now, some data members...
|
||||
//*********************************************
|
||||
|
||||
nsIStreamListener* mListener;
|
||||
nsIContentSink* mSink;
|
||||
|
||||
nsDequeIterator* mCurrentPos;
|
||||
nsDequeIterator* mMarkPos;
|
||||
|
||||
nsIDTD* mDTD;
|
||||
eParseMode mParseMode;
|
||||
PRBool mIncremental;
|
||||
char* mTransferBuffer;
|
||||
|
||||
PRInt32 mMajorIteration;
|
||||
PRInt32 mMinorIteration;
|
||||
|
||||
nsDeque mTokenDeque;
|
||||
CScanner* mScanner;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -37,6 +37,7 @@
|
|||
class nsIParser;
|
||||
class CToken;
|
||||
class nsIContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class nsIDTD : public nsISupports {
|
||||
|
||||
|
@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {
|
|||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef is the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
|
||||
virtual void SetURLRef(char * aURLRef) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParent parent tag
|
||||
* @param aChild child tag
|
||||
* @return PR_TRUE if valid container
|
||||
*/
|
||||
virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ class nsString;
|
|||
class CToken;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class nsIParserDebug;
|
||||
|
||||
/**
|
||||
* This class defines the iparser interface. This XPCOM
|
||||
|
@ -60,9 +61,10 @@ class nsIParser : public nsISupports {
|
|||
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE) = 0;
|
||||
PRBool aIncremental=PR_TRUE,
|
||||
nsIParserDebug * aDebug = 0) = 0;
|
||||
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;
|
||||
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
|
||||
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/8/98
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NS_IPARSERDEBUG__
|
||||
#define NS_IPARSERDEBUG__
|
||||
|
||||
#include "nsISupports.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "prtypes.h"
|
||||
|
||||
#define NS_IPARSERDEBUG_IID \
|
||||
{0x7b68c220, 0x0685, 0x11d2, \
|
||||
{0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
|
||||
|
||||
|
||||
class nsIDTD;
|
||||
class nsHTMLParser;
|
||||
|
||||
class nsIParserDebug : public nsISupports {
|
||||
|
||||
public:
|
||||
|
||||
virtual void SetVerificationDirectory(char * verify_dir) = 0;
|
||||
|
||||
virtual void SetRecordStatistics(PRBool bval) = 0;
|
||||
|
||||
virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0;
|
||||
|
||||
virtual void DumpVectorRecord(void) = 0;
|
||||
|
||||
};
|
||||
|
||||
extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
|
||||
|
||||
#endif /* NS_IPARSERDEBUG__ */
|
|
@ -0,0 +1,51 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 6/17/98
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef IPARSERFILTER
|
||||
#define IPARSERFILTER
|
||||
|
||||
#include "nsISupports.h"
|
||||
|
||||
class CToken;
|
||||
|
||||
#define NS_IPARSERFILTER_IID \
|
||||
{0x14d6ff0, 0x0610, 0x11d2, \
|
||||
{0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
|
||||
|
||||
|
||||
class nsIParserFilter : public nsISupports {
|
||||
public:
|
||||
|
||||
NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0;
|
||||
|
||||
NS_IMETHOD WillAddToken(CToken & token) = 0;
|
||||
|
||||
NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0;
|
||||
};
|
||||
|
||||
extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,534 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 06/18/98
|
||||
*
|
||||
* This file contains the parser debugger object which aids in
|
||||
* walking links and reporting statistic information, reporting
|
||||
* bad vectors.
|
||||
*/
|
||||
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsHTMLParser.h"
|
||||
#include "nsIParserDebug.h"
|
||||
#include "nsCRT.h"
|
||||
#include "prenv.h" //this is here for debug reasons...
|
||||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
class CParserDebug : public nsIParserDebug {
|
||||
public:
|
||||
|
||||
CParserDebug(char * aVerifyDir = 0);
|
||||
~CParserDebug();
|
||||
|
||||
NS_DECL_ISUPPORTS
|
||||
|
||||
void SetVerificationDirectory(char * verify_dir);
|
||||
void SetRecordStatistics(PRBool bval);
|
||||
PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
|
||||
void DumpVectorRecord(void);
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
|
||||
private:
|
||||
VectorInfo ** mVectorInfoArray;
|
||||
PRInt32 mVectorCount;
|
||||
char * mVerificationDir;
|
||||
PRBool mRecordingStatistics;
|
||||
|
||||
PRBool DebugRecord(char * path, char * pURLRef, char * filename);
|
||||
void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
|
||||
void MakeVectorString(char * vector_string, VectorInfo * pInfo);
|
||||
};
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
* cause the COM-like construction of an nsHTMLParser.
|
||||
*
|
||||
* @update jevering 3/25/98
|
||||
* @param nsIParser** ptr to newly instantiated parser
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
|
||||
NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
|
||||
{
|
||||
CParserDebug *it = new CParserDebug();
|
||||
|
||||
if (it == 0) {
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
return it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
|
||||
}
|
||||
|
||||
/**
 *  Constructor. Starts with an empty statistics table and with
 *  statistics recording switched on. The verification directory is
 *  taken from the argument or, when that is null, from the
 *  VERIFY_PARSER environment variable; a private copy is kept.
 *
 *  @param   aVerifyDir verification directory, may be null
 */
CParserDebug::CParserDebug(char * aVerifyDir)
{
  NS_INIT_REFCNT();
  mVectorInfoArray = 0;
  mVectorCount = 0;
  mRecordingStatistics = PR_TRUE;

  // explicit argument wins, the environment is the fallback
  char * theDirectory = aVerifyDir;
  if (!theDirectory)
    theDirectory = PR_GetEnv("VERIFY_PARSER");

  mVerificationDir = theDirectory ? PL_strdup(theDirectory) : 0;
}
|
||||
|
||||
/**
 *  Destructor. Releases the copy of the verification directory and
 *  whatever is still held in the vector statistics table (the table
 *  is only emptied elsewhere by DumpVectorRecord, which a caller may
 *  never have invoked).
 */
CParserDebug::~CParserDebug()
{
  if (mVerificationDir)
    PL_strfree(mVerificationDir);

  if (mVectorInfoArray) {
    for (PRInt32 i = 0; i < mVectorCount; i++) {
      VectorInfo * theInfo = mVectorInfoArray[i];
      if (theInfo) {
        if (theInfo->vector)
          PR_Free(theInfo->vector);
        PR_Free(theInfo);
      }
    }
    PR_Free(mVectorInfoArray);
    mVectorInfoArray = 0;
    mVectorCount = 0;
  }
}
|
||||
|
||||
/**
|
||||
* This method gets called as part of our COM-like interfaces.
|
||||
* Its purpose is to create an interface to parser object
|
||||
* of some type.
|
||||
*
|
||||
* @update gess 4/8/98
|
||||
* @param nsIID id of object to discover
|
||||
* @param aInstancePtr ptr to newly discovered interface
|
||||
* @return NS_xxx result code
|
||||
*/
|
||||
nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
||||
{
|
||||
if (NULL == aInstancePtr) {
|
||||
return NS_ERROR_NULL_POINTER;
|
||||
}
|
||||
|
||||
if(aIID.Equals(kISupportsIID)) { //do IUnknown...
|
||||
*aInstancePtr = (nsIParserDebug*)(this);
|
||||
}
|
||||
else if(aIID.Equals(kIDebugParserIID)) { //do IParserDebug base class...
|
||||
*aInstancePtr = (nsIParserDebug*)(this);
|
||||
}
|
||||
else {
|
||||
*aInstancePtr=0;
|
||||
return NS_NOINTERFACE;
|
||||
}
|
||||
((nsISupports*) *aInstancePtr)->AddRef();
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMPL_ADDREF(CParserDebug)
|
||||
NS_IMPL_RELEASE(CParserDebug)
|
||||
|
||||
void CParserDebug::SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
if (mVerificationDir) {
|
||||
PL_strfree(mVerificationDir);
|
||||
mVerificationDir = 0;
|
||||
}
|
||||
mVerificationDir = PL_strdup(verify_dir);
|
||||
}
|
||||
|
||||
/**
 *  Switches the counting of context vectors in Verify() on or off.
 *
 *  @param   bval PR_TRUE to record statistics, PR_FALSE to stop
 */
void CParserDebug::SetRecordStatistics(PRBool bval)
{
  mRecordingStatistics = bval;
}
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,mVerificationDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* compare function for quick sort. Compares references and
|
||||
* sorts in decending order
|
||||
*/
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!mVectorInfoArray) {
|
||||
mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < mVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (mVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
mVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
mVectorInfoArray[mVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((mVectorCount % TABLE_SIZE) == 0) {
|
||||
mVectorInfoArray = (VectorInfo**)realloc(
|
||||
mVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (mVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (mVerificationDir)
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (!mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (mVectorInfoArray[i]->vector)
|
||||
PR_Free(mVectorInfoArray[i]->vector);
|
||||
PR_Free(mVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(mVectorInfoArray);
|
||||
mVectorInfoArray = 0;
|
||||
mVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef)
|
||||
{
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=mVerificationDir || mRecordingStatistics) {
|
||||
|
||||
if(aDTD && aContextStackPos>1) {
|
||||
for (int i = 0; i < aContextStackPos-1; i++)
|
||||
if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) {
|
||||
result = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mRecordingStatistics) {
|
||||
NoteVector(aContextStack,aContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=mVerificationDir) {
|
||||
char path[2048];
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<aContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(aContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path, aURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
if (aParser)
|
||||
aParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
|
@ -31,6 +31,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
|
@ -43,13 +44,10 @@
|
|||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
|
||||
#ifdef XP_PC
|
||||
#include <direct.h> //this is here for debug reasons...
|
||||
#endif
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
|
||||
|
@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
|||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
|
@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
|
|||
* @return
|
||||
*/
|
||||
CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
|
@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
|||
*/
|
||||
CNavDTD::~CNavDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
|
@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){
|
|||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
|
@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
|
|||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
|
@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
|||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
|
@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
|||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
|
@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
|
|||
//otherwise, intentionally fall through...
|
||||
|
||||
case eHTMLTag_tr:
|
||||
if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
|
||||
if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
|
||||
aVector.Append((PRUnichar)eHTMLTag_td);
|
||||
result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
|
||||
// result=PR_TRUE;
|
||||
|
@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
|
|||
return;
|
||||
}
|
||||
|
||||
void CNavDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in decending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!gVectorInfoArray) {
|
||||
gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < gVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (gVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
gVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
gVectorInfoArray[gVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((gVectorCount % TABLE_SIZE) == 0) {
|
||||
gVectorInfoArray = (VectorInfo**)realloc(
|
||||
gVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::VerifyContextVector(void) const {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
if(0!=gVerificationOutputDir) {
|
||||
|
||||
#ifdef XP_PC
|
||||
char path[_MAX_PATH+1];
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
#endif
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
|
||||
#ifdef NS_WIN32
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
mkdir(path);
|
||||
#endif
|
||||
}
|
||||
|
||||
//**************************************************
|
||||
//Add code here to see if we understand this vector
|
||||
//**************************************************
|
||||
|
||||
if(PR_FALSE==result){
|
||||
#ifdef NS_WIN32
|
||||
// save file to directory indicated by bad context vector
|
||||
int iCount = 1;
|
||||
char filename[_MAX_PATH];
|
||||
do {
|
||||
sprintf(filename,"%s/html%04d.dbg", path, iCount++);
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (debugFile) {
|
||||
PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
|
||||
PR_Write(debugFile,"\n",PL_strlen("\n"));
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
#endif
|
||||
//add debugging code here to record the fact that we just encountered
|
||||
//a context vector we don't know how to handle.
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=anOutputDir || aRecordStats)
|
||||
result=VerifyContextVector();
|
||||
|
||||
if (aRecordStats) {
|
||||
NoteVector(mContextStack,mContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=anOutputDir) {
|
||||
char path[2048];
|
||||
strcpy(path,anOutputDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path,gURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
mParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
|
||||
class nsHTMLParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class CNavDTD : public nsIDTD {
|
||||
|
||||
|
@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
|
|||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
|
@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
|
|||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef if the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
|
|||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -699,7 +695,8 @@ protected:
|
|||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "COtherDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
|
@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
|||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
|
@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
|
|||
* @return
|
||||
*/
|
||||
COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
|
@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
|||
*/
|
||||
COtherDTD::~COtherDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
|
@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){
|
|||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
|
@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
|
|||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
|
@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
|||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
|
@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
|||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
|
@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
|
|||
return;
|
||||
}
|
||||
|
||||
void COtherDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in decending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* quick sort the statistic array causing the most frequently
|
||||
* used vectors to be at the top (this makes it a little speedier
|
||||
* when looking them up)
|
||||
*/
|
||||
static void SortVectorRecord(void) {
|
||||
// of course, sort it only if there is something to sort
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!gVectorInfoArray) {
|
||||
gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < gVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (gVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
gVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
gVectorInfoArray[gVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((gVectorCount % TABLE_SIZE) == 0) {
|
||||
gVectorInfoArray = (VectorInfo**)realloc(
|
||||
gVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
SortVectorRecord();
|
||||
}
|
||||
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord_other(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
SortVectorRecord();
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool COtherDTD::VerifyContextVector(void) const {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
if(0!=gVerificationOutputDir) {
|
||||
|
||||
#ifdef XP_PC
|
||||
char path[_MAX_PATH+1];
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
#endif
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
|
||||
#ifdef NS_WIN32
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
mkdir(path);
|
||||
#endif
|
||||
}
|
||||
|
||||
//**************************************************
|
||||
//Add code here to see if we understand this vector
|
||||
//**************************************************
|
||||
|
||||
if(PR_FALSE==result){
|
||||
#ifdef NS_WIN32
|
||||
// save file to directory indicated by bad context vector
|
||||
int iCount = 1;
|
||||
char filename[_MAX_PATH];
|
||||
do {
|
||||
sprintf(filename,"%s/html%04d.dbg", path, iCount++);
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (debugFile) {
|
||||
PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
|
||||
PR_Write(debugFile,"\n",PL_strlen("\n"));
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
#endif
|
||||
//add debugging code here to record the fact that we just encountered
|
||||
//a context vector we don't know how to handle.
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool COtherDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=anOutputDir || aRecordStats)
|
||||
result=VerifyContextVector();
|
||||
|
||||
if (aRecordStats) {
|
||||
NoteVector(mContextStack,mContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=anOutputDir) {
|
||||
char path[2048];
|
||||
strcpy(path,anOutputDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path,gURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
mParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
#include "nsDeque.h"
|
||||
|
||||
|
||||
|
||||
#define NS_IOtherHTML_DTD_IID \
|
||||
{0x8a5e89c0, 0xd16d, 0x11d1, \
|
||||
{0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}}
|
||||
|
@ -42,6 +41,7 @@
|
|||
|
||||
class nsIParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class COtherDTD : public nsIDTD {
|
||||
|
||||
|
@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
|
|||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
|
@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
|
|||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef is the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
|
|||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
|
@ -701,7 +696,8 @@ protected:
|
|||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
|
|||
CPPSRCS = \
|
||||
nsHTMLContentSink.cpp \
|
||||
nsParserNode.cpp \
|
||||
nsParserDebug.cpp \
|
||||
nsScanner.cpp \
|
||||
nsToken.cpp \
|
||||
nsTokenHandler.cpp \
|
||||
|
@ -41,6 +42,8 @@ EXPORTS = \
|
|||
nsHTMLTokens.h \
|
||||
nsIParserNode.h \
|
||||
nsIParser.h \
|
||||
nsIParserDebug.h \
|
||||
nsIParserFilter.h \
|
||||
nsToken.h \
|
||||
$(NULL)
|
||||
|
||||
|
|
|
@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
|
|||
nsHTMLParser.cpp prstrm.cpp
|
||||
|
||||
EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \
|
||||
nsIParserDebug.h nsIParserFilter.h
|
||||
|
||||
CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
||||
.\$(OBJDIR)\CNavDTD.obj \
|
||||
|
@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
|||
.\$(OBJDIR)\nsHTMLParser.obj \
|
||||
.\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \
|
||||
.\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \
|
||||
.\$(OBJDIR)\prstrm.obj
|
||||
|
||||
LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include "nsIInputStream.h"
|
||||
#include "nsIParserFilter.h"
|
||||
|
||||
/* UNCOMMENT THIS IF STUFF STOPS WORKING...
|
||||
#ifdef XP_PC
|
||||
|
@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
|
|||
static const char* kNullFilename= "Error: Null filename given";
|
||||
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static PRBool gRecordingStatistics=PR_TRUE;
|
||||
static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream
|
||||
static char* gURLRef=0;
|
||||
|
||||
//#define DEBUG_SAVE_SOURCE_DOC 1
|
||||
#ifdef DEBUG_SAVE_SOURCE_DOC
|
||||
|
@ -58,17 +56,6 @@ fstream* gTempStream=0;
|
|||
#endif
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
gVerificationOutputDir = verify_dir;
|
||||
}
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
|
||||
{
|
||||
gRecordingStatistics = bval;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
* cause the COM-like construction of an nsHTMLParser.
|
||||
|
@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
|
|||
*/
|
||||
nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParserFilter = nsnull;
|
||||
mListener = nsnull;
|
||||
mTransferBuffer=0;
|
||||
mSink=0;
|
||||
|
@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
|||
* @return
|
||||
*/
|
||||
nsHTMLParser::~nsHTMLParser() {
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
NS_IF_RELEASE(mListener);
|
||||
if(mTransferBuffer)
|
||||
delete [] mTransferBuffer;
|
||||
|
@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
|
|||
delete mCurrentPos;
|
||||
mCurrentPos=0;
|
||||
if(mDTD)
|
||||
delete mDTD;
|
||||
NS_RELEASE(mDTD);
|
||||
mDTD=0;
|
||||
if(mScanner)
|
||||
delete mScanner;
|
||||
|
@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
/**
 * Installs aFilter as the parser's raw-input filter, replacing whatever
 * filter was installed before. Passing null removes the current filter.
 *
 * The parser holds one reference on the installed filter. The reference on
 * the previous filter is dropped here, so the parser never keeps a pointer
 * to an object it no longer owns (the previous code left mParserFilter
 * pointing at the released filter when aFilter was null).
 *
 * @param   aFilter -- new filter to install, or null to clear it
 * @return  the previous filter pointer as left by NS_RELEASE.
 *          NOTE(review): the parser no longer holds a reference to that
 *          object, so callers must not use the result unless they own a
 *          reference of their own -- confirm against callers.
 */
nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
{
  // Take the new reference before dropping the old one, so that
  // re-installing the current filter cannot destroy it in between.
  if(aFilter) {
    NS_ADDREF(aFilter);
  }

  nsIParserFilter* old=mParserFilter;
  mParserFilter=aFilter;

  if(old) {
    NS_RELEASE(old);
  }
  return old;
}
|
||||
|
||||
/**
|
||||
* This method gets called in order to set the content
|
||||
* sink for this parser to dump nodes to.
|
||||
|
@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
|
|||
mDTD=aDTD;
|
||||
}
|
||||
|
||||
/**
 * Accessor for the DTD currently held by this parser.
 *
 * @return  the value of mDTD (may be null); the pointer is handed back
 *          as-is, no AddRef is performed here.
 */
nsIDTD * nsHTMLParser::GetDTD(void)
{
  nsIDTD* theDTD=mDTD;
  return theDTD;
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
|
@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
|
|||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsIDTD* GetDTD(eParseMode aMode) {
|
||||
nsIDTD* NewDTD(eParseMode aMode) {
|
||||
nsIDTD* aDTD=0;
|
||||
switch(aMode) {
|
||||
case eParseMode_navigator:
|
||||
|
@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
if (aDTD)
|
||||
aDTD->AddRef();
|
||||
return aDTD;
|
||||
}
|
||||
|
||||
|
@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
|||
nsString theBuffer;
|
||||
const int kLocalBufSize=10;
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
if (aFilename)
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mMajorIteration=-1;
|
||||
mMinorIteration=-1;
|
||||
|
||||
|
@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
|||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
|
||||
NS_PRECONDITION(0!=aFilename,kNullFilename);
|
||||
PRInt32 status=kBadFilename;
|
||||
mIncremental=aIncremental;
|
||||
|
||||
if(aFilename) {
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aFilename);
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
|
@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
|||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental) {
|
||||
PRBool aIncremental,
|
||||
nsIParserDebug * aDebug) {
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRInt32 status=kBadURL;
|
||||
|
@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
|||
|
||||
if(aURL) {
|
||||
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (aURL->GetSpec())
|
||||
gURLRef = PL_strdup(aURL->GetSpec());
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aURL->GetSpec());
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
|
@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (mParserFilter)
|
||||
mParserFilter->RawBuffer(mTransferBuffer, &len);
|
||||
|
||||
mScanner->Append(&mTransferBuffer[offset],len);
|
||||
|
||||
} //if
|
||||
|
|
|
@ -1,313 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This class does two primary jobs:
|
||||
* 1) It iterates the tokens provided during the
|
||||
* tokenization process, identifying where elements
|
||||
* begin and end (doing validation and normalization).
|
||||
* 2) It controls and coordinates with an instance of
|
||||
* the IContentSink interface, to coordinate the
|
||||
* production of the content model.
|
||||
*
|
||||
* The basic operation of this class assumes that an HTML
|
||||
* document is non-normalized. Therefore, we don't process
|
||||
* the document in a normalized way. Don't bother to look
|
||||
* for methods like: doHead() or doBody().
|
||||
*
|
||||
* Instead, in order to be backward compatible, we must
|
||||
* scan the set of tokens and perform this basic set of
|
||||
* operations:
|
||||
* 1) Determine the token type (easy, since the tokens know)
|
||||
* 2) Determine the appropriate section of the HTML document
|
||||
* each token belongs in (HTML,HEAD,BODY,FRAMESET).
|
||||
* 3) Insert content into our document (via the sink) into
|
||||
* the correct section.
|
||||
* 4) In the case of tags that belong in the BODY, we must
|
||||
* ensure that our underlying document state reflects
|
||||
* the appropriate context for our tag.
|
||||
*
|
||||
* For example,if we see a <TR>, we must ensure our
|
||||
* document contains a table into which the row can
|
||||
* be placed. This may result in "implicit containers"
|
||||
* created to ensure a well-formed document.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NS_HTMLPARSER__
|
||||
#define NS_HTMLPARSER__
|
||||
|
||||
#include "nsIParser.h"
|
||||
#include "nsDeque.h"
|
||||
#include "nsParserNode.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "nsIURL.h"
|
||||
#include "nsIStreamListener.h"
|
||||
|
||||
|
||||
#define NS_IHTML_PARSER_IID \
|
||||
{0x2ce606b0, 0xbee6, 0x11d1, \
|
||||
{0xaa, 0xd9, 0x00, 0x80, 0x5f, 0x8a, 0x3e, 0x14}}
|
||||
|
||||
|
||||
class IContentSink;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class CScanner;
|
||||
|
||||
|
||||
class nsHTMLParser : public nsIParser, public nsIStreamListener {
|
||||
|
||||
public:
|
||||
friend class CTokenHandler;
|
||||
|
||||
NS_DECL_ISUPPORTS
|
||||
|
||||
|
||||
/**
|
||||
* default constructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
nsHTMLParser();
|
||||
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
~nsHTMLParser();
|
||||
|
||||
/**
|
||||
* Select given content sink into parser for parser output
|
||||
* @update gess5/11/98
|
||||
* @param aSink is the new sink to be used by parser
|
||||
* @return old sink, or NULL
|
||||
*/
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
|
||||
|
||||
virtual void SetDTD(nsIDTD* aDTD);
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 6/9/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
virtual CScanner* GetScanner(void);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given URL in given mode
|
||||
* @update gess5/11/98
|
||||
* @param aURL is a descriptor for source document
|
||||
* @param aListener is a listener to forward notifications to
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given file in given mode
|
||||
* @update gess5/11/98
|
||||
* @param aFilename is a path for file document
|
||||
* @param aMode is the desired parser mode (Nav, other, etc.)
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* @update gess5/11/98
|
||||
* @param anHTMLString contains a string-full of real HTML
|
||||
* @param appendTokens tells us whether we should insert tokens inline, or append them.
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
|
||||
|
||||
/**
|
||||
* This method gets called (automatically) during incremental parsing
|
||||
* @update gess5/11/98
|
||||
* @return TRUE if all went well, otherwise FALSE
|
||||
*/
|
||||
virtual PRInt32 ResumeParse(void);
|
||||
|
||||
/**
|
||||
* Causes the parser to scan forward, collecting nearby (sequential)
|
||||
* attribute tokens into the given node.
|
||||
* @update gess5/11/98
|
||||
* @param node to store attributes
|
||||
* @return number of attributes added to node.
|
||||
*/
|
||||
virtual PRInt32 CollectAttributes(nsCParserNode& aNode,PRInt32 aCount);
|
||||
|
||||
/**
|
||||
* Causes the next skipped-content token (if any) to
|
||||
* be consumed by this node.
|
||||
* @update gess5/11/98
|
||||
* @param node to consume skipped-content
|
||||
* @return number of skipped-content tokens consumed.
|
||||
*/
|
||||
virtual PRInt32 CollectSkippedContent(nsCParserNode& aNode);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpSource(ostream& out);
|
||||
|
||||
|
||||
//*********************************************
|
||||
// These methods are callback methods used by
|
||||
// net lib to let us know about our inputstream.
|
||||
//*********************************************
|
||||
NS_IMETHOD GetBindInfo(void);
|
||||
NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const nsString& aMmsg);
|
||||
NS_IMETHOD OnStartBinding(const char *aContentType);
|
||||
NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
|
||||
NS_IMETHOD OnStopBinding(PRInt32 status, const nsString& aMsg);
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 WillBuildModel(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 DidBuildModel(PRInt32 anErrorCode);
|
||||
|
||||
/**
|
||||
* This method gets called when the tokens have been consumed, and it's time
|
||||
* to build the model via the content sink.
|
||||
* @update gess5/11/98
|
||||
* @return YES if model building went well -- NO otherwise.
|
||||
*/
|
||||
virtual PRInt32 IterateTokens(void);
|
||||
|
||||
private:
|
||||
PRInt32 ParseFileIncrementally(const char* aFilename); //XXX ONLY FOR DEBUG PURPOSES...
|
||||
|
||||
/*******************************************
|
||||
These are the tokenization methods...
|
||||
*******************************************/
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
virtual PRInt32 ConsumeToken(CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Part of the code sandwich, this gets called right before
|
||||
* the tokenization process begins. The main reason for
|
||||
* this call is to allow the delegate to do initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens);
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 Tokenize(void);
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenization
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpTokens(ostream& out);
|
||||
|
||||
|
||||
protected:
|
||||
//*********************************************
|
||||
// And now, some data members...
|
||||
//*********************************************
|
||||
|
||||
nsIStreamListener* mListener;
|
||||
nsIContentSink* mSink;
|
||||
|
||||
nsDequeIterator* mCurrentPos;
|
||||
nsDequeIterator* mMarkPos;
|
||||
|
||||
nsIDTD* mDTD;
|
||||
eParseMode mParseMode;
|
||||
PRBool mIncremental;
|
||||
char* mTransferBuffer;
|
||||
|
||||
PRInt32 mMajorIteration;
|
||||
PRInt32 mMinorIteration;
|
||||
|
||||
nsDeque mTokenDeque;
|
||||
CScanner* mScanner;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -37,6 +37,7 @@
|
|||
class nsIParser;
|
||||
class CToken;
|
||||
class nsIContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class nsIDTD : public nsISupports {
|
||||
|
||||
|
@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {
|
|||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef is the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
|
||||
virtual void SetURLRef(char * aURLRef) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParent parent tag
|
||||
* @param aChild child tag
|
||||
* @return PR_TRUE if valid container
|
||||
*/
|
||||
virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ class nsString;
|
|||
class CToken;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class nsIParserDebug;
|
||||
|
||||
/**
|
||||
* This class defines the iparser interface. This XPCOM
|
||||
|
@ -60,9 +61,10 @@ class nsIParser : public nsISupports {
|
|||
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE) = 0;
|
||||
PRBool aIncremental=PR_TRUE,
|
||||
nsIParserDebug * aDebug = 0) = 0;
|
||||
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;
|
||||
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
|
||||
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/8/98
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NS_IPARSERDEBUG__
|
||||
#define NS_IPARSERDEBUG__
|
||||
|
||||
#include "nsISupports.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "prtypes.h"
|
||||
|
||||
#define NS_IPARSERDEBUG_IID \
|
||||
{0x7b68c220, 0x0685, 0x11d2, \
|
||||
{0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
|
||||
|
||||
|
||||
class nsIDTD;
|
||||
class nsHTMLParser;
|
||||
|
||||
class nsIParserDebug : public nsISupports {
|
||||
|
||||
public:
|
||||
|
||||
virtual void SetVerificationDirectory(char * verify_dir) = 0;
|
||||
|
||||
virtual void SetRecordStatistics(PRBool bval) = 0;
|
||||
|
||||
virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0;
|
||||
|
||||
virtual void DumpVectorRecord(void) = 0;
|
||||
|
||||
};
|
||||
|
||||
extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
|
||||
|
||||
#endif /* NS_IPARSERDEBUG__ */
|
|
@ -0,0 +1,51 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 6/17/98
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef IPARSERFILTER
|
||||
#define IPARSERFILTER
|
||||
|
||||
#include "nsISupports.h"
|
||||
|
||||
class CToken;
|
||||
|
||||
#define NS_IPARSERFILTER_IID \
|
||||
{0x14d6ff0, 0x0610, 0x11d2, \
|
||||
{0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
|
||||
|
||||
|
||||
class nsIParserFilter : public nsISupports {
|
||||
public:
|
||||
|
||||
NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0;
|
||||
|
||||
NS_IMETHOD WillAddToken(CToken & token) = 0;
|
||||
|
||||
NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0;
|
||||
};
|
||||
|
||||
extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,534 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 06/18/98
|
||||
*
|
||||
* This file contains the parser debugger object which aids in
|
||||
* walking links and reporting statistic information, reporting
|
||||
* bad vectors.
|
||||
*/
|
||||
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsHTMLParser.h"
|
||||
#include "nsIParserDebug.h"
|
||||
#include "nsCRT.h"
|
||||
#include "prenv.h" //this is here for debug reasons...
|
||||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
class CParserDebug : public nsIParserDebug {
|
||||
public:
|
||||
|
||||
CParserDebug(char * aVerifyDir = 0);
|
||||
~CParserDebug();
|
||||
|
||||
NS_DECL_ISUPPORTS
|
||||
|
||||
void SetVerificationDirectory(char * verify_dir);
|
||||
void SetRecordStatistics(PRBool bval);
|
||||
PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
|
||||
void DumpVectorRecord(void);
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
|
||||
private:
|
||||
VectorInfo ** mVectorInfoArray;
|
||||
PRInt32 mVectorCount;
|
||||
char * mVerificationDir;
|
||||
PRBool mRecordingStatistics;
|
||||
|
||||
PRBool DebugRecord(char * path, char * pURLRef, char * filename);
|
||||
void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
|
||||
void MakeVectorString(char * vector_string, VectorInfo * pInfo);
|
||||
};
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
* cause the COM-like construction of an nsHTMLParser.
|
||||
*
|
||||
* @update jevering 3/25/98
|
||||
* @param nsIParser** ptr to newly instantiated parser
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
|
||||
NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
|
||||
{
|
||||
CParserDebug *it = new CParserDebug();
|
||||
|
||||
if (it == 0) {
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
return it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
|
||||
}
|
||||
|
||||
/**
 * Constructs a debug object with an empty statistics table and
 * statistics recording switched on.
 *
 * @param aVerifyDir -- verification directory; when null, the value of
 *                      the VERIFY_PARSER environment variable is used
 *                      instead, and when that is unset as well no
 *                      directory is stored.
 */
CParserDebug::CParserDebug(char * aVerifyDir)
{
  NS_INIT_REFCNT();
  mVectorInfoArray = 0;
  mVectorCount = 0;
  mRecordingStatistics = PR_TRUE;

  // An explicit argument wins; the environment is only consulted
  // when no directory was passed in.
  const char * dirSource = aVerifyDir;
  if (!dirSource) {
    dirSource = PR_GetEnv("VERIFY_PARSER");
  }

  // Keep a private copy, or 0 when neither source supplied a path.
  mVerificationDir = dirSource ? PL_strdup(dirSource) : 0;
}
|
||||
|
||||
/**
 * Destructor; gives back the private copy of the verification
 * directory path, if one was stored.
 *
 * NOTE(review): mVectorInfoArray is not released here; whether it owns
 * heap memory depends on code that is not visible from here -- confirm.
 */
CParserDebug::~CParserDebug()
{
  if (0 != mVerificationDir) {
    PL_strfree(mVerificationDir);
  }
}
|
||||
|
||||
/**
|
||||
* This method gets called as part of our COM-like interfaces.
|
||||
* Its purpose is to create an interface to parser object
|
||||
* of some type.
|
||||
*
|
||||
* @update gess 4/8/98
|
||||
* @param nsIID id of object to discover
|
||||
* @param aInstancePtr ptr to newly discovered interface
|
||||
* @return NS_xxx result code
|
||||
*/
|
||||
nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
||||
{
|
||||
if (NULL == aInstancePtr) {
|
||||
return NS_ERROR_NULL_POINTER;
|
||||
}
|
||||
|
||||
if(aIID.Equals(kISupportsIID)) { //do IUnknown...
|
||||
*aInstancePtr = (nsIParserDebug*)(this);
|
||||
}
|
||||
else if(aIID.Equals(kIDebugParserIID)) { //do IParserDebug base class...
|
||||
*aInstancePtr = (nsIParserDebug*)(this);
|
||||
}
|
||||
else {
|
||||
*aInstancePtr=0;
|
||||
return NS_NOINTERFACE;
|
||||
}
|
||||
((nsISupports*) *aInstancePtr)->AddRef();
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMPL_ADDREF(CParserDebug)
|
||||
NS_IMPL_RELEASE(CParserDebug)
|
||||
|
||||
void CParserDebug::SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
if (mVerificationDir) {
|
||||
PL_strfree(mVerificationDir);
|
||||
mVerificationDir = 0;
|
||||
}
|
||||
mVerificationDir = PL_strdup(verify_dir);
|
||||
}
|
||||
|
||||
/**
 * Stores the statistics-recording flag.
 *
 * @param bval -- new value of the flag
 */
void CParserDebug::SetRecordStatistics(PRBool bval)
{
  const PRBool recording = bval;
  mRecordingStatistics = recording;
}
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and its
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named vector.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already recorded (don't re-record)
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification directory
|
||||
// and the default name.
|
||||
strcpy(recordPath,mVerificationDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// if the file exists, only open it for read/write;
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* compare function for quick sort. Compares references and
|
||||
* sorts in decending order
|
||||
*/
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!mVectorInfoArray) {
|
||||
mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < mVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (mVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
mVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
mVectorInfoArray[mVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((mVectorCount % TABLE_SIZE) == 0) {
|
||||
mVectorInfoArray = (VectorInfo**)realloc(
|
||||
mVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (mVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (mVerificationDir)
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (!mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (mVectorInfoArray[i]->vector)
|
||||
PR_Free(mVectorInfoArray[i]->vector);
|
||||
PR_Free(mVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(mVectorInfoArray);
|
||||
mVectorInfoArray = 0;
|
||||
mVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef)
|
||||
{
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=mVerificationDir || mRecordingStatistics) {
|
||||
|
||||
if(aDTD && aContextStackPos>1) {
|
||||
for (int i = 0; i < aContextStackPos-1; i++)
|
||||
if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) {
|
||||
result = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mRecordingStatistics) {
|
||||
NoteVector(aContextStack,aContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=mVerificationDir) {
|
||||
char path[2048];
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<aContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(aContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path, aURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
if (aParser)
|
||||
aParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
Загрузка…
Ссылка в новой задаче