// gecko-dev/htmlparser/src/nsDTDDebug.cpp

// 566 lines
// 17 KiB
// C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
* IBM Corp.
*/
/**
* MODULE NOTES:
* @update jevering 06/18/98
*
* This file contains the parser debugger object which aids in
* walking links and reporting statistic information, reporting
* bad vectors.
*/
#include "CNavDTD.h"
#include "nsHTMLTokens.h"
#include "nsVoidArray.h"
#include "nsParser.h"
#include "nsIDTDDebug.h"
#include "nsCRT.h"
#include "prenv.h" //this is here for debug reasons...
#include "prtypes.h" //this is here for debug reasons...
#include "prio.h"
#include "plstr.h"
#include "prstrm.h"
#include <time.h>
#include "prmem.h"
#include "nsQuickSort.h"
// File names, appended to the verification directory, of the bad-vector
// mapping file and of the vector statistics dump.
#define CONTEXT_VECTOR_MAP "/vector.map"
#define CONTEXT_VECTOR_STAT "/vector.stat"
// Column header written above each vector table in the statistics dump.
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
// One entry of the context-vector statistics table.
struct vector_info {
  PRInt32    references;   // how many times this vector has been seen
  PRInt32    count;        // number of tags stored in `vector`
  PRBool     good_vector;  // whether the vector was judged valid
  eHTMLTags* vector;       // the tags that make up the vector
};
typedef struct vector_info VectorInfo;
// The statistics vector table is allocated in steps of this many entries;
// it is grown by another step each time the number of stored vectors
// reaches a multiple of this value.
#define TABLE_SIZE 128
/**
 * Parser debugging aid. Checks context vectors (tag stacks) against a DTD,
 * keeps per-vector occurrence statistics, and records invalid vectors under
 * a verification directory.
 */
class CDTDDebug : public nsIDTDDebug {
public:
// aVerifyDir is copied; when it is 0 the VERIFY_PARSER environment
// variable is consulted instead.
CDTDDebug(char * aVerifyDir = 0);
virtual ~CDTDDebug();
NS_DECL_ISUPPORTS
// Replaces the stored verification directory with a copy of verify_dir.
void SetVerificationDirectory(char * verify_dir);
// Turns collection of vector statistics on or off.
void SetRecordStatistics(PRBool bval);
// Checks the context stack against aDTD; returns PR_FALSE for a bad vector.
PRBool Verify(nsIDTD * aDTD, nsIParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], nsString& aURLRef);
// Writes the collected statistics to the stat file and discards the table.
void DumpVectorRecord(void);
private:
// table of vector statistics and the number of entries in use
VectorInfo ** mVectorInfoArray;
PRInt32 mVectorCount;
// heap copy of the verification directory, or 0 when none is set
char * mVerificationDir;
// set to PR_TRUE by the constructor; changed via SetRecordStatistics
PRBool mRecordingStatistics;
// Adds a bad vector to the mapping file unless it is already listed.
PRBool DebugRecord(char * path, nsString& pURLRef, char * filename);
// Counts one occurrence of a vector in the statistics table.
void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
// Formats one statistics entry as a text line into vector_string.
void MakeVectorString(char * vector_string, VectorInfo * pInfo);
};
// Interface IDs that CDTDDebug::QueryInterface answers to.
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIDebugParserIID, NS_IDTDDEBUG_IID);
/**
 * Factory for the DTD debugger. Creates a CDTDDebug and hands it back
 * through its nsIDTDDebug interface.
 *
 * @update jevering 3/25/98
 * @param aInstancePtrResult receives the newly created debugger
 * @return NS_OK on success, NS_ERROR_NULL_POINTER when aInstancePtrResult
 *         is null, NS_ERROR_OUT_OF_MEMORY when allocation fails, otherwise
 *         the error reported by QueryInterface
 */
NS_EXPORT nsresult NS_NewDTDDebug(nsIDTDDebug** aInstancePtrResult)
{
  // Reject a null out-pointer before allocating anything.
  if (0 == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }
  CDTDDebug *it = new CDTDDebug();
  if (it == 0) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  nsresult rv = it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
  if (NS_OK != rv) {
    // QueryInterface only takes a reference on success, so nobody owns
    // the object at this point; destroy it instead of leaking it.
    delete it;
  }
  return rv;
}
/**
 * Builds a debugger with an empty statistics table and statistics
 * recording switched on. The verification directory is copied from
 * aVerifyDir or, when that is null, from the VERIFY_PARSER environment
 * variable; it stays unset when neither is available.
 */
CDTDDebug::CDTDDebug(char * aVerifyDir)
  : mVectorInfoArray(0),
    mVectorCount(0),
    mVerificationDir(0),
    mRecordingStatistics(PR_TRUE)
{
  NS_INIT_REFCNT();

  // an explicit directory wins over the environment
  const char * theDir = aVerifyDir;
  if (!theDir) {
    theDir = PR_GetEnv("VERIFY_PARSER");
  }
  if (theDir) {
    mVerificationDir = PL_strdup(theDir);
  }
}
/**
 * Releases the verification directory string and any vector statistics
 * that are still held in the table (i.e. were never dumped).
 */
CDTDDebug::~CDTDDebug()
{
  if (mVerificationDir) {
    PL_strfree(mVerificationDir);
    mVerificationDir = 0;
  }

  // The table, each entry and each entry's tag array are separate
  // allocations; release all three levels.
  if (mVectorInfoArray) {
    for (PRInt32 i = 0; i < mVectorCount; i++) {
      VectorInfo * entry = mVectorInfoArray[i];
      if (entry) {
        if (entry->vector)
          PR_Free(entry->vector);
        PR_Free(entry);
      }
    }
    PR_Free(mVectorInfoArray);
    mVectorInfoArray = 0;
  }
  mVectorCount = 0;
}
/**
 * COM-style interface discovery. The object answers to the nsISupports
 * and nsIDTDDebug interface IDs, in both cases through its nsIDTDDebug
 * pointer.
 *
 * @update gess 4/8/98
 * @param aIID         id of the interface being asked for
 * @param aInstancePtr receives the interface pointer (0 if unsupported)
 * @return NS_OK, NS_ERROR_NULL_POINTER when aInstancePtr is null, or
 *         NS_NOINTERFACE for any other interface id
 */
nsresult CDTDDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  // anything other than the two ids we know about is refused
  if (!aIID.Equals(kISupportsIID) && !aIID.Equals(kIDebugParserIID)) {
    *aInstancePtr=0;
    return NS_NOINTERFACE;
  }

  *aInstancePtr = (nsIDTDDebug*)(this);
  NS_ADDREF_THIS();
  return NS_OK;
}
// AddRef/Release for CDTDDebug are supplied by the NS_IMPL_* macros.
NS_IMPL_ADDREF(CDTDDebug)
NS_IMPL_RELEASE(CDTDDebug)
/**
 * Replaces the verification directory with a private copy of verify_dir.
 *
 * @param verify_dir new directory; a null pointer clears the directory,
 *                   which switches off directory-based recording in Verify
 */
void CDTDDebug::SetVerificationDirectory(char * verify_dir)
{
  // Duplicate before freeing: verify_dir may point at (or into) the string
  // we currently own. A null argument is handled here rather than relying
  // on how PL_strdup treats null.
  char * replacement = verify_dir ? PL_strdup(verify_dir) : 0;

  if (mVerificationDir) {
    PL_strfree(mVerificationDir);
  }
  mVerificationDir = replacement;
}
/**
 * Switches the collection of context-vector statistics on or off.
 *
 * @param bval PR_TRUE to have Verify note each vector it is given
 */
void CDTDDebug::SetRecordStatistics(PRBool bval)
{
mRecordingStatistics = bval;
}
/**
 * Records an invalid context vector, together with its URL and the name of
 * the source dump, in the flat mapping file CONTEXT_VECTOR_MAP inside the
 * verification directory. Each record is one "URL path filename\r\n" line.
 * Nothing is appended when a line for the same URL and path already exists.
 *
 * @update jevering 6/06/98
 * @param path     directory path that names the bad context vector
 * @param aURLRef  URL of the document that produced the vector
 * @param filename file the source dump is going to be written to
 * @return PR_TRUE if the vector was already recorded (don't re-record);
 *         PR_FALSE if it was appended now or the map could not be used
 */
PRBool CDTDDebug::DebugRecord(char * path, nsString& aURLRef, char * filename)
{
  if (!mVerificationDir || !path || !filename)
    return PR_FALSE;

  // create the record file name from the verification directory and the
  // default name, refusing directories that would not fit the buffer
  // (sizeof the literal already accounts for the terminating NUL)
  char recordPath[2048];
  if (PL_strlen(mVerificationDir) + sizeof(CONTEXT_VECTOR_MAP) > sizeof(recordPath))
    return PR_FALSE;
  strcpy(recordPath,mVerificationDir);
  strcat(recordPath,CONTEXT_VECTOR_MAP);

  // vectors are stored in the format "URL vector filename" where the
  // vector is the verification path and the filename is the source dump.
  // The line is sized from its parts so that sprintf cannot overrun it.
  char ubuffer[513];
  aURLRef.ToCString(ubuffer,sizeof(ubuffer));
  const PRUint32 recordLen = PL_strlen(ubuffer) + 1 + PL_strlen(path) + 1 +
                             PL_strlen(filename) + 2;
  char * record = (char *)PR_Malloc(recordLen + 1);
  if (!record)
    return PR_FALSE;
  sprintf(record,"%s %s %s\r\n", ubuffer, path, filename);

  // open for read/write, creating the file only if it is missing. The
  // mode matters when the file is created: it must stay readable so later
  // calls can scan it.
  PRIntn oflags = PR_RDWR;
  if (PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
    oflags |= PR_CREATE_FILE;
  PRFileDesc * recordFile = PR_Open(recordPath,oflags,0644);
  if (!recordFile) {
    PR_Free(record);
    return PR_FALSE;
  }

  // read the whole file and walk it a line at a time to see whether this
  // occurrence has been recorded before
  PRBool found = PR_FALSE;
  PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
  if (iSize > 0) {
    char * buffer = (char*)PR_Malloc(iSize);
    if (buffer) {
      PR_Seek(recordFile,0,PR_SEEK_SET);
      // only trust what was actually read
      PRInt32 bytesRead = PR_Read(recordFile,buffer,iSize);
      PRInt32 lineStart = 0;
      for (PRInt32 pos = 0; pos < bytesRead && !found; pos++) {
        if (buffer[pos] != '\r')
          continue;

        // chop off the filename: the key ends at the last space of the line
        PRInt32 keyEnd = pos;
        for (PRInt32 k = pos - 1; k >= lineStart; k--) {
          if (buffer[k] == ' ') {
            keyEnd = k;
            break;
          }
        }
        const PRInt32 keyLen = keyEnd - lineStart;

        // The stored key must be a non-empty prefix of the new record that
        // stops exactly at a field separator; a bare prefix test would
        // confuse e.g. ".../b" with ".../blockquote".
        if (keyLen > 0 && (PRUint32)keyLen < recordLen &&
            record[keyLen] == ' ' &&
            !PL_strncmp(record, buffer + lineStart, (PRUint32)keyLen)) {
          found = PR_TRUE;
        }

        // skip the '\n' that follows the '\r'
        pos++;
        lineStart = pos + 1;
      }
      // throw away the record file data
      PR_Free(buffer);
    }
  }

  // if this bad vector was not recorded, add it to the record file
  if (!found) {
    PR_Seek(recordFile,0,PR_SEEK_END);
    PR_Write(recordFile,record,recordLen);
  }

  // single exit: the descriptor and the line are released on every path
  PR_Close(recordFile);
  PR_Free(record);
  return found;
}
/**
 * Ordering callback for the quick sort of the vector table. The table
 * holds VectorInfo pointers, so each argument points at such a pointer.
 * The result is positive when the second entry has more references,
 * which sorts the table in descending order of reference count.
 */
static int PR_CALLBACK compare( const void *arg1, const void *arg2 , void *unused)
{
  const VectorInfo * first  = *(VectorInfo**)arg1;
  const VectorInfo * second = *(VectorInfo**)arg2;
  return second->references - first->references;
}
/**
 * Stores statistical information about a context vector in the vector
 * table. If the vector has been noted before, its reference count is
 * bumped; otherwise a new entry is appended. The table grows in steps of
 * TABLE_SIZE entries and is re-sorted each time it grows. When memory
 * cannot be obtained the vector is silently left unrecorded.
 *
 * @update jevering 6/11/98
 * @param aTags       the tag list (vector)
 * @param count       number of tags in aTags
 * @param good_vector whether the vector was judged valid
 */
void CDTDDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
{
  if (count < 0 || (count > 0 && !aTags))
    return;

  // attempt to look up the vector; only entries of the same size can match
  for (PRInt32 i = 0; i < mVectorCount; i++) {
    VectorInfo * known = mVectorInfoArray[i];
    if (known->count != count)
      continue;
    PRInt32 j = 0;
    while (j < count && known->vector[j] == aTags[j])
      j++;
    if (j == count) {
      // bzzzt. and we have a winner.. bump the ref count
      known->references++;
      return;
    }
  }

  // the context vector hasn't been noted, so build a new entry for it
  VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
  if (!pVectorInfo)
    return;
  pVectorInfo->references = 1;
  pVectorInfo->count = count;
  pVectorInfo->good_vector = good_vector;
  pVectorInfo->vector = 0;
  if (count > 0) {
    pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
    if (!pVectorInfo->vector) {
      PR_Free(pVectorInfo);
      return;
    }
    for (PRInt32 k = 0; k < count; k++)
      pVectorInfo->vector[k] = aTags[k];
  }

  // Make room before inserting. The table is full whenever the count is
  // a multiple of TABLE_SIZE (that includes the not-yet-allocated case).
  // The table comes from the NSPR allocator, so it is grown with
  // PR_Realloc, and the old pointer is kept if growing fails.
  if ((mVectorCount % TABLE_SIZE) == 0) {
    VectorInfo ** table;
    if (!mVectorInfoArray) {
      table = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
    }
    else {
      table = (VectorInfo**)PR_Realloc(
        mVectorInfoArray,
        (PRUint32)(sizeof(VectorInfo*)*(mVectorCount+TABLE_SIZE)));
    }
    if (!table) {
      if (pVectorInfo->vector)
        PR_Free(pVectorInfo->vector);
      PR_Free(pVectorInfo);
      return;
    }
    mVectorInfoArray = table;
    // grown.. sort it.. love it.
    if (mVectorCount) {
      NS_QuickSort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare, NULL);
    }
  }

  mVectorInfoArray[mVectorCount++] = pVectorInfo;
}
void CDTDDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
{
sprintf (vector_string, "%6d ", pInfo->references);
for (PRInt32 j = 0; j < pInfo->count; j++) {
PL_strcat(vector_string, "<");
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
PL_strcat(vector_string, ">");
}
PL_strcat(vector_string,"\r\n");
}
/**
* This debug routine dumps out the vector statistics to a text
* file in the verification directory and defaults to the name
* "vector.stat". It contains all parsed context vectors and there
* occurrence count sorted in descending order.
*
* @update jevering 6/11/98
* @param
* @return
*/
void CDTDDebug::DumpVectorRecord(void)
{
// do we have a table?
if (mVectorCount) {
// hopefully, they wont exceed 1K.
char vector_string[1024];
char path[1024];
path[0] = '\0';
// put in the verification directory.. else the root
if (mVerificationDir)
strcpy(path,mVerificationDir);
strcat(path,CONTEXT_VECTOR_STAT);
// open the stat file creaming any existing stat file
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
if (statisticFile) {
PRInt32 i;
PRofstream ps;
ps.attach(statisticFile);
// oh what the heck, sort it again
if (mVectorCount) {
NS_QuickSort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare, NULL);
}
// cute little header
sprintf(vector_string,"Context vector occurrence results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
ps << vector_string;
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
ps << VECTOR_TABLE_HEADER;
// dump out the bad vectors encountered
for (i = 0; i < mVectorCount; i++) {
if (!mVectorInfoArray[i]->good_vector) {
MakeVectorString(vector_string, mVectorInfoArray[i]);
ps << vector_string;
}
}
ps << "\r\n\r\nValid context vector summary\r\n";
ps << VECTOR_TABLE_HEADER;
// take a big vector table dump (good vectors)
for (i = 0; i < mVectorCount; i++) {
if (mVectorInfoArray[i]->good_vector) {
MakeVectorString(vector_string, mVectorInfoArray[i]);
ps << vector_string;
}
// free em up. they mean nothing to me now (I'm such a user)
if (mVectorInfoArray[i]->vector)
PR_Free(mVectorInfoArray[i]->vector);
PR_Free(mVectorInfoArray[i]);
} //for
PR_Close(statisticFile);
}//if
// ok, we are done with the table, free it up as well
PR_Free(mVectorInfoArray);
mVectorInfoArray = 0;
mVectorCount = 0;
} //if
}
/**
 * Determines whether the given context vector is acceptable to the DTD:
 * every tag on the stack must be allowed to contain the tag above it.
 * Depending on configuration the vector is also counted in the statistics
 * table and, when a verification directory is set, a directory tree that
 * mirrors the vector is created. For a bad vector that has not been
 * recorded for this URL yet, the document source is dumped into that tree.
 *
 * @update gess4/22/98
 * @param aDTD              the DTD asked for verification
 * @param aParser           parser whose source is dumped for bad vectors
 * @param aContextStackPos  number of tags in aContextStack
 * @param aContextStack     the context vector (array of eHTMLTags)
 * @param aURLRef           URL of the document being parsed
 * @return PR_TRUE if the vector is valid (or was not checked), else PR_FALSE
 */
PRBool CDTDDebug::Verify(nsIDTD * aDTD, nsIParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], nsString& aURLRef)
{
  PRBool result=PR_TRUE;

  // nothing can be inspected without the stack itself
  if (aContextStackPos > 0 && !aContextStack)
    return result;

  //ok, now see if we understand this vector
  if(0!=mVerificationDir || mRecordingStatistics) {
    if(aDTD && aContextStackPos>1) {
      for (int i = 0; i < aContextStackPos-1; i++)
        if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) {
          result = PR_FALSE;
          break;
        }
    }
  }

  if (mRecordingStatistics) {
    NoteVector(aContextStack,aContextStackPos,result);
  }

  if(0==mVerificationDir)
    return result;

  char path[2048];
  char filename[1024];
  // What filename needs beyond the path: '/', the hex digits of an
  // unsigned long, ".html" and the terminating NUL. Capping the path at
  // the remainder keeps both buffers within bounds.
  const PRUint32 kSuffixRoom = 1 + 2*sizeof(unsigned long) + 5 + 1;
  const PRUint32 kMaxPath = sizeof(filename) - kSuffixRoom;

  PRUint32 pathLen = PL_strlen(mVerificationDir);
  if (pathLen > kMaxPath)
    return result;
  strcpy(path,mVerificationDir);

  // create one directory level per tag in the vector
  for(int i=0;i<aContextStackPos;i++){
    const char* name=GetTagName(aContextStack[i]);
    // GetTagName's behaviour for unknown tags is not visible here, so a
    // null name is tolerated and contributes nothing to the path
    const PRUint32 nameLen = name ? PL_strlen(name) : 0;
    if (pathLen + 1 + nameLen > kMaxPath)
      return result; // vector too deep to be expressed as a path
    path[pathLen++] = '/';
    if (nameLen) {
      strcpy(path + pathLen, name);
      pathLen += nameLen;
    }
    path[pathLen] = '\0';
    // the directory must stay traversable for the levels below it
    PR_MkDir(path,0755);
  }

  if(PR_FALSE==result){
    static PRBool rnd_initialized = PR_FALSE;
    if (!rnd_initialized) {
      // seed random number generator to aid in temp file
      // creation.
      rnd_initialized = PR_TRUE;
      srand((unsigned)time(NULL));
    }

    // generate a filename to dump the html source into
    do {
      // use system time to generate a temporary file name
      time_t ltime;
      time (&ltime);
      // add in random number so that we can create unique names
      // faster than simply every second.
      ltime += (time_t)rand();
      // time_t is not necessarily a long; convert to match %lX
      sprintf(filename,"%s/%lX.html", path, (unsigned long)ltime);
      // try until we find one we can create
    } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);

    // check to see if we already recorded an instance of this particular
    // bad vector.
    if (!DebugRecord(path, aURLRef, filename))
    {
      // save file to directory indicated by bad context vector
      PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0644);
      if (debugFile) {
        // dump the html source into the newly created file.
        nsParser* theParser=(nsParser*)aParser;
        if (theParser) {
          // XXX Hack Remove file descriptors: the dump goes through a
          // file stream opened by name, not through debugFile
          nsFileSpec fileSpec(filename);
          nsOutputFileStream out(fileSpec);
          theParser->DebugDumpSource(out);
        }
        PR_Close(debugFile);
      }
    }
  }
  return result;
}