// gecko-dev/htmlparser/src/nsTokenizer.cpp
//
// 328 lines
// 7.2 KiB
// C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#include <fstream.h>
#include "nsTokenizer.h"
#include "nsToken.h"
#include "nsScanner.h"
#include "nsIParserFilter.h"
#include "nsIURL.h"
/**
 * Deallocation callback: interprets the opaque pointer as a CToken and
 * deletes it. The constructors in this file register it with mTokenDeque
 * (presumably so the deque can dispose of tokens it still holds).
 *
 * @param   pToken -- type-erased CToken pointer; NULL is accepted
 */
static void TokenFreeProc(void * pToken)
{
  // delete on a null pointer does nothing, so no separate NULL test is needed.
  delete (CToken*)pToken;
}
/**
 * Construct a tokenizer whose scanner is created from a URL.
 *
 * @update  gess 3/25/98
 * @param   aURL -- URL handed to the newly allocated CScanner
 * @param   aDelegate -- delegate used to produce and vet tokens
 * @param   aMode -- parse mode; remembered here and passed to the scanner
 * @param   aIFilter -- parser filter consulted before tokens are queued;
 *                      may be null (callers in this file test it first)
 */
CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mScanner      = new CScanner(aURL,aMode);
  mParseMode    = aMode;
  mDelegate     = aDelegate;
  mParserFilter = aIFilter;
}
/**
 * Construct a tokenizer whose scanner is created from a file name.
 *
 * @update  gess 3/25/98
 * @param   aFilename -- name handed to the newly allocated CScanner
 * @param   aDelegate -- delegate used to produce and vet tokens
 * @param   aMode -- parse mode; remembered here and passed to the scanner
 * @param   aIFilter -- parser filter consulted before tokens are queued;
 *                      may be null (callers in this file test it first)
 */
CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mScanner      = new CScanner(aFilename,aMode);
  mParseMode    = aMode;
  mDelegate     = aDelegate;
  mParserFilter = aIFilter;
}
/**
 * Construct a tokenizer whose scanner is created from the parse mode
 * alone; input can be supplied afterwards through Append().
 *
 * @update  gess 3/25/98
 * @param   aDelegate -- delegate used to produce and vet tokens
 * @param   aMode -- parse mode; remembered here and passed to the scanner
 * @param   aIFilter -- parser filter consulted before tokens are queued;
 *                      may be null (callers in this file test it first)
 */
CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
  mTokenDeque(PR_TRUE,TokenFreeProc) {
  mScanner      = new CScanner(aMode);
  mParseMode    = aMode;
  mDelegate     = aDelegate;
  mParserFilter = aIFilter;
}
/**
 * Destructor. Deletes the scanner allocated by the constructor and
 * calls Destroy() on the delegate.
 *
 * @update  gess 3/25/98
 */
CTokenizer::~CTokenizer() {
  delete mScanner;
  mScanner=0;             // do not keep a pointer to the freed scanner
  mDelegate->Destroy();   // the delegate is dereferenced unconditionally
}
/**
 * Forward a string buffer to the scanner's Append().
 *
 * @update  gess 5/13/98
 * @param   aBuffer -- string passed through to the scanner
 * @return  whatever the scanner's Append() returns, or PR_FALSE when
 *          there is no scanner
 */
PRBool CTokenizer::Append(nsString& aBuffer) {
  if(!mScanner)
    return PR_FALSE;
  return mScanner->Append(aBuffer);
}
/**
 * Forward a raw character buffer to the scanner's Append().
 *
 * @update  gess 5/21/98
 * @param   aBuffer -- character data passed through to the scanner
 * @param   aLen -- length value passed through alongside aBuffer
 * @return  whatever the scanner's Append() returns, or PR_FALSE when
 *          there is no scanner
 */
PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){
  if(!mScanner)
    return PR_FALSE;
  return mScanner->Append(aBuffer,aLen);
}
/**
 * Give callers direct access to the deque that accumulates accepted
 * tokens.
 *
 * @update  gess 4/20/98
 * @return  reference to the member token deque (not a copy)
 */
nsDeque& CTokenizer::GetDeque(void) {
  return this->mTokenDeque;
}
/**
 * Ask the delegate to read the next token from the scanner.
 *
 * @update  gess 3/25/98
 * @param   aToken -- pointer reference handed to the delegate, which is
 *                    expected to store the token it produces there
 * @return  the delegate's result code, passed through unchanged
 */
PRInt32 CTokenizer::GetToken(CToken*& aToken) {
  return mDelegate->GetToken(*mScanner,aToken);
}
/**
 * Report the size of the token deque.
 *
 * @update  gess 3/25/98
 * @return  value returned by the deque's GetSize()
 */
PRInt32 CTokenizer::GetSize(void) {
  PRInt32 theCount=mTokenDeque.GetSize();
  return theCount;
}
/**
 * Opening half of the tokenization "sandwich": called before tokens
 * are pulled so the delegate gets a chance to prepare.
 *
 * @update  gess 3/25/98
 * @param   aIncremental -- flag forwarded untouched to the delegate
 * @return  the delegate's WillTokenize() answer
 */
PRBool CTokenizer::WillTokenize(PRBool aIncremental){
  return mDelegate->WillTokenize(aIncremental);
}
/**
 * Tokenize everything the scanner currently has: call WillTokenize(),
 * pull tokens through GetToken() until it reports something other than
 * kNoError, then call DidTokenize().
 *
 * Each token is offered to the parser filter (when one is set) and to
 * the delegate; only tokens both accept are pushed onto the deque.
 * Tokens that are rejected, or that accompany an error result, are
 * deleted here.
 *
 * NOTE(review): neither aSourceBuffer nor appendTokens is referenced by
 * this implementation, and the value returned by WillTokenize() is
 * ignored -- confirm that callers expect this.
 *
 * @update  gess 3/25/98
 * @param   aSourceBuffer -- currently unused
 * @param   appendTokens -- currently unused
 * @return  kNoError when the loop ended with kNoError or kEOF, otherwise
 *          the result code that stopped the loop
 */
PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
  PRInt32 result=kNoError;

  WillTokenize(PR_TRUE);

  while(kNoError==result) {
    // Start every pass with a null pointer. If GetToken() fails without
    // assigning its out-parameter, a stale value would still refer to the
    // token pushed on the previous pass and we would delete a token the
    // deque is holding.
    CToken* theToken=0;
    result=GetToken(theToken);

    if(theToken && (kNoError==result)) {
#ifdef VERBOSE_DEBUG
      theToken->DebugDumpToken(cout);
#endif
      PRBool bWillAdd = PR_TRUE;
      if (mParserFilter)
        bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
      if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
        mTokenDeque.Push(theToken);
      }
      else {
        // Rejected by the filter or the delegate: it never reaches the
        // deque, so release it here instead of leaking it.
        delete theToken;
      }
    }
    else {
      // Error result (or no token at all); delete of a null pointer is a no-op.
      delete theToken;
    }
  }

  // Running out of input is the normal way for this loop to finish.
  if(kEOF==result)
    result=kNoError;

  DidTokenize(PR_TRUE);
  return result;
}
/**
 * Primary incremental control routine. Repeatedly marks the scanner
 * position and asks for a token until GetToken() reports something
 * other than kNoError. On a non-kNoError result the scanner is rewound
 * to the mark taken just before that read.
 *
 * Each token is offered to the parser filter (when one is set) and to
 * the delegate; only tokens both accept are pushed onto the deque.
 * Tokens that are rejected, or that accompany an error result, are
 * deleted here.
 *
 * @update  gess 3/25/98
 * @param   anIteration -- WillTokenize() is invoked only when this is 0
 * @return  the result code that stopped the loop (kNoError if the loop
 *          never ran)
 */
PRInt32 CTokenizer::Tokenize(int anIteration) {
  PRInt32 result=kNoError;
  PRBool  done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;

  while((PR_FALSE==done) && (kNoError==result)) {
    // Start every pass with a null pointer. If GetToken() fails without
    // assigning its out-parameter, a stale value would still refer to the
    // token pushed on the previous pass and we would delete a token the
    // deque is holding.
    CToken* theToken=0;

    mScanner->Mark();
    result=GetToken(theToken);

    if(kNoError==result) {
      if(theToken) {
#ifdef VERBOSE_DEBUG
        theToken->DebugDumpToken(cout);
#endif
        PRBool bWillAdd = PR_TRUE;
        if (mParserFilter)
          bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
        if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
          mTokenDeque.Push(theToken);
        }
        else {
          // Rejected by the filter or the delegate: it never reaches the
          // deque, so release it here instead of leaking it.
          delete theToken;
        }
      }
    }
    else {
      // Discard any partial token (delete of null is a no-op) and put the
      // scanner back where this read started.
      delete theToken;
      mScanner->RewindToMark();
    }
  }

  // NOTE(review): `done` is never changed inside the loop, so it is true
  // here only when WillTokenize() declined on iteration 0. That means
  // DidTokenize() runs only in that case -- confirm this is intended.
  if((PR_TRUE==done) && (kInterrupted!=result))
    DidTokenize(PR_TRUE);
  return result;
}
/**
 * Closing half of the tokenization "sandwich": called once
 * tokenization has finished so the delegate can wrap up. In
 * VERBOSE_DEBUG builds the collected tokens are also dumped to cout.
 *
 * @update  gess 3/25/98
 * @param   aIncremental -- flag forwarded untouched to the delegate
 * @return  the delegate's DidTokenize() answer
 */
PRBool CTokenizer::DidTokenize(PRBool aIncremental) {
  PRBool theDelegateResult=mDelegate->DidTokenize(aIncremental);

#ifdef VERBOSE_DEBUG
  DebugDumpTokens(cout);
#endif

  return theDelegateResult;
}
/**
 * Debug aid: walks the token deque from Begin() to End() and calls
 * DebugDumpToken() on every entry.
 *
 * @update  gess 3/25/98
 * @param   out -- stream each token writes its dump to
 */
void CTokenizer::DebugDumpTokens(ostream& out) {
  nsDequeIterator iter=mTokenDeque.Begin();
  nsDequeIterator last=mTokenDeque.End();

  for(;iter!=last;) {
    // The post-increment yields the entry, which is viewed as a CToken.
    CToken* each=(CToken*)(iter++);
    each->DebugDumpToken(out);
  }
}
/**
 * Debug aid: walks the token deque from Begin() to End() and calls
 * DebugDumpSource() on every entry.
 *
 * @update  gess 3/25/98
 * @param   out -- stream each token writes its source form to
 */
void CTokenizer::DebugDumpSource(ostream& out) {
  nsDequeIterator iter=mTokenDeque.Begin();
  nsDequeIterator last=mTokenDeque.End();

  for(;iter!=last;) {
    // The post-increment yields the entry, which is viewed as a CToken.
    CToken* each=(CToken*)(iter++);
    each->DebugDumpSource(out);
  }
}
/**
 * Self-test hook. The body is currently empty in every build
 * configuration; the _DEBUG section is a placeholder.
 *
 * @update  gess 3/25/98
 */
void CTokenizer::SelfTest(void) {
#ifdef _DEBUG
  // Placeholder: no self-test steps have been written.
#endif
}