зеркало из https://github.com/mozilla/pjs.git
Fixed many parser memory leaks.
Attribute tokens were being leaked; fixed by making the CToken destructor virtual. The DTDs were not being properly deleted; added a Destroy() member to nsITokenizerDelegate. Fixed a few spots here and there where tokens were getting allocated but not deleted when a kError is passed back.
This commit is contained in:
Родитель
9635153ef3
Коммит
4cf367dd1b
|
@ -1,509 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include "CNavDelegate.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "CNavDTD.h"
|
||||
|
||||
|
||||
// Note: We already handle the following special case conditions:
|
||||
// 1) If you see </>, simply treat it as a bad tag.
|
||||
// 2) If you see </ ...>, treat it like a comment.
|
||||
// 3) If you see <> or <_ (< space) simply treat it as text.
|
||||
// 4) If you see <[!a..z] (< followed by non-alpha), treat it as text.
|
||||
|
||||
// Table of ASCII letters, digits and underscore (going by its name, the
// characters allowed in identifiers). It is read-only data, so it is const.
static const char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CNavDelegate::CNavDelegate() :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CNavDelegate::CNavDelegate(CNavDelegate& aDelegate) :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
eParseMode CNavDelegate::GetParseMode(void) const {
|
||||
return eParseMode_unknown;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cause delegate to create and return a new DTD.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @return new DTD or null
|
||||
*/
|
||||
nsIDTD* CNavDelegate::GetDTD(void) const{
|
||||
return new CNavDTD();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of some kind of tagged
|
||||
* element. We don't know yet if it's a tag or a comment.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aChar is the last char read
|
||||
* @param aScanner is represents our input source
|
||||
* @param aToken is the out arg holding our new token
|
||||
* @return error code (may return kInterrupted).
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
|
||||
nsAutoString empty("");
|
||||
PRInt32 result=aScanner.GetChar(aChar);
|
||||
|
||||
if(kNoError==result) {
|
||||
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
}//if
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
|
||||
if((0!=aToken) && (kNoError==result)) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after we've consumed a start
|
||||
* tag, and we now have to consume its attributes.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRBool done=PR_FALSE;
|
||||
PRInt32 result=kNoError;
|
||||
nsAutoString as("");
|
||||
PRInt16 theAttrCount=0;
|
||||
|
||||
while((!done) && (result==kNoError)) {
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(kNoError==result){
|
||||
theAttrCount++;
|
||||
mTokenDeque.Push(theToken);
|
||||
}//if
|
||||
else delete theToken; //we can't keep it...
|
||||
}//if
|
||||
|
||||
if(kNoError==result){
|
||||
result=aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}//if
|
||||
}//if
|
||||
}//while
|
||||
|
||||
aToken->SetAttributeCount(theAttrCount);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a special case method. It's job is to consume
|
||||
* all of the given tag up to an including the end tag.
|
||||
*
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
//In the case that we just read the given tag, we should go and
|
||||
//consume all the input until we find a matching end tag.
|
||||
|
||||
nsAutoString endTag("</");
|
||||
endTag.Append(aString);
|
||||
endTag.Append(">");
|
||||
aToken=new CSkippedContentToken(endTag);
|
||||
return aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of a tag.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRInt32 theDequeSize=mTokenDeque.GetSize();
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
aToken=new CStartToken(nsAutoString(""));
|
||||
|
||||
if(aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(kNoError==result) {
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner,aToken);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
if(kNoError==result) {
|
||||
nsString& str=aToken->GetText();
|
||||
CToken* skippedToken=0;
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
|
||||
|
||||
if((kNoError==result) && skippedToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=skippedToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(skippedToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
} //if
|
||||
} //if
|
||||
} //if
|
||||
|
||||
//EEEEECCCCKKKK!!!
|
||||
//This code is confusing, so pay attention.
|
||||
//If you're here, it's because we were in the midst of consuming a start
|
||||
//tag but ran out of data (not in the stream, but in this *part* of the stream.
|
||||
//For simplicity, we have to unwind our input. Therefore, we pop and discard
|
||||
//any new tokens we've cued this round. Later we can get smarter about this.
|
||||
if(kNoError!=result) {
|
||||
while(mTokenDeque.GetSize()>theDequeSize) {
|
||||
delete mTokenDeque.PopBack();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "&" has been consumed
|
||||
* and we know we're at the start of an entity.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRUnichar ch;
|
||||
PRInt32 result=aScanner.GetChar(ch);
|
||||
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(0,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
temp.Append(ch);
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
}//if
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after whitespace has been
|
||||
* consumed and we know we're at the start a whitespace run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken = new CWhitespaceToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<!" has been consumed
|
||||
* and we know we're at the start of a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken = new CCommentToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a known text char has
|
||||
* been consumed and we should read a text run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken=new CTextToken(aString)) {
|
||||
PRUnichar ch=0;
|
||||
result=aToken->Consume(ch,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a newline has been consumed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken=new CNewlineToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method repeatedly called by the tokenizer.
|
||||
* Each time, we determine the kind of token were about to
|
||||
* read, and then we call the appropriate method to handle
|
||||
* that token type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
aToken=0;
|
||||
if(mTokenDeque.GetSize()>0) {
|
||||
aToken=(CToken*)mTokenDeque.Pop();
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
PRInt32 result=kNoError;
|
||||
if(kNoError==result){
|
||||
|
||||
PRUnichar aChar;
|
||||
result=aScanner.GetChar(aChar);
|
||||
switch(result) {
|
||||
case kEOF:
|
||||
break;
|
||||
|
||||
case kInterrupted:
|
||||
aScanner.RewindToMark();
|
||||
break;
|
||||
|
||||
case kNoError:
|
||||
default:
|
||||
switch(aChar) {
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
|
||||
case kNotFound:
|
||||
break;
|
||||
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
} //switch
|
||||
break;
|
||||
} //switch
|
||||
if(kNoError==result)
|
||||
result=aScanner.Eof();
|
||||
} //while
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CToken* CNavDelegate::CreateTokenOfType(eHTMLTokenTypes aType) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is by the tokenizer, once for each new token
|
||||
* we've constructed. This method determines whether or not
|
||||
* the new token (argument) should be accepted as a valid
|
||||
* token. If so, the token is added to the deque of tokens
|
||||
* contained within the tokenzier. If no, the token is
|
||||
* ignored.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aToken: token to be tested for acceptance
|
||||
* @return TRUE if token should be accepted.
|
||||
*/
|
||||
PRBool CNavDelegate::WillAddToken(CToken& /*aToken*/) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just before a stream
|
||||
* is parsed. This method is called so that the delegate
|
||||
* can do any "pre-parsing" initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool CNavDelegate::WillTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just after a stream
|
||||
* was parsed. This method is called so that the delegate
|
||||
* can do any "post-parsing" cleanup.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool CNavDelegate::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the selftest method for the delegate class.
|
||||
* Unfortunately, there's not much you can do with this
|
||||
* class alone, so we do the selftesting as part of the
|
||||
* parser class.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
*/
|
||||
void CNavDelegate::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,222 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This class is used as the HTML tokenizer delegate.
|
||||
*
|
||||
* The tokenzier class has the smarts to open an source,
|
||||
* and iterate over its characters to produce a list of
|
||||
* tokens. The tokenizer doesn't know HTML, which is
|
||||
* where this delegate comes into play.
|
||||
*
|
||||
* The tokenizer calls methods on this class to help
|
||||
* with the creation of HTML-specific tokens from a source
|
||||
* stream.
|
||||
*
|
||||
* The interface here is very simple, mainly the call
|
||||
* to GetToken(), which Consumes bytes from the underlying
|
||||
* scanner.stream, and produces an HTML specific CToken.
|
||||
*/
|
||||
|
||||
#ifndef _NAV_DELEGATE
|
||||
#define _NAV_DELEGATE
|
||||
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
#include "nsDeque.h"
|
||||
#include "nsIDTD.h"
|
||||
|
||||
class CNavDelegate : public ITokenizerDelegate {
|
||||
public:
|
||||
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
CNavDelegate();
|
||||
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
CNavDelegate(CNavDelegate& aDelegate);
|
||||
|
||||
/**
|
||||
* Consume next token from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 GetToken(CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Ask if its ok to add this token
|
||||
* @update gess 5/11/98
|
||||
* @param aToken is the token to be added
|
||||
* @return True if ok to add the given token
|
||||
*/
|
||||
virtual PRBool WillAddToken(CToken& aToken);
|
||||
|
||||
/**
|
||||
* Called as a preprocess -- tells delegate that tokenization will begin
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization is incremental
|
||||
* @return TRUE if ok to continue -- FALSE if process should stop
|
||||
*/
|
||||
virtual PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* Postprocess -- called to say that tokenization has concluded
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization was incremental
|
||||
* @return TRUE if all went well--FALSE if you encountered an error
|
||||
*/
|
||||
virtual PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* DEPRECATED. Tells us what mode the delegate is operating in.
|
||||
* @update gess 5/11/98
|
||||
* @return parse mode
|
||||
*/
|
||||
virtual eParseMode GetParseMode(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the DTD required by this delegate
|
||||
* (The parser will call this prior to tokenization)
|
||||
* @update gess 5/11/98
|
||||
* @return ptr to DTD -- should NOT be null.
|
||||
*/
|
||||
virtual nsIDTD* GetDTD(void) const;
|
||||
|
||||
/**
|
||||
* Conduct self test.
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Called to cause delegate to create a token of given type.
|
||||
* @update gess 5/11/98
|
||||
* @param aType represents the kind of token you want to create.
|
||||
* @return new token or NULL
|
||||
*/
|
||||
virtual CToken* CreateTokenOfType(eHTMLTokenTypes aType);
|
||||
|
||||
/**
|
||||
* Retrieve the next TAG from the given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve next START tag from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve collection of HTML/XML attributes from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a sequence of text from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aString will contain retrieved text.
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve an entity from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a whitespace sequence from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a comment from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve newlines from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Causes content to be skipped up to sequence contained in aString.
|
||||
* @update gess 5/11/98
|
||||
* @param aString ????
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,449 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include "COtherDelegate.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "CNavDTD.h"
|
||||
|
||||
|
||||
// Note: We already handle the following special case conditions:
|
||||
// 1) If you see </>, simply treat it as a bad tag.
|
||||
// 2) If you see </ ...>, treat it like a comment.
|
||||
// 3) If you see <> or <_ (< space) simply treat it as text.
|
||||
// 4) If you see <~ (< followed by non-alpha), treat it as text.
|
||||
|
||||
// Table of ASCII letters, digits and underscore; presumably the characters
// allowed in identifiers (no use is visible in this part of the file).
static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
|
||||
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
COtherDelegate::COtherDelegate() :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
COtherDelegate::COtherDelegate(COtherDelegate& aDelegate) :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
eParseMode COtherDelegate::GetParseMode(void) const {
|
||||
return eParseMode_unknown;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cause delegate to create and return a new DTD.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @return new DTD or null
|
||||
*/
|
||||
nsIDTD* COtherDelegate::GetDTD(void) const{
|
||||
return new CNavDTD();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of some kind of tagged
|
||||
* element. We don't know yet if it's a tag or a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
|
||||
nsAutoString empty("");
|
||||
PRInt32 result=aScanner.GetChar(aChar);
|
||||
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
|
||||
if(0!=aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after we've consumed a start
|
||||
* tag, and we now have to consume its attributes.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @return
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRBool done=PR_FALSE;
|
||||
nsAutoString as("");
|
||||
PRInt32 result=kNoError;
|
||||
while((!done) && (result==kNoError)) {
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result= theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a special case method. It's job is to consume
|
||||
* all of the given tag up to an including the end tag.
|
||||
*
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
//In the case that we just read the given tag, we should go and
|
||||
//consume all the input until we find a matching end tag.
|
||||
|
||||
nsAutoString endTag("</");
|
||||
endTag.Append(aString);
|
||||
endTag.Append(">");
|
||||
aToken=new CSkippedContentToken(endTag);
|
||||
PRInt32 result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of a tag.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken=new CStartToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner,aToken);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
nsString& str=aToken->GetText();
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,aToken);
|
||||
|
||||
if(aToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=aToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(aToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
} //if
|
||||
} //if
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "&" has been consumed
|
||||
* and we know we're at the start of an entity.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRUnichar ch;
|
||||
PRInt32 result=aScanner.GetChar(ch);
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(ch,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after whitespace has been
|
||||
* consumed and we know we're at the start a whitespace run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken = new CWhitespaceToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<!" has been consumed
|
||||
* and we know we're at the start of a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken = new CCommentToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a known text char has
|
||||
* been consumed and we should read a text run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken){
|
||||
aToken=new CTextToken(aString);
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
PRUnichar ch;
|
||||
result=aToken->Consume(ch,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a newline has been consumed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken=new CNewlineToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method repeatedly called by the tokenizer.
|
||||
* Each time, we determine the kind of token were about to
|
||||
* read, and then we call the appropriate method to handle
|
||||
* that token type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
|
||||
PRInt32 result=kNoError;
|
||||
PRUnichar aChar;
|
||||
|
||||
if(mTokenDeque.GetSize()>0) {
|
||||
aToken=(CToken*)mTokenDeque.Pop();
|
||||
return result;
|
||||
}
|
||||
|
||||
while(!aScanner.Eof()) {
|
||||
result=aScanner.GetChar(aChar);
|
||||
switch(aChar) {
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
case kNotFound:
|
||||
break;
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
} //switch
|
||||
if(result==kEOF)
|
||||
result=0;
|
||||
} //while
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CToken* COtherDelegate::CreateTokenOfType(eHTMLTokenTypes aType) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is by the tokenizer, once for each new token
|
||||
* we've constructed. This method determines whether or not
|
||||
* the new token (argument) should be accepted as a valid
|
||||
* token. If so, the token is added to the deque of tokens
|
||||
* contained within the tokenzier. If no, the token is
|
||||
* ignored.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aToken: token to be tested for acceptance
|
||||
* @return TRUE if token should be accepted.
|
||||
*/
|
||||
PRBool COtherDelegate::WillAddToken(CToken& /*aToken*/) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just before a stream
|
||||
* is parsed. This method is called so that the delegate
|
||||
* can do any "pre-parsing" initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool COtherDelegate::WillTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just after a stream
|
||||
* was parsed. This method is called so that the delegate
|
||||
* can do any "post-parsing" cleanup.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool COtherDelegate::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the selftest method for the delegate class.
|
||||
* Unfortunately, there's not much you can do with this
|
||||
* class alone, so we do the selftesting as part of the
|
||||
* parser class.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
*/
|
||||
void COtherDelegate::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,222 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This class is used as the HTML tokenizer delegate.
|
||||
*
|
||||
* The tokenizer class has the smarts to open a source,
|
||||
* and iterate over its characters to produce a list of
|
||||
* tokens. The tokenizer doesn't know HTML, which is
|
||||
* where this delegate comes into play.
|
||||
*
|
||||
* The tokenizer calls methods on this class to help
|
||||
* with the creation of HTML-specific tokens from a source
|
||||
* stream.
|
||||
*
|
||||
* The interface here is very simple, mainly the call
|
||||
* to GetToken(), which Consumes bytes from the underlying
|
||||
* scanner.stream, and produces an HTML specific CToken.
|
||||
*/
|
||||
|
||||
#ifndef _OTHER_DELEGATE
|
||||
#define _OTHER_DELEGATE
|
||||
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
#include "nsDeque.h"
|
||||
#include "nsIDTD.h"
|
||||
|
||||
class COtherDelegate : public ITokenizerDelegate {
|
||||
public:
|
||||
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
COtherDelegate();
|
||||
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
COtherDelegate(COtherDelegate& aDelegate);
|
||||
|
||||
/**
|
||||
* Consume next token from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 GetToken(CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Ask if its ok to add this token
|
||||
* @update gess 5/11/98
|
||||
* @param aToken is the token to be added
|
||||
* @return True if ok to add the given token
|
||||
*/
|
||||
virtual PRBool WillAddToken(CToken& aToken);
|
||||
|
||||
/**
|
||||
* Called as a preprocess -- tells delegate that tokenization will begin
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization is incremental
|
||||
* @return TRUE if ok to continue -- FALSE if process should stop
|
||||
*/
|
||||
virtual PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* Postprocess -- called to say that tokenization has concluded
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization was incremental
|
||||
* @return TRUE if all went well--FALSE if you encountered an error
|
||||
*/
|
||||
virtual PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* DEPRECATED. Tells us what mode the delegate is operating in.
|
||||
* @update gess 5/11/98
|
||||
* @return parse mode
|
||||
*/
|
||||
virtual eParseMode GetParseMode(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the DTD required by this delegate
|
||||
* (The parser will call this prior to tokenization)
|
||||
* @update gess 5/11/98
|
||||
* @return ptr to DTD -- should NOT be null.
|
||||
*/
|
||||
virtual nsIDTD* GetDTD(void) const;
|
||||
|
||||
/**
|
||||
* Conduct self test.
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Called to cause delegate to create a token of given type.
|
||||
* @update gess 5/11/98
|
||||
* @param aType represents the kind of token you want to create.
|
||||
* @return new token or NULL
|
||||
*/
|
||||
virtual CToken* CreateTokenOfType(eHTMLTokenTypes aType);
|
||||
|
||||
/**
|
||||
* Retrieve the next TAG from the given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve next START tag from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve collection of HTML/XML attributes from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a sequence of text from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aString will contain retrieved text.
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve an entity from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a whitespace sequence from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a comment from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve newlines from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Causes content to be skipped up to sequence contained in aString.
|
||||
* @update gess 5/11/98
|
||||
* @param aString ????
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This virtual base class is used to define the basic
|
||||
* tokenizer delegate interface. As you can see, it is
|
||||
* very simple.
|
||||
*
|
||||
* The only routines we use at this point are getToken()
|
||||
* and willAddToken(). While getToken() is obvious,
|
||||
* willAddToken() may not be. The purpose of the method
|
||||
* is to allow the delegate to decide whether or not a
|
||||
* given token that was just read in the tokenization process
|
||||
* should be included in the total list of tokens. This
|
||||
* method gives you a chance to say, "No, ignore this token".
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ITOKENIZERDELEGATE
|
||||
#define ITOKENIZERDELEGATE
|
||||
|
||||
#include "prtypes.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "nsIDTD.h"
|
||||
|
||||
class CScanner;
|
||||
class CToken;
|
||||
|
||||
class ITokenizerDelegate {
|
||||
public:
|
||||
|
||||
virtual PRBool WillTokenize(PRBool aIncremental)=0;
|
||||
virtual PRBool DidTokenize(PRBool aIncremental)=0;
|
||||
|
||||
virtual PRInt32 GetToken(CScanner& aScanner,CToken*& aToken)=0;
|
||||
virtual PRBool WillAddToken(CToken& aToken)=0;
|
||||
|
||||
virtual eParseMode GetParseMode(void) const=0;
|
||||
virtual nsIDTD* GetDTD(void) const=0;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -71,7 +71,7 @@ class CToken {
|
|||
* destructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
~CToken();
|
||||
virtual ~CToken();
|
||||
|
||||
/**
|
||||
* Retrieve string value of the token
|
||||
|
|
|
@ -1,314 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
|
||||
#include <fstream.h>
|
||||
#include "nsTokenizer.h"
|
||||
#include "nsToken.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsIURL.h"
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aURL,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aFilename,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::~CTokenizer() {
|
||||
delete mScanner;
|
||||
delete mDelegate;
|
||||
mScanner=0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(nsString& aBuffer) {
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/21/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer,aLen);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
nsDeque& CTokenizer::GetDeque(void) {
|
||||
return mTokenDeque;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CTokenizer::GetToken(CToken*& aToken) {
|
||||
PRInt32 result=mDelegate->GetToken(*mScanner,aToken);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the number of elements in the deque
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return int containing element count
|
||||
*/
|
||||
PRInt32 CTokenizer::GetSize(void) {
|
||||
return mTokenDeque.GetSize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Part of the code sandwich, this gets called right before
|
||||
* the tokenization process begins. The main reason for
|
||||
* this call is to allow the delegate to do initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRBool CTokenizer::WillTokenize(PRBool aIncremental){
|
||||
PRBool result=PR_TRUE;
|
||||
result=mDelegate->WillTokenize(aIncremental);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
WillTokenize(PR_TRUE);
|
||||
|
||||
while(kNoError==result) {
|
||||
result=GetToken(theToken);
|
||||
if(theToken && (kNoError==result)) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(kEOF==result)
|
||||
result=kNoError;
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(int anIteration) {
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
|
||||
|
||||
|
||||
while((PR_FALSE==done) && (kNoError==result)) {
|
||||
mScanner->Mark();
|
||||
result=GetToken(theToken);
|
||||
if(kNoError==result) {
|
||||
if(theToken) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
if(theToken)
|
||||
delete theToken;
|
||||
mScanner->RewindToMark();
|
||||
}
|
||||
}
|
||||
if((PR_TRUE==done) && (kInterrupted!=result))
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenziation
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool CTokenizer::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=mDelegate->DidTokenize(aIncremental);
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
DebugDumpTokens(cout);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpTokens(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpToken(out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpSource(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpSource(out);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -1,509 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include "CNavDelegate.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "CNavDTD.h"
|
||||
|
||||
|
||||
// Note: We already handle the following special case conditions:
|
||||
// 1) If you see </>, simply treat it as a bad tag.
|
||||
// 2) If you see </ ...>, treat it like a comment.
|
||||
// 3) If you see <> or <_ (< space) simply treat it as text.
|
||||
// 4) If you see <[!a..z] (< followed by non-alpha), treat it as text.
|
||||
|
||||
// Letters, digits and underscore -- presumably the set of characters
// accepted in an identifier (not referenced in the visible part of this file; verify use).
static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CNavDelegate::CNavDelegate() :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CNavDelegate::CNavDelegate(CNavDelegate& aDelegate) :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
eParseMode CNavDelegate::GetParseMode(void) const {
|
||||
return eParseMode_unknown;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cause delegate to create and return a new DTD.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @return new DTD or null
|
||||
*/
|
||||
nsIDTD* CNavDelegate::GetDTD(void) const{
|
||||
return new CNavDTD();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of some kind of tagged
|
||||
* element. We don't know yet if it's a tag or a comment.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aChar is the last char read
|
||||
* @param aScanner is represents our input source
|
||||
* @param aToken is the out arg holding our new token
|
||||
* @return error code (may return kInterrupted).
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
|
||||
nsAutoString empty("");
|
||||
PRInt32 result=aScanner.GetChar(aChar);
|
||||
|
||||
if(kNoError==result) {
|
||||
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
}//if
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
|
||||
if((0!=aToken) && (kNoError==result)) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after we've consumed a start
|
||||
* tag, and we now have to consume its attributes.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRBool done=PR_FALSE;
|
||||
PRInt32 result=kNoError;
|
||||
nsAutoString as("");
|
||||
PRInt16 theAttrCount=0;
|
||||
|
||||
while((!done) && (result==kNoError)) {
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(kNoError==result){
|
||||
theAttrCount++;
|
||||
mTokenDeque.Push(theToken);
|
||||
}//if
|
||||
else delete theToken; //we can't keep it...
|
||||
}//if
|
||||
|
||||
if(kNoError==result){
|
||||
result=aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}//if
|
||||
}//if
|
||||
}//while
|
||||
|
||||
aToken->SetAttributeCount(theAttrCount);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a special case method. It's job is to consume
|
||||
* all of the given tag up to an including the end tag.
|
||||
*
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
//In the case that we just read the given tag, we should go and
|
||||
//consume all the input until we find a matching end tag.
|
||||
|
||||
nsAutoString endTag("</");
|
||||
endTag.Append(aString);
|
||||
endTag.Append(">");
|
||||
aToken=new CSkippedContentToken(endTag);
|
||||
return aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of a tag.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRInt32 theDequeSize=mTokenDeque.GetSize();
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
aToken=new CStartToken(nsAutoString(""));
|
||||
|
||||
if(aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(kNoError==result) {
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner,aToken);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
if(kNoError==result) {
|
||||
nsString& str=aToken->GetText();
|
||||
CToken* skippedToken=0;
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
|
||||
|
||||
if((kNoError==result) && skippedToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=skippedToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(skippedToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
} //if
|
||||
} //if
|
||||
} //if
|
||||
|
||||
//EEEEECCCCKKKK!!!
|
||||
//This code is confusing, so pay attention.
|
||||
//If you're here, it's because we were in the midst of consuming a start
|
||||
//tag but ran out of data (not in the stream, but in this *part* of the stream.
|
||||
//For simplicity, we have to unwind our input. Therefore, we pop and discard
|
||||
//any new tokens we've cued this round. Later we can get smarter about this.
|
||||
if(kNoError!=result) {
|
||||
while(mTokenDeque.GetSize()>theDequeSize) {
|
||||
delete mTokenDeque.PopBack();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "&" has been consumed
|
||||
* and we know we're at the start of an entity.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRUnichar ch;
|
||||
PRInt32 result=aScanner.GetChar(ch);
|
||||
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(0,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
temp.Append(ch);
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
}//if
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after whitespace has been
|
||||
* consumed and we know we're at the start a whitespace run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken = new CWhitespaceToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<!" has been consumed
|
||||
* and we know we're at the start of a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken = new CCommentToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a known text char has
|
||||
* been consumed and we should read a text run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken=new CTextToken(aString)) {
|
||||
PRUnichar ch=0;
|
||||
result=aToken->Consume(ch,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a newline has been consumed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken=new CNewlineToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method repeatedly called by the tokenizer.
|
||||
* Each time, we determine the kind of token were about to
|
||||
* read, and then we call the appropriate method to handle
|
||||
* that token type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
aToken=0;
|
||||
if(mTokenDeque.GetSize()>0) {
|
||||
aToken=(CToken*)mTokenDeque.Pop();
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
PRInt32 result=kNoError;
|
||||
if(kNoError==result){
|
||||
|
||||
PRUnichar aChar;
|
||||
result=aScanner.GetChar(aChar);
|
||||
switch(result) {
|
||||
case kEOF:
|
||||
break;
|
||||
|
||||
case kInterrupted:
|
||||
aScanner.RewindToMark();
|
||||
break;
|
||||
|
||||
case kNoError:
|
||||
default:
|
||||
switch(aChar) {
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
|
||||
case kNotFound:
|
||||
break;
|
||||
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
} //switch
|
||||
break;
|
||||
} //switch
|
||||
if(kNoError==result)
|
||||
result=aScanner.Eof();
|
||||
} //while
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CToken* CNavDelegate::CreateTokenOfType(eHTMLTokenTypes aType) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is by the tokenizer, once for each new token
|
||||
* we've constructed. This method determines whether or not
|
||||
* the new token (argument) should be accepted as a valid
|
||||
* token. If so, the token is added to the deque of tokens
|
||||
* contained within the tokenzier. If no, the token is
|
||||
* ignored.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aToken: token to be tested for acceptance
|
||||
* @return TRUE if token should be accepted.
|
||||
*/
|
||||
PRBool CNavDelegate::WillAddToken(CToken& /*aToken*/) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just before a stream
|
||||
* is parsed. This method is called so that the delegate
|
||||
* can do any "pre-parsing" initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool CNavDelegate::WillTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just after a stream
|
||||
* was parsed. This method is called so that the delegate
|
||||
* can do any "post-parsing" cleanup.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool CNavDelegate::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the selftest method for the delegate class.
|
||||
* Unfortunately, there's not much you can do with this
|
||||
* class alone, so we do the selftesting as part of the
|
||||
* parser class.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
*/
|
||||
void CNavDelegate::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,222 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This class is used as the HTML tokenizer delegate.
|
||||
*
|
||||
* The tokenizer class has the smarts to open a source,
|
||||
* and iterate over its characters to produce a list of
|
||||
* tokens. The tokenizer doesn't know HTML, which is
|
||||
* where this delegate comes into play.
|
||||
*
|
||||
* The tokenizer calls methods on this class to help
|
||||
* with the creation of HTML-specific tokens from a source
|
||||
* stream.
|
||||
*
|
||||
* The interface here is very simple, mainly the call
|
||||
* to GetToken(), which Consumes bytes from the underlying
|
||||
* scanner.stream, and produces an HTML specific CToken.
|
||||
*/
|
||||
|
||||
#ifndef _NAV_DELEGATE
|
||||
#define _NAV_DELEGATE
|
||||
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
#include "nsDeque.h"
|
||||
#include "nsIDTD.h"
|
||||
|
||||
class CNavDelegate : public ITokenizerDelegate {
|
||||
public:
|
||||
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
CNavDelegate();
|
||||
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
CNavDelegate(CNavDelegate& aDelegate);
|
||||
|
||||
/**
|
||||
* Consume next token from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 GetToken(CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Ask if its ok to add this token
|
||||
* @update gess 5/11/98
|
||||
* @param aToken is the token to be added
|
||||
* @return True if ok to add the given token
|
||||
*/
|
||||
virtual PRBool WillAddToken(CToken& aToken);
|
||||
|
||||
/**
|
||||
* Called as a preprocess -- tells delegate that tokenization will begin
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization is incremental
|
||||
* @return TRUE if ok to continue -- FALSE if process should stop
|
||||
*/
|
||||
virtual PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* Postprocess -- called to say that tokenization has concluded
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization was incremental
|
||||
* @return TRUE if all went well--FALSE if you encountered an error
|
||||
*/
|
||||
virtual PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* DEPRECATED. Tells us what mode the delegate is operating in.
|
||||
* @update gess 5/11/98
|
||||
* @return parse mode
|
||||
*/
|
||||
virtual eParseMode GetParseMode(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the DTD required by this delegate
|
||||
* (The parser will call this prior to tokenization)
|
||||
* @update gess 5/11/98
|
||||
* @return ptr to DTD -- should NOT be null.
|
||||
*/
|
||||
virtual nsIDTD* GetDTD(void) const;
|
||||
|
||||
/**
|
||||
* Conduct self test.
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Called to cause delegate to create a token of given type.
|
||||
* @update gess 5/11/98
|
||||
* @param aType represents the kind of token you want to create.
|
||||
* @return new token or NULL
|
||||
*/
|
||||
virtual CToken* CreateTokenOfType(eHTMLTokenTypes aType);
|
||||
|
||||
/**
|
||||
* Retrieve the next TAG from the given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve next START tag from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve collection of HTML/XML attributes from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a sequence of text from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aString will contain retrieved text.
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve an entity from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a whitespace sequence from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a comment from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve newlines from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Causes content to be skipped up to sequence contained in aString.
|
||||
* @update gess 5/11/98
|
||||
* @param aString ????
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,449 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include "COtherDelegate.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "CNavDTD.h"
|
||||
|
||||
|
||||
// Note: We already handle the following special case conditions:
|
||||
// 1) If you see </>, simply treat it as a bad tag.
|
||||
// 2) If you see </ ...>, treat it like a comment.
|
||||
// 3) If you see <> or <_ (< space) simply treat it as text.
|
||||
// 4) If you see <~ (< followed by non-alpha), treat it as text.
|
||||
|
||||
static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
|
||||
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
COtherDelegate::COtherDelegate() :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @updated gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
COtherDelegate::COtherDelegate(COtherDelegate& aDelegate) :
|
||||
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
eParseMode COtherDelegate::GetParseMode(void) const {
|
||||
return eParseMode_unknown;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cause delegate to create and return a new DTD.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @return new DTD or null
|
||||
*/
|
||||
nsIDTD* COtherDelegate::GetDTD(void) const{
|
||||
return new CNavDTD();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of some kind of tagged
|
||||
* element. We don't know yet if it's a tag or a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
|
||||
nsAutoString empty("");
|
||||
PRInt32 result=aScanner.GetChar(aChar);
|
||||
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
|
||||
if(0!=aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after we've consumed a start
|
||||
* tag, and we now have to consume its attributes.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @return
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRBool done=PR_FALSE;
|
||||
nsAutoString as("");
|
||||
PRInt32 result=kNoError;
|
||||
while((!done) && (result==kNoError)) {
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result= theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a special case method. It's job is to consume
|
||||
* all of the given tag up to an including the end tag.
|
||||
*
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
|
||||
//In the case that we just read the given tag, we should go and
|
||||
//consume all the input until we find a matching end tag.
|
||||
|
||||
nsAutoString endTag("</");
|
||||
endTag.Append(aString);
|
||||
endTag.Append(">");
|
||||
aToken=new CSkippedContentToken(endTag);
|
||||
PRInt32 result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of a tag.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken=new CStartToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner,aToken);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
nsString& str=aToken->GetText();
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,aToken);
|
||||
|
||||
if(aToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=aToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(aToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
} //if
|
||||
} //if
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "&" has been consumed
|
||||
* and we know we're at the start of an entity.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRUnichar ch;
|
||||
PRInt32 result=aScanner.GetChar(ch);
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(ch,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after whitespace has been
|
||||
* consumed and we know we're at the start a whitespace run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken = new CWhitespaceToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<!" has been consumed
|
||||
* and we know we're at the start of a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken = new CCommentToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a known text char has
|
||||
* been consumed and we should read a text run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken){
|
||||
aToken=new CTextToken(aString);
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
PRUnichar ch;
|
||||
result=aToken->Consume(ch,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a newline has been consumed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken=new CNewlineToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method repeatedly called by the tokenizer.
|
||||
* Each time, we determine the kind of token were about to
|
||||
* read, and then we call the appropriate method to handle
|
||||
* that token type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 COtherDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
|
||||
PRInt32 result=kNoError;
|
||||
PRUnichar aChar;
|
||||
|
||||
if(mTokenDeque.GetSize()>0) {
|
||||
aToken=(CToken*)mTokenDeque.Pop();
|
||||
return result;
|
||||
}
|
||||
|
||||
while(!aScanner.Eof()) {
|
||||
result=aScanner.GetChar(aChar);
|
||||
switch(aChar) {
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
case kNotFound:
|
||||
break;
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
} //switch
|
||||
if(result==kEOF)
|
||||
result=0;
|
||||
} //while
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CToken* COtherDelegate::CreateTokenOfType(eHTMLTokenTypes aType) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is by the tokenizer, once for each new token
|
||||
* we've constructed. This method determines whether or not
|
||||
* the new token (argument) should be accepted as a valid
|
||||
* token. If so, the token is added to the deque of tokens
|
||||
* contained within the tokenzier. If no, the token is
|
||||
* ignored.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aToken: token to be tested for acceptance
|
||||
* @return TRUE if token should be accepted.
|
||||
*/
|
||||
PRBool COtherDelegate::WillAddToken(CToken& /*aToken*/) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just before a stream
|
||||
* is parsed. This method is called so that the delegate
|
||||
* can do any "pre-parsing" initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool COtherDelegate::WillTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called by the parser, just after a stream
|
||||
* was parsed. This method is called so that the delegate
|
||||
* can do any "post-parsing" cleanup.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if preinitialization completed successfully
|
||||
*/
|
||||
PRBool COtherDelegate::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=PR_TRUE;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the selftest method for the delegate class.
|
||||
* Unfortunately, there's not much you can do with this
|
||||
* class alone, so we do the selftesting as part of the
|
||||
* parser class.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
*/
|
||||
void COtherDelegate::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,222 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This class is used as the HTML tokenizer delegate.
|
||||
*
|
||||
* The tokenizer class has the smarts to open a source,
|
||||
* and iterate over its characters to produce a list of
|
||||
* tokens. The tokenizer doesn't know HTML, which is
|
||||
* where this delegate comes into play.
|
||||
*
|
||||
* The tokenizer calls methods on this class to help
|
||||
* with the creation of HTML-specific tokens from a source
|
||||
* stream.
|
||||
*
|
||||
* The interface here is very simple, mainly the call
|
||||
* to GetToken(), which Consumes bytes from the underlying
|
||||
* scanner.stream, and produces an HTML specific CToken.
|
||||
*/
|
||||
|
||||
#ifndef _OTHER_DELEGATE
|
||||
#define _OTHER_DELEGATE
|
||||
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
#include "nsDeque.h"
|
||||
#include "nsIDTD.h"
|
||||
|
||||
class COtherDelegate : public ITokenizerDelegate {
|
||||
public:
|
||||
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
COtherDelegate();
|
||||
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
COtherDelegate(COtherDelegate& aDelegate);
|
||||
|
||||
/**
|
||||
* Consume next token from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 GetToken(CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Ask if its ok to add this token
|
||||
* @update gess 5/11/98
|
||||
* @param aToken is the token to be added
|
||||
* @return True if ok to add the given token
|
||||
*/
|
||||
virtual PRBool WillAddToken(CToken& aToken);
|
||||
|
||||
/**
|
||||
* Called as a preprocess -- tells delegate that tokenization will begin
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization is incremental
|
||||
* @return TRUE if ok to continue -- FALSE if process should stop
|
||||
*/
|
||||
virtual PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* Postprocess -- called to say that tokenization has concluded
|
||||
* @update gess 5/11/98
|
||||
* @param aIncremental tells us if tokenization was incremental
|
||||
* @return TRUE if all went well--FALSE if you encountered an error
|
||||
*/
|
||||
virtual PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* DEPRECATED. Tells us what mode the delegate is operating in.
|
||||
* @update gess 5/11/98
|
||||
* @return parse mode
|
||||
*/
|
||||
virtual eParseMode GetParseMode(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the DTD required by this delegate
|
||||
* (The parser will call this prior to tokenization)
|
||||
* @update gess 5/11/98
|
||||
* @return ptr to DTD -- should NOT be null.
|
||||
*/
|
||||
virtual nsIDTD* GetDTD(void) const;
|
||||
|
||||
/**
|
||||
* Conduct self test.
|
||||
* @update gess 5/11/98
|
||||
*/
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Called to cause delegate to create a token of given type.
|
||||
* @update gess 5/11/98
|
||||
* @param aType represents the kind of token you want to create.
|
||||
* @return new token or NULL
|
||||
*/
|
||||
virtual CToken* CreateTokenOfType(eHTMLTokenTypes aType);
|
||||
|
||||
/**
|
||||
* Retrieve the next TAG from the given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve next START tag from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve collection of HTML/XML attributes from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a sequence of text from given scanner.
|
||||
* @update gess 5/11/98
|
||||
* @param aString will contain retrieved text.
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve an entity from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a whitespace sequence from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve a comment from the given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve newlines from given scanner
|
||||
* @update gess 5/11/98
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Causes content to be skipped up to sequence contained in aString.
|
||||
* @update gess 5/11/98
|
||||
* @param aString ????
|
||||
* @param aChar last char read from scanner
|
||||
* @param aScanner is the input source
|
||||
* @param aToken is the next token (or null)
|
||||
* @return error code
|
||||
*/
|
||||
virtual PRInt32 ConsumeContentToEndTag(const nsString& aString,PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
|
||||
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/1/98
|
||||
*
|
||||
* This virtual base class is used to define the basic
|
||||
* tokenizer delegate interface. As you can see, it is
|
||||
* very simple.
|
||||
*
|
||||
* The only routines we use at this point are getToken()
|
||||
* and willAddToken(). While getToken() is obvious,
|
||||
* willAddToken() may not be. The purpose of the method
|
||||
* is to allow the delegate to decide whether or not a
|
||||
* given token that was just read in the tokenization process
|
||||
* should be included in the total list of tokens. This
|
||||
* method gives you a chance to say, "No, ignore this token".
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ITOKENIZERDELEGATE
|
||||
#define ITOKENIZERDELEGATE
|
||||
|
||||
#include "prtypes.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "nsIDTD.h"
|
||||
|
||||
class CScanner;
|
||||
class CToken;
|
||||
|
||||
class ITokenizerDelegate {
|
||||
public:
|
||||
|
||||
virtual PRBool WillTokenize(PRBool aIncremental)=0;
|
||||
virtual PRBool DidTokenize(PRBool aIncremental)=0;
|
||||
|
||||
virtual PRInt32 GetToken(CScanner& aScanner,CToken*& aToken)=0;
|
||||
virtual PRBool WillAddToken(CToken& aToken)=0;
|
||||
|
||||
virtual eParseMode GetParseMode(void) const=0;
|
||||
virtual nsIDTD* GetDTD(void) const=0;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -71,7 +71,7 @@ class CToken {
|
|||
* destructor
|
||||
* @update gess5/11/98
|
||||
*/
|
||||
~CToken();
|
||||
virtual ~CToken();
|
||||
|
||||
/**
|
||||
* Retrieve string value of the token
|
||||
|
|
|
@ -1,314 +0,0 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
|
||||
#include <fstream.h>
|
||||
#include "nsTokenizer.h"
|
||||
#include "nsToken.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsIURL.h"
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aURL,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aFilename,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::~CTokenizer() {
|
||||
delete mScanner;
|
||||
delete mDelegate;
|
||||
mScanner=0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(nsString& aBuffer) {
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/21/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer,aLen);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
nsDeque& CTokenizer::GetDeque(void) {
|
||||
return mTokenDeque;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CTokenizer::GetToken(CToken*& aToken) {
|
||||
PRInt32 result=mDelegate->GetToken(*mScanner,aToken);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the number of elements in the deque
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return int containing element count
|
||||
*/
|
||||
PRInt32 CTokenizer::GetSize(void) {
|
||||
return mTokenDeque.GetSize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Part of the code sandwich, this gets called right before
|
||||
* the tokenization process begins. The main reason for
|
||||
* this call is to allow the delegate to do initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRBool CTokenizer::WillTokenize(PRBool aIncremental){
|
||||
PRBool result=PR_TRUE;
|
||||
result=mDelegate->WillTokenize(aIncremental);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
WillTokenize(PR_TRUE);
|
||||
|
||||
while(kNoError==result) {
|
||||
result=GetToken(theToken);
|
||||
if(theToken && (kNoError==result)) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(kEOF==result)
|
||||
result=kNoError;
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(int anIteration) {
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
|
||||
|
||||
|
||||
while((PR_FALSE==done) && (kNoError==result)) {
|
||||
mScanner->Mark();
|
||||
result=GetToken(theToken);
|
||||
if(kNoError==result) {
|
||||
if(theToken) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
if(theToken)
|
||||
delete theToken;
|
||||
mScanner->RewindToMark();
|
||||
}
|
||||
}
|
||||
if((PR_TRUE==done) && (kInterrupted!=result))
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenziation
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool CTokenizer::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=mDelegate->DidTokenize(aIncremental);
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
DebugDumpTokens(cout);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpTokens(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpToken(out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpSource(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpSource(out);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче