table bug fixes, heading fixes, performance, docs

This commit is contained in:
rickg%netscape.com 1998-07-23 08:21:02 +00:00
Родитель be692e043a
Коммит bc0982b1b2
36 изменённых файлов: 1470 добавлений и 756 удалений

Просмотреть файл

@ -16,20 +16,6 @@
* Reserved.
*/
/**
* MODULE NOTES:
* @update gess 4/8/98
*
*
*/
/**
* TRANSIENT STYLE-HANDLING NOTES:
* @update gess 6/15/98
*
* ...add comments here about transient style stack.
*
*/
#include "nsIDTDDebug.h"
#include "CNavDTD.h"
@ -1451,6 +1437,15 @@ PRBool CNavDTD::CanOmit(eHTMLTags aParent,eHTMLTags aChild) const {
} //switch
break;
//this code prevents table container elements from
//opening unless a table is actually already opened.
case eHTMLTag_tr: case eHTMLTag_thead:
case eHTMLTag_tfoot: case eHTMLTag_tbody:
case eHTMLTag_td:
if(PR_FALSE==HasOpenContainer(eHTMLTag_table))
result=PR_TRUE;
break;
case eHTMLTag_entity:
switch((eHTMLTags)aParent) {
case eHTMLTag_tr: case eHTMLTag_table:
@ -1472,17 +1467,23 @@ PRBool CNavDTD::CanOmit(eHTMLTags aParent,eHTMLTags aChild) const {
/**
*
* This method is called when you want to determine if one tag is
* synonymous with another. Cases where this are true include style
* tags (where <i> is allowed to close <b> for example). Another
* is <H?>, where any open heading tag can be closed by any close heading tag.
* @update gess6/16/98
* @param
* @return
*/
PRBool IsCompatibleStyleTag(eHTMLTags aTag1,eHTMLTags aTag2) {
PRBool IsCompatibleTag(eHTMLTags aTag1,eHTMLTags aTag2) {
  //Two tags are compatible when both appear in gStyleTags, or both appear
  //in gHeadingTags. If aTag1 appears in both lists, the heading test wins
  //because it is evaluated last.
  PRBool result=PR_FALSE;

  //strchr() treats the terminating NUL as part of the string, so searching
  //for a zero-valued tag id would always "succeed". Reject zero up front so
  //that such a tag is never reported as belonging to either list.
  if((0!=aTag1) && (0!=aTag2)) {
    if(0!=strchr(gStyleTags,aTag1)) {
      result=PRBool(0!=strchr(gStyleTags,aTag2));
    }
    if(0!=strchr(gHeadingTags,aTag1)) {
      result=PRBool(0!=strchr(gHeadingTags,aTag2));
    }
  }
  return result;
}
@ -1521,10 +1522,22 @@ PRBool CNavDTD::CanOmitEndTag(eHTMLTags aParent,eHTMLTags aChild) const {
} //switch
break;
//It turns out that a <Hn> can be closed by any other <H?>
//This code makes them all seem compatible.
case eHTMLTag_h1: case eHTMLTag_h2:
case eHTMLTag_h3: case eHTMLTag_h4:
case eHTMLTag_h5: case eHTMLTag_h6:
if(0!=strchr(gHeadingTags,aParent)) {
result=PR_FALSE;
break;
}
//Otherwise, IT's OK TO FALL THROUGH HERE...
default:
if(IsGatedFromClosing(aChild))
result=PR_TRUE;
else if(IsCompatibleStyleTag(aChild,GetTopNode()))
else if(IsCompatibleTag(aChild,GetTopNode()))
result=PR_FALSE;
else result=(!HasOpenContainer(aChild));
break;
@ -1883,7 +1896,7 @@ nsresult CNavDTD::OpenTransientStyles(eHTMLTags aTag){
eHTMLTags theTag=mStyleStack.mTags[pos];
if(PR_FALSE==HasOpenContainer(theTag)) {
CStartToken token(GetTagName(theTag));
CStartToken token(theTag);
nsCParserNode theNode(&token);
switch(theTag) {
@ -2304,7 +2317,7 @@ nsresult CNavDTD::CloseContainersTo(eHTMLTags aTag,PRBool aUpdateStyles){
}
eHTMLTags theTopTag=GetTopNode();
if(IsCompatibleStyleTag(aTag,theTopTag)) {
if(IsCompatibleTag(aTag,theTopTag)) {
//if you're here, it's because we're trying to close one style tag,
//but a different one is actually open. Because this is NAV4x
//compatibility mode, we must close the one that's really open.
@ -2570,7 +2583,6 @@ CNavDTD::UpdateStyleStackForCloseTag(eHTMLTags aTag,eHTMLTags anActualTag){
nsresult
CNavDTD::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
nsAutoString empty("");
nsresult result=aScanner.GetChar(aChar);
if(NS_OK==result) {
@ -2581,12 +2593,12 @@ CNavDTD::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
result=aScanner.Peek(ch);
if(NS_OK==result) {
if(nsString::IsAlpha(ch))
aToken=new CEndToken(empty);
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
aToken=new CEndToken(eHTMLTag_unknown);
else aToken=new CCommentToken(); //Special case: </ ...> is treated as a comment
}//if
break;
case kExclamation:
aToken=new CCommentToken(empty);
aToken=new CCommentToken();
break;
default:
if(nsString::IsAlpha(aChar))
@ -2621,11 +2633,10 @@ nsresult
CNavDTD::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken) {
PRBool done=PR_FALSE;
nsresult result=NS_OK;
nsAutoString as("");
PRInt16 theAttrCount=0;
while((!done) && (result==NS_OK)) {
CAttributeToken* theToken= new CAttributeToken(as);
CAttributeToken* theToken= new CAttributeToken();
if(theToken){
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
@ -2635,7 +2646,7 @@ CNavDTD::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToke
//and a textkey of "/". We should destroy it, and tell the
//start token it was empty.
nsString& key=theToken->GetKey();
nsString& text=theToken->GetText();
nsString& text=theToken->GetStringValueXXX();
if((key[0]==kForwardSlash) && (0==text.Length())){
//tada! our special case! Treat it like an empty start tag...
aToken->SetEmpty(PR_TRUE);
@ -2696,12 +2707,15 @@ CNavDTD::ConsumeContentToEndTag(const nsString& aString,
* @param anErrorCode: arg that will hold error condition
* @return new token or null
*/
static char gSpecialTags[]={ eHTMLTag_script, eHTMLTag_style, eHTMLTag_title, eHTMLTag_textarea, 0};
nsresult
CNavDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
PRInt32 theDequeSize=mTokenDeque.GetSize();
nsresult result=NS_OK;
aToken=new CStartToken(nsAutoString(""));
aToken=new CStartToken(eHTMLTag_unknown);
if(aToken) {
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
@ -2713,19 +2727,20 @@ CNavDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
//consume all the content itself.
if(NS_OK==result) {
nsString& str=aToken->GetText();
CToken* skippedToken=0;
if(str.EqualsIgnoreCase("SCRIPT") ||
str.EqualsIgnoreCase("STYLE") ||
str.EqualsIgnoreCase("TITLE") ||
str.EqualsIgnoreCase("TEXTAREA")) {
eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();
if(0!=strchr(gSpecialTags,theTag)){
//Do special case handling for <script>, <style>, <title> or <textarea>...
CToken* skippedToken=0;
nsString& str=aToken->GetStringValueXXX();
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
if((NS_OK==result) && skippedToken){
//now we strip the ending sequence from our new SkippedContent token...
PRInt32 slen=str.Length()+3;
nsString& skippedText=skippedToken->GetText();
nsString& skippedText=skippedToken->GetStringValueXXX();
skippedText.Cut(skippedText.Length()-slen,slen);
mTokenDeque.Push(skippedToken);
@ -2733,7 +2748,7 @@ CNavDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
//In the case that we just read a given tag, we should go and
//consume all the tag content itself (and throw it all away).
CEndToken* endtoken=new CEndToken(str);
CEndToken* endtoken=new CEndToken(theTag);
mTokenDeque.Push(endtoken);
} //if
} //if
@ -2774,11 +2789,11 @@ CNavDTD::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
if(NS_OK==result) {
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or &#000.
aToken = new CEntityToken(nsAutoString(""));
aToken = new CEntityToken();
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
}
else if(kHashsign==ch) {
aToken = new CEntityToken(nsAutoString(""));
aToken = new CEntityToken();
result=aToken->Consume(0,aScanner);
}
else {
@ -2805,7 +2820,7 @@ nsresult
CNavDTD::ConsumeWhitespace(PRUnichar aChar,
CScanner& aScanner,
CToken*& aToken) {
aToken = new CWhitespaceToken(nsAutoString(""));
aToken = new CWhitespaceToken();
nsresult result=NS_OK;
if(aToken) {
result=aToken->Consume(aChar,aScanner);
@ -2825,7 +2840,7 @@ CNavDTD::ConsumeWhitespace(PRUnichar aChar,
*/
nsresult
CNavDTD::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
aToken = new CCommentToken(nsAutoString(""));
aToken = new CCommentToken();
nsresult result=NS_OK;
if(aToken) {
result=aToken->Consume(aChar,aScanner);
@ -2868,7 +2883,7 @@ nsresult
CNavDTD::ConsumeNewline(PRUnichar aChar,
CScanner& aScanner,
CToken*& aToken){
aToken=new CNewlineToken(nsAutoString(""));
aToken=new CNewlineToken();
nsresult result=NS_OK;
if(aToken) {
result=aToken->Consume(aChar,aScanner);

Просмотреть файл

@ -19,11 +19,62 @@
/**
* MODULE NOTES:
* @update gess 4/8/98
* @update gess 7/15/98
*
* NavDTD is an implementation of the nsIDTD interface.
* In particular, this class captures the behaviors of the original
* Navigator parser productions.
*
* This DTD, like any other in NGLayout, provides a few basic services:
* - First, the DTD collaborates with the Parser class to convert plain
* text into a sequence of HTMLTokens.
* - Second, the DTD describes containment rules for known elements.
* - Third the DTD controls and coordinates the interaction between the
* parsing system and content sink. (The content sink is the interface
* that serves as a proxy for content model).
* - Fourth the DTD maintains an internal style-stack to handle residual (leaky)
* style tags.
*
* You're most likely working in this class file because
* you want to add or change a behavior inherent in this DTD. The remainder
* of this section will describe what you need to do to affect the kind of
* change you want in this DTD.
*
* RESIDUAL-STYLE HANDLING:
* There are a number of ways to represent style in an HTML document.
* 1) explicit style tags (<B>, <I> etc)
* 2) implicit styles (like those implicit in <Hn>)
* 3) CSS based styles
*
* Residual style handling results from explicit style tags that are
* not closed. Consider this example: <p>text <b>bold </p>
* When the <p> tag closes, the <b> tag is NOT automatically closed.
* Unclosed style tags are handled by the process we call residual-style
* tag handling.
*
* There are two aspects to residual style tag handling. The first is the
* construction and managing of a stack of residual style tags. The
* second is the automatic emission of residual style tags onto leaf content
* in subsequent portions of the document. This step is necessary to propagate
* the expected style behavior to subsequent portions of the document.
*
* Construction and managing the residual style stack is an inline process that
* occurs during the model building phase of the parse process. During the model-
* building phase of the parse process, a content stack is maintained which tracks
* the open container hierarchy. If a style tag(s) fails to be closed when a normal
* container is closed, that style tag is placed onto the residual style stack. If
* that style tag is subsequently closed (in most contexts), it is popped off the
* residual style stack -- and is of no further concern.
*
* Residual style tag emission occurs when the style stack is not empty, and leaf
* content occurs. In our earlier example, the <b> tag "leaked" out of the <p>
* container. Just before the next leaf is emitted (in this or another container) the
* style tags that are on the stack are emitted in succession. These same residual
* style tags get closed automatically when the leaf's container closes, or if a
* child container is opened.
*
*
*/
#ifndef NS_NAVHTMLDTD__
#define NS_NAVHTMLDTD__
@ -49,9 +100,10 @@ class nsParser;
/***************************************************************
First define a helper class called CTagStack.
Before digging into the NavDTD, we'll define a helper
class called CTagStack.
Simple, we've built ourselves a little data structure that
Simply put, we've built ourselves a little data structure that
serves as a stack for htmltags (and associated bits).
What's special is that if you #define _dynstack 1, the stack
size can grow dynamically (like you'd want in a release build.)
@ -105,20 +157,16 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
public:
/**
* Common constructor for navdtd. You probably want to call
* NS_NewNavHTMLDTD().
*
*
* @update gess 4/9/98
* @param
* @return
* @update gess 7/9/98
*/
CNavDTD();
/**
*
*
* @update gess 4/9/98
* @param
* @return
* Virtual destructor -- you know what to do
* @update gess 7/9/98
*/
virtual ~CNavDTD();
@ -126,49 +174,59 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* This method is called to determine if the given DTD can parse
* a document in a given source-type.
* NOTE: Parsing always assumes that the end result will involve
* storing the result in the main content model.
* @update gess6/24/98
* @param
* @return TRUE if this DTD can satisfy the request; FALSE otherwise.
* a document of a given source-type.
* Note that parsing assumes that the end result will always be stored
* in the main content model. Of course, it's up to you which content-
* model you pass in to the parser, so you can always control the process.
*
* @update gess 7/15/98
* @param aContentType contains the name of a filetype that you are
* being asked to parse.
* @return TRUE if this DTD can parse the given type; FALSE otherwise.
*/
virtual PRBool CanParse(nsString& aContentType, PRInt32 aVersion);
/**
*
* This method gets called to determine if the DTD can determine the
* kind of data contained in the given buffer string. If you know the
* type, then you should enter its string name in aType.
* @update gess7/7/98
* @param
* @return
* @param aBuffer contains data to be examined for autodetection.
* @param aType will contain a typename you specify.
* @return unknown, valid (if you know the type), invalid (if you don't)
*/
virtual eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType);
/**
*
* Sets a debugger into the DTD to help us debug the process.
* @update jevering6/23/98
* @param
* @return
* @param aDTDDebug is a ptr to the debug object you want us to use
*/
virtual void SetDTDDebug(nsIDTDDebug * aDTDDebug);
/**
*
* The parser uses a code sandwich to wrap the parsing process. Before
* the process begins, WillBuildModel() is called. Afterwards the parser
* calls DidBuildModel().
* @update gess5/18/98
* @param
* @return
* @param aFilename is the name of the file being parsed.
* @return error code (almost always 0)
*/
NS_IMETHOD WillBuildModel(nsString& aFilename);
/**
*
* The parser uses a code sandwich to wrap the parsing process. Before
* the process begins, WillBuildModel() is called. Afterwards the parser
* calls DidBuildModel().
* @update gess5/18/98
* @param
* @return
* @param anErrorCode contains the last error that occurred
* @return error code
*/
NS_IMETHOD DidBuildModel(PRInt32 anErrorCode);
/**
*
* This method is called by the parser, once for each token
* that has been constructed during the tokenization phase.
* @update gess 3/25/98
* @param aToken -- token object to be put into content model
* @return 0 if all is well; non-zero is an error
@ -176,11 +234,13 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
NS_IMETHOD HandleToken(CToken* aToken);
/**
*
* Set parser is called to notify the DTD which parser is driving
* the DTD. This is needed by the DTD later, for various parser
* callback methods.
*
* @update gess 3/25/98
* @param
* @return
* @param aParser pts to the controlling parser
* @return nada.
*/
virtual void SetParser(nsIParser* aParser);
@ -196,21 +256,22 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
*
* If the parse process gets interrupted, this method gets called
* prior to the process resuming.
* @update gess5/18/98
* @param
* @return
* @return error code -- usually kNoError (0)
*/
NS_IMETHOD WillResumeParse(void);
/**
*
* If the parse process is about to be interrupted, this method
* will be called just prior.
* @update gess5/18/98
* @param
* @return
* @return error code -- usually kNoError (0)
*/
NS_IMETHOD WillInterruptParse(void);
/**
* Select given content sink into parser for parser output
* @update gess5/11/98
@ -231,8 +292,8 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
/**
* This method is called to determine whether or not a tag
* of one type can contain a tag of another type.
* This method is called to determine whether a tag
* or one of its children can contain a given child tag.
*
* @update gess 3/25/98
* @param aParent -- tag enum of parent container
@ -243,22 +304,23 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* This method gets called to determine whether a given
* tag can contain newlines. Most do not.
* child tag can be omitted by the given parent.
*
* @update gess 3/25/98
* @param aTag -- tag to test for containership
* @return PR_TRUE if given tag can contain other tags
* @param aParent -- parent tag being asked about omitting given child
* @param aChild -- child tag being tested for omittability by parent
* @return PR_TRUE if given tag can be omitted
*/
virtual PRBool CanOmit(eHTMLTags aParent,eHTMLTags aChild)const;
/**
* This method gets called to determine whether a given
* tag can contain newlines. Most do not.
* This is called to determine if the given parent can omit the
* given child (end tag).
*
* @update gess 3/25/98
* @param aParent -- tag type of parent
* @param aChild -- tag type of child
* @return PR_TRUE if given tag can contain other tags
* @return PR_TRUE if given parent can omit child (end tag)
*/
virtual PRBool CanOmitEndTag(eHTMLTags aParent,eHTMLTags aChild)const;
@ -273,12 +335,13 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
virtual PRBool IsContainer(eHTMLTags aTags) const;
/**
* This method does two things: 1st, help construct
* our own internal model of the content-stack; and
* 2nd, pass this message on to the sink.
* @update gess4/6/98
* @param aNode -- next node to be added to model
* @return TRUE if ok, FALSE if error
* Call this if you want the DTD to give you a default
* Parent tag for given child tag. This is needed in cases
* such as propagation.
*
* @update gess 7/6/98
* @param aTag -- child to determine dflt parent tag for
* @return enum of parent tag -- potentially eHTMLTag_unknown
*/
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
@ -315,7 +378,9 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* This method is used to determine the index on the stack of the
* nearest container tag that can constrain autoclosure.
* nearest container tag that can constrain autoclosure. It is possible
* that no tag on the stack will gate autoclosure.
*
* @update gess 7/15/98
* @param id of tag you want to test for
* @return index of gating tag on context stack. kNotFound otherwise
@ -324,7 +389,9 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* Retrieve the tag type of the topmost item on context vector stack
* Accessor that retrieves the tag type of the topmost item on context
* vector stack.
*
* @update gess5/11/98
* @return tag type (may be unknown)
*/
@ -350,8 +417,8 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
* The following set of methods are used to partially construct
* the content model (via the sink) according to the type of token.
* @update gess5/11/98
* @param aToken is the start token to be handled
* @return TRUE if the token was handled.
* @param aToken is the token (of a given type) to be handled
* @return error code representing construction state; usually 0.
*/
nsresult HandleStartToken(CToken* aToken);
nsresult HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsIParserNode& aNode);
@ -383,11 +450,12 @@ protected:
//*************************************************
/**
* The next set of method open given HTML element.
* The next set of methods open given HTML elements of
* various types.
*
* @update gess5/11/98
* @param HTML (node) to be opened in content sink.
* @return TRUE if all went well.
* @param node to be opened in content sink.
* @return error code representing error condition-- usually 0.
*/
nsresult OpenHTML(const nsIParserNode& aNode);
nsresult OpenHead(const nsIParserNode& aNode);
@ -402,7 +470,7 @@ protected:
*
* @update gess5/11/98
* @param HTML (node) to be opened in content sink.
* @return TRUE if all went well.
* @return error code - 0 if all went well.
*/
nsresult CloseHTML(const nsIParserNode& aNode);
nsresult CloseHead(const nsIParserNode& aNode);
@ -416,7 +484,7 @@ protected:
* The special purpose methods automatically close
* one or more open containers.
* @update gess5/11/98
* @return TRUE if all went well.
* @return error code - 0 if all went well.
*/
nsresult CloseTopmostContainer();
nsresult CloseContainersTo(eHTMLTags aTag,PRBool aUpdateStyles);
@ -426,7 +494,7 @@ protected:
* Causes leaf to be added to sink at current vector pos.
* @update gess5/11/98
* @param aNode is leaf node to be added.
* @return TRUE if all went well -- FALSE otherwise.
* @return error code - 0 if all went well.
*/
nsresult AddLeaf(const nsIParserNode& aNode);
@ -436,7 +504,7 @@ protected:
* a fall out.
* @update gess5/11/98
* @param child to be added (somewhere) to context vector stack.
* @return TRUE if succeeds, otherwise FALSE
* @return error code - 0 if all went well.
*/
nsresult ReduceContextStackFor(eHTMLTags aChildTag);
@ -449,6 +517,15 @@ protected:
*/
nsresult CreateContextStackFor(eHTMLTags aChildTag);
/**
* This set of methods is used to create and manage the set of
* transient styles that occur as a result of poorly formed HTML
* or bugs in the original navigator.
*
* @update gess5/11/98
* @param aTag -- represents the transient style tag to be handled.
* @return error code -- usually 0
*/
nsresult OpenTransientStyles(eHTMLTags aTag);
nsresult CloseTransientStyles(eHTMLTags aTag);
nsresult UpdateStyleStackForOpenTag(eHTMLTags aTag,eHTMLTags aActualTag);

Просмотреть файл

@ -16,12 +16,6 @@
* Reserved.
*/
/**
* MODULE NOTES:
* @update gess 4/1/98
*
*/
#include "CParserContext.h"
#include "nsToken.h"
@ -39,19 +33,18 @@ public:
CTokenDeallocator gTokenDeallocator;
CParserContext::CParserContext(CScanner* aScanner,
CParserContext* aPreviousContext,
nsIStreamObserver* aListener) :
CParserContext::CParserContext(CScanner* aScanner,void* aKey,nsIStreamObserver* aListener) :
mSourceType(),
mTokenDeque(gTokenDeallocator)
{
mScanner=aScanner;
mPrevContext=aPreviousContext;
mKey=aKey;
mPrevContext=0;
mListener=aListener;
NS_IF_ADDREF(mListener);
mParseMode=eParseMode_unknown;
mAutoDetectStatus=eUnknownDetect;
mTransferBuffer=new char[eTransferBufferSize+1];
mTransferBuffer=0;
mCurrentPos=0;
mMarkPos=0;
mDTD=0;

Просмотреть файл

@ -46,7 +46,7 @@ public:
enum {eTransferBufferSize=4096};
CParserContext( CScanner* aScanner,
CParserContext* aPreviousContext=0,
void* aKey=0,
nsIStreamObserver* aListener=0);
@ -67,6 +67,7 @@ public:
nsIStreamObserver* mListener;
CParserContext* mPrevContext;
void* mKey;
};

Просмотреть файл

@ -143,11 +143,11 @@ nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
/**
*
* @update gess7/7/98
* @update gess7/22/98
* @param
* @return
*/
void nsHTMLContentSinkStream::SetOutputStream(ostream& aStream) {
void nsHTMLContentSinkStream::SetOutputStream(ostream& aStream){
  //Only the address of the caller's stream is recorded in mOutput;
  //the stream itself is not copied.
  ostream* theStream=&aStream;
  mOutput=theStream;
}

Просмотреть файл

@ -19,18 +19,21 @@
/**
* MODULE NOTES:
* @update gpk 7/12/98
* @update gess 7/20/98
*
* This file declares the concrete HTMLContentSink class.
* This class is used during the parsing process as the
* primary interface between the parser and the content
* model.
* This content sink writes to a stream. If no stream
is declared in the constructor then all output goes
to cout.
The file is pretty printed according to the pretty
printing interface. subclasses may choose to override
this behavior or set runtime flags for desired
resutls.
* If you've been paying attention to our many content sink classes, you may be
* asking yourself, "why do we need yet another one?" The answer is that this
* implementation, unlike all the others, really sends its output to a given stream
* rather than to an actual content sink (as defined in our HTML document system).
*
* We use this class for a number of purposes:
* 1) For actual document i/o using XIF (xml interchange format)
* 2) For document conversions
* 3) For debug purposes (to cause output to go to cout or a file)
*
* If no stream is declared in the constructor then all output goes to cout.
* The file is pretty printed according to the pretty printing interface. subclasses
* may choose to override this behavior or set runtime flags for desired results.
*/
#ifndef NS_HTMLCONTENTSINK_STREAM
@ -54,33 +57,34 @@ class nsHTMLContentSinkStream : public nsIHTMLContentSink {
public:
/**
*
* Standard constructor
* @update gess7/7/98
* @param
* @return
*/
nsHTMLContentSinkStream();
/**
*
* Constructor with associated stream. If you use this, it means that you want
* this class to emit its output to the stream you provide.
* @update gess7/7/98
* @param
* @return
* @param aStream -- ref to stream where you want output sent
*/
nsHTMLContentSinkStream(ostream& aStream);
/**
*
* virtual destructor
* @update gess7/7/98
* @param
* @return
*/
virtual ~nsHTMLContentSinkStream();
void SetOutputStream(ostream& aStream);
// nsISupports
NS_DECL_ISUPPORTS
// nsIContentSink
/*******************************************************************
* The following methods are inherited from nsIContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD WillBuildModel(void);
NS_IMETHOD DidBuildModel(PRInt32 aQualityLevel);
NS_IMETHOD WillInterrupt(void);
@ -89,7 +93,10 @@ class nsHTMLContentSinkStream : public nsIHTMLContentSink {
NS_IMETHOD CloseContainer(const nsIParserNode& aNode);
NS_IMETHOD AddLeaf(const nsIParserNode& aNode);
// nsIHTMLContentSink
/*******************************************************************
* The following methods are inherited from nsIHTMLContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD PushMark();
NS_IMETHOD SetTitle(const nsString& aValue);
NS_IMETHOD OpenHTML(const nsIParserNode& aNode);
@ -105,13 +112,6 @@ class nsHTMLContentSinkStream : public nsIHTMLContentSink {
NS_IMETHOD OpenFrameset(const nsIParserNode& aNode);
NS_IMETHOD CloseFrameset(const nsIParserNode& aNode);
/**
*
* @update gess7/7/98
* @param
* @return
*/
void SetOutputStream(ostream& aStream);
public:
void SetLowerCaseTags(PRBool aDoLowerCase) { mLowerCaseTags = aDoLowerCase; }

Просмотреть файл

@ -39,6 +39,7 @@ static nsAutoString gDigits("0123456789");
static nsAutoString gWhitespace(" \t\b");
static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:");
static const char* gUserdefined = "userdefined";
static const char* gEmpty = "";
const PRInt32 kMAXNAMELEN=10;
@ -111,23 +112,63 @@ CHTMLToken::CHTMLToken(const nsString& aName) : CToken(aName) {
* @param
* @return
*/
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(GetTagName(aTag)) {
mTypeID=aTag;
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) {
//Nothing to do here: the tag id is handed straight to the CToken base constructor.
}
/**
 * Replaces the text held by this token and records that an explicit
 * string has been supplied (mStringInit), so that the string accessors
 * report mTextValue rather than a name derived from the tag id.
 *
 * @update gess5/11/98
 * @param name is the char* value to store as the new string value
 */
void CHTMLToken::SetStringValue(const char* name){
  mStringInit=PR_TRUE;   //an explicit string now overrides the tag-derived name
  mTextValue=name;
}
/*
* default constructor
/**
* This method retrieves the value of this internal string.
*
* @update gess 3/25/98
* @param
* @return
* @return nsString reference to internal string value
*/
CStartToken::CStartToken(const nsString& aName) : CHTMLToken(aName) {
mAttributed=PR_FALSE;
mEmpty=PR_FALSE;
//Scratch string shared by every token: it receives the tag name when a
//token has no explicit string of its own. Each call that takes that path
//overwrites it, so a previously returned reference sees the new contents.
static nsAutoString gTagName;
nsString& CHTMLToken::GetStringValueXXX(void) {
  //An explicitly assigned string always wins.
  if(mStringInit)
    return mTextValue;

  //Type ids outside the open range (unknown, userdefined) have no tag
  //name to report, so fall back to the token's own text.
  if((mTypeID<=eHTMLTag_unknown) || (mTypeID>=eHTMLTag_userdefined))
    return mTextValue;

  //Otherwise derive the text from the tag id (empty when no name is returned).
  const char* theName=GetTagName(mTypeID);
  gTagName=(theName) ? theName : "";
  return gTagName;
}
/**
 * This method retrieves the value of this internal string
 * as a cstring.
 *
 * If no explicit string has been set and the type id lies strictly between
 * eHTMLTag_unknown and eHTMLTag_userdefined, the tag name is copied.
 * In every other case mTextValue is converted, which mirrors the
 * fallback used by GetStringValueXXX().
 *
 * @update gess 3/25/98
 * @param aBuffer receives the NUL-terminated result
 * @param aMaxLen is treated as the capacity of aBuffer, terminator
 *        included; never more than aMaxLen bytes are written by the
 *        tag-name path. (NOTE(review): assumed to match the convention
 *        of nsString::ToCString -- confirm.)
 * @return aBuffer (returned untouched if it is null or aMaxLen<1)
 */
char* CHTMLToken::GetCStringValue(char* aBuffer, PRInt32 aMaxLen) {
  //Without a buffer, or room for at least the terminator, nothing can be written safely.
  if((0==aBuffer) || (aMaxLen<1))
    return aBuffer;

  if((!mStringInit) && (mTypeID>eHTMLTag_unknown) && (mTypeID<eHTMLTag_userdefined)) {
    const char* str=GetTagName(mTypeID);
    if(str) {
      //Bounded copy: clamp to the space available instead of trusting
      //the tag name to fit, and always terminate.
      size_t theLen=strlen(str);
      const size_t theRoom=(size_t)(aMaxLen-1);
      if(theLen>theRoom)
        theLen=theRoom;
      memcpy(aBuffer,str,theLen);
      aBuffer[theLen]=0;
    }
    else aBuffer[0]=0;
  }
  else {
    //Either an explicit string was set, or the type id has no tag name;
    //the buffer is always filled rather than left uninitialized.
    mTextValue.ToCString(aBuffer,aMaxLen);
  }
  return aBuffer;
}
/*
* constructor from tag id
*
@ -141,7 +182,20 @@ CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
}
/*
* default destructor
* constructor from string
*
* @update gess 3/25/98
* @param
* @return
*/
CStartToken::CStartToken(nsString& aString) : CHTMLToken(aString) {
  //String-based constructor: aString goes to the CHTMLToken base, and the
  //new token starts out flagged as neither empty nor attributed.
  mEmpty=PR_FALSE;
  mAttributed=PR_FALSE;
}
/*
* This method returns the typeid (the tag type) for this token.
*
* @update gess 3/25/98
* @param
@ -249,6 +303,14 @@ nsresult CStartToken::Consume(PRUnichar aChar, CScanner& aScanner) {
mTextValue=aChar;
nsresult result=aScanner.ReadWhile(mTextValue,gIdentChars,PR_FALSE);
char buffer[300];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
eHTMLTags theTag= NS_TagToEnum(buffer);
if((theTag>eHTMLTag_unknown) && (theTag<eHTMLTag_userdefined)) {
mTypeID=theTag;
}
else mStringInit=PR_TRUE;
//Good. Now, let's skip whitespace after the identifier,
//and see if the next char is ">". If so, we have a complete
@ -278,11 +340,22 @@ nsresult CStartToken::Consume(PRUnichar aChar, CScanner& aScanner) {
* @return
*/
void CStartToken::DebugDumpSource(ostream& out) {
char* cp=mTextValue.ToNewCString();
out << "<" << *cp;
char buffer[200];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
out << "<" << buffer;
if(!mAttributed)
out << ">";
delete cp;
}
/*
* constructor from tag id
*
* @update gess 3/25/98
* @param aTag -- tag id, handed on to the CHTMLToken base constructor
*/
CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) {
}
@ -294,7 +367,6 @@ void CStartToken::DebugDumpSource(ostream& out) {
* @return
*/
CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) {
mTypeID=eHTMLTag_unknown;
}
/*
@ -315,6 +387,15 @@ nsresult CEndToken::Consume(PRUnichar aChar, CScanner& aScanner) {
mTextValue="";
static nsAutoString terminals(">");
nsresult result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE);
char buffer[300];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
eHTMLTags theTag= NS_TagToEnum(buffer);
if((theTag>eHTMLTag_unknown) && (theTag<eHTMLTag_userdefined)) {
mTypeID=theTag;
}
else mStringInit=PR_TRUE;
if(NS_OK==result)
result=aScanner.GetChar(aChar); //eat the closing '>;
return result;
@ -334,7 +415,7 @@ PRInt32 CEndToken::GetTypeID(){
if(eHTMLTag_unknown==mTypeID) {
nsAutoString tmp(mTextValue);
tmp.ToUpperCase();
char cbuf[20];
char cbuf[200];
tmp.ToCString(cbuf, sizeof(cbuf));
mTypeID = NS_TagToEnum(cbuf);
switch(mTypeID) {
@ -379,14 +460,24 @@ PRInt32 CEndToken::GetTokenType(void) {
* @return
*/
void CEndToken::DebugDumpSource(ostream& out) {
char* cp=mTextValue.ToNewCString();
out << "</" << *cp << ">";
delete cp;
char buffer[200];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
out << "</" << buffer << ">";
}
/*
* default constructor
*
* Takes no arguments; eHTMLTag_text is passed to the CHTMLToken
* base constructor.
*
* @update gess 3/25/98
*/
CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) {
}
/*
* Default constructor
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
@ -432,6 +523,16 @@ nsresult CTextToken::Consume(PRUnichar, CScanner& aScanner) {
return result;
};
/*
* default constructor
*
* Takes no arguments; eHTMLTag_comment is passed to the CHTMLToken
* base constructor.
*
* @update gess 3/25/98
*/
CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) {
}
/*
* Default constructor
@ -505,6 +606,17 @@ PRInt32 CCommentToken::GetTokenType(void) {
return eToken_comment;
}
/*
* default constructor
*
* Takes no arguments; eHTMLTag_newline is passed to the CHTMLToken
* base constructor.
*
* @update gess 3/25/98
*/
CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) {
}
/*
* default constructor
*
@ -544,7 +656,7 @@ PRInt32 CNewlineToken::GetTokenType(void) {
* @update gess 3/25/98
* @return nsString reference to internal string value
*/
nsString& CNewlineToken::GetText(void) {
nsString& CNewlineToken::GetStringValueXXX(void) {
  //The text reported here does not depend on the token instance: one
  //function-local static string holding "\n" is handed back by reference.
  static nsAutoString gNewlineText("\n");
  return gNewlineText;
}
@ -589,6 +701,16 @@ nsresult CNewlineToken::Consume(PRUnichar aChar, CScanner& aScanner) {
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) {
//Takes no arguments: the CHTMLToken base is constructed with eHTMLTag_unknown.
}
/*
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
@ -640,11 +762,11 @@ PRInt32 CAttributeToken::GetTokenType(void) {
* @return
*/
void CAttributeToken::DebugDumpToken(ostream& out) {
char* cp=mTextKey.ToNewCString();
out << "[" << GetClassName() << "] " << *cp << "=";
delete cp;
char* cp2=mTextValue.ToNewCString();
out << *cp2 << ": " << mTypeID << endl;
char buffer[200];
mTextKey.ToCString(buffer,sizeof(buffer)-1);
out << "[" << GetClassName() << "] " << buffer << "=";
mTextValue.ToCString(buffer,sizeof(buffer)-1);
out << buffer << ": " << mTypeID << endl;
}
@ -772,18 +894,28 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, CScanner& aScanner) {
* @return
*/
void CAttributeToken::DebugDumpSource(ostream& out) {
char* cp=mTextKey.ToNewCString();
out << " " << *cp;
delete cp;
if(mTextValue.Length()) {
cp=mTextValue.ToNewCString();
out << "=" << *cp;
delete cp;
char buffer[200];
mTextKey.ToCString(buffer,sizeof(buffer)-1);
out << " " << buffer;
if(mTextValue.Length()){
mTextValue.ToCString(buffer,sizeof(buffer)-1);
out << "=" << buffer;
}
if(mLastAttribute)
out<<">";
}
/*
* Default constructor. Takes no arguments and hands eHTMLTag_whitespace
* to the CHTMLToken(eHTMLTags) base constructor.
*
* @update gess 3/25/98
* @param none
* @return
*/
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
}
/*
* default constructor
*
@ -837,6 +969,16 @@ nsresult CWhitespaceToken::Consume(PRUnichar aChar, CScanner& aScanner) {
return result;
}
/*
* Default constructor. Takes no arguments and hands eHTMLTag_entity
* to the CHTMLToken(eHTMLTags) base constructor.
*
* @update gess 3/25/98
* @param none
* @return
*/
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
}
/*
* default constructor
*
@ -1111,6 +1253,16 @@ void CEntityToken::DebugDumpSource(ostream& out) {
delete cp;
}
/*
* Default constructor. Takes no arguments and hands eHTMLTag_script
* to the CHTMLToken(eHTMLTags) base constructor.
*
* @update gess 3/25/98
* @param none
* @return
*/
CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) {
}
/*
*
*
@ -1133,6 +1285,16 @@ PRInt32 CScriptToken::GetTokenType(void) {
return eToken_script;
}
/*
* Default constructor. Takes no arguments and hands eHTMLTag_style
* to the CHTMLToken(eHTMLTags) base constructor.
*
* @update gess 3/25/98
* @param none
* @return
*/
CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) {
}
/*
*
*
@ -1155,11 +1317,21 @@ PRInt32 CStyleToken::GetTokenType(void) {
return eToken_style;
}
/*
* Default constructor. Takes no arguments and initializes the
* CAttributeToken base with eHTMLTag_unknown.
*
* NOTE(review): the CAttributeToken declaration shown in this change lists
* only (), (const nsString&) and (const nsString&, const nsString&)
* constructors -- confirm which base constructor this initializer selects.
*
* @update gess 3/25/98
* @param none
* @return
*/
CSkippedContentToken::CSkippedContentToken() : CAttributeToken(eHTMLTag_unknown) {
}
/*
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
@ -1251,7 +1423,9 @@ public:
const char* GetTagName(PRInt32 aTag) {
  // Ask NS_EnumToTag first; any non-null answer is returned unchanged.
  const char* theName = NS_EnumToTag((nsHTMLTag) aTag);
  if (0 != theName) {
    return theName;
  }

  // No name was found: ids below eHTMLTag_userdefined map to gEmpty,
  // ids at or above it map to gUserdefined.
  if (aTag < eHTMLTag_userdefined) {
    return gEmpty;
  }
  return gUserdefined;
}

Просмотреть файл

@ -21,13 +21,19 @@
* MODULE NOTES:
* @update gess 4/1/98
*
* This file contains the declarations for all the
* HTML specific token types that our HTML tokenizer
* delegate understands.
* This file contains the declarations for all the HTML-specific token types that
* our DTDs understand. In fact, the same set of token types is used for XML.
* Currently we have tokens for text, comments, start and end tags, entities,
* attributes, style, script and skipped content. Whitespace and newlines also
* have their own token types, but don't count on them staying around forever.
*
* If you want to add a new kind of token, this is
* the place to do it. You should also add a bit of glue
* code to the HTML tokenizer delegate class.
* If you're looking for the html tags, they're in a file called nsHTMLTag.h/cpp.
*
* Most of the token types have a similar API. They have a method to get the type
* of token (GetTokenType); those that represent HTML tags also have a method to
* get the tag type (GetTypeID). In addition, most have a method, called Consume,
* that causes the token to help in the parsing process. We've also thrown in a
* few standard debugging methods.
*/
#ifndef HTMLTOKENS_H
@ -39,6 +45,10 @@
class CScanner;
/*******************************************************************
* This enum defines the set of token types that we currently support.
*******************************************************************/
enum eHTMLTokenTypes {
eToken_unknown=0,
eToken_start=1, eToken_end, eToken_comment, eToken_entity,
@ -56,34 +66,36 @@ const char* GetTagName(PRInt32 aTag);
/**
* This declares the basic token type used in the html-
* parser.
*
* This declares the basic token type used in the HTML DTD's.
* @update gess 3/25/98
*/
class CHTMLToken : public CToken {
public:
CHTMLToken(eHTMLTags aTag);
CHTMLToken(const nsString& aString);
virtual void SetStringValue(const char* aValue);
virtual nsString& GetStringValueXXX(void);
virtual char* GetCStringValue(char* aBuffer, PRInt32 aMaxLen);
protected:
};
/**
* This declares start tokens, which always take the
* form <xxxx>. This class also knows how to consume
* related attributes.
* This declares start tokens, which always take the form <xxxx>.
* This class also knows how to consume related attributes.
*
* @update gess 3/25/98
*/
class CStartToken: public CHTMLToken {
public:
CStartToken(eHTMLTags aTag);
CStartToken(const nsString& aString);
CStartToken(nsString& aName);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual PRInt32 GetTypeID(void);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
PRBool IsAttributed(void);
void SetAttributed(PRBool aValue);
PRBool IsEmpty(void);
@ -105,6 +117,7 @@ class CStartToken: public CHTMLToken {
*/
class CEndToken: public CHTMLToken {
public:
CEndToken(eHTMLTags aTag);
CEndToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual PRInt32 GetTypeID(void);
@ -124,6 +137,7 @@ class CEndToken: public CHTMLToken {
*/
class CCommentToken: public CHTMLToken {
public:
CCommentToken();
CCommentToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
@ -141,6 +155,7 @@ class CCommentToken: public CHTMLToken {
*/
class CEntityToken : public CHTMLToken {
public:
CEntityToken();
CEntityToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
@ -168,6 +183,7 @@ class CEntityToken : public CHTMLToken {
*/
class CWhitespaceToken: public CHTMLToken {
public:
CWhitespaceToken();
CWhitespaceToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
@ -183,6 +199,7 @@ class CWhitespaceToken: public CHTMLToken {
*/
class CTextToken: public CHTMLToken {
public:
CTextToken();
CTextToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
@ -200,6 +217,7 @@ class CTextToken: public CHTMLToken {
*/
class CAttributeToken: public CHTMLToken {
public:
CAttributeToken();
CAttributeToken(const nsString& aString);
CAttributeToken(const nsString& aKey, const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
@ -223,11 +241,12 @@ class CAttributeToken: public CHTMLToken {
*/
class CNewlineToken: public CHTMLToken {
public:
CNewlineToken();
CNewlineToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
virtual nsString& GetText(void);
virtual nsString& GetStringValueXXX(void);
};
@ -242,7 +261,7 @@ class CNewlineToken: public CHTMLToken {
*/
class CScriptToken: public CHTMLToken {
public:
CScriptToken();
CScriptToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
@ -260,6 +279,7 @@ class CScriptToken: public CHTMLToken {
*/
class CStyleToken: public CHTMLToken {
public:
CStyleToken();
CStyleToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
@ -275,6 +295,7 @@ class CStyleToken: public CHTMLToken {
*/
class CSkippedContentToken: public CAttributeToken {
public:
CSkippedContentToken();
CSkippedContentToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);

Просмотреть файл

@ -24,10 +24,14 @@
* @update gess 4/1/98
*
* This file declares the concrete IContentSink interface.
* This pure virtual interface is used as the "glue" that
* connects the parsing process to the content model
* construction process.
* This pure virtual interface is used as the "glue" that connects the parsing
* process to the content model construction process.
*
* The icontentsink interface is a very lightweight wrapper that represents the
* content-sink model building process. There is another one that you may care
* about more, which is the IHTMLContentSink interface. (See that file for details).
*/
#include "nsIParserNode.h"
#include "nsISupports.h"

Просмотреть файл

@ -20,10 +20,16 @@
/**
* MODULE NOTES:
* @update gess 4/8/98
* @update gess 7/20/98
*
* This interface defines standard interface for DTD's. Note that this isn't HTML specific.
* DTD's have several primary functions within the parser system:
* 1) To coordinate the consumption of an input stream via the parser
* 2) To serve as proxy to represent the containment rules of the underlying document
* 3) To offer autodetection services to the parser (mainly for doc conversion)
*
*/
#include "nshtmlpars.h"
#include "nsISupports.h"
#include "prtypes.h"
@ -44,26 +50,22 @@ class nsIDTD : public nsISupports {
public:
/**
*
* Virtual destructor
* @update gess6/24/98
* @param
* @return
*/
virtual ~nsIDTD() {};
/**
* This method is called to determine whether or not a tag
* of one type can contain a tag of another type.
* This method informs the DTD about the parser being used to drive the parse process
*
* @update gess 3/25/98
* @param aParent -- tag enum of parent container
* @param aChild -- tag enum of child container
* @return PR_TRUE if parent can contain child
* @param aParser -- ptr to parser object
* @return nada
*/
virtual void SetParser(nsIParser* aParser)=0;
/**
* Select given content sink into parser for parser output
* Select given content sink into DTD for output
* @update gess5/11/98
* @param aSink is the new sink to be used by parser
* @return old sink, or NULL
@ -76,63 +78,72 @@ class nsIDTD : public nsISupports {
* NOTE: Parsing always assumes that the end result will involve
* storing the result in the main content model.
* @update gess6/24/98
* @param
* @param aContentType -- string representing type of doc to be converted (ie text/html)
* @return TRUE if this DTD can satisfy the request; FALSE otherwise.
*/
virtual PRBool CanParse(nsString& aContentType, PRInt32 aVersion)=0;
/**
*
* This method, typically called by the parser, is used to try to autodetect the
* type of data contained in the given buffer. The implementor should look at the
* buffers contents to try to determine its encoding type.
* @update gess7/7/98
* @param
* @return
* @param aBuffer-contains data to be scanned to autodetect type
* @param aType-will hold the result if type is autodetected
* @return eValid (if detected), eInvalid (if not) or eUnknown (if nothing can be done)
*/
virtual eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType)=0;
/**
*
* Called by the parser just before the parsing process begins
* @update gess5/18/98
* @param
* @param aFilename--string that contains name of file being parsed (if applicable)
* @return
*/
NS_IMETHOD WillBuildModel(nsString& aFilename)=0;
/**
*
* Called by the parser after the parsing process has concluded
* @update gess5/18/98
* @param
* @param anErrorCode - contains error code resulting from parse process
* @return
*/
NS_IMETHOD DidBuildModel(PRInt32 anErrorCode)=0;
/**
*
* Called during the model building phase of the parse process. Each token created
* during the parse phase is stored in a deque (in the parser) and is passed to this
* method so that the DTD can process it. Ultimately, the DTD will transform the
* given token into calls onto a content sink.
* @update gess 3/25/98
* @param aToken -- token object to be put into content model
* @return error code (usually 0)
*/
NS_IMETHOD HandleToken(CToken* aToken)=0;
/**
* Cause the tokenizer to consume the next token, and
* Cause the tokenizer to consume and create the next token, and
* return an error result.
*
* @update gess 3/25/98
* @param aToken -- will contain newly created and consumed token
* @return error code (usually 0)
*/
NS_IMETHOD ConsumeToken(CToken*& aToken)=0;
/**
*
* If the parse process gets interrupted midway, this method is called by the
* parser prior to resuming the process.
* @update gess5/18/98
* @param
* @return
* @return ignored
*/
NS_IMETHOD WillResumeParse(void)=0;
/**
*
* If the parse process gets interrupted, this method is called by the parser
* to notify the DTD that interruption will occur.
* @update gess5/18/98
* @param
* @return
* @return ignored
*/
NS_IMETHOD WillInterruptParse(void)=0;
@ -157,3 +168,4 @@ class nsIDTD : public nsISupports {
};
#endif /* nsIDTD_h___ */

Просмотреть файл

@ -19,6 +19,16 @@
#define NS_IPARSER___
/**
* MODULE NOTES:
* @update gess 4/1/98
*
* This class defines the iparser interface. This XPCOM
* interface is all that parser clients ever need to see.
*
**/
#include "nshtmlpars.h"
#include "nsISupports.h"
#include "nsIStreamListener.h"
@ -48,10 +58,34 @@ class nsIDTDDebug;
class nsIParser : public nsISupports {
public:
/**
* Call this method if you have a DTD that you want to share with the parser.
* Registered DTD's get remembered until the system shuts down.
*
* @update gess 3/25/98
* @param aDTD -- ptr DTD that you're publishing the services of
*/
virtual void RegisterDTD(nsIDTD* aDTD)=0;
/**
* Call this method once you've created a parser, and want to instruct it
* where to send its output.
*
* @update gess 3/25/98
* @param aContentSink -- ptr to content sink that will receive output
* @return ptr to previously set contentsink (usually null)
*/
virtual nsIContentSink* SetContentSink(nsIContentSink* aContentSink)=0;
/**
* This internal method is used when the parser needs to determine the
* type of content it's being asked to parse.
*
* @update gess 3/25/98
* @param aBuffer -- contains data to be tested (autodetected) for type
* @param aType -- string where you store the detected type (if any)
* @return autodetect enum (valid, invalid, unknown)
*/
virtual eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType)=0;
/**
@ -64,21 +98,21 @@ class nsIParser : public nsISupports {
*/
virtual PRInt32 ConsumeToken(CToken*& aToken)=0;
/******************************************************************************************
* Parse methods always begin with an input source, and perform conversions
* until you wind up with HTML in your actual content model.
* until you wind up being emitted to the given contentsink (which may or may not
* be a proxy for the NGLayout content model).
******************************************************************************************/
virtual PRInt32 Parse(nsIURL* aURL,nsIStreamObserver* aListener = nsnull,nsIDTDDebug * aDTDDebug = 0) = 0;
virtual PRInt32 Parse(nsIInputStream* pIStream,nsIStreamObserver* aListener,nsIDTDDebug* aDTDDebug = 0)=0;
virtual PRInt32 Parse(nsString& aFilename)=0;
virtual PRInt32 Parse(fstream& aStream)=0;
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
virtual PRInt32 Parse(nsString& aSourceBuffer,PRBool anHTMLString)=0;
/**
* This method gets called when the tokens have been consumed, and it's time
* to build the model via the content sink.
* @update gess5/11/98
* @return YES if model building went well -- NO otherwise.
* @return error code -- 0 if model building went well .
*/
virtual PRInt32 BuildModel(void)=0;

Просмотреть файл

@ -20,6 +20,10 @@
* MODULE NOTES:
* @update jevering 6/17/98
*
* This interface is not yet used; it was intended to allow an observer object
* to "look at" the i/o stream coming into the parser before, during and after
* the parser saw it. The intention of this was to allow an observer to modify
* the stream at various stages.
*/
#ifndef IPARSERFILTER

Просмотреть файл

@ -374,7 +374,7 @@ eAutoDetectResult nsParser::AutoDetectContentType(nsString& aBuffer,nsString& aT
*
* @update gess5/18/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::WillBuildModel(nsString& aFilename){
@ -395,7 +395,7 @@ PRInt32 nsParser::WillBuildModel(nsString& aFilename){
*
* @update gess5/18/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::DidBuildModel(PRInt32 anErrorCode) {
//One last thing...close any open containers.
@ -407,72 +407,34 @@ PRInt32 nsParser::DidBuildModel(PRInt32 anErrorCode) {
return result;
}
/**
* This is the main controlling routine in the parsing process.
* Note that it may get called multiple times for the same scanner,
* since this is a pushed based system, and all the tokens may
* not have been consumed by the scanner during a given invocation
* of this method.
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
* This method adds a new parser context to the list,
* pushing the current one to the next position.
* @update gess7/22/98
* @param ptr to new context
* @return nada
*/
PRInt32 nsParser::Parse(nsString& aFilename){
PRInt32 status=kBadFilename;
void nsParser::PushContext(CParserContext& aContext) {
  // Remember whatever context is current, install the new one as current,
  // then link the remembered context behind it.
  CParserContext* thePrevious=mParserContext;
  mParserContext=&aContext;
  mParserContext->mPrevContext=thePrevious;
}
if(aFilename) {
//ok, time to create our tokenizer and begin the process
mParserContext = new CParserContext(new CScanner(aFilename),mParserContext);
mParserContext->mScanner->Eof();
if(eValidDetect==AutoDetectContentType(mParserContext->mScanner->GetBuffer(),
mParserContext->mSourceType))
{
WillBuildModel(aFilename);
status=ResumeParse();
DidBuildModel(status);
} //if
/**
* This method pops the topmost context off the stack,
* returning it to the user. The next context (if any)
* becomes the current context.
* @update gess7/22/98
* @return prev. context
*/
CParserContext* nsParser::PopContext() {
CParserContext* oldContext=mParserContext;
if(oldContext) {
mParserContext=oldContext->mPrevContext;
}
return status;
return oldContext;
}
/**
* Cause parser to parse input from given stream
* @update gess5/11/98
* @param aStream is the i/o source
* @return TRUE if all went well -- FALSE otherwise
*/
PRInt32 nsParser::Parse(fstream& aStream){
PRInt32 status=kNoError;
//ok, time to create our tokenizer and begin the process
mParserContext = new CParserContext(new CScanner(kUnknownFilename,aStream,PR_FALSE),mParserContext);
mParserContext->mScanner->Eof();
if(eValidDetect==AutoDetectContentType(mParserContext->mScanner->GetBuffer(),
mParserContext->mSourceType)) {
WillBuildModel(mParserContext->mScanner->GetFilename());
status=ResumeParse();
DidBuildModel(status);
} //if
return status;
}
/**
*
* @update gess7/13/98
* @param
* @return
*/
PRInt32 nsParser::Parse(nsIInputStream* pIStream,nsIStreamObserver* aListener,nsIDTDDebug* aDTDDebug){
PRInt32 result=kNoError;
return result;
}
/**
* This is the main controlling routine in the parsing process.
@ -486,7 +448,7 @@ PRInt32 nsParser::Parse(nsIInputStream* pIStream,nsIStreamObserver* aListener,ns
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener, nsIDTDDebug * aDTDDebug) {
NS_PRECONDITION(0!=aURL,kNullURL);
@ -500,34 +462,68 @@ PRInt32 nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener, nsIDTDDebug *
if(aURL) {
nsAutoString theName(aURL->GetSpec());
mParserContext=new CParserContext(new CScanner(theName,PR_FALSE),mParserContext,aListener);
CParserContext* cp=new CParserContext(new CScanner(theName,PR_FALSE),aURL,aListener);
PushContext(*cp);
status=NS_OK;
}
return status;
}
/**
* Cause parser to parse input from given stream
* @update gess5/11/98
* @param aStream is the i/o source
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::Parse(fstream& aStream){
  // Wrap the stream in a scanner, wrap that in a new parser context, and make
  // that context the current one for the duration of this call.
  CParserContext* theContext=new CParserContext(new CScanner(kUnknownFilename,aStream,PR_FALSE),&aStream,0);
  PushContext(*theContext);

  PRInt32 theResult=kNoError;
  mParserContext->mScanner->Eof();

  // Only run the build-model sequence when the content type was positively detected.
  eAutoDetectResult theDetect=AutoDetectContentType(mParserContext->mScanner->GetBuffer(),
                                                     mParserContext->mSourceType);
  if(eValidDetect==theDetect) {
    WillBuildModel(mParserContext->mScanner->GetFilename());
    theResult=ResumeParse();
    DidBuildModel(theResult);
  }

  // Take the current context back off the stack and release it.
  CParserContext* theFinished=PopContext();
  delete theFinished;
  return theResult;
}
/**
* Call this method if all you want to do is parse 1 string full of HTML text.
* In particular, this method should be called by the DOM when it has an HTML
* string to feed to the parser in real-time.
*
* @update gess5/11/98
* @param anHTMLString contains a string-full of real HTML
* @param appendTokens tells us whether we should insert tokens inline, or append them.
* @return TRUE if all went well -- FALSE otherwise
* @param aSourceBuffer contains a string-full of real content
* @param anHTMLString tells us whether we should assume the content is HTML (usually true)
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
PRInt32 nsParser::Parse(nsString& aSourceBuffer,PRBool anHTMLString){
PRInt32 result=kNoError;
mParserContext = new CParserContext(new CScanner(kUnknownFilename),mParserContext,0);
CParserContext* pc=new CParserContext(new CScanner(kUnknownFilename),&aSourceBuffer,0);
PushContext(*pc);
if(PR_TRUE==anHTMLString)
pc->mSourceType="text/html";
mParserContext->mScanner->Append(aSourceBuffer);
if(eValidDetect==AutoDetectContentType(aSourceBuffer,mParserContext->mSourceType)) {
WillBuildModel(mParserContext->mScanner->GetFilename());
result=ResumeParse();
DidBuildModel(result);
}
pc=PopContext();
delete pc;
return result;
}
@ -539,7 +535,7 @@ PRInt32 nsParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
*
* @update gess 3/25/98
* @param
* @return PR_TRUE if parsing concluded successfully.
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::ResumeParse() {
PRInt32 result=kNoError;
@ -560,7 +556,7 @@ PRInt32 nsParser::ResumeParse() {
*
* @update gess 3/25/98
* @param
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::BuildModel() {
@ -575,6 +571,20 @@ PRInt32 nsParser::BuildModel() {
mMinorIteration++;
CToken* theToken=(CToken*)mParserContext->mCurrentPos->GetCurrent();
/**************************************************************************
The point of this code is to serve as a testbed for parser reentrancy.
If you set recurse=1, we go reentrant, passing a text string of HTML onto
the parser for inline processing, just like javascript/DOM would do it.
And guess what? It worked the first time!
Uncomment the following code to enable the test:
int recurse=0;
if(recurse){
nsString theString("<table border=1><tr><td BGCOLOR=blue>cell</td></tr></table>");
Parse(theString,PR_TRUE);
}
**************************************************************************/
theMarkPos=*mParserContext->mCurrentPos;
result=mParserContext->mDTD->HandleToken(theToken);
++(*mParserContext->mCurrentPos);
@ -651,7 +661,7 @@ PRInt32 nsParser::CollectSkippedContent(nsCParserNode& aNode,PRInt32& aCount) {
*
* @update gess 5/12/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::GetBindInfo(nsIURL* aURL){
nsresult result=0;
@ -663,7 +673,7 @@ nsresult nsParser::GetBindInfo(nsIURL* aURL){
*
* @update gess 5/12/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult
nsParser::OnProgress(nsIURL* aURL, PRInt32 aProgress, PRInt32 aProgressMax,
@ -681,7 +691,7 @@ nsParser::OnProgress(nsIURL* aURL, PRInt32 aProgress, PRInt32 aProgressMax,
*
* @update gess 5/12/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::OnStartBinding(nsIURL* aURL, const char *aSourceType){
if (nsnull != mObserver) {
@ -718,7 +728,11 @@ nsresult nsParser::OnDataAvailable(nsIURL* aURL, nsIInputStream *pIStream, PRInt
int len=1; //init to a non-zero value
int err;
if(!mParserContext->mTransferBuffer)
mParserContext->mTransferBuffer = new char[CParserContext::eTransferBufferSize+1];
while (len > 0) {
len = pIStream->Read(&err, mParserContext->mTransferBuffer, 0, mParserContext->eTransferBufferSize);
if(len>0) {
@ -793,7 +807,7 @@ PRBool nsParser::WillTokenize(){
* of data.
*
* @update gess 3/25/98
* @return error code
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::Tokenize(){
CToken* theToken=0;
@ -803,7 +817,7 @@ PRInt32 nsParser::Tokenize(){
while((PR_FALSE==done) && (kNoError==result)) {
mParserContext->mScanner->Mark();
result=ConsumeToken(theToken);
result=mParserContext->mDTD->ConsumeToken(theToken);
if(kNoError==result) {
if(theToken) {

Просмотреть файл

@ -128,25 +128,6 @@ friend class CTokenHandler;
nsIStreamObserver* aListener,
nsIDTDDebug* aDTDDebug = 0);
/**
* Cause parser to parse input from given nsIInputStream
* @update gess5/11/98
* @param pIStream is an nsIInputStream
* @param aListener is a listener to forward notifications to
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsIInputStream* pIStream,
nsIStreamObserver* aListener,
nsIDTDDebug* aDTDDebug = 0);
/**
* Cause parser to parse input from given file in given mode
* @update gess5/11/98
* @param aFilename is a path for file document
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsString& aFilename);
/**
* Cause parser to parse input from given stream
* @update gess5/11/98
@ -161,7 +142,7 @@ friend class CTokenHandler;
* @param appendTokens tells us whether we should insert tokens inline, or append them.
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
virtual PRInt32 Parse(nsString& aSourceBuffer,PRBool anHTMLString);
/**
* This method gets called (automatically) during incremental parsing
@ -310,6 +291,8 @@ private:
*/
eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType);
void PushContext(CParserContext& aContext);
CParserContext* PopContext();
protected:
//*********************************************

Просмотреть файл

@ -107,7 +107,7 @@ const nsString& nsCParserNode::GetName() const {
* @return string ref of text from internal token
*/
const nsString& nsCParserNode::GetText() const {
return mToken->GetText();
return mToken->GetStringValueXXX();
}
/**
@ -123,7 +123,7 @@ const nsString& nsCParserNode::GetSkippedContent() const {
if(mAttributes[mAttributeCount-1]) {
CSkippedContentToken* sc=(CSkippedContentToken*)(mAttributes[mAttributeCount-1]);
if(sc) {
return sc->GetText();
return sc->GetStringValueXXX();
}
}
}
@ -191,7 +191,7 @@ const nsString& nsCParserNode::GetKeyAt(PRInt32 anIndex) const {
*/
const nsString& nsCParserNode::GetValueAt(PRInt32 anIndex) const {
NS_PRECONDITION(anIndex<mAttributeCount, "Bad attr index");
return (mAttributes[anIndex])->GetText();
return (mAttributes[anIndex])->GetStringValueXXX();
}

Просмотреть файл

@ -22,11 +22,25 @@
/**
* Tag-based (default) constructor. Records the given tag as the token's
* type id, leaves the text value default-constructed, and clears the
* string-init flag, the unused flag and the attribute count.
*
* @update gess 7/21/98
* @param aTag -- value stored as the token's type id
*/
CToken::CToken(PRInt32 aTag)
  : mTypeID(aTag),
    mAttrCount(0),
    mStringInit(PR_FALSE),
    mUnused(PR_FALSE),
    mTextValue() {
}
/**
* String-based constructor. Copies the given name into the token's text
* value, sets the type id and attribute count to 0, and marks the token
* as string-initialized.
*
* @update gess 3/25/98
* @param aName -- nsString used to initialize the token's text value
*/
CToken::CToken(const nsString& aName)
  : mTypeID(0),
    mAttrCount(0),
    mStringInit(PR_TRUE),
    mUnused(PR_FALSE),
    mTextValue(aName) {
}
@ -38,6 +52,8 @@ CToken::CToken(const nsString& aName) : mTextValue(aName) {
*/
CToken::CToken(const char* aName)
  : mTypeID(0),
    mAttrCount(0),
    mStringInit(PR_TRUE),   // the text value comes from the caller's C string
    mUnused(PR_FALSE),
    mTextValue(aName) {
}
@ -64,15 +80,6 @@ nsresult CToken::Consume(PRUnichar aChar,CScanner& aScanner) {
return result;
}
/**
* Method used to set the string value of this token
*
* @update gess 3/25/98
* @param aValue -- char* containing new value
*/
void CToken::SetStringValue(const char* aValue) {
mTextValue=aValue;
}
/**
* This debug method causes the token to dump its content
@ -103,13 +110,12 @@ void CToken::DebugDumpSource(ostream& anOutputStream) {
}
/**
* This method retrieves the value of this internal string.
*
* @update gess 3/25/98
* @return nsString reference to internal string value
* Setter method that changes the string value of this token
* @update gess5/11/98
* @param name is a char* value containing new string value
*/
nsString& CToken::GetStringValue(void) {
return mTextValue;
void CToken::SetStringValue(const char* name){
mTextValue=name;
}
/**
@ -118,10 +124,22 @@ nsString& CToken::GetStringValue(void) {
* @update gess 3/25/98
* @return nsString reference to internal string value
*/
nsString& CToken::GetText(void) {
nsString& CToken::GetStringValueXXX(void) {
return mTextValue;
}
/**
* This method retrieves the value of this internal string
* as a cstring.
*
* @update gess 3/25/98
* @return char* rep of internal string value
*/
char* CToken::GetCStringValue(char* aBuffer, PRInt32 aMaxLen) {
strcpy(aBuffer,"string");
return aBuffer;
}
/**
* Sets the internal ordinal value for this token.
* This method is deprecated, and will soon be going away.

Просмотреть файл

@ -55,6 +55,12 @@ class CToken {
/**
* Default constructor
* @update gess7/21/98
*/
CToken(PRInt32 aTag=0);
/**
* Constructor with string assignment for tag
* @update gess5/11/98
* @param aName is the given name of the token
*/
@ -78,14 +84,7 @@ class CToken {
* @update gess5/11/98
* @return reference to string containing string value
*/
virtual nsString& GetStringValue(void);
/**
* Get text of this token
* @update gess5/11/98
* @return string ref containing text value of this token
*/
virtual nsString& GetText(void);
virtual nsString& GetStringValueXXX(void);
/**
* Setter method that changes the string value of this token
@ -94,6 +93,13 @@ class CToken {
*/
virtual void SetStringValue(const char* name);
/**
* Retrieve string value of the token as a c-string
* @update gess5/11/98
* @return reference to string containing string value
*/
virtual char* GetCStringValue(char* aBuffer, PRInt32 aMaxLen);
/**
* Sets the ordinal value of this token (not currently used)
* @update gess5/11/98
@ -170,8 +176,9 @@ class CToken {
protected:
PRInt32 mTypeID;
PRInt16 mAttrCount;
PRInt16 mUnused;
nsString mTextValue;
PRBool mStringInit;
PRBool mUnused;
nsAutoString mTextValue;
};

Просмотреть файл

@ -245,7 +245,7 @@ PRInt32 XIFDispatchTokenHandler(CToken* aToken,nsIDTD* aDTD) {
eHTMLTokenTypes theType= (eHTMLTokenTypes)aToken->GetTokenType();
nsXIFDTD* theDTD=(nsXIFDTD*)aDTD;
nsString& name = aToken->GetStringValue();
nsString& name = aToken->GetStringValueXXX();
eXIFTags type = DetermineXIFTagType(name);
if (type != eXIFTag_userdefined)
@ -519,7 +519,7 @@ PRInt32 nsXIFDTD::HandleTextToken(CToken* aToken) {
if (type == eXIFTag_text)
{
nsString& temp = aToken->GetText();
nsString& temp = aToken->GetStringValueXXX();
if (temp != "<xml version=\"1.0\"?>")
{
@ -1427,7 +1427,7 @@ PRInt32 nsXIFDTD::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartTok
//and a textkey of "/". We should destroy it, and tell the
//start token it was empty.
nsString& key=theToken->GetKey();
nsString& text=theToken->GetText();
nsString& text=theToken->GetStringValueXXX();
if((key[0]==kForwardSlash) && (0==text.Length())){
//tada! our special case! Treat it like an empty start tag...
aToken->SetEmpty(PR_TRUE);
@ -1488,7 +1488,7 @@ PRInt32 nsXIFDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aT
PRInt32 theDequeSize=mTokenDeque.GetSize();
PRInt32 result=kNoError;
aToken=new CStartToken(nsAutoString(""));
aToken=new CStartToken(eHTMLTag_unknown);
if(aToken) {
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...

Просмотреть файл

@ -16,20 +16,6 @@
* Reserved.
*/
/**
* MODULE NOTES:
* @update gess 4/8/98
*
*
*/
/**
* TRANSIENT STYLE-HANDLING NOTES:
* @update gess 6/15/98
*
* ...add comments here about transient style stack.
*
*/
#include "nsIDTDDebug.h"
#include "CNavDTD.h"
@ -1451,6 +1437,15 @@ PRBool CNavDTD::CanOmit(eHTMLTags aParent,eHTMLTags aChild) const {
} //switch
break;
//this code prevents table container elements from
//opening unless a table is actually already opened.
case eHTMLTag_tr: case eHTMLTag_thead:
case eHTMLTag_tfoot: case eHTMLTag_tbody:
case eHTMLTag_td:
if(PR_FALSE==HasOpenContainer(eHTMLTag_table))
result=PR_TRUE;
break;
case eHTMLTag_entity:
switch((eHTMLTags)aParent) {
case eHTMLTag_tr: case eHTMLTag_table:
@ -1472,17 +1467,23 @@ PRBool CNavDTD::CanOmit(eHTMLTags aParent,eHTMLTags aChild) const {
/**
*
* This method is called when you want to determine if one tag is
* synonymous with another. Cases where this is true include style
* tags (where <i> is allowed to close <b>, for example). Another
* is <H?>, where any open heading tag can be closed by any closing heading tag.
* @update gess6/16/98
* @param
* @return
*/
PRBool IsCompatibleStyleTag(eHTMLTags aTag1,eHTMLTags aTag2) {
PRBool IsCompatibleTag(eHTMLTags aTag1,eHTMLTags aTag2) {
PRBool result=PR_FALSE;
if(0!=strchr(gStyleTags,aTag1)) {
result=PRBool(0!=strchr(gStyleTags,aTag2));
}
if(0!=strchr(gHeadingTags,aTag1)) {
result=PRBool(0!=strchr(gHeadingTags,aTag2));
}
return result;
}
@ -1521,10 +1522,22 @@ PRBool CNavDTD::CanOmitEndTag(eHTMLTags aParent,eHTMLTags aChild) const {
} //switch
break;
//It turns out that a <Hn> can be closed by any other <H?>
//This code makes them all seem compatible.
case eHTMLTag_h1: case eHTMLTag_h2:
case eHTMLTag_h3: case eHTMLTag_h4:
case eHTMLTag_h5: case eHTMLTag_h6:
if(0!=strchr(gHeadingTags,aParent)) {
result=PR_FALSE;
break;
}
//Otherwise, IT's OK TO FALL THROUGH HERE...
default:
if(IsGatedFromClosing(aChild))
result=PR_TRUE;
else if(IsCompatibleStyleTag(aChild,GetTopNode()))
else if(IsCompatibleTag(aChild,GetTopNode()))
result=PR_FALSE;
else result=(!HasOpenContainer(aChild));
break;
@ -1883,7 +1896,7 @@ nsresult CNavDTD::OpenTransientStyles(eHTMLTags aTag){
eHTMLTags theTag=mStyleStack.mTags[pos];
if(PR_FALSE==HasOpenContainer(theTag)) {
CStartToken token(GetTagName(theTag));
CStartToken token(theTag);
nsCParserNode theNode(&token);
switch(theTag) {
@ -2304,7 +2317,7 @@ nsresult CNavDTD::CloseContainersTo(eHTMLTags aTag,PRBool aUpdateStyles){
}
eHTMLTags theTopTag=GetTopNode();
if(IsCompatibleStyleTag(aTag,theTopTag)) {
if(IsCompatibleTag(aTag,theTopTag)) {
//if you're here, it's because we're trying to close one style tag,
//but a different one is actually open. Because this is NAV4x
//compatibility mode, we must close the one that's really open.
@ -2570,7 +2583,6 @@ CNavDTD::UpdateStyleStackForCloseTag(eHTMLTags aTag,eHTMLTags anActualTag){
nsresult
CNavDTD::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
nsAutoString empty("");
nsresult result=aScanner.GetChar(aChar);
if(NS_OK==result) {
@ -2581,12 +2593,12 @@ CNavDTD::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
result=aScanner.Peek(ch);
if(NS_OK==result) {
if(nsString::IsAlpha(ch))
aToken=new CEndToken(empty);
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
aToken=new CEndToken(eHTMLTag_unknown);
else aToken=new CCommentToken(); //Special case: </ ...> is treated as a comment
}//if
break;
case kExclamation:
aToken=new CCommentToken(empty);
aToken=new CCommentToken();
break;
default:
if(nsString::IsAlpha(aChar))
@ -2621,11 +2633,10 @@ nsresult
CNavDTD::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken) {
PRBool done=PR_FALSE;
nsresult result=NS_OK;
nsAutoString as("");
PRInt16 theAttrCount=0;
while((!done) && (result==NS_OK)) {
CAttributeToken* theToken= new CAttributeToken(as);
CAttributeToken* theToken= new CAttributeToken();
if(theToken){
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
@ -2635,7 +2646,7 @@ CNavDTD::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToke
//and a textkey of "/". We should destroy it, and tell the
//start token it was empty.
nsString& key=theToken->GetKey();
nsString& text=theToken->GetText();
nsString& text=theToken->GetStringValueXXX();
if((key[0]==kForwardSlash) && (0==text.Length())){
//tada! our special case! Treat it like an empty start tag...
aToken->SetEmpty(PR_TRUE);
@ -2696,12 +2707,15 @@ CNavDTD::ConsumeContentToEndTag(const nsString& aString,
* @param anErrorCode: arg that will hold error condition
* @return new token or null
*/
//Zero-terminated table of tag ids; ConsumeStartTag() searches it with strchr().
//NOTE(review): strchr() also matches the terminating 0, so looking up a tag
//id whose value is 0 reports "found" -- confirm callers never look up such an id.
static char gSpecialTags[]={ eHTMLTag_script, eHTMLTag_style, eHTMLTag_title, eHTMLTag_textarea, 0};
nsresult
CNavDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
PRInt32 theDequeSize=mTokenDeque.GetSize();
nsresult result=NS_OK;
aToken=new CStartToken(nsAutoString(""));
aToken=new CStartToken(eHTMLTag_unknown);
if(aToken) {
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
@ -2713,19 +2727,20 @@ CNavDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
//consume all the content itself.
if(NS_OK==result) {
nsString& str=aToken->GetText();
CToken* skippedToken=0;
if(str.EqualsIgnoreCase("SCRIPT") ||
str.EqualsIgnoreCase("STYLE") ||
str.EqualsIgnoreCase("TITLE") ||
str.EqualsIgnoreCase("TEXTAREA")) {
eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();
if(0!=strchr(gSpecialTags,theTag)){
//Do special case handling for <script>, <style>, <title> or <textarea>...
CToken* skippedToken=0;
nsString& str=aToken->GetStringValueXXX();
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
if((NS_OK==result) && skippedToken){
//now we strip the ending sequence from our new SkippedContent token...
PRInt32 slen=str.Length()+3;
nsString& skippedText=skippedToken->GetText();
nsString& skippedText=skippedToken->GetStringValueXXX();
skippedText.Cut(skippedText.Length()-slen,slen);
mTokenDeque.Push(skippedToken);
@ -2733,7 +2748,7 @@ CNavDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
//In the case that we just read a given tag, we should go and
//consume all the tag content itself (and throw it all away).
CEndToken* endtoken=new CEndToken(str);
CEndToken* endtoken=new CEndToken(theTag);
mTokenDeque.Push(endtoken);
} //if
} //if
@ -2774,11 +2789,11 @@ CNavDTD::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
if(NS_OK==result) {
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or &#000.
aToken = new CEntityToken(nsAutoString(""));
aToken = new CEntityToken();
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
}
else if(kHashsign==ch) {
aToken = new CEntityToken(nsAutoString(""));
aToken = new CEntityToken();
result=aToken->Consume(0,aScanner);
}
else {
@ -2805,7 +2820,7 @@ nsresult
CNavDTD::ConsumeWhitespace(PRUnichar aChar,
CScanner& aScanner,
CToken*& aToken) {
aToken = new CWhitespaceToken(nsAutoString(""));
aToken = new CWhitespaceToken();
nsresult result=NS_OK;
if(aToken) {
result=aToken->Consume(aChar,aScanner);
@ -2825,7 +2840,7 @@ CNavDTD::ConsumeWhitespace(PRUnichar aChar,
*/
nsresult
CNavDTD::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
aToken = new CCommentToken(nsAutoString(""));
aToken = new CCommentToken();
nsresult result=NS_OK;
if(aToken) {
result=aToken->Consume(aChar,aScanner);
@ -2868,7 +2883,7 @@ nsresult
CNavDTD::ConsumeNewline(PRUnichar aChar,
CScanner& aScanner,
CToken*& aToken){
aToken=new CNewlineToken(nsAutoString(""));
aToken=new CNewlineToken();
nsresult result=NS_OK;
if(aToken) {
result=aToken->Consume(aChar,aScanner);

Просмотреть файл

@ -19,11 +19,62 @@
/**
* MODULE NOTES:
* @update gess 4/8/98
* @update gess 7/15/98
*
* NavDTD is an implementation of the nsIDTD interface.
* In particular, this class captures the behaviors of the original
* Navigator parser productions.
*
* This DTD, like any other in NGLayout, provides a few basic services:
* - First, the DTD collaborates with the Parser class to convert plain
* text into a sequence of HTMLTokens.
* - Second, the DTD describes containment rules for known elements.
* - Third the DTD controls and coordinates the interaction between the
* parsing system and content sink. (The content sink is the interface
* that serves as a proxy for content model).
* - Fourth the DTD maintains an internal style-stack to handle residual (leaky)
* style tags.
*
* You're most likely working in this class file because
* you want to add or change a behavior inherent in this DTD. The remainder
* of this section will describe what you need to do to affect the kind of
* change you want in this DTD.
*
* RESIDUAL-STYLE HANDLING:
* There are a number of ways to represent style in an HTML document.
* 1) explicit style tags (<B>, <I> etc)
* 2) implicit styles (like those implicit in <Hn>)
* 3) CSS based styles
*
* Residual style handling results from explicit style tags that are
* not closed. Consider this example: <p>text <b>bold </p>
* When the <p> tag closes, the <b> tag is NOT automatically closed.
* Unclosed style tags are handled by the process we call residual-style
* tag handling.
*
* There are two aspects to residual style tag handling. The first is the
* construction and managing of a stack of residual style tags. The
* second is the automatic emission of residual style tags onto leaf content
* in subsequent portions of the document. This step is necessary to propagate
* the expected style behavior to subsequent portions of the document.
*
* Construction and managing the residual style stack is an inline process that
* occurs during the model building phase of the parse process. During the model-
* building phase of the parse process, a content stack is maintained which tracks
* the open container hierarchy. If a style tag(s) fails to be closed when a normal
* container is closed, that style tag is placed onto the residual style stack. If
* that style tag is subsequently closed (in most contexts), it is popped off the
* residual style stack -- and is of no further concern.
*
* Residual style tag emission occurs when the style stack is not empty, and leaf
* content occurs. In our earlier example, the <b> tag "leaked" out of the <p>
* container. Just before the next leaf is emitted (in this or another container) the
* style tags that are on the stack are emitted in succession. These same residual
* style tags get closed automatically when the leaf's container closes, or if a
* child container is opened.
*
*
*/
#ifndef NS_NAVHTMLDTD__
#define NS_NAVHTMLDTD__
@ -49,9 +100,10 @@ class nsParser;
/***************************************************************
First define a helper class called CTagStack.
Before digging into the NavDTD, we'll define a helper
class called CTagStack.
Simple, we've built ourselves a little data structure that
Simply put, we've built ourselves a little data structure that
serves as a stack for htmltags (and associated bits).
What's special is that if you #define _dynstack 1, the stack
size can grow dynamically (like you'ld want in a release build.)
@ -105,20 +157,16 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
public:
/**
* Common constructor for navdtd. You probably want to call
* NS_NewNavHTMLDTD().
*
*
* @update gess 4/9/98
* @param
* @return
* @update gess 7/9/98
*/
CNavDTD();
/**
*
*
* @update gess 4/9/98
* @param
* @return
* Virtual destructor -- you know what to do
* @update gess 7/9/98
*/
virtual ~CNavDTD();
@ -126,49 +174,59 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* This method is called to determine if the given DTD can parse
* a document in a given source-type.
* NOTE: Parsing always assumes that the end result will involve
* storing the result in the main content model.
* @update gess6/24/98
* @param
* @return TRUE if this DTD can satisfy the request; FALSE otherwise.
* a document of a given source-type.
* Note that parsing assumes that the end result will always be stored
* in the main content model. Of course, it's up to you which content-
* model you pass in to the parser, so you can always control the process.
*
* @update gess 7/15/98
* @param aContentType contains the name of a filetype that you are
* being asked to parse.
* @return TRUE if this DTD can parse the given type; FALSE otherwise.
*/
virtual PRBool CanParse(nsString& aContentType, PRInt32 aVersion);
/**
*
* This method gets called to find out whether the DTD can determine the
* kind of data contained in the given buffer string. If you know the
* type, then you should enter its string name in aType.
* @update gess7/7/98
* @param
* @return
* @param aBuffer contains data to be examined for autodetection.
* @param aType will contain a typename you specify.
* @return unknown, valid (if you know the type), invalid (if you dont)
*/
virtual eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType);
/**
*
* Sets a debugger into the DTD to help us debug the process.
* @update jevering6/23/98
* @param
* @return
* @param aDTDDebug is a ptr to the debug object you want us to use
*/
virtual void SetDTDDebug(nsIDTDDebug * aDTDDebug);
/**
*
* The parser uses a code sandwich to wrap the parsing process. Before
* the process begins, WillBuildModel() is called. Afterwards the parser
* calls DidBuildModel().
* @update gess5/18/98
* @param
* @return
* @param aFilename is the name of the file being parsed.
* @return error code (almost always 0)
*/
NS_IMETHOD WillBuildModel(nsString& aFilename);
/**
*
* The parser uses a code sandwich to wrap the parsing process. Before
* the process begins, WillBuildModel() is called. Afterwards the parser
* calls DidBuildModel().
* @update gess5/18/98
* @param
* @return
* @param anErrorCode contains the last error that occurred
* @return error code
*/
NS_IMETHOD DidBuildModel(PRInt32 anErrorCode);
/**
*
* This method is called by the parser, once for each token
* that has been constructed during the tokenization phase.
* @update gess 3/25/98
* @param aToken -- token object to be put into content model
* @return 0 if all is well; non-zero is an error
@ -176,11 +234,13 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
NS_IMETHOD HandleToken(CToken* aToken);
/**
*
* Set parser is called to notify the DTD which parser is driving
* the DTD. This is needed by the DTD later, for various parser
* callback methods.
*
* @update gess 3/25/98
* @param
* @return
* @param aParser pts to the controlling parser
* @return nada.
*/
virtual void SetParser(nsIParser* aParser);
@ -196,21 +256,22 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
*
* If the parse process gets interrupted, this method gets called
* prior to the process resuming.
* @update gess5/18/98
* @param
* @return
* @return error code -- usually kNoError (0)
*/
NS_IMETHOD WillResumeParse(void);
/**
*
* If the parse process is about to be interrupted, this method
* will be called just prior.
* @update gess5/18/98
* @param
* @return
* @return error code -- usually kNoError (0)
*/
NS_IMETHOD WillInterruptParse(void);
/**
* Select given content sink into parser for parser output
* @update gess5/11/98
@ -231,8 +292,8 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
/**
* This method is called to determine whether or not a tag
* of one type can contain a tag of another type.
* This method is called to determine whether a tag
* or one of its children can contain a given child tag.
*
* @update gess 3/25/98
* @param aParent -- tag enum of parent container
@ -243,22 +304,23 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* This method gets called to determine whether a given
* tag can contain newlines. Most do not.
* child tag can be omitted by the given parent.
*
* @update gess 3/25/98
* @param aTag -- tag to test for containership
* @return PR_TRUE if given tag can contain other tags
* @param aParent -- parent tag being asked about omitting given child
* @param aChild -- child tag being tested for omittability by parent
* @return PR_TRUE if given tag can be omitted
*/
virtual PRBool CanOmit(eHTMLTags aParent,eHTMLTags aChild)const;
/**
* This method gets called to determine whether a given
* tag can contain newlines. Most do not.
* This is called to determine if the given parent can omit the
* given child (end tag).
*
* @update gess 3/25/98
* @param aParent -- tag type of parent
* @param aChild -- tag type of child
* @return PR_TRUE if given tag can contain other tags
* @return PR_TRUE if given tag can omit child (end tag)
*/
virtual PRBool CanOmitEndTag(eHTMLTags aParent,eHTMLTags aChild)const;
@ -273,12 +335,13 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
virtual PRBool IsContainer(eHTMLTags aTags) const;
/**
* This method does two things: 1st, help construct
* our own internal model of the content-stack; and
* 2nd, pass this message on to the sink.
* @update gess4/6/98
* @param aNode -- next node to be added to model
* @return TRUE if ok, FALSE if error
* Call this if you want the DTD to give you a default
* Parent tag for given child tag. This is needed in cases
* such as propagation.
*
* @update gess 7/6/98
* @param aTag -- child to determine dflt parent tag for
* @return enum of parent tag -- potentially eHTMLTag_unknown
*/
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
@ -315,7 +378,9 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* This method is used to determine the index on the stack of the
* nearest container tag that can constrain autoclosure.
* nearest container tag that can constrain autoclosure. It is possible
* that no tag on the stack will gate autoclosure.
*
* @update gess 7/15/98
* @param id of tag you want to test for
* @return index of gating tag on context stack. kNotFound otherwise
@ -324,7 +389,9 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
/**
* Retrieve the tag type of the topmost item on context vector stack
* Accessor that retrieves the tag type of the topmost item on context
* vector stack.
*
* @update gess5/11/98
* @return tag type (may be unknown)
*/
@ -350,8 +417,8 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
* The following set of methods are used to partially construct
* the content model (via the sink) according to the type of token.
* @update gess5/11/98
* @param aToken is the start token to be handled
* @return TRUE if the token was handled.
* @param aToken is the token (of a given type) to be handled
* @return error code representing construction state; usually 0.
*/
nsresult HandleStartToken(CToken* aToken);
nsresult HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsIParserNode& aNode);
@ -383,11 +450,12 @@ protected:
//*************************************************
/**
* The next set of method open given HTML element.
* The next set of methods opens given HTML elements of
* various types.
*
* @update gess5/11/98
* @param HTML (node) to be opened in content sink.
* @return TRUE if all went well.
* @param node to be opened in content sink.
* @return error code representing error condition-- usually 0.
*/
nsresult OpenHTML(const nsIParserNode& aNode);
nsresult OpenHead(const nsIParserNode& aNode);
@ -402,7 +470,7 @@ protected:
*
* @update gess5/11/98
* @param HTML (node) to be opened in content sink.
* @return TRUE if all went well.
* @return error code - 0 if all went well.
*/
nsresult CloseHTML(const nsIParserNode& aNode);
nsresult CloseHead(const nsIParserNode& aNode);
@ -416,7 +484,7 @@ protected:
* The special purpose methods automatically close
* one or more open containers.
* @update gess5/11/98
* @return TRUE if all went well.
* @return error code - 0 if all went well.
*/
nsresult CloseTopmostContainer();
nsresult CloseContainersTo(eHTMLTags aTag,PRBool aUpdateStyles);
@ -426,7 +494,7 @@ protected:
* Causes leaf to be added to sink at current vector pos.
* @update gess5/11/98
* @param aNode is leaf node to be added.
* @return TRUE if all went well -- FALSE otherwise.
* @return error code - 0 if all went well.
*/
nsresult AddLeaf(const nsIParserNode& aNode);
@ -436,7 +504,7 @@ protected:
* a fall out.
* @update gess5/11/98
* @param child to be added (somewhere) to context vector stack.
* @return TRUE if succeeds, otherwise FALSE
* @return error code - 0 if all went well.
*/
nsresult ReduceContextStackFor(eHTMLTags aChildTag);
@ -449,6 +517,15 @@ protected:
*/
nsresult CreateContextStackFor(eHTMLTags aChildTag);
/**
* This set of methods is used to create and manage the set of
* transient styles that occur as a result of poorly formed HTML
* or bugs in the original navigator.
*
* @update gess5/11/98
* @param aTag -- represents the transient style tag to be handled.
* @return error code -- usually 0
*/
nsresult OpenTransientStyles(eHTMLTags aTag);
nsresult CloseTransientStyles(eHTMLTags aTag);
nsresult UpdateStyleStackForOpenTag(eHTMLTags aTag,eHTMLTags aActualTag);

Просмотреть файл

@ -16,12 +16,6 @@
* Reserved.
*/
/**
* MODULE NOTES:
* @update gess 4/1/98
*
*/
#include "CParserContext.h"
#include "nsToken.h"
@ -39,19 +33,18 @@ public:
CTokenDeallocator gTokenDeallocator;
CParserContext::CParserContext(CScanner* aScanner,
CParserContext* aPreviousContext,
nsIStreamObserver* aListener) :
CParserContext::CParserContext(CScanner* aScanner,void* aKey,nsIStreamObserver* aListener) :
mSourceType(),
mTokenDeque(gTokenDeallocator)
{
mScanner=aScanner;
mPrevContext=aPreviousContext;
mKey=aKey;
mPrevContext=0;
mListener=aListener;
NS_IF_ADDREF(mListener);
mParseMode=eParseMode_unknown;
mAutoDetectStatus=eUnknownDetect;
mTransferBuffer=new char[eTransferBufferSize+1];
mTransferBuffer=0;
mCurrentPos=0;
mMarkPos=0;
mDTD=0;

Просмотреть файл

@ -46,7 +46,7 @@ public:
enum {eTransferBufferSize=4096};
CParserContext( CScanner* aScanner,
CParserContext* aPreviousContext=0,
void* aKey=0,
nsIStreamObserver* aListener=0);
@ -67,6 +67,7 @@ public:
nsIStreamObserver* mListener;
CParserContext* mPrevContext;
void* mKey;
};

Просмотреть файл

@ -143,11 +143,11 @@ nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
/**
*
* @update gess7/7/98
* @update gess7/22/98
* @param
* @return
*/
void nsHTMLContentSinkStream::SetOutputStream(ostream& aStream) {
void nsHTMLContentSinkStream::SetOutputStream(ostream& aStream){
mOutput=&aStream;
}

Просмотреть файл

@ -19,18 +19,21 @@
/**
* MODULE NOTES:
* @update gpk 7/12/98
* @update gess 7/20/98
*
* This file declares the concrete HTMLContentSink class.
* This class is used during the parsing process as the
* primary interface between the parser and the content
* model.
* This content sink writes to a stream. If no stream
is declared in the constructor then all output goes
to cout.
The file is pretty printed according to the pretty
printing interface. subclasses may choose to override
this behavior or set runtime flags for desired
resutls.
* If you've been paying attention to our many content sink classes, you may be
* asking yourself, "why do we need yet another one?" The answer is that this
* implementation, unlike all the others, really sends its output to a given stream
* rather than to an actual content sink (as defined in our HTML document system).
*
* We use this class for a number of purposes:
* 1) For actual document i/o using XIF (xml interchange format)
* 2) For document conversions
* 3) For debug purposes (to cause output to go to cout or a file)
*
* If no stream is declared in the constructor then all output goes to cout.
* The file is pretty printed according to the pretty printing interface. subclasses
* may choose to override this behavior or set runtime flags for desired results.
*/
#ifndef NS_HTMLCONTENTSINK_STREAM
@ -54,33 +57,34 @@ class nsHTMLContentSinkStream : public nsIHTMLContentSink {
public:
/**
*
* Standard constructor
* @update gess7/7/98
* @param
* @return
*/
nsHTMLContentSinkStream();
/**
*
* Constructor with associated stream. If you use this, it means that you want
* this class to emit its output to the stream you provide.
* @update gess7/7/98
* @param
* @return
* @param aStream -- ref to stream where you want output sent
*/
nsHTMLContentSinkStream(ostream& aStream);
/**
*
* virtual destructor
* @update gess7/7/98
* @param
* @return
*/
virtual ~nsHTMLContentSinkStream();
void SetOutputStream(ostream& aStream);
// nsISupports
NS_DECL_ISUPPORTS
// nsIContentSink
/*******************************************************************
* The following methods are inherited from nsIContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD WillBuildModel(void);
NS_IMETHOD DidBuildModel(PRInt32 aQualityLevel);
NS_IMETHOD WillInterrupt(void);
@ -89,7 +93,10 @@ class nsHTMLContentSinkStream : public nsIHTMLContentSink {
NS_IMETHOD CloseContainer(const nsIParserNode& aNode);
NS_IMETHOD AddLeaf(const nsIParserNode& aNode);
// nsIHTMLContentSink
/*******************************************************************
* The following methods are inherited from nsIHTMLContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD PushMark();
NS_IMETHOD SetTitle(const nsString& aValue);
NS_IMETHOD OpenHTML(const nsIParserNode& aNode);
@ -105,13 +112,6 @@ class nsHTMLContentSinkStream : public nsIHTMLContentSink {
NS_IMETHOD OpenFrameset(const nsIParserNode& aNode);
NS_IMETHOD CloseFrameset(const nsIParserNode& aNode);
/**
*
* @update gess7/7/98
* @param
* @return
*/
void SetOutputStream(ostream& aStream);
public:
void SetLowerCaseTags(PRBool aDoLowerCase) { mLowerCaseTags = aDoLowerCase; }

Просмотреть файл

@ -39,6 +39,7 @@ static nsAutoString gDigits("0123456789");
static nsAutoString gWhitespace(" \t\b");
static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:");
static const char* gUserdefined = "userdefined";
static const char* gEmpty = "";
const PRInt32 kMAXNAMELEN=10;
@ -111,23 +112,63 @@ CHTMLToken::CHTMLToken(const nsString& aName) : CToken(aName) {
* @param
* @return
*/
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(GetTagName(aTag)) {
mTypeID=aTag;
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) {
}
/**
 * Replaces this token's text with the given c-string and marks the
 * text as explicitly set, so that GetStringValueXXX() and
 * GetCStringValue() report it rather than a name derived from the tag id.
 * @update gess5/11/98
 * @param name is a char* value containing the new string value
 */
void CHTMLToken::SetStringValue(const char* name){
  mStringInit=PR_TRUE;   //the two stores are independent of each other
  mTextValue=name;
}
/*
* default constructor
/**
* This method retrieves the value of this internal string.
*
* @update gess 3/25/98
* @param
* @return
* @return nsString reference to internal string value
*/
CStartToken::CStartToken(const nsString& aName) : CHTMLToken(aName) {
mAttributed=PR_FALSE;
mEmpty=PR_FALSE;
//Scratch string shared by every token: GetStringValueXXX() returns a
//reference to it when it reports a name derived from the tag id, so the
//next such call overwrites what an earlier caller is still referring to.
static nsAutoString gTagName;

//Returns the token's own text when it was set explicitly (mStringInit) or
//when the tag id lies outside the (unknown, userdefined) range; otherwise
//returns the tag name for mTypeID (empty when GetTagName() yields null).
nsString& CHTMLToken::GetStringValueXXX(void) {
  const PRBool isKnownTag=PRBool((mTypeID>eHTMLTag_unknown) && (mTypeID<eHTMLTag_userdefined));
  if(mStringInit || !isKnownTag)
    return mTextValue;

  const char* theName=GetTagName(mTypeID);
  gTagName=(theName) ? theName : "";
  return gTagName;
}
/**
 * This method retrieves the value of this internal string
 * as a cstring. It mirrors GetStringValueXXX(): when the text has not
 * been set explicitly and the tag id lies strictly between
 * eHTMLTag_unknown and eHTMLTag_userdefined, the tag name for that id
 * is written; in every other case the token's own text is written.
 *
 * The buffer is always left zero-terminated. A null buffer or a
 * non-positive aMaxLen is returned untouched.
 *
 * @update gess 3/25/98
 * @param aBuffer -- caller-supplied destination buffer
 * @param aMaxLen -- size limit for aBuffer. NOTE(review): callers pass
 *                   both sizeof(buf) and sizeof(buf)-1 to ToCString(), so
 *                   the tag-name copy conservatively writes at most
 *                   aMaxLen-1 characters plus the terminator.
 * @return aBuffer
 */
char* CHTMLToken::GetCStringValue(char* aBuffer, PRInt32 aMaxLen) {
  if((0==aBuffer) || (aMaxLen<1))
    return aBuffer;

  aBuffer[0]=0; //never hand back an unwritten buffer
  if((!mStringInit) && (mTypeID>eHTMLTag_unknown) && (mTypeID<eHTMLTag_userdefined)) {
    const char* str=GetTagName(mTypeID);
    if(str) {
      //bounded copy: the tag name must not run past the caller's buffer
      strncpy(aBuffer,str,aMaxLen-1);
      aBuffer[aMaxLen-1]=0;
    }
  }
  else mTextValue.ToCString(aBuffer,aMaxLen); //explicit text, or an id with no tag name
  return aBuffer;
}
/*
* constructor from tag id
*
@ -141,7 +182,20 @@ CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
}
/*
* default destructor
* constructor from tag id
*
* @update gess 3/25/98
* @param
* @return
*/
//String-based constructor: the given text is handed to the CHTMLToken
//string constructor, and both start-token flags begin cleared.
CStartToken::CStartToken(nsString& aString) : CHTMLToken(aString) {
  mEmpty=PR_FALSE;
  mAttributed=PR_FALSE;
}
/*
* This method returns the typeid (the tag type) for this token.
*
* @update gess 3/25/98
* @param
@ -249,6 +303,14 @@ nsresult CStartToken::Consume(PRUnichar aChar, CScanner& aScanner) {
mTextValue=aChar;
nsresult result=aScanner.ReadWhile(mTextValue,gIdentChars,PR_FALSE);
char buffer[300];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
eHTMLTags theTag= NS_TagToEnum(buffer);
if((theTag>eHTMLTag_unknown) && (theTag<eHTMLTag_userdefined)) {
mTypeID=theTag;
}
else mStringInit=PR_TRUE;
//Good. Now, let's skip whitespace after the identifier,
//and see if the next char is ">". If so, we have a complete
@ -278,11 +340,22 @@ nsresult CStartToken::Consume(PRUnichar aChar, CScanner& aScanner) {
* @return
*/
void CStartToken::DebugDumpSource(ostream& out) {
char* cp=mTextValue.ToNewCString();
out << "<" << *cp;
char buffer[200];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
out << "<" << buffer;
if(!mAttributed)
out << ">";
delete cp;
}
/*
* constructor from tag id; does nothing beyond handing the id
* to the CHTMLToken tag-id constructor.
*
* @update gess 3/25/98
* @param aTag -- tag id for this end token
* @return
*/
CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) {
}
@ -294,7 +367,6 @@ void CStartToken::DebugDumpSource(ostream& out) {
* @return
*/
CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) {
mTypeID=eHTMLTag_unknown;
}
/*
@ -315,6 +387,15 @@ nsresult CEndToken::Consume(PRUnichar aChar, CScanner& aScanner) {
mTextValue="";
static nsAutoString terminals(">");
nsresult result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE);
char buffer[300];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
eHTMLTags theTag= NS_TagToEnum(buffer);
if((theTag>eHTMLTag_unknown) && (theTag<eHTMLTag_userdefined)) {
mTypeID=theTag;
}
else mStringInit=PR_TRUE;
if(NS_OK==result)
result=aScanner.GetChar(aChar); //eat the closing '>;
return result;
@ -334,7 +415,7 @@ PRInt32 CEndToken::GetTypeID(){
if(eHTMLTag_unknown==mTypeID) {
nsAutoString tmp(mTextValue);
tmp.ToUpperCase();
char cbuf[20];
char cbuf[200];
tmp.ToCString(cbuf, sizeof(cbuf));
mTypeID = NS_TagToEnum(cbuf);
switch(mTypeID) {
@ -379,14 +460,24 @@ PRInt32 CEndToken::GetTokenType(void) {
* @return
*/
void CEndToken::DebugDumpSource(ostream& out) {
char* cp=mTextValue.ToNewCString();
out << "</" << *cp << ">";
delete cp;
char buffer[200];
mTextValue.ToCString(buffer,sizeof(buffer)-1);
out << "</" << buffer << ">";
}
/*
* default constructor; takes no arguments and hands eHTMLTag_text
* to the CHTMLToken tag-id constructor. No other setup is done.
*
* @update gess 3/25/98
*/
CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) {
}
/*
* Default constructor
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
@ -432,6 +523,16 @@ nsresult CTextToken::Consume(PRUnichar, CScanner& aScanner) {
return result;
};
/*
* default constructor; takes no arguments and hands eHTMLTag_comment
* to the CHTMLToken tag-id constructor. No other setup is done.
*
* @update gess 3/25/98
*/
CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) {
}
/*
* Default constructor
@ -505,6 +606,17 @@ PRInt32 CCommentToken::GetTokenType(void) {
return eToken_comment;
}
/*
* default constructor; takes no arguments and hands eHTMLTag_newline
* to the CHTMLToken tag-id constructor. No other setup is done.
*
* @update gess 3/25/98
*/
CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) {
}
/*
* default constructor
*
@ -544,7 +656,7 @@ PRInt32 CNewlineToken::GetTokenType(void) {
* @update gess 3/25/98
* @return nsString reference to internal string value
*/
nsString& CNewlineToken::GetText(void) {
nsString& CNewlineToken::GetStringValueXXX(void) {
static nsAutoString theStr("\n");
return theStr;
}
@ -589,6 +701,16 @@ nsresult CNewlineToken::Consume(PRUnichar aChar, CScanner& aScanner) {
* default constructor
*
* @update gess 3/25/98
* (takes no arguments; the token starts with tag id eHTMLTag_unknown)
* @return
*/
CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) {
  //the base class is handed eHTMLTag_unknown; no other setup is done here
}
/*
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
@ -640,11 +762,11 @@ PRInt32 CAttributeToken::GetTokenType(void) {
* @return
*/
void CAttributeToken::DebugDumpToken(ostream& out) {
  // Emit one line of the form "[ClassName] key=value: typeid".
  // A single stack buffer is reused for the key and then the value, so no
  // heap copies are made. The earlier ToNewCString variant streamed only the
  // first character of each string and never released the value copy.
  char buffer[200];
  mTextKey.ToCString(buffer,sizeof(buffer)-1);
  out << "[" << GetClassName() << "] " << buffer << "=";
  mTextValue.ToCString(buffer,sizeof(buffer)-1);
  out << buffer << ": " << mTypeID << endl;
}
@ -772,18 +894,28 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, CScanner& aScanner) {
* @return
*/
void CAttributeToken::DebugDumpSource(ostream& out) {
  // Write this attribute in source form: " key", then "=value" when a value
  // is present, then ">" when this is the last attribute of its tag.
  // One stack buffer serves both strings; nothing is heap allocated.
  char buffer[200];
  mTextKey.ToCString(buffer,sizeof(buffer)-1);
  out << " " << buffer;
  if(mTextValue.Length()){
    mTextValue.ToCString(buffer,sizeof(buffer)-1);
    out << "=" << buffer;
  }
  if(mLastAttribute)
    out<<">";
}
/*
 * default constructor -- takes no arguments; eHTMLTag_whitespace is handed
 * to the CHTMLToken base class as this token's tag.
 *
 * @update gess 3/25/98
 * @return
 */
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
}
/*
* default constructor
*
@ -837,6 +969,16 @@ nsresult CWhitespaceToken::Consume(PRUnichar aChar, CScanner& aScanner) {
return result;
}
/*
 * default constructor -- takes no arguments; eHTMLTag_entity is handed
 * to the CHTMLToken base class as this token's tag.
 *
 * @update gess 3/25/98
 * @return
 */
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
}
/*
* default constructor
*
@ -1111,6 +1253,16 @@ void CEntityToken::DebugDumpSource(ostream& out) {
delete cp;
}
/*
 * default constructor -- takes no arguments; eHTMLTag_script is handed
 * to the CHTMLToken base class as this token's tag.
 *
 * @update gess 3/25/98
 * @return
 */
CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) {
}
/*
*
*
@ -1133,6 +1285,16 @@ PRInt32 CScriptToken::GetTokenType(void) {
return eToken_script;
}
/*
 * default constructor -- takes no arguments; eHTMLTag_style is handed
 * to the CHTMLToken base class as this token's tag.
 *
 * @update gess 3/25/98
 * @return
 */
CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) {
}
/*
*
*
@ -1155,11 +1317,21 @@ PRInt32 CStyleToken::GetTokenType(void) {
return eToken_style;
}
/*
 * default constructor -- takes no arguments and defers to the default
 * constructor of CAttributeToken, which in turn tags the token with
 * eHTMLTag_unknown.
 *
 * NOTE(review): the previous initializer passed eHTMLTag_unknown straight
 * to CAttributeToken, but the only CAttributeToken constructors visible in
 * its declaration take no arguments or nsString arguments -- confirm no
 * tag-based overload is declared further down the class.
 *
 * @update gess 3/25/98
 * @return
 */
CSkippedContentToken::CSkippedContentToken() : CAttributeToken() {
}
/*
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
@ -1251,7 +1423,9 @@ public:
const char* GetTagName(PRInt32 aTag) {
  // Try the tag table first; a non-zero answer is returned as is.
  const char* theName = NS_EnumToTag((nsHTMLTag) aTag);
  if (0 != theName) {
    return theName;
  }
  // No name from the table: ids at or beyond eHTMLTag_userdefined get the
  // shared "user defined" name, every other id gets the empty name.
  if (aTag >= eHTMLTag_userdefined) {
    return gUserdefined;
  }
  return gEmpty;
}

Просмотреть файл

@ -21,13 +21,19 @@
* MODULE NOTES:
* @update gess 4/1/98
*
* This file contains the declarations for all the
* HTML specific token types that our HTML tokenizer
* delegate understands.
* This file contains the declarations for all the HTML specific token types that
* our DTD's understand. In fact, the same set of token types are used for XML.
* Currently we have tokens for text, comments, start and end tags, entities,
* attributes, style, script and skipped content. Whitespace and newlines also
* have their own token types, but don't count on them to stay forever.
*
* If you want to add a new kind of token, this is
* the place to do it. You should also add a bit of glue
* code to the HTML tokenizer delegate class.
* If you're looking for the html tags, they're in a file called nsHTMLTag.h/cpp.
*
* Most of the token types have a similar API. They have methods to get the type
* of token (GetTokenType); those that represent HTML tags also have a method to
* get the tag type (GetTypeID). In addition, most have a method that causes the
* token to help in the parsing process called (Consume). We've also thrown in a
* few standard debugging methods as well.
*/
#ifndef HTMLTOKENS_H
@ -39,6 +45,10 @@
class CScanner;
/*******************************************************************
* This enum defines the set of token types that we currently support.
*******************************************************************/
enum eHTMLTokenTypes {
eToken_unknown=0,
eToken_start=1, eToken_end, eToken_comment, eToken_entity,
@ -56,34 +66,36 @@ const char* GetTagName(PRInt32 aTag);
/**
* This declares the basic token type used in the html-
* parser.
*
* This declares the basic token type used in the HTML DTD's.
* @update gess 3/25/98
*/
class CHTMLToken : public CToken {
public:
// construct a token identified by a tag enum value
CHTMLToken(eHTMLTags aTag);
// construct a token from a string
CHTMLToken(const nsString& aString);
// the three virtuals below are also declared (virtual) on the CToken base class
virtual void SetStringValue(const char* aValue);
virtual nsString& GetStringValueXXX(void);
// fills the caller-supplied aBuffer; aMaxLen is presumably its capacity (confirm)
virtual char* GetCStringValue(char* aBuffer, PRInt32 aMaxLen);
protected:
};
/**
* This declares start tokens, which always take the
* form <xxxx>. This class also knows how to consume
* related attributes.
* This declares start tokens, which always take the form <xxxx>.
* This class also knows how to consume related attributes.
*
* @update gess 3/25/98
*/
class CStartToken: public CHTMLToken {
public:
CStartToken(eHTMLTags aTag);
CStartToken(const nsString& aString);
CStartToken(nsString& aName);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual PRInt32 GetTypeID(void);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
PRBool IsAttributed(void);
void SetAttributed(PRBool aValue);
PRBool IsEmpty(void);
@ -105,6 +117,7 @@ class CStartToken: public CHTMLToken {
*/
class CEndToken: public CHTMLToken {
public:
CEndToken(eHTMLTags aTag);
CEndToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual PRInt32 GetTypeID(void);
@ -124,6 +137,7 @@ class CEndToken: public CHTMLToken {
*/
class CCommentToken: public CHTMLToken {
public:
CCommentToken();
CCommentToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
@ -141,6 +155,7 @@ class CCommentToken: public CHTMLToken {
*/
class CEntityToken : public CHTMLToken {
public:
CEntityToken();
CEntityToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
@ -168,6 +183,7 @@ class CEntityToken : public CHTMLToken {
*/
class CWhitespaceToken: public CHTMLToken {
public:
CWhitespaceToken();
CWhitespaceToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
@ -183,6 +199,7 @@ class CWhitespaceToken: public CHTMLToken {
*/
class CTextToken: public CHTMLToken {
public:
CTextToken();
CTextToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
@ -200,6 +217,7 @@ class CTextToken: public CHTMLToken {
*/
class CAttributeToken: public CHTMLToken {
public:
CAttributeToken();
CAttributeToken(const nsString& aString);
CAttributeToken(const nsString& aKey, const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
@ -223,11 +241,12 @@ class CAttributeToken: public CHTMLToken {
*/
class CNewlineToken: public CHTMLToken {
public:
CNewlineToken();
CNewlineToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
virtual nsString& GetText(void);
virtual nsString& GetStringValueXXX(void);
};
@ -242,7 +261,7 @@ class CNewlineToken: public CHTMLToken {
*/
class CScriptToken: public CHTMLToken {
public:
CScriptToken();
CScriptToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
@ -260,6 +279,7 @@ class CScriptToken: public CHTMLToken {
*/
class CStyleToken: public CHTMLToken {
public:
CStyleToken();
CStyleToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
@ -275,6 +295,7 @@ class CStyleToken: public CHTMLToken {
*/
class CSkippedContentToken: public CAttributeToken {
public:
CSkippedContentToken();
CSkippedContentToken(const nsString& aString);
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
virtual const char* GetClassName(void);

Просмотреть файл

@ -24,10 +24,14 @@
* @update gess 4/1/98
*
* This file declares the concrete IContentSink interface.
* This pure virtual interface is used as the "glue" that
* connects the parsing process to the content model
* construction process.
* This pure virtual interface is used as the "glue" that connects the parsing
* process to the content model construction process.
*
* The icontentsink interface is a very lightweight wrapper that represents the
* content-sink model building process. There is another one that you may care
* about more, which is the IHTMLContentSink interface. (See that file for details).
*/
#include "nsIParserNode.h"
#include "nsISupports.h"

Просмотреть файл

@ -20,10 +20,16 @@
/**
* MODULE NOTES:
* @update gess 4/8/98
* @update gess 7/20/98
*
* This interface defines standard interface for DTD's. Note that this isn't HTML specific.
* DTD's have several primary functions within the parser system:
* 1) To coordinate the consumption of an input stream via the parser
* 2) To serve as proxy to represent the containment rules of the underlying document
* 3) To offer autodetection services to the parser (mainly for doc conversion)
*
*/
#include "nshtmlpars.h"
#include "nsISupports.h"
#include "prtypes.h"
@ -44,26 +50,22 @@ class nsIDTD : public nsISupports {
public:
/**
*
* Virtual destructor
* @update gess6/24/98
* @param
* @return
*/
virtual ~nsIDTD() {};
/**
* This method is called to determine whether or not a tag
* of one type can contain a tag of another type.
* This method informs the DTD about the parser being used to drive the parse process
*
* @update gess 3/25/98
* @param aParent -- tag enum of parent container
* @param aChild -- tag enum of child container
* @return PR_TRUE if parent can contain child
* @param aParser -- ptr to parser object
* @return nada
*/
virtual void SetParser(nsIParser* aParser)=0;
/**
* Select given content sink into parser for parser output
* Select given content sink into DTD for output
* @update gess5/11/98
* @param aSink is the new sink to be used by parser
* @return old sink, or NULL
@ -76,63 +78,72 @@ class nsIDTD : public nsISupports {
* NOTE: Parsing always assumes that the end result will involve
* storing the result in the main content model.
* @update gess6/24/98
* @param
* @param aContentType -- string representing type of doc to be converted (ie text/html)
* @return TRUE if this DTD can satisfy the request; FALSE otherwise.
*/
virtual PRBool CanParse(nsString& aContentType, PRInt32 aVersion)=0;
/**
*
* This method, typically called by the parser, is used to try to autodetect the
* type of data contained in the given buffer. The implementor should look at the
* buffers contents to try to determine its encoding type.
* @update gess7/7/98
* @param
* @return
* @param aBuffer-contains data to be scanned to autodetect type
* @param aType-will hold the result if type is autodetected
* @return eValid (if detected), eInvalid (if not) or eUnknown (if nothing can be done)
*/
virtual eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType)=0;
/**
*
* Called by the parser just before the parsing process begins
* @update gess5/18/98
* @param
* @param aFilename--string that contains name of file being parsed (if applicable)
* @return
*/
NS_IMETHOD WillBuildModel(nsString& aFilename)=0;
/**
*
* Called by the parser after the parsing process has concluded
* @update gess5/18/98
* @param
* @param anErrorCode - contains error code resulting from parse process
* @return
*/
NS_IMETHOD DidBuildModel(PRInt32 anErrorCode)=0;
/**
*
* Called during model building phase of parse process. Each token created during
* the parse phase is stored in a deque (in the parser) and is then passed to this
* method so that the DTD can process it. Ultimately, the DTD will transform the
* given token into calls onto a contentsink.
* @update gess 3/25/98
* @param aToken -- token object to be put into content model
* @return error code (usually 0)
*/
NS_IMETHOD HandleToken(CToken* aToken)=0;
/**
* Cause the tokenizer to consume the next token, and
* Cause the tokenizer to consume and create the next token, and
* return an error result.
*
* @update gess 3/25/98
* @param aToken -- will contain newly created and consumed token
* @return error code (usually 0)
*/
NS_IMETHOD ConsumeToken(CToken*& aToken)=0;
/**
*
* If the parse process gets interrupted midway, this method is called by the
* parser prior to resuming the process.
* @update gess5/18/98
* @param
* @return
* @return ignored
*/
NS_IMETHOD WillResumeParse(void)=0;
/**
*
* If the parse process gets interrupted, this method is called by the parser
* to notify the DTD that interruption will occur.
* @update gess5/18/98
* @param
* @return
* @return ignored
*/
NS_IMETHOD WillInterruptParse(void)=0;
@ -157,3 +168,4 @@ class nsIDTD : public nsISupports {
};
#endif /* nsIDTD_h___ */

Просмотреть файл

@ -19,6 +19,16 @@
#define NS_IPARSER___
/**
* MODULE NOTES:
* @update gess 4/1/98
*
* This class defines the iparser interface. This XPCOM
* interface is all that parser clients ever need to see.
*
**/
#include "nshtmlpars.h"
#include "nsISupports.h"
#include "nsIStreamListener.h"
@ -48,10 +58,34 @@ class nsIDTDDebug;
class nsIParser : public nsISupports {
public:
/**
* Call this method if you have a DTD that you want to share with the parser.
* Registered DTD's get remembered until the system shuts down.
*
* @update gess 3/25/98
* @param aDTD -- ptr DTD that you're publishing the services of
*/
virtual void RegisterDTD(nsIDTD* aDTD)=0;
/**
* Call this method once you've created a parser, and want to instruct it
* where to send its output.
*
* @update gess 3/25/98
* @param aContentSink -- ptr to content sink that will receive output
* @return ptr to previously set contentsink (usually null)
*/
virtual nsIContentSink* SetContentSink(nsIContentSink* aContentSink)=0;
/**
* This internal method is used when the parser needs to determine the
* type of content it's being asked to parse.
*
* @update gess 3/25/98
* @param aBuffer -- contains data to be tested (autodetected) for type
* @param aType -- string where you store the detected type (if any)
* @return autodetect enum (valid, invalid, unknown)
*/
virtual eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType)=0;
/**
@ -64,21 +98,21 @@ class nsIParser : public nsISupports {
*/
virtual PRInt32 ConsumeToken(CToken*& aToken)=0;
/******************************************************************************************
* Parse methods always begin with an input source, and perform conversions
* until you wind up with HTML in your actual content model.
* until the output has been emitted to the given contentsink (which may or may not
* be a proxy for the NGLayout content model).
******************************************************************************************/
virtual PRInt32 Parse(nsIURL* aURL,nsIStreamObserver* aListener = nsnull,nsIDTDDebug * aDTDDebug = 0) = 0;
virtual PRInt32 Parse(nsIInputStream* pIStream,nsIStreamObserver* aListener,nsIDTDDebug* aDTDDebug = 0)=0;
virtual PRInt32 Parse(nsString& aFilename)=0;
virtual PRInt32 Parse(fstream& aStream)=0;
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
virtual PRInt32 Parse(nsString& aSourceBuffer,PRBool anHTMLString)=0;
/**
* This method gets called when the tokens have been consumed, and it's time
* to build the model via the content sink.
* @update gess5/11/98
* @return YES if model building went well -- NO otherwise.
* @return error code -- 0 if model building went well .
*/
virtual PRInt32 BuildModel(void)=0;

Просмотреть файл

@ -20,6 +20,10 @@
* MODULE NOTES:
* @update jevering 6/17/98
*
* This interface is not yet used; it was intended to allow an observer object
* to "look at" the i/o stream coming into the parser before, during and after
* the parser saw it. The intention of this was to allow an observer to modify
* the stream at various stages.
*/
#ifndef IPARSERFILTER

Просмотреть файл

@ -374,7 +374,7 @@ eAutoDetectResult nsParser::AutoDetectContentType(nsString& aBuffer,nsString& aT
*
* @update gess5/18/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::WillBuildModel(nsString& aFilename){
@ -395,7 +395,7 @@ PRInt32 nsParser::WillBuildModel(nsString& aFilename){
*
* @update gess5/18/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::DidBuildModel(PRInt32 anErrorCode) {
//One last thing...close any open containers.
@ -407,72 +407,34 @@ PRInt32 nsParser::DidBuildModel(PRInt32 anErrorCode) {
return result;
}
/**
* This is the main controlling routine in the parsing process.
* Note that it may get called multiple times for the same scanner,
* since this is a pushed based system, and all the tokens may
* not have been consumed by the scanner during a given invocation
* of this method.
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
* This method adds a new parser context to the list,
* pushing the current one to the next position.
* @update gess7/22/98
* @param ptr to new context
* @return nada
*/
PRInt32 nsParser::Parse(nsString& aFilename){
PRInt32 status=kBadFilename;
void nsParser::PushContext(CParserContext& aContext) {
//link the new context to the one that is current right now, so that
//PopContext() can make that one current again later
aContext.mPrevContext=mParserContext;
//from here on the new context is the current one
mParserContext=&aContext;
}
if(aFilename) {
//ok, time to create our tokenizer and begin the process
mParserContext = new CParserContext(new CScanner(aFilename),mParserContext);
mParserContext->mScanner->Eof();
if(eValidDetect==AutoDetectContentType(mParserContext->mScanner->GetBuffer(),
mParserContext->mSourceType))
{
WillBuildModel(aFilename);
status=ResumeParse();
DidBuildModel(status);
} //if
/**
* This method pops the topmost context off the stack,
* returning it to the user. The next context (if any)
* becomes the current context.
* @update gess7/22/98
* @return prev. context
*/
CParserContext* nsParser::PopContext() {
  // Detach the current context and hand it back to the caller.
  // The context it was linked to (mPrevContext) becomes current again.
  // When no context is current, nothing changes and null is returned.
  // The caller receives the popped context and decides what to do with it.
  CParserContext* oldContext=mParserContext;
  if(oldContext) {
    mParserContext=oldContext->mPrevContext;
  }
  return oldContext;
}
/**
* Cause parser to parse input from given stream
* @update gess5/11/98
* @param aStream is the i/o source
* @return TRUE if all went well -- FALSE otherwise
*/
PRInt32 nsParser::Parse(fstream& aStream){
PRInt32 status=kNoError;
//ok, time to create our tokenizer and begin the process
mParserContext = new CParserContext(new CScanner(kUnknownFilename,aStream,PR_FALSE),mParserContext);
mParserContext->mScanner->Eof();
if(eValidDetect==AutoDetectContentType(mParserContext->mScanner->GetBuffer(),
mParserContext->mSourceType)) {
WillBuildModel(mParserContext->mScanner->GetFilename());
status=ResumeParse();
DidBuildModel(status);
} //if
return status;
}
/**
*
* @update gess7/13/98
* @param
* @return
*/
PRInt32 nsParser::Parse(nsIInputStream* pIStream,nsIStreamObserver* aListener,nsIDTDDebug* aDTDDebug){
PRInt32 result=kNoError;
return result;
}
/**
* This is the main controlling routine in the parsing process.
@ -486,7 +448,7 @@ PRInt32 nsParser::Parse(nsIInputStream* pIStream,nsIStreamObserver* aListener,ns
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener, nsIDTDDebug * aDTDDebug) {
NS_PRECONDITION(0!=aURL,kNullURL);
@ -500,34 +462,68 @@ PRInt32 nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener, nsIDTDDebug *
if(aURL) {
nsAutoString theName(aURL->GetSpec());
mParserContext=new CParserContext(new CScanner(theName,PR_FALSE),mParserContext,aListener);
CParserContext* cp=new CParserContext(new CScanner(theName,PR_FALSE),aURL,aListener);
PushContext(*cp);
status=NS_OK;
}
return status;
}
/**
 *  Parse the content delivered by an already opened file stream.
 *  A scanner and a parser context are created for the stream, the context
 *  is made current for the duration of the call, and it is popped and
 *  deleted again before returning.
 *
 *  @update  gess5/11/98
 *  @param   aStream is the i/o source
 *  @return  kNoError when the content type is not detected as valid (nothing
 *           is parsed); otherwise whatever ResumeParse() reported.
 */
PRInt32 nsParser::Parse(fstream& aStream){
  PRInt32 theResult=kNoError;

  //build a scanner over the stream and make its context the current one
  CScanner* theScanner=new CScanner(kUnknownFilename,aStream,PR_FALSE);
  CParserContext* theContext=new CParserContext(theScanner,&aStream,0);
  PushContext(*theContext);

  //NOTE(review): Eof() is presumably called to prime the scanner's buffer
  //before the autodetect step looks at it -- confirm against CScanner.
  mParserContext->mScanner->Eof();

  eAutoDetectResult theDetect=AutoDetectContentType(mParserContext->mScanner->GetBuffer(),
                                                    mParserContext->mSourceType);
  if(eValidDetect==theDetect) {
    WillBuildModel(mParserContext->mScanner->GetFilename());
    theResult=ResumeParse();
    DidBuildModel(theResult);
  }

  //done with this source: drop the current context again
  delete PopContext();
  return theResult;
}
/**
* Call this method if all you want to do is parse 1 string full of HTML text.
* In particular, this method should be called by the DOM when it has an HTML
* string to feed to the parser in real-time.
*
* @update gess5/11/98
* @param anHTMLString contains a string-full of real HTML
* @param appendTokens tells us whether we should insert tokens inline, or append them.
* @return TRUE if all went well -- FALSE otherwise
* @param aSourceBuffer contains a string-full of real content
* @param anHTMLString tells us whether we should assume the content is HTML (usually true)
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
PRInt32 nsParser::Parse(nsString& aSourceBuffer,PRBool anHTMLString){
PRInt32 result=kNoError;
mParserContext = new CParserContext(new CScanner(kUnknownFilename),mParserContext,0);
CParserContext* pc=new CParserContext(new CScanner(kUnknownFilename),&aSourceBuffer,0);
PushContext(*pc);
if(PR_TRUE==anHTMLString)
pc->mSourceType="text/html";
mParserContext->mScanner->Append(aSourceBuffer);
if(eValidDetect==AutoDetectContentType(aSourceBuffer,mParserContext->mSourceType)) {
WillBuildModel(mParserContext->mScanner->GetFilename());
result=ResumeParse();
DidBuildModel(result);
}
pc=PopContext();
delete pc;
return result;
}
@ -539,7 +535,7 @@ PRInt32 nsParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
*
* @update gess 3/25/98
* @param
* @return PR_TRUE if parsing concluded successfully.
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::ResumeParse() {
PRInt32 result=kNoError;
@ -560,7 +556,7 @@ PRInt32 nsParser::ResumeParse() {
*
* @update gess 3/25/98
* @param
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::BuildModel() {
@ -575,6 +571,20 @@ PRInt32 nsParser::BuildModel() {
mMinorIteration++;
CToken* theToken=(CToken*)mParserContext->mCurrentPos->GetCurrent();
/**************************************************************************
The point of this code is to serve as a testbed for parser reentrancy.
If you set recurse=1, we go reentrant, passing a text string of HTML onto
the parser for inline processing, just like javascript/DOM would do it.
And guess what? It worked the first time!
Uncomment the following code to enable the test:
int recurse=0;
if(recurse){
nsString theString("<table border=1><tr><td BGCOLOR=blue>cell</td></tr></table>");
Parse(theString,PR_TRUE);
}
**************************************************************************/
theMarkPos=*mParserContext->mCurrentPos;
result=mParserContext->mDTD->HandleToken(theToken);
++(*mParserContext->mCurrentPos);
@ -651,7 +661,7 @@ PRInt32 nsParser::CollectSkippedContent(nsCParserNode& aNode,PRInt32& aCount) {
*
* @update gess 5/12/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::GetBindInfo(nsIURL* aURL){
nsresult result=0;
@ -663,7 +673,7 @@ nsresult nsParser::GetBindInfo(nsIURL* aURL){
*
* @update gess 5/12/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult
nsParser::OnProgress(nsIURL* aURL, PRInt32 aProgress, PRInt32 aProgressMax,
@ -681,7 +691,7 @@ nsParser::OnProgress(nsIURL* aURL, PRInt32 aProgress, PRInt32 aProgressMax,
*
* @update gess 5/12/98
* @param
* @return
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::OnStartBinding(nsIURL* aURL, const char *aSourceType){
if (nsnull != mObserver) {
@ -718,7 +728,11 @@ nsresult nsParser::OnDataAvailable(nsIURL* aURL, nsIInputStream *pIStream, PRInt
int len=1; //init to a non-zero value
int err;
if(!mParserContext->mTransferBuffer)
mParserContext->mTransferBuffer = new char[CParserContext::eTransferBufferSize+1];
while (len > 0) {
len = pIStream->Read(&err, mParserContext->mTransferBuffer, 0, mParserContext->eTransferBufferSize);
if(len>0) {
@ -793,7 +807,7 @@ PRBool nsParser::WillTokenize(){
* of data.
*
* @update gess 3/25/98
* @return error code
* @return error code -- 0 if ok, non-zero if error.
*/
PRInt32 nsParser::Tokenize(){
CToken* theToken=0;
@ -803,7 +817,7 @@ PRInt32 nsParser::Tokenize(){
while((PR_FALSE==done) && (kNoError==result)) {
mParserContext->mScanner->Mark();
result=ConsumeToken(theToken);
result=mParserContext->mDTD->ConsumeToken(theToken);
if(kNoError==result) {
if(theToken) {

Просмотреть файл

@ -128,25 +128,6 @@ friend class CTokenHandler;
nsIStreamObserver* aListener,
nsIDTDDebug* aDTDDebug = 0);
/**
* Cause parser to parse input from given nsIInputStream
* @update gess5/11/98
* @param pIStream is an nsIInputStream
* @param aListener is a listener to forward notifications to
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsIInputStream* pIStream,
nsIStreamObserver* aListener,
nsIDTDDebug* aDTDDebug = 0);
/**
* Cause parser to parse input from given file in given mode
* @update gess5/11/98
* @param aFilename is a path for file document
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsString& aFilename);
/**
* Cause parser to parse input from given stream
* @update gess5/11/98
@ -161,7 +142,7 @@ friend class CTokenHandler;
* @param appendTokens tells us whether we should insert tokens inline, or append them.
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
virtual PRInt32 Parse(nsString& aSourceBuffer,PRBool anHTMLString);
/**
* This method gets called (automatically) during incremental parsing
@ -310,6 +291,8 @@ private:
*/
eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType);
void PushContext(CParserContext& aContext);
CParserContext* PopContext();
protected:
//*********************************************

Просмотреть файл

@ -107,7 +107,7 @@ const nsString& nsCParserNode::GetName() const {
* @return string ref of text from internal token
*/
const nsString& nsCParserNode::GetText() const {
return mToken->GetText();
return mToken->GetStringValueXXX();
}
/**
@ -123,7 +123,7 @@ const nsString& nsCParserNode::GetSkippedContent() const {
if(mAttributes[mAttributeCount-1]) {
CSkippedContentToken* sc=(CSkippedContentToken*)(mAttributes[mAttributeCount-1]);
if(sc) {
return sc->GetText();
return sc->GetStringValueXXX();
}
}
}
@ -191,7 +191,7 @@ const nsString& nsCParserNode::GetKeyAt(PRInt32 anIndex) const {
*/
const nsString& nsCParserNode::GetValueAt(PRInt32 anIndex) const {
NS_PRECONDITION(anIndex<mAttributeCount, "Bad attr index");
return (mAttributes[anIndex])->GetText();
return (mAttributes[anIndex])->GetStringValueXXX();
}

Просмотреть файл

@ -22,11 +22,25 @@
/**
 *  Default constructor -- creates a token known only by its numeric
 *  tag/type id. The text value is default-constructed and the token is
 *  flagged as not string-initialized.
 *
 *  @update  gess 7/21/98
 *  @param   aTag -- id stored as this token's type
 */
CToken::CToken(PRInt32 aTag)
  : mTypeID(aTag),
    mAttrCount(0),
    mStringInit(PR_FALSE),
    mUnused(PR_FALSE),
    mTextValue() {
}
/**
 *  Constructor that takes the token's text from an nsString.
 *  The type id starts out as 0 and the token is flagged as
 *  string-initialized.
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string copied into the token's text value
 */
CToken::CToken(const nsString& aName)
  : mTypeID(0),
    mAttrCount(0),
    mStringInit(PR_TRUE),
    mUnused(PR_FALSE),
    mTextValue(aName) {
}
@ -38,6 +52,8 @@ CToken::CToken(const nsString& aName) : mTextValue(aName) {
*/
CToken::CToken(const char* aName)
  : mTypeID(0),
    mAttrCount(0),
    mStringInit(PR_TRUE),
    mUnused(PR_FALSE),
    mTextValue(aName) {
  //all state is set up by the initializers above: the text comes from the
  //given c-string, the type id starts at 0, and the token is flagged as
  //string-initialized
}
@ -64,15 +80,6 @@ nsresult CToken::Consume(PRUnichar aChar,CScanner& aScanner) {
return result;
}
/**
* Method used to set the string value of this token
*
* @update gess 3/25/98
* @param aValue -- char* containing new value
*/
void CToken::SetStringValue(const char* aValue) {
mTextValue=aValue;
}
/**
* This debug method causes the token to dump its content
@ -103,13 +110,12 @@ void CToken::DebugDumpSource(ostream& anOutputStream) {
}
/**
* This method retrieves the value of this internal string.
*
* @update gess 3/25/98
* @return nsString reference to internal string value
* Setter method that changes the string value of this token
* @update gess5/11/98
* @param name is a char* value containing new string value
*/
nsString& CToken::GetStringValue(void) {
return mTextValue;
void CToken::SetStringValue(const char* name){
mTextValue=name;
}
/**
@ -118,10 +124,22 @@ nsString& CToken::GetStringValue(void) {
* @update gess 3/25/98
* @return nsString reference to internal string value
*/
nsString& CToken::GetText(void) {
nsString& CToken::GetStringValueXXX(void) {
return mTextValue;
}
/**
* This method retrieves the value of this internal string
* as a cstring.
*
* @update gess 3/25/98
* @return char* rep of internal string value
*/
char* CToken::GetCStringValue(char* aBuffer, PRInt32 aMaxLen) {
strcpy(aBuffer,"string");
return aBuffer;
}
/**
* Sets the internal ordinal value for this token.
* This method is deprecated, and will soon be going away.

Просмотреть файл

@ -55,6 +55,12 @@ class CToken {
/**
* Default constructor
* @update gess7/21/98
*/
CToken(PRInt32 aTag=0);
/**
* Constructor with string assignment for tag
* @update gess5/11/98
* @param aName is the given name of the token
*/
@ -78,14 +84,7 @@ class CToken {
* @update gess5/11/98
* @return reference to string containing string value
*/
virtual nsString& GetStringValue(void);
/**
* Get text of this token
* @update gess5/11/98
* @return string ref containing text value of this token
*/
virtual nsString& GetText(void);
virtual nsString& GetStringValueXXX(void);
/**
* Setter method that changes the string value of this token
@ -94,6 +93,13 @@ class CToken {
*/
virtual void SetStringValue(const char* name);
/**
* Retrieve string value of the token as a c-string
* @update gess5/11/98
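* @param aBuffer -- caller-supplied character buffer
* @param aMaxLen -- presumably the capacity of aBuffer (confirm against implementations)
* @return char* to the c-string (the base implementation returns aBuffer)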
*/
virtual char* GetCStringValue(char* aBuffer, PRInt32 aMaxLen);
/**
* Sets the ordinal value of this token (not currently used)
* @update gess5/11/98
@ -170,8 +176,9 @@ class CToken {
protected:
PRInt32 mTypeID;
PRInt16 mAttrCount;
PRInt16 mUnused;
nsString mTextValue;
PRBool mStringInit;
PRBool mUnused;
nsAutoString mTextValue;
};

Просмотреть файл

@ -245,7 +245,7 @@ PRInt32 XIFDispatchTokenHandler(CToken* aToken,nsIDTD* aDTD) {
eHTMLTokenTypes theType= (eHTMLTokenTypes)aToken->GetTokenType();
nsXIFDTD* theDTD=(nsXIFDTD*)aDTD;
nsString& name = aToken->GetStringValue();
nsString& name = aToken->GetStringValueXXX();
eXIFTags type = DetermineXIFTagType(name);
if (type != eXIFTag_userdefined)
@ -519,7 +519,7 @@ PRInt32 nsXIFDTD::HandleTextToken(CToken* aToken) {
if (type == eXIFTag_text)
{
nsString& temp = aToken->GetText();
nsString& temp = aToken->GetStringValueXXX();
if (temp != "<xml version=\"1.0\"?>")
{
@ -1427,7 +1427,7 @@ PRInt32 nsXIFDTD::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartTok
//and a textkey of "/". We should destroy it, and tell the
//start token it was empty.
nsString& key=theToken->GetKey();
nsString& text=theToken->GetText();
nsString& text=theToken->GetStringValueXXX();
if((key[0]==kForwardSlash) && (0==text.Length())){
//tada! our special case! Treat it like an empty start tag...
aToken->SetEmpty(PR_TRUE);
@ -1488,7 +1488,7 @@ PRInt32 nsXIFDTD::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aT
PRInt32 theDequeSize=mTokenDeque.GetSize();
PRInt32 result=kNoError;
aToken=new CStartToken(nsAutoString(""));
aToken=new CStartToken(eHTMLTag_unknown);
if(aToken) {
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...