gecko-dev/parser/htmlparser/src/nsParser.h

327 строки
10 KiB
C
Исходник Обычный вид История

1998-04-14 00:24:54 +04:00
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
1998-04-14 00:24:54 +04:00
/**
* MODULE NOTES:
* @update gess 4/1/98
*
* This class does two primary jobs:
* 1) It iterates the tokens provided during the
* tokenization process, identifying where elements
* begin and end (doing validation and normalization).
* 2) It controls and coordinates with an instance of
* the IContentSink interface, to coordinate
* the production of the content model.
*
* The basic operation of this class assumes that an HTML
* document is non-normalized. Therefore, we don't process
* the document in a normalized way. Don't bother to look
* for methods like: doHead() or doBody().
*
* Instead, in order to be backward compatible, we must
* scan the set of tokens and perform this basic set of
* operations:
* 1) Determine the token type (easy, since the tokens know)
* 2) Determine the appropriate section of the HTML document
* each token belongs in (HTML,HEAD,BODY,FRAMESET).
* 3) Insert content into our document (via the sink) into
* the correct section.
* 4) In the case of tags that belong in the BODY, we must
* ensure that our underlying document state reflects
* the appropriate context for our tag.
*
* For example, if we see a <TR>, we must ensure our
* document contains a table into which the row can
* be placed. This may result in "implicit containers"
* created to ensure a well-formed document.
*
*/
#ifndef NS_PARSER__
#define NS_PARSER__
1998-04-14 00:24:54 +04:00
#include "nsIParser.h"
#include "nsDeque.h"
#include "nsParserNode.h"
#include "nsParserTypes.h"
1998-05-15 02:19:08 +04:00
#include "nsIURL.h"
#include "CParserContext.h"
#include "nsParserCIID.h"
1998-04-14 00:24:54 +04:00
class IContentSink;
class nsIHTMLContentSink;
class nsIDTD;
class CScanner;
class nsIParserFilter;
1998-07-28 08:16:13 +04:00
#include <fstream.h>
1998-04-14 00:24:54 +04:00
class nsParser : public nsIParser, public nsIStreamListener {
1998-04-14 00:24:54 +04:00
public:
friend class CTokenHandler;
1998-04-14 00:24:54 +04:00
NS_DECL_ISUPPORTS
1998-05-12 04:59:32 +04:00
/**
* default constructor
* @update gess5/11/98
*/
nsParser();
1998-05-12 04:59:32 +04:00
/**
* Destructor
* @update gess5/11/98
*/
1998-07-14 01:42:45 +04:00
virtual ~nsParser();
1998-05-12 04:59:32 +04:00
/**
* Select given content sink into parser for parser output
* @update gess5/11/98
* @param aSink is the new sink to be used by parser
* @return old sink, or NULL
*/
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
/**
* retrive the sink set into the parser
* @update gess5/11/98
* @param aSink is the new sink to be used by parser
* @return old sink, or NULL
*/
virtual nsIContentSink* GetContentSink(void);
1998-11-11 14:55:32 +03:00
/**
* Call this method once you've created a parser, and want to instruct it
* about the command which caused the parser to be constructed. For example,
* this allows us to select a DTD which can do, say, view-source.
*
* @update gess 3/25/98
* @param aContentSink -- ptr to content sink that will receive output
* @return ptr to previously set contentsink (usually null)
*/
virtual void SetCommand(const char* aCommand);
virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
virtual void RegisterDTD(nsIDTD* aDTD);
/**
* Retrieve the scanner from the topmost parser context
*
* @update gess 6/9/98
* @return ptr to scanner
*/
virtual eParseMode GetParseMode(void);
/**
* Retrieve the scanner from the topmost parser context
*
* @update gess 6/9/98
* @return ptr to scanner
*/
virtual CScanner* GetScanner(void);
1998-05-12 04:59:32 +04:00
/**
* Cause parser to parse input from given URL
1998-05-12 04:59:32 +04:00
* @update gess5/11/98
* @param aURL is a descriptor for source document
* @param aListener is a listener to forward notifications to
1998-05-12 04:59:32 +04:00
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aEnableVerify=PR_FALSE);
1998-07-10 09:35:23 +04:00
/**
* Cause parser to parse input from given stream
* @update gess5/11/98
* @param aStream is the i/o source
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(fstream& aStream,PRBool aEnableVerify=PR_FALSE);
1998-07-10 09:35:23 +04:00
1998-05-15 02:19:08 +04:00
/**
* @update gess5/11/98
* @param anHTMLString contains a string-full of real HTML
* @param appendTokens tells us whether we should insert tokens inline, or append them.
* @return TRUE if all went well -- FALSE otherwise
*/
virtual PRInt32 Parse(nsString& aSourceBuffer,PRBool anHTMLString,PRBool aEnableVerify=PR_FALSE);
1998-05-12 04:59:32 +04:00
/**
* Call this when you want control whether or not the parser will parse
* and tokenize input (TRUE), or whether it just caches input to be
* parsed later (FALSE).
*
* @update gess 9/1/98
* @param aState determines whether we parse/tokenize or just cache.
* @return current state
*/
virtual PRBool EnableParser(PRBool aState);
1998-05-12 04:59:32 +04:00
/**
* This method gets called (automatically) during incremental parsing
* @update gess5/11/98
* @return TRUE if all went well, otherwise FALSE
*/
virtual PRInt32 ResumeParse();
1998-05-12 04:59:32 +04:00
1998-08-04 01:04:54 +04:00
virtual CToken* PushToken(CToken* theToken);
virtual CToken* PopToken();
virtual CToken* PeekToken();
1998-05-12 04:59:32 +04:00
/**
* This debug routine is used to cause the tokenizer to
* iterate its token list, asking each token to dump its
* contents to the given output stream.
*
* @update gess 3/25/98
* @param
* @return
1998-05-12 04:59:32 +04:00
*/
void DebugDumpSource(ostream& out);
1998-05-12 04:59:32 +04:00
//*********************************************
1998-05-15 02:19:08 +04:00
// These methods are callback methods used by
// net lib to let us know about our inputstream.
//*********************************************
NS_IMETHOD GetBindInfo(nsIURL* aURL);
NS_IMETHOD OnProgress(nsIURL* aURL, PRInt32 Progress, PRInt32 ProgressMax);
NS_IMETHOD OnStatus(nsIURL* aURL, const nsString& aMmsg);
NS_IMETHOD OnStartBinding(nsIURL* aURL, const char *aContentType);
NS_IMETHOD OnDataAvailable(nsIURL* aURL, nsIInputStream *pIStream, PRInt32 length);
NS_IMETHOD OnStopBinding(nsIURL* aURL, PRInt32 status, const nsString& aMsg);
1998-05-12 04:59:32 +04:00
protected:
/**
*
* @update gess5/18/98
* @param
* @return
1998-05-12 04:59:32 +04:00
*/
PRInt32 WillBuildModel(nsString& aFilename,nsIDTD* mDefaultDTD=0);
1998-05-12 04:59:32 +04:00
/**
*
* @update gess5/18/98
* @param
* @return
1998-05-12 04:59:32 +04:00
*/
PRInt32 DidBuildModel(PRInt32 anErrorCode);
1998-05-12 04:59:32 +04:00
/**
* This method gets called when the tokens have been consumed, and it's time
* to build the model via the content sink.
1998-05-12 04:59:32 +04:00
* @update gess5/11/98
* @return YES if model building went well -- NO otherwise.
1998-05-12 04:59:32 +04:00
*/
virtual PRInt32 BuildModel(void);
private:
1998-05-12 04:59:32 +04:00
/*******************************************
These are the tokenization methods...
*******************************************/
1998-05-12 04:59:32 +04:00
/**
* Part of the code sandwich, this gets called right before
* the tokenization process begins. The main reason for
* this call is to allow the delegate to do initialization.
*
* @update gess 3/25/98
* @param
* @return TRUE if it's ok to proceed
1998-05-12 04:59:32 +04:00
*/
PRBool WillTokenize();
1998-05-12 04:59:32 +04:00
1998-05-12 04:59:32 +04:00
/**
* This is the primary control routine. It iteratively
* consumes tokens until an error occurs or you run out
* of data.
*
* @update gess 3/25/98
* @return error code
1998-05-12 04:59:32 +04:00
*/
PRInt32 Tokenize();
1998-05-12 04:59:32 +04:00
/**
* This is the tail-end of the code sandwich for the
* tokenization process. It gets called once tokenziation
* has completed.
*
* @update gess 3/25/98
* @param
* @return TRUE if all went well
1998-05-12 04:59:32 +04:00
*/
PRBool DidTokenize();
1998-05-12 04:59:32 +04:00
/**
* This debug routine is used to cause the tokenizer to
* iterate its token list, asking each token to dump its
* contents to the given output stream.
*
* @update gess 3/25/98
* @param
* @return
1998-05-12 04:59:32 +04:00
*/
void DebugDumpTokens(ostream& out);
1998-05-12 04:59:32 +04:00
/**
* This method is used as a backstop to compute the kind of content
* that is contained in the scanner stream. This method is important
* because it allows us to defer the resolution of our DTD (and hence)
* filters and maybe eventually sinks based on the input type.
*
* @update gess6/22/98
* @param
* @return TRUE if we figured it out.
*/
1998-07-10 09:35:23 +04:00
eAutoDetectResult AutoDetectContentType(nsString& aBuffer,nsString& aType);
void PushContext(CParserContext& aContext);
CParserContext* PopContext();
1998-05-15 02:19:08 +04:00
protected:
1998-05-12 04:59:32 +04:00
//*********************************************
// And now, some data members...
//*********************************************
enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};
CParserContext* mParserContext;
1998-07-17 06:35:23 +04:00
PRInt32 mMajorIteration;
PRInt32 mMinorIteration;
nsIStreamObserver* mObserver;
nsIContentSink* mSink;
nsIParserFilter* mParserFilter;
PRBool mDTDVerification;
PRBool mParserEnabled;
1998-11-11 14:55:32 +03:00
nsString mCommand;
eStreamState mStreamListenerState; //this is really only here for debug purposes.
1998-04-14 00:24:54 +04:00
};
#endif