Added new sink that outputs a text stream instead of an HTML stream.
This commit is contained in:
kostello%netscape.com 1999-03-04 21:52:57 +00:00
Родитель 5634ac2617
Коммит 99ea5afe50
12 изменённых файлов: 1818 добавлений и 36 удалений

Просмотреть файл

@ -6,6 +6,7 @@ nshtmlpars.h
nsIContentSink.h
nsIHTMLContentSink.h
nsHTMLContentSinkStream.h
nsHTMLToTXTSinkStream.h
nsITokenizer.h
nsHTMLTokens.h
nsIParserNode.h

Просмотреть файл

@ -50,6 +50,7 @@ CPPSRCS= \
nsToken.cpp \
nsTokenHandler.cpp \
nsHTMLContentSinkStream.cpp \
nsHTMLToTXTSinkStream.cpp \
nsValidDTD.cpp \
nsWellFormedDTD.cpp \
nsViewSourceHTML.cpp \
@ -64,6 +65,7 @@ EXPORTS = \
nsIExpatTokenizer.h \
nsIHTMLContentSink.h \
nsHTMLContentSinkStream.h \
nsHTMLToTXTSinkStream.h \
nsHTMLEntities.h \
nsHTMLTokens.h \
nsILoggingSink.h \

Просмотреть файл

@ -46,6 +46,7 @@ CPPSRCS= \
nsToken.cpp \
nsTokenHandler.cpp \
nsHTMLContentSinkStream.cpp \
nsHTMLToTXTSinkStream.cpp \
nsValidDTD.cpp \
nsWellFormedDTD.cpp \
nsViewSourceHTML.cpp \
@ -77,6 +78,7 @@ CPP_OBJS= \
.\$(OBJDIR)\nsToken.obj \
.\$(OBJDIR)\nsTokenHandler.obj \
.\$(OBJDIR)\nsHTMLContentSinkStream.obj \
.\$(OBJDIR)\nsHTMLToTXTSinkStream.obj \
.\$(OBJDIR)\nsValidDTD.obj \
.\$(OBJDIR)\nsWellFormedDTD.obj \
.\$(OBJDIR)\nsViewSourceHTML.obj \
@ -93,6 +95,7 @@ EXPORTS= \
nsIHTMLContentSink.h \
nsILoggingSink.h \
nsHTMLContentSinkStream.h \
nsHTMLToTXTSinkStream.h \
nsHTMLEntities.h \
nsHTMLTokens.h \
nsIParserNode.h \

Просмотреть файл

@ -41,28 +41,154 @@ static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
static char* gHeaderComment = "<!-- This page was created by the NGLayout output system. -->";
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
const int gTabSize=2;
static char gBuffer[500];
static char gBuffer[1024];
/** PRETTY PRINTING PROTOTYPES **/
PRBool IsInline(eHTMLTags aTag);
PRBool IsBlockLevel(eHTMLTags aTag);
PRInt32 BreakBeforeOpen(eHTMLTags aTag);
PRInt32 BreakAfterOpen(eHTMLTags aTag);
PRInt32 BreakBeforeClose(eHTMLTags aTag);
PRInt32 BreakAfterClose(eHTMLTags aTag);
PRBool IndentChildren(eHTMLTags aTag);
PRBool PreformattedChildren(eHTMLTags aTag);
PRBool EatOpen(eHTMLTags aTag);
PRBool EatClose(eHTMLTags aTag);
PRBool PermitWSBeforeOpen(eHTMLTags aTag);
PRBool PermitWSAfterOpen(eHTMLTags aTag);
PRBool PermitWSBeforeClose(eHTMLTags aTag);
PRBool PermitWSAfterClose(eHTMLTags aTag);
PRBool IgnoreWS(eHTMLTags aTag);
/**
 * Per-tag formatting record used by nsPrettyPrinter: four break flags,
 * an indent group and an on/off format switch. All state is public and
 * is (re)initialized through Init().
 */
class nsTagFormat
{
public:
  void Init(PRBool aBefore, PRBool aStart, PRBool aEnd, PRBool aAfter);
  void SetIndentGroup(PRUint8 aGroup);
  void SetFormat(PRBool aOnOff);
public:
  PRBool mBreakBefore;   // set from Init()'s aBefore
  PRBool mBreakStart;    // set from Init()'s aStart
  PRBool mBreakEnd;      // set from Init()'s aEnd
  PRBool mBreakAfter;    // set from Init()'s aAfter
  PRUint8 mIndentGroup;  // zero for none
  PRBool mFormat;        // format (on|off)
};

/**
 * Reset this record: store the four break flags, clear the indent group
 * and switch formatting on.
 *
 * @param aBefore value for mBreakBefore
 * @param aStart  value for mBreakStart
 * @param aEnd    value for mBreakEnd
 * @param aAfter  value for mBreakAfter
 */
void nsTagFormat::Init(PRBool aBefore, PRBool aStart, PRBool aEnd, PRBool aAfter)
{
  mBreakBefore = aBefore;
  mBreakStart = aStart;
  mBreakEnd = aEnd;
  mBreakAfter = aAfter;
  // Previously left uninitialized; zero means "no indent group".
  mIndentGroup = 0;
  mFormat = PR_TRUE;
}

/**
 * Assign the indent group for this tag.
 *
 * @param aGroup new indent group (zero for none)
 */
void nsTagFormat::SetIndentGroup(PRUint8 aGroup)
{
  mIndentGroup = aGroup;
}

/**
 * Turn formatting for this tag on or off.
 *
 * @param aOnOff new value for mFormat
 */
void nsTagFormat::SetFormat(PRBool aOnOff)
{
  mFormat = aOnOff;
}
/**
 * Pretty-printing configuration: indentation and auto-wrap settings, the
 * line-break string, and one nsTagFormat record per HTML tag id.
 */
class nsPrettyPrinter
{
public:
  void Init(PRBool aIndentEnable = PR_TRUE, PRUint8 aColSize = 2, PRUint8 aTabSize = 8, PRBool aUseTabs = PR_FALSE );
  PRBool mIndentEnable;
  PRUint8 mIndentColSize;
  PRUint8 mIndentTabSize;
  PRBool mIndentUseTabs;
  PRBool mAutowrapEnable;
  PRUint32 mAutoWrapColWidth;
  nsString mBreak; // CRLF, CR, LF
  nsTagFormat mTagFormat[NS_HTML_TAG_MAX+1];
};

/**
 * Load the default configuration: store the indentation parameters, enable
 * auto-wrap at column 72, use "\n" as the break string, clear every tag
 * record and then install the per-tag break flags listed below.
 *
 * @param aIndentEnable value for mIndentEnable
 * @param aColSize      value for mIndentColSize
 * @param aTabSize      value for mIndentTabSize
 * @param aUseTabs      value for mIndentUseTabs
 */
void nsPrettyPrinter::Init(PRBool aIndentEnable, PRUint8 aColSize, PRUint8 aTabSize, PRBool aUseTabs)
{
  mIndentEnable = aIndentEnable;
  mIndentColSize = aColSize;
  mIndentTabSize = aTabSize;
  mIndentUseTabs = aUseTabs;
  mAutowrapEnable = PR_TRUE;
  mAutoWrapColWidth = 72;
  mBreak = "\n"; // CRLF, CR, LF

  // mTagFormat holds NS_HTML_TAG_MAX+1 records, so the reset has to include
  // index NS_HTML_TAG_MAX; the previous "<" bound left the last record
  // uninitialized.
  for (PRUint32 i = 0; i <= NS_HTML_TAG_MAX; i++)
    mTagFormat[i].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);

  // Arguments are (before, start, end, after).
  mTagFormat[eHTMLTag_a].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_abbr].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);
  mTagFormat[eHTMLTag_applet].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
  mTagFormat[eHTMLTag_area].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_b].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);
  mTagFormat[eHTMLTag_base].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_blockquote].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_body].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_br].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_caption].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_center].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_dd].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_dir].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_div].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_dl].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_dt].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_embed].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_form].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_frame].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_frameset].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h1].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h2].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h3].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h4].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h5].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h6].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_head].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_hr].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_html].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_ilayer].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_input].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_isindex].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_layer].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_li].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_link].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_map].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
  mTagFormat[eHTMLTag_menu].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_meta].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_object].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
  mTagFormat[eHTMLTag_ol].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_option].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_p].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_param].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_pre].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_script].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_select].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_style].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_table].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_td].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_textarea].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_th].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_title].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_tr].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_ul].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
}
/**
 * Forward declarations of the pretty-printing helper predicates.
 * They are declared static, i.e. with internal linkage, so they are
 * private to this translation unit. Each takes the tag id to query.
 */
static PRBool IsInline(eHTMLTags aTag);
static PRBool IsBlockLevel(eHTMLTags aTag);
static PRInt32 BreakBeforeOpen(eHTMLTags aTag);
static PRInt32 BreakAfterOpen(eHTMLTags aTag);
static PRInt32 BreakBeforeClose(eHTMLTags aTag);
static PRInt32 BreakAfterClose(eHTMLTags aTag);
static PRBool IndentChildren(eHTMLTags aTag);
static PRBool PreformattedChildren(eHTMLTags aTag);
static PRBool EatOpen(eHTMLTags aTag);
static PRBool EatClose(eHTMLTags aTag);
static PRBool PermitWSBeforeOpen(eHTMLTags aTag);
static PRBool PermitWSAfterOpen(eHTMLTags aTag);
static PRBool PermitWSBeforeClose(eHTMLTags aTag);
static PRBool PermitWSAfterClose(eHTMLTags aTag);
static PRBool IgnoreWS(eHTMLTags aTag);
@ -150,6 +276,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHead
* @return
*/
nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoFormat,PRBool aDoHeader) {
NS_INIT_REFCNT();
mOutput = &aStream;
mLowerCaseTags = PR_TRUE;
memset(mHTMLTagStack,0,sizeof(mHTMLTagStack));

Просмотреть файл

@ -0,0 +1,626 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are Copyright (C) 1998
* Netscape Communications Corporation. All Rights Reserved.
*/
/**
* MODULE NOTES:
*
* This file implements the concrete TXT ContentSink class.
* This class is used during the parsing process as the
* primary interface between the parser and the content
* model.
*/
#include "nsHTMLToTXTSinkStream.h"
#include "nsHTMLTokens.h"
#include <iostream.h>
#include "nsString.h"
#include "nsIParser.h"
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
const int gTabSize=2;
static char gBuffer[1024];
static PRBool IsInline(eHTMLTags aTag);
static PRBool IsBlockLevel(eHTMLTags aTag);
/**
 * COM-like interface discovery. Hands back this object viewed through the
 * requested interface and adds a reference for the caller.
 *
 * nsISupports and nsIContentSink requests both yield the nsIContentSink
 * view; nsIHTMLContentSink requests yield the nsIHTMLContentSink view.
 *
 * @update gpk02/03/99
 * @param aIID id of the interface to discover
 * @param aInstancePtr receives the interface pointer; set to 0 when the
 *        interface is not supported
 * @return NS_OK on success, NS_ERROR_NULL_POINTER if aInstancePtr is null,
 *         NS_NOINTERFACE for any other interface id
 */
nsresult
nsHTMLToTXTSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  void* found = 0;
  if (aIID.Equals(kISupportsIID) || aIID.Equals(kIContentSinkIID)) {
    found = (nsIContentSink*)(this);
  }
  else if (aIID.Equals(kIHTMLContentSinkIID)) {
    found = (nsIHTMLContentSink*)(this);
  }

  *aInstancePtr = found;
  if (0 == found) {
    return NS_NOINTERFACE;
  }

  NS_ADDREF_THIS();
  return NS_OK;
}
NS_IMPL_ADDREF(nsHTMLToTXTSinkStream)
NS_IMPL_RELEASE(nsHTMLToTXTSinkStream)
/**
 * Factory for the HTML-to-text sink stream. Creates a default-constructed
 * nsHTMLToTXTSinkStream and returns it through its nsIHTMLContentSink
 * interface with one reference held by the caller.
 *
 * @update gpk02/03/99
 * @param aInstancePtrResult receives the new sink; must not be null
 * @return NS_OK on success, NS_ERROR_NULL_POINTER if aInstancePtrResult is
 *         null, NS_ERROR_OUT_OF_MEMORY if allocation fails, otherwise the
 *         error returned by QueryInterface
 */
NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult) {
  // Reject a null out-pointer before allocating so nothing can leak.
  if (nsnull == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }
  nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream();
  if (nsnull == it) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  nsresult result = it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
  if (NS_OK != result) {
    // QueryInterface only adds a reference on success; on failure nobody
    // owns the object, so destroy it here instead of leaking it.
    delete it;
  }
  return result;
}
/**
 * Default constructor: output goes to cout, column position and indent
 * start at zero, and output is disabled until enabled elsewhere
 * (mDoOutput starts as PR_FALSE).
 *
 * @update gpk02/03/99
 */
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream()
  : mOutput(&cout),
    mIndent(0),
    mColPos(0),
    mDoOutput(PR_FALSE)
{
  NS_INIT_REFCNT();
}
/**
 * Construct a sink that writes to the given stream. Column position and
 * indent start at zero and output starts disabled (mDoOutput is PR_FALSE).
 *
 * @update gpk02/03/99
 * @param aStream stream that receives the text; only its address is stored
 */
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream)
  : mOutput(&aStream),
    mIndent(0),
    mColPos(0),
    mDoOutput(PR_FALSE)
{
  NS_INIT_REFCNT();
}
/**
 * Destructor. Only clears the stream pointer; the stream itself is
 * neither flushed nor destroyed here.
 *
 * @update gpk02/03/99
 */
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() {
mOutput=0; //we don't own the stream we're given; just forget it.
}
/**
 * Redirect subsequent output to a different stream.
 *
 * @update gpk02/03/99
 * @param aStream new destination; only its address is stored, so the
 *        stream must outlive its use by this sink
 */
NS_IMETHODIMP_(void)
nsHTMLToTXTSinkStream::SetOutputStream(ostream& aStream){
mOutput=&aStream;
}
/**
 * No-op: the body is empty, so nothing is written to aStream and all
 * parameters are ignored.
 * NOTE(review): presumably kept as a placeholder mirroring the HTML sink
 * stream's helper of the same name -- confirm.
 *
 * @update gpk02/03/99
 * @param theTag tag name (unused)
 * @param aNode parser node (unused)
 * @param tab indent value (unused)
 * @param aStream output stream (unused)
 * @param aNewline newline flag (unused)
 */
static
void OpenTagWithAttributes(const char* theTag,const nsIParserNode& aNode,int tab,ostream& aStream,PRBool aNewline) {
}
/**
 * No-op: the body is empty, so nothing is written to aStream and all
 * parameters are ignored.
 *
 * @update gpk02/03/99
 * @param theTag tag name (unused)
 * @param tab indent value (unused)
 * @param aStream output stream (unused)
 * @param aNewline newline flag (unused)
 */
static
void OpenTag(const char* theTag,int tab,ostream& aStream,PRBool aNewline) {
}
/**
 * No-op: the body is empty, so nothing is written to aStream and all
 * parameters are ignored.
 *
 * @update gpk02/03/99
 * @param theTag tag name (unused)
 * @param tab indent value (unused)
 * @param aStream output stream (unused)
 */
static
void CloseTag(const char* theTag,int tab,ostream& aStream) {
}
/**
 * Write the content of a tag/content pair to the stream. The open and
 * close helpers it calls are empty in this file, so only the content
 * itself (converted through the shared gBuffer) reaches aStream.
 *
 * @update gpk02/03/99
 * @param aTag tag whose name is passed to OpenTag/CloseTag
 * @param theContent text to emit
 * @param tab indent value forwarded to OpenTag
 * @param aStream destination stream
 */
static
void WritePair(eHTMLTags aTag,const nsString& theContent,int tab,ostream& aStream) {
  const char* tagName = GetTagName(aTag);

  OpenTag(tagName, tab, aStream, PR_FALSE);

  // Convert into the file-level scratch buffer, then emit it.
  theContent.ToCString(gBuffer, sizeof(gBuffer)-1);
  aStream << gBuffer;

  CloseTag(tagName, 0, aStream);
}
/**
 * This method gets called by the parser when it encounters
 * a title tag and wants to set the document title in the sink.
 * This sink ignores the title: nothing is stored or written.
 *
 * @update gpk02/03/99
 * @param aValue reference to new title value (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::SetTitle(const nsString& aValue){
return NS_OK;
}
/**
 * This method is used to open the outer HTML container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenHTML(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to close the outer HTML container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseHTML(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to open the only HEAD container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenHead(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to close the only HEAD container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseHead(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to open the main BODY container.
 * No-op in this sink: nothing is written and mDoOutput is not touched.
 * NOTE(review): mDoOutput is only switched on in OpenContainer() for
 * eHTMLTag_body; if the parser reports BODY through OpenBody() instead,
 * text output would never be enabled -- confirm against the DTD.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenBody(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to close the main BODY container.
 * No-op in this sink: nothing is written and mDoOutput is not touched.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseBody(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to open a new FORM container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenForm(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to close the outer FORM container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseForm(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to open a new MAP container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenMap(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to close the MAP container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseMap(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to open the FRAMESET container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenFrameset(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to close the FRAMESET container.
 * No-op in this sink: nothing is written.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseFrameset(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * Write a leaf node to the given stream. Only text and whitespace leaves
 * produce output, and only while mDoOutput is set; every other leaf type
 * is silently dropped. The node text is converted through the shared
 * gBuffer and mColPos is advanced by the text's length.
 *
 * NOTE(review): gBuffer is 1024 bytes, so longer text is presumably
 * truncated by ToCString while mColPos still grows by the full length --
 * confirm against nsString::ToCString.
 *
 * @updated gpk 06/18/98
 * @param aNode leaf node reported by the parser
 * @param aStream destination stream
 * @return NS_OK always
 */
nsresult
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream)
{
  const eHTMLTags leafTag = (eHTMLTags)aNode.GetNodeType();

  // Output is off (e.g. outside the body): emit nothing.
  if (PR_FALSE == mDoOutput) {
    return NS_OK;
  }

  // Text and whitespace are handled identically.
  if ((eHTMLTag_text == leafTag) || (eHTMLTag_whitespace == leafTag)) {
    const nsString& content = aNode.GetText();
    content.ToCString(gBuffer, sizeof(gBuffer)-1);
    aStream << gBuffer;
    mColPos += content.Length();
  }

  return NS_OK;
}
/**
 * This gets called by the parser when you want to add
 * a PI node to the current container in the content
 * model. This sink ignores processing instructions:
 * nothing is written.
 *
 * @updated gpk02/03/99
 * @param aNode the processing-instruction node (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This gets called by the parser when you want to add
 * a comment node to the current container in the content
 * model. This sink ignores comments: nothing is written.
 *
 * @updated gpk02/03/99
 * @param aNode the comment node (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode){
return NS_OK;
}
/**
 * This method is used to open a general container.
 * This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc.
 * Nothing is written here; the only effect is that opening a BODY
 * container switches text output on (mDoOutput = PR_TRUE).
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface; only its node type is read
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
  const eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
  // The node text was fetched into an unused local before; that dead
  // lookup has been removed.
  if (type == eHTMLTag_body)
    mDoOutput = PR_TRUE;
  return NS_OK;
}
/**
 * This method is used to close a generic container.
 * Closing a BODY container switches text output off. Closing any
 * block-level container (per IsBlockLevel) ends the current output line
 * if something has been written on it, and resets the column position.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface; only its node type is read
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){
  const eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
  if (type == eHTMLTag_body)
    mDoOutput = PR_FALSE;
  if (IsBlockLevel(type) && (mColPos != 0))
  {
    // Guard the stream pointer the same way AddLeaf() does instead of
    // dereferencing it unconditionally.
    if (mOutput)
    {
      *mOutput << endl;
    }
    mColPos = 0;
  }
  return NS_OK;
}
/**
 * This method is used to add a leaf to the currently open container.
 * Forwards to the stream-taking AddLeaf overload using the sink's current
 * output stream; does nothing when no stream is set.
 *
 * @update 07/12/98 gpk
 * @param aNode reference to parser node interface
 * @return NS_OK when there is no stream, otherwise the overload's result
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
  if (!mOutput) {
    return NS_OK;
  }
  return AddLeaf(aNode, *mOutput);
}
/**
 * This method gets called when the parser begins the process
 * of building the content model via the content sink.
 * No-op in this sink.
 *
 * @update gpk02/03/99
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::WillBuildModel(void){
return NS_OK;
}
/**
 * This method gets called when the parser concludes the process
 * of building the content model via the content sink.
 * No-op in this sink: the quality level is ignored and the stream
 * is not flushed here.
 *
 * @param aQualityLevel describes how well formed the doc was.
 * 0=GOOD; 1=FAIR; 2=POOR; (unused)
 * @update gpk02/03/99
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::DidBuildModel(PRInt32 aQualityLevel) {
return NS_OK;
}
/**
 * This method gets called when the parser gets i/o blocked,
 * and wants to notify the sink that it may be a while before
 * more data is available. No-op in this sink.
 *
 * @update gpk02/03/99
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::WillInterrupt(void) {
return NS_OK;
}
/**
 * This method gets called when the parser i/o gets unblocked,
 * and we're about to start dumping content again to the sink.
 * No-op in this sink.
 *
 * @update gpk02/03/99
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::WillResume(void) {
return NS_OK;
}
/**
 * Receives the parser that drives this sink. The pointer is ignored:
 * it is neither stored nor reference-counted here.
 *
 * @param aParser the parser (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::SetParser(nsIParser* aParser) {
return NS_OK;
}
/**
 * Error notification hook. The error code is ignored and nothing is
 * written or recorded.
 *
 * @param aErrorResult the reported error code (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::NotifyError(nsresult aErrorResult)
{
return NS_OK;
}
/**
 * Report whether a tag is treated as inline by this sink. The answer
 * comes from the fixed list below; any tag not listed is not inline.
 *
 * @param aTag tag id to classify
 * @return PR_TRUE for the listed tags, PR_FALSE otherwise
 */
PRBool IsInline(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_a:
    case eHTMLTag_address:
    case eHTMLTag_big:
    case eHTMLTag_blink:
    case eHTMLTag_b:
    case eHTMLTag_br:
    case eHTMLTag_cite:
    case eHTMLTag_code:
    case eHTMLTag_dfn:
    case eHTMLTag_em:
    case eHTMLTag_font:
    case eHTMLTag_img:
    case eHTMLTag_i:
    case eHTMLTag_kbd:
    case eHTMLTag_keygen:
    case eHTMLTag_nobr:
    case eHTMLTag_samp:
    case eHTMLTag_small:
    case eHTMLTag_spacer:
    case eHTMLTag_span:
    case eHTMLTag_strike:
    case eHTMLTag_strong:
    case eHTMLTag_sub:
    case eHTMLTag_sup:
    case eHTMLTag_td:
    case eHTMLTag_textarea:
    case eHTMLTag_tt:
    case eHTMLTag_var:
    case eHTMLTag_wbr:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}
/**
 * Report whether a tag is treated as block-level, defined here simply as
 * "not inline" according to IsInline().
 *
 * @param aTag tag id to classify
 * @return PR_TRUE when IsInline(aTag) is false, PR_FALSE otherwise
 */
PRBool IsBlockLevel(eHTMLTags aTag)
{
  if (IsInline(aTag)) {
    return PR_FALSE;
  }
  return PR_TRUE;
}

Просмотреть файл

@ -0,0 +1,132 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/**
* MODULE NOTES:
*
* If you've been paying attention to our many content sink classes, you may be
* asking yourself, "why do we need yet another one?" The answer is that this
* implementation, unlike all the others, really sends its output to a given stream
* rather than to an actual content sink (as defined in our HTML document system).
*
* We use this class for a number of purposes:
* 1) For actual document i/o using XIF (xml interchange format)
* 2) For document conversions
* 3) For debug purposes (to cause output to go to cout or a file)
*
* If no stream is declared in the constructor then all output goes to cout.
* The file is pretty printed according to the pretty printing interface. subclasses
* may choose to override this behavior or set runtime flags for desired results.
*/
#ifndef NS_HTMLTOTEXTSINK_STREAM
#define NS_HTMLTOTEXTSINK_STREAM
#include "nsIParserNode.h"
#include "nsIHTMLContentSink.h"
#include "nshtmlpars.h"
#include "nsHTMLTokens.h"
#define NS_HTMLTOTEXTSINK_STREAM_IID \
{0xa39c6bff, 0x15f0, 0x11d2, \
{0x80, 0x41, 0x0, 0x10, 0x4b, 0x98, 0x3f, 0xd4}}
#ifndef XP_MAC
class ostream;
#endif
/**
 * Content sink that implements nsIHTMLContentSink and writes to an
 * ostream rather than building a content model. The sink does not own
 * the stream it writes to; it only keeps a pointer to it.
 */
class nsHTMLToTXTSinkStream : public nsIHTMLContentSink {
public:
/**
* Standard constructor
* @update gpk02/03/99
*/
nsHTMLToTXTSinkStream();
/**
* Constructor taking the stream that output is written to.
* @param aStream destination stream
*/
nsHTMLToTXTSinkStream(ostream& aStream);
/**
* virtual destructor
* @update gpk02/03/99
*/
virtual ~nsHTMLToTXTSinkStream();
/**
* Replace the stream that output is written to.
* @param aStream new destination stream
*/
NS_IMETHOD_(void) SetOutputStream(ostream& aStream);
// nsISupports
NS_DECL_ISUPPORTS
/*******************************************************************
* The following methods are inherited from nsIContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD WillBuildModel(void);
NS_IMETHOD DidBuildModel(PRInt32 aQualityLevel);
NS_IMETHOD WillInterrupt(void);
NS_IMETHOD WillResume(void);
NS_IMETHOD SetParser(nsIParser* aParser);
NS_IMETHOD OpenContainer(const nsIParserNode& aNode);
NS_IMETHOD CloseContainer(const nsIParserNode& aNode);
NS_IMETHOD AddLeaf(const nsIParserNode& aNode);
NS_IMETHOD NotifyError(nsresult aErrorResult);
NS_IMETHOD AddComment(const nsIParserNode& aNode);
NS_IMETHOD AddProcessingInstruction(const nsIParserNode& aNode);
/*******************************************************************
* The following methods are inherited from nsIHTMLContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD SetTitle(const nsString& aValue);
NS_IMETHOD OpenHTML(const nsIParserNode& aNode);
NS_IMETHOD CloseHTML(const nsIParserNode& aNode);
NS_IMETHOD OpenHead(const nsIParserNode& aNode);
NS_IMETHOD CloseHead(const nsIParserNode& aNode);
NS_IMETHOD OpenBody(const nsIParserNode& aNode);
NS_IMETHOD CloseBody(const nsIParserNode& aNode);
NS_IMETHOD OpenForm(const nsIParserNode& aNode);
NS_IMETHOD CloseForm(const nsIParserNode& aNode);
NS_IMETHOD OpenMap(const nsIParserNode& aNode);
NS_IMETHOD CloseMap(const nsIParserNode& aNode);
NS_IMETHOD OpenFrameset(const nsIParserNode& aNode);
NS_IMETHOD CloseFrameset(const nsIParserNode& aNode);
protected:
// Writes a leaf node to an explicit stream (overload of the public AddLeaf).
nsresult AddLeaf(const nsIParserNode& aNode, ostream& aStream);
// NOTE(review): declared here, but no definition is visible alongside the
// other member definitions in this change -- confirm it is implemented
// before calling it.
void WriteAttributes(const nsIParserNode& aNode,ostream& aStream);
protected:
ostream* mOutput;   // destination stream; not owned by the sink
PRInt32 mIndent;    // indent value
PRInt32 mColPos;    // column position on the current output line
PRBool mDoOutput;   // output on/off flag
};
extern NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult);
#endif

Просмотреть файл

@ -6,6 +6,7 @@ nshtmlpars.h
nsIContentSink.h
nsIHTMLContentSink.h
nsHTMLContentSinkStream.h
nsHTMLToTXTSinkStream.h
nsITokenizer.h
nsHTMLTokens.h
nsIParserNode.h

Просмотреть файл

@ -50,6 +50,7 @@ CPPSRCS= \
nsToken.cpp \
nsTokenHandler.cpp \
nsHTMLContentSinkStream.cpp \
nsHTMLToTXTSinkStream.cpp \
nsValidDTD.cpp \
nsWellFormedDTD.cpp \
nsViewSourceHTML.cpp \
@ -64,6 +65,7 @@ EXPORTS = \
nsIExpatTokenizer.h \
nsIHTMLContentSink.h \
nsHTMLContentSinkStream.h \
nsHTMLToTXTSinkStream.h \
nsHTMLEntities.h \
nsHTMLTokens.h \
nsILoggingSink.h \

Просмотреть файл

@ -46,6 +46,7 @@ CPPSRCS= \
nsToken.cpp \
nsTokenHandler.cpp \
nsHTMLContentSinkStream.cpp \
nsHTMLToTXTSinkStream.cpp \
nsValidDTD.cpp \
nsWellFormedDTD.cpp \
nsViewSourceHTML.cpp \
@ -77,6 +78,7 @@ CPP_OBJS= \
.\$(OBJDIR)\nsToken.obj \
.\$(OBJDIR)\nsTokenHandler.obj \
.\$(OBJDIR)\nsHTMLContentSinkStream.obj \
.\$(OBJDIR)\nsHTMLToTXTSinkStream.obj \
.\$(OBJDIR)\nsValidDTD.obj \
.\$(OBJDIR)\nsWellFormedDTD.obj \
.\$(OBJDIR)\nsViewSourceHTML.obj \
@ -93,6 +95,7 @@ EXPORTS= \
nsIHTMLContentSink.h \
nsILoggingSink.h \
nsHTMLContentSinkStream.h \
nsHTMLToTXTSinkStream.h \
nsHTMLEntities.h \
nsHTMLTokens.h \
nsIParserNode.h \

Просмотреть файл

@ -41,28 +41,154 @@ static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
static char* gHeaderComment = "<!-- This page was created by the NGLayout output system. -->";
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
const int gTabSize=2;
static char gBuffer[500];
static char gBuffer[1024];
/** PRETTY PRINTING PROTOTYPES **/
PRBool IsInline(eHTMLTags aTag);
PRBool IsBlockLevel(eHTMLTags aTag);
PRInt32 BreakBeforeOpen(eHTMLTags aTag);
PRInt32 BreakAfterOpen(eHTMLTags aTag);
PRInt32 BreakBeforeClose(eHTMLTags aTag);
PRInt32 BreakAfterClose(eHTMLTags aTag);
PRBool IndentChildren(eHTMLTags aTag);
PRBool PreformattedChildren(eHTMLTags aTag);
PRBool EatOpen(eHTMLTags aTag);
PRBool EatClose(eHTMLTags aTag);
PRBool PermitWSBeforeOpen(eHTMLTags aTag);
PRBool PermitWSAfterOpen(eHTMLTags aTag);
PRBool PermitWSBeforeClose(eHTMLTags aTag);
PRBool PermitWSAfterClose(eHTMLTags aTag);
PRBool IgnoreWS(eHTMLTags aTag);
/**
 * Per-tag formatting record used by nsPrettyPrinter: four break flags,
 * an indent group and an on/off format switch. All state is public and
 * is (re)initialized through Init().
 */
class nsTagFormat
{
public:
  void Init(PRBool aBefore, PRBool aStart, PRBool aEnd, PRBool aAfter);
  void SetIndentGroup(PRUint8 aGroup);
  void SetFormat(PRBool aOnOff);
public:
  PRBool mBreakBefore;   // set from Init()'s aBefore
  PRBool mBreakStart;    // set from Init()'s aStart
  PRBool mBreakEnd;      // set from Init()'s aEnd
  PRBool mBreakAfter;    // set from Init()'s aAfter
  PRUint8 mIndentGroup;  // zero for none
  PRBool mFormat;        // format (on|off)
};

/**
 * Reset this record: store the four break flags, clear the indent group
 * and switch formatting on.
 *
 * @param aBefore value for mBreakBefore
 * @param aStart  value for mBreakStart
 * @param aEnd    value for mBreakEnd
 * @param aAfter  value for mBreakAfter
 */
void nsTagFormat::Init(PRBool aBefore, PRBool aStart, PRBool aEnd, PRBool aAfter)
{
  mBreakBefore = aBefore;
  mBreakStart = aStart;
  mBreakEnd = aEnd;
  mBreakAfter = aAfter;
  // Previously left uninitialized; zero means "no indent group".
  mIndentGroup = 0;
  mFormat = PR_TRUE;
}

/**
 * Assign the indent group for this tag.
 *
 * @param aGroup new indent group (zero for none)
 */
void nsTagFormat::SetIndentGroup(PRUint8 aGroup)
{
  mIndentGroup = aGroup;
}

/**
 * Turn formatting for this tag on or off.
 *
 * @param aOnOff new value for mFormat
 */
void nsTagFormat::SetFormat(PRBool aOnOff)
{
  mFormat = aOnOff;
}
/**
 * Pretty-printing configuration: indentation and auto-wrap settings, the
 * line-break string, and one nsTagFormat record per HTML tag id.
 */
class nsPrettyPrinter
{
public:
  void Init(PRBool aIndentEnable = PR_TRUE, PRUint8 aColSize = 2, PRUint8 aTabSize = 8, PRBool aUseTabs = PR_FALSE );
  PRBool mIndentEnable;
  PRUint8 mIndentColSize;
  PRUint8 mIndentTabSize;
  PRBool mIndentUseTabs;
  PRBool mAutowrapEnable;
  PRUint32 mAutoWrapColWidth;
  nsString mBreak; // CRLF, CR, LF
  nsTagFormat mTagFormat[NS_HTML_TAG_MAX+1];
};

/**
 * Load the default configuration: store the indentation parameters, enable
 * auto-wrap at column 72, use "\n" as the break string, clear every tag
 * record and then install the per-tag break flags listed below.
 *
 * @param aIndentEnable value for mIndentEnable
 * @param aColSize      value for mIndentColSize
 * @param aTabSize      value for mIndentTabSize
 * @param aUseTabs      value for mIndentUseTabs
 */
void nsPrettyPrinter::Init(PRBool aIndentEnable, PRUint8 aColSize, PRUint8 aTabSize, PRBool aUseTabs)
{
  mIndentEnable = aIndentEnable;
  mIndentColSize = aColSize;
  mIndentTabSize = aTabSize;
  mIndentUseTabs = aUseTabs;
  mAutowrapEnable = PR_TRUE;
  mAutoWrapColWidth = 72;
  mBreak = "\n"; // CRLF, CR, LF

  // mTagFormat holds NS_HTML_TAG_MAX+1 records, so the reset has to include
  // index NS_HTML_TAG_MAX; the previous "<" bound left the last record
  // uninitialized.
  for (PRUint32 i = 0; i <= NS_HTML_TAG_MAX; i++)
    mTagFormat[i].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);

  // Arguments are (before, start, end, after).
  mTagFormat[eHTMLTag_a].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_abbr].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);
  mTagFormat[eHTMLTag_applet].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
  mTagFormat[eHTMLTag_area].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_b].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);
  mTagFormat[eHTMLTag_base].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_blockquote].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_body].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_br].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_caption].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_center].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_dd].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_dir].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_div].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_dl].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_dt].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_embed].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_form].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_frame].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_frameset].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h1].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h2].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h3].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h4].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h5].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_h6].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_head].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_hr].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_html].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_ilayer].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_input].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_isindex].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_layer].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_li].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_link].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_map].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
  mTagFormat[eHTMLTag_menu].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_meta].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_object].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
  mTagFormat[eHTMLTag_ol].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_option].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_p].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_param].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_pre].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_script].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_select].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_style].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_table].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
  mTagFormat[eHTMLTag_td].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_textarea].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_th].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_title].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_tr].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
  mTagFormat[eHTMLTag_ul].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
}
/**
 * Forward declarations of file-local (static) helpers. Each one takes an
 * HTML tag id and returns either a PRBool or a PRInt32.
 * NOTE(review): judging by the names these drive the pretty printer
 * (line breaks around open/close tags, indentation, whitespace handling);
 * the bodies are not visible here, so confirm against the definitions.
 */
static PRBool IsInline(eHTMLTags aTag);
static PRBool IsBlockLevel(eHTMLTags aTag);
static PRInt32 BreakBeforeOpen(eHTMLTags aTag);
static PRInt32 BreakAfterOpen(eHTMLTags aTag);
static PRInt32 BreakBeforeClose(eHTMLTags aTag);
static PRInt32 BreakAfterClose(eHTMLTags aTag);
static PRBool IndentChildren(eHTMLTags aTag);
static PRBool PreformattedChildren(eHTMLTags aTag);
static PRBool EatOpen(eHTMLTags aTag);
static PRBool EatClose(eHTMLTags aTag);
static PRBool PermitWSBeforeOpen(eHTMLTags aTag);
static PRBool PermitWSAfterOpen(eHTMLTags aTag);
static PRBool PermitWSBeforeClose(eHTMLTags aTag);
static PRBool PermitWSAfterClose(eHTMLTags aTag);
static PRBool IgnoreWS(eHTMLTags aTag);
@ -150,6 +276,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHead
* @return
*/
nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoFormat,PRBool aDoHeader) {
NS_INIT_REFCNT();
mOutput = &aStream;
mLowerCaseTags = PR_TRUE;
memset(mHTMLTagStack,0,sizeof(mHTMLTagStack));

Просмотреть файл

@ -0,0 +1,626 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are Copyright (C) 1998
* Netscape Communications Corporation. All Rights Reserved.
*/
/**
* MODULE NOTES:
*
* This file implements the concrete TXT ContentSink class.
* This class is used during the parsing process as the
* primary interface between the parser and the content
* model.
*/
#include "nsHTMLToTXTSinkStream.h"
#include "nsHTMLTokens.h"
#include <iostream.h>
#include "nsString.h"
#include "nsIParser.h"
// Interface IDs that QueryInterface() in this file compares against.
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
// NOTE(review): gTabSize is not referenced in the visible part of this file.
const int gTabSize=2;
// Scratch buffer shared by every sink in this file: nsString contents are
// copied into it with ToCString() before being written to the output stream.
static char gBuffer[1024];
// Tag classification helpers; their definitions are at the end of this file.
static PRBool IsInline(eHTMLTags aTag);
static PRBool IsBlockLevel(eHTMLTags aTag);
/**
 * COM-style interface discovery for the text sink.
 *
 * Requests for nsISupports and nsIContentSink both receive the
 * nsIContentSink view of this object; requests for nsIHTMLContentSink
 * receive the nsIHTMLContentSink view. A successful lookup adds one
 * reference to this object.
 *
 * @update  gpk02/03/99
 * @param   aIID          id of the interface being asked for
 * @param   aInstancePtr  receives the interface pointer, or 0 when the
 *                        interface is not supported
 * @return  NS_OK on success, NS_ERROR_NULL_POINTER when aInstancePtr is
 *          null, NS_NOINTERFACE for any other interface id
 */
nsresult
nsHTMLToTXTSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  void* found = 0;
  if (aIID.Equals(kISupportsIID) || aIID.Equals(kIContentSinkIID)) {
    found = (nsIContentSink*)(this);
  }
  else if (aIID.Equals(kIHTMLContentSinkIID)) {
    found = (nsIHTMLContentSink*)(this);
  }

  *aInstancePtr = found;
  if (0 == found) {
    return NS_NOINTERFACE;
  }

  NS_ADDREF_THIS();
  return NS_OK;
}
// Macro-generated reference counting for this class (presumably the
// AddRef/Release pair declared by NS_DECL_ISUPPORTS in the header).
NS_IMPL_ADDREF(nsHTMLToTXTSinkStream)
NS_IMPL_RELEASE(nsHTMLToTXTSinkStream)
/**
 * Factory for the HTML-to-text sink. Creates a sink that writes to the
 * default stream chosen by the default constructor and hands it back
 * through its nsIHTMLContentSink interface, holding one reference.
 *
 * @update  gpk02/03/99
 * @param   aInstancePtrResult  receives the new sink; must not be null
 * @return  NS_OK on success, NS_ERROR_NULL_POINTER when
 *          aInstancePtrResult is null, NS_ERROR_OUT_OF_MEMORY when the
 *          sink cannot be allocated, otherwise the QueryInterface error
 */
NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult) {
  // Reject a null out-pointer before allocating, so nothing can leak.
  if (nsnull == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }

  nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream();
  if (nsnull == it) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  nsresult result = it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
  if (NS_OK != result) {
    // Nobody received a reference, so the object would otherwise be orphaned.
    delete it;
  }
  return result;
}
/**
 * Default constructor: the sink writes to cout. Output starts disabled
 * (mDoOutput is PR_FALSE) with the column position and indent at zero.
 *
 * @update  gpk02/03/99
 */
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream()
  : mOutput(&cout),
    mIndent(0),
    mColPos(0),
    mDoOutput(PR_FALSE)
{
  NS_INIT_REFCNT();
}
/**
 * Constructs a sink that writes to the caller's stream. Only the address
 * of the stream is kept. Output starts disabled (mDoOutput is PR_FALSE)
 * with the column position and indent at zero.
 *
 * @update  gpk02/03/99
 * @param   aStream  stream that receives the text output
 */
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream)
  : mOutput(&aStream),
    mIndent(0),
    mColPos(0),
    mDoOutput(PR_FALSE)
{
  NS_INIT_REFCNT();
}
/**
 * Destructor. The output stream belongs to whoever supplied it, so the
 * sink only drops its pointer and never closes or deletes the stream.
 *
 * @update  gpk02/03/99
 */
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream()
{
  // Not ours to destroy; just let go of it.
  mOutput = NULL;
}
/**
 * Redirects all subsequent output to a different stream. Only the
 * address of the stream is stored.
 *
 * @update  gpk02/03/99
 * @param   aStream  the new destination stream
 */
NS_IMETHODIMP_(void)
nsHTMLToTXTSinkStream::SetOutputStream(ostream& aStream)
{
  mOutput = &aStream;
}
/**
 * Placeholder with an empty body: nothing is written to aStream and
 * every argument is ignored.
 *
 * @update  gpk02/03/99
 * @param   theTag    tag name (unused)
 * @param   aNode     node carrying the attributes (unused)
 * @param   tab       indent level (unused)
 * @param   aStream   output stream (unused)
 * @param   aNewline  newline flag (unused)
 */
static
void OpenTagWithAttributes(const char* theTag,
                           const nsIParserNode& aNode,
                           int tab,
                           ostream& aStream,
                           PRBool aNewline)
{
}
/**
 * Placeholder with an empty body: nothing is written to aStream and
 * every argument is ignored.
 *
 * @update  gpk02/03/99
 * @param   theTag    tag name (unused)
 * @param   tab       indent level (unused)
 * @param   aStream   output stream (unused)
 * @param   aNewline  newline flag (unused)
 */
static
void OpenTag(const char* theTag,
             int tab,
             ostream& aStream,
             PRBool aNewline)
{
}
/**
 * Placeholder with an empty body: nothing is written to aStream and
 * every argument is ignored.
 *
 * @update  gpk02/03/99
 * @param   theTag   tag name (unused)
 * @param   tab      indent level (unused)
 * @param   aStream  output stream (unused)
 */
static
void CloseTag(const char* theTag,
              int tab,
              ostream& aStream)
{
}
/**
 * Writes theContent to aStream, bracketed by calls to OpenTag() and
 * CloseTag() for the given tag. The content is first copied into the
 * shared gBuffer, so it is limited to what fits in that buffer.
 *
 * @update  gpk02/03/99
 * @param   aTag        tag whose name is passed to OpenTag/CloseTag
 * @param   theContent  text written between the two calls
 * @param   tab         indent level forwarded to OpenTag
 * @param   aStream     stream that receives the output
 */
static
void WritePair(eHTMLTags aTag,const nsString& theContent,int tab,ostream& aStream)
{
  const char* tagName = GetTagName(aTag);

  OpenTag(tagName, tab, aStream, PR_FALSE);

  // Copy the string into the scratch buffer so it can be streamed as a C string.
  theContent.ToCString(gBuffer, sizeof(gBuffer)-1);
  aStream << gBuffer;

  CloseTag(tagName, 0, aStream);
}
/**
 * Called by the parser to set the document title. This sink ignores
 * the title; the call is a no-op.
 *
 * @update  gpk02/03/99
 * @param   aValue  the new title (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::SetTitle(const nsString& aValue)
{
  return NS_OK;
}
/**
 * Called when the outer HTML container is opened. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenHTML(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when the outer HTML container is closed. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseHTML(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when the HEAD container is opened. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenHead(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when the HEAD container is closed. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseHead(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when the main BODY container is opened. No-op in this sink.
 *
 * NOTE(review): text output is switched on only when a body tag reaches
 * OpenContainer(); confirm the parser never delivers the body tag here
 * instead, otherwise nothing would be written.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenBody(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when the main BODY container is closed. No-op in this sink.
 *
 * NOTE(review): text output is switched off only when a body tag reaches
 * CloseContainer(); confirm the parser never delivers the body close here
 * instead.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseBody(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when a FORM container is opened. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenForm(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when a FORM container is closed. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseForm(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when a MAP container is opened. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenMap(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when a MAP container is closed. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseMap(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when the FRAMESET container is opened. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenFrameset(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when the FRAMESET container is closed. No-op in this sink.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseFrameset(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Writes a leaf node to the given stream.
 *
 * Only text and whitespace leaves produce output, and only while
 * mDoOutput is PR_TRUE; every other leaf is silently accepted. The
 * node's text is converted to a C string and streamed, and mColPos is
 * advanced by the number of characters in the text.
 *
 * Text that fits is converted in the shared gBuffer. Longer text gets a
 * temporary heap buffer so that it is written in full rather than cut
 * off at the scratch buffer's size while mColPos still counts all of it.
 *
 * @updated gpk 06/18/98
 * @param   aNode    leaf node handed over by the parser
 * @param   aStream  stream that receives the text
 * @return  NS_OK, or NS_ERROR_OUT_OF_MEMORY when a temporary buffer for
 *          long text cannot be allocated
 */
nsresult
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream)
{
  if (mDoOutput == PR_FALSE)
    return NS_OK;

  eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
  if ((type != eHTMLTag_text) && (type != eHTMLTag_whitespace))
    return NS_OK;

  const nsString& text = aNode.GetText();
  const PRInt32 length = text.Length();

  // Keep the "capacity - 1" convention used elsewhere in this file when
  // calling ToCString, and leave room for the terminator on top of that.
  char*   buffer     = gBuffer;
  char*   heapBuffer = 0;
  PRInt32 capacity   = (PRInt32)sizeof(gBuffer);
  if (length + 2 > capacity) {
    capacity   = length + 2;
    heapBuffer = new char[capacity];
    if (0 == heapBuffer)
      return NS_ERROR_OUT_OF_MEMORY;
    buffer = heapBuffer;
  }

  text.ToCString(buffer, capacity - 1);
  aStream << buffer;
  mColPos += length;

  delete [] heapBuffer;   // no-op when the shared buffer was used
  return NS_OK;
}
/**
 * Called by the parser to add a processing-instruction node. This sink
 * writes nothing for processing instructions; the call is a no-op.
 *
 * @updated gpk02/03/99
 * @param   aNode  parser node for the processing instruction (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddProcessingInstruction(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called by the parser to add a comment node. This sink writes nothing
 * for comments; the call is a no-op.
 *
 * @updated gpk02/03/99
 * @param   aNode  parser node for the comment (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode)
{
  return NS_OK;
}
/**
 * Called when a general container is opened.
 * This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc.
 *
 * The only container this sink reacts to is BODY: opening it switches
 * text output on (mDoOutput becomes PR_TRUE). Nothing is written here.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag being opened
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
{
  eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
  if (type == eHTMLTag_body) {
    mDoOutput = PR_TRUE;
  }
  return NS_OK;
}
/**
 * Called when a general container is closed.
 *
 * Closing BODY switches text output off (mDoOutput becomes PR_FALSE).
 * Closing any block-level container, as classified by IsBlockLevel(),
 * ends the current output line: if anything has been written on it
 * (mColPos != 0) a newline is sent to the output stream and mColPos is
 * reset to zero. The stream pointer is checked first, the same way the
 * public AddLeaf() does.
 *
 * @update  07/12/98 gpk
 * @param   aNode  parser node for the tag being closed
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode)
{
  eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
  if (type == eHTMLTag_body) {
    mDoOutput = PR_FALSE;
  }

  if (IsBlockLevel(type) && (mColPos != 0) && (mOutput != 0)) {
    *mOutput << endl;
    mColPos = 0;
  }
  return NS_OK;
}
/**
 * Called by the parser to add a leaf to the currently open container.
 * Forwards the node to the stream-taking AddLeaf() overload using the
 * sink's current output stream; does nothing when no stream is set.
 *
 * @update  07/12/98 gpk
 * @param   aNode  leaf node handed over by the parser
 * @return  NS_OK when there is no output stream, otherwise whatever the
 *          stream-taking overload returns
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  return AddLeaf(aNode, *mOutput);
}
/**
 * Notification that the parser is about to start building the content
 * model through this sink. No-op here.
 *
 * @update  gpk02/03/99
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::WillBuildModel(void)
{
  return NS_OK;
}
/**
 * Notification that the parser has finished building the content model
 * through this sink. No-op here.
 *
 * @update  gpk02/03/99
 * @param   aQualityLevel  how well formed the document was
 *                         (0=GOOD; 1=FAIR; 2=POOR); unused
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::DidBuildModel(PRInt32 aQualityLevel)
{
  return NS_OK;
}
/**
 * Notification that the parser is blocked on i/o and it may be a while
 * before more data arrives. No-op here.
 *
 * @update  gpk02/03/99
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::WillInterrupt(void)
{
  return NS_OK;
}
/**
 * Notification that parser i/o is unblocked and content is about to be
 * delivered to the sink again. No-op here.
 *
 * @update  gpk02/03/99
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::WillResume(void)
{
  return NS_OK;
}
/**
 * Receives the parser that drives this sink. The pointer is neither
 * stored nor used.
 *
 * @param   aParser  the driving parser (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::SetParser(nsIParser* aParser)
{
  return NS_OK;
}
/**
 * Error notification from the parser. The error code is ignored.
 *
 * @param   aErrorResult  error being reported (unused)
 * @return  NS_OK always
 */
NS_IMETHODIMP
nsHTMLToTXTSinkStream::NotifyError(nsresult aErrorResult)
{
  return NS_OK;
}
/**
 * Tells whether a tag is on this sink's list of inline tags, i.e. tags
 * whose close does not end the current output line.
 *
 * @param   aTag  tag to classify
 * @return  PR_TRUE for the tags listed below, PR_FALSE for all others
 */
PRBool IsInline(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_a:
    case eHTMLTag_address:
    case eHTMLTag_big:
    case eHTMLTag_blink:
    case eHTMLTag_b:
    case eHTMLTag_br:
    case eHTMLTag_cite:
    case eHTMLTag_code:
    case eHTMLTag_dfn:
    case eHTMLTag_em:
    case eHTMLTag_font:
    case eHTMLTag_img:
    case eHTMLTag_i:
    case eHTMLTag_kbd:
    case eHTMLTag_keygen:
    case eHTMLTag_nobr:
    case eHTMLTag_samp:
    case eHTMLTag_small:
    case eHTMLTag_spacer:
    case eHTMLTag_span:
    case eHTMLTag_strike:
    case eHTMLTag_strong:
    case eHTMLTag_sub:
    case eHTMLTag_sup:
    case eHTMLTag_td:
    case eHTMLTag_textarea:
    case eHTMLTag_tt:
    case eHTMLTag_var:
    case eHTMLTag_wbr:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}
/**
 * A tag counts as block level exactly when IsInline() rejects it.
 *
 * @param   aTag  tag to classify
 * @return  PR_TRUE when the tag is not inline, PR_FALSE otherwise
 */
PRBool IsBlockLevel(eHTMLTags aTag)
{
  if (IsInline(aTag)) {
    return PR_FALSE;
  }
  return PR_TRUE;
}

Просмотреть файл

@ -0,0 +1,132 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/**
* MODULE NOTES:
*
* If you've been paying attention to our many content sink classes, you may be
* asking yourself, "why do we need yet another one?" The answer is that this
* implementation, unlike all the others, really sends its output to a given stream
* rather than to an actual content sink (as defined in our HTML document system).
*
* We use this class for a number of purposes:
* 1) For actual document i/o using XIF (xml interchange format)
* 2) For document conversions
* 3) For debug purposes (to cause output to go to cout or a file)
*
* If no stream is declared in the constructor then all output goes to cout.
* The file is pretty printed according to the pretty printing interface. Subclasses
* may choose to override this behavior or set runtime flags for desired results.
*/
#ifndef NS_HTMLTOTEXTSINK_STREAM
#define NS_HTMLTOTEXTSINK_STREAM
#include "nsIParserNode.h"
#include "nsIHTMLContentSink.h"
#include "nshtmlpars.h"
#include "nsHTMLTokens.h"
#define NS_HTMLTOTEXTSINK_STREAM_IID \
{0xa39c6bff, 0x15f0, 0x11d2, \
{0x80, 0x41, 0x0, 0x10, 0x4b, 0x98, 0x3f, 0xd4}}
#ifndef XP_MAC
class ostream;
#endif
/**
 * Content sink that writes the text of a parsed HTML document to an
 * ostream. It implements nsIHTMLContentSink so the parser can drive it
 * like any other sink.
 */
class nsHTMLToTXTSinkStream : public nsIHTMLContentSink {
public:
/**
* Standard constructor; the implementation sends output to cout.
* @update gpk02/03/99
*/
nsHTMLToTXTSinkStream();
/**
* Constructor that sends output to the given stream.
* Only the stream's address is kept.
*/
nsHTMLToTXTSinkStream(ostream& aStream);
/**
* virtual destructor; does not destroy the output stream.
* @update gpk02/03/99
*/
virtual ~nsHTMLToTXTSinkStream();
/**
* Redirects subsequent output to aStream.
*/
NS_IMETHOD_(void) SetOutputStream(ostream& aStream);
// nsISupports
NS_DECL_ISUPPORTS
/*******************************************************************
* The following methods are inherited from nsIContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD WillBuildModel(void);
NS_IMETHOD DidBuildModel(PRInt32 aQualityLevel);
NS_IMETHOD WillInterrupt(void);
NS_IMETHOD WillResume(void);
NS_IMETHOD SetParser(nsIParser* aParser);
NS_IMETHOD OpenContainer(const nsIParserNode& aNode);
NS_IMETHOD CloseContainer(const nsIParserNode& aNode);
NS_IMETHOD AddLeaf(const nsIParserNode& aNode);
NS_IMETHOD NotifyError(nsresult aErrorResult);
NS_IMETHOD AddComment(const nsIParserNode& aNode);
NS_IMETHOD AddProcessingInstruction(const nsIParserNode& aNode);
/*******************************************************************
* The following methods are inherited from nsIHTMLContentSink.
* Please see that file for details.
*******************************************************************/
NS_IMETHOD SetTitle(const nsString& aValue);
NS_IMETHOD OpenHTML(const nsIParserNode& aNode);
NS_IMETHOD CloseHTML(const nsIParserNode& aNode);
NS_IMETHOD OpenHead(const nsIParserNode& aNode);
NS_IMETHOD CloseHead(const nsIParserNode& aNode);
NS_IMETHOD OpenBody(const nsIParserNode& aNode);
NS_IMETHOD CloseBody(const nsIParserNode& aNode);
NS_IMETHOD OpenForm(const nsIParserNode& aNode);
NS_IMETHOD CloseForm(const nsIParserNode& aNode);
NS_IMETHOD OpenMap(const nsIParserNode& aNode);
NS_IMETHOD CloseMap(const nsIParserNode& aNode);
NS_IMETHOD OpenFrameset(const nsIParserNode& aNode);
NS_IMETHOD CloseFrameset(const nsIParserNode& aNode);
protected:
// Writes a text or whitespace leaf to aStream while output is enabled.
nsresult AddLeaf(const nsIParserNode& aNode, ostream& aStream);
// NOTE(review): no definition of WriteAttributes is visible in the
// accompanying implementation file; confirm it is defined or unused.
void WriteAttributes(const nsIParserNode& aNode,ostream& aStream);
protected:
// Stream that receives the output; not owned by the sink.
ostream* mOutput;
// Set to 0 by the constructors; not otherwise used in the visible implementation.
PRInt32 mIndent;
// Characters written since the last newline; advanced by AddLeaf and
// reset when CloseContainer ends the line.
PRInt32 mColPos;
// PR_TRUE between OpenContainer and CloseContainer of the body tag;
// AddLeaf writes nothing while this is PR_FALSE.
PRBool mDoOutput;
};
/**
 * Factory: creates an nsHTMLToTXTSinkStream and returns it through
 * aInstancePtrResult as an nsIHTMLContentSink.
 * @return NS_OK on success, otherwise an error code
 */
extern NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult);
#endif