fixed DTD selection bugs; entity bugs; conditionally enabled Strict DTD

This commit is contained in:
rickg%netscape.com 2000-04-17 06:33:31 +00:00
Родитель 2440048faf
Коммит e7f8ee498d
32 изменённых файлов: 4784 добавлений и 4086 удалений

Просмотреть файл

@ -23,6 +23,8 @@
//#define ENABLE_CRC
//#define RICKG_DEBUG
#define ENABLE_RESIDUALSTYLE
//#define ALLOW_TR_AS_CHILD_OF_TABLE //by setting this to true, TR is allowable directly in TABLE.
#ifdef RICKG_DEBUG
#include <fstream.h>
#endif
@ -155,7 +157,7 @@ CNavDTD::CNavDTD() : nsIDTD(),
mExpectedCRC32=0;
mDTDState=NS_OK;
mStyleHandlingEnabled=PR_TRUE;
mDocType=eHTMLText;
mDocType=eHTML3Text;
mRequestedHead=PR_FALSE;
mIsFormContainer=PR_FALSE;
@ -369,7 +371,6 @@ PRBool CNavDTD::Verify(nsString& aURLRef,nsIParser* aParser){
eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBuffer, PRInt32 aVersion) {
eAutoDetectResult result=eUnknownDetect;
if(eViewSource==aParserContext.mParserCommand) {
if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kPlainTextContentType)) {
result=ePrimaryDetect;
@ -380,7 +381,7 @@ eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBu
}
else {
if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kHTMLTextContentType)) {
result=ePrimaryDetect;
result=(eParseMode_strict==aParserContext.mParseMode) ? eValidDetect : ePrimaryDetect;
}
else if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kPlainTextContentType)) {
result=ePrimaryDetect;
@ -392,7 +393,10 @@ eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBu
result = eValidDetect ;
if(0==aParserContext.mMimeType.Length()) {
aParserContext.SetMimeType(NS_ConvertToString(kHTMLTextContentType));
result = (theBufHasXML) ? eValidDetect : ePrimaryDetect;
if(!theBufHasXML) {
result=(eParseMode_strict==aParserContext.mParseMode) ? eValidDetect : ePrimaryDetect;
}
else result=eValidDetect;
}
}
}
@ -783,9 +787,6 @@ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){
case eToken_attribute:
result=HandleAttributeToken(theToken); break;
case eToken_style:
result=HandleStyleToken(theToken); break;
case eToken_instruction:
result=HandleProcessingInstructionToken(theToken); break;
@ -1890,21 +1891,6 @@ nsresult CNavDTD::HandleScriptToken(const nsIParserNode *aNode) {
return result;
}
/**
 *  Handler for style tokens seen during the parse. This DTD performs
 *  no work for them; the token pointer is only checked for null via
 *  the precondition.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- the style token being handled (must not be null)
 *  @return  NS_OK in all cases
 */
nsresult CNavDTD::HandleStyleToken(CToken* aToken){
  nsresult result=NS_OK;  //style tokens need no processing here
  NS_PRECONDITION(0!=aToken,kNullToken);
  return result;
}
/**
* This method gets called when an "instruction" token has been
@ -2114,7 +2100,18 @@ nsresult CNavDTD::CollectSkippedContent(nsCParserNode& aNode,PRInt32 &aCount) {
* @return PR_TRUE if parent can contain child
*/
PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const {
return gHTMLElements[aParent].CanContain((eHTMLTags)aChild);
PRBool result=gHTMLElements[aParent].CanContain((eHTMLTags)aChild);
#ifdef ALLOW_TR_AS_CHILD_OF_TABLE
if(!result) {
//XXX This vile hack is here to support bug 30378, which allows
//table to contain tr directly in an html32 document.
if((eHTMLTag_tr==aChild) && (eHTMLTag_table==aParent)) {
result=PR_TRUE;
}
}
#endif
return result;
}
/**
@ -3485,6 +3482,13 @@ nsresult CNavDTD::CreateContextStackFor(eHTMLTags aChildTag){
if(PR_TRUE==bResult){
while(theLen) {
theTag=(eHTMLTags)mScratch[--theLen];
#ifdef ALLOW_TR_AS_CHILD_OF_TABLE
if((eHTML3Text==mDocType) && (eHTMLTag_tbody==theTag)) {
//the prev. condition prevents us from emitting tbody in html3.2 docs; fix bug 30378
continue;
}
#endif
CStartToken *theToken=(CStartToken*)mTokenRecycler->CreateTokenOfType(eToken_start,theTag);
HandleStartToken(theToken); //these should all wind up on contextstack, so don't recycle.
}

Просмотреть файл

@ -402,7 +402,6 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
nsresult HandleCommentToken(CToken* aToken);
nsresult HandleAttributeToken(CToken* aToken);
nsresult HandleScriptToken(const nsIParserNode *aNode);
nsresult HandleStyleToken(CToken* aToken);
nsresult HandleProcessingInstructionToken(CToken* aToken);
nsresult HandleDocTypeDeclToken(CToken* aToken);

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -79,8 +79,8 @@
*
*
*/
#ifndef NS_NAVHTMLDTD__
#define NS_NAVHTMLDTD__
#ifndef NS_OTHERDTD__
#define NS_OTHERDTD__
#include "nsIDTD.h"
#include "nsISupports.h"
@ -259,29 +259,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
*/
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild) const;
/**
* This method is called to determine whether or not a tag
* of one type can contain a tag of another type.
*
* @update gess 3/25/98
* @param aParent -- int tag of parent container
* @param aChild -- int tag of child container
* @return PR_TRUE if parent can contain child
*/
virtual PRBool CanPropagate(eHTMLTags aParent,eHTMLTags aChild,PRBool aParentContains) ;
/**
* This method gets called to determine whether a given
* child tag can be omitted by the given parent.
*
* @update gess 3/25/98
* @param aParent -- parent tag being asked about omitting given child
* @param aChild -- child tag being tested for omittability by parent
* @param aParentContains -- can be 0,1,-1 (false,true, unknown)
* @return PR_TRUE if given tag can be omitted
*/
virtual PRBool CanOmit(eHTMLTags aParent,eHTMLTags aChild,PRBool& aParentContains) ;
/**
* This method gets called to determine whether a given
* tag is itself a container
@ -292,38 +269,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
*/
virtual PRBool IsContainer(PRInt32 aTag) const;
/**
* This method tries to design a context map (without actually
* changing our parser state) from the parent down to the
* child.
*
* @update gess4/6/98
* @param aParent -- tag type of parent
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
virtual PRBool ForwardPropagate(nsString& aSequence,eHTMLTags aParentTag,eHTMLTags aChildTag);
/**
* This method tries to design a context map (without actually
* changing our parser state) from the child up to the parent.
*
* @update gess4/6/98
* @param aParent -- tag type of parent
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
virtual PRBool BackwardPropagate(nsString& aSequence,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
/**
* Attempt forward and/or backward propagation for the given
* child within the current context vector stack.
* @update gess5/11/98
* @param type of child to be propagated.
* @return TRUE if succeeds, otherwise FALSE
*/
nsresult CreateContextStackFor(eHTMLTags aChildTag);
/**
* Ask parser if a given container is open ANYWHERE on stack
* @update gess5/11/98
@ -340,30 +285,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
*/
virtual PRBool HasOpenContainer(const eHTMLTags aTagSet[],PRInt32 aCount) const;
/**
* Accessor that retrieves the tag type of the topmost item on context
* vector stack.
*
* @update gess5/11/98
* @return tag type (may be unknown)
*/
virtual eHTMLTags GetTopNode() const;
/**
* Finds the topmost occurrence of given tag within context vector stack.
* @update gess5/11/98
* @param tag to be found
* @return index of topmost tag occurrence -- may be -1 (kNotFound).
*/
// virtual PRInt32 GetTopmostIndexOf(eHTMLTags aTag) const;
/**
* Finds the topmost occurrence of given tag within context vector stack.
* @update gess5/11/98
* @param tag to be found
* @return index of topmost tag occurrence -- may be -1 (kNotFound).
*/
virtual PRInt32 LastOf(eHTMLTags aTagSet[],PRInt32 aCount) const;
/**
* Use this if you want to stop building the content model
@ -395,13 +316,11 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
* @return error code representing construction state; usually 0.
*/
nsresult HandleStartToken(CToken* aToken);
nsresult HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsIParserNode *aNode);
nsresult HandleEndToken(CToken* aToken);
nsresult HandleEntityToken(CToken* aToken);
nsresult HandleCommentToken(CToken* aToken);
nsresult HandleAttributeToken(CToken* aToken);
nsresult HandleScriptToken(const nsIParserNode *aNode);
nsresult HandleStyleToken(CToken* aToken);
nsresult HandleProcessingInstructionToken(CToken* aToken);
nsresult HandleDocTypeDeclToken(CToken* aToken);
@ -460,20 +379,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
nsresult AddLeaf(const nsIParserNode *aNode);
nsresult AddHeadLeaf(nsIParserNode *aNode);
/**
* This set of methods is used to create and manage the set of
* transient styles that occur as a result of poorly formed HTML
* or bugs in the original navigator.
*
* @update gess5/11/98
* @param aTag -- represents the transient style tag to be handled.
* @return error code -- usually 0
*/
nsresult OpenTransientStyles(eHTMLTags aChildTag);
nsresult CloseTransientStyles(eHTMLTags aChildTag);
nsresult PopStyle(eHTMLTags aTag);
nsresult DoFragment(PRBool aFlag);
protected:
@ -481,8 +386,6 @@ protected:
nsresult CollectSkippedContent(nsCParserNode& aNode,PRInt32& aCount);
nsresult WillHandleStartTag(CToken* aToken,eHTMLTags aChildTag,nsCParserNode& aNode);
nsresult DidHandleStartTag(nsCParserNode& aNode,eHTMLTags aChildTag);
nsresult HandleOmittedTag(CToken* aToken,eHTMLTags aChildTag,eHTMLTags aParent,nsIParserNode *aNode);
nsresult HandleSavedTokens(PRInt32 anIndex);
nsCParserNode* CreateNode(void);
void RecycleNode(nsCParserNode* aNode);
void RecycleNodes(nsEntryStack *aNodeStack);
@ -492,8 +395,6 @@ protected:
nsDTDContext* mHeadContext;
nsDTDContext* mBodyContext;
nsDTDContext* mFormContext;
nsDTDContext* mMapContext;
nsDTDContext* mTempContext;
PRBool mHasOpenForm;
PRBool mHasOpenMap;
PRInt32 mHasOpenHead;
@ -520,6 +421,7 @@ protected:
PRUint32 mExpectedCRC32;
nsAutoString mScratch; //used for various purposes; non-persistent
PRBool mStyleHandlingEnabled;
PRBool mEnableStrict;
eParserDocType mDocType;
#ifdef NS_DEBUG
@ -531,6 +433,7 @@ protected:
extern NS_HTMLPARS nsresult NS_NewOtherHTMLDTD(nsIDTD** aInstancePtrResult);
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -23,6 +23,7 @@
#include "CParserContext.h"
#include "nsToken.h"
#include "prenv.h"
MOZ_DECL_CTOR_COUNTER(CParserContext);
@ -130,7 +131,7 @@ void CParserContext::SetMimeType(const nsString& aMimeType){
mDocType=ePlainText;
if(mMimeType.EqualsWithConversion(kHTMLTextContentType))
mDocType=eHTMLText;
mDocType=eHTML4Text;
else if(mMimeType.EqualsWithConversion(kXMLTextContentType))
mDocType=eXMLText;
else if(mMimeType.EqualsWithConversion(kXULTextContentType))
@ -139,7 +140,148 @@ void CParserContext::SetMimeType(const nsString& aMimeType){
mDocType=eXMLText;
else if(mMimeType.EqualsWithConversion(kXIFTextContentType))
mDocType=eXMLText;
}
/**
 *  Called when it is time to find out which mode the parser/DTD
 *  should run in for this document. (Each parser context can have
 *  its own mode.)
 *
 *  The buffer is scanned for a DOCTYPE declaration, then for an
 *  "?XML" marker, then for the debug-only "NOQUIRKS" marker. The
 *  result is stored in mParseMode; mDocType may be updated as well.
 *  Setting the PARSE_MODE environment variable to "strict" forces
 *  strict mode. If nothing is detected the mode falls back to quirks.
 *
 *  @update  gess 02/17/00
 *  @param   theBuffer -- document text to inspect
 *  @return  the selected parse mode (eParseMode is defined in
 *           nsIParser.h); never eParseMode_unknown
 */
eParseMode CParserContext::DetermineParseMode(const nsString& theBuffer) {
  const char* theModeStr= PR_GetEnv("PARSE_MODE");

  mParseMode = eParseMode_unknown;

  PRInt32 theIndex=theBuffer.Find("<!",PR_FALSE,-1);
  if(kNotFound<theIndex)
    theIndex=theBuffer.Find("DOCTYPE",PR_TRUE,theIndex+1,10);

  if(kNotFound<theIndex) {

    //good, we found "DOCTYPE" -- now go find its end delimiter '>'
    //note that if we don't find '>', then we just scan the first 512 bytes.
    PRInt32 theGTPos=theBuffer.FindChar(kGreaterThan,theIndex+1);
    PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
    PRInt32 theSubIndex=theBuffer.Find("//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
    PRInt32 theErr=0;
    PRInt32 theMajorVersion=3;  //assumed version when none can be read from the doctype

    if(0<=theSubIndex) {
      //a public identifier containing "//DTD" is present; classify it by name.
      PRInt32 theStartPos=theSubIndex+5;
      PRInt32 theCount=theEnd-theStartPos;

      if(kNotFound<theSubIndex) {
        theSubIndex=theBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount);
        if(0<=theSubIndex) {
          mDocType=eXHTMLText;
          mParseMode=eParseMode_strict;
          theMajorVersion=1;
        }
        else {
          //the ISO identifier precedes "//DTD", so search from the start of the doctype.
          theSubIndex=theBuffer.Find("ISO/IEC 15445:",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
          if(0<=theSubIndex) {
            mDocType=eHTML4Text;
            mParseMode=eParseMode_strict;
            theMajorVersion=4;
            theSubIndex+=15;
          }
          else {
            theSubIndex=theBuffer.Find("HTML",PR_TRUE,theStartPos,theCount);
            if(0<=theSubIndex) {
              mDocType=eHTML4Text;
              mParseMode=eParseMode_strict;
              theMajorVersion=3;
            }
            else {
              theSubIndex=theBuffer.Find("HYPERTEXT MARKUP LANGUAGE",PR_TRUE,theStartPos,theCount);
              if(0<=theSubIndex) {
                mDocType=eHTML3Text;
                mParseMode=eParseMode_quirks;
                theSubIndex+=20;
              }
            }
          }
        }
      }

      theStartPos=theSubIndex+5;
      theCount=theEnd-theStartPos;
      nsAutoString theNum;

      //get the next substring from the buffer, which should be a number.
      //now see what the version number is...
      theStartPos=theBuffer.FindCharInSet("123456789",theStartPos);
      if(0<=theStartPos) {
        theBuffer.Mid(theNum,theStartPos-1,3);
        theMajorVersion=theNum.ToInteger(&theErr);
      }

      theStartPos+=3;
      theCount=theEnd-theStartPos;

      //any of these qualifiers downgrades the mode chosen above to "no quirks".
      if((theBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
         (theBuffer.Find("LOOSE",PR_TRUE,theStartPos,theCount)>kNotFound) ||
         (theBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
         (theBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
         (theBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
         (theBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
        mParseMode=eParseMode_noquirks;
      }

      if(eXHTMLText!=mDocType) {
        if (0==theErr){
          switch(theMajorVersion) {
            case 0: case 1: case 2: case 3:
              if(mDocType!=eXHTMLText){
                mParseMode=eParseMode_quirks; //be as backward compatible as possible
                mDocType=eHTML3Text;
              }
              break;

            default:
              if(5<theMajorVersion) {
                mParseMode=eParseMode_noquirks;
              }
              break;
          } //switch
        }
      }

    } //if
    else {
      //no "//DTD" public identifier: look for a bare "HTML" doctype, which is
      //strict only when it names a PublicID or SystemID.
      PRInt32 thePos=theBuffer.Find("HTML",PR_TRUE,1,50);
      if(kNotFound!=thePos) {
        mDocType=eHTML4Text;
        PRInt32 theIDPos=theBuffer.Find("PublicID",thePos);
        if(kNotFound==theIDPos)
          theIDPos=theBuffer.Find("SystemID",thePos);
        mParseMode=(kNotFound==theIDPos) ? eParseMode_quirks : eParseMode_strict;
      }
    }

  }
  else if(kNotFound<(theIndex=theBuffer.Find("?XML",PR_TRUE,0,128))) {
    mParseMode=eParseMode_noquirks;
  }
  else {
    //this is debug only, and will go away by the time we ship...
    theIndex=theBuffer.Find("NOQUIRKS",PR_TRUE,0,128);
    mDocType=eHTML4Text;
    if(kNotFound<theIndex) {
      mParseMode=eParseMode_noquirks;
    }
  }

  //PARSE_MODE=strict overrides whatever was detected; other values are ignored.
  if(theModeStr && (0==nsCRT::strcasecmp(theModeStr,"strict"))) {
    mParseMode=eParseMode_strict;
  }

  //always fall back to quirks, even when PARSE_MODE is set to something
  //other than "strict", so callers never see eParseMode_unknown.
  if(eParseMode_unknown==mParseMode) {
    mParseMode=eParseMode_quirks;
  }

  return mParseMode;
}

Просмотреть файл

@ -58,6 +58,7 @@ public:
PRBool aCopyUnused=PR_FALSE);
CParserContext( const CParserContext& aContext);
eParseMode DetermineParseMode(const nsString& theBuffer);
~CParserContext();

Просмотреть файл

@ -987,7 +987,7 @@ nsresult nsObserverTopic::Notify(eHTMLTags aTag,nsIParserNode& aNode,void* aUniq
mKeys.Push((PRUnichar*)mSourceKey.GetUnicode());
intValue.AppendInt(PRInt32(theCharsetSource),10);
intValue.Append(PRInt32(theCharsetSource),10);
mValues.Push((PRUnichar*)intValue.GetUnicode());
mKeys.Push((PRUnichar*)mDTDKey.GetUnicode());

Просмотреть файл

@ -28,8 +28,6 @@
*/
#include "nsElementTable.h"
#include <fstream.h>
/*****************************************************************************
Now it's time to list all the html elements all with their capabilities...

Просмотреть файл

@ -502,7 +502,8 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
if(aToken) {
((CStartToken*)aToken)->mOrigin=aScanner.GetOffset()-1; // Save the position after '<' for use in recording traling contents. Ref: Bug. 15204.
result= aToken->Consume(aChar,aScanner,eHTMLText==mDocType); //tell new token to finish consuming text...
PRBool isHTML=((eHTML3Text==mDocType) || (eHTML4Text==mDocType));
result= aToken->Consume(aChar,aScanner,isHTML); //tell new token to finish consuming text...
if(NS_SUCCEEDED(result)) {

Просмотреть файл

@ -55,7 +55,7 @@
CLASS_EXPORT_HTMLPARS nsHTMLTokenizer : public nsITokenizer {
public:
nsHTMLTokenizer( PRInt32 aParseMode=eParseMode_quirks,
eParserDocType aDocType=eHTMLText,
eParserDocType aDocType=eHTML3Text,
eParserCommands aCommand=eViewNormal);
virtual ~nsHTMLTokenizer();

Просмотреть файл

@ -248,7 +248,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode
}
}
else {
mTextValue.AssignWithConversion(aChar);
mTextValue.Assign(aChar);
result=aScanner.ReadIdentifier(mTextValue);
mTypeID = nsHTMLTags::LookupTag(mTextValue);
}
@ -1477,7 +1477,7 @@ PRInt32 CWhitespaceToken::GetTokenType(void) {
* @return error result
*/
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
mTextValue.AssignWithConversion(aChar);
mTextValue.Assign(aChar);
nsresult result=aScanner.ReadWhitespace(mTextValue);
if(NS_OK==result) {
mTextValue.StripChar(kCR);
@ -1522,7 +1522,7 @@ CEntityToken::CEntityToken(const nsString& aName) : CHTMLToken(aName) {
*/
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
if(aChar)
mTextValue.AssignWithConversion(aChar);
mTextValue.Assign(aChar);
nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
return result;
}
@ -1688,7 +1688,7 @@ PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
value = nsHTMLEntities::EntityToUnicode(mTextValue);
if(-1<value) {
//we found a named entity...
aString.AssignWithConversion(PRUnichar(value));
aString.Assign(PRUnichar(value));
}
}//else
}//if

Просмотреть файл

@ -55,7 +55,7 @@ enum eParseMode {
eParseMode_unknown=0,
eParseMode_quirks, //pre 5.0 versions
eParseMode_noquirks, //raptor versions...)
eParseMode_other,
eParseMode_strict,
eParseMode_autodetect
};

Просмотреть файл

@ -68,8 +68,10 @@ enum eCRCQuality {
enum eParserDocType {
ePlainText = 0,
eHTMLText,
eXMLText
eXMLText,
eXHTMLText,
eHTML3Text,
eHTML4Text
};

Просмотреть файл

@ -29,11 +29,9 @@
#include "nsString.h"
#include "nsCRT.h"
#include "nsScanner.h"
#include "prenv.h" //this is here for debug reasons...
#include "plstr.h"
#include "nsIParserFilter.h"
#include "nshtmlpars.h"
#include "CNavDTD.h"
#include "nsWellFormedDTD.h"
#include "nsViewSourceHTML.h"
#include "nsHTMLContentSinkStream.h" //this is here so we can get a null sink, which really should be gotten from nsICOntentSink.h
@ -42,6 +40,8 @@
#include "nsIProgressEventSink.h"
#include "nsIBufferInputStream.h"
#include "CRtfDTD.h"
#include "CNavDTD.h"
#include "COtherDTD.h"
//#define rickgdebug
@ -95,7 +95,9 @@ public:
//Note: To cut down on startup time/overhead, we defer the construction of non-html DTD's.
nsIDTD* theDTD;
NS_NewNavHTMLDTD(&theDTD); //do this as the default HTML DTD...
NS_NewNavHTMLDTD(&theDTD); //do this as a default HTML DTD...
mDTDDeque.Push(theDTD);
NS_NewOtherHTMLDTD(&theDTD); //do this as the default DTD for strict documents...
mDTDDeque.Push(theDTD);
mHasViewSourceDTD=PR_FALSE;
@ -481,26 +483,36 @@ PRBool FindSuitableDTD( CParserContext& aParserContext,nsString& aBuffer) {
return PR_FALSE;
}
char* doctypes[] = {
"<!DOCTYPE \"-//W3O//DTD W3 HTML 3.0//EN//\">",
#ifdef NS_DEBUG
static const char* doctypes[] = {
//here are a few HTML doctypes we'll treat as strict...
"<!DOCTYPE HTML PUBLIC PublicID SystemID>",
"<!DOCTYPE HTML SYSTEM SystemID>",
"<!DOCTYPE \"-//W3C//DTD HTML 5.0//EN\">",
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0 STRICT//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.01//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HyperText Markup Language//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">",
"<!DOCTYPE \"-//SoftQuad Software//DTD HoTMetaL PRO 6.::19990601::extensions to HTML 4.//EN\">",
//here are the XHTML doctypes we'll treat as strict...
"<!DOCTYPE \"-//W3C//DTD XHTML 1.0 Strict//EN\">",
"<!DOCTYPE \"-//W3C//DTD XHTML 1.0 Transitional//EN\">",
"<!DOCTYPE \"-//W3C//DTD XHTML 1.0 Frameset//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HyperText Markup Language//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">",
//these we treat as standard (no quirks)...
"<!DOCTYPE \"-//W3C//DTD HTML Experimental 19960712//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.01 Transitional//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.1 Frameset//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0 Transitional//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0 Frameset//EN\">",
"<!DOCTYPE \"-//SoftQuad Software//DTD HoTMetaL PRO 6.::19990601::extensions to HTML 4.//EN\">",
//these we treat as quirks... (along with any other we encounter)...
"<!DOCTYPE \"-//W3O//DTD W3 HTML 3.0//EN//\">",
"<!DOCTYPE \"-//IETF//DTD HTML//EN//3.\">",
"<!DOCTYPE \"-//W3C//DTD W3 HTML 3.0//EN//\">",
"<!DOCTYPE \"-//W3C//DTD W3 HTML 3.0//EN\">",
@ -542,7 +554,6 @@ char* doctypes[] = {
"<!DOCTYPE \"-//WebTechs//DTD Mozilla HTML//EN\">",
"<!DOCTYPE \"-//WebTechs//DTD Mozilla HTML 2//EN\">",
"<!DOCTYPE \"-//Netscape Comm Corp //DTD HTML//EN\">",
"<!DOCTYPE \"-//Netscape Comm Corp //DTD HTML//EN\">",
"<!DOCTYPE \"-//Netscape Comm Corp //DTD Strict HTML//EN\">",
"<!DOCTYPE \"-//Microsoft//DTD Internet Explorer 2.0 HTML//EN\">",
"<!DOCTYPE \"-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN\">",
@ -558,127 +569,16 @@ char* doctypes[] = {
"<!DOCTYPE \"-//O'Reilly and Associates//DTD HTML 2.0//EN\">",
"<!DOCTYPE \"-//SQ//DTD HTML 2. HoTMetaL + extensions//EN\">",
"<!DOCTYPE \"-//Spyglass//DTD HTML 2.0 Extended//EN\">",
"<!DOCTYPE \"+//Silmaril//DTD HTML Pro v0r11 19970101//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML Experimental 19960712//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2 Final//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2 Draft//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML Experimental 970421//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2S Draft//EN\">",
"<!DOCTYPE \"-//IETF//DTD HTML i18n//EN\">",
0
};
#endif
/**
 * This is called when it's time to find out
 * what mode the parser/DTD should run for this document.
 * (Each parsercontext can have its own mode).
 *
 * The parser's scanner buffer is searched for a DOCTYPE declaration,
 * then for "?XML", then for the debug-only "NOQUIRKS" marker. If the
 * PARSE_MODE environment variable equals "other" (compared with
 * strcasecmp), eParseMode_other is returned regardless of what was found.
 *
 * @update gess 02/17/00
 * @param aParser -- parser whose scanner buffer is inspected
 * @return parsermode (define in nsIParser.h); eParseMode_quirks when
 *         nothing was detected, including when the parser has no scanner
 */
static
eParseMode DetermineParseMode(nsParser& aParser) {
const char* theModeStr= PR_GetEnv("PARSE_MODE");
const char* other="other";
eParseMode result=eParseMode_unknown;
nsScanner* theScanner=aParser.GetScanner();
if(theScanner){
nsString& theBuffer=theScanner->GetBuffer();
// NOTE(review): Find arguments are presumably (pattern, ignoreCase, offset, count) -- confirm against nsString.
PRInt32 theIndex=theBuffer.Find("<!",PR_FALSE,-1);
if(kNotFound<theIndex)
theIndex=theBuffer.Find("DOCTYPE",PR_TRUE,theIndex+1,10);
if(kNotFound<theIndex) {
//good, we found "DOCTYPE" -- now go find its end delimiter '>'
PRInt32 theGTPos=theBuffer.FindChar(kGreaterThan,theIndex+1);
PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
PRInt32 theSubIndex=theBuffer.Find("-//W3C//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
//note that if we don't find '>', then we just scan the first 512 bytes.
// theStartPos/theCount are computed before theSubIndex is validated; they are
// only used inside the (kNotFound<theSubIndex) branch below.
PRInt32 theStartPos=theSubIndex+11;
PRInt32 theCount=theEnd-theStartPos;
if(kNotFound<theSubIndex) {
if(kNotFound<(theSubIndex=theBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount))) {
//this logic has been changed so that ALL XHTML doc's are no quirks.
result=eParseMode_noquirks;
}
else if(kNotFound<(theSubIndex=theBuffer.Find("HTML",PR_TRUE,theStartPos,theCount))) {
theStartPos=theSubIndex+5;
theCount=theEnd-theStartPos;
nsAutoString theNum;
// presumably copies 3 characters beginning right after the matched "HTML" -- confirm Mid() semantics.
theBuffer.Mid(theNum,theStartPos-1,3);
//get the next substring from the buffer, which should be a number.
//now see what the version number is...
// NOTE(review): theErr is never inspected after the conversion.
PRInt32 theErr;
PRInt32 theMajorVersion=theNum.ToInteger(&theErr);
switch(theMajorVersion) {
case 4:
// version 4: STRICT selects no-quirks, the listed qualifiers select quirks;
// with neither, result stays unknown and becomes quirks at the final return.
theStartPos=theSubIndex+3;
theCount=theEnd-theStartPos;
if(kNotFound<theBuffer.Find("STRICT",PR_TRUE,theStartPos,theCount)) {
result=eParseMode_noquirks;
}
else
if((theBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
(theBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
(theBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
(theBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
(theBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
result=eParseMode_quirks; // XXX -HACK- Set the appropriate mode.
}
break;
default:
// any other version: below 4 is quirks, above 4 is no-quirks.
result= (theMajorVersion<4) ? eParseMode_quirks : eParseMode_noquirks;
break;
}
}
else if(kNotFound<(theSubIndex=theBuffer.Find("HYPERTEXT MARKUP LANGUAGE",PR_TRUE,theStartPos,theCount))) {
result=eParseMode_quirks;
}
}
// no "-//W3C//DTD" identifier: the ISO doctype is searched from the start of the declaration.
else if(kNotFound<(theSubIndex=theBuffer.Find("ISO/IEC 15445:1999",PR_TRUE,theIndex+8,theEnd-(theIndex+8)))) {
result=eParseMode_noquirks;
}
}
else if(kNotFound<(theIndex=theBuffer.Find("?XML",PR_TRUE,0,128))) {
result=eParseMode_noquirks;
}
else {
//this is debug only, and will go away by the time we ship...
theIndex=theBuffer.Find("NOQUIRKS",PR_TRUE,0,128);
if(kNotFound<theIndex) {
result=eParseMode_noquirks;
}
}
}
// environment override: PARSE_MODE=other wins over anything detected above.
if(theModeStr)
if(0==nsCRT::strcasecmp(other,theModeStr))
return eParseMode_other;
// nothing detected means quirks mode.
return (eParseMode_unknown==result)? eParseMode_quirks:result;
}
/**
* This gets called just prior to the model actually
@ -695,13 +595,32 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){
nsresult result=NS_OK;
#if 0
static PRBool tested=PR_FALSE;
#ifdef NS_DEBUG
if(!tested) {
tested=PR_TRUE;
const char** theDocType=doctypes;
while(*theDocType) {
nsAutoString theType(*theDocType);
eParseMode result=mParserContext->DetermineParseMode(theType);
theDocType++;
}
}
#endif
#endif
if(mParserContext){
if(eUnknownDetect==mParserContext->mAutoDetectStatus) {
mMajorIteration=-1;
mMinorIteration=-1;
if(PR_TRUE==FindSuitableDTD(*mParserContext,mParserContext->mScanner->GetBuffer())) {
mParserContext->mParseMode=DetermineParseMode(*this);
// mParserContext->mStreamListenerState=eOnDataAvail;
nsString& theBuffer=mParserContext->mScanner->GetBuffer();
mParserContext->DetermineParseMode(theBuffer);
if(PR_TRUE==FindSuitableDTD(*mParserContext,theBuffer)) {
mParserContext->mDTD->WillBuildModel( *mParserContext,mSink);
}//if
}//if

Просмотреть файл

@ -221,7 +221,7 @@ CViewSourceHTML::CViewSourceHTML()
mSink=0;
mLineNumber=0;
mTokenizer=0;
mDocType=eHTMLText;
mDocType=eHTML3Text;
#ifdef rickgdebug
gDumpFile = new fstream("c:/temp/viewsource.xml",ios::trunc);
@ -895,7 +895,7 @@ NS_IMETHODIMP CViewSourceHTML::HandleToken(CToken* aToken,nsIParser* aParser) {
case eToken_start:
result=WriteTag(mStartTag,aToken,aToken->GetAttributeCount(),PR_TRUE);
if(((eHTMLText==mDocType) || (eXMLText==mDocType)) && mParser && (NS_OK==result)) {
if((ePlainText!=mDocType) && mParser && (NS_OK==result)) {
CObserverService* theService=mParser->GetObserverService();
if(theService) {
CParserContext* pc=mParser->PeekContext();

Просмотреть файл

@ -23,6 +23,8 @@
//#define ENABLE_CRC
//#define RICKG_DEBUG
#define ENABLE_RESIDUALSTYLE
//#define ALLOW_TR_AS_CHILD_OF_TABLE //by setting this to true, TR is allowable directly in TABLE.
#ifdef RICKG_DEBUG
#include <fstream.h>
#endif
@ -155,7 +157,7 @@ CNavDTD::CNavDTD() : nsIDTD(),
mExpectedCRC32=0;
mDTDState=NS_OK;
mStyleHandlingEnabled=PR_TRUE;
mDocType=eHTMLText;
mDocType=eHTML3Text;
mRequestedHead=PR_FALSE;
mIsFormContainer=PR_FALSE;
@ -369,7 +371,6 @@ PRBool CNavDTD::Verify(nsString& aURLRef,nsIParser* aParser){
eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBuffer, PRInt32 aVersion) {
eAutoDetectResult result=eUnknownDetect;
if(eViewSource==aParserContext.mParserCommand) {
if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kPlainTextContentType)) {
result=ePrimaryDetect;
@ -380,7 +381,7 @@ eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBu
}
else {
if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kHTMLTextContentType)) {
result=ePrimaryDetect;
result=(eParseMode_strict==aParserContext.mParseMode) ? eValidDetect : ePrimaryDetect;
}
else if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kPlainTextContentType)) {
result=ePrimaryDetect;
@ -392,7 +393,10 @@ eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBu
result = eValidDetect ;
if(0==aParserContext.mMimeType.Length()) {
aParserContext.SetMimeType(NS_ConvertToString(kHTMLTextContentType));
result = (theBufHasXML) ? eValidDetect : ePrimaryDetect;
if(!theBufHasXML) {
result=(eParseMode_strict==aParserContext.mParseMode) ? eValidDetect : ePrimaryDetect;
}
else result=eValidDetect;
}
}
}
@ -783,9 +787,6 @@ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){
case eToken_attribute:
result=HandleAttributeToken(theToken); break;
case eToken_style:
result=HandleStyleToken(theToken); break;
case eToken_instruction:
result=HandleProcessingInstructionToken(theToken); break;
@ -1890,21 +1891,6 @@ nsresult CNavDTD::HandleScriptToken(const nsIParserNode *aNode) {
return result;
}
/**
 *  Handler for style tokens seen during the parse. This DTD performs
 *  no work for them; the token pointer is only checked for null via
 *  the precondition.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- the style token being handled (must not be null)
 *  @return  NS_OK in all cases
 */
nsresult CNavDTD::HandleStyleToken(CToken* aToken){
  nsresult result=NS_OK;  //style tokens need no processing here
  NS_PRECONDITION(0!=aToken,kNullToken);
  return result;
}
/**
* This method gets called when an "instruction" token has been
@ -2114,7 +2100,18 @@ nsresult CNavDTD::CollectSkippedContent(nsCParserNode& aNode,PRInt32 &aCount) {
* @return PR_TRUE if parent can contain child
*/
PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const {
return gHTMLElements[aParent].CanContain((eHTMLTags)aChild);
PRBool result=gHTMLElements[aParent].CanContain((eHTMLTags)aChild);
#ifdef ALLOW_TR_AS_CHILD_OF_TABLE
if(!result) {
//XXX This vile hack is here to support bug 30378, which allows
//table to contain tr directly in an html32 document.
if((eHTMLTag_tr==aChild) && (eHTMLTag_table==aParent)) {
result=PR_TRUE;
}
}
#endif
return result;
}
/**
@ -3485,6 +3482,13 @@ nsresult CNavDTD::CreateContextStackFor(eHTMLTags aChildTag){
if(PR_TRUE==bResult){
while(theLen) {
theTag=(eHTMLTags)mScratch[--theLen];
#ifdef ALLOW_TR_AS_CHILD_OF_TABLE
if((eHTML3Text==mDocType) && (eHTMLTag_tbody==theTag)) {
//the prev. condition prevents us from emitting tbody in html3.2 docs; fix bug 30378
continue;
}
#endif
CStartToken *theToken=(CStartToken*)mTokenRecycler->CreateTokenOfType(eToken_start,theTag);
HandleStartToken(theToken); //these should all wind up on contextstack, so don't recycle.
}

Просмотреть файл

@ -402,7 +402,6 @@ CLASS_EXPORT_HTMLPARS CNavDTD : public nsIDTD {
nsresult HandleCommentToken(CToken* aToken);
nsresult HandleAttributeToken(CToken* aToken);
nsresult HandleScriptToken(const nsIParserNode *aNode);
nsresult HandleStyleToken(CToken* aToken);
nsresult HandleProcessingInstructionToken(CToken* aToken);
nsresult HandleDocTypeDeclToken(CToken* aToken);

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -79,8 +79,8 @@
*
*
*/
#ifndef NS_NAVHTMLDTD__
#define NS_NAVHTMLDTD__
#ifndef NS_OTHERDTD__
#define NS_OTHERDTD__
#include "nsIDTD.h"
#include "nsISupports.h"
@ -259,29 +259,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
*/
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild) const;
/**
* This method is called to determine whether or not a tag
* of one type can contain a tag of another type.
*
* @update gess 3/25/98
* @param aParent -- int tag of parent container
* @param aChild -- int tag of child container
* @return PR_TRUE if parent can contain child
*/
virtual PRBool CanPropagate(eHTMLTags aParent,eHTMLTags aChild,PRBool aParentContains) ;
/**
* This method gets called to determine whether a given
* child tag can be omitted by the given parent.
*
* @update gess 3/25/98
* @param aParent -- parent tag being asked about omitting given child
* @param aChild -- child tag being tested for omittability by parent
* @param aParentContains -- can be 0,1,-1 (false,true, unknown)
* @return PR_TRUE if given tag can be omitted
*/
virtual PRBool CanOmit(eHTMLTags aParent,eHTMLTags aChild,PRBool& aParentContains) ;
/**
* This method gets called to determine whether a given
* tag is itself a container
@ -292,38 +269,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
*/
virtual PRBool IsContainer(PRInt32 aTag) const;
/**
* This method tries to design a context map (without actually
* changing our parser state) from the parent down to the
* child.
*
* @update gess4/6/98
* @param aParent -- tag type of parent
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
virtual PRBool ForwardPropagate(nsString& aSequence,eHTMLTags aParentTag,eHTMLTags aChildTag);
/**
* This method tries to design a context map (without actually
* changing our parser state) from the child up to the parent.
*
* @update gess4/6/98
* @param aParent -- tag type of parent
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
virtual PRBool BackwardPropagate(nsString& aSequence,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
/**
* Attempt forward and/or backward propagation for the given
* child within the current context vector stack.
* @update gess5/11/98
* @param type of child to be propagated.
* @return TRUE if succeeds, otherwise FALSE
*/
nsresult CreateContextStackFor(eHTMLTags aChildTag);
/**
* Ask parser if a given container is open ANYWHERE on stack
* @update gess5/11/98
@ -340,30 +285,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
*/
virtual PRBool HasOpenContainer(const eHTMLTags aTagSet[],PRInt32 aCount) const;
/**
* Accessor that retrieves the tag type of the topmost item on context
* vector stack.
*
* @update gess5/11/98
* @return tag type (may be unknown)
*/
virtual eHTMLTags GetTopNode() const;
/**
* Finds the topmost occurrence of given tag within context vector stack.
* @update gess5/11/98
* @param tag to be found
* @return index of topmost tag occurrence -- may be -1 (kNotFound).
*/
// virtual PRInt32 GetTopmostIndexOf(eHTMLTags aTag) const;
/**
* Finds the topmost occurrence of given tag within context vector stack.
* @update gess5/11/98
* @param tag to be found
* @return index of topmost tag occurrence -- may be -1 (kNotFound).
*/
virtual PRInt32 LastOf(eHTMLTags aTagSet[],PRInt32 aCount) const;
/**
* Use this if you want to stop building the content model
@ -395,13 +316,11 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
* @return error code representing construction state; usually 0.
*/
nsresult HandleStartToken(CToken* aToken);
nsresult HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsIParserNode *aNode);
nsresult HandleEndToken(CToken* aToken);
nsresult HandleEntityToken(CToken* aToken);
nsresult HandleCommentToken(CToken* aToken);
nsresult HandleAttributeToken(CToken* aToken);
nsresult HandleScriptToken(const nsIParserNode *aNode);
nsresult HandleStyleToken(CToken* aToken);
nsresult HandleProcessingInstructionToken(CToken* aToken);
nsresult HandleDocTypeDeclToken(CToken* aToken);
@ -460,20 +379,6 @@ CLASS_EXPORT_HTMLPARS COtherDTD : public nsIDTD {
nsresult AddLeaf(const nsIParserNode *aNode);
nsresult AddHeadLeaf(nsIParserNode *aNode);
/**
* This set of methods is used to create and manage the set of
* transient styles that occur as a result of poorly formed HTML
* or bugs in the original navigator.
*
* @update gess5/11/98
* @param aTag -- represents the transient style tag to be handled.
* @return error code -- usually 0
*/
nsresult OpenTransientStyles(eHTMLTags aChildTag);
nsresult CloseTransientStyles(eHTMLTags aChildTag);
nsresult PopStyle(eHTMLTags aTag);
nsresult DoFragment(PRBool aFlag);
protected:
@ -481,8 +386,6 @@ protected:
nsresult CollectSkippedContent(nsCParserNode& aNode,PRInt32& aCount);
nsresult WillHandleStartTag(CToken* aToken,eHTMLTags aChildTag,nsCParserNode& aNode);
nsresult DidHandleStartTag(nsCParserNode& aNode,eHTMLTags aChildTag);
nsresult HandleOmittedTag(CToken* aToken,eHTMLTags aChildTag,eHTMLTags aParent,nsIParserNode *aNode);
nsresult HandleSavedTokens(PRInt32 anIndex);
nsCParserNode* CreateNode(void);
void RecycleNode(nsCParserNode* aNode);
void RecycleNodes(nsEntryStack *aNodeStack);
@ -492,8 +395,6 @@ protected:
nsDTDContext* mHeadContext;
nsDTDContext* mBodyContext;
nsDTDContext* mFormContext;
nsDTDContext* mMapContext;
nsDTDContext* mTempContext;
PRBool mHasOpenForm;
PRBool mHasOpenMap;
PRInt32 mHasOpenHead;
@ -520,6 +421,7 @@ protected:
PRUint32 mExpectedCRC32;
nsAutoString mScratch; //used for various purposes; non-persistent
PRBool mStyleHandlingEnabled;
PRBool mEnableStrict;
eParserDocType mDocType;
#ifdef NS_DEBUG
@ -531,6 +433,7 @@ protected:
extern NS_HTMLPARS nsresult NS_NewOtherHTMLDTD(nsIDTD** aInstancePtrResult);
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -23,6 +23,7 @@
#include "CParserContext.h"
#include "nsToken.h"
#include "prenv.h"
MOZ_DECL_CTOR_COUNTER(CParserContext);
@ -130,7 +131,7 @@ void CParserContext::SetMimeType(const nsString& aMimeType){
mDocType=ePlainText;
if(mMimeType.EqualsWithConversion(kHTMLTextContentType))
mDocType=eHTMLText;
mDocType=eHTML4Text;
else if(mMimeType.EqualsWithConversion(kXMLTextContentType))
mDocType=eXMLText;
else if(mMimeType.EqualsWithConversion(kXULTextContentType))
@ -139,7 +140,148 @@ void CParserContext::SetMimeType(const nsString& aMimeType){
mDocType=eXMLText;
else if(mMimeType.EqualsWithConversion(kXIFTextContentType))
mDocType=eXMLText;
}
/**
 *  Sniffs the start of the given buffer to decide which parse mode (and,
 *  as a side effect, which doctype) this parser context should use.
 *  Each parser context can have its own mode.
 *
 *  Detection order:
 *    1. A "<!" followed shortly by "DOCTYPE":
 *         - with a "//DTD" public identifier: classify by the keywords
 *           XHTML / ISO/IEC 15445: / HTML / HYPERTEXT MARKUP LANGUAGE,
 *           then refine using the version number and the
 *           TRANSITIONAL/LOOSE/FRAMESET/... keywords;
 *         - without one: look for "HTML" and a PublicID/SystemID token.
 *    2. Otherwise "?XML" within the range passed to Find -> noquirks.
 *    3. Otherwise the debug-only "NOQUIRKS" marker -> noquirks.
 *  Finally the PARSE_MODE environment variable may force strict mode, and
 *  any still-undetermined mode defaults to quirks.
 *
 *  Side effects: writes mParseMode and (on most paths) mDocType.
 *
 *  @update  gess 02/17/00
 *  @param   theBuffer -- text to sniff for the doctype
 *  @return  the chosen parse mode (defined in nsIParser.h); this is never
 *           eParseMode_unknown.
 */
eParseMode CParserContext::DetermineParseMode(const nsString& theBuffer) {
  const char* theModeStr=PR_GetEnv("PARSE_MODE");

  mParseMode=eParseMode_unknown;

  PRInt32 theIndex=theBuffer.Find("<!",PR_FALSE,-1);
  if(kNotFound<theIndex)
    theIndex=theBuffer.Find("DOCTYPE",PR_TRUE,theIndex+1,10);

  if(kNotFound<theIndex) {

    //good, we found "DOCTYPE" -- now go find its end delimiter '>'.
    //note that if we don't find '>', then we just scan the first 512 bytes.
    PRInt32 theGTPos=theBuffer.FindChar(kGreaterThan,theIndex+1);
    PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
    PRInt32 theSubIndex=theBuffer.Find("//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
    PRInt32 theErr=0;
    PRInt32 theMajorVersion=3;  //used when no version number can be read

    if(0<=theSubIndex) {

      PRInt32 theStartPos=theSubIndex+5;        //skip past "//DTD"
      PRInt32 theCount=theEnd-theStartPos;

      //classify the public identifier by keyword; the first match wins...
      theSubIndex=theBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount);
      if(0<=theSubIndex) {
        mDocType=eXHTMLText;
        mParseMode=eParseMode_strict;
        theMajorVersion=1;
      }
      else {
        theSubIndex=theBuffer.Find("ISO/IEC 15445:",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
        if(0<=theSubIndex) {
          mDocType=eHTML4Text;
          mParseMode=eParseMode_strict;
          theMajorVersion=4;
          theSubIndex+=15;  //step over the standard's own digits so they aren't read as a version
        }
        else {
          theSubIndex=theBuffer.Find("HTML",PR_TRUE,theStartPos,theCount);
          if(0<=theSubIndex) {
            mDocType=eHTML4Text;
            mParseMode=eParseMode_strict;
            theMajorVersion=3;
          }
          else {
            theSubIndex=theBuffer.Find("HYPERTEXT MARKUP LANGUAGE",PR_TRUE,theStartPos,theCount);
            if(0<=theSubIndex) {
              mDocType=eHTML3Text;
              mParseMode=eParseMode_quirks;
              theSubIndex+=20;
            }
          }
        }
      }

      //get the next substring from the buffer, which should be a number,
      //and see what the version number is...
      nsAutoString theNum;
      theStartPos=theBuffer.FindCharInSet("123456789",theSubIndex+5);
      if(0<=theStartPos) {
        theBuffer.Mid(theNum,theStartPos-1,3);
        theMajorVersion=theNum.ToInteger(&theErr);
      }

      //any of these keywords after the version number relaxes the mode to noquirks...
      theStartPos+=3;
      theCount=theEnd-theStartPos;
      if((theBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
         (theBuffer.Find("LOOSE",PR_TRUE,theStartPos,theCount)>kNotFound)       ||
         (theBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound)    ||
         (theBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound)    ||
         (theBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound)    ||
         (theBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
        mParseMode=eParseMode_noquirks;
      }

      //XHTML keeps whatever was decided above; for everything else the
      //version number (when it parsed cleanly) has the final say.
      if((eXHTMLText!=mDocType) && (0==theErr)) {
        switch(theMajorVersion) {
          case 0: case 1: case 2: case 3:
            mParseMode=eParseMode_quirks; //be as backward compatible as possible
            mDocType=eHTML3Text;
            break;

          default:
            if(5<theMajorVersion) {
              mParseMode=eParseMode_noquirks;
            }
            break;
        } //switch
      }

    } //if
    else {
      //no "//DTD" identifier; accept forms such as
      //<!DOCTYPE HTML PUBLIC PublicID SystemID>
      PRInt32 thePos=theBuffer.Find("HTML",PR_TRUE,1,50);
      if(kNotFound!=thePos) {
        mDocType=eHTML4Text;
        PRInt32 theIDPos=theBuffer.Find("PublicID",thePos);
        if(kNotFound==theIDPos)
          theIDPos=theBuffer.Find("SystemID",thePos);
        mParseMode=(kNotFound==theIDPos) ? eParseMode_quirks : eParseMode_strict;
      }
    }
  }
  else if(kNotFound<(theIndex=theBuffer.Find("?XML",PR_TRUE,0,128))) {
    mParseMode=eParseMode_noquirks;
  }
  else {
    //this is debug only, and will go away by the time we ship...
    theIndex=theBuffer.Find("NOQUIRKS",PR_TRUE,0,128);
    mDocType=eHTML4Text;
    if(kNotFound<theIndex) {
      mParseMode=eParseMode_noquirks;
    }
  }

  //the environment may force strict mode...
  if(theModeStr && (0==nsCRT::strcasecmp(theModeStr,"strict"))) {
    mParseMode=eParseMode_strict;
  }

  //...and whatever is still undetermined defaults to quirks. This must run
  //even when PARSE_MODE is set to some other value, otherwise callers would
  //be handed eParseMode_unknown.
  if(eParseMode_unknown==mParseMode) {
    mParseMode=eParseMode_quirks;
  }

  return mParseMode;
}

Просмотреть файл

@ -58,6 +58,7 @@ public:
PRBool aCopyUnused=PR_FALSE);
CParserContext( const CParserContext& aContext);
eParseMode DetermineParseMode(const nsString& theBuffer);
~CParserContext();

Просмотреть файл

@ -987,7 +987,7 @@ nsresult nsObserverTopic::Notify(eHTMLTags aTag,nsIParserNode& aNode,void* aUniq
mKeys.Push((PRUnichar*)mSourceKey.GetUnicode());
intValue.AppendInt(PRInt32(theCharsetSource),10);
intValue.Append(PRInt32(theCharsetSource),10);
mValues.Push((PRUnichar*)intValue.GetUnicode());
mKeys.Push((PRUnichar*)mDTDKey.GetUnicode());

Просмотреть файл

@ -28,8 +28,6 @@
*/
#include "nsElementTable.h"
#include <fstream.h>
/*****************************************************************************
Now it's time to list all the html elements all with their capabilities...

Просмотреть файл

@ -502,7 +502,8 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
if(aToken) {
((CStartToken*)aToken)->mOrigin=aScanner.GetOffset()-1; // Save the position after '<' for use in recording traling contents. Ref: Bug. 15204.
result= aToken->Consume(aChar,aScanner,eHTMLText==mDocType); //tell new token to finish consuming text...
PRBool isHTML=((eHTML3Text==mDocType) || (eHTML4Text==mDocType));
result= aToken->Consume(aChar,aScanner,isHTML); //tell new token to finish consuming text...
if(NS_SUCCEEDED(result)) {

Просмотреть файл

@ -55,7 +55,7 @@
CLASS_EXPORT_HTMLPARS nsHTMLTokenizer : public nsITokenizer {
public:
nsHTMLTokenizer( PRInt32 aParseMode=eParseMode_quirks,
eParserDocType aDocType=eHTMLText,
eParserDocType aDocType=eHTML3Text,
eParserCommands aCommand=eViewNormal);
virtual ~nsHTMLTokenizer();

Просмотреть файл

@ -248,7 +248,7 @@ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode
}
}
else {
mTextValue.AssignWithConversion(aChar);
mTextValue.Assign(aChar);
result=aScanner.ReadIdentifier(mTextValue);
mTypeID = nsHTMLTags::LookupTag(mTextValue);
}
@ -1477,7 +1477,7 @@ PRInt32 CWhitespaceToken::GetTokenType(void) {
* @return error result
*/
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
mTextValue.AssignWithConversion(aChar);
mTextValue.Assign(aChar);
nsresult result=aScanner.ReadWhitespace(mTextValue);
if(NS_OK==result) {
mTextValue.StripChar(kCR);
@ -1522,7 +1522,7 @@ CEntityToken::CEntityToken(const nsString& aName) : CHTMLToken(aName) {
*/
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
  //Seed the token text with the character that triggered this token, but
  //only when one was actually supplied; a single guarded assignment keeps
  //a zero aChar from being written into mTextValue.
  if(aChar)
    mTextValue.Assign(aChar);

  //Let ConsumeEntity pull the rest of the entity text from the scanner;
  //its status is returned to the caller unchanged.
  nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
  return result;
}
@ -1688,7 +1688,7 @@ PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
value = nsHTMLEntities::EntityToUnicode(mTextValue);
if(-1<value) {
//we found a named entity...
aString.AssignWithConversion(PRUnichar(value));
aString.Assign(PRUnichar(value));
}
}//else
}//if

Просмотреть файл

@ -55,7 +55,7 @@ enum eParseMode {
eParseMode_unknown=0,
eParseMode_quirks, //pre 5.0 versions
eParseMode_noquirks, //raptor versions...)
eParseMode_other,
eParseMode_strict,
eParseMode_autodetect
};

Просмотреть файл

@ -68,8 +68,10 @@ enum eCRCQuality {
// Kind of document a parser context is dealing with.
// The numeric values of the pre-existing enumerators (ePlainText, eHTMLText,
// eXMLText) are kept stable; the newer kinds are appended after them.
enum eParserDocType {
  ePlainText = 0,
  eHTMLText,    // generic HTML; kept so existing values and users stay valid
  eXMLText,
  eXHTMLText,
  eHTML3Text,
  eHTML4Text
};

Просмотреть файл

@ -29,11 +29,9 @@
#include "nsString.h"
#include "nsCRT.h"
#include "nsScanner.h"
#include "prenv.h" //this is here for debug reasons...
#include "plstr.h"
#include "nsIParserFilter.h"
#include "nshtmlpars.h"
#include "CNavDTD.h"
#include "nsWellFormedDTD.h"
#include "nsViewSourceHTML.h"
#include "nsHTMLContentSinkStream.h" //this is here so we can get a null sink, which really should be gotten from nsICOntentSink.h
@ -42,6 +40,8 @@
#include "nsIProgressEventSink.h"
#include "nsIBufferInputStream.h"
#include "CRtfDTD.h"
#include "CNavDTD.h"
#include "COtherDTD.h"
//#define rickgdebug
@ -95,7 +95,9 @@ public:
//Note: To cut down on startup time/overhead, we defer the construction of non-html DTD's.
nsIDTD* theDTD;
NS_NewNavHTMLDTD(&theDTD); //do this as the default HTML DTD...
NS_NewNavHTMLDTD(&theDTD); //do this as a default HTML DTD...
mDTDDeque.Push(theDTD);
NS_NewOtherHTMLDTD(&theDTD); //do this as the default DTD for strict documents...
mDTDDeque.Push(theDTD);
mHasViewSourceDTD=PR_FALSE;
@ -481,26 +483,36 @@ PRBool FindSuitableDTD( CParserContext& aParserContext,nsString& aBuffer) {
return PR_FALSE;
}
char* doctypes[] = {
"<!DOCTYPE \"-//W3O//DTD W3 HTML 3.0//EN//\">",
#ifdef NS_DEBUG
static const char* doctypes[] = {
//here are a few HTML doctypes we'll treat as strict...
"<!DOCTYPE HTML PUBLIC PublicID SystemID>",
"<!DOCTYPE HTML SYSTEM SystemID>",
"<!DOCTYPE \"-//W3C//DTD HTML 5.0//EN\">",
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0 STRICT//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.01//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HyperText Markup Language//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">",
"<!DOCTYPE \"-//SoftQuad Software//DTD HoTMetaL PRO 6.::19990601::extensions to HTML 4.//EN\">",
//here are the XHTML doctypes we'll treat as strict...
"<!DOCTYPE \"-//W3C//DTD XHTML 1.0 Strict//EN\">",
"<!DOCTYPE \"-//W3C//DTD XHTML 1.0 Transitional//EN\">",
"<!DOCTYPE \"-//W3C//DTD XHTML 1.0 Frameset//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HyperText Markup Language//EN\">",
"<!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">",
//these we treat as standard (no quirks)...
"<!DOCTYPE \"-//W3C//DTD HTML Experimental 19960712//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.01 Transitional//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.1 Frameset//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0 Transitional//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 4.0 Frameset//EN\">",
"<!DOCTYPE \"-//SoftQuad Software//DTD HoTMetaL PRO 6.::19990601::extensions to HTML 4.//EN\">",
//these we treat as quirks... (along with any other we encounter)...
"<!DOCTYPE \"-//W3O//DTD W3 HTML 3.0//EN//\">",
"<!DOCTYPE \"-//IETF//DTD HTML//EN//3.\">",
"<!DOCTYPE \"-//W3C//DTD W3 HTML 3.0//EN//\">",
"<!DOCTYPE \"-//W3C//DTD W3 HTML 3.0//EN\">",
@ -542,7 +554,6 @@ char* doctypes[] = {
"<!DOCTYPE \"-//WebTechs//DTD Mozilla HTML//EN\">",
"<!DOCTYPE \"-//WebTechs//DTD Mozilla HTML 2//EN\">",
"<!DOCTYPE \"-//Netscape Comm Corp //DTD HTML//EN\">",
"<!DOCTYPE \"-//Netscape Comm Corp //DTD HTML//EN\">",
"<!DOCTYPE \"-//Netscape Comm Corp //DTD Strict HTML//EN\">",
"<!DOCTYPE \"-//Microsoft//DTD Internet Explorer 2.0 HTML//EN\">",
"<!DOCTYPE \"-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN\">",
@ -558,127 +569,16 @@ char* doctypes[] = {
"<!DOCTYPE \"-//O'Reilly and Associates//DTD HTML 2.0//EN\">",
"<!DOCTYPE \"-//SQ//DTD HTML 2. HoTMetaL + extensions//EN\">",
"<!DOCTYPE \"-//Spyglass//DTD HTML 2.0 Extended//EN\">",
"<!DOCTYPE \"+//Silmaril//DTD HTML Pro v0r11 19970101//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML Experimental 19960712//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2 Final//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2 Draft//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML Experimental 970421//EN\">",
"<!DOCTYPE \"-//W3C//DTD HTML 3.2S Draft//EN\">",
"<!DOCTYPE \"-//IETF//DTD HTML i18n//EN\">",
0
};
#endif
/**
* This is called when it's time to find out
* what mode the parser/DTD should run for this document.
* (Each parsercontext can have its own mode).
*
* The mode is derived from the scanner's buffer: a DOCTYPE carrying a
* "-//W3C//DTD" or "ISO/IEC 15445:1999" identifier, an "?XML" marker, or
* the debug-only "NOQUIRKS" marker. Setting the PARSE_MODE environment
* variable to "other" (case-insensitive) overrides whatever was detected.
*
* @update gess 02/17/00
* @param aParser -- parser whose scanner buffer gets examined
* @return parsermode (define in nsIParser.h); eParseMode_quirks when
*         nothing conclusive was found (or when the parser has no scanner).
*/
static
eParseMode DetermineParseMode(nsParser& aParser) {
const char* theModeStr= PR_GetEnv("PARSE_MODE");
const char* other="other";
eParseMode result=eParseMode_unknown;
nsScanner* theScanner=aParser.GetScanner();
if(theScanner){
nsString& theBuffer=theScanner->GetBuffer();
// Locate "<!" and then "DOCTYPE" shortly after it.
// NOTE(review): the exact meaning of the extra Find() arguments
// (presumably ignore-case, offset, count) is not visible here -- confirm.
PRInt32 theIndex=theBuffer.Find("<!",PR_FALSE,-1);
if(kNotFound<theIndex)
theIndex=theBuffer.Find("DOCTYPE",PR_TRUE,theIndex+1,10);
if(kNotFound<theIndex) {
//good, we found "DOCTYPE" -- now go find its end delimiter '>'
PRInt32 theGTPos=theBuffer.FindChar(kGreaterThan,theIndex+1);
PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
PRInt32 theSubIndex=theBuffer.Find("-//W3C//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
//note that if we don't find '>', then we just scan the first 512 bytes.
// theStartPos/theCount are computed before theSubIndex is tested; they are
// only used inside the branch below, where theSubIndex is known to be valid.
PRInt32 theStartPos=theSubIndex+11;
PRInt32 theCount=theEnd-theStartPos;
if(kNotFound<theSubIndex) {
// W3C public identifier: classify by the keyword that follows it.
if(kNotFound<(theSubIndex=theBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount))) {
//this logic has been changed so that ALL XHTML doc's are no quirks.
result=eParseMode_noquirks;
}
else if(kNotFound<(theSubIndex=theBuffer.Find("HTML",PR_TRUE,theStartPos,theCount))) {
theStartPos=theSubIndex+5;
theCount=theEnd-theStartPos;
nsAutoString theNum;
theBuffer.Mid(theNum,theStartPos-1,3);
//get the next substring from the buffer, which should be a number.
//now see what the version number is...
// NOTE(review): theErr is filled in by ToInteger() but never checked, so a
// failed conversion is handled by whichever switch case its return value hits.
PRInt32 theErr;
PRInt32 theMajorVersion=theNum.ToInteger(&theErr);
switch(theMajorVersion) {
case 4:
// HTML 4: STRICT selects noquirks; the transitional/frameset/entity-set
// keywords select quirks; with none of them result stays unknown here and
// becomes quirks at the final return.
theStartPos=theSubIndex+3;
theCount=theEnd-theStartPos;
if(kNotFound<theBuffer.Find("STRICT",PR_TRUE,theStartPos,theCount)) {
result=eParseMode_noquirks;
}
else
if((theBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
(theBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
(theBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
(theBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
(theBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
result=eParseMode_quirks; // XXX -HACK- Set the appropriate mode.
}
break;
default:
// Anything below version 4 is quirks; anything above is noquirks.
result= (theMajorVersion<4) ? eParseMode_quirks : eParseMode_noquirks;
break;
}
}
else if(kNotFound<(theSubIndex=theBuffer.Find("HYPERTEXT MARKUP LANGUAGE",PR_TRUE,theStartPos,theCount))) {
result=eParseMode_quirks;
}
}
// No W3C identifier: the ISO/IEC 15445:1999 identifier is treated as noquirks.
else if(kNotFound<(theSubIndex=theBuffer.Find("ISO/IEC 15445:1999",PR_TRUE,theIndex+8,theEnd-(theIndex+8)))) {
result=eParseMode_noquirks;
}
}
// No DOCTYPE at all: an "?XML" marker selects noquirks.
else if(kNotFound<(theIndex=theBuffer.Find("?XML",PR_TRUE,0,128))) {
result=eParseMode_noquirks;
}
else {
//this is debug only, and will go away by the time we ship...
theIndex=theBuffer.Find("NOQUIRKS",PR_TRUE,0,128);
if(kNotFound<theIndex) {
result=eParseMode_noquirks;
}
}
}
// The environment override wins over anything detected above.
if(theModeStr)
if(0==nsCRT::strcasecmp(other,theModeStr))
return eParseMode_other;
// Default to quirks when nothing conclusive was found.
return (eParseMode_unknown==result)? eParseMode_quirks:result;
}
/**
* This gets called just prior to the model actually
@ -695,13 +595,32 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){
nsresult result=NS_OK;
#if 0
static PRBool tested=PR_FALSE;
#ifdef NS_DEBUG
if(!tested) {
tested=PR_TRUE;
const char** theDocType=doctypes;
while(*theDocType) {
nsAutoString theType(*theDocType);
eParseMode result=mParserContext->DetermineParseMode(theType);
theDocType++;
}
}
#endif
#endif
if(mParserContext){
if(eUnknownDetect==mParserContext->mAutoDetectStatus) {
mMajorIteration=-1;
mMinorIteration=-1;
if(PR_TRUE==FindSuitableDTD(*mParserContext,mParserContext->mScanner->GetBuffer())) {
mParserContext->mParseMode=DetermineParseMode(*this);
// mParserContext->mStreamListenerState=eOnDataAvail;
nsString& theBuffer=mParserContext->mScanner->GetBuffer();
mParserContext->DetermineParseMode(theBuffer);
if(PR_TRUE==FindSuitableDTD(*mParserContext,theBuffer)) {
mParserContext->mDTD->WillBuildModel( *mParserContext,mSink);
}//if
}//if

Просмотреть файл

@ -221,7 +221,7 @@ CViewSourceHTML::CViewSourceHTML()
mSink=0;
mLineNumber=0;
mTokenizer=0;
mDocType=eHTMLText;
mDocType=eHTML3Text;
#ifdef rickgdebug
gDumpFile = new fstream("c:/temp/viewsource.xml",ios::trunc);
@ -895,7 +895,7 @@ NS_IMETHODIMP CViewSourceHTML::HandleToken(CToken* aToken,nsIParser* aParser) {
case eToken_start:
result=WriteTag(mStartTag,aToken,aToken->GetAttributeCount(),PR_TRUE);
if(((eHTMLText==mDocType) || (eXMLText==mDocType)) && mParser && (NS_OK==result)) {
if((ePlainText!=mDocType) && mParser && (NS_OK==result)) {
CObserverService* theService=mParser->GetObserverService();
if(theService) {
CParserContext* pc=mParser->PeekContext();